X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=EM.cpp;h=318b7eba94cfd9a31cb3009f2b6cf0d627b59df1;hb=e123915d329682a1712bf77204e556baadb69a2f;hp=00cb8527afe966771b61408debe86a74ad0294a1;hpb=a95154919f950f86de9104b2b9dcf1f0c7e83387;p=rsem.git diff --git a/EM.cpp b/EM.cpp index 00cb852..318b7eb 100644 --- a/EM.cpp +++ b/EM.cpp @@ -64,6 +64,7 @@ bool genGibbsOut; // generate file for Gibbs sampler char refName[STRLEN], imdName[STRLEN], outName[STRLEN]; char refF[STRLEN], groupF[STRLEN], cntF[STRLEN], tiF[STRLEN]; char mparamsF[STRLEN], bmparamsF[STRLEN]; +char modelF[STRLEN], thetaF[STRLEN]; char inpSamType; char *pt_fn_list, *pt_chr_list; @@ -301,35 +302,19 @@ void calcExpectedEffectiveLengths(ModelType& model) { template void writeResults(ModelType& model, double* counts) { double denom; - char modelF[STRLEN], thetaF[STRLEN]; char outF[STRLEN]; FILE *fo; sprintf(modelF, "%s.model", outName); model.write(modelF); - sprintf(thetaF, "%s.theta", outName); - fo = fopen(thetaF, "w"); - fprintf(fo, "%d\n", M + 1); - for (int i = 0; i < M; i++) fprintf(fo, "%.15g ", theta[i]); - fprintf(fo, "%.15g\n", theta[M]); - fclose(fo); - - - //calculate normalized read fraction - double *nrf = new double[M + 1]; - memset(nrf, 0, sizeof(double) * (M + 1)); - denom = 1.0 - theta[0]; - if (denom <= 0) { fprintf(stderr, "No alignable reads?!\n"); exit(-1); } - for (int i = 1; i <= M; i++) nrf[i] = theta[i] / denom; - //calculate tau values double *tau = new double[M + 1]; memset(tau, 0, sizeof(double) * (M + 1)); denom = 0.0; for (int i = 1; i <= M; i++) - if (eel[i] > EPSILON) { + if (eel[i] >= EPSILON) { tau[i] = theta[i] / eel[i]; denom += tau[i]; } @@ -348,8 +333,6 @@ void writeResults(ModelType& model, double* counts) { } for (int i = 1; i <= M; i++) fprintf(fo, "%.2f%c", counts[i], (i < M ? '\t' : '\n')); - for (int i = 1; i <= M; i++) - fprintf(fo, "%.15g%c", nrf[i], (i < M ? '\t' : '\n')); for (int i = 1; i <= M; i++) fprintf(fo, "%.15g%c", tau[i], (i < M ? '\t' : '\n')); for (int i = 1; i <= M; i++) { @@ -371,12 +354,6 @@ void writeResults(ModelType& model, double* counts) { for (int j = b; j < e; j++) sumC += counts[j]; fprintf(fo, "%.2f%c", sumC, (i < m - 1 ? '\t' : '\n')); } - for (int i = 0; i < m; i++) { - double sumN = 0.0; // sum of normalized read fraction - int b = gi.spAt(i), e = gi.spAt(i + 1); - for (int j = b; j < e; j++) sumN += nrf[j]; - fprintf(fo, "%.15g%c", sumN, (i < m - 1 ? '\t' : '\n')); - } for (int i = 0; i < m; i++) { double sumT = 0.0; // sum of tau values int b = gi.spAt(i), e = gi.spAt(i + 1); @@ -391,7 +368,6 @@ void writeResults(ModelType& model, double* counts) { } fclose(fo); - delete[] nrf; delete[] tau; if (verbose) { printf("Expression Results are written!\n"); } @@ -425,6 +401,8 @@ inline bool doesUpdateModel(int ROUND) { //Including initialize, algorithm and results saving template void EM() { + FILE *fo; + int ROUND; double sum; @@ -443,6 +421,7 @@ void EM() { void *status; int rc; + //initialize boolean variables updateModel = calcExpectedWeights = false; @@ -472,7 +451,6 @@ void EM() { pthread_attr_init(&attr); pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE); - ROUND = 0; do { ++ROUND; @@ -532,17 +510,6 @@ void EM() { if (totNum > 0) fprintf(stderr, "Warning: RSEM reaches %d iterations before meeting the convergence criteria.\n", MAX_ROUND); - //calculate expected effective lengths for each isoform - calcExpectedEffectiveLengths(model); - - //correct theta vector - sum = theta[0]; - for (int i = 1; i <= M; i++) - if (eel[i] < EPSILON) { theta[i] = 0.0; } - else sum += theta[i]; - if (sum < EPSILON) { fprintf(stderr, "No Expected Effective Length is no less than %.6g?!\n", MINEEL); exit(-1); } - for (int i = 0; i <= M; i++) theta[i] /= sum; - //generate output file used by Gibbs sampler if (genGibbsOut) { if (model.getNeedCalcConPrb()) { @@ -558,7 +525,7 @@ void EM() { model.setNeedCalcConPrb(false); sprintf(out_for_gibbs_F, "%s.ofg", imdName); - FILE *fo = fopen(out_for_gibbs_F, "w"); + fo = fopen(out_for_gibbs_F, "w"); fprintf(fo, "%d %d\n", M, N0); for (int i = 0; i < nThreads; i++) { int numN = hitvs[i]->getN(); @@ -567,7 +534,7 @@ void EM() { int to = hitvs[i]->getSAt(j + 1); int totNum = 0; - if (ncpvs[i][j] > 0.0) { ++totNum; fprintf(fo, "%d %.15g ", 0, ncpvs[i][j]); } + if (ncpvs[i][j] >= EPSILON) { ++totNum; fprintf(fo, "%d %.15g ", 0, ncpvs[i][j]); } for (int k = fr; k < to; k++) { HitType &hit = hitvs[i]->getHitAt(k); if (hit.getConPrb() >= EPSILON) { @@ -580,8 +547,33 @@ void EM() { } } fclose(fo); + + char scoreF[STRLEN]; + sprintf(scoreF, "%s.ns", imdName); + fo = fopen(scoreF, "w"); + fprintf(fo, "%.15g\n", model.getLogP()); + fclose(fo); } + sprintf(thetaF, "%s.theta", outName); + fo = fopen(thetaF, "w"); + fprintf(fo, "%d\n", M + 1); + + // output theta' + for (int i = 0; i < M; i++) fprintf(fo, "%.15g ", theta[i]); + fprintf(fo, "%.15g\n", theta[M]); + + //calculate expected effective lengths for each isoform + calcExpectedEffectiveLengths(model); + + //correct theta vector + sum = theta[0]; + for (int i = 1; i <= M; i++) + if (eel[i] < EPSILON) { theta[i] = 0.0; } + else sum += theta[i]; + if (sum < EPSILON) { fprintf(stderr, "No Expected Effective Length is no less than %.6g?!\n", MINEEL); exit(-1); } + for (int i = 0; i <= M; i++) theta[i] /= sum; + //calculate expected weights and counts using learned parameters updateModel = false; calcExpectedWeights = true; for (int i = 0; i < nThreads; i++) { @@ -605,8 +597,7 @@ void EM() { /* destroy attribute */ pthread_attr_destroy(&attr); - - //for all + //convert theta' to theta double *mw = model.getMW(); sum = 0.0; for (int i = 0; i <= M; i++) { @@ -616,6 +607,12 @@ void EM() { assert(sum >= EPSILON); for (int i = 0; i <= M; i++) theta[i] /= sum; + // output theta + for (int i = 0; i < M; i++) fprintf(fo, "%.15g ", theta[i]); + fprintf(fo, "%.15g\n", theta[M]); + + fclose(fo); + writeResults(model, countvs[0]); if (genBamF) {