X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=EM.cpp;h=318b7eba94cfd9a31cb3009f2b6cf0d627b59df1;hb=e123915d329682a1712bf77204e556baadb69a2f;hp=00cb8527afe966771b61408debe86a74ad0294a1;hpb=a95154919f950f86de9104b2b9dcf1f0c7e83387;p=rsem.git

diff --git a/EM.cpp b/EM.cpp
index 00cb852..318b7eb 100644
--- a/EM.cpp
+++ b/EM.cpp
@@ -64,6 +64,7 @@ bool genGibbsOut; // generate file for Gibbs sampler
 char refName[STRLEN], imdName[STRLEN], outName[STRLEN];
 char refF[STRLEN], groupF[STRLEN], cntF[STRLEN], tiF[STRLEN];
 char mparamsF[STRLEN], bmparamsF[STRLEN];
+char modelF[STRLEN], thetaF[STRLEN];
 
 char inpSamType;
 char *pt_fn_list, *pt_chr_list;
@@ -301,35 +302,19 @@ void calcExpectedEffectiveLengths(ModelType& model) {
 template<class ModelType>
 void writeResults(ModelType& model, double* counts) {
 	double denom;
-	char modelF[STRLEN], thetaF[STRLEN];
 	char outF[STRLEN];
 	FILE *fo;
 
 	sprintf(modelF, "%s.model", outName);
 	model.write(modelF);
 
-	sprintf(thetaF, "%s.theta", outName);
-	fo = fopen(thetaF, "w");
-	fprintf(fo, "%d\n", M + 1);
-	for (int i = 0; i < M; i++) fprintf(fo, "%.15g ", theta[i]);
-	fprintf(fo, "%.15g\n", theta[M]);
-	fclose(fo);
-
-
-	//calculate normalized read fraction
-	double *nrf = new double[M + 1];
-	memset(nrf, 0, sizeof(double) * (M + 1));
-	denom = 1.0 - theta[0];
-	if (denom <= 0) { fprintf(stderr, "No alignable reads?!\n"); exit(-1); }
-	for (int i = 1; i <= M; i++) nrf[i] = theta[i] / denom;
-
 	//calculate tau values
 	double *tau = new double[M + 1];
 	memset(tau, 0, sizeof(double) * (M + 1));
 
 	denom = 0.0;
 	for (int i = 1; i <= M; i++) 
-	  if (eel[i] > EPSILON) {
+	  if (eel[i] >= EPSILON) {
 	    tau[i] = theta[i] / eel[i];
 	    denom += tau[i];
 	  }   
@@ -348,8 +333,6 @@ void writeResults(ModelType& model, double* counts) {
 	}
 	for (int i = 1; i <= M; i++)
 		fprintf(fo, "%.2f%c", counts[i], (i < M ? '\t' : '\n'));
-	for (int i = 1; i <= M; i++)
-		fprintf(fo, "%.15g%c", nrf[i], (i < M ? '\t' : '\n'));
 	for (int i = 1; i <= M; i++)
 		fprintf(fo, "%.15g%c", tau[i], (i < M ? '\t' : '\n'));
 	for (int i = 1; i <= M; i++) {
@@ -371,12 +354,6 @@ void writeResults(ModelType& model, double* counts) {
 		for (int j = b; j < e; j++) sumC += counts[j];
 		fprintf(fo, "%.2f%c", sumC, (i < m - 1 ? '\t' : '\n'));
 	}
-	for (int i = 0; i < m; i++) {
-		double sumN = 0.0; // sum of normalized read fraction
-		int b = gi.spAt(i), e = gi.spAt(i + 1);
-		for (int j = b; j < e; j++) sumN += nrf[j];
-		fprintf(fo, "%.15g%c", sumN, (i < m - 1 ? '\t' : '\n'));
-	}
 	for (int i = 0; i < m; i++) {
 		double sumT = 0.0; // sum of tau values
 		int b = gi.spAt(i), e = gi.spAt(i + 1);
@@ -391,7 +368,6 @@ void writeResults(ModelType& model, double* counts) {
 	}
 	fclose(fo);
 
-	delete[] nrf;
 	delete[] tau;
 
 	if (verbose) { printf("Expression Results are written!\n"); }
@@ -425,6 +401,8 @@ inline bool doesUpdateModel(int ROUND) {
 //Including initialize, algorithm and results saving
 template<class ReadType, class HitType, class ModelType>
 void EM() {
+	FILE *fo;
+
 	int ROUND;
 	double sum;
 
@@ -443,6 +421,7 @@ void EM() {
 	void *status;
 	int rc;
 
+
 	//initialize boolean variables
 	updateModel = calcExpectedWeights = false;
 
@@ -472,7 +451,6 @@ void EM() {
 	pthread_attr_init(&attr);
 	pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);
 
-
 	ROUND = 0;
 	do {
 		++ROUND;
@@ -532,17 +510,6 @@ void EM() {
 
 	if (totNum > 0) fprintf(stderr, "Warning: RSEM reaches %d iterations before meeting the convergence criteria.\n", MAX_ROUND);
 
-	//calculate expected effective lengths for each isoform
-	calcExpectedEffectiveLengths<ModelType>(model);
-
-	//correct theta vector
-	sum = theta[0];
-	for (int i = 1; i <= M; i++) 
-	  if (eel[i] < EPSILON) { theta[i] = 0.0; }
-	  else sum += theta[i];
-	if (sum < EPSILON) { fprintf(stderr, "No Expected Effective Length is no less than %.6g?!\n", MINEEL); exit(-1); }
-	for (int i = 0; i <= M; i++) theta[i] /= sum;
-
 	//generate output file used by Gibbs sampler
 	if (genGibbsOut) {
 		if (model.getNeedCalcConPrb()) {
@@ -558,7 +525,7 @@ void EM() {
 		model.setNeedCalcConPrb(false);
 
 		sprintf(out_for_gibbs_F, "%s.ofg", imdName);
-		FILE *fo = fopen(out_for_gibbs_F, "w");
+		fo = fopen(out_for_gibbs_F, "w");
 		fprintf(fo, "%d %d\n", M, N0);
 		for (int i = 0; i < nThreads; i++) {
 			int numN = hitvs[i]->getN();
@@ -567,7 +534,7 @@ void EM() {
 				int to = hitvs[i]->getSAt(j + 1);
 				int totNum = 0;
 
-				if (ncpvs[i][j] > 0.0) { ++totNum; fprintf(fo, "%d %.15g ", 0, ncpvs[i][j]); }
+				if (ncpvs[i][j] >= EPSILON) { ++totNum; fprintf(fo, "%d %.15g ", 0, ncpvs[i][j]); }
 				for (int k = fr; k < to; k++) {
 					HitType &hit = hitvs[i]->getHitAt(k);
 					if (hit.getConPrb() >= EPSILON) {
@@ -580,8 +547,33 @@ void EM() {
 			}
 		}
 		fclose(fo);
+
+		char scoreF[STRLEN];
+		sprintf(scoreF, "%s.ns", imdName);
+		fo = fopen(scoreF, "w");
+		fprintf(fo, "%.15g\n", model.getLogP());
+		fclose(fo);
 	}
 
+	sprintf(thetaF, "%s.theta", outName);
+	fo = fopen(thetaF, "w");
+	fprintf(fo, "%d\n", M + 1);
+
+	// output theta'
+	for (int i = 0; i < M; i++) fprintf(fo, "%.15g ", theta[i]);
+	fprintf(fo, "%.15g\n", theta[M]);
+	
+	//calculate expected effective lengths for each isoform
+	calcExpectedEffectiveLengths<ModelType>(model);
+
+	//correct theta vector
+	sum = theta[0];
+	for (int i = 1; i <= M; i++) 
+	  if (eel[i] < EPSILON) { theta[i] = 0.0; }
+	  else sum += theta[i];
+	if (sum < EPSILON) { fprintf(stderr, "No Expected Effective Length is no less than %.6g?!\n", MINEEL); exit(-1); }
+	for (int i = 0; i <= M; i++) theta[i] /= sum;
+
 	//calculate expected weights and counts using learned parameters
 	updateModel = false; calcExpectedWeights = true;
 	for (int i = 0; i < nThreads; i++) {
@@ -605,8 +597,7 @@ void EM() {
 	/* destroy attribute */
 	pthread_attr_destroy(&attr);
 
-	
-	//for all
+	//convert theta' to theta
        	double *mw = model.getMW();
 	sum = 0.0;
 	for (int i = 0; i <= M; i++) {
@@ -616,6 +607,12 @@ void EM() {
 	assert(sum >= EPSILON);
 	for (int i = 0; i <= M; i++) theta[i] /= sum;
 
+	// output theta
+	for (int i = 0; i < M; i++) fprintf(fo, "%.15g ", theta[i]);
+	fprintf(fo, "%.15g\n", theta[M]);
+
+	fclose(fo);
+
 	writeResults<ModelType>(model, countvs[0]);
 
 	if (genBamF) {