From: Bo Li <bli@cs.wisc.edu>
Date: Sat, 31 Dec 2011 01:47:09 +0000 (-0600)
Subject: relax the requirement about GTF attribute format
X-Git-Url: https://git.donarmstrong.com/?a=commitdiff_plain;h=da827678b21e94c74fd17c9b8683edb60f73e814;p=rsem.git

relax the requirement about GTF attribute format
---

diff --git a/EM.cpp b/EM.cpp
index f9449a8..cf53fc3 100644
--- a/EM.cpp
+++ b/EM.cpp
@@ -465,14 +465,18 @@ void EM() {
 		//E step
 		for (int i = 0; i < nThreads; i++) {
 			rc = pthread_create(&threads[i], &attr, E_STEP<ReadType, HitType, ModelType>, (void*)(&fparams[i]));
-			if (rc != 0) { fprintf(stderr, "Cannot create thread %d at ROUND %d! (numbered from 0)\n", i, ROUND); exit(-1); }
-			//assert(rc == 0);
+			if (rc != 0) {
+				fprintf(stderr, "Cannot create thread %d at ROUND %d! (numbered from 0)", i, ROUND);
+				pthread_exception(rc);
+			}
 		}
 
 		for (int i = 0; i < nThreads; i++) {
 			rc = pthread_join(threads[i], &status);
-			if (rc != 0) { fprintf(stderr, "Cannot join thread %d at ROUND %d! (numbered from 0)\n", i, ROUND); exit(-1); }
-			//assert(rc == 0);
+			if (rc != 0) {
+				fprintf(stderr, "Cannot join thread %d at ROUND %d! (numbered from 0)\n", i, ROUND);
+				pthread_exception(rc);
+			}
 		}
 
 		model.setNeedCalcConPrb(false);
@@ -518,11 +522,17 @@ void EM() {
 		if (model.getNeedCalcConPrb()) {
 			for (int i = 0; i < nThreads; i++) {
 				rc = pthread_create(&threads[i], &attr, calcConProbs<ReadType, HitType, ModelType>, (void*)(&fparams[i]));
-				if (rc != 0) { fprintf(stderr, "Cannot create thread %d when generate files for Gibbs sampler! (numbered from 0)\n", i); exit(-1); }
+				if (rc != 0) {
+					fprintf(stderr, "Cannot create thread %d when generate files for Gibbs sampler! (numbered from 0)\n", i);
+					pthread_exception(rc);
+				}
 			}
 			for (int i = 0; i < nThreads; i++) {
 				rc = pthread_join(threads[i], &status);
-				if (rc != 0) { fprintf(stderr, "Cannot join thread %d when generate files for Gibbs sampler! (numbered from 0)\n", i); exit(-1); }
+				if (rc != 0) {
+					fprintf(stderr, "Cannot join thread %d when generate files for Gibbs sampler! (numbered from 0)\n", i);
+					pthread_exception(rc);
+				}
 			}
 		}
 		model.setNeedCalcConPrb(false);
@@ -576,13 +586,17 @@ void EM() {
 	for (int i = 0; i <= M; i++) probv[i] = theta[i];
 	for (int i = 0; i < nThreads; i++) {
 		rc = pthread_create(&threads[i], &attr, E_STEP<ReadType, HitType, ModelType>, (void*)(&fparams[i]));
-		if (rc != 0) { fprintf(stderr, "Cannot create thread %d when calculate expected weights! (numbered from 0)\n", i); exit(-1); }
-		//assert(rc == 0);
+		if (rc != 0) {
+			fprintf(stderr, "Cannot create thread %d when calculate expected weights! (numbered from 0)\n", i);
+			pthread_exception(rc);
+		}
 	}
 	for (int i = 0; i < nThreads; i++) {
 		rc = pthread_join(threads[i], &status);
-		if (rc != 0) { fprintf(stderr, "Cannot join thread %d! (numbered from 0) when calculate expected weights!\n", i); exit(-1); }
-		//assert(rc == 0);
+		if (rc != 0) {
+			fprintf(stderr, "Cannot join thread %d! (numbered from 0) when calculate expected weights!\n", i);
+			pthread_exception(rc);
+		}
 	}
 	model.setNeedCalcConPrb(false);
 	for (int i = 1; i < nThreads; i++) {
@@ -596,7 +610,7 @@ void EM() {
 	pthread_attr_destroy(&attr);
 
 	//convert theta' to theta
-       	double *mw = model.getMW();
+    double *mw = model.getMW();
 	sum = 0.0;
 	for (int i = 0; i <= M; i++) {
 	  theta[i] = (mw[i] < EPSILON ? 0.0 : theta[i] / mw[i]);
diff --git a/GTFItem.h b/GTFItem.h
index e2403d9..4e50270 100644
--- a/GTFItem.h
+++ b/GTFItem.h
@@ -54,19 +54,34 @@ class GTFItem {
     strand = tmp[0];
     getline(strin, frame, '\t');
 
-    getline(strin, tmp, ';'); tmp = cleanStr(tmp);
-    my_assert((tmp.substr(0, 7) == "gene_id"), line, "Identifier should be gene_id!");
-    tmp = cleanStr(tmp.substr(7));
-    my_assert((tmp[0] == '"' && tmp[tmp.length() - 1] == '"'), line, "Textual attributes should be surrounded by doublequotes!");
-    gene_id = tmp.substr(1, tmp.length() - 2);
-
-    getline(strin, tmp, ';'); tmp = cleanStr(tmp);
-    my_assert((tmp.substr(0, 13) == "transcript_id"), line, "Identifier should be transcript_id!");
-    tmp = cleanStr(tmp.substr(13));
-    my_assert((tmp[0] == '"' && tmp[tmp.length() - 1] == '"'), line, "Textual attributes should be surrounded by doublequotes!");
-    transcript_id = tmp.substr(1, tmp.length() - 2);
-
-    getline(strin, left);
+    getline(strin, left); // assign attributes and possible comments into "left"
+
+    strin.clear(); strin.str(left);
+    bool find_gene_id = false, find_transcript_id = false;
+
+    while (getline(strin, tmp, ';') && (!find_gene_id || !find_transcript_id)) {
+    	tmp = cleanStr(tmp);
+    	size_t pos = tmp.find(' ');
+    	my_assert((pos != std::string::npos), line, "Cannot separate the identifier from the value for attribute " + tmp + "!");
+    	std::string identifier = tmp.substr(0, pos);
+
+    	if (identifier == "gene_id") {
+    		my_assert(!find_gene_id, line, "gene_id appear more than once!");
+    	    tmp = cleanStr(tmp.substr(pos));
+    	    my_assert((tmp[0] == '"' && tmp[tmp.length() - 1] == '"'), line, "Textual attributes should be surrounded by doublequotes!");
+    	    gene_id = tmp.substr(1, tmp.length() - 2);
+    	    find_gene_id = true;
+    	} else if (identifier == "transcript_id") {
+    		my_assert(!find_transcript_id, line, "transcript_id appear more than once!");
+    	    tmp = cleanStr(tmp.substr(pos));
+    	    my_assert((tmp[0] == '"' && tmp[tmp.length() - 1] == '"'), line, "Textual attributes should be surrounded by doublequotes!");
+    	    transcript_id = tmp.substr(1, tmp.length() - 2);
+    	    find_transcript_id = true;
+    	}
+    }
+
+    my_assert(find_gene_id, line, "Cannot find gene_id!");
+    my_assert(find_transcript_id, line, "Cannot find transcript_id!");
   }
 
   std::string getSeqName() { return seqname; }
diff --git a/README.md b/README.md
index b46826a..6076ce6 100644
--- a/README.md
+++ b/README.md
@@ -132,24 +132,27 @@ unsorted BAM file, 'sample_name.genome.sorted.bam' and
 generated by the samtools included. All these files are in genomic
 coordinates.
 
-#### a) Generating a UCSC Wiggle file
+#### a) Generating a Wiggle file
 
 A wiggle plot representing the expected number of reads overlapping
-each position in the genome can be generated from the sorted genome
-BAM file output.  To generate the wiggle plot, run the 'rsem-bam2wig'
-program on the 'sample_name.genome.sorted.bam' file.
+each position in the genome/transcript set can be generated from the
+sorted genome/transcript BAM file output.  To generate the wiggle
+plot, run the 'rsem-bam2wig' program on the
+'sample_name.genome.sorted.bam'/'sample_name.transcript.sorted.bam' file.
 
 Usage:    
 
-    rsem-bam2wig bam_input wig_output wiggle_name
+    rsem-bam2wig sorted_bam_input wig_output wiggle_name
 
-bam_input: sorted bam file   
+sorted_bam_input: sorted bam file   
 wig_output: output file name, e.g. output.wig   
 wiggle_name: the name the user wants to use for this wiggle plot  
 
-#### b) Loading a BAM and/or Wiggle file into the UCSC Genome Browser
+#### b) Loading a BAM and/or Wiggle file into the UCSC Genome Browser or Integrative Genomics Viewer(IGV)
 
-Refer to the [UCSC custom track help page](http://genome.ucsc.edu/goldenPath/help/customTrack.html).
+For UCSC genome browser, please refer to the [UCSC custom track help page](http://genome.ucsc.edu/goldenPath/help/customTrack.html).
+
+For integrative genomics viewer, please refer to the [IGV home page](http://www.broadinstitute.org/software/igv/home).
 
 #### c) Generating Transcript Wiggle Plots
 
diff --git a/makefile b/makefile
index 6b11536..1b2b1b9 100644
--- a/makefile
+++ b/makefile
@@ -112,7 +112,7 @@ simulation.o : utils.h Read.h SingleRead.h SingleReadQ.h PairedEndRead.h PairedE
 	$(CC) $(COFLAGS) simulation.cpp
 
 rsem-run-gibbs : Gibbs.o
-	$(CC) -o rsem-run-gibbs Gibbs.o -lpthread
+	$(CC) -o rsem-run-gibbs Gibbs.o
 
 #some header files are omitted
 Gibbs.o : utils.h Model.h SingleModel.h SingleQModel.h PairedEndModel.h PairedEndQModel.h RefSeq.h RefSeqPolicy.h PolyARules.h Refs.h GroupInfo.h sampling.h boost/random.hpp Gibbs.cpp 
diff --git a/utils.h b/utils.h
index 0991fb9..4fa0750 100644
--- a/utils.h
+++ b/utils.h
@@ -10,6 +10,7 @@
 #include<cassert>
 #include<string>
 #include<vector>
+#include<cerrno>
 
 const int STRLEN = 10005 ;
 const double EPSILON = 1e-300;
@@ -161,4 +162,26 @@ void exitWithError(const char* errmsg) {
 	exit(-1);
 }
 
+void pthread_exception(int rc) {
+	switch(rc) {
+	case EAGAIN:
+		fprintf(stderr, "Error code: EAGAIN. Insufficient resources to create another thread, or a system-imposed limit on the number of threads was encountered.\n");
+		break;
+	case EINVAL:
+		fprintf(stderr, "Error code: EINVAL. Invalid settings in attr if pthread_create() is called. Or the implementation has detected that the value specified by thread_id does not refer to a joinable thread if pthread_join() is called.\n");
+		break;
+	case EPERM:
+		fprintf(stderr, "Error code: EPERM. No permission to set the scheduling policy and parameters specified in attr.\n");
+		break;
+	case EDEADLK:
+		fprintf(stderr, "Error code: EDEADLK. A deadlock was detected (e.g., two threads tried to join with each other); or thread_id specifies the calling thread.");
+		break;
+	case ESRCH:
+		fprintf(stderr, "Error code: ESRCH. No thread with thread_id could be found.\n");
+		break;
+	default: fprintf(stderr, "Unknown error code: %d\n", rc);
+	}
+	exit(-1);
+}
+
 #endif