From da827678b21e94c74fd17c9b8683edb60f73e814 Mon Sep 17 00:00:00 2001 From: Bo Li Date: Fri, 30 Dec 2011 19:47:09 -0600 Subject: [PATCH] relax the requirement about GTF attribute format --- EM.cpp | 36 +++++++++++++++++++++++++----------- GTFItem.h | 41 ++++++++++++++++++++++++++++------------- README.md | 19 +++++++++++-------- makefile | 2 +- utils.h | 23 +++++++++++++++++++++++ 5 files changed, 88 insertions(+), 33 deletions(-) diff --git a/EM.cpp b/EM.cpp index f9449a8..cf53fc3 100644 --- a/EM.cpp +++ b/EM.cpp @@ -465,14 +465,18 @@ void EM() { //E step for (int i = 0; i < nThreads; i++) { rc = pthread_create(&threads[i], &attr, E_STEP, (void*)(&fparams[i])); - if (rc != 0) { fprintf(stderr, "Cannot create thread %d at ROUND %d! (numbered from 0)\n", i, ROUND); exit(-1); } - //assert(rc == 0); + if (rc != 0) { + fprintf(stderr, "Cannot create thread %d at ROUND %d! (numbered from 0)", i, ROUND); + pthread_exception(rc); + } } for (int i = 0; i < nThreads; i++) { rc = pthread_join(threads[i], &status); - if (rc != 0) { fprintf(stderr, "Cannot join thread %d at ROUND %d! (numbered from 0)\n", i, ROUND); exit(-1); } - //assert(rc == 0); + if (rc != 0) { + fprintf(stderr, "Cannot join thread %d at ROUND %d! (numbered from 0)\n", i, ROUND); + pthread_exception(rc); + } } model.setNeedCalcConPrb(false); @@ -518,11 +522,17 @@ void EM() { if (model.getNeedCalcConPrb()) { for (int i = 0; i < nThreads; i++) { rc = pthread_create(&threads[i], &attr, calcConProbs, (void*)(&fparams[i])); - if (rc != 0) { fprintf(stderr, "Cannot create thread %d when generate files for Gibbs sampler! (numbered from 0)\n", i); exit(-1); } + if (rc != 0) { + fprintf(stderr, "Cannot create thread %d when generate files for Gibbs sampler! (numbered from 0)\n", i); + pthread_exception(rc); + } } for (int i = 0; i < nThreads; i++) { rc = pthread_join(threads[i], &status); - if (rc != 0) { fprintf(stderr, "Cannot join thread %d when generate files for Gibbs sampler! (numbered from 0)\n", i); exit(-1); } + if (rc != 0) { + fprintf(stderr, "Cannot join thread %d when generate files for Gibbs sampler! (numbered from 0)\n", i); + pthread_exception(rc); + } } } model.setNeedCalcConPrb(false); @@ -576,13 +586,17 @@ void EM() { for (int i = 0; i <= M; i++) probv[i] = theta[i]; for (int i = 0; i < nThreads; i++) { rc = pthread_create(&threads[i], &attr, E_STEP, (void*)(&fparams[i])); - if (rc != 0) { fprintf(stderr, "Cannot create thread %d when calculate expected weights! (numbered from 0)\n", i); exit(-1); } - //assert(rc == 0); + if (rc != 0) { + fprintf(stderr, "Cannot create thread %d when calculate expected weights! (numbered from 0)\n", i); + pthread_exception(rc); + } } for (int i = 0; i < nThreads; i++) { rc = pthread_join(threads[i], &status); - if (rc != 0) { fprintf(stderr, "Cannot join thread %d! (numbered from 0) when calculate expected weights!\n", i); exit(-1); } - //assert(rc == 0); + if (rc != 0) { + fprintf(stderr, "Cannot join thread %d! (numbered from 0) when calculate expected weights!\n", i); + pthread_exception(rc); + } } model.setNeedCalcConPrb(false); for (int i = 1; i < nThreads; i++) { @@ -596,7 +610,7 @@ void EM() { pthread_attr_destroy(&attr); //convert theta' to theta - double *mw = model.getMW(); + double *mw = model.getMW(); sum = 0.0; for (int i = 0; i <= M; i++) { theta[i] = (mw[i] < EPSILON ? 0.0 : theta[i] / mw[i]); diff --git a/GTFItem.h b/GTFItem.h index e2403d9..4e50270 100644 --- a/GTFItem.h +++ b/GTFItem.h @@ -54,19 +54,34 @@ class GTFItem { strand = tmp[0]; getline(strin, frame, '\t'); - getline(strin, tmp, ';'); tmp = cleanStr(tmp); - my_assert((tmp.substr(0, 7) == "gene_id"), line, "Identifier should be gene_id!"); - tmp = cleanStr(tmp.substr(7)); - my_assert((tmp[0] == '"' && tmp[tmp.length() - 1] == '"'), line, "Textual attributes should be surrounded by doublequotes!"); - gene_id = tmp.substr(1, tmp.length() - 2); - - getline(strin, tmp, ';'); tmp = cleanStr(tmp); - my_assert((tmp.substr(0, 13) == "transcript_id"), line, "Identifier should be transcript_id!"); - tmp = cleanStr(tmp.substr(13)); - my_assert((tmp[0] == '"' && tmp[tmp.length() - 1] == '"'), line, "Textual attributes should be surrounded by doublequotes!"); - transcript_id = tmp.substr(1, tmp.length() - 2); - - getline(strin, left); + getline(strin, left); // assign attributes and possible comments into "left" + + strin.clear(); strin.str(left); + bool find_gene_id = false, find_transcript_id = false; + + while (getline(strin, tmp, ';') && (!find_gene_id || !find_transcript_id)) { + tmp = cleanStr(tmp); + size_t pos = tmp.find(' '); + my_assert((pos != std::string::npos), line, "Cannot separate the identifier from the value for attribute " + tmp + "!"); + std::string identifier = tmp.substr(0, pos); + + if (identifier == "gene_id") { + my_assert(!find_gene_id, line, "gene_id appear more than once!"); + tmp = cleanStr(tmp.substr(pos)); + my_assert((tmp[0] == '"' && tmp[tmp.length() - 1] == '"'), line, "Textual attributes should be surrounded by doublequotes!"); + gene_id = tmp.substr(1, tmp.length() - 2); + find_gene_id = true; + } else if (identifier == "transcript_id") { + my_assert(!find_transcript_id, line, "transcript_id appear more than once!"); + tmp = cleanStr(tmp.substr(pos)); + my_assert((tmp[0] == '"' && tmp[tmp.length() - 1] == '"'), line, "Textual attributes should be surrounded by doublequotes!"); + transcript_id = tmp.substr(1, tmp.length() - 2); + find_transcript_id = true; + } + } + + my_assert(find_gene_id, line, "Cannot find gene_id!"); + my_assert(find_transcript_id, line, "Cannot find transcript_id!"); } std::string getSeqName() { return seqname; } diff --git a/README.md b/README.md index b46826a..6076ce6 100644 --- a/README.md +++ b/README.md @@ -132,24 +132,27 @@ unsorted BAM file, 'sample_name.genome.sorted.bam' and generated by the samtools included. All these files are in genomic coordinates. -#### a) Generating a UCSC Wiggle file +#### a) Generating a Wiggle file A wiggle plot representing the expected number of reads overlapping -each position in the genome can be generated from the sorted genome -BAM file output. To generate the wiggle plot, run the 'rsem-bam2wig' -program on the 'sample_name.genome.sorted.bam' file. +each position in the genome/transcript set can be generated from the +sorted genome/transcript BAM file output. To generate the wiggle +plot, run the 'rsem-bam2wig' program on the +'sample_name.genome.sorted.bam'/'sample_name.transcript.sorted.bam' file. Usage: - rsem-bam2wig bam_input wig_output wiggle_name + rsem-bam2wig sorted_bam_input wig_output wiggle_name -bam_input: sorted bam file +sorted_bam_input: sorted bam file wig_output: output file name, e.g. output.wig wiggle_name: the name the user wants to use for this wiggle plot -#### b) Loading a BAM and/or Wiggle file into the UCSC Genome Browser +#### b) Loading a BAM and/or Wiggle file into the UCSC Genome Browser or Integrative Genomics Viewer(IGV) -Refer to the [UCSC custom track help page](http://genome.ucsc.edu/goldenPath/help/customTrack.html). +For UCSC genome browser, please refer to the [UCSC custom track help page](http://genome.ucsc.edu/goldenPath/help/customTrack.html). + +For integrative genomics viewer, please refer to the [IGV home page](http://www.broadinstitute.org/software/igv/home). #### c) Generating Transcript Wiggle Plots diff --git a/makefile b/makefile index 6b11536..1b2b1b9 100644 --- a/makefile +++ b/makefile @@ -112,7 +112,7 @@ simulation.o : utils.h Read.h SingleRead.h SingleReadQ.h PairedEndRead.h PairedE $(CC) $(COFLAGS) simulation.cpp rsem-run-gibbs : Gibbs.o - $(CC) -o rsem-run-gibbs Gibbs.o -lpthread + $(CC) -o rsem-run-gibbs Gibbs.o #some header files are omitted Gibbs.o : utils.h Model.h SingleModel.h SingleQModel.h PairedEndModel.h PairedEndQModel.h RefSeq.h RefSeqPolicy.h PolyARules.h Refs.h GroupInfo.h sampling.h boost/random.hpp Gibbs.cpp diff --git a/utils.h b/utils.h index 0991fb9..4fa0750 100644 --- a/utils.h +++ b/utils.h @@ -10,6 +10,7 @@ #include #include #include +#include const int STRLEN = 10005 ; const double EPSILON = 1e-300; @@ -161,4 +162,26 @@ void exitWithError(const char* errmsg) { exit(-1); } +void pthread_exception(int rc) { + switch(rc) { + case EAGAIN: + fprintf(stderr, "Error code: EAGAIN. Insufficient resources to create another thread, or a system-imposed limit on the number of threads was encountered.\n"); + break; + case EINVAL: + fprintf(stderr, "Error code: EINVAL. Invalid settings in attr if pthread_create() is called. Or the implementation has detected that the value specified by thread_id does not refer to a joinable thread if pthread_join() is called.\n"); + break; + case EPERM: + fprintf(stderr, "Error code: EPERM. No permission to set the scheduling policy and parameters specified in attr.\n"); + break; + case EDEADLK: + fprintf(stderr, "Error code: EDEADLK. A deadlock was detected (e.g., two threads tried to join with each other); or thread_id specifies the calling thread."); + break; + case ESRCH: + fprintf(stderr, "Error code: ESRCH. No thread with thread_id could be found.\n"); + break; + default: fprintf(stderr, "Unknown error code: %d\n", rc); + } + exit(-1); +} + #endif -- 2.39.2