X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=extractRef.cpp;h=0a46de0950534ea4c22f3acb8f239de66ef8415f;hb=1b2999c4407ef8419fb89b66b843b7141caff4da;hp=c4f24a9a079b37a25d39d6fa969ff9ac1482f3de;hpb=a95154919f950f86de9104b2b9dcf1f0c7e83387;p=rsem.git diff --git a/extractRef.cpp b/extractRef.cpp index c4f24a9..0a46de0 100644 --- a/extractRef.cpp +++ b/extractRef.cpp @@ -86,8 +86,14 @@ bool buildTranscript(int sp, int ep) { int start = items[i].getStart(); int end = items[i].getEnd(); - assert(strand == items[i].getStrand()); - assert(seqname == items[i].getSeqName()); + if (strand != items[i].getStrand()) { + fprintf(stderr, "According to the GTF file given, a transcript has exons from different orientations!\n"); + exit(-1); + } + if (seqname != items[i].getSeqName()) { + fprintf(stderr, "According to the GTF file given, a transcript has exons on multiple chromosomes!\n"); + exit(-1); + } if (cur_e + 1 < start) { if (cur_s > 0) vec.push_back(Interval(cur_s, cur_e)); @@ -113,6 +119,7 @@ void parse_gtf_file(char* gtfF) { items.clear(); while (getline(fin, line)) { + if (line[0] == '#') continue; // if this line is comment, jump it item.parse(line); string feature = item.getFeature(); if (feature == "exon") { @@ -127,14 +134,14 @@ void parse_gtf_file(char* gtfF) { else { if (hasMappingFile) { tid = item.getTranscriptID(); - mi_iter = mi_table.find(tid); - if (mi_iter == mi_table.end()) { - fprintf(stderr, "Mapping Info is not correct, cannot find %s's gene_id!\n", tid.c_str()); - exit(-1); - } - //assert(iter != table.end()); - gid = mi_iter->second; - item.setGeneID(gid); + mi_iter = mi_table.find(tid); + if (mi_iter == mi_table.end()) { + fprintf(stderr, "Mapping Info is not correct, cannot find %s's gene_id!\n", tid.c_str()); + exit(-1); + } + //assert(iter != table.end()); + gid = mi_iter->second; + item.setGeneID(gid); } items.push_back(item); } @@ -240,7 +247,7 @@ void writeResults(char* refName) { } int main(int argc, char* argv[]) { - if (argc < 6 || (hasMappingFile = atoi(argv[4])) && argc < 7) { + if (argc < 6 || ((hasMappingFile = atoi(argv[4])) && argc < 7)) { printf("Usage: rsem-extract-reference-transcripts refName quiet gtfF hasMappingFile [mappingFile] chromosome_file_1 [chromosome_file_2 ...]\n"); exit(-1); } @@ -296,7 +303,9 @@ int main(int argc, char* argv[]) { for (int i = 1; i <= M; i++) { if (seqs[i] == "") { - fprintf(stderr, "%s's sequence is empty! You must provide all chromosome files of transcripts which are presented in the .gtf file!\n", transcripts.getTranscriptAt(i).getTranscriptID().c_str()); + const Transcript& transcript = transcripts.getTranscriptAt(i); + fprintf(stderr, "Cannot extract transcript %s's sequence from chromosome %s, whose information might not be provided! Please check if the chromosome directory is set correctly or the list of chromosome files is complete.\n", \ + transcript.getTranscriptID().c_str(), transcript.getGeneID().c_str()); exit(-1); } }