X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=extractRef.cpp;h=1a56cc15e8c615f02e14c7cd15f4e8c787e5a172;hb=52f1bd6f44f9b2630b839f192fb9ece18581983b;hp=c4f24a9a079b37a25d39d6fa969ff9ac1482f3de;hpb=a95154919f950f86de9104b2b9dcf1f0c7e83387;p=rsem.git diff --git a/extractRef.cpp b/extractRef.cpp index c4f24a9..1a56cc1 100644 --- a/extractRef.cpp +++ b/extractRef.cpp @@ -50,24 +50,24 @@ map mi_table; // mapping info table map::iterator mi_iter; //mapping info table's iterator void loadMappingInfo(char* mappingF) { - ifstream fin(mappingF); - string line, key, value; - - if (!fin.is_open()) { - fprintf(stderr, "Cannot open %s! It may not exist.\n", mappingF); - exit(-1); - } - - mi_table.clear(); - while (getline(fin, line)) { - line = cleanStr(line); - if (line[0] == '#') continue; - istringstream strin(line); - strin>>value>>key; - mi_table[key] = value; - } - - fin.close(); + ifstream fin(mappingF); + string line, key, value; + + if (!fin.is_open()) { + fprintf(stderr, "Cannot open %s! It may not exist.\n", mappingF); + exit(-1); + } + + mi_table.clear(); + while (getline(fin, line)) { + line = cleanStr(line); + if (line[0] == '#') continue; + istringstream strin(line); + strin>>value>>key; + mi_table[key] = value; + } + + fin.close(); } bool buildTranscript(int sp, int ep) { @@ -86,8 +86,14 @@ bool buildTranscript(int sp, int ep) { int start = items[i].getStart(); int end = items[i].getEnd(); - assert(strand == items[i].getStrand()); - assert(seqname == items[i].getSeqName()); + if (strand != items[i].getStrand()) { + fprintf(stderr, "According to the GTF file given, a transcript has exons from different orientations!\n"); + exit(-1); + } + if (seqname != items[i].getSeqName()) { + fprintf(stderr, "According to the GTF file given, a transcript has exons on multiple chromosomes!\n"); + exit(-1); + } if (cur_e + 1 < start) { if (cur_s > 0) vec.push_back(Interval(cur_s, cur_e)); @@ -113,6 +119,7 @@ void parse_gtf_file(char* gtfF) { items.clear(); while (getline(fin, line)) { + if (line[0] == '#') continue; // if this line is comment, jump it item.parse(line); string feature = item.getFeature(); if (feature == "exon") { @@ -127,14 +134,14 @@ void parse_gtf_file(char* gtfF) { else { if (hasMappingFile) { tid = item.getTranscriptID(); - mi_iter = mi_table.find(tid); - if (mi_iter == mi_table.end()) { - fprintf(stderr, "Mapping Info is not correct, cannot find %s's gene_id!\n", tid.c_str()); - exit(-1); - } - //assert(iter != table.end()); - gid = mi_iter->second; - item.setGeneID(gid); + mi_iter = mi_table.find(tid); + if (mi_iter == mi_table.end()) { + fprintf(stderr, "Mapping Info is not correct, cannot find %s's gene_id!\n", tid.c_str()); + exit(-1); + } + //assert(iter != table.end()); + gid = mi_iter->second; + item.setGeneID(gid); } items.push_back(item); } @@ -240,7 +247,7 @@ void writeResults(char* refName) { } int main(int argc, char* argv[]) { - if (argc < 6 || (hasMappingFile = atoi(argv[4])) && argc < 7) { + if (argc < 6 || ((hasMappingFile = atoi(argv[4])) && argc < 7)) { printf("Usage: rsem-extract-reference-transcripts refName quiet gtfF hasMappingFile [mappingFile] chromosome_file_1 [chromosome_file_2 ...]\n"); exit(-1); } @@ -296,7 +303,9 @@ int main(int argc, char* argv[]) { for (int i = 1; i <= M; i++) { if (seqs[i] == "") { - fprintf(stderr, "%s's sequence is empty! You must provide all chromosome files of transcripts which are presented in the .gtf file!\n", transcripts.getTranscriptAt(i).getTranscriptID().c_str()); + const Transcript& transcript = transcripts.getTranscriptAt(i); + fprintf(stderr, "Cannot extract transcript %s's sequence from chromosome %s, whose information might not be provided! Please check if the chromosome directory is set correctly or the list of chromosome files is complete.\n", \ + transcript.getTranscriptID().c_str(), transcript.getSeqName().c_str()); exit(-1); } }