X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=extractRef.cpp;h=2d2b17cea4fde25be35e6c9ab6925ba404144a74;hb=58d504aaf36ae486b1dba6d03e0e9f1c25855037;hp=12b3222744db41cf1602890b9ae0fccfd2dfedca;hpb=0f1ac7d42823cf0f47d25f4f0a04620357421589;p=rsem.git diff --git a/extractRef.cpp b/extractRef.cpp index 12b3222..2d2b17c 100644 --- a/extractRef.cpp +++ b/extractRef.cpp @@ -50,24 +50,24 @@ map mi_table; // mapping info table map::iterator mi_iter; //mapping info table's iterator void loadMappingInfo(char* mappingF) { - ifstream fin(mappingF); - string line, key, value; - - if (!fin.is_open()) { - fprintf(stderr, "Cannot open %s! It may not exist.\n", mappingF); - exit(-1); - } - - mi_table.clear(); - while (getline(fin, line)) { - line = cleanStr(line); - if (line[0] == '#') continue; - istringstream strin(line); - strin>>value>>key; - mi_table[key] = value; - } - - fin.close(); + ifstream fin(mappingF); + string line, key, value; + + if (!fin.is_open()) { + fprintf(stderr, "Cannot open %s! It may not exist.\n", mappingF); + exit(-1); + } + + mi_table.clear(); + while (getline(fin, line)) { + line = cleanStr(line); + if (line[0] == '#') continue; + istringstream strin(line); + strin>>value>>key; + mi_table[key] = value; + } + + fin.close(); } bool buildTranscript(int sp, int ep) { @@ -86,8 +86,14 @@ bool buildTranscript(int sp, int ep) { int start = items[i].getStart(); int end = items[i].getEnd(); - assert(strand == items[i].getStrand()); - assert(seqname == items[i].getSeqName()); + if (strand != items[i].getStrand()) { + fprintf(stderr, "According to the GTF file given, a transcript has exons from different orientations!\n"); + exit(-1); + } + if (seqname != items[i].getSeqName()) { + fprintf(stderr, "According to the GTF file given, a transcript has exons on multiple chromosomes!\n"); + exit(-1); + } if (cur_e + 1 < start) { if (cur_s > 0) vec.push_back(Interval(cur_s, cur_e)); @@ -128,14 +134,14 @@ void parse_gtf_file(char* gtfF) { else { if (hasMappingFile) { tid = item.getTranscriptID(); - mi_iter = mi_table.find(tid); - if (mi_iter == mi_table.end()) { - fprintf(stderr, "Mapping Info is not correct, cannot find %s's gene_id!\n", tid.c_str()); - exit(-1); - } - //assert(iter != table.end()); - gid = mi_iter->second; - item.setGeneID(gid); + mi_iter = mi_table.find(tid); + if (mi_iter == mi_table.end()) { + fprintf(stderr, "Mapping Info is not correct, cannot find %s's gene_id!\n", tid.c_str()); + exit(-1); + } + //assert(iter != table.end()); + gid = mi_iter->second; + item.setGeneID(gid); } items.push_back(item); } @@ -241,7 +247,7 @@ void writeResults(char* refName) { } int main(int argc, char* argv[]) { - if (argc < 6 || (hasMappingFile = atoi(argv[4])) && argc < 7) { + if (argc < 6 || ((hasMappingFile = atoi(argv[4])) && argc < 7)) { printf("Usage: rsem-extract-reference-transcripts refName quiet gtfF hasMappingFile [mappingFile] chromosome_file_1 [chromosome_file_2 ...]\n"); exit(-1); } @@ -297,7 +303,9 @@ int main(int argc, char* argv[]) { for (int i = 1; i <= M; i++) { if (seqs[i] == "") { - fprintf(stderr, "%s's sequence is empty! You must provide all chromosome files of transcripts which are presented in the .gtf file!\n", transcripts.getTranscriptAt(i).getTranscriptID().c_str()); + const Transcript& transcript = transcripts.getTranscriptAt(i); + fprintf(stderr, "Cannot extract transcript %s's sequence from chromosome %s! Loading chromosome %s's sequence is failed. Please check if 1) the chromosome directory is set correctly; 2) the list of chromosome files is complete; 3) the FASTA files containing chromosome sequences are not truncated or having wrong format.\n", \ + transcript.getTranscriptID().c_str(), transcript.getSeqName().c_str(), transcript.getSeqName().c_str()); exit(-1); } }