]> git.donarmstrong.com Git - rsem.git/blobdiff - extractRef.cpp
Updated samtools to 0.1.19
[rsem.git] / extractRef.cpp
index 693af6635ae9df85006dcc7b991ee5d2e497bed8..fb7d3ecb586d36e586d8698a9f4c94bc0ec4cc46 100644 (file)
@@ -50,24 +50,24 @@ map<string, string> mi_table; // mapping info table
 map<string, string>::iterator mi_iter; //mapping info table's iterator
 
 void loadMappingInfo(char* mappingF) {
-  ifstream fin(mappingF);
-  string line, key, value;
-
-  if (!fin.is_open()) {
-         fprintf(stderr, "Cannot open %s! It may not exist.\n", mappingF);
-         exit(-1);
-  }
-
-  mi_table.clear();
-  while (getline(fin, line)) {
-    line = cleanStr(line);
-    if (line[0] == '#') continue;
-    istringstream strin(line);
-    strin>>value>>key;
-    mi_table[key] = value;
-  }
-
-  fin.close();
+       ifstream fin(mappingF);
+       string line, key, value;
+
+       if (!fin.is_open()) {
+               fprintf(stderr, "Cannot open %s! It may not exist.\n", mappingF);
+               exit(-1);
+       }
+
+       mi_table.clear();
+       while (getline(fin, line)) {
+               line = cleanStr(line);
+               if (line[0] == '#') continue;
+               istringstream strin(line);
+               strin>>value>>key;
+               mi_table[key] = value;
+       }
+
+       fin.close();
 }
 
 bool buildTranscript(int sp, int ep) {
@@ -134,14 +134,14 @@ void parse_gtf_file(char* gtfF) {
                        else {
                                if (hasMappingFile) {
                                        tid = item.getTranscriptID();
-                                   mi_iter = mi_table.find(tid);
-                                   if (mi_iter == mi_table.end()) {
-                                       fprintf(stderr, "Mapping Info is not correct, cannot find %s's gene_id!\n", tid.c_str());
-                                       exit(-1);
-                                   }
-                                   //assert(iter != table.end());
-                                   gid = mi_iter->second;
-                                   item.setGeneID(gid);
+                                       mi_iter = mi_table.find(tid);
+                                       if (mi_iter == mi_table.end()) {
+                                         fprintf(stderr, "Mapping Info is not correct, cannot find %s's gene_id!\n", tid.c_str());
+                                         exit(-1);
+                                       }
+                                       //assert(iter != table.end());
+                                       gid = mi_iter->second;
+                                       item.setGeneID(gid);
                                }
                                items.push_back(item);
                        }
@@ -260,7 +260,6 @@ int main(int argc, char* argv[]) {
 
        ifstream fin;
        string line, gseq, seqname;
-       void* pt;
 
        chrvec.clear();
 
@@ -270,31 +269,31 @@ int main(int argc, char* argv[]) {
        for (int i = start; i < argc; i++) {
                fin.open(argv[i]);
                if (!fin.is_open()) { fprintf(stderr, "Cannot open %s! It may not exist.\n", argv[i]); exit(-1); }
-               pt = getline(fin, line);
-               while (pt != 0 && line[0] == '>') {
+               getline(fin, line);
+               while ((fin) && (line[0] == '>')) {
                        istringstream strin(line.substr(1));
                        strin>>seqname;
 
                        gseq = "";
-                   while((pt = getline(fin, line)) && line[0] != '>') {
-                       gseq += line;
-                   }
-
-                   size_t len = gseq.length();
-                   assert(len > 0);
-                   for (size_t j = 0; j < len; j++) gseq[j] = check(gseq[j]);
-
-                   iter = sn2tr.find(seqname);
-                   if (iter == sn2tr.end()) continue;
-
-                   chrvec.push_back(ChrInfo(seqname, len));
-
-                   vector<int>& vec = iter->second;
-                   int s = vec.size();
-                   for (int j = 0; j < s; j++) {
-                       assert(vec[j] > 0 && vec[j] <= M);
-                       transcripts.getTranscriptAt(vec[j]).extractSeq(gseq, seqs[vec[j]]);
-                   }
+                       while((getline(fin, line)) && (line[0] != '>')) {
+                         gseq += line;
+                       }
+
+                       size_t len = gseq.length();
+                       assert(len > 0);
+                       for (size_t j = 0; j < len; j++) gseq[j] = check(gseq[j]);
+                       
+                       iter = sn2tr.find(seqname);
+                       if (iter == sn2tr.end()) continue;
+                       
+                       chrvec.push_back(ChrInfo(seqname, len));
+                       
+                       vector<int>& vec = iter->second;
+                       int s = vec.size();
+                       for (int j = 0; j < s; j++) {
+                         assert(vec[j] > 0 && vec[j] <= M);
+                         transcripts.getTranscriptAt(vec[j]).extractSeq(gseq, seqs[vec[j]]);
+                       }
                }
                fin.close();
 
@@ -303,7 +302,9 @@ int main(int argc, char* argv[]) {
 
        for (int i = 1; i <= M; i++) {
                if (seqs[i] == "") {
-                       fprintf(stderr, "%s's sequence is empty! You must provide all chromosome files of transcripts which are presented in the .gtf file!\n", transcripts.getTranscriptAt(i).getTranscriptID().c_str());
+                       const Transcript& transcript = transcripts.getTranscriptAt(i);
+                       fprintf(stderr, "Cannot extract transcript %s's sequence from chromosome %s, whose information might not be provided! Please check if the chromosome directory is set correctly or the list of chromosome files is complete.\n", \
+                                       transcript.getTranscriptID().c_str(), transcript.getSeqName().c_str());
                        exit(-1);
                }
        }