map<string, string>::iterator mi_iter; //mapping info table's iterator
void loadMappingInfo(char* mappingF) {
- ifstream fin(mappingF);
- string line, key, value;
-
- if (!fin.is_open()) {
- fprintf(stderr, "Cannot open %s! It may not exist.\n", mappingF);
- exit(-1);
- }
-
- mi_table.clear();
- while (getline(fin, line)) {
- line = cleanStr(line);
- if (line[0] == '#') continue;
- istringstream strin(line);
- strin>>value>>key;
- mi_table[key] = value;
- }
-
- fin.close();
+ ifstream fin(mappingF);
+ string line, key, value;
+
+ if (!fin.is_open()) {
+ fprintf(stderr, "Cannot open %s! It may not exist.\n", mappingF);
+ exit(-1);
+ }
+
+ mi_table.clear();
+ while (getline(fin, line)) {
+ line = cleanStr(line);
+ if (line[0] == '#') continue;
+ istringstream strin(line);
+ strin>>value>>key;
+ mi_table[key] = value;
+ }
+
+ fin.close();
}
bool buildTranscript(int sp, int ep) {
int start = items[i].getStart();
int end = items[i].getEnd();
- assert(strand == items[i].getStrand());
- assert(seqname == items[i].getSeqName());
+ if (strand != items[i].getStrand()) {
+ fprintf(stderr, "According to the GTF file given, a transcript has exons from different orientations!\n");
+ exit(-1);
+ }
+ if (seqname != items[i].getSeqName()) {
+ fprintf(stderr, "According to the GTF file given, a transcript has exons on multiple chromosomes!\n");
+ exit(-1);
+ }
if (cur_e + 1 < start) {
if (cur_s > 0) vec.push_back(Interval(cur_s, cur_e));
items.clear();
while (getline(fin, line)) {
+ if (line[0] == '#') continue; // if this line is comment, jump it
item.parse(line);
string feature = item.getFeature();
if (feature == "exon") {
else {
if (hasMappingFile) {
tid = item.getTranscriptID();
- mi_iter = mi_table.find(tid);
- if (mi_iter == mi_table.end()) {
- fprintf(stderr, "Mapping Info is not correct, cannot find %s's gene_id!\n", tid.c_str());
- exit(-1);
- }
- //assert(iter != table.end());
- gid = mi_iter->second;
- item.setGeneID(gid);
+ mi_iter = mi_table.find(tid);
+ if (mi_iter == mi_table.end()) {
+ fprintf(stderr, "Mapping Info is not correct, cannot find %s's gene_id!\n", tid.c_str());
+ exit(-1);
+ }
+ //assert(iter != table.end());
+ gid = mi_iter->second;
+ item.setGeneID(gid);
}
items.push_back(item);
}
}
int main(int argc, char* argv[]) {
- if (argc < 6 || (hasMappingFile = atoi(argv[4])) && argc < 7) {
+ if (argc < 6 || ((hasMappingFile = atoi(argv[4])) && argc < 7)) {
printf("Usage: rsem-extract-reference-transcripts refName quiet gtfF hasMappingFile [mappingFile] chromosome_file_1 [chromosome_file_2 ...]\n");
exit(-1);
}
ifstream fin;
string line, gseq, seqname;
- void* pt;
chrvec.clear();
for (int i = start; i < argc; i++) {
fin.open(argv[i]);
if (!fin.is_open()) { fprintf(stderr, "Cannot open %s! It may not exist.\n", argv[i]); exit(-1); }
- pt = getline(fin, line);
- while (pt != 0 && line[0] == '>') {
+ getline(fin, line);
+ while ((fin) && (line[0] == '>')) {
istringstream strin(line.substr(1));
strin>>seqname;
gseq = "";
- while((pt = getline(fin, line)) && line[0] != '>') {
- gseq += line;
- }
-
- size_t len = gseq.length();
- assert(len > 0);
- for (size_t j = 0; j < len; j++) gseq[j] = check(gseq[j]);
-
- iter = sn2tr.find(seqname);
- if (iter == sn2tr.end()) continue;
-
- chrvec.push_back(ChrInfo(seqname, len));
-
- vector<int>& vec = iter->second;
- int s = vec.size();
- for (int j = 0; j < s; j++) {
- assert(vec[j] > 0 && vec[j] <= M);
- transcripts.getTranscriptAt(vec[j]).extractSeq(gseq, seqs[vec[j]]);
- }
+ while((getline(fin, line)) && (line[0] != '>')) {
+ gseq += line;
+ }
+
+ size_t len = gseq.length();
+ assert(len > 0);
+ for (size_t j = 0; j < len; j++) gseq[j] = check(gseq[j]);
+
+ iter = sn2tr.find(seqname);
+ if (iter == sn2tr.end()) continue;
+
+ chrvec.push_back(ChrInfo(seqname, len));
+
+ vector<int>& vec = iter->second;
+ int s = vec.size();
+ for (int j = 0; j < s; j++) {
+ assert(vec[j] > 0 && vec[j] <= M);
+ transcripts.getTranscriptAt(vec[j]).extractSeq(gseq, seqs[vec[j]]);
+ }
}
fin.close();
for (int i = 1; i <= M; i++) {
if (seqs[i] == "") {
- fprintf(stderr, "%s's sequence is empty! You must provide all chromosome files of transcripts which are presented in the .gtf file!\n", transcripts.getTranscriptAt(i).getTranscriptID().c_str());
+ const Transcript& transcript = transcripts.getTranscriptAt(i);
+
+ fprintf(stderr, "Cannot extract transcript %s's sequence from chromosome %s! Loading chromosome %s's sequence is failed. Please check if 1) the chromosome directory is set correctly; 2) the list of chromosome files is complete; 3) the FASTA files containing chromosome sequences are not truncated or having wrong format.\n", \
+ transcript.getTranscriptID().c_str(), transcript.getSeqName().c_str(), transcript.getSeqName().c_str());
exit(-1);
}
}