10 #include "Transcript.h"
11 #include "Transcripts.h"
17 map<string, string> name2seq;
18 map<string, string>::iterator iter;
20 Transcripts transcripts(1); // no genome, just transcript set
21 char groupF[STRLEN], tiF[STRLEN], refFastaF[STRLEN], chromListF[STRLEN];
24 char mappingFile[STRLEN];
26 map<string, string> mi_table; // mapping info table
27 map<string, string>::iterator mi_iter; //mapping info table's iterator
29 void loadMappingInfo(char* mappingF) {
30 ifstream fin(mappingF);
31 string line, key, value;
34 fprintf(stderr, "Cannot open %s! It may not exist.\n", mappingF);
39 while (getline(fin, line)) {
40 line = cleanStr(line);
41 if (line[0] == '#') continue;
42 istringstream strin(line);
44 mi_table[key] = value;
51 if (!isalpha(c)) { fprintf(stderr, "Sequence contains unknown letter '%c'!\n", c); exit(-1); }
53 if (isupper(c) && c != 'A' && c != 'C' && c != 'G' && c != 'T') c = 'N';
54 if (islower(c) && c != 'a' && c != 'c' && c != 'g' && c != 't') c = 'n';
58 void writeResults(char* refName) {
60 string cur_gene_id, name;
62 sprintf(groupF, "%s.grp", refName);
63 sprintf(tiF, "%s.ti", refName);
64 sprintf(refFastaF, "%s.transcripts.fa", refName);
65 sprintf(chromListF, "%s.chrlist", refName);
67 transcripts.writeTo(tiF);
68 if (verbose) { printf("Transcript Information File is generated!\n"); }
72 for (int i = 1; i <= M; i++) {
73 const Transcript& transcript = transcripts.getTranscriptAt(i);
74 if (cur_gene_id != transcript.getGeneID()) {
76 cur_gene_id = transcript.getGeneID();
81 if (verbose) { printf("Group File is generated!\n"); }
83 fout2.open(chromListF);
85 for (int i = 1; i <= M; i++) {
86 name = transcripts.getTranscriptAt(i).getTranscriptID();
87 iter = name2seq.find(name);
88 if (iter == name2seq.end()) {
89 fprintf(stderr, "Cannot recognize transcript ID %s!\n", name.c_str());
92 fout<<">"<<name<<endl;
93 fout<<iter->second<<endl;
95 fout2<<name<<'\t'<<iter->second.length()<<endl;
101 printf("Chromosome List File is generated!\n");
102 printf("Extracted Sequences File is generated!\n");
106 int main(int argc, char* argv[]) {
107 if (argc < 5 || ((hasMappingFile = atoi(argv[3])) && argc < 6)) {
108 printf("Usage: synthesisRef refName quiet hasMappingFile [mappingFile] reference_file_1 [reference_file_2 ...]\n");
112 verbose = !atoi(argv[2]);
114 if (hasMappingFile) { loadMappingInfo(argv[4]); }
116 int start = hasMappingFile ? 5 : 4;
120 string seqname, gene_id;
123 vector<Interval> vec;
127 for (int i = start; i < argc; i++) {
129 if (!fin.is_open()) { fprintf(stderr, "Cannot open %s! It may not exist.\n", argv[i]); exit(-1); }
130 pt = getline(fin, line);
131 while (pt != 0 && line[0] == '>') {
132 istringstream strin(line.substr(1));
136 while((pt = getline(fin, line)) && line[0] != '>') {
140 int len = gseq.length();
142 for (int j = 0; j < len; j++) gseq[j] = check(gseq[j]);
144 name2seq[seqname] = gseq;
146 if (hasMappingFile) {
147 mi_iter = mi_table.find(seqname);
148 if (mi_iter == mi_table.end()) {
149 fprintf(stderr, "Mapping Info is not correct, cannot find %s's gene_id!\n", seqname.c_str());
152 //assert(iter != table.end());
153 gene_id = mi_iter->second;
155 else gene_id = seqname;
158 vec.push_back(Interval(1, len));
159 transcripts.add(Transcript(seqname, gene_id, seqname, '+', vec, ""));
162 if (verbose && M % 1000000 == 0) { printf("%d sequences are processed!\n", M); }
168 fprintf(stderr, "Number of transcripts in the reference is less than 1!\n");
172 assert(M == transcripts.getM());
175 writeResults(argv[1]);