2 * transcripts are numbered from 1. 0 is reserved for noise isoform
16 #include "my_assert.h"
17 #include "Transcript.h"
21 Transcripts(int type = 0) {
22 M = 0; this->type = type;
24 transcripts.push_back(Transcript());
26 e2i.clear(); i2e.clear();
29 int getM() { return M; }
31 // used in shrinking the transcripts
32 void setM(int M) { this->M = M; transcripts.resize(M + 1); }
34 void move(int from, int to) {
36 if (from > to) transcripts[to] = transcripts[from];
39 int getType() { return type; }
40 void setType(int type) { this->type = type; }
42 bool isAlleleSpecific() { return type == 2; }
44 const Transcript& getTranscriptAt(int pos) {
45 assert(pos > 0 && pos <= M);
46 return transcripts[pos];
49 void add(const Transcript& transcript) {
50 transcripts.push_back(transcript);
55 std::sort(transcripts.begin(), transcripts.end());
58 void readFrom(const char*);
59 void writeTo(const char*);
62 int getInternalSid(int eid) {
63 assert(eid > 0 && eid <= M);
67 const Transcript& getTranscriptViaEid(int eid) {
68 return transcripts[getInternalSid(eid)];
71 void buildMappings(int, char**);
74 int M, type; // type 0 from genome, 1 standalone transcriptome, 2 allele-specific
75 std::vector<Transcript> transcripts;
77 std::vector<int> e2i, i2e; // external sid to internal sid, internal sid to external sid
80 void Transcripts::readFrom(const char* inpF) {
82 std::ifstream fin(inpF);
84 if (!fin.is_open()) { fprintf(stderr, "Cannot open %s! It may not exist.\n", inpF); exit(-1); }
88 transcripts.assign(M + 1, Transcript());
89 for (int i = 1; i <= M; i++) {
90 transcripts[i].read(fin);
95 void Transcripts::writeTo(const char* outF) {
96 std::ofstream fout(outF);
97 fout<<M<<" "<<type<<std::endl;
98 for (int i = 1; i <= M; i++) {
99 transcripts[i].write(fout);
104 void Transcripts::buildMappings(int n_targets, char** target_name) {
105 std::map<std::string, int> dict;
106 std::map<std::string, int>::iterator iter;
108 general_assert(n_targets == M, "Number of reference sequences does not match! Please check if the reads are aligned to a transcript set (instead of a genome)!");
111 for (int i = 1; i <= M; i++) {
112 const std::string& tid = isAlleleSpecific() ? transcripts[i].getSeqName() : transcripts[i].getTranscriptID();
113 iter = dict.find(tid);
114 general_assert(iter == dict.end(), tid + " appears more than once!");
118 e2i.assign(M + 1, 0);
119 i2e.assign(M + 1, 0);
120 for (int i = 0; i < n_targets; i++) {
121 iter = dict.find(std::string(target_name[i]));
122 general_assert(iter != dict.end(), "RSEM can not recognize reference sequence name " + cstrtos(target_name[i]) + "!");
123 general_assert(iter->second > 0, "Reference sequence name " + cstrtos(target_name[i]) + " is duplicated!");
124 e2i[i + 1] = iter->second;
125 i2e[iter->second] = i + 1;
130 #endif /* TRANSCRIPTS_H_ */