X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=Transcripts.h;h=2637d35d197b0070e636a16a0404c669190b6e6d;hb=97554bbac838f2ed578d81f98e421dac0669e74e;hp=2750e0c36394d3c3be5cb7d63dfb74f7ae5687a3;hpb=a95154919f950f86de9104b2b9dcf1f0c7e83387;p=rsem.git diff --git a/Transcripts.h b/Transcripts.h index 2750e0c..2637d35 100644 --- a/Transcripts.h +++ b/Transcripts.h @@ -10,7 +10,10 @@ #include #include #include +#include +#include +#include "my_assert.h" #include "Transcript.h" @@ -20,6 +23,8 @@ public: M = 0; this->type = type; transcripts.clear(); transcripts.push_back(Transcript()); + + e2i.clear(); i2e.clear(); } int getM() { return M; } @@ -42,9 +47,23 @@ public: void readFrom(const char*); void writeTo(const char*); + //Eid: external sid + int getInternalSid(int eid) { + assert(eid > 0 && eid <= M); + return e2i[eid]; + } + + const Transcript& getTranscriptViaEid(int eid) { + return transcripts[getInternalSid(eid)]; + } + + void buildMappings(int, char**); + private: int M, type; // type 0 from genome , 1 standalone transcriptome std::vector transcripts; + + std::vector e2i, i2e; // external sid to internal sid, internal sid to external sid }; void Transcripts::readFrom(const char* inpF) { @@ -55,8 +74,7 @@ void Transcripts::readFrom(const char* inpF) { fin>>M>>type; getline(fin, line); - transcripts.clear(); - transcripts.resize(M + 1); + transcripts.assign(M + 1, Transcript()); for (int i = 1; i <= M; i++) { transcripts[i].read(fin); } @@ -72,4 +90,30 @@ void Transcripts::writeTo(const char* outF) { fout.close(); } +void Transcripts::buildMappings(int n_targets, char** target_name) { + std::map dict; + std::map::iterator iter; + + general_assert(n_targets == M, "Number of transcripts does not match! Please check if the reads are aligned to a transcript set (instead of a genome)!"); + + dict.clear(); + for (int i = 1; i <= M; i++) { + const std::string& tid = transcripts[i].getTranscriptID(); + iter = dict.find(tid); + assert(iter == dict.end()); + dict[tid] = i; + } + + e2i.assign(M + 1, 0); + i2e.assign(M + 1, 0); + for (int i = 0; i < n_targets; i++) { + iter = dict.find(std::string(target_name[i])); + general_assert(iter != dict.end(), "RSEM can not recognize transcript " + cstrtos(target_name[i]) + "!"); + general_assert(iter->second > 0, "Reference sequence name " + cstrtos(target_name[i]) + " is duplicated!"); + e2i[i + 1] = iter->second; + i2e[iter->second] = i + 1; + iter->second = -1; + } +} + #endif /* TRANSCRIPTS_H_ */