X-Git-Url: https://git.donarmstrong.com/?p=rsem.git;a=blobdiff_plain;f=Transcripts.h;h=033669b740e97737ce4f5f4bc3a25f016a083910;hp=2637d35d197b0070e636a16a0404c669190b6e6d;hb=636b82d9f60ebcbec7ef1b73ba23bbbacfd8b36a;hpb=237bbdf363c9e42ee24e2fd63106dccf20d9bf2f diff --git a/Transcripts.h b/Transcripts.h index 2637d35..033669b 100644 --- a/Transcripts.h +++ b/Transcripts.h @@ -16,7 +16,6 @@ #include "my_assert.h" #include "Transcript.h" - class Transcripts { public: Transcripts(int type = 0) { @@ -28,7 +27,19 @@ public: } int getM() { return M; } + + // used in shrinking the transcripts + void setM(int M) { this->M = M; transcripts.resize(M + 1); } + + void move(int from, int to) { + assert(from >= to); + if (from > to) transcripts[to] = transcripts[from]; + } + int getType() { return type; } + void setType(int type) { this->type = type; } + + bool isAlleleSpecific() { return type == 2; } const Transcript& getTranscriptAt(int pos) { assert(pos > 0 && pos <= M); @@ -60,7 +71,7 @@ public: void buildMappings(int, char**); private: - int M, type; // type 0 from genome , 1 standalone transcriptome + int M, type; // type 0 from genome, 1 standalone transcriptome, 2 allele-specific std::vector transcripts; std::vector e2i, i2e; // external sid to internal sid, internal sid to external sid @@ -94,13 +105,13 @@ void Transcripts::buildMappings(int n_targets, char** target_name) { std::map dict; std::map::iterator iter; - general_assert(n_targets == M, "Number of transcripts does not match! Please check if the reads are aligned to a transcript set (instead of a genome)!"); + general_assert(n_targets == M, "Number of reference sequences does not match! Please check if the reads are aligned to a transcript set (instead of a genome)!"); dict.clear(); for (int i = 1; i <= M; i++) { - const std::string& tid = transcripts[i].getTranscriptID(); + const std::string& tid = isAlleleSpecific() ? transcripts[i].getSeqName() : transcripts[i].getTranscriptID(); iter = dict.find(tid); - assert(iter == dict.end()); + general_assert(iter == dict.end(), tid + " appears more than once!"); dict[tid] = i; } @@ -108,7 +119,7 @@ void Transcripts::buildMappings(int n_targets, char** target_name) { i2e.assign(M + 1, 0); for (int i = 0; i < n_targets; i++) { iter = dict.find(std::string(target_name[i])); - general_assert(iter != dict.end(), "RSEM can not recognize transcript " + cstrtos(target_name[i]) + "!"); + general_assert(iter != dict.end(), "RSEM can not recognize reference sequence name " + cstrtos(target_name[i]) + "!"); general_assert(iter->second > 0, "Reference sequence name " + cstrtos(target_name[i]) + " is duplicated!"); e2i[i + 1] = iter->second; i2e[iter->second] = i + 1;