X-Git-Url: https://git.donarmstrong.com/?p=rsem.git;a=blobdiff_plain;f=RefSeq.h;h=88b149e0068e8aa342e14aedf126dac79deac136;hp=1b2e58e26b90efa40717a0567df06f263e1e88ea;hb=2d71eb92104693ca9baa5a2e1c23eeca776d8fd3;hpb=a95154919f950f86de9104b2b9dcf1f0c7e83387 diff --git a/RefSeq.h b/RefSeq.h index 1b2e58e..88b149e 100644 --- a/RefSeq.h +++ b/RefSeq.h @@ -10,132 +10,131 @@ //Each Object can only be used once class RefSeq { - public: - - RefSeq() { - fullLen = totLen = 0; - name = ""; seq = ""; - fmasks.clear(); +public: + RefSeq() { + fullLen = totLen = 0; + name = ""; seq = ""; + fmasks.clear(); + } + + //Constructor , seq : the forward strand of the reference + //tag does not contain ">" + //polyALen : length of polyA tail we add + RefSeq(const std::string& name, const std::string& seq, int polyALen) { + fullLen = seq.length(); + totLen = fullLen + polyALen; + this->name = name; + this->seq = seq; + this->seq.append(polyALen, 'A'); + + assert(fullLen > 0 && totLen >= fullLen); + + int len = (fullLen - 1) / NBITS + 1; + fmasks.assign(len, 0); + // set mask if poly(A) tail is added + if (polyALen > 0) { + for (int i = std::max(fullLen - OLEN + 1, 0); i < fullLen; i++) setMask(i); + } } - //Constructor , seq : the forward strand of the reference - //tag does not contain ">" - //polyALen : length of polyA tail we add - RefSeq(const std::string& name, const std::string& seq, int polyALen) { - fullLen = seq.length(); - totLen = fullLen + polyALen; - this->name = name; - this->seq = seq; - this->seq.append(polyALen, 'A'); - - assert(fullLen > 0 && totLen >= fullLen); - - int len = (fullLen - 1) / NBITS + 1; - fmasks.clear(); fmasks.resize(len, 0); - // ask read to be at least OLEN long! - for (int i = std::max(fullLen - OLEN + 1, 0); i < fullLen; i++) setMask(i); - } + RefSeq(const RefSeq& o) { + fullLen = o.fullLen; + totLen = o.totLen; + name = o.name; + seq = o.seq; + fmasks = o.fmasks; + } - RefSeq(const RefSeq& o) { - fullLen = o.fullLen; - totLen = o.totLen; - name = o.name; - seq = o.seq; - fmasks = o.fmasks; - } + RefSeq& operator= (const RefSeq &rhs) { + if (this != &rhs) { + fullLen = rhs.fullLen; + totLen = rhs.totLen; + name = rhs.name; + seq = rhs.seq; + fmasks = rhs.fmasks; + } - RefSeq& operator= (const RefSeq &rhs) { - if (this != &rhs) { - fullLen = rhs.fullLen; - totLen = rhs.totLen; - name = rhs.name; - seq = rhs.seq; - fmasks = rhs.fmasks; - } + return *this; + } - return *this; - } + ~RefSeq() {} - ~RefSeq() { - } + bool read(std::ifstream&, int = 0); + void write(std::ofstream&); - bool read(std::ifstream&, int = 0); - void write(std::ofstream&); + int getFullLen() const { return fullLen; } - int getFullLen() const { return fullLen; } + int getTotLen() const { return totLen; } - int getTotLen() const { return totLen; } + const std::string& getName() const { return name; } - const std::string& getName() const { return name; } + std::string getSeq() const { return seq; } - std::string getSeq() const { return seq; } - - std::string getRSeq() const { - std::string rseq = ""; - for (int i = totLen - 1; i >= 0; i--) rseq.push_back(getCharacter(get_rbase_id(seq[i]))); - return rseq; - } - - //get the sequence dir 0 : + 1 : - - std::string getSeq(int dir) const { - return (dir == 0 ? getSeq() : getRSeq()); - } + std::string getRSeq() const { + std::string rseq = ""; + for (int i = totLen - 1; i >= 0; i--) rseq.push_back(getCharacter(get_rbase_id(seq[i]))); + return rseq; + } + //get the sequence dir 0 : + 1 : - + std::string getSeq(int dir) const { + return (dir == 0 ? getSeq() : getRSeq()); + } - int get_id(int pos, int dir) const { - assert(pos >= 0 && pos < totLen); - return (dir == 0 ? get_base_id(seq[pos]) : get_rbase_id(seq[totLen - pos - 1])); - } - - bool getMask(int seedPos) const { - assert(seedPos >= 0 && seedPos < totLen); - return fmasks[seedPos / NBITS] & mask_codes[seedPos % NBITS]; - } - - void setMask(int seedPos) { - assert(seedPos >= 0 && seedPos < totLen); - fmasks[seedPos / NBITS] |= mask_codes[seedPos % NBITS]; - } + int get_id(int pos, int dir) const { + assert(pos >= 0 && pos < totLen); + return (dir == 0 ? get_base_id(seq[pos]) : get_rbase_id(seq[totLen - pos - 1])); + } + + bool getMask(int seedPos) const { + assert(seedPos >= 0 && seedPos < totLen); + return fmasks[seedPos / NBITS] & mask_codes[seedPos % NBITS]; + } + + void setMask(int seedPos) { + assert(seedPos >= 0 && seedPos < totLen); + fmasks[seedPos / NBITS] |= mask_codes[seedPos % NBITS]; + } - private: - int fullLen; // fullLen : the original length of an isoform - int totLen; // totLen : the total length, included polyA tails, if any - std::string name; // the tag - std::string seq; // the raw sequence, in forward strand - std::vector fmasks; // record masks for forward strand, each position occupies 1 bit +private: + int fullLen; // fullLen : the original length of an isoform + int totLen; // totLen : the total length, included polyA tails, if any + std::string name; // the tag + std::string seq; // the raw sequence, in forward strand + std::vector fmasks; // record masks for forward strand, each position occupies 1 bit }; -//internal read; option 0 : read all 1 : do not read seqence and name +//internal read; option 0 : read all 1 : do not read seqences bool RefSeq::read(std::ifstream& fin, int option) { - std::string line; + std::string line; - if (!(fin>>fullLen>>totLen)) return false; - assert(fullLen > 0 && totLen >= fullLen); - getline(fin, line); - if (!getline(fin, name)) return false; - if (!getline(fin, seq)) return false; - - int len = (fullLen - 1) / NBITS + 1; // assume each cell contains NBITS bits - fmasks.resize(len, 0); - for (int i = 0; i < len; i++) - if (!(fin>>fmasks[i])) return false; - getline(fin, line); + if (!(fin>>fullLen>>totLen)) return false; + assert(fullLen > 0 && totLen >= fullLen); + getline(fin, line); + if (!getline(fin, name)) return false; + if (!getline(fin, seq)) return false; + + int len = (fullLen - 1) / NBITS + 1; // assume each cell contains NBITS bits + fmasks.assign(len, 0); + for (int i = 0; i < len; i++) + if (!(fin>>fmasks[i])) return false; + getline(fin, line); - assert(option == 0 || option == 1); - if (option == 1) { name = seq = ""; } + assert(option == 0 || option == 1); + if (option == 1) { seq = ""; } - return true; + return true; } //write to file in "internal" format void RefSeq::write(std::ofstream& fout) { - fout<