11 //Each Object can only be used once
20 //Constructor. seq : the forward strand of the reference
21 //name : the tag; it does not contain ">"
22 //polyALen : length of the polyA tail we add
// Builds a reference from its tag and forward-strand sequence, extended by
// polyALen 'A' bases.
//   name     : the tag of this reference
//   seq      : forward-strand sequence; must be non-empty (asserted below)
//   polyALen : number of 'A' bases appended after the sequence
23 RefSeq(const std::string& name, const std::string& seq, int polyALen) {
24 fullLen = seq.length(); // length of the parameter seq, before any tail (size_t narrowed to int)
25 totLen = fullLen + polyALen;
28 this->seq.append(polyALen, 'A'); // append the poly(A) tail to the stored sequence
30 assert(fullLen > 0 && totLen >= fullLen); // non-empty sequence; totLen >= fullLen means polyALen >= 0
32 int len = (fullLen - 1) / NBITS + 1; // number of NBITS-wide words needed to hold fullLen mask bits
33 fmasks.assign(len, 0); // every position starts unmasked
34 // set mask if poly(A) tail is added
// Masks positions max(fullLen - OLEN + 1, 0) .. fullLen - 1, i.e. at most the
// last OLEN - 1 positions of the original (untailed) sequence.
36 for (int i = std::max(fullLen - OLEN + 1, 0); i < fullLen; i++) setMask(i);
40 RefSeq(const RefSeq& o) {
48 RefSeq& operator= (const RefSeq &rhs) {
50 fullLen = rhs.fullLen;
// Reads one record in the internal format from the stream. The second
// argument is the option (0 : keep everything, 1 : discard the sequence) and
// defaults to 0. Returns false as soon as a field cannot be read.
62 bool read(std::ifstream&, int = 0);
// Writes this record to the stream in the internal format: lengths, name,
// sequence, then the mask words.
63 void write(std::ofstream&);
// Returns fullLen, the original length without any poly(A) tail.
65 int getFullLen() const { return fullLen; }
// Returns totLen, the total length including the poly(A) tail, if any.
67 int getTotLen() const { return totLen; }
// Returns the tag by const reference; the reference refers to the member name.
69 const std::string& getName() const { return name; }
// Returns the stored forward-strand sequence. Returned by value, so every
// call copies the whole string.
71 std::string getSeq() const { return seq; }
// Builds the sequence that getSeq(dir) hands out for dir != 0: walks seq from
// position totLen - 1 down to 0 and appends getCharacter(get_rbase_id(base))
// for each base.
// Presumably this yields the reverse complement -- confirm against
// get_rbase_id / getCharacter, which are defined elsewhere.
// NOTE(review): indexes seq[0 .. totLen - 1], so seq must hold totLen
// characters; read() with option 1 empties seq.
73 std::string getRSeq() const {
74 std::string rseq = "";
75 for (int i = totLen - 1; i >= 0; i--) rseq.push_back(getCharacter(get_rbase_id(seq[i])));
79 //get the sequence; dir 0 : +, 1 : -
// Returns getSeq() when dir == 0 and getRSeq() for any other value of dir.
80 std::string getSeq(int dir) const {
81 return (dir == 0 ? getSeq() : getRSeq());
// Returns the base id at position pos for the given direction.
//   dir == 0  : get_base_id(seq[pos])
//   otherwise : get_rbase_id(seq[totLen - pos - 1]), i.e. the position
//               mirrored from the other end of the sequence
// pos must lie in [0, totLen); this is checked by assert only.
84 int get_id(int pos, int dir) const {
85 assert(pos >= 0 && pos < totLen);
86 return (dir == 0 ? get_base_id(seq[pos]) : get_rbase_id(seq[totLen - pos - 1]));
// Tests the mask for seedPos: ANDs word seedPos / NBITS of fmasks with
// mask_codes[seedPos % NBITS]; a non-zero result converts to true.
// NOTE(review): the assert admits seedPos up to totLen - 1, but fmasks holds
// only (fullLen - 1) / NBITS + 1 words; confirm callers never pass
// seedPos >= fullLen, otherwise the index may run past the end of fmasks.
89 bool getMask(int seedPos) const {
90 assert(seedPos >= 0 && seedPos < totLen);
91 return fmasks[seedPos / NBITS] & mask_codes[seedPos % NBITS];
// Sets the mask for seedPos by OR-ing mask_codes[seedPos % NBITS] into word
// seedPos / NBITS of fmasks.
// NOTE(review): the assert admits seedPos up to totLen - 1, but fmasks holds
// only (fullLen - 1) / NBITS + 1 words; confirm callers never pass
// seedPos >= fullLen, otherwise the index may run past the end of fmasks.
94 void setMask(int seedPos) {
95 assert(seedPos >= 0 && seedPos < totLen);
96 fmasks[seedPos / NBITS] |= mask_codes[seedPos % NBITS];
100 int fullLen; // fullLen : the original length of an isoform, without any poly(A) tail
101 int totLen; // totLen : the total length, including the poly(A) tail, if any
102 std::string name; // the tag
103 std::string seq; // the raw sequence, in forward strand
104 std::vector<unsigned int> fmasks; // masks for the forward strand, one bit per position, NBITS positions per element
107 //internal read; option 0 : read all, 1 : do not keep sequences
// Parses one record from fin into this object: the two lengths, the name
// line, the sequence line, then (fullLen - 1) / NBITS + 1 mask words.
// Returns false as soon as any of these extractions fails.
//   option : 0 keeps the sequence, 1 clears it after it has been read
// The length and option checks are asserts only, so they are compiled out
// when NDEBUG is defined.
108 bool RefSeq::read(std::ifstream& fin, int option) {
111 if (!(fin>>fullLen>>totLen)) return false; // header: fullLen then totLen
112 assert(fullLen > 0 && totLen >= fullLen);
114 if (!getline(fin, name)) return false; // one line holding the tag
115 if (!getline(fin, seq)) return false; // one line holding the sequence
117 int len = (fullLen - 1) / NBITS + 1; // assume each cell contains NBITS bits
118 fmasks.assign(len, 0);
119 for (int i = 0; i < len; i++)
120 if (!(fin>>fmasks[i])) return false; // whitespace-separated mask words
123 assert(option == 0 || option == 1);
124 if (option == 1) { seq = ""; } // the sequence line is still consumed above; it is only discarded here
129 //write to file in "internal" format
// Writes this record to fout: "fullLen totLen" on the first line, then the
// name, then the sequence, then all mask words separated by single spaces on
// a final line. Each std::endl also flushes the stream.
// NOTE(review): fmasks[len - 1] requires fmasks to be non-empty; confirm no
// path writes an object whose fmasks was never sized.
130 void RefSeq::write(std::ofstream& fout) {
131 fout<<fullLen<<" "<<totLen<<std::endl;
132 fout<<name<<std::endl;
133 fout<<seq<<std::endl;
135 int len = fmasks.size();
136 for (int i = 0; i < len - 1; i++) fout<<fmasks[i]<<" "; // all words but the last, each followed by a space
137 fout<<fmasks[len - 1]<<std::endl; // last word ends the line