11 //Each Object can only be used once
21 //Constructor , seq : the forward strand of the reference
22 //tag does not contain ">"
23 //polyALen : length of polyA tail we add
24 RefSeq(const std::string& name, const std::string& seq, int polyALen) {
25 fullLen = seq.length();
26 totLen = fullLen + polyALen;
29 this->seq.append(polyALen, 'A');
31 assert(fullLen > 0 && totLen >= fullLen);
33 int len = (fullLen - 1) / NBITS + 1;
34 fmasks.clear(); fmasks.resize(len, 0);
35 // ask read to be at least OLEN long!
36 for (int i = std::max(fullLen - OLEN + 1, 0); i < fullLen; i++) setMask(i);
39 RefSeq(const RefSeq& o) {
47 RefSeq& operator= (const RefSeq &rhs) {
49 fullLen = rhs.fullLen;
62 bool read(std::ifstream&, int = 0);
63 void write(std::ofstream&);
65 int getFullLen() const { return fullLen; }
67 int getTotLen() const { return totLen; }
69 const std::string& getName() const { return name; }
71 std::string getSeq() const { return seq; }
73 std::string getRSeq() const {
74 std::string rseq = "";
75 for (int i = totLen - 1; i >= 0; i--) rseq.push_back(getCharacter(get_rbase_id(seq[i])));
79 //get the sequence dir 0 : + 1 : -
80 std::string getSeq(int dir) const {
81 return (dir == 0 ? getSeq() : getRSeq());
85 int get_id(int pos, int dir) const {
86 assert(pos >= 0 && pos < totLen);
87 return (dir == 0 ? get_base_id(seq[pos]) : get_rbase_id(seq[totLen - pos - 1]));
90 bool getMask(int seedPos) const {
91 assert(seedPos >= 0 && seedPos < totLen);
92 return fmasks[seedPos / NBITS] & mask_codes[seedPos % NBITS];
95 void setMask(int seedPos) {
96 assert(seedPos >= 0 && seedPos < totLen);
97 fmasks[seedPos / NBITS] |= mask_codes[seedPos % NBITS];
101 int fullLen; // fullLen : the original length of an isoform
102 int totLen; // totLen : the total length, included polyA tails, if any
103 std::string name; // the tag
104 std::string seq; // the raw sequence, in forward strand
105 std::vector<unsigned int> fmasks; // record masks for forward strand, each position occupies 1 bit
108 //internal read; option 0 : read all 1 : do not read seqences
109 bool RefSeq::read(std::ifstream& fin, int option) {
112 if (!(fin>>fullLen>>totLen)) return false;
113 assert(fullLen > 0 && totLen >= fullLen);
115 if (!getline(fin, name)) return false;
116 if (!getline(fin, seq)) return false;
118 int len = (fullLen - 1) / NBITS + 1; // assume each cell contains NBITS bits
119 fmasks.resize(len, 0);
120 for (int i = 0; i < len; i++)
121 if (!(fin>>fmasks[i])) return false;
124 assert(option == 0 || option == 1);
125 if (option == 1) { seq = ""; }
130 //write to file in "internal" format
131 void RefSeq::write(std::ofstream& fout) {
132 fout<<fullLen<<" "<<totLen<<std::endl;
133 fout<<name<<std::endl;
134 fout<<seq<<std::endl;
136 int len = fmasks.size();
137 for (int i = 0; i < len - 1; i++) fout<<fmasks[i]<<" ";
138 fout<<fmasks[len - 1]<<std::endl;