Refs() {
M = 0;
seqs.clear();
+ has_polyA = false;
}
~Refs() {}
std::vector<RefSeq>& getRefs() { return seqs; } // may be slow, for copying the whole thing
+ bool hasPolyA() { return has_polyA; } // if any of sequence has poly(A) tail added
+
//lim : >=0 If mismatch > lim , return; -1 find all mismatches
int countMismatch(const std::string& seq, int pos, const std::string& readseq, int LEN, int lim = -1) {
int nMis = 0; // number of mismatches
private:
int M; // # of isoforms, id starts from 1
std::vector<RefSeq> seqs; // reference sequences, starts from 1; 0 is for noise gene
-
+ bool has_polyA; // if at least one sequence has polyA added, the value is true; otherwise, the value is false
};
//inpF in fasta format
//read standard fasta format here
std::ifstream fin;
std::string tag, line, rawseq;
- void* pt; // istream& is indeed a pointer, that's why I can use void* here
seqs.clear();
seqs.push_back(RefSeq()); // noise isoform
M = 0;
+ has_polyA = false;
fin.open(inpF);
if (!fin.is_open()) { fprintf(stderr, "Cannot open %s! It may not exist.\n", inpF); exit(-1); }
- pt = getline(fin, line);
- while (pt != 0 && line[0] == '>') {
+ getline(fin, line);
+ while ((fin) && (line[0] == '>')) {
tag = line.substr(1);
rawseq = "";
- while((pt = getline(fin, line)) && line[0] != '>') {
+ while((getline(fin, line)) && (line[0] != '>')) {
rawseq += line;
}
if (rawseq.size() <= 0) {
}
++M;
seqs.push_back(RefSeq(tag, policy.convert(rawseq), rules.getLenAt(tag)));
+ has_polyA = has_polyA || seqs[M].getFullLen() < seqs[M].getTotLen();
}
fin.close();
seqs.push_back(RefSeq());
M = 0;
+ has_polyA = false;
bool success;
do {
if (success) {
seqs.push_back(seq);
++M;
+ has_polyA = has_polyA || seq.getFullLen() < seq.getTotLen();
}
} while (success);