Refs() {
M = 0;
seqs.clear();
+ has_polyA = false;
}
~Refs() {}
std::vector<RefSeq>& getRefs() { return seqs; } // may be slow, for copying the whole thing
+ bool hasPolyA() { return has_polyA; } // if any of sequence has poly(A) tail added
+
//lim : >=0 If mismatch > lim , return; -1 find all mismatches
int countMismatch(const std::string& seq, int pos, const std::string& readseq, int LEN, int lim = -1) {
int nMis = 0; // number of mismatches
}
bool isValid(int sid, int dir, int pos, const std::string& readseq, int LEN, int C) {
- if (sid <= 0 || sid > M || dir != 0 && dir != 1 || pos < 0 || pos + LEN > seqs[sid].getTotLen() || LEN > (int)readseq.length()) return false;
+ if (sid <= 0 || sid > M || (dir != 0 && dir != 1) || pos < 0 || pos + LEN > seqs[sid].getTotLen() || LEN > (int)readseq.length()) return false;
const std::string& seq = seqs[sid].getSeq(dir);
return countMismatch(seq, pos, readseq, LEN, C) <= C;
}
private:
int M; // # of isoforms, id starts from 1
std::vector<RefSeq> seqs; // reference sequences, starts from 1; 0 is for noise gene
-
+ bool has_polyA; // if at least one sequence has polyA added, the value is true; otherwise, the value is false
};
//inpF in fasta format
//read standard fasta format here
std::ifstream fin;
std::string tag, line, rawseq;
- void* pt; // istream& is indeed a pointer, that's why I can use void* here
seqs.clear();
seqs.push_back(RefSeq()); // noise isoform
M = 0;
+ has_polyA = false;
fin.open(inpF);
if (!fin.is_open()) { fprintf(stderr, "Cannot open %s! It may not exist.\n", inpF); exit(-1); }
- pt = getline(fin, line);
- while (pt != 0 && line[0] == '>') {
+ getline(fin, line);
+ while ((fin) && (line[0] == '>')) {
tag = line.substr(1);
rawseq = "";
- while((pt = getline(fin, line)) && line[0] != '>') {
+ while((getline(fin, line)) && (line[0] != '>')) {
rawseq += line;
}
- assert(rawseq.size() > 0);
+ if (rawseq.size() <= 0) {
+ printf("Warning: Fasta entry %s has an empty sequence! It is omitted!\n", tag.c_str());
+ continue;
+ }
++M;
seqs.push_back(RefSeq(tag, policy.convert(rawseq), rules.getLenAt(tag)));
+ has_polyA = has_polyA || seqs[M].getFullLen() < seqs[M].getTotLen();
}
fin.close();
seqs.push_back(RefSeq());
M = 0;
+ has_polyA = false;
bool success;
do {
if (success) {
seqs.push_back(seq);
++M;
+ has_polyA = has_polyA || seq.getFullLen() < seq.getTotLen();
}
} while (success);