14 #include "RefSeqPolicy.h"
15 #include "PolyARules.h"
27 void makeRefs(char*, RefSeqPolicy&, PolyARules&);
28 void loadRefs(char*, int = 0);
31 int getM() { return M; } // get number of isoforms
33 //int getNS() { return M + 1; } // get number of parameters, I do not think we need it here.
35 RefSeq& getRef(int sid) { return seqs[sid]; } // get a particular reference
37 std::vector<RefSeq>& getRefs() { return seqs; } // may be slow, for copying the whole thing
39 //lim : >=0 If mismatch > lim , return; -1 find all mismatches
40 int countMismatch(const std::string& seq, int pos, const std::string& readseq, int LEN, int lim = -1) {
41 int nMis = 0; // number of mismatches
43 for (int i = 0; i < LEN; i++) {
44 char rc = toupper(readseq[i]);
45 if (seq[i + pos] == 'N' || rc == 'N' || seq[i + pos] != rc) nMis++;
48 if (lim >= 0 && nMis > lim) return nMis;
54 bool isValid(int sid, int dir, int pos, const std::string& readseq, int LEN, int C) {
55 if (sid <= 0 || sid > M || dir != 0 && dir != 1 || pos < 0 || pos + LEN > seqs[sid].getTotLen() || LEN > (int)readseq.length()) return false;
56 const std::string& seq = seqs[sid].getSeq(dir);
57 return countMismatch(seq, pos, readseq, LEN, C) <= C;
60 // get segment from refs
61 std::string getSegment(int sid, int dir, int pos, int LEN) {
62 if (pos < 0 || pos + LEN > seqs[sid].getTotLen()) return "fail";
64 const std::string& seq = seqs[sid].getSeq(dir);
67 for (int i = 0; i < LEN; i++)
68 seg.append(1, seq[pos + i]);
74 int M; // # of isoforms, id starts from 1
75 std::vector<RefSeq> seqs; // reference sequences, starts from 1; 0 is for noise gene
79 //inpF in fasta format
80 void Refs::makeRefs(char *inpF, RefSeqPolicy& policy, PolyARules& rules) {
81 //read standard fasta format here
83 std::string tag, line, rawseq;
84 void* pt; // istream& is indeed a pointer, that's why I can use void* here
87 seqs.push_back(RefSeq()); // noise isoform
92 if (!fin.is_open()) { fprintf(stderr, "Cannot open %s! It may not exist.\n", inpF); exit(-1); }
93 pt = getline(fin, line);
94 while (pt != 0 && line[0] == '>') {
97 while((pt = getline(fin, line)) && line[0] != '>') {
100 assert(rawseq.size() > 0);
102 seqs.push_back(RefSeq(tag, policy.convert(rawseq), rules.getLenAt(tag)));
106 if (verbose) { printf("Refs.makeRefs finished!\n"); }
109 //inpF in fasta format, with sequence all in one line together
110 //option 0 read all, 1 do not read sequences
111 void Refs::loadRefs(char *inpF, int option) {
116 if (!fin.is_open()) { fprintf(stderr, "Cannot open %s! It may not exist.\n", inpF); exit(-1); }
118 seqs.push_back(RefSeq());
124 success = seq.read(fin, option);
133 assert(M + 1 == (int)seqs.size());
135 if (verbose) { printf("Refs.loadRefs finished!\n"); }
138 void Refs::saveRefs(char* outF) {
142 for (int i = 1; i <= M; i++) {
147 if (verbose) { printf("Refs.saveRefs finished!\n"); }