mw = NULL;
if (isMaster) {
- ori = new Orientation(params.probF);
gld = new LenDist(params.minL, params.maxL);
if (mean >= EPSILON) {
mld = new LenDist(params.mate_minL, params.mate_maxL);
qd = new QualDist();
}
+ ori = new Orientation(params.probF);
if (estRSPD) { rspd = new RSPD(estRSPD, params.B); }
qpro = new QProfile();
nqpro = new NoiseQProfile();
void update(const SingleReadQ& read, const SingleHit& hit, double frac) {
if (read.isLowQuality() || frac < EPSILON) return;
- RefSeq& ref = refs->getRef(hit.getSid());
+ const RefSeq& ref = refs->getRef(hit.getSid());
+
int dir = hit.getDir();
int pos = hit.getPos();
for (int i = 0; i < 3; i++)
if (N[i] > 0) {
genReadFileNames(readFN, i, read_type, s, readFs);
- ReadReader<SingleReadQ> reader(s, readFs);
+ ReadReader<SingleReadQ> reader(s, readFs, refs->hasPolyA(), seedLen); // allow calculation of calc_lq() function
int cnt = 0;
while (reader.next(read)) {
- mld != NULL ? mld->update(read.getReadLength(), 1.0) : gld->update(read.getReadLength(), 1.0);
- qd->update(read.getQScore());
- if (i == 0) { nqpro->updateC(read.getReadSeq(), read.getQScore()); }
+ if (!read.isLowQuality()) {
+ mld != NULL ? mld->update(read.getReadLength(), 1.0) : gld->update(read.getReadLength(), 1.0);
+ qd->update(read.getQScore());
+ if (i == 0) { nqpro->updateC(read.getReadSeq(), read.getQScore()); }
+ }
+ else if (verbose && read.getReadLength() < seedLen) {
+ printf("Warning: Read %s is ignored due to read length %d < seed length %d!\n", read.getName().c_str(), read.getReadLength(), seedLen);
+ }
++cnt;
if (verbose && cnt % 1000000 == 0) { printf("%d READS PROCESSED\n", cnt); }
FILE *fi = fopen(inpF, "r");
if (fi == NULL) { fprintf(stderr, "Cannot open %s! It may not exist.\n", inpF); exit(-1); }
- fscanf(fi, "%d", &val);
+ assert(fscanf(fi, "%d", &val) == 1);
assert(val == model_type);
ori->read(fi);
gld->read(fi);
- fscanf(fi, "%d", &val);
+ assert(fscanf(fi, "%d", &val) == 1);
if (val > 0) {
if (mld == NULL) mld = new LenDist();
mld->read(fi);
qpro->read(fi);
nqpro->read(fi);
- if (fscanf(fi, "%d", &M) == 1) {
- mw = new double[M + 1];
- for (int i = 0; i <= M; i++) fscanf(fi, "%lf", &mw[i]);
+ if (fscanf(fi, "%d", &val) == 1) {
+ if (M == 0) M = val;
+ if (M == val) {
+ mw = new double[M + 1];
+ for (int i = 0; i <= M; i++) assert(fscanf(fi, "%lf", &mw[i]) == 1);
+ }
}
fclose(fi);
}
-//Only master node can call
+//Only master node can call. Only be called at EM.cpp
void SingleQModel::write(const char* outF) {
FILE *fo = fopen(outF, "w");
}
}
- std::ostringstream stdout;
- stdout<<rid<<"_"<<dir<<"_"<<sid<<"_"<<pos;
- name = stdout.str();
+ strout<<rid<<"_"<<dir<<"_"<<sid<<"_"<<pos;
+ name = strout.str();
read = SingleReadQ(name, readseq, qual);
}
void SingleQModel::calcMW() {
- double probF, probR;
-
- assert(seedLen >= OLEN && (mld == NULL ? gld->getMinL() : mld->getMinL()) >= seedLen);
-
- memset(mw, 0, sizeof(double) * (M + 1));
- mw[0] = 1.0;
-
-
- probF = ori->getProb(0);
- probR = ori->getProb(1);
-
- for (int i = 1; i <= M; i++) {
- RefSeq& ref = refs->getRef(i);
- int totLen = ref.getTotLen();
- int fullLen = ref.getFullLen();
- double value = 0.0;
- int minL, maxL;
- int effL, pfpos;
- int end = std::min(fullLen, totLen - seedLen + 1);
- double factor;
-
- for (int seedPos = 0; seedPos < end; seedPos++)
- if (ref.getMask(seedPos)) {
- //forward
- minL = gld->getMinL();
- maxL = std::min(gld->getMaxL(), totLen - seedPos);
- pfpos = seedPos;
- for (int fragLen = minL; fragLen <= maxL; fragLen++) {
- effL = std::min(fullLen, totLen - fragLen + 1);
- factor = (mld == NULL ? 1.0 : mld->getAdjustedCumulativeProb(std::min(mld->getMaxL(), fragLen), fragLen));
- value += probF * gld->getAdjustedProb(fragLen, totLen) * rspd->getAdjustedProb(pfpos, effL, fullLen) * factor;
- }
- //reverse
- minL = gld->getMinL();
- maxL = std::min(gld->getMaxL(), seedPos + seedLen);
- for (int fragLen = minL; fragLen <= maxL; fragLen++) {
- pfpos = seedPos - (fragLen - seedLen);
- effL = std::min(fullLen, totLen - fragLen + 1);
- factor = (mld == NULL ? 1.0 : mld->getAdjustedCumulativeProb(std::min(mld->getMaxL(), fragLen), fragLen));
- value += probR * gld->getAdjustedProb(fragLen, totLen) * rspd->getAdjustedProb(pfpos, effL, fullLen) * factor;
+ double probF, probR;
+
+ assert((mld == NULL ? gld->getMinL() : mld->getMinL()) >= seedLen);
+
+ memset(mw, 0, sizeof(double) * (M + 1));
+ mw[0] = 1.0;
+
+ probF = ori->getProb(0);
+ probR = ori->getProb(1);
+
+ for (int i = 1; i <= M; i++) {
+ RefSeq& ref = refs->getRef(i);
+ int totLen = ref.getTotLen();
+ int fullLen = ref.getFullLen();
+ double value = 0.0;
+ int minL, maxL;
+ int effL, pfpos;
+ int end = std::min(fullLen, totLen - seedLen + 1);
+ double factor;
+
+ for (int seedPos = 0; seedPos < end; seedPos++)
+ if (ref.getMask(seedPos)) {
+ //forward
+ minL = gld->getMinL();
+ maxL = std::min(gld->getMaxL(), totLen - seedPos);
+ pfpos = seedPos;
+ for (int fragLen = minL; fragLen <= maxL; fragLen++) {
+ effL = std::min(fullLen, totLen - fragLen + 1);
+ factor = (mld == NULL ? 1.0 : mld->getAdjustedCumulativeProb(std::min(mld->getMaxL(), fragLen), fragLen));
+ value += probF * gld->getAdjustedProb(fragLen, totLen) * rspd->getAdjustedProb(pfpos, effL, fullLen) * factor;
+ }
+ //reverse
+ minL = gld->getMinL();
+ maxL = std::min(gld->getMaxL(), seedPos + seedLen);
+ for (int fragLen = minL; fragLen <= maxL; fragLen++) {
+ pfpos = seedPos - (fragLen - seedLen);
+ effL = std::min(fullLen, totLen - fragLen + 1);
+ factor = (mld == NULL ? 1.0 : mld->getAdjustedCumulativeProb(std::min(mld->getMaxL(), fragLen), fragLen));
+ value += probR * gld->getAdjustedProb(fragLen, totLen) * rspd->getAdjustedProb(pfpos, effL, fullLen) * factor;
+ }
+ }
+
+ //for reverse strand masking
+ for (int seedPos = end; seedPos <= totLen - seedLen; seedPos++) {
+ minL = std::max(gld->getMinL(), seedPos + seedLen - fullLen + 1);
+ maxL = std::min(gld->getMaxL(), seedPos + seedLen);
+ for (int fragLen = minL; fragLen <= maxL; fragLen++) {
+ pfpos = seedPos - (fragLen - seedLen);
+ effL = std::min(fullLen, totLen - fragLen + 1);
+ factor = (mld == NULL ? 1.0 : mld->getAdjustedCumulativeProb(std::min(mld->getMaxL(), fragLen), fragLen));
+ value += probR * gld->getAdjustedProb(fragLen, totLen) * rspd->getAdjustedProb(pfpos, effL, fullLen) * factor;
+ }
+ }
+
+ mw[i] = 1.0 - value;
+
+ if (mw[i] < 1e-8) {
+ // fprintf(stderr, "Warning: %dth reference sequence is masked for almost all positions!\n", i);
+ mw[i] = 0.0;
+ }
}
- }
-
- //for reverse strand masking
- for (int seedPos = end; seedPos <= totLen - seedLen; seedPos++) {
- minL = std::max(gld->getMinL(), seedPos + seedLen - fullLen + 1);
- maxL = std::min(gld->getMaxL(), seedPos + seedLen);
- for (int fragLen = minL; fragLen <= maxL; fragLen++) {
- pfpos = seedPos - (fragLen - seedLen);
- effL = std::min(fullLen, totLen - fragLen + 1);
- factor = (mld == NULL ? 1.0 : mld->getAdjustedCumulativeProb(std::min(mld->getMaxL(), fragLen), fragLen));
- value += probR * gld->getAdjustedProb(fragLen, totLen) * rspd->getAdjustedProb(pfpos, effL, fullLen) * factor;
- }
- }
-
- mw[i] = 1.0 - value;
-
- if (mw[i] < 1e-8) {
- // fprintf(stderr, "Warning: %dth reference sequence is masked for almost all positions!\n", i);
- mw[i] = 0.0;
- }
- }
}
#endif /* SINGLEQMODEL_H_ */