X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=PairedEndQModel.h;h=2c638abed103bcf4f7167b70b60221aa9eafed88;hb=cb94fd597b180aa7cb01ae84c9d1025201b98d8e;hp=64e486b66b98d19fafc98bce67ef0a5bd85131ad;hpb=7b8405a9fe481d041d7e50d1d8abb4b589bdc0d3;p=rsem.git diff --git a/PairedEndQModel.h b/PairedEndQModel.h index 64e486b..2c638ab 100644 --- a/PairedEndQModel.h +++ b/PairedEndQModel.h @@ -236,22 +236,27 @@ void PairedEndQModel::estimateFromReads(const char* readFN) { for (int i = 0; i < 3; i++) if (N[i] > 0) { genReadFileNames(readFN, i, read_type, s, readFs); - ReadReader reader(s, readFs); + ReadReader reader(s, readFs, refs->hasPolyA(), seedLen); // allow calculation of calc_lq() function int cnt = 0; while (reader.next(read)) { SingleReadQ mate1 = read.getMate1(); SingleReadQ mate2 = read.getMate2(); - mld->update(mate1.getReadLength(), 1.0); - mld->update(mate2.getReadLength(), 1.0); - - qd->update(mate1.getQScore()); - qd->update(mate2.getQScore()); - - if (i == 0) { - nqpro->updateC(mate1.getReadSeq(), mate1.getQScore()); - nqpro->updateC(mate2.getReadSeq(), mate2.getQScore()); + if (!read.isLowQuality()) { + mld->update(mate1.getReadLength(), 1.0); + mld->update(mate2.getReadLength(), 1.0); + + qd->update(mate1.getQScore()); + qd->update(mate2.getQScore()); + + if (i == 0) { + nqpro->updateC(mate1.getReadSeq(), mate1.getQScore()); + nqpro->updateC(mate2.getReadSeq(), mate2.getQScore()); + } + } + else if (verbose && (mate1.getReadLength() < seedLen || mate2.getReadLength() < seedLen)) { + printf("Warning: Read %s is ignored due to at least one of the mates' length < seed length %d!\n", read.getName().c_str(), seedLen); } ++cnt; @@ -422,39 +427,39 @@ void PairedEndQModel::finishSimulation() { void PairedEndQModel::calcMW() { - assert(seedLen >= OLEN && mld->getMinL() >= seedLen); - - memset(mw, 0, sizeof(double) * (M + 1)); - mw[0] = 1.0; - - for (int i = 1; i <= M; i++) { - RefSeq& ref = refs->getRef(i); - int totLen = ref.getTotLen(); - int fullLen = ref.getFullLen(); - int end = std::min(fullLen, totLen - gld->getMinL() + 1); - double value = 0.0; - int minL, maxL; - int effL, pfpos; - - //seedPos is fpos here - for (int seedPos = 0; seedPos < end; seedPos++) - if (ref.getMask(seedPos)) { - minL = gld->getMinL(); - maxL = std::min(gld->getMaxL(), totLen - seedPos); - pfpos = seedPos; - for (int fragLen = minL; fragLen <= maxL; fragLen++) { - effL = std::min(fullLen, totLen - fragLen + 1); - value += gld->getAdjustedProb(fragLen, totLen) * rspd->getAdjustedProb(pfpos, effL, fullLen); - } - } + assert(mld->getMinL() >= seedLen); + + memset(mw, 0, sizeof(double) * (M + 1)); + mw[0] = 1.0; + + for (int i = 1; i <= M; i++) { + RefSeq& ref = refs->getRef(i); + int totLen = ref.getTotLen(); + int fullLen = ref.getFullLen(); + int end = std::min(fullLen, totLen - gld->getMinL() + 1); + double value = 0.0; + int minL, maxL; + int effL, pfpos; + + //seedPos is fpos here + for (int seedPos = 0; seedPos < end; seedPos++) + if (ref.getMask(seedPos)) { + minL = gld->getMinL(); + maxL = std::min(gld->getMaxL(), totLen - seedPos); + pfpos = seedPos; + for (int fragLen = minL; fragLen <= maxL; fragLen++) { + effL = std::min(fullLen, totLen - fragLen + 1); + value += gld->getAdjustedProb(fragLen, totLen) * rspd->getAdjustedProb(pfpos, effL, fullLen); + } + } - mw[i] = 1.0 - value; + mw[i] = 1.0 - value; - if (mw[i] < 1e-8) { - // fprintf(stderr, "Warning: %dth reference sequence is masked for almost all positions!\n", i); - mw[i] = 0.0; - } - } + if (mw[i] < 1e-8) { + // fprintf(stderr, "Warning: %dth reference sequence is masked for almost all positions!\n", i); + mw[i] = 0.0; + } + } } #endif /* PAIREDENDQMODEL_H_ */