#include<sstream>
#include "utils.h"
+#include "my_assert.h"
#include "Orientation.h"
#include "LenDist.h"
#include "RSPD.h"
int fpos = (dir == 0 ? pos : totLen - pos - insertLen); // the aligned position reported in SAM file, should be a coordinate in forward strand
int effL = std::min(fullLen, totLen - insertLen + 1);
- assert(fpos >= 0 && fpos + insertLen <= totLen && insertLen <= totLen);
+ general_assert(fpos >= 0, "The alignment of fragment " + read.getName() + " to transcript " + itos(sid) + " starts at " + itos(fpos) + \
+ " from the forward direction, which should be a non-negative number! " + \
+ "It is possible that the aligner you use gave different read lengths for a same read in SAM file.");
+ general_assert(fpos + insertLen <= totLen,"Fragment " + read.getName() + " is hung over the end of transcript " + itos(sid) + "! " \
+ + "It is possible that the aligner you use gave different read lengths for a same read in SAM file.");
+ general_assert(insertLen <= totLen, "Fragment " + read.getName() + " has length " + itos(insertLen) + ", but it is aligned to transcript " \
+ + itos(sid) + ", whose length (" + itos(totLen) + ") is shorter than the fragment's length!");
+
+
if (fpos >= fullLen || ref.getMask(fpos)) return 0.0; // For paired-end model, fpos is the seedPos
prob = ori->getProb(dir) * gld->getAdjustedProb(insertLen, totLen) *
for (int i = 0; i < 3; i++)
if (N[i] > 0) {
genReadFileNames(readFN, i, read_type, s, readFs);
- ReadReader<PairedEndRead> reader(s, readFs);
+ ReadReader<PairedEndRead> reader(s, readFs, refs->hasPolyA(), seedLen); // allow calculation of calc_lq() function
int cnt = 0;
while (reader.next(read)) {
SingleRead mate1 = read.getMate1();
SingleRead mate2 = read.getMate2();
-
- mld->update(mate1.getReadLength(), 1.0);
- mld->update(mate2.getReadLength(), 1.0);
-
- if (i == 0) {
- npro->updateC(mate1.getReadSeq());
- npro->updateC(mate2.getReadSeq());
+
+ if (!read.isLowQuality()) {
+ mld->update(mate1.getReadLength(), 1.0);
+ mld->update(mate2.getReadLength(), 1.0);
+
+ if (i == 0) {
+ npro->updateC(mate1.getReadSeq());
+ npro->updateC(mate2.getReadSeq());
+ }
+ }
+ else if (verbose && (mate1.getReadLength() < seedLen || mate2.getReadLength() < seedLen)) {
+ printf("Warning: Read %s is ignored due to at least one of the mates' length < seed length %d!\n", read.getName().c_str(), seedLen);
}
++cnt;
FILE *fi = fopen(inpF, "r");
if (fi == NULL) { fprintf(stderr, "Cannot open %s! It may not exist.\n", inpF); exit(-1); }
- fscanf(fi, "%d", &val);
+ assert(fscanf(fi, "%d", &val) == 1);
assert(val == model_type);
ori->read(fi);
if (M == 0) M = val;
if (M == val) {
mw = new double[M + 1];
- for (int i = 0; i <= M; i++) fscanf(fi, "%lf", &mw[i]);
+ for (int i = 0; i <= M; i++) assert(fscanf(fi, "%lf", &mw[i]) == 1);
}
}
}
void PairedEndModel::calcMW() {
- assert(seedLen >= OLEN && mld->getMinL() >= seedLen);
-
- memset(mw, 0, sizeof(double) * (M + 1));
- mw[0] = 1.0;
-
- for (int i = 1; i <= M; i++) {
- RefSeq& ref = refs->getRef(i);
- int totLen = ref.getTotLen();
- int fullLen = ref.getFullLen();
- int end = std::min(fullLen, totLen - gld->getMinL() + 1);
- double value = 0.0;
- int minL, maxL;
- int effL, pfpos;
-
- //seedPos is fpos here
- for (int seedPos = 0; seedPos < end; seedPos++)
- if (ref.getMask(seedPos)) {
- minL = gld->getMinL();
- maxL = std::min(gld->getMaxL(), totLen - seedPos);
- pfpos = seedPos;
- for (int fragLen = minL; fragLen <= maxL; fragLen++) {
- effL = std::min(fullLen, totLen - fragLen + 1);
- value += gld->getAdjustedProb(fragLen, totLen) * rspd->getAdjustedProb(pfpos, effL, fullLen);
+ assert(mld->getMinL() >= seedLen); // precondition: the minimum length reported by mld must be at least seedLen
+ // Recomputes mw[1..M]: for each reference i, mw[i] = 1 - (summed probability of fragments whose start position has getMask() set).
+ memset(mw, 0, sizeof(double) * (M + 1)); // zero all M + 1 entries before refilling
+ mw[0] = 1.0; // entry 0 is not visited by the loop below and is fixed at 1.0
+ // One pass per reference sequence, indexed 1..M.
+ for (int i = 1; i <= M; i++) {
+ RefSeq& ref = refs->getRef(i);
+ int totLen = ref.getTotLen();
+ int fullLen = ref.getFullLen();
+ int end = std::min(fullLen, totLen - gld->getMinL() + 1); // exclusive upper bound on start positions: a fragment of the minimum length must still fit within totLen
+ double value = 0.0; // accumulates the probability mass attributed to start positions where getMask() is true
+ int minL, maxL;
+ int effL, pfpos;
+ // Sum over every flagged start position and every fragment length that fits from that position.
+ //seedPos is fpos here (the start position used as the first argument of rspd->getAdjustedProb)
+ for (int seedPos = 0; seedPos < end; seedPos++)
+ if (ref.getMask(seedPos)) { // only positions flagged by the reference's mask contribute
+ minL = gld->getMinL();
+ maxL = std::min(gld->getMaxL(), totLen - seedPos); // the fragment may not run past the end of the reference
+ pfpos = seedPos;
+ for (int fragLen = minL; fragLen <= maxL; fragLen++) {
+ effL = std::min(fullLen, totLen - fragLen + 1); // number of start positions available to a fragment of this length, capped at fullLen
+ value += gld->getAdjustedProb(fragLen, totLen) * rspd->getAdjustedProb(pfpos, effL, fullLen); // length probability times start-position probability
+ }
+ }
+ // Whatever mass was not attributed to flagged positions remains for this reference.
+ mw[i] = 1.0 - value;
+ // Clamp tiny (or slightly negative) remainders to exactly zero.
+ if (mw[i] < 1e-8) {
+ //fprintf(stderr, "Warning: %dth reference sequence is masked for almost all positions!\n", i);
+ mw[i] = 0.0;
+ }
}
- }
-
- mw[i] = 1.0 - value;
-
- if (mw[i] < 1e-8) {
- //fprintf(stderr, "Warning: %dth reference sequence is masked for almost all positions!\n", i);
- mw[i] = 0.0;
- }
- }
}
#endif /* PAIREDENDMODEL_H_ */