delete[] mhps;
}
+int tmp_n;
+
inline bool doesUpdateModel(int ROUND) {
- //return false; // never update, for debugging only
- return ROUND <= 20 || ROUND % 100 == 0;
+ // return ROUND <= 20 || ROUND % 100 == 0;
+ return ROUND <= 10;
}
//Including initialize, algorithm and results saving
#include<cstdio>
#include<cstring>
+#include<cstdlib>
#include<cassert>
#include<algorithm>
sum += pdf[i];
}
+ if (sum <= EPSILON) { fprintf(stderr, "No valid read to estimate the length distribution!\n"); exit(-1); }
+
for (int i = 1; i <= span; i++) {
pdf[i] = pdf[i] / sum;
cdf[i] = cdf[i - 1] + pdf[i];
logp = 0.0;
sum = 0.0;
for (int i = 0; i < NCODES; i++) sum += (p[i] + c[i]);
- if (sum <= 0.0) return;
+ if (sum <= EPSILON) return;
for (int i = 0; i < NCODES; i++) {
p[i] = (p[i] + c[i]) / sum;
if (c[i] > 0.0) { logp += c[i] * log(p[i]); }
while (reader.next(read)) {
SingleRead mate1 = read.getMate1();
SingleRead mate2 = read.getMate2();
-
- mld->update(mate1.getReadLength(), 1.0);
- mld->update(mate2.getReadLength(), 1.0);
-
- if (i == 0) {
- npro->updateC(mate1.getReadSeq());
- npro->updateC(mate2.getReadSeq());
- }
+
+ if (!read.isLowQuality()) {
+ mld->update(mate1.getReadLength(), 1.0);
+ mld->update(mate2.getReadLength(), 1.0);
+
+ if (i == 0) {
+ npro->updateC(mate1.getReadSeq());
+ npro->updateC(mate2.getReadSeq());
+ }
+ }
+ else if (verbose && (mate1.getReadLength() < OLEN || mate2.getReadLength() < OLEN)) {
+ printf("Warning: Read %s is ignored due to at least one of the mates' length < %d!\n", read.getName().c_str(), OLEN);
+ }
++cnt;
if (verbose && cnt % 1000000 == 0) { printf("%d READS PROCESSED\n", cnt); }
SingleReadQ mate1 = read.getMate1();
SingleReadQ mate2 = read.getMate2();
- mld->update(mate1.getReadLength(), 1.0);
- mld->update(mate2.getReadLength(), 1.0);
-
- qd->update(mate1.getQScore());
- qd->update(mate2.getQScore());
-
- if (i == 0) {
- nqpro->updateC(mate1.getReadSeq(), mate1.getQScore());
- nqpro->updateC(mate2.getReadSeq(), mate2.getQScore());
- }
+ if (!read.isLowQuality()) {
+ mld->update(mate1.getReadLength(), 1.0);
+ mld->update(mate2.getReadLength(), 1.0);
+
+ qd->update(mate1.getQScore());
+ qd->update(mate2.getQScore());
+
+ if (i == 0) {
+ nqpro->updateC(mate1.getReadSeq(), mate1.getQScore());
+ nqpro->updateC(mate2.getReadSeq(), mate2.getQScore());
+ }
+ }
+ else if (verbose && (mate1.getReadLength() < OLEN || mate2.getReadLength() < OLEN)) {
+ printf("Warning: Read %s is ignored due to at least one of the mates' length < %d!\n", read.getName().c_str(), OLEN);
+ }
++cnt;
if (verbose && cnt % 1000000 == 0) { printf("%d READS PROCESSED\n", cnt); }
void PairedEndRead::calc_lq() {
low_quality = mate1.isLowQuality() && mate2.isLowQuality();
+ if (mate1.getReadLength() < OLEN || mate2.getReadLength() < OLEN) low_quality = true;
}
#endif
void PairedEndReadQ::calc_lq() {
low_quality = mate1.isLowQuality() && mate2.isLowQuality();
+ if (mate1.getReadLength() < OLEN || mate2.getReadLength() < OLEN) low_quality = true;
}
#endif /* PAIREDENDREADQ_H_ */
int cnt = 0;
while (reader.next(read)) {
- mld != NULL ? mld->update(read.getReadLength(), 1.0) : gld->update(read.getReadLength(), 1.0);
- if (i == 0) { npro->updateC(read.getReadSeq()); }
+ if (!read.isLowQuality()) {
+ mld != NULL ? mld->update(read.getReadLength(), 1.0) : gld->update(read.getReadLength(), 1.0);
+ if (i == 0) { npro->updateC(read.getReadSeq()); }
+ }
+ else if (verbose && read.getReadLength() < OLEN) { printf("Warning: Read %s is ignored due to read length < %d!\n", read.getName().c_str(), OLEN); }
++cnt;
if (verbose && cnt % 1000000 == 0) { printf("%d READS PROCESSED\n", cnt); }
void SingleModel::calcMW() {
double probF, probR;
-
+
assert(seedLen >= OLEN && (mld == NULL ? gld->getMinL() : mld->getMinL()) >= seedLen);
memset(mw, 0, sizeof(double) * (M + 1));
int cnt = 0;
while (reader.next(read)) {
- mld != NULL ? mld->update(read.getReadLength(), 1.0) : gld->update(read.getReadLength(), 1.0);
- qd->update(read.getQScore());
- if (i == 0) { nqpro->updateC(read.getReadSeq(), read.getQScore()); }
+ if (!read.isLowQuality()) {
+ mld != NULL ? mld->update(read.getReadLength(), 1.0) : gld->update(read.getReadLength(), 1.0);
+ qd->update(read.getQScore());
+ if (i == 0) { nqpro->updateC(read.getReadSeq(), read.getQScore()); }
+ }
+ else if (verbose && read.getReadLength() < OLEN) { printf("Warning: Read %s is ignored due to read length < %d!\n", read.getName().c_str(), OLEN); }
++cnt;
if (verbose && cnt % 1000000 == 0) { printf("%d READS PROCESSED\n", cnt); }
ori->read(fi);
gld->read(fi);
- fscanf(fi, "%d", &val);
+ assert(fscanf(fi, "%d", &val) == 1);
if (val > 0) {
if (mld == NULL) mld = new LenDist();
mld->read(fi);
int numA = 0, numT = 0, numAO = 0, numTO = 0; // numAO : number of A in overlap seed region
int threshold_1, threshold_2;
+ if (len < OLEN) { low_quality = true; return; }
+
threshold_1 = int(0.9 * len - 1.5 * sqrt(len * 1.0) + 0.5);
threshold_2 = (OLEN - 1) / 2 + 1;
for (int i = 0; i < len; i++) {
int numA = 0, numT = 0, numAO = 0, numTO = 0; // numAO : number of A in overlap seed region
int threshold_1, threshold_2;
+ if (len < OLEN) { low_quality = true; return; }
+
threshold_1 = int(0.9 * len - 1.5 * sqrt(len * 1.0) + 0.5);
threshold_2 = (OLEN - 1) / 2 + 1;
for (int i = 0; i < len; i++) {
}
++nReads;
- if (verbose && nReads % 1000000 == 0) { printf("FIN %lld\n", nReads); }
+ if (verbose && nReads % 1000000 == 0) { printf("FIN %lld\n", (long long)nReads); }
} while (success);
fout.seekp(startPos);
pod2usage(-msg => "Min fragment length should be smaller or equal to max fragment length!", -exitval => 2, -verbose => 2) if ($minL > $maxL);
pod2usage(-msg => "The memory allocated for calculating credibility intervals should be at least 1 MB!\n", -exitval => 2, -verbose => 2) if ($NMB < 1);
pod2usage(-msg => "Number of threads should be at least 1!\n", -exitval => 2, -verbose => 2) if ($nThreads < 1);
+pod2usage(-msg => "Seed length should be at least 25!\n", -exitval => 2, -verbose => 2) if ($L < 25);
if ($strand_specific) { $probF = 1.0; }
=item B<--seed-length> <int>
-Seed length used by the read aligner. Providing the correct value for this parameter is important for RSEM's accuracy if the data are single-end reads. If RSEM runs Bowtie, it uses this value for Bowtie's seed length parameter. The minimum value is 25. (Default: 25)
+Seed length used by the read aligner. Providing the correct value for this parameter is important for RSEM's accuracy if the data are single-end reads. If RSEM runs Bowtie, it uses this value for Bowtie's seed length parameter. The minimum value is 25. Any read with its or at least one of its mates' (for paired-end reads) length less than 25 will be ignored. (Default: 25)
=item B<--tag> <string>
=cut
-# LocalWords: usr