X-Git-Url: https://git.donarmstrong.com/?p=rsem.git;a=blobdiff_plain;f=SingleModel.h;h=b822f7dc6773c3b5d48e5a3898737bc5f59126fe;hp=59db6ec69d30ce228535312f9cd91da476be2cae;hb=refs%2Fheads%2Fmaster;hpb=227db580833c14aa755c84ccb5401ce8c298e225 diff --git a/SingleModel.h b/SingleModel.h index 59db6ec..b822f7d 100644 --- a/SingleModel.h +++ b/SingleModel.h @@ -8,8 +8,11 @@ #include #include #include +#include +#include #include "utils.h" +#include "my_assert.h" #include "Orientation.h" #include "LenDist.h" #include "RSPD.h" @@ -102,7 +105,14 @@ public: int readLen = read.getReadLength(); int fpos = (dir == 0 ? pos : totLen - pos - readLen); // the aligned position reported in SAM file, should be a coordinate in forward strand - assert(fpos >= 0 && fpos + readLen <= totLen && readLen <= totLen); + general_assert(fpos >= 0, "The alignment of read " + read.getName() + " to transcript " + itos(sid) + " starts at " + itos(fpos) + \ + " from the forward direction, which should be a non-negative number! " + \ + "It is possible that the aligner you use gave different read lengths for a same read in SAM file."); + general_assert(fpos + readLen <= totLen,"Read " + read.getName() + " is hung over the end of transcript " + itos(sid) + "! " \ + + "It is possible that the aligner you use gave different read lengths for a same read in SAM file."); + general_assert(readLen <= totLen, "Read " + read.getName() + " has length " + itos(readLen) + ", but it is aligned to transcript " \ + + itos(sid) + ", whose length (" + itos(totLen) + ") is shorter than the read's length!"); + int seedPos = (dir == 0 ? pos : totLen - pos - seedLen); // the aligned position of the seed in forward strand coordinates if (seedPos >= fullLen || ref.getMask(seedPos)) return 0.0; @@ -221,11 +231,11 @@ public: const LenDist& getGLD() { return *gld; } - void startSimulation(simul*, double*); - bool simulate(int, SingleRead&, int&); + void startSimulation(simul*, const std::vector&); + bool simulate(READ_INT_TYPE, SingleRead&, int&); void finishSimulation(); - double* getMW() { + const double* getMW() { assert(mw != NULL); return mw; } @@ -237,7 +247,7 @@ private: static const int read_type = 0; int M; - int N[3]; + READ_INT_TYPE N[3]; Refs *refs; double mean, sd; int seedLen; @@ -271,21 +281,21 @@ void SingleModel::estimateFromReads(const char* readFN) { genReadFileNames(readFN, i, read_type, s, readFs); ReadReader reader(s, readFs, refs->hasPolyA(), seedLen); // allow calculation of calc_lq() function - int cnt = 0; + READ_INT_TYPE cnt = 0; while (reader.next(read)) { if (!read.isLowQuality()) { mld != NULL ? mld->update(read.getReadLength(), 1.0) : gld->update(read.getReadLength(), 1.0); if (i == 0) { npro->updateC(read.getReadSeq()); } } else if (verbose && read.getReadLength() < seedLen) { - printf("Warning: Read %s is ignored due to read length %d < seed length %d!\n", read.getName().c_str(), read.getReadLength(), seedLen); + std::cout<< "Warning: Read "<< read.getName()<< " is ignored due to read length "<< read.getReadLength()<< " < seed length "<< seedLen<< "!"<< std::endl; } - + ++cnt; - if (verbose && cnt % 1000000 == 0) { printf("%d READS PROCESSED\n", cnt); } + if (verbose && cnt % 1000000 == 0) { std::cout<< cnt<< " READS PROCESSED"<< std::endl; } } - if (verbose) { printf("estimateFromReads, N%d finished.\n", i); } + if (verbose) { std::cout<< "estimateFromReads, N"<< i<< " finished."<< std::endl; } } mld != NULL ? mld->finish() : gld->finish(); @@ -381,7 +391,7 @@ void SingleModel::write(const char* outF) { fclose(fo); } -void SingleModel::startSimulation(simul* sampler, double* theta) { +void SingleModel::startSimulation(simul* sampler, const std::vector& theta) { this->sampler = sampler; theta_cdf = new double[M + 1]; @@ -395,7 +405,7 @@ void SingleModel::startSimulation(simul* sampler, double* theta) { npro->startSimulation(); } -bool SingleModel::simulate(int rid, SingleRead& read, int& sid) { +bool SingleModel::simulate(READ_INT_TYPE rid, SingleRead& read, int& sid) { int dir, pos, readLen, fragLen; std::string name; std::string readseq;