std::string cqname;
bool isPaired = false;
- int cnt = 0;
+ HIT_INT_TYPE cnt = 0;
cqname = "";
b = bam_init1(); b2 = bam_init1();
#include<cassert>
#include<string>
#include<sstream>
+#include<iostream>
#include <stdint.h>
#include "sam/bam.h"
#include "sam_rsem_aux.h"
#include "sam_rsem_cvt.h"
+#include "utils.h"
+
#include "SingleHit.h"
#include "PairedEndHit.h"
bam1_t *b;
SingleHit *hit;
- int cnt = 0;
+ HIT_INT_TYPE cnt = 0;
b = bam_init1();
while (samread(in, b) >= 0) {
++cnt;
- if (verbose && cnt % 1000000 == 0) { printf("%d alignment lines are loaded!\n", cnt); }
+ if (verbose && cnt % 1000000 == 0) { std::cout<< cnt<< "alignment lines are loaded!"<< std::endl; }
if (b->core.flag & 0x0004) continue;
assert(wrapper.getNextHit() == NULL);
bam_destroy1(b);
- if (verbose) { printf("Bam output file is generated!\n"); }
+ if (verbose) { std::cout<< "Bam output file is generated!"<< std::endl; }
}
void BamWriter::work(HitWrapper<PairedEndHit> wrapper) {
while (samread(in, b) >= 0 && samread(in, b2) >= 0) {
cnt += 2;
- if (verbose && cnt % 1000000 == 0) { printf("%d alignment lines are loaded!\n", cnt); }
-
+ if (verbose && cnt % 1000000 == 0) { std::cout<< cnt<< "alignment lines are loaded!"<< std::endl; }
//mate info is not complete, skip
if (!(((b->core.flag & 0x0040) && (b2->core.flag & 0x0080)) || ((b->core.flag & 0x0080) && (b2->core.flag & 0x0040)))) continue;
//unalignable reads, skip
bam_destroy1(b);
bam_destroy1(b2);
- if (verbose) { printf("Bam output file is generated!\n"); }
+ if (verbose) { std::cout<< "Bam output file is generated!"<< std::endl; }
}
void BamWriter::convert(bam1_t *b, double prb) {
#include<string>
#include<vector>
#include<algorithm>
+#include<fstream>
+#include<iostream>
#include<pthread.h>
#include "utils.h"
int read_type;
int m, M; // m genes, M isoforms
-int N0, N1, N2, N_tot;
+READ_INT_TYPE N0, N1, N2, N_tot;
int nThreads;
char refName[STRLEN], outName[STRLEN];
char imdName[STRLEN], statName[STRLEN];
char refF[STRLEN], groupF[STRLEN], cntF[STRLEN], tiF[STRLEN];
-char mparamsF[STRLEN], bmparamsF[STRLEN];
+char mparamsF[STRLEN];
char modelF[STRLEN], thetaF[STRLEN];
char inpSamType;
template<class ReadType, class HitType, class ModelType>
void init(ReadReader<ReadType> **&readers, HitContainer<HitType> **&hitvs, double **&ncpvs, ModelType **&mhps) {
- int nReads, nHits, rt;
- int nrLeft, nhT, curnr; // nrLeft : number of reads left, nhT : hit threshold per thread, curnr: current number of reads
+ READ_INT_TYPE nReads;
+ HIT_INT_TYPE nHits;
+ int rt; // read type
+
+ READ_INT_TYPE nrLeft, curnr; // nrLeft : number of reads left, curnr: current number of reads
+ HIT_INT_TYPE nhT; // nhT : hit threshold per thread
char datF[STRLEN];
int s;
ncpvs = new double*[nThreads];
for (int i = 0; i < nThreads; i++) {
- int ntLeft = nThreads - i - 1; // # of threads left
+ HIT_INT_TYPE ntLeft = nThreads - i - 1; // # of threads left
general_assert(readers[i]->locate(curnr), "Read indices files do not match!");
general_assert(hitvs[i]->read(fin), "Cannot read alignments from .dat file!");
--nrLeft;
- if (verbose && nrLeft % 1000000 == 0) { printf("DAT %d reads left!\n", nrLeft); }
+ if (verbose && nrLeft % 1000000 == 0) { cout<< "DAT "<< nrLeft << " reads left"<< endl; }
}
ncpvs[i] = new double[hitvs[i]->getN()];
memset(ncpvs[i], 0, sizeof(double) * hitvs[i]->getN());
curnr += hitvs[i]->getN();
- if (verbose) { printf("Thread %d : N = %d, NHit = %d\n", i, hitvs[i]->getN(), hitvs[i]->getNHits()); }
+ if (verbose) { cout<<"Thread "<< i<< " : N = "<< hitvs[i]->getN()<< ", NHit = "<< hitvs[i]->getNHits()<< endl; }
}
fin.close();
ReadType read;
- int N = hitv->getN();
+ READ_INT_TYPE N = hitv->getN();
double sum;
vector<double> fracs; //to remove this, do calculation twice
- int fr, to, id;
+ HIT_INT_TYPE fr, to, id;
if (needCalcConPrb || updateModel) { reader->reset(); }
if (updateModel) { mhp->init(); }
memset(countv, 0, sizeof(double) * (M + 1));
- for (int i = 0; i < N; i++) {
+ for (READ_INT_TYPE i = 0; i < N; i++) {
if (needCalcConPrb || updateModel) {
general_assert(reader->next(read), "Can not load a read!");
}
fracs[0] = probv[0] * ncpv[i];
if (fracs[0] < EPSILON) fracs[0] = 0.0;
sum += fracs[0];
- for (int j = fr; j < to; j++) {
+ for (HIT_INT_TYPE j = fr; j < to; j++) {
HitType &hit = hitv->getHitAt(j);
if (needCalcConPrb) { hit.setConPrb(model->getConPrb(read, hit)); }
id = j - fr + 1;
countv[0] += fracs[0];
if (updateModel) { mhp->updateNoise(read, fracs[0]); }
if (calcExpectedWeights) { ncpv[i] = fracs[0]; }
- for (int j = fr; j < to; j++) {
+ for (HIT_INT_TYPE j = fr; j < to; j++) {
HitType &hit = hitv->getHitAt(j);
id = j - fr + 1;
fracs[id] /= sum;
}
else if (calcExpectedWeights) {
ncpv[i] = 0.0;
- for (int j = fr; j < to; j++) {
+ for (HIT_INT_TYPE j = fr; j < to; j++) {
HitType &hit = hitv->getHitAt(j);
hit.setConPrb(0.0);
}
double *ncpv = (double*)(params->ncpv);
ReadType read;
- int N = hitv->getN();
- int fr, to;
+ READ_INT_TYPE N = hitv->getN();
+ HIT_INT_TYPE fr, to;
assert(model->getNeedCalcConPrb());
reader->reset();
- for (int i = 0; i < N; i++) {
+ for (READ_INT_TYPE i = 0; i < N; i++) {
general_assert(reader->next(read), "Can not load a read!");
fr = hitv->getSAt(i);
to = hitv->getSAt(i + 1);
ncpv[i] = model->getNoiseConPrb(read);
- for (int j = fr; j < to; j++) {
+ for (HIT_INT_TYPE j = fr; j < to; j++) {
HitType &hit = hitv->getHitAt(j);
hit.setConPrb(model->getConPrb(read, hit));
}
double sum;
double bChange = 0.0, change = 0.0; // bChange : biggest change
- int totNum = 0;
+ READ_INT_TYPE totNum = 0;
ModelType model(mparams); //master model
ReadReader<ReadType> **readers;
if (bChange < change) bChange = change;
}
- if (verbose) printf("ROUND = %d, SUM = %.15g, bChange = %f, totNum = %d\n", ROUND, sum, bChange, totNum);
+ if (verbose) { cout<< "ROUND = "<< ROUND<< ", SUM = "<< setprecision(15)<< sum<< ", bChange = " << setprecision(6)<< bChange<< ", totNum = %" << totNum<< endl; }
} while (ROUND < MIN_ROUND || (totNum > 0 && ROUND < MAX_ROUND));
// } while (ROUND < 1);
- if (totNum > 0) fprintf(stderr, "Warning: RSEM reaches %d iterations before meeting the convergence criteria.\n", MAX_ROUND);
+ if (totNum > 0) { cout<< "Warning: RSEM reaches "<< MAX_ROUND<< " iterations before meeting the convergence criteria."<< endl; }
//generate output file used by Gibbs sampler
if (genGibbsOut) {
model.setNeedCalcConPrb(false);
sprintf(out_for_gibbs_F, "%s.ofg", imdName);
- fo = fopen(out_for_gibbs_F, "w");
- fprintf(fo, "%d %d\n", M, N0);
+ ofstream fout(out_for_gibbs_F);
+ fout<< M<< " "<< N0<< endl;
for (int i = 0; i < nThreads; i++) {
- int numN = hitvs[i]->getN();
- for (int j = 0; j < numN; j++) {
- int fr = hitvs[i]->getSAt(j);
- int to = hitvs[i]->getSAt(j + 1);
- int totNum = 0;
-
- if (ncpvs[i][j] >= EPSILON) { ++totNum; fprintf(fo, "%d %.15g ", 0, ncpvs[i][j]); }
- for (int k = fr; k < to; k++) {
+ READ_INT_TYPE numN = hitvs[i]->getN();
+ for (READ_INT_TYPE j = 0; j < numN; j++) {
+ HIT_INT_TYPE fr = hitvs[i]->getSAt(j);
+ HIT_INT_TYPE to = hitvs[i]->getSAt(j + 1);
+ HIT_INT_TYPE totNum = 0;
+
+ if (ncpvs[i][j] >= EPSILON) { ++totNum; fout<< "0 "<< setprecision(15)<< ncpvs[i][j]<< " "; }
+ for (HIT_INT_TYPE k = fr; k < to; k++) {
HitType &hit = hitvs[i]->getHitAt(k);
if (hit.getConPrb() >= EPSILON) {
++totNum;
- fprintf(fo, "%d %.15g ", hit.getSid(), hit.getConPrb());
+ fout<< hit.getSid()<< " "<< setprecision(15)<< hit.getConPrb()<< " ";
}
}
- if (totNum > 0) { fprintf(fo, "\n"); }
+ if (totNum > 0) { fout<< endl; }
}
}
- fclose(fo);
+ fout.close();
}
sprintf(thetaF, "%s.theta", statName);
sprintf(outBamF, "%s.transcript.bam", outName);
if (bamSampling) {
- int local_N;
- int fr, to, len, id;
+ READ_INT_TYPE local_N;
+ HIT_INT_TYPE fr, to, len, id;
vector<double> arr;
uniform01 rg(engine_type(time(NULL)));
- if (verbose) printf("Begin to sample reads from their posteriors.\n");
+ if (verbose) cout<< "Begin to sample reads from their posteriors."<< endl;
for (int i = 0; i < nThreads; i++) {
local_N = hitvs[i]->getN();
- for (int j = 0; j < local_N; j++) {
+ for (READ_INT_TYPE j = 0; j < local_N; j++) {
fr = hitvs[i]->getSAt(j);
to = hitvs[i]->getSAt(j + 1);
len = to - fr + 1;
arr.assign(len, 0);
arr[0] = ncpvs[i][j];
- for (int k = fr; k < to; k++) arr[k - fr + 1] = arr[k - fr] + hitvs[i]->getHitAt(k).getConPrb();
+ for (HIT_INT_TYPE k = fr; k < to; k++) arr[k - fr + 1] = arr[k - fr] + hitvs[i]->getHitAt(k).getConPrb();
id = (arr[len - 1] < EPSILON ? -1 : sample(rg, arr, len)); // if all entries in arr are 0, let id be -1
- for (int k = fr; k < to; k++) hitvs[i]->getHitAt(k).setConPrb(k - fr + 1 == id ? 1.0 : 0.0);
+ for (HIT_INT_TYPE k = fr; k < to; k++) hitvs[i]->getHitAt(k).setConPrb(k - fr + 1 == id ? 1.0 : 0.0);
}
}
- if (verbose) printf("Sampling is finished.\n");
+ if (verbose) cout<< "Sampling is finished."<< endl;
}
BamWriter writer(inpSamType, inpSamF, pt_fn_list, outBamF, transcripts);
general_assert(N1 > 0, "There are no alignable reads!");
- if (nThreads > N1) nThreads = N1;
+ if ((READ_INT_TYPE)nThreads > N1) nThreads = N1;
//set model parameters
mparams.M = M;
int nThreads;
int model_type;
-int m, M, N0, N1, nHits;
+int m, M;
+READ_INT_TYPE N0, N1;
+HIT_INT_TYPE nHits;
double totc;
int BURNIN, NSAMPLES, GAP;
char imdName[STRLEN], statName[STRLEN];
Refs refs;
GroupInfo gi;
-vector<int> s;
+vector<HIT_INT_TYPE> s;
vector<Item> hits;
vector<double> theta;
pthread_attr_init(&attr);
pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);
- if (verbose) { printf("Initialization finished!"); }
+ if (verbose) { printf("Initialization finished!\n"); }
}
//sample theta from Dir(1)
}
void* Gibbs(void* arg) {
- int len, fr, to;
int CHAINLEN;
+ HIT_INT_TYPE len, fr, to;
Params *params = (Params*)arg;
vector<double> theta;
counts.assign(M + 1, 1); // 1 pseudo count
counts[0] += N0;
- for (int i = 0; i < N1; i++) {
+ for (READ_INT_TYPE i = 0; i < N1; i++) {
fr = s[i]; to = s[i + 1];
len = to - fr;
arr.assign(len, 0);
- for (int j = fr; j < to; j++) {
+ for (HIT_INT_TYPE j = fr; j < to; j++) {
arr[j - fr] = theta[hits[j].sid] * hits[j].conprb;
if (j > fr) arr[j - fr] += arr[j - fr - 1]; // cumulative
}
CHAINLEN = 1 + (params->nsamples - 1) * GAP;
for (int ROUND = 1; ROUND <= BURNIN + CHAINLEN; ROUND++) {
- for (int i = 0; i < N1; i++) {
+ for (READ_INT_TYPE i = 0; i < N1; i++) {
--counts[z[i]];
fr = s[i]; to = s[i + 1]; len = to - fr;
arr.assign(len, 0);
- for (int j = fr; j < to; j++) {
+ for (HIT_INT_TYPE j = fr; j < to; j++) {
arr[j - fr] = counts[hits[j].sid] * hits[j].conprb;
if (j > fr) arr[j - fr] += arr[j - fr - 1]; //cumulative
}
pve_c[i] = (pve_c[i] - NSAMPLES * pme_c[i] * pme_c[i]) / (NSAMPLES - 1);
pme_theta[i] /= NSAMPLES;
}
-
- /*
- // combine files
- FILE *fo = fopen(cvsF, "a");
- for (int i = 1; i < nThreads; i++) {
- sprintf(inpF, "%s%d", cvsF, i);
- ifstream fin(inpF);
- while (getline(fin, line)) {
- fprintf(fo, "%s\n", line.c_str());
- }
- fin.close();
- sprintf(command, "rm -f %s", inpF);
- int status = system(command);
- general_assert(status == 0, "Fail to delete file " + cstrtos(inpF) + "!");
- }
- fclose(fo);
- */
}
template<class ModelType>
#include<cassert>
#include<iostream>
#include<vector>
-
#include<algorithm>
+
+#include "utils.h"
#include "GroupInfo.h"
template<class HitType>
}
}
- int getN() { return n; }
+ READ_INT_TYPE getN() { return n; }
- int getNHits() { return nhits; }
+ HIT_INT_TYPE getNHits() { return nhits; }
- int calcNumGeneMultiReads(const GroupInfo&);
- int calcNumIsoformMultiReads();
+ READ_INT_TYPE calcNumGeneMultiReads(const GroupInfo&);
+ READ_INT_TYPE calcNumIsoformMultiReads();
- int getSAt(int pos) { assert(pos >= 0 && pos <= n); return s[pos]; }
+ HIT_INT_TYPE getSAt(READ_INT_TYPE pos) { assert(pos >= 0 && pos <= n); return s[pos]; }
- HitType& getHitAt(int pos) { assert(pos >= 0 && pos < nhits); return hits[pos]; }
+ HitType& getHitAt(HIT_INT_TYPE pos) { assert(pos >= 0 && pos < nhits); return hits[pos]; }
private:
- int n; // n reads in total
- int nhits; // # of hits
- std::vector<int> s;
+ READ_INT_TYPE n; // n reads in total
+ HIT_INT_TYPE nhits; // # of hits
+ std::vector<HIT_INT_TYPE> s;
std::vector<HitType> hits;
};
//Each time only read one read's hits. If you want to start over, must call clear() first!
template<class HitType>
bool HitContainer<HitType>::read(std::istream& in) {
- int tot;
+ HIT_INT_TYPE tot;
if (!(in>>tot)) return false;
assert(tot > 0);
- for (int i = 0; i < tot; i++) {
+ for (HIT_INT_TYPE i = 0; i < tot; i++) {
HitType hit;
if (!hit.read(in)) return false;
hits.push_back(hit);
template<class HitType>
void HitContainer<HitType>::write(std::ostream& out) {
if (n <= 0) return;
- for (int i = 0; i < n; i++) {
+ for (READ_INT_TYPE i = 0; i < n; i++) {
out<<s[i + 1] - s[i];
- for (int j = s[i]; j < s[i + 1]; j++) {
+ for (HIT_INT_TYPE j = s[i]; j < s[i + 1]; j++) {
hits[j].write(out);
}
out<<std::endl;
}
template<class HitType>
-int HitContainer<HitType>::calcNumGeneMultiReads(const GroupInfo& gi) {
- int res = 0;
+READ_INT_TYPE HitContainer<HitType>::calcNumGeneMultiReads(const GroupInfo& gi) {
+ READ_INT_TYPE res = 0;
int *sortgids = NULL;
- for (int i = 0; i < n; i++) {
- int num = s[i + 1] - s[i];
+ for (READ_INT_TYPE i = 0; i < n; i++) {
+ HIT_INT_TYPE num = s[i + 1] - s[i];
sortgids = new int[num];
- for (int j = s[i]; j < s[i + 1]; j++) sortgids[j] = gi.gidAt(hits[j].getSid());
+ for (HIT_INT_TYPE j = s[i]; j < s[i + 1]; j++) sortgids[j] = gi.gidAt(hits[j].getSid());
std::sort(sortgids, sortgids + num);
if (std::unique(sortgids, sortgids + num) - sortgids > 1) ++res;
delete[] sortgids;
}
template<class HitType>
-int HitContainer<HitType>::calcNumIsoformMultiReads() {
- int res = 0;
- for (int i = 0; i < n; i++)
+READ_INT_TYPE HitContainer<HitType>::calcNumIsoformMultiReads() {
+ READ_INT_TYPE res = 0;
+ for (READ_INT_TYPE i = 0; i < n; i++)
if (s[i + 1] - s[i] > 1) ++res;
return res;
}
#ifndef HITWRAPPER_H_
#define HITWRAPPER_H_
+#include "utils.h"
#include "HitContainer.h"
// assume each hit vector contains at least one hit
}
private:
- int i, j;
- int nThreads;
+ int i, nThreads;
+ HIT_INT_TYPE j;
HitContainer<HitType> **hitvs;
};
struct ModelParams {
int M;
- int N[3];
+ READ_INT_TYPE N[3];
int minL, maxL;
bool estRSPD; // true if user wants to estimate RSPD; false if use uniform distribution
int B; // number of bins in RSPD
#include<string>
#include<algorithm>
#include<sstream>
+#include<iostream>
#include "utils.h"
#include "my_assert.h"
const LenDist& getGLD() { return *gld; }
void startSimulation(simul*, double*);
- bool simulate(int, PairedEndRead&, int&);
+ bool simulate(READ_INT_TYPE, PairedEndRead&, int&);
void finishSimulation();
//Use it after function 'read' or 'estimateFromReads'
static const int read_type = 2;
int M;
- int N[3];
+ READ_INT_TYPE N[3];
Refs *refs;
int seedLen;
genReadFileNames(readFN, i, read_type, s, readFs);
ReadReader<PairedEndRead> reader(s, readFs, refs->hasPolyA(), seedLen); // allow calculation of calc_lq() function
- int cnt = 0;
+ READ_INT_TYPE cnt = 0;
while (reader.next(read)) {
SingleRead mate1 = read.getMate1();
SingleRead mate2 = read.getMate2();
}
}
else if (verbose && (mate1.getReadLength() < seedLen || mate2.getReadLength() < seedLen)) {
- printf("Warning: Read %s is ignored due to at least one of the mates' length < seed length %d!\n", read.getName().c_str(), seedLen);
+ std::cout<< "Warning: Read "<< read.getName()<< " is ignored due to at least one of the mates' length < seed length "<< seedLen<< "!"<< std::endl;
}
++cnt;
- if (verbose && cnt % 1000000 == 0) { printf("%d READS PROCESSED\n", cnt); }
+ if (verbose && cnt % 1000000 == 0) { std::cout<< cnt<< " READS PROCESSED"<< std::endl; }
}
- if (verbose) { printf("estimateFromReads, N%d finished.\n", i); }
+ if (verbose) { std::cout<< "estimateFromReads, N"<< i<< " finished."<< std::endl; }
}
mld->finish();
npro->startSimulation();
}
-bool PairedEndModel::simulate(int rid, PairedEndRead& read, int& sid) {
+bool PairedEndModel::simulate(READ_INT_TYPE rid, PairedEndRead& read, int& sid) {
int dir, pos;
int insertL, mateL1, mateL2;
std::string name;
#include<string>
#include<algorithm>
#include<sstream>
+#include<iostream>
#include "utils.h"
#include "my_assert.h"
const LenDist& getGLD() { return *gld; }
void startSimulation(simul*, double*);
- bool simulate(int, PairedEndReadQ&, int&);
+ bool simulate(READ_INT_TYPE, PairedEndReadQ&, int&);
void finishSimulation();
//Use it after function 'read' or 'estimateFromReads'
static const int read_type = 3;
int M;
- int N[3];
+ READ_INT_TYPE N[3];
Refs *refs;
int seedLen;
genReadFileNames(readFN, i, read_type, s, readFs);
ReadReader<PairedEndReadQ> reader(s, readFs, refs->hasPolyA(), seedLen); // allow calculation of calc_lq() function
- int cnt = 0;
+ READ_INT_TYPE cnt = 0;
while (reader.next(read)) {
SingleReadQ mate1 = read.getMate1();
SingleReadQ mate2 = read.getMate2();
}
}
else if (verbose && (mate1.getReadLength() < seedLen || mate2.getReadLength() < seedLen)) {
- printf("Warning: Read %s is ignored due to at least one of the mates' length < seed length %d!\n", read.getName().c_str(), seedLen);
+ std::cout<< "Warning: Read "<< read.getName()<< " is ignored due to at least one of the mates' length < seed length "<< seedLen<< "!"<< std::endl;
}
++cnt;
- if (verbose && cnt % 1000000 == 0) { printf("%d READS PROCESSED\n", cnt); }
+ if (verbose && cnt % 1000000 == 0) { std::cout<< cnt<< " READS PROCESSED"<< std::endl; }
}
- if (verbose) { printf("estimateFromReads, N%d finished.\n", i); }
+ if (verbose) { std::cout<<"estimateFromReads, N"<< i<<" finished."<< std::endl; }
}
mld->finish();
nqpro->startSimulation();
}
-bool PairedEndQModel::simulate(int rid, PairedEndReadQ& read, int& sid) {
+bool PairedEndQModel::simulate(READ_INT_TYPE rid, PairedEndReadQ& read, int& sid) {
int dir, pos;
int insertL, mateL1, mateL2;
std::string name;
#include "utils.h"
struct ReadIndex {
- long nReads;
+ READ_INT_TYPE nReads;
int gap, nPos;
std::streampos *index;
}
//rid 0-based , return crid : current seeked rid
- long locate(long rid, std::ifstream& out) {
+ READ_INT_TYPE locate(READ_INT_TYPE rid, std::ifstream& out) {
if (index == NULL) {
out.seekg(0, std::ios::beg);
return 0;
this->indices = indices;
}
- bool locate(long); // You should guarantee that indices exist and rid is valid, otherwise return false; If it fails, you should reset it manually!
+ bool locate(READ_INT_TYPE); // You should guarantee that indices exist and rid is valid, otherwise return false; If it fails, you should reset it manually!
void reset();
bool next(ReadType& read, int flags = 7) {
}
template<class ReadType>
-bool ReadReader<ReadType>::locate(long rid) {
- long crid = -1;
+bool ReadReader<ReadType>::locate(READ_INT_TYPE rid) {
+ READ_INT_TYPE crid = -1;
ReadType read;
if (indices == NULL) return false;
//We should make sure that crid returned by each indices is the same
for (int i = 0; i < s; i++) {
- long val = indices[i]->locate(rid, *arr[i]);
+ READ_INT_TYPE val = indices[i]->locate(rid, *arr[i]);
if (i == 0) { crid = val; } else { assert(crid == val); }
}
assert(crid <= rid);
//makes no sense for noise gene
int getDir() const { return sid < 0; }
- int getSid() const { return abs(sid); }
+ int getSid() const { return abs(sid); }
int getPos() const { return pos; }
#include<string>
#include<algorithm>
#include<sstream>
+#include<iostream>
#include "utils.h"
#include "my_assert.h"
const LenDist& getGLD() { return *gld; }
void startSimulation(simul*, double*);
- bool simulate(int, SingleRead&, int&);
+ bool simulate(READ_INT_TYPE, SingleRead&, int&);
void finishSimulation();
double* getMW() {
static const int read_type = 0;
int M;
- int N[3];
+ READ_INT_TYPE N[3];
Refs *refs;
double mean, sd;
int seedLen;
genReadFileNames(readFN, i, read_type, s, readFs);
ReadReader<SingleRead> reader(s, readFs, refs->hasPolyA(), seedLen); // allow calculation of calc_lq() function
- int cnt = 0;
+ READ_INT_TYPE cnt = 0;
while (reader.next(read)) {
if (!read.isLowQuality()) {
mld != NULL ? mld->update(read.getReadLength(), 1.0) : gld->update(read.getReadLength(), 1.0);
if (i == 0) { npro->updateC(read.getReadSeq()); }
}
else if (verbose && read.getReadLength() < seedLen) {
- printf("Warning: Read %s is ignored due to read length %d < seed length %d!\n", read.getName().c_str(), read.getReadLength(), seedLen);
+ std::cout<< "Warning: Read "<< read.getName()<< " is ignored due to read length "<< read.getReadLength()<< " < seed length "<< seedLen<< "!"<< std::endl;
}
-
+
++cnt;
- if (verbose && cnt % 1000000 == 0) { printf("%d READS PROCESSED\n", cnt); }
+ if (verbose && cnt % 1000000 == 0) { std::cout<< cnt<< " READS PROCESSED"<< std::endl; }
}
- if (verbose) { printf("estimateFromReads, N%d finished.\n", i); }
+ if (verbose) { std::cout<< "estimateFromReads, N"<< i<< " finished."<< std::endl; }
}
mld != NULL ? mld->finish() : gld->finish();
npro->startSimulation();
}
-bool SingleModel::simulate(int rid, SingleRead& read, int& sid) {
+bool SingleModel::simulate(READ_INT_TYPE rid, SingleRead& read, int& sid) {
int dir, pos, readLen, fragLen;
std::string name;
std::string readseq;
#include<string>
#include<algorithm>
#include<sstream>
+#include<iostream>
#include "utils.h"
#include "my_assert.h"
const LenDist& getGLD() { return *gld; }
void startSimulation(simul*, double*);
- bool simulate(int, SingleReadQ&, int&);
+ bool simulate(READ_INT_TYPE, SingleReadQ&, int&);
void finishSimulation();
//Use it after function 'read' or 'estimateFromReads'
static const int read_type = 1;
int M;
- int N[3];
+ READ_INT_TYPE N[3];
Refs *refs;
double mean, sd;
int seedLen;
genReadFileNames(readFN, i, read_type, s, readFs);
ReadReader<SingleReadQ> reader(s, readFs, refs->hasPolyA(), seedLen); // allow calculation of calc_lq() function
- int cnt = 0;
+ READ_INT_TYPE cnt = 0;
while (reader.next(read)) {
if (!read.isLowQuality()) {
mld != NULL ? mld->update(read.getReadLength(), 1.0) : gld->update(read.getReadLength(), 1.0);
if (i == 0) { nqpro->updateC(read.getReadSeq(), read.getQScore()); }
}
else if (verbose && read.getReadLength() < seedLen) {
- printf("Warning: Read %s is ignored due to read length %d < seed length %d!\n", read.getName().c_str(), read.getReadLength(), seedLen);
+ std::cout<< "Warning: Read "<< read.getName()<< " is ignored due to read length "<< read.getReadLength()<< " < seed length "<< seedLen<< "!"<< std::endl;
}
++cnt;
- if (verbose && cnt % 1000000 == 0) { printf("%d READS PROCESSED\n", cnt); }
+ if (verbose && cnt % 1000000 == 0) { std::cout<< cnt<< " READS PROCESSED"<< std::endl; }
}
- if (verbose) { printf("estimateFromReads, N%d finished.\n", i); }
+ if (verbose) { std::cout<< "estimateFromReads, N"<< i<< " finished."<< std::endl; }
}
mld != NULL ? mld->finish() : gld->finish();
nqpro->startSimulation();
}
-bool SingleQModel::simulate(int rid, SingleReadQ& read, int& sid) {
+bool SingleQModel::simulate(READ_INT_TYPE rid, SingleReadQ& read, int& sid) {
int dir, pos, readLen, fragLen;
std::string name;
std::string qual, readseq;
void buildIndex(char* readF, int gap, bool hasQ) {
int nPos;
- long nReads;
+ READ_INT_TYPE nReads;
bool success;
string line;
char idxF[STRLEN];
}
++nReads;
- if (verbose && nReads % 1000000 == 0) { printf("FIN %lld\n", (long long)nReads); }
+ if (verbose && nReads % 1000000 == 0) { cout<< "FIN "<< nReads<< endl; }
} while (success);
fout.seekp(startPos);
fin.close();
fout.close();
- if (verbose) { printf("Build Index %s is Done!\n", readF); }
+ if (verbose) { cout<< "Build Index "<< readF<< " is Done!"<< endl; }
}
int main(int argc, char* argv[]) {
else {
if (hasMappingFile) {
tid = item.getTranscriptID();
- mi_iter = mi_table.find(tid);
- if (mi_iter == mi_table.end()) {
- fprintf(stderr, "Mapping Info is not correct, cannot find %s's gene_id!\n", tid.c_str());
- exit(-1);
- }
- //assert(iter != table.end());
- gid = mi_iter->second;
- item.setGeneID(gid);
+ mi_iter = mi_table.find(tid);
+ if (mi_iter == mi_table.end()) {
+ fprintf(stderr, "Mapping Info is not correct, cannot find %s's gene_id!\n", tid.c_str());
+ exit(-1);
+ }
+ //assert(iter != table.end());
+ gid = mi_iter->second;
+ item.setGeneID(gid);
}
items.push_back(item);
}
#include "sam/bam.h"
#include "sam/sam.h"
+#include "utils.h"
+
using namespace std;
string cqname;
if (unaligned || arr.size() == 0) return;
bool isPaired = (arr[0]->core.flag & 0x0001);
if ((isPaired && arr.size() != 2) || (!isPaired && arr.size() != 1)) return;
- for (int i = 0; i < (int)arr.size(); i++) samwrite(out, arr[i]);
+ for (size_t i = 0; i < arr.size(); i++) samwrite(out, arr[i]);
}
int main(int argc, char* argv[]) {
out = samopen(argv[2], "wb", in->header);
assert(out != 0);
- int cnt = 0;
+ HIT_INT_TYPE cnt = 0;
cqname = "";
arr.clear();
if (cqname != bam1_qname(b)) {
output();
cqname = bam1_qname(b);
- for (int i = 0; i < (int)arr.size(); i++) bam_destroy1(arr[i]);
+ for (size_t i = 0; i < arr.size(); i++) bam_destroy1(arr[i]);
arr.clear();
unaligned = false;
}
using namespace std;
int read_type; // 0 SingleRead, 1 SingleReadQ, 2 PairedEndRead, 3 PairedEndReadQ
-int N[3]; // note, N = N0 + N1 + N2 , but may not be equal to the total number of reads in data
-int nHits; // # of hits
-int nUnique, nMulti, nIsoMulti;
+READ_INT_TYPE N[3]; // note, N = N0 + N1 + N2 , but may not be equal to the total number of reads in data
+HIT_INT_TYPE nHits; // # of hits
+READ_INT_TYPE nUnique, nMulti, nIsoMulti;
char fn_list[STRLEN];
char groupF[STRLEN], tiF[STRLEN];
char imdName[STRLEN];
ostream *cat[3][2]; // cat : category 1-dim 0 N0 1 N1 2 N2; 2-dim 0 mate1 1 mate2
char readOutFs[3][2][STRLEN];
-map<int, int> counter;
-map<int, int>::iterator iter;
+map<int, READ_INT_TYPE> counter;
+map<int, READ_INT_TYPE>::iterator iter;
void init(const char* imdName, char alignFType, const char* alignF) {
nUnique = nMulti = nIsoMulti = 0;
memset(N, 0, sizeof(N));
- long long cnt = 0;
+ READ_INT_TYPE cnt = 0;
record_val = -2; //indicate no recorded read now
while ((val = parser->parseNext(read, hit)) >= 0) {
}
++cnt;
- if (verbose && (cnt % 1000000 == 0)) { printf("Parsed %lld entries\n", cnt); }
+ if (verbose && (cnt % 1000000 == 0)) { cout<< "Parsed "<< cnt<< " entries"<< endl; }
}
if (record_val >= 0) {
hit_out.open(datF);
- string firstLine(59, ' ');
+ string firstLine(99, ' ');
firstLine.append(1, '\n'); //May be dangerous!
hit_out<<firstLine;
#include "sam/bam.h"
#include "sam/sam.h"
+#include "utils.h"
#include "my_assert.h"
using namespace std;
isValid = true;
- long cnt = 0;
+ HIT_INT_TYPE cnt = 0;
string cqname(""), qname;
uint64_t creadlen = 0, readlen;
bool cispaired = false, ispaired;
#include "sam/bam.h"
#include "sam/sam.h"
+#include "utils.h"
#include "my_assert.h"
using namespace std;
string qname;
bool go_on = (samread(in, b) >= 0);
bool isPaired;
- long cnt = 0;
+ HIT_INT_TYPE cnt = 0;
printf("."); fflush(stdout);
using namespace std;
-int N;
+READ_INT_TYPE N;
int model_type, M, m;
Refs refs;
fin.close();
for (int i = 1; i <= M; i++) theta[i] = theta[i] / denom * (1.0 - theta[0]);
- int resimulation_count = 0;
+ READ_INT_TYPE resimulation_count = 0;
//simulating...
model.startSimulation(&sampler, theta);
- for (int i = 0; i < N; i++) {
+ for (READ_INT_TYPE i = 0; i < N; i++) {
while (!model.simulate(i, read, sid)) { ++resimulation_count; }
read.write(n_os, os);
++counts[sid];
- if ((i + 1) % 1000000 == 0 && verbose) printf("GEN %d\n", i + 1);
+ if ((i + 1) % 1000000 == 0 && verbose) cout<<"GEN "<< i + 1<< endl;
}
model.finishSimulation();
- printf("Total number of resimulation is %d\n", resimulation_count);
+ cout<< "Total number of resimulation is "<< resimulation_count<< endl;
}
void writeResFiles(char* outFN) {
#include<cassert>
#include<string>
#include<vector>
+#include<stdint.h>
+
+typedef uint64_t HIT_INT_TYPE;
+typedef uint64_t READ_INT_TYPE;
const int STRLEN = 10005 ;
const double EPSILON = 1e-300;
#include <cstring>
#include <cstdlib>
#include <cassert>
+#include <iostream>
#include <stdint.h>
#include "sam/bam.h"
#include "sam/sam.h"
+#include "utils.h"
#include "wiggle.h"
void add_bam_record_to_wiggle(const bam1_t *b, Wiggle& wiggle) {
memset(used, 0, sizeof(bool) * header->n_targets);
int cur_tid = -1; //current tid;
- int cnt = 0;
+ HIT_INT_TYPE cnt;
bam1_t *b = bam_init1();
Wiggle wiggle;
while (samread(bam_in, b) >= 0) {
}
add_bam_record_to_wiggle(b, wiggle);
++cnt;
- if (cnt % 1000000 == 0) fprintf(stderr, "%d FIN\n", cnt);
+ if (cnt % 1000000 == 0) std::cout<< cnt<< std::endl;
}
if (cur_tid >= 0) { used[cur_tid] = true; processor.process(wiggle); }