- sprintf(cvsF, "%s.countvectors", imdName);
- fo = fopen(cvsF, "w");
- assert(CHAINLEN % GAP == 0);
- fprintf(fo, "%d %d\n", CHAINLEN / GAP, M + 1);
- //fprintf(fo, "%d %d\n", CHAINLEN, M + 1);
+ /* The reason that for noise gene, mw value is 1 is :
+ * currently, all masked positions are for poly(A) sites, which in theory should be filtered out.
+ * So the theta0 does not containing reads from any masked position
+ */
+
+ for (int i = 0; i <= M; i++) {
+ // i == 0, mw[i] == 1
+ if (i > 0 && (mw[i] < EPSILON || eel[i] < EPSILON)) {
+ theta[i] = 0.0;
+ continue;
+ }
+ theta[i] = theta[i] / mw[i];
+ sum += theta[i];
+ }
+ // currently is OK, since no transcript should be masked totally, only the poly(A) tail related part will be masked
+ general_assert(sum >= EPSILON, "No effective length is no less than" + ftos(MINEEL, 6) + " !");
+ for (int i = 0; i <= M; i++) theta[i] /= sum;
+}
+
+void calcExpressionValues(const vector<double>& theta, const vector<double>& eel, vector<double>& tpm, vector<double>& fpkm) {
+ double denom;
+ vector<double> frac;
+
+ //calculate fraction of count over all mappabile reads
+ denom = 0.0;
+ frac.assign(M + 1, 0.0);
+ for (int i = 1; i <= M; i++)
+ if (eel[i] >= EPSILON) {
+ frac[i] = theta[i];
+ denom += frac[i];
+ }
+ general_assert(denom >= EPSILON, "No alignable reads?!");
+ for (int i = 1; i <= M; i++) frac[i] /= denom;
+
+ //calculate FPKM
+ fpkm.assign(M + 1, 0.0);
+ for (int i = 1; i <= M; i++)
+ if (eel[i] >= EPSILON) fpkm[i] = frac[i] * 1e9 / eel[i];
+
+ //calculate TPM
+ tpm.assign(M + 1, 0.0);
+ denom = 0.0;
+ for (int i = 1; i <= M; i++) denom += fpkm[i];
+ for (int i = 1; i <= M; i++) tpm[i] = fpkm[i] / denom * 1e6;
+}
+
+void* Gibbs(void* arg) {
+ int CHAINLEN;
+ HIT_INT_TYPE len, fr, to;
+ Params *params = (Params*)arg;
+
+ vector<double> theta, tpm, fpkm;
+ vector<int> z, counts;
+ vector<double> arr;
+
+ uniform01 rg(*params->engine);
+
+ // generate initial state
+ sampleTheta(*params->engine, theta);