-#include<ctime>
#include<cstdio>
#include<cstring>
#include<cstdlib>
#include<sstream>
#include<vector>
-#include "randomc.h"
#include "utils.h"
+#include "sampling.h"
#include "Model.h"
#include "SingleModel.h"
int m, M, N0, N1, nHits;
double totc;
int BURNIN, CHAINLEN, GAP;
+char imdName[STRLEN], statName[STRLEN];
char thetaF[STRLEN], ofgF[STRLEN], groupF[STRLEN], refF[STRLEN], modelF[STRLEN];
char cvsF[STRLEN];
Refs refs;
GroupInfo gi;
-vector<double> theta, pme_theta, eel;
+vector<double> theta, pme_theta, pme_c, eel;
vector<int> s, z;
vector<Item> hits;
bool quiet;
vector<double> arr;
-CRandomMersenne rg(time(NULL));
-void load_data(char* reference_name, char* sample_name, char* imdName) {
+void load_data(char* reference_name, char* statName, char* imdName) {
ifstream fin;
string line;
int tmpVal;
m = gi.getm();
//load thetaF
- sprintf(thetaF, "%s.theta",sample_name);
+ sprintf(thetaF, "%s.theta",statName);
fin.open(thetaF);
if (!fin.is_open()) {
fprintf(stderr, "Cannot open %s!\n", thetaF);
if (verbose) { printf("Loading Data is finished!\n"); }
}
-// arr should be cumulative!
-// interval : [,)
-// random number should be in [0, arr[len - 1])
-// If by chance arr[len - 1] == 0.0, one possibility is to sample uniformly from 0...len-1
-int sample(vector<double>& arr, int len) {
- int l, r, mid;
- double prb = rg.Random() * arr[len - 1];
-
- l = 0; r = len - 1;
- while (l <= r) {
- mid = (l + r) / 2;
- if (arr[mid] <= prb) l = mid + 1;
- else r = mid - 1;
- }
-
- if (l >= len) { printf("%d %lf %lf\n", len, arr[len - 1], prb); }
- assert(l < len);
-
- return l;
-}
-
void init() {
int len, fr, to;
fprintf(fo, "%d %d\n", CHAINLEN / GAP, M + 1);
//fprintf(fo, "%d %d\n", CHAINLEN, M + 1);
+ pme_c.clear(); pme_c.resize(M + 1, 0.0);
pme_theta.clear(); pme_theta.resize(M + 1, 0.0);
for (int ROUND = 1; ROUND <= BURNIN + CHAINLEN; ROUND++) {
}
if (ROUND > BURNIN) {
- if ((ROUND - BURNIN -1) % GAP == 0) writeCountVector(fo);
- writeCountVector(fo);
- for (int i = 0; i <= M; i++) pme_theta[i] += counts[i] / totc;
+ if ((ROUND - BURNIN - 1) % GAP == 0) writeCountVector(fo);
+ for (int i = 0; i <= M; i++) {
+ pme_c[i] += counts[i] - 1;
+ pme_theta[i] += counts[i] / totc;
+ }
}
if (verbose) { printf("ROUND %d is finished!\n", ROUND); }
}
fclose(fo);
- for (int i = 0; i <= M; i++) pme_theta[i] /= CHAINLEN;
+ for (int i = 0; i <= M; i++) {
+ pme_c[i] /= CHAINLEN;
+ pme_theta[i] /= CHAINLEN;
+ }
if (verbose) { printf("Gibbs is finished!\n"); }
}
assert(denom >= EPSILON);
for (int i = 0; i <= M; i++) pme_theta[i] /= denom;
- //calculate normalized read fraction
- double *nrf = new double[M + 1];
- memset(nrf, 0, sizeof(double) * (M + 1));
-
- denom = 1.0 - pme_theta[0];
- if (denom <= 0) { fprintf(stderr, "No alignable reads?!\n"); exit(-1); }
- for (int i = 1; i <= M; i++) nrf[i] = pme_theta[i] / denom;
-
//calculate tau values
double *tau = new double[M + 1];
memset(tau, 0, sizeof(double) * (M + 1));
fo = fopen(outF, "a");
if (fo == NULL) { fprintf(stderr, "Cannot open %s!\n", outF); exit(-1); }
for (int i = 1; i <= M; i++)
- fprintf(fo, "%.15g%c", nrf[i], (i < M ? '\t' : '\n'));
+ fprintf(fo, "%.2f%c", pme_c[i], (i < M ? '\t' : '\n'));
for (int i = 1; i <= M; i++)
fprintf(fo, "%.15g%c", tau[i], (i < M ? '\t' : '\n'));
fclose(fo);
fo = fopen(outF, "a");
if (fo == NULL) { fprintf(stderr, "Cannot open %s!\n", outF); exit(-1); }
for (int i = 0; i < m; i++) {
- double sumN = 0.0; // sum of normalized read fraction
+ double sumC = 0.0; // sum of pme counts
int b = gi.spAt(i), e = gi.spAt(i + 1);
for (int j = b; j < e; j++) {
- sumN += nrf[j];
+ sumC += pme_c[j];
}
- fprintf(fo, "%.15g%c", sumN, (i < m - 1 ? '\t' : '\n'));
+ fprintf(fo, "%.15g%c", sumC, (i < m - 1 ? '\t' : '\n'));
}
for (int i = 0; i < m; i++) {
double sumT = 0.0; // sum of tau values
}
fclose(fo);
- delete[] nrf;
delete[] tau;
if (verbose) { printf("Gibbs based expression values are written!\n"); }
int main(int argc, char* argv[]) {
if (argc < 7) {
- printf("Usage: rsem-run-gibbs reference_name sample_name imdName BURNIN CHAINLEN GAP [-q]\n");
+ printf("Usage: rsem-run-gibbs reference_name sample_name sampleToken BURNIN CHAINLEN GAP [-q]\n");
exit(-1);
}
BURNIN = atoi(argv[4]);
CHAINLEN = atoi(argv[5]);
GAP = atoi(argv[6]);
- load_data(argv[1], argv[2], argv[3]);
+ sprintf(imdName, "%s.temp/%s", argv[2], argv[3]);
+ sprintf(statName, "%s.stat/%s", argv[2], argv[3]);
+ load_data(argv[1], statName, imdName);
quiet = false;
if (argc > 7 && !strcmp(argv[7], "-q")) {
verbose = !quiet;
init();
- Gibbs(argv[3]);
+ Gibbs(imdName);
- sprintf(modelF, "%s.model", argv[2]);
+ sprintf(modelF, "%s.model", statName);
FILE *fi = fopen(modelF, "r");
if (fi == NULL) { fprintf(stderr, "Cannot open %s!\n", modelF); exit(-1); }
fscanf(fi, "%d", &model_type);
fclose(fi);
switch(model_type) {
- case 0 : writeEstimatedParameters<SingleModel>(modelF, argv[3]); break;
- case 1 : writeEstimatedParameters<SingleQModel>(modelF, argv[3]); break;
- case 2 : writeEstimatedParameters<PairedEndModel>(modelF, argv[3]); break;
- case 3 : writeEstimatedParameters<PairedEndQModel>(modelF, argv[3]); break;
+ case 0 : writeEstimatedParameters<SingleModel>(modelF, imdName); break;
+ case 1 : writeEstimatedParameters<SingleQModel>(modelF, imdName); break;
+ case 2 : writeEstimatedParameters<PairedEndModel>(modelF, imdName); break;
+ case 3 : writeEstimatedParameters<PairedEndQModel>(modelF, imdName); break;
}
return 0;