5 * Created by westcott on 7/6/11.
6 * Copyright 2011 Schloss Lab. All rights reserved.
10 #include "mothurmetastats.h"
11 #include "mothurfisher.h"
14 /***********************************************************/
15 MothurMetastats::MothurMetastats(double t, int n) {
17 m = MothurOut::getInstance();
21 }catch(exception& e) {
22 m->errorOut(e, "MothurMetastats", "MothurMetastats");
26 /***********************************************************/
27 MothurMetastats::~MothurMetastats() {}
28 /***********************************************************/
29 //main metastats function
30 int MothurMetastats::runMetastats(string outputFileName, vector< vector<double> >& data, int secondGroupingStart) {
33 row = data.size(); //numBins
34 column = data[0].size(); //numGroups in subset
35 int size = row*column;
37 //consistent with original, but this should never be true
38 if ((secondGroupingStart >= column) || (secondGroupingStart <= 0)) { m->mothurOut("[ERROR]: Check your g value."); m->mothurOutEndLine(); return 0; }
40 //Initialize the matrices
41 vector<double> pmatrix; pmatrix.resize(size, 0.0);
42 vector<double> permuted; permuted.resize(size, 0.0);
43 vector< vector<double> > storage; storage.resize(row);
44 for (int i = 0; i < storage.size(); i++) { storage[i].resize(9, 0.0); }
46 //Produces the sum of each column
47 vector<double> total; total.resize(column, 0.0);
48 vector<double> ratio; ratio.resize(column, 0.0);
49 double total1 = 0.0; double total2 = 0.0;
51 //total[i] = total abundance for group[i]
52 for (int i = 0; i < column; i++) {
53 for (int j = 0; j < row; j++) {
54 total[i] += data[j][i];
58 //total for first grouping
59 for (int i = 0; i < secondGroupingStart; i++) { total1 += total[i]; }
61 //total for second grouping
62 for (int i = secondGroupingStart; i < column; i++) { total2 += total[i]; }
64 //Creates the ratios by first finding the minimum of totals
65 double min = total[0];
66 for (int i = 0; i < total.size(); i++) {
67 if (total[i] < min) { min = total[i]; }
71 if (min <= 0.0) { m->mothurOut("[ERROR]: the sum of one of the columns <= 0."); m->mothurOutEndLine(); return 0; }
74 for(int i = 0; i < ratio.size(); i++){ ratio[i] = total[i] / min; }
76 //Change matrix into an array as received by R for compatibility - kept to be consistent with original
78 for(int i = 0; i < column; i++){
79 for(int j = 0; j < row; j++){
80 pmatrix[count]=data[j][i];
86 for (int i =0; i < column; i++){ pmatrix[i] /= ratio[i]; }
90 for (int i=0; i < size; i++) {
91 if (count % row == 0) { j++; }
92 pmatrix[i] /= ratio[j];
97 vector<double> permuted_ttests; permuted_ttests.resize(row, 0.0);
98 vector<double> pvalues; pvalues.resize(row, 0.0);
99 vector<double> tinitial; tinitial.resize(row, 0.0);
101 if (m->control_pressed) { return 1; }
103 //Find the initial values for the matrix.
104 start(pmatrix, secondGroupingStart, tinitial, storage);
106 if (m->control_pressed) { return 1; }
108 // Start the calculations.
109 if ( (column == 2) || (secondGroupingStart < 8) || ((column-secondGroupingStart) < 8) ){
111 vector<double> fish; fish.resize(row, 0.0);
112 vector<double> fish2; fish2.resize(row, 0.0);
114 for(int i = 0; i < row; i++){
116 for(int j = 0; j < secondGroupingStart; j++) { fish[i] += data[i][j]; }
117 for(int j = secondGroupingStart; j < column; j++) { fish2[i] += data[i][j]; }
119 //vector<double> tempData; tempData.resize(4, 0.0);
120 double f11, f12, f21, f22;
123 f21 = total1 - fish[i];
124 f22 = total2 - fish2[i];
129 pre = fisher.fexact(f11, f12, f21, f22);
131 if (m->control_pressed) { return 1; }
133 if (pre > 0.999999999) { pre = 1.0; }
140 testp(permuted_ttests, permuted, pmatrix, secondGroupingStart, tinitial, pvalues);
142 if (m->control_pressed) { return 1; }
144 // Checks to make sure the matrix isn't sparse.
145 vector<double> sparse; sparse.resize(row, 0.0);
146 vector<double> sparse2; sparse2.resize(row, 0.0);
150 for(int i = 0; i < row; i++){
152 for(int j = 0; j < secondGroupingStart; j++) { sparse[i] += data[i][j]; }
153 if(sparse[i] < (double)secondGroupingStart) { c++; }
156 for(int j = secondGroupingStart; j < column; j++) { sparse2[i] += data[i][j]; }
157 if( (sparse2[i] < (double)(column-secondGroupingStart))) { c++; }
161 double f11,f12,f21,f22;
163 f11=sparse[i]; sparse[i]=0;
164 f12=sparse2[i]; sparse2[i]=0;
171 pre = fisher.fexact(f11, f12, f21, f22);
173 if (m->control_pressed) { return 1; }
175 if (pre > 0.999999999){
187 // Calculates the mean of counts (not normalized)
188 vector< vector<double> > temp; temp.resize(row);
189 for (int i = 0; i < temp.size(); i++) { temp[i].resize(2, 0.0); }
191 for (int j = 0; j < row; j++){
192 if (m->control_pressed) { return 1; }
194 for (int i = 0; i < secondGroupingStart; i++){ temp[j][0] += data[j][i]; }
195 temp[j][0] /= (double)secondGroupingStart;
197 for(int i = secondGroupingStart; i < column; i++){ temp[j][1] += data[j][i]; }
198 temp[j][1] /= (double)(column-secondGroupingStart);
201 for(int i = 0; i < row; i++){
202 if (m->control_pressed) { return 1; }
204 storage[i][3]=temp[i][0];
205 storage[i][7]=temp[i][1];
206 storage[i][8]=pvalues[i];
209 vector<double> qvalues = calc_qvalues(pvalues);
212 cout.setf(ios::fixed, ios::floatfield); cout.setf(ios::showpoint);
213 for (int i = 0; i < row; i++){
215 if (m->control_pressed) { return 1; }
217 if(qvalues[i] < threshold){
218 m->mothurOut("Feature " + toString((i+1)) + " is significant, q = ");
220 m->mothurOutJustToLog(toString(pvalues[i])); m->mothurOutEndLine();
224 // And now we write the files to a text file.
226 time_t t; t = time(NULL);
227 local = localtime(&t);
230 m->openOutputFile(outputFileName, out);
231 out.setf(ios::fixed, ios::floatfield); out.setf(ios::showpoint);
233 out << "Local time and date of test: " << asctime(local) << endl;
234 out << "# rows = " << row << ", # col = " << column << ", g = " << secondGroupingStart << endl << endl;
235 if (bflag == 1){ out << numPermutations << " permutations" << endl << endl; }
237 //output column headings - not really sure... documentation labels 9 columns, there are 10 in the output file
238 //storage 0 = meanGroup1 - line 529, 1 = varGroup1 - line 532, 2 = err rate1 - line 534, 3 = mean of counts group1?? - line 291, 4 = meanGroup2 - line 536, 5 = varGroup2 - line 539, 6 = err rate2 - line 541, 7 = mean of counts group2?? - line 292, 8 = pvalues - line 293
239 out << "OTU\tmean(group1)\tvariance(group1)\tstderr(group1)\tmean_of_counts(group1)\tmean(group2)\tvariance(group2)\tstderr(group2)\tmean_of_counts(group1)\tp-value\tq-value\n";
241 for(int i = 0; i < row; i++){
242 if (m->control_pressed) { out.close(); return 0; }
245 for(int j = 0; j < 9; j++){ out << '\t' << storage[i][j]; }
246 out << '\t' << qvalues[i];
256 }catch(exception& e) {
257 m->errorOut(e, "MothurMetastats", "runMetastats");
261 /***********************************************************/
262 //Find the initial values for the matrix
263 int MothurMetastats::start(vector<double>& Imatrix, int secondGroupingStart, vector<double>& initial, vector< vector<double> >& storage) {
268 double xbardiff = 0.0; double denom = 0.0;
269 vector<double> store; store.resize(a, 0.0);
270 vector<double> tool; tool.resize(a, 0.0);
271 vector< vector<double> > C1; C1.resize(row);
272 for (int i = 0; i < C1.size(); i++) { C1[i].resize(3, 0.0); }
273 vector< vector<double> > C2; C2.resize(row);
274 for (int i = 0; i < C2.size(); i++) { C2[i].resize(3, 0.0); }
276 meanvar(Imatrix, secondGroupingStart, store);
278 if (m->control_pressed) { return 0; }
280 //copy store into tool
283 for (int i = 0; i < row; i++){
284 C1[i][0]=tool[i]; //mean group 1
285 storage[i][0]=C1[i][0];
286 C1[i][1]=tool[i+row+row]; // var group 1
287 storage[i][1]=C1[i][1];
288 C1[i][2]=C1[i][1]/(secondGroupingStart);
289 storage[i][2]=sqrt(C1[i][2]);
291 C2[i][0]=tool[i+row]; // mean group 2
292 storage[i][4]=C2[i][0];
293 C2[i][1]=tool[i+row+row+row]; // var group 2
294 storage[i][5]=C2[i][1];
295 C2[i][2]=C2[i][1]/(column-secondGroupingStart);
296 storage[i][6]=sqrt(C2[i][2]);
299 if (m->control_pressed) { return 0; }
301 for (int i = 0; i < row; i++){
302 xbardiff = C1[i][0]-C2[i][0];
303 denom = sqrt(C1[i][2]+C2[i][2]);
304 initial[i]=fabs(xbardiff/denom);
309 }catch(exception& e) {
310 m->errorOut(e, "MothurMetastats", "start");
314 /***********************************************************/
315 int MothurMetastats::meanvar(vector<double>& pmatrix, int secondGroupingStart, vector<double>& store) {
317 vector<double> temp; temp.resize(row, 0.0);
318 vector<double> temp2; temp2.resize(row, 0.0);
319 vector<double> var; var.resize(row, 0.0);
320 vector<double> var2; var2.resize(row, 0.0);
322 double a = secondGroupingStart;
323 double b = column - a;
325 int n = row * column;
327 for (int i = 0; i < m; i++) { temp[i%row] += pmatrix[i]; }
328 for (int i = 0; i < n; i++) { temp2[i%row]+= pmatrix[i]; }
329 for (int i = 0; i < row; i++) { temp2[i] -= temp[i]; }
330 for (int i = 0; i <= row-1;i++) {
331 store[i] = temp[i]/a;
332 store[i+row]=temp2[i]/b;
335 //That completes the mean calculations.
337 for (int i = 0; i < m; i++) { var[i%row] += pow((pmatrix[i]-store[i%row]),2); }
338 for (int i = m; i < n; i++) { var2[i%row]+= pow((pmatrix[i]-store[(i%row)+row]),2); }
339 for (int i = 0; i <= row-1; i++){
340 store[i+2*row]=var[i]/(a-1);
341 store[i+3*row]=var2[i]/(b-1);
344 // That completes var calculations.
348 }catch(exception& e) {
349 m->errorOut(e, "MothurMetastats", "meanvar");
353 /***********************************************************/
354 int MothurMetastats::testp(vector<double>& permuted_ttests, vector<double>& permuted, vector<double>& Imatrix, int secondGroupingStart, vector<double>& Tinitial, vector<double>& ps) {
357 vector<double> Tvalues; Tvalues.resize(row, 0.0);
358 vector<double> counter; counter.resize(row, 0.0);
365 for (int j = 1; j <= row; j++) {
366 if (m->control_pressed) { return 0; }
367 permute_matrix(Imatrix, permuted, secondGroupingStart, Tvalues, Tinitial, counter);
370 for(int j = 0; j < row; j++) {
371 if (m->control_pressed) { return 0; }
372 ps[j] = ((counter[j]+1)/(double)(a+1));
377 }catch(exception& e) {
378 m->errorOut(e, "MothurMetastats", "testp");
382 /***********************************************************/
383 int MothurMetastats::permute_matrix(vector<double>& Imatrix, vector<double>& permuted, int secondGroupingStart, vector<double>& trial_ts, vector<double>& Tinitial, vector<double>& counter1){
386 vector<int> y; y.resize(column, 0);
387 for (int i = 1; i <= column; i++){ y[i-1] = i; }
391 int f = 0; int c = 0; int k = 0;
392 for (int i = 0; i < column; i++){
394 if (m->control_pressed) { return 0; }
396 f = y[i]; //column number
400 if (f == 1){ c = 0; } // starting value position in the Imatrix
402 for(int j = 1; j <= row; j++){
403 permuted[k] = Imatrix[c];
408 calc_twosample_ts(permuted, secondGroupingStart, trial_ts, Tinitial, counter1);
412 }catch(exception& e) {
413 m->errorOut(e, "MothurMetastats", "permute_matrix");
417 /***********************************************************/
418 int MothurMetastats::permute_array(vector<int>& array) {
420 static int seeded = 0;
427 for (int i = 0; i < array.size(); i++) {
428 if (m->control_pressed) { return 0; }
430 int selection = rand() % (array.size() - i);
431 int tmp = array[i + selection];
432 array[i + selection] = array[i];
438 }catch(exception& e) {
439 m->errorOut(e, "MothurMetastats", "permute_array");
443 /***********************************************************/
444 int MothurMetastats::calc_twosample_ts(vector<double>& Pmatrix, int secondGroupingStart, vector<double>& Ts, vector<double>& Tinitial, vector<double>& counter) {
448 vector< vector<double> > C1; C1.resize(row);
449 for (int i = 0; i < C1.size(); i++) { C1[i].resize(3, 0.0); }
450 vector< vector<double> > C2; C2.resize(row);
451 for (int i = 0; i < C2.size(); i++) { C2[i].resize(3, 0.0); }
452 vector<double> storage; storage.resize(a, 0.0);
453 vector<double> tool; tool.resize(a, 0.0);
454 double xbardiff = 0.0; double denom = 0.0;
456 meanvar(Pmatrix, secondGroupingStart, storage);
458 for(int i = 0;i <= (a-1); i++) {
459 if (m->control_pressed) { return 0; }
460 tool[i] = storage[i];
463 for (int i = 0; i < row; i++){
464 if (m->control_pressed) { return 0; }
466 C1[i][1]=tool[i+row+row];
467 C1[i][2]=C1[i][1]/(secondGroupingStart);
469 C2[i][0]=tool[i+row];
470 C2[i][1]=tool[i+row+row+row]; // var group 2
471 C2[i][2]=C2[i][1]/(column-secondGroupingStart);
474 for (int i = 0; i < row; i++){
475 if (m->control_pressed) { return 0; }
476 xbardiff = C1[i][0]-C2[i][0];
477 denom = sqrt(C1[i][2]+C2[i][2]);
478 Ts[i]=fabs(xbardiff/denom);
479 if (fabs(Ts[i])>(fabs(Tinitial[i])+.0000000000001)){ //13th place
486 }catch(exception& e) {
487 m->errorOut(e, "MothurMetastats", "calc_twosample_ts");
491 /***********************************************************/
492 vector<double> MothurMetastats::calc_qvalues(vector<double>& pValues) {
495 int numRows = pValues.size();
496 vector<double> qvalues(numRows, 0.0);
498 //fill lambdas with 0.00, 0.01, 0.02... 0.95
499 vector<double> lambdas(96, 0);
500 for (int i = 1; i < lambdas.size(); i++) { lambdas[i] = lambdas[i-1] + 0.01; }
502 vector<double> pi0_hat(lambdas.size(), 0);
505 for (int l = 0; l < lambdas.size(); l++){ // for each lambda value
507 for (int i = 0; i < numRows; i++){ // for each p-value in order
508 if (pValues[i] > lambdas[l]){ count++; }
510 pi0_hat[l] = count/(double)(numRows*(1-lambdas[l]));
513 double pi0 = smoothSpline(lambdas, pi0_hat, 3);
516 vector<double> ordered_qs = qvalues;
517 vector<int> ordered_ps(pValues.size(), 0);
518 for (int i = 1; i < ordered_ps.size(); i++) { ordered_ps[i] = ordered_ps[i-1] + 1; }
519 vector<double> tempPvalues = pValues;
520 OrderPValues(0, numRows-1, tempPvalues, ordered_ps);
522 ordered_qs[numRows-1] = min((pValues[ordered_ps[numRows-1]]*pi0), 1.0);
523 for (int i = (numRows-2); i >= 0; i--){
524 double p = pValues[ordered_ps[i]];
525 double temp = p*numRows*pi0/(double)(i+1);
527 ordered_qs[i] = min(temp, ordered_qs[i+1]);
530 //re-distribute calculated qvalues to appropriate rows
531 for (int i = 0; i < numRows; i++){
532 qvalues[ordered_ps[i]] = ordered_qs[i];
537 }catch(exception& e) {
538 m->errorOut(e, "MothurMetastats", "calc_qvalues");
542 /***********************************************************/
543 int MothurMetastats::OrderPValues(int low, int high, vector<double>& p, vector<int>& order) {
549 int pivot = (low+high) / 2;
551 swapElements(low, pivot, p, order); //puts pivot in final spot
558 /* find member above ... */
559 while((i <= high) && (p[i] <= key)) { i++; }
561 /* find element below ... */
562 while((j >= low) && (p[j] > key)) { j--; }
565 swapElements(i, j, p, order);
569 swapElements(low, j, p, order);
572 OrderPValues(low, j-1, p, order);
573 OrderPValues(j+1, high, p, order);
578 }catch(exception& e) {
579 m->errorOut(e, "MothurMetastats", "OrderPValues");
583 /***********************************************************/
584 int MothurMetastats::swapElements(int i, int j, vector<double>& p, vector<int>& order) {
597 }catch(exception& e) {
598 m->errorOut(e, "MothurMetastats", "swapElements");
602 /***********************************************************/
603 double MothurMetastats::smoothSpline(vector<double>& x, vector<double>& y, int df) {
608 vector<double> w(n, 1);
609 double* xb = new double[n];
610 double* yb = new double[n];
611 double* wb = new double[n];
613 for (int i = 0; i < n; i++) {
616 yssw += (w[i] * y[i] * y[i]) - wb[i] * (yb[i] * yb[i]);
617 xb[i] = (x[i] - x[0]) / (x[n-1] - x[0]);
620 vector<double> knot = sknot1(xb, n);
621 int nk = knot.size() - 4;
623 double low = -1.5; double high = 1.5; double tol = 1e-04; double eps = 2e-08; int maxit = 500;
624 int ispar = 0; int icrit = 3; double dofoff = 3.0;
625 double penalty = 1.0;
626 int ld4 = 4; int isetup = 0; int ldnk = 1; int ier; double spar = 1.0; double crit;
628 double* knotb = new double[knot.size()];
629 double* coef1 = new double[nk];
630 double* lev1 = new double[n];
631 double* sz1 = new double[n];
632 for (int i = 0; i < knot.size(); i++) { knotb[i] = knot[i]; }
635 spline.sbart(&penalty, &dofoff, &xb[0], &yb[0], &wb[0], &yssw, &n, &knotb[0], &nk, &coef1[0], &sz1[0], &lev1[0], &crit,
636 &icrit, &spar, &ispar, &maxit, &low, &high, &tol, &eps, &isetup, &ld4, &ldnk, &ier);
638 result = coef1[nk-1];
651 }catch(exception& e) {
652 m->errorOut(e, "MothurMetastats", "smoothSpline");
656 /***********************************************************/
657 vector<double> MothurMetastats::sknot1(double* x, int n) {
659 vector<double> knots;
662 //R equivalent - rep(x[1L], 3L)
663 knots.push_back(x[0]); knots.push_back(x[0]); knots.push_back(x[0]);
665 //generate a sequence of nk equally spaced values from 1 to n. R equivalent = seq.int(1, n, length.out = nk)
666 vector<int> indexes = getSequence(0, n-1, nk);
667 for (int i = 0; i < indexes.size(); i++) { knots.push_back(x[indexes[i]]); }
669 //R equivalent - rep(x[n], 3L)
670 knots.push_back(x[n-1]); knots.push_back(x[n-1]); knots.push_back(x[n-1]);
674 }catch(exception& e) {
675 m->errorOut(e, "MothurMetastats", "sknot1");
679 /***********************************************************/
680 vector<int> MothurMetastats::getSequence(int start, int end, int length) {
682 vector<int> sequence;
683 double increment = (end-start) / (double) (length-1);
685 sequence.push_back(start);
686 for (int i = 1; i < length-1; i++) {
687 sequence.push_back(int(i*increment));
689 sequence.push_back(end);
693 }catch(exception& e) {
694 m->errorOut(e, "MothurMetastats", "getSequence");
698 /***********************************************************/
699 //not right, havent fully figured out the variable types yet...
700 int MothurMetastats::nkn(int n) {
703 if (n < 50) { return n; }
705 double a1 = log2(50);
706 double a2 = log2(100);
707 double a3 = log2(140);
708 double a4 = log2(200);
711 return (int)pow(2.0, (a1 + (a2 - a1) * (n - 50)/(float)150));
713 return (int)pow(2.0, (a2 + (a3 - a2) * (n - 200)/(float)600));
714 }else if (n < 3200) {
715 return (int)pow(2.0, (a3 + (a4 - a3) * (n - 800)/(float)2400));
717 return (int)pow((double)(200 + (n - 3200)), 0.2);
723 }catch(exception& e) {
724 m->errorOut(e, "MothurMetastats", "nkn");
728 /***********************************************************/