5 * Created by westcott on 7/6/11.
6 * Copyright 2011 Schloss Lab. All rights reserved.
10 #include "mothurmetastats.h"
11 #include "mothurfisher.h"
13 /***********************************************************/
14 MothurMetastats::MothurMetastats(double t, int n) {
16 m = MothurOut::getInstance();
20 }catch(exception& e) {
21 m->errorOut(e, "MothurMetastats", "MothurMetastats");
25 /***********************************************************/
26 MothurMetastats::~MothurMetastats() {}
27 /***********************************************************/
28 //main metastats function
29 int MothurMetastats::runMetastats(string outputFileName, vector< vector<double> >& data, int secondGroupingStart) {
32 row = data.size(); //numBins
33 column = data[0].size(); //numGroups in subset
34 int size = row*column;
36 //consistent with original, but this should never be true
37 if ((secondGroupingStart >= column) || (secondGroupingStart <= 0)) { m->mothurOut("[ERROR]: Check your g value."); m->mothurOutEndLine(); return 0; }
39 //Initialize the matrices
40 vector<double> pmatrix; pmatrix.resize(size, 0.0);
41 vector<double> permuted; permuted.resize(size, 0.0);
42 vector< vector<double> > storage; storage.resize(row);
43 for (int i = 0; i < storage.size(); i++) { storage[i].resize(9, 0.0); }
45 //Produces the sum of each column
46 vector<double> total; total.resize(column, 0.0);
47 vector<double> ratio; ratio.resize(column, 0.0);
48 double total1 = 0.0; double total2 = 0.0;
50 //total[i] = total abundance for group[i]
51 for (int i = 0; i < column; i++) {
52 for (int j = 0; j < row; j++) {
53 total[i] += data[j][i];
57 //total for first grouping
58 for (int i = 0; i < secondGroupingStart; i++) { total1 += total[i]; }
60 //total for second grouping
61 for (int i = secondGroupingStart; i < column; i++) { total2 += total[i]; }
63 //Creates the ratios by first finding the minimum of totals
64 double min = total[0];
65 for (int i = 0; i < total.size(); i++) {
66 if (total[i] < min) { min = total[i]; }
70 if (min <= 0.0) { m->mothurOut("[ERROR]: the sum of one of the columns <= 0."); m->mothurOutEndLine(); return 0; }
73 for(int i = 0; i < ratio.size(); i++){ ratio[i] = total[i] / min; }
75 //Change matrix into an array as received by R for compatibility - kept to be consistent with original
77 for(int i = 0; i < column; i++){
78 for(int j = 0; j < row; j++){
79 pmatrix[count]=data[j][i];
85 for (int i =0; i < column; i++){ pmatrix[i] /= ratio[i]; }
89 for (int i=0; i < size; i++) {
90 if (count % row == 0) { j++; }
91 pmatrix[i] /= ratio[j];
96 vector<double> permuted_ttests; permuted_ttests.resize(row, 0.0);
97 vector<double> pvalues; pvalues.resize(row, 0.0);
98 vector<double> tinitial; tinitial.resize(row, 0.0);
100 if (m->control_pressed) { return 1; }
102 //Find the initial values for the matrix.
103 start(pmatrix, secondGroupingStart, tinitial, storage);
105 if (m->control_pressed) { return 1; }
107 // Start the calculations.
108 if ( (column == 2) || (secondGroupingStart < 8) || ((column-secondGroupingStart) < 8) ){
110 vector<double> fish; fish.resize(row, 0.0);
111 vector<double> fish2; fish2.resize(row, 0.0);
113 for(int i = 0; i < row; i++){
115 for(int j = 0; j < secondGroupingStart; j++) { fish[i] += data[i][j]; }
116 for(int j = secondGroupingStart; j < column; j++) { fish2[i] += data[i][j]; }
118 //vector<double> tempData; tempData.resize(4, 0.0);
119 double f11, f12, f21, f22;
122 f21 = total1 - fish[i];
123 f22 = total2 - fish2[i];
128 pre = fisher.fexact(f11, f12, f21, f22);
130 if (m->control_pressed) { return 1; }
132 if (pre > 0.999999999) { pre = 1.0; }
139 testp(permuted_ttests, permuted, pmatrix, secondGroupingStart, tinitial, pvalues);
141 if (m->control_pressed) { return 1; }
143 // Checks to make sure the matrix isn't sparse.
144 vector<double> sparse; sparse.resize(row, 0.0);
145 vector<double> sparse2; sparse2.resize(row, 0.0);
149 for(int i = 0; i < row; i++){
151 for(int j = 0; j < secondGroupingStart; j++) { sparse[i] += data[i][j]; }
152 if(sparse[i] < (double)secondGroupingStart) { c++; }
155 for(int j = secondGroupingStart; j < column; j++) { sparse2[i] += data[i][j]; }
156 if( (sparse2[i] < (double)(column-secondGroupingStart))) { c++; }
160 double f11,f12,f21,f22;
162 f11=sparse[i]; sparse[i]=0;
163 f12=sparse2[i]; sparse2[i]=0;
170 pre = fisher.fexact(f11, f12, f21, f22);
172 if (m->control_pressed) { return 1; }
174 if (pre > 0.999999999){
186 // Calculates the mean of counts (not normalized)
187 vector< vector<double> > temp; temp.resize(row);
188 for (int i = 0; i < temp.size(); i++) { temp[i].resize(2, 0.0); }
190 for (int j = 0; j < row; j++){
191 if (m->control_pressed) { return 1; }
193 for (int i = 0; i < secondGroupingStart; i++){ temp[j][0] += data[j][i]; }
194 temp[j][0] /= (double)secondGroupingStart;
196 for(int i = secondGroupingStart; i < column; i++){ temp[j][1] += data[j][i]; }
197 temp[j][1] /= (double)(column-secondGroupingStart);
200 for(int i = 0; i < row; i++){
201 if (m->control_pressed) { return 1; }
203 storage[i][3]=temp[i][0];
204 storage[i][7]=temp[i][1];
205 storage[i][8]=pvalues[i];
207 cout.setf(ios::fixed, ios::floatfield); cout.setf(ios::showpoint);
208 cout << "pvalues" << endl;
209 for (int i = 0; i < row; i++){ if (pvalues[i] < 0.0000001) {
211 }cout << pvalues[i] << '\t'; }
213 calc_qvalues(pvalues);
216 cout.setf(ios::fixed, ios::floatfield); cout.setf(ios::showpoint);
217 for (int i = 0; i < row; i++){
219 if (m->control_pressed) { return 1; }
221 if(pvalues[i] < threshold){
222 m->mothurOut("Feature " + toString((i+1)) + " is significant, p = ");
224 m->mothurOutJustToLog(toString(pvalues[i])); m->mothurOutEndLine();
228 // And now we write the files to a text file.
230 time_t t; t = time(NULL);
231 local = localtime(&t);
234 m->openOutputFile(outputFileName, out);
235 out.setf(ios::fixed, ios::floatfield); out.setf(ios::showpoint);
237 out << "Local time and date of test: " << asctime(local) << endl;
238 out << "# rows = " << row << ", # col = " << column << ", g = " << secondGroupingStart << endl << endl;
239 if (bflag == 1){ out << numPermutations << " permutations" << endl << endl; }
241 //output column headings - not really sure... documentation labels 9 columns, there are 10 in the output file
242 //storage 0 = meanGroup1 - line 529, 1 = varGroup1 - line 532, 2 = err rate1 - line 534, 3 = mean of counts group1?? - line 291, 4 = meanGroup2 - line 536, 5 = varGroup2 - line 539, 6 = err rate2 - line 541, 7 = mean of counts group2?? - line 292, 8 = pvalues - line 293
243 out << "OTU\tmean(group1)\tvariance(group1)\tstderr(group1)\tmean_of_counts(group1)\tmean(group2)\tvariance(group2)\tstderr(group2)\tmean_of_counts(group1)\tp-value\n";
245 for(int i = 0; i < row; i++){
246 if (m->control_pressed) { out.close(); return 0; }
249 for(int j = 0; j < 9; j++){ out << '\t' << storage[i][j]; }
259 }catch(exception& e) {
260 m->errorOut(e, "MothurMetastats", "runMetastats");
264 /***********************************************************/
265 //Find the initial values for the matrix
266 int MothurMetastats::start(vector<double>& Imatrix, int secondGroupingStart, vector<double>& initial, vector< vector<double> >& storage) {
271 double xbardiff = 0.0; double denom = 0.0;
272 vector<double> store; store.resize(a, 0.0);
273 vector<double> tool; tool.resize(a, 0.0);
274 vector< vector<double> > C1; C1.resize(row);
275 for (int i = 0; i < C1.size(); i++) { C1[i].resize(3, 0.0); }
276 vector< vector<double> > C2; C2.resize(row);
277 for (int i = 0; i < C2.size(); i++) { C2[i].resize(3, 0.0); }
279 meanvar(Imatrix, secondGroupingStart, store);
281 if (m->control_pressed) { return 0; }
283 //copy store into tool
286 for (int i = 0; i < row; i++){
287 C1[i][0]=tool[i]; //mean group 1
288 storage[i][0]=C1[i][0];
289 C1[i][1]=tool[i+row+row]; // var group 1
290 storage[i][1]=C1[i][1];
291 C1[i][2]=C1[i][1]/(secondGroupingStart);
292 storage[i][2]=sqrt(C1[i][2]);
294 C2[i][0]=tool[i+row]; // mean group 2
295 storage[i][4]=C2[i][0];
296 C2[i][1]=tool[i+row+row+row]; // var group 2
297 storage[i][5]=C2[i][1];
298 C2[i][2]=C2[i][1]/(column-secondGroupingStart);
299 storage[i][6]=sqrt(C2[i][2]);
302 if (m->control_pressed) { return 0; }
304 for (int i = 0; i < row; i++){
305 xbardiff = C1[i][0]-C2[i][0];
306 denom = sqrt(C1[i][2]+C2[i][2]);
307 initial[i]=fabs(xbardiff/denom);
312 }catch(exception& e) {
313 m->errorOut(e, "MothurMetastats", "start");
317 /***********************************************************/
318 int MothurMetastats::meanvar(vector<double>& pmatrix, int secondGroupingStart, vector<double>& store) {
320 vector<double> temp; temp.resize(row, 0.0);
321 vector<double> temp2; temp2.resize(row, 0.0);
322 vector<double> var; var.resize(row, 0.0);
323 vector<double> var2; var2.resize(row, 0.0);
325 double a = secondGroupingStart;
326 double b = column - a;
328 int n = row * column;
330 for (int i = 0; i < m; i++) { temp[i%row] += pmatrix[i]; }
331 for (int i = 0; i < n; i++) { temp2[i%row]+= pmatrix[i]; }
332 for (int i = 0; i < row; i++) { temp2[i] -= temp[i]; }
333 for (int i = 0; i <= row-1;i++) {
334 store[i] = temp[i]/a;
335 store[i+row]=temp2[i]/b;
338 //That completes the mean calculations.
340 for (int i = 0; i < m; i++) { var[i%row] += pow((pmatrix[i]-store[i%row]),2); }
341 for (int i = m; i < n; i++) { var2[i%row]+= pow((pmatrix[i]-store[(i%row)+row]),2); }
342 for (int i = 0; i <= row-1; i++){
343 store[i+2*row]=var[i]/(a-1);
344 store[i+3*row]=var2[i]/(b-1);
347 // That completes var calculations.
351 }catch(exception& e) {
352 m->errorOut(e, "MothurMetastats", "meanvar");
356 /***********************************************************/
357 int MothurMetastats::testp(vector<double>& permuted_ttests, vector<double>& permuted, vector<double>& Imatrix, int secondGroupingStart, vector<double>& Tinitial, vector<double>& ps) {
360 vector<double> Tvalues; Tvalues.resize(row, 0.0);
361 vector<double> counter; counter.resize(row, 0.0);
368 for (int j = 1; j <= row; j++) {
369 if (m->control_pressed) { return 0; }
370 permute_matrix(Imatrix, permuted, secondGroupingStart, Tvalues, Tinitial, counter);
373 for(int j = 0; j < row; j++) {
374 if (m->control_pressed) { return 0; }
375 ps[j] = ((counter[j]+1)/(double)(a+1));
380 }catch(exception& e) {
381 m->errorOut(e, "MothurMetastats", "testp");
385 /***********************************************************/
386 int MothurMetastats::permute_matrix(vector<double>& Imatrix, vector<double>& permuted, int secondGroupingStart, vector<double>& trial_ts, vector<double>& Tinitial, vector<double>& counter1){
389 vector<int> y; y.resize(column, 0);
390 for (int i = 1; i <= column; i++){ y[i-1] = i; }
394 int f = 0; int c = 0; int k = 0;
395 for (int i = 0; i < column; i++){
397 if (m->control_pressed) { return 0; }
399 f = y[i]; //column number
403 if (f == 1){ c = 0; } // starting value position in the Imatrix
405 for(int j = 1; j <= row; j++){
406 permuted[k] = Imatrix[c];
411 calc_twosample_ts(permuted, secondGroupingStart, trial_ts, Tinitial, counter1);
415 }catch(exception& e) {
416 m->errorOut(e, "MothurMetastats", "permute_matrix");
420 /***********************************************************/
421 int MothurMetastats::permute_array(vector<int>& array) {
423 static int seeded = 0;
430 for (int i = 0; i < array.size(); i++) {
431 if (m->control_pressed) { return 0; }
433 int selection = rand() % (array.size() - i);
434 int tmp = array[i + selection];
435 array[i + selection] = array[i];
441 }catch(exception& e) {
442 m->errorOut(e, "MothurMetastats", "permute_array");
446 /***********************************************************/
447 int MothurMetastats::calc_twosample_ts(vector<double>& Pmatrix, int secondGroupingStart, vector<double>& Ts, vector<double>& Tinitial, vector<double>& counter) {
451 vector< vector<double> > C1; C1.resize(row);
452 for (int i = 0; i < C1.size(); i++) { C1[i].resize(3, 0.0); }
453 vector< vector<double> > C2; C2.resize(row);
454 for (int i = 0; i < C2.size(); i++) { C2[i].resize(3, 0.0); }
455 vector<double> storage; storage.resize(a, 0.0);
456 vector<double> tool; tool.resize(a, 0.0);
457 double xbardiff = 0.0; double denom = 0.0;
459 meanvar(Pmatrix, secondGroupingStart, storage);
461 for(int i = 0;i <= (a-1); i++) {
462 if (m->control_pressed) { return 0; }
463 tool[i] = storage[i];
466 for (int i = 0; i < row; i++){
467 if (m->control_pressed) { return 0; }
469 C1[i][1]=tool[i+row+row];
470 C1[i][2]=C1[i][1]/(secondGroupingStart);
472 C2[i][0]=tool[i+row];
473 C2[i][1]=tool[i+row+row+row]; // var group 2
474 C2[i][2]=C2[i][1]/(column-secondGroupingStart);
477 for (int i = 0; i < row; i++){
478 if (m->control_pressed) { return 0; }
479 xbardiff = C1[i][0]-C2[i][0];
480 denom = sqrt(C1[i][2]+C2[i][2]);
481 Ts[i]=fabs(xbardiff/denom);
482 if (fabs(Ts[i])>(fabs(Tinitial[i])+.0000000000001)){ //13th place
489 }catch(exception& e) {
490 m->errorOut(e, "MothurMetastats", "calc_twosample_ts");
494 /***********************************************************/
495 vector<double> MothurMetastats::calc_qvalues(vector<double>& pValues) {
497 vector<double> qvalues;
498 int numRows = pValues.size();
500 //fill lambdas with 0.00, 0.01, 0.02... 0.95
501 vector<double> lambdas(96, 0);
502 for (int i = 1; i < lambdas.size(); i++) { lambdas[i] = lambdas[i-1] + 0.01; }
504 vector<double> pi0_hat(lambdas.size(), 0);
507 for (int l = 0; l < lambdas.size(); l++){ // for each lambda value
509 for (int i = 0; i < numRows; i++){ // for each p-value in order
510 if (pValues[i] > lambdas[l]){ count++; }
511 pi0_hat[l] = count/(double)(numRows*(1-lambdas[l]));
515 //vector<double> f_spline = smoothSpline(lambdas, pi0_hat, 3);
516 //double pi0 = f_spline[(f_spline.size()-1)]; // this is the essential pi0_hat value
519 double pi0, notsure; //this is the essential pi0_hat value
521 vector<double> resultsSpline(lambdas.size(), 0.0);
522 spline(pi0_hat, lambdas, notSure, notSure, resultsSpline);
523 //some sort of loop to get best value??
524 splint(pi0_hat, lambdas, notsure, pi0, resultsSpline);
527 vector<double> ordered_qs = qvalues;
528 vector<int> ordered_ps(pValues.size(), 0);
529 for (int i = 1; i < ordered_ps.size(); i++) { ordered_ps[i] = ordered_ps[i-1] + 1; }
530 vector<double> tempPvalues = pValues;
531 OrderPValues(0, numRows-1, tempPvalues, ordered_ps);
533 ordered_qs[numRows-1] <- min((pValues[ordered_ps[numRows-1]]*pi0), 1.0);
534 for (int i = (numRows-2); i >= 0; i--){
535 double p = pValues[ordered_ps[i]];
536 double temp = p*numRows*pi0/(double)i;
538 ordered_qs[i] = min(temp, ordered_qs[i+1]);
539 ordered_qs[i] = min(ordered_qs[i], 1.0);
542 //re-distribute calculated qvalues to appropriate rows
543 for (int i = 0; i < numRows; i++){
544 qvalues[ordered_ps[i]] = ordered_qs[i];
549 }catch(exception& e) {
550 m->errorOut(e, "MothurMetastats", "calc_qvalues");
554 /***********************************************************/
555 int MothurMetastats::OrderPValues(int low, int high, vector<double>& p, vector<int>& order) {
561 int pivot = (low+high) / 2;
563 swapElements(low, pivot, p, order); //puts pivot in final spot
570 /* find member above ... */
571 while((i <= high) && (p[i] <= key)) { i++; }
573 /* find element below ... */
574 while((j >= low) && (p[j] > key)) { j--; }
577 swapElements(i, j, p, order);
581 swapElements(low, j, p, order);
584 OrderPValues(low, j-1, p, order);
585 OrderPValues(j+1, high, p, order);
590 }catch(exception& e) {
591 m->errorOut(e, "MothurMetastats", "OrderPValues");
595 /***********************************************************/
596 int MothurMetastats::swapElements(int i, int j, vector<double>& p, vector<int>& order) {
609 }catch(exception& e) {
610 m->errorOut(e, "MothurMetastats", "swapElements");
614 /***********************************************************
615 vector<double> MothurMetastats::smoothSpline(vector<double> x, vector<double> y, int df) {
618 cout << "lambdas" << endl;
619 for (int l = 0; l < x.size(); l++){ cout << x[l] << '\t'; }
620 cout << endl << "pi0_hat" << endl;
621 for (int l = 0; l < y.size(); l++){ cout << y[l] << '\t'; }
624 //double low = -1.5; double high = 1.5; double tol = 1e-04; double eps = 2e-08; double maxit = 500;
626 vector<double> w(n, 1);
628 //x <- signif(x, 6L) - I think this rounds to 6 decimals places
629 //ux <- unique(sort(x)) //x will be unique and sorted since we created it
630 //ox <- match(x, ux) //since its unique and sort it will match
632 vector<double> wbar(n, 0);
633 vector<double> ybar(n, 0);
634 vector<double> xbar(n, 0.0);
636 for (int i = 0; i < n; i++) {
639 yssw += (w[i] * y[i] * y[i]) - wbar[i] * (ybar[i] * ybar[i]);
640 xbar[i] = (x[i] - x[0]) / (x[n-1] - x[0]);
643 vector<double> knot = sknot1(xbar);
644 int nk = knot.size() - 4;
646 //double ispar = 0.0; double spar = 0.0; double icrit = 3.0; double dofoff = 3.0;
650 }catch(exception& e) {
651 m->errorOut(e, "MothurMetastats", "smoothSpline");
655 /***********************************************************/
656 // This function is taken from Numerical Recipes in C++ by Press et al., 2nd edition, pg. 479
657 int MothurMetastats::spline(vector<double>& x, vector<double>& y, int yp1, int ypn, vector<double>& y2) {
660 cout << "lambdas" << endl;
661 for (int l = 0; l < x.size(); l++){ cout << x[l] << '\t'; }
662 cout << endl << "pi0_hat" << endl;
663 for (int l = 0; l < y.size(); l++){ cout << y[l] << '\t'; }
666 double p, qn, sig, un;
669 vector<double> u(n-1, 0.0);
671 if (yp1 > 0.99e30) { y2[0] = u[0] = 0.0; }
674 u[0] = (3.0 / (x[1] - x[0])) * ((y[1] - y[0]) / (x[1]-x[0]) - yp1);
677 for (int i = 1; i < n-1; i++) {
678 sig = (x[i]-x[i-1])/(x[i+1]-x[i-1]);
679 p = sig * y2[i-1] + 2.0;
680 y2[i] = (sig - 1.0)/p;
681 u[i] = (y[i+1]-y[i]) / (x[i+1]-x[i]) - (y[i] - y[i-1]) / (x[i]-x[i-1]);
682 u[i] = (6.0*u[i]/(x[i+1]-x[i-1])-sig*u[i-1])/p;
685 if (ypn > 0.99e30) { qn=un=0.0; }
688 un=(3.0/(x[n-1]-x[n-2]))*(ypn-(y[n-1]-y[n-2])/(x[n-1]-x[n-2]));
691 y2[n-1]=(un-qn*u[n-2])/(qn*y2[n-2]+1.0);
693 for (int k=n-2; k>=0; k--) {
694 y2[k]=y2[k]*y2[k+1]+u[k];
699 }catch(exception& e) {
700 m->errorOut(e, "MothurMetastats", "spline");
704 /***********************************************************/
705 // This function is taken from Numerical Recipes in C++ by Press et al., 2nd edition, pg. 479
706 int MothurMetastats::splint(vector<double>& xa, vector<double>& ya, double& x, double& y, vector<double>& y2a) {
715 while (khi-klo > 1) {
717 if (m->control_pressed) { break; }
720 if (xa[k] > x) { khi=k; }
725 if (h == 0.0) { m->mothurOut("[ERROR]: Bad xa input to splint routine."); m->mothurOutEndLine(); m->control_pressed = true; return 0; }
728 y=a*ya[klo]+b*ya[khi]+((a*a*a-a)*y2a[klo]+(b*b*b-b)*y2a[khi])*(h*h)/6.0;
732 }catch(exception& e) {
733 m->errorOut(e, "MothurMetastats", "splint");
737 /***********************************************************/
738 vector<double> MothurMetastats::sknot1(vector<double> x) {
740 vector<double> knots;
745 //R equivalent - rep(x[1L], 3L)
746 knots.push_back(x[0]); knots.push_back(x[0]); knots.push_back(x[0]);
748 //generate a sequence of nk equally spaced values from 1 to n. R equivalent = seq.int(1, n, length.out = nk)
749 vector<int> indexes = getSequence(0, n-1, nk);
750 for (int i = 0; i < indexes.size(); i++) { knots.push_back(x[indexes[i]]); }
752 //R equivalent - rep(x[n], 3L)
753 knots.push_back(x[n-1]); knots.push_back(x[n-1]); knots.push_back(x[n-1]);
757 }catch(exception& e) {
758 m->errorOut(e, "MothurMetastats", "sknot1");
762 /***********************************************************/
763 vector<int> MothurMetastats::getSequence(int start, int end, int length) {
765 vector<int> sequence;
766 double increment = (end-start) / (double) (length-1);
768 sequence.push_back(start);
769 for (int i = 1; i < length-1; i++) {
770 sequence.push_back(int(i*increment));
772 sequence.push_back(end);
776 }catch(exception& e) {
777 m->errorOut(e, "MothurMetastats", "getSequence");
781 /***********************************************************/
782 //not right, havent fully figured out the variable types yet...
783 int MothurMetastats::nkn(int n) {
786 if (n < 50) { return n; }
788 double a1 = log2(50);
789 double a2 = log2(100);
790 double a3 = log2(140);
791 double a4 = log2(200);
794 return (int)pow(2.0, (a1 + (a2 - a1) * (n - 50)/(float)150));
796 return (int)pow(2.0, (a2 + (a3 - a2) * (n - 200)/(float)600));
797 }else if (n < 3200) {
798 return (int)pow(2.0, (a3 + (a4 - a3) * (n - 800)/(float)2400));
800 return (int)pow((double)(200 + (n - 3200)), 0.2);
806 }catch(exception& e) {
807 m->errorOut(e, "MothurMetastats", "nkn");
811 /***********************************************************/