]> git.donarmstrong.com Git - mothur.git/blobdiff - mothurmetastats.cpp
added otu.association command. added calcSpearman, calcKendall and calcPearson functi...
[mothur.git] / mothurmetastats.cpp
index 6a834e96be802fe416949dc8910d8790a24ddf15..ae822254c01ccdf7359ca319c44a2db5005f1fc4 100644 (file)
@@ -9,6 +9,7 @@
 
 #include "mothurmetastats.h"
 #include "mothurfisher.h"
+#include "spline.h"
 
 /***********************************************************/
 MothurMetastats::MothurMetastats(double t, int n) {
@@ -55,10 +56,10 @@ int MothurMetastats::runMetastats(string outputFileName, vector< vector<double>
                }
                
                //total for first grouping
-               for (int i = 0; i < (secondGroupingStart-1); i++) { total1 += total[i]; }
+               for (int i = 0; i < secondGroupingStart; i++) { total1 += total[i]; }
                
                //total for second grouping
-               for (int i = (secondGroupingStart-1); i < column; i++) { total2 += total[i]; }
+               for (int i = secondGroupingStart; i < column; i++) { total2 += total[i]; }
                
                //Creates the ratios by first finding the minimum of totals
                double min = total[0];
@@ -105,15 +106,15 @@ int MothurMetastats::runMetastats(string outputFileName, vector< vector<double>
                if (m->control_pressed) { return 1; }
                
                // Start the calculations.
-               if ( (column == 2) || ((secondGroupingStart-1) < 8) || ((column-secondGroupingStart+1) < 8) ){ 
+               if ( (column == 2) || (secondGroupingStart < 8) || ((column-secondGroupingStart) < 8) ){ 
                        
                        vector<double> fish;    fish.resize(row, 0.0);
                        vector<double> fish2;   fish2.resize(row, 0.0);
                        
                        for(int i = 0; i < row; i++){
                                
-                               for(int j = 0; j < (secondGroupingStart-1); j++)                { fish[i] += data[i][j];        }
-                               for(int j = (secondGroupingStart-1); j < column; j++)   { fish2[i] += data[i][j];       }
+                               for(int j = 0; j < secondGroupingStart; j++)            { fish[i] += data[i][j];        }
+                               for(int j = secondGroupingStart; j < column; j++)       { fish2[i] += data[i][j];       }
                                
                                //vector<double> tempData; tempData.resize(4, 0.0);
                                double f11, f12, f21, f22;
@@ -148,12 +149,12 @@ int MothurMetastats::runMetastats(string outputFileName, vector< vector<double>
                        
                        for(int i = 0; i < row; i++){
                                
-                               for(int j = 0; j < (secondGroupingStart-1); j++){       sparse[i] += data[i][j]; }
-                               if(sparse[i] < (double)(secondGroupingStart-1)){        c++; }
+                               for(int j = 0; j < secondGroupingStart; j++)    {       sparse[i] += data[i][j];        }
+                               if(sparse[i] < (double)secondGroupingStart)             {       c++;                                            }
                                
                                // ?<= for col
-                               for(int j = (secondGroupingStart-1); j < column; j++){  sparse2[i] += data[i][j]; }
-                               if( (sparse2[i] < (double)(column-secondGroupingStart+1))) { c++; }
+                               for(int j = secondGroupingStart; j < column; j++)               {  sparse2[i] += data[i][j]; }
+                               if( (sparse2[i] < (double)(column-secondGroupingStart)))        { c++;                                           }
                                
                                if (c == 2) {
                                        c=0;
@@ -190,11 +191,11 @@ int MothurMetastats::runMetastats(string outputFileName, vector< vector<double>
                for (int j = 0; j < row; j++){
                        if (m->control_pressed) { return 1; }
                        
-                       for (int i = 1; i <= (secondGroupingStart-1); i++){ temp[j][0] += data[j][i-1]; }
-                       temp[j][0] /= (double)(secondGroupingStart-1);
+                       for (int i = 0; i < secondGroupingStart; i++){ temp[j][0] += data[j][i]; }
+                       temp[j][0] /= (double)secondGroupingStart;
                        
-                       for(int i = secondGroupingStart; i <= column; i++){ temp[j][1] += data[j][i-1]; }
-                       temp[j][1] /= (double)(column-secondGroupingStart+1);
+                       for(int i = secondGroupingStart; i < column; i++){ temp[j][1] += data[j][i]; }
+                       temp[j][1] /= (double)(column-secondGroupingStart);
                }
                
                for(int i = 0; i < row; i++){
@@ -205,15 +206,17 @@ int MothurMetastats::runMetastats(string outputFileName, vector< vector<double>
                        storage[i][8]=pvalues[i];
                }
                
+               vector<double> qvalues = calc_qvalues(pvalues);
+               
                // BACKUP checks
                cout.setf(ios::fixed, ios::floatfield); cout.setf(ios::showpoint);
                for (int i = 0; i < row; i++){
                        
                        if (m->control_pressed) { return 1; }
                        
-                       if(pvalues[i] < threshold){
-                               m->mothurOut("Feature " + toString((i+1)) + " is significant, p = "); 
-                               cout << pvalues[i];
+                       if(qvalues[i] < threshold){
+                               m->mothurOut("Feature " + toString((i+1)) + " is significant, q = "); 
+                               cout << qvalues[i];
                                m->mothurOutJustToLog(toString(pvalues[i])); m->mothurOutEndLine();
                        }       
                }
@@ -233,19 +236,21 @@ int MothurMetastats::runMetastats(string outputFileName, vector< vector<double>
                
                //output column headings - not really sure... documentation labels 9 columns, there are 10 in the output file
                //storage 0 = meanGroup1 - line 529, 1 = varGroup1 - line 532, 2 = err rate1 - line 534, 3 = mean of counts group1?? - line 291, 4 = meanGroup2 - line 536, 5 = varGroup2 - line 539, 6 = err rate2 - line 541, 7 = mean of counts group2?? - line 292, 8 = pvalues - line 293
-               out << "OTU\tmean(group1)\tvariance(group1)\tstderr(group1)\tmean_of_counts(group1)\tmean(group2)\tvariance(group2)\tstderr(group2)\tmean_of_counts(group1)\tp-value\n";
+               out << "OTU\tmean(group1)\tvariance(group1)\tstderr(group1)\tmean_of_counts(group1)\tmean(group2)\tvariance(group2)\tstderr(group2)\tmean_of_counts(group1)\tp-value\tq-value\n";
                
                for(int i = 0; i < row; i++){
                        if (m->control_pressed) { out.close(); return 0; }
                        
                        out << (i+1);
                        for(int j = 0; j < 9; j++){ out << '\t' << storage[i][j]; }
+                       out << '\t' << qvalues[i];
                        out << endl;
                }  
                
                out << endl << endl;
                out.close();
                
+               
                return 0;
        
        }catch(exception& e) {
@@ -280,14 +285,14 @@ int MothurMetastats::start(vector<double>& Imatrix, int secondGroupingStart, vec
                        storage[i][0]=C1[i][0];
                        C1[i][1]=tool[i+row+row]; // var group 1
                        storage[i][1]=C1[i][1];
-                       C1[i][2]=C1[i][1]/(secondGroupingStart-1);
+                       C1[i][2]=C1[i][1]/(secondGroupingStart);
                        storage[i][2]=sqrt(C1[i][2]);
                        
                        C2[i][0]=tool[i+row]; // mean group 2
                        storage[i][4]=C2[i][0];    
                        C2[i][1]=tool[i+row+row+row]; // var group 2 
                        storage[i][5]=C2[i][1];        
-                       C2[i][2]=C2[i][1]/(column-secondGroupingStart+1);
+                       C2[i][2]=C2[i][1]/(column-secondGroupingStart);
                        storage[i][6]=sqrt(C2[i][2]);   
                }
                
@@ -314,7 +319,7 @@ int MothurMetastats::meanvar(vector<double>& pmatrix, int secondGroupingStart, v
                vector<double> var;             var.resize(row, 0.0);
                vector<double> var2;    var2.resize(row, 0.0);
                
-               double a = secondGroupingStart-1;
+               double a = secondGroupingStart;
                double b = column - a;
                int m = a * row;
                int n = row * column;
@@ -459,11 +464,11 @@ int MothurMetastats::calc_twosample_ts(vector<double>& Pmatrix, int secondGroupi
                        if (m->control_pressed) { return 0; }
                        C1[i][0]=tool[i];
                        C1[i][1]=tool[i+row+row];
-                       C1[i][2]=C1[i][1]/(secondGroupingStart-1);
+                       C1[i][2]=C1[i][1]/(secondGroupingStart);
                        
                        C2[i][0]=tool[i+row];
                        C2[i][1]=tool[i+row+row+row]; // var group 2 
-                       C2[i][2]=C2[i][1]/(column-secondGroupingStart+1);
+                       C2[i][2]=C2[i][1]/(column-secondGroupingStart);
                }
                
                for (int i = 0; i < row; i++){
@@ -484,5 +489,245 @@ int MothurMetastats::calc_twosample_ts(vector<double>& Pmatrix, int secondGroupi
        }
 }
 /***********************************************************/
+vector<double> MothurMetastats::calc_qvalues(vector<double>& pValues) {
+       try {
+               
+               int numRows = pValues.size();
+               vector<double> qvalues(numRows, 0.0);
+
+               //fill lambdas with 0.00, 0.01, 0.02... 0.95
+               vector<double> lambdas(96, 0);
+               for (int i = 1; i < lambdas.size(); i++) { lambdas[i] = lambdas[i-1] + 0.01; }
+               
+               vector<double> pi0_hat(lambdas.size(), 0);
+               
+               //calculate pi0_hat
+               for (int l = 0; l < lambdas.size(); l++){ // for each lambda value
+                       int count = 0;
+                       for (int i = 0; i < numRows; i++){ // for each p-value in order
+                               if (pValues[i] > lambdas[l]){ count++; }
+                       }
+                       pi0_hat[l] = count/(double)(numRows*(1-lambdas[l]));
+               }
+               
+               double pi0 = smoothSpline(lambdas, pi0_hat, 3);
+               
+               //order p-values
+               vector<double> ordered_qs = qvalues;
+               vector<int> ordered_ps(pValues.size(), 0);
+               for (int i = 1; i < ordered_ps.size(); i++) {  ordered_ps[i] = ordered_ps[i-1] + 1; }
+               vector<double> tempPvalues = pValues;
+               OrderPValues(0, numRows-1, tempPvalues, ordered_ps);
+               
+               ordered_qs[numRows-1] = min((pValues[ordered_ps[numRows-1]]*pi0), 1.0);
+               for (int i = (numRows-2); i >= 0; i--){
+                       double p = pValues[ordered_ps[i]];
+                       double temp = p*numRows*pi0/(double)(i+1);
+
+                       ordered_qs[i] = min(temp, ordered_qs[i+1]);
+               }
+               
+               //re-distribute calculated qvalues to appropriate rows
+               for (int i = 0; i < numRows; i++){
+                       qvalues[ordered_ps[i]] = ordered_qs[i];
+               }
+               
+               return qvalues;
+               
+       }catch(exception& e) {
+               m->errorOut(e, "MothurMetastats", "calc_qvalues");
+               exit(1);
+       }
+}
+/***********************************************************/
+int MothurMetastats::OrderPValues(int low, int high, vector<double>& p, vector<int>& order) {
+       try {
+               
+               if (low < high) {
+                       int i = low+1;
+                       int j = high;
+                       int pivot = (low+high) / 2;
+                       
+                       swapElements(low, pivot, p, order);  //puts pivot in final spot
+                       
+                       /* compare value */
+                       double key = p[low];
+                       
+                       /* partition */
+                       while(i <= j) {
+                               /* find member above ... */
+                               while((i <= high) && (p[i] <= key))     {  i++;  }  
+                               
+                               /* find element below ... */
+                               while((j >= low) && (p[j] > key))       {  j--;  } 
+                               
+                               if(i < j) {
+                                       swapElements(i, j, p, order);
+                               }
+                       } 
+                       
+                       swapElements(low, j, p, order);
+                       
+                       /* recurse */
+                       OrderPValues(low, j-1, p, order);
+                       OrderPValues(j+1, high, p, order); 
+               }               
+               
+               return 0;
+               
+       }catch(exception& e) {
+               m->errorOut(e, "MothurMetastats", "OrderPValues");
+               exit(1);
+       }
+}
+/***********************************************************/
+int MothurMetastats::swapElements(int i, int j, vector<double>& p, vector<int>& order) {
+       try {
+               
+               double z = p[i];
+               p[i] = p[j];
+               p[j] = z;
+               
+               int temp = order[i];
+               order[i] = order[j];
+               order[j] = temp;
+               
+               return 0;
+               
+       }catch(exception& e) {
+               m->errorOut(e, "MothurMetastats", "swapElements");
+               exit(1);
+       }
+}
+/***********************************************************/
+double MothurMetastats::smoothSpline(vector<double>& x, vector<double>& y, int df) {
+       try {
+                               
+               double result = 0.0;
+               int n = x.size();
+               vector<double> w(n, 1);
+               double* xb = new double[n];
+               double* yb = new double[n];
+               double* wb = new double[n];
+               double yssw = 0.0;
+               for (int i = 0; i < n; i++) {
+                       wb[i] = w[i];
+                       yb[i] = w[i]*y[i];
+                       yssw += (w[i] * y[i] * y[i]) - wb[i] * (yb[i] * yb[i]);
+                       xb[i] = (x[i] - x[0]) / (x[n-1] - x[0]);
+               }
+               
+               vector<double> knot = sknot1(xb, n);
+               int nk = knot.size() - 4;
+
+               double low = -1.5; double high = 1.5; double tol = 1e-04; double eps = 2e-08; int maxit = 500;
+               int ispar = 0; int icrit = 3; double dofoff = 3.0;
+               double penalty = 1.0; 
+               int ld4 = 4; int isetup = 0; int ldnk = 1; int ier; double spar = 1.0; double crit;
+               
+               double* knotb = new double[knot.size()];
+               double* coef1 = new double[nk];
+               double* lev1 = new double[n];
+               double* sz1 = new double[n];
+               for (int i = 0; i < knot.size(); i++) { knotb[i] = knot[i];     }
+               
+               Spline spline;
+               spline.sbart(&penalty, &dofoff, &xb[0], &yb[0], &wb[0], &yssw, &n, &knotb[0], &nk, &coef1[0], &sz1[0], &lev1[0], &crit,
+                               &icrit, &spar, &ispar, &maxit, &low, &high, &tol, &eps, &isetup, &ld4, &ldnk, &ier);
+               
+               result = coef1[nk-1];
+               
+               //free memory
+               delete [] xb;
+               delete [] yb;
+               delete [] wb;
+               delete [] knotb;
+               delete [] coef1;
+               delete [] lev1;
+               delete [] sz1;
+                                                       
+               return result;
+               
+       }catch(exception& e) {
+               m->errorOut(e, "MothurMetastats", "smoothSpline");
+               exit(1);
+       }
+}
+/***********************************************************/
+vector<double> MothurMetastats::sknot1(double* x, int n) {
+       try {
+               vector<double> knots;
+               int nk = nkn(n);
+               
+               //R equivalent - rep(x[1L], 3L)
+               knots.push_back(x[0]); knots.push_back(x[0]); knots.push_back(x[0]);
+               
+               //generate a sequence of nk equally spaced values from 1 to n. R equivalent = seq.int(1, n, length.out = nk)
+               vector<int> indexes = getSequence(0, n-1, nk);
+               for (int i = 0; i < indexes.size(); i++) { knots.push_back(x[indexes[i]]);  } 
+               
+               //R equivalent - rep(x[n], 3L)
+               knots.push_back(x[n-1]); knots.push_back(x[n-1]); knots.push_back(x[n-1]);
+                               
+               return knots;
+               
+       }catch(exception& e) {
+               m->errorOut(e, "MothurMetastats", "sknot1");
+               exit(1);
+       }
+}
+/***********************************************************/
+vector<int> MothurMetastats::getSequence(int start, int end, int length) {
+       try {
+               vector<int> sequence;
+               double increment = (end-start) / (double) (length-1);
+               
+               sequence.push_back(start);
+               for (int i = 1; i < length-1; i++) {
+                       sequence.push_back(int(i*increment));
+               }
+               sequence.push_back(end);
+               
+               return sequence;
+               
+       }catch(exception& e) {
+               m->errorOut(e, "MothurMetastats", "getSequence");
+               exit(1);
+       }
+}      
+/***********************************************************/
+//not right, havent fully figured out the variable types yet...
+int MothurMetastats::nkn(int n) {
+       try {
+               
+               if (n < 50) { return n; }
+               else {
+                       double a1 = log2(50);
+                       double a2 = log2(100);
+                       double a3 = log2(140);
+                       double a4 = log2(200);
+                       
+                       if (n < 200) {
+                               return (int)pow(2.0, (a1 + (a2 - a1) * (n - 50)/(float)150));
+                       }else if (n < 800) {
+                               return (int)pow(2.0, (a2 + (a3 - a2) * (n - 200)/(float)600));
+                       }else if (n < 3200) {
+                               return (int)pow(2.0, (a3 + (a4 - a3) * (n - 800)/(float)2400));
+                       }else {
+                               return (int)pow((double)(200 + (n - 3200)), 0.2);
+                       }
+               }
+       
+               return 0;
+               
+       }catch(exception& e) {
+               m->errorOut(e, "MothurMetastats", "nkn");
+               exit(1);
+       }
+}
+/***********************************************************/
+
+
+