]> git.donarmstrong.com Git - mothur.git/commitdiff
metastats in progress
authorwestcott <westcott>
Tue, 6 Dec 2011 12:53:59 +0000 (12:53 +0000)
committerwestcott <westcott>
Tue, 6 Dec 2011 12:53:59 +0000 (12:53 +0000)
mothurmetastats.cpp

index 9ba40502a2294d5ce42ba591c076da6fd3015979..31542e80fe9f2d2b01c9d07cd97501954e841bcf 100644 (file)
@@ -204,6 +204,13 @@ int MothurMetastats::runMetastats(string outputFileName, vector< vector<double>
                        storage[i][7]=temp[i][1];
                        storage[i][8]=pvalues[i];
                }
+               cout.setf(ios::fixed, ios::floatfield); cout.setf(ios::showpoint);
+               cout << "pvalues" << endl;
+               for (int i = 0; i < row; i++){ if (pvalues[i] < 0.0000001) {
+                       pvalues[i] = 0.0;
+               }cout << pvalues[i] << '\t'; }
+               cout << endl;
+               calc_qvalues(pvalues);
                
                // BACKUP checks
                cout.setf(ios::fixed, ios::floatfield); cout.setf(ios::showpoint);
@@ -246,6 +253,7 @@ int MothurMetastats::runMetastats(string outputFileName, vector< vector<double>
                out << endl << endl;
                out.close();
                
+               
                return 0;
        
        }catch(exception& e) {
@@ -484,5 +492,325 @@ int MothurMetastats::calc_twosample_ts(vector<double>& Pmatrix, int secondGroupi
        }
 }
 /***********************************************************/
+vector<double> MothurMetastats::calc_qvalues(vector<double>& pValues) {
+       try {
+               vector<double> qvalues;
+               int numRows = pValues.size();
+               
+               //fill lambdas with 0.00, 0.01, 0.02... 0.95
+               vector<double> lambdas(96, 0);
+               for (int i = 1; i < lambdas.size(); i++) { lambdas[i] = lambdas[i-1] + 0.01; }
+               
+               vector<double> pi0_hat(lambdas.size(), 0);
+               
+               //calculate pi0_hat
+               for (int l = 0; l < lambdas.size(); l++){ // for each lambda value
+                       int count = 0;
+                       for (int i = 0; i < numRows; i++){ // for each p-value in order
+                               if (pValues[i] > lambdas[l]){ count++; }
+                               pi0_hat[l] = count/(double)(numRows*(1-lambdas[l]));
+                       }
+               }
+               
+               //vector<double> f_spline = smoothSpline(lambdas, pi0_hat, 3);
+               //double pi0 = f_spline[(f_spline.size()-1)];   // this is the essential pi0_hat value
+               
+               //cubic Spline
+               double pi0, notsure; //this is the essential pi0_hat value
+               int notSure;
+               vector<double> resultsSpline(lambdas.size(), 0.0);
+               spline(pi0_hat, lambdas, notSure, notSure, resultsSpline);
+               //some sort of loop to get best value??
+               splint(pi0_hat, lambdas, notsure, pi0, resultsSpline);
+               
+               //order p-values
+               vector<double> ordered_qs = qvalues;
+               vector<int> ordered_ps(pValues.size(), 0);
+               for (int i = 1; i < ordered_ps.size(); i++) {  ordered_ps[i] = ordered_ps[i-1] + 1; }
+               vector<double> tempPvalues = pValues;
+               OrderPValues(0, numRows-1, tempPvalues, ordered_ps);
+       
+               ordered_qs[numRows-1] <- min((pValues[ordered_ps[numRows-1]]*pi0), 1.0);
+               for (int i = (numRows-2); i >= 0; i--){
+                       double p = pValues[ordered_ps[i]];
+                       double temp = p*numRows*pi0/(double)i;
+                       
+                       ordered_qs[i] = min(temp, ordered_qs[i+1]);
+                       ordered_qs[i] = min(ordered_qs[i], 1.0);
+               }
+               
+               //re-distribute calculated qvalues to appropriate rows
+               for (int i = 0; i < numRows; i++){
+                       qvalues[ordered_ps[i]] = ordered_qs[i];
+               }
+               
+               return qvalues;
+               
+       }catch(exception& e) {
+               m->errorOut(e, "MothurMetastats", "calc_qvalues");
+               exit(1);
+       }
+}
+/***********************************************************/
+int MothurMetastats::OrderPValues(int low, int high, vector<double>& p, vector<int>& order) {
+       try {
+               
+               if (low < high) {
+                       int i = low+1;
+                       int j = high;
+                       int pivot = (low+high) / 2;
+                       
+                       swapElements(low, pivot, p, order);  //puts pivot in final spot
+                       
+                       /* compare value */
+                       double key = p[low];
+                       
+                       /* partition */
+                       while(i <= j) {
+                               /* find member above ... */
+                               while((i <= high) && (p[i] <= key))     {  i++;  }  
+                               
+                               /* find element below ... */
+                               while((j >= low) && (p[j] > key))       {  j--;  } 
+                               
+                               if(i < j) {
+                                       swapElements(i, j, p, order);
+                               }
+                       } 
+                       
+                       swapElements(low, j, p, order);
+                       
+                       /* recurse */
+                       OrderPValues(low, j-1, p, order);
+                       OrderPValues(j+1, high, p, order); 
+               }               
+               
+               return 0;
+               
+       }catch(exception& e) {
+               m->errorOut(e, "MothurMetastats", "OrderPValues");
+               exit(1);
+       }
+}
+/***********************************************************/
+int MothurMetastats::swapElements(int i, int j, vector<double>& p, vector<int>& order) {
+       try {
+               
+               double z = p[i];
+               p[i] = p[j];
+               p[j] = z;
+               
+               int temp = order[i];
+               order[i] = order[j];
+               order[j] = temp;
+               
+               return 0;
+               
+       }catch(exception& e) {
+               m->errorOut(e, "MothurMetastats", "swapElements");
+               exit(1);
+       }
+}
+/***********************************************************
+vector<double> MothurMetastats::smoothSpline(vector<double> x, vector<double> y, int df) {
+       try {
+               
+               cout << "lambdas" << endl;
+               for (int l = 0; l < x.size(); l++){ cout << x[l] << '\t'; }
+               cout << endl << "pi0_hat" << endl;
+               for (int l = 0; l < y.size(); l++){ cout << y[l] << '\t'; }
+               cout << endl;
+               
+               //double low = -1.5; double high = 1.5; double tol = 1e-04; double eps = 2e-08; double maxit = 500;
+               int n = x.size();
+               vector<double> w(n, 1);
+               
+               //x <- signif(x, 6L) - I think this rounds to 6 decimals places
+               //ux <- unique(sort(x)) //x will be unique and sorted since we created it
+               //ox <- match(x, ux) //since its unique and sort it will match
+               
+               vector<double> wbar(n, 0);
+               vector<double> ybar(n, 0);
+               vector<double> xbar(n, 0.0);
+               double yssw = 0.0;
+               for (int i = 0; i < n; i++) {
+                       wbar[i] = w[i];
+                       ybar[i] = w[i]*y[i];
+                       yssw += (w[i] * y[i] * y[i]) - wbar[i] * (ybar[i] * ybar[i]);
+                       xbar[i] = (x[i] - x[0]) / (x[n-1] - x[0]);
+               }
+               
+               vector<double> knot = sknot1(xbar);
+               int nk = knot.size() - 4;
+               
+               //double ispar = 0.0; double spar = 0.0; double icrit = 3.0; double dofoff = 3.0;
+                       
+               return y;
+               
+       }catch(exception& e) {
+               m->errorOut(e, "MothurMetastats", "smoothSpline");
+               exit(1);
+       }
+}
+/***********************************************************/
+// This function is taken from Numerical Recipes in C++ by Press et al., 2nd edition, pg. 479
+int MothurMetastats::spline(vector<double>& x, vector<double>& y, int yp1, int ypn, vector<double>& y2) {
+       try {
+               
+               cout << "lambdas" << endl;
+               for (int l = 0; l < x.size(); l++){ cout << x[l] << '\t'; }
+               cout << endl << "pi0_hat" << endl;
+               for (int l = 0; l < y.size(); l++){ cout << y[l] << '\t'; }
+               cout << endl;
+               
+               double p, qn, sig, un;
+               
+               int n = y2.size();
+               vector<double> u(n-1, 0.0);
+               
+               if (yp1 > 0.99e30) { y2[0] = u[0] = 0.0; }
+               else {
+                       y2[0] = -0.5;
+                       u[0] = (3.0 / (x[1] - x[0])) * ((y[1] - y[0]) / (x[1]-x[0]) - yp1);
+               }
+               
+               for (int i = 1; i < n-1; i++) {
+                       sig = (x[i]-x[i-1])/(x[i+1]-x[i-1]);
+                       p = sig * y2[i-1] + 2.0;
+                       y2[i] = (sig - 1.0)/p;
+                       u[i] = (y[i+1]-y[i]) / (x[i+1]-x[i]) - (y[i] - y[i-1]) / (x[i]-x[i-1]);
+                       u[i] = (6.0*u[i]/(x[i+1]-x[i-1])-sig*u[i-1])/p;
+               }
+               
+               if (ypn > 0.99e30) { qn=un=0.0; }
+               else {
+                       qn=0.5;
+                       un=(3.0/(x[n-1]-x[n-2]))*(ypn-(y[n-1]-y[n-2])/(x[n-1]-x[n-2]));
+               }
+               
+               y2[n-1]=(un-qn*u[n-2])/(qn*y2[n-2]+1.0);
+               
+               for (int k=n-2; k>=0; k--) {
+                       y2[k]=y2[k]*y2[k+1]+u[k];
+               }
+               
+               return 0;
+               
+       }catch(exception& e) {
+               m->errorOut(e, "MothurMetastats", "spline");
+               exit(1);
+       }
+}
+/***********************************************************/
+// This function is taken from Numerical Recipes in C++ by Press et al., 2nd edition, pg. 479
+int MothurMetastats::splint(vector<double>& xa, vector<double>& ya, double& x, double& y, vector<double>& y2a) {
+       try {
+               int k;          
+               double h,b,a;
+               
+               int n = xa.size();
+               
+               int klo=0;
+               int khi=n-1;
+               while (khi-klo > 1) {
+                       
+                       if (m->control_pressed) { break; }
+                       
+                       k = (khi+klo) >> 1;
+                       if (xa[k] > x) { khi=k; }
+                       else { klo=k; }
+               }
+               
+               h=xa[khi]-xa[klo];
+               if (h == 0.0) { m->mothurOut("[ERROR]: Bad xa input to splint routine."); m->mothurOutEndLine(); m->control_pressed = true; return 0; }
+               a=(xa[khi]-x)/h;
+               b=(x - xa[klo])/h;
+               y=a*ya[klo]+b*ya[khi]+((a*a*a-a)*y2a[klo]+(b*b*b-b)*y2a[khi])*(h*h)/6.0;
+               
+               return 0;
+               
+       }catch(exception& e) {
+               m->errorOut(e, "MothurMetastats", "splint");
+               exit(1);
+       }
+}
+/***********************************************************/
+vector<double> MothurMetastats::sknot1(vector<double> x) {
+       try {
+               vector<double> knots;
+               int n = x.size();
+               int nk = nkn(n);
+               
+               cout << nk << endl;
+               //R equivalent - rep(x[1L], 3L)
+               knots.push_back(x[0]); knots.push_back(x[0]); knots.push_back(x[0]);
+               
+               //generate a sequence of nk equally spaced values from 1 to n. R equivalent = seq.int(1, n, length.out = nk)
+               vector<int> indexes = getSequence(0, n-1, nk);
+               for (int i = 0; i < indexes.size(); i++) { knots.push_back(x[indexes[i]]); }
+               
+               //R equivalent - rep(x[n], 3L)
+               knots.push_back(x[n-1]); knots.push_back(x[n-1]); knots.push_back(x[n-1]);
+               
+               return knots;
+               
+       }catch(exception& e) {
+               m->errorOut(e, "MothurMetastats", "sknot1");
+               exit(1);
+       }
+}
+/***********************************************************/
+vector<int> MothurMetastats::getSequence(int start, int end, int length) {
+       try {
+               vector<int> sequence;
+               double increment = (end-start) / (double) (length-1);
+               
+               sequence.push_back(start);
+               for (int i = 1; i < length-1; i++) {
+                       sequence.push_back(int(i*increment));
+               }
+               sequence.push_back(end);
+               
+               return sequence;
+               
+       }catch(exception& e) {
+               m->errorOut(e, "MothurMetastats", "getSequence");
+               exit(1);
+       }
+}      
+/***********************************************************/
+//not right, havent fully figured out the variable types yet...
+int MothurMetastats::nkn(int n) {
+       try {
+               
+               if (n < 50) { return n; }
+               else {
+                       double a1 = log2(50);
+                       double a2 = log2(100);
+                       double a3 = log2(140);
+                       double a4 = log2(200);
+                       
+                       if (n < 200) {
+                               return (int)pow(2.0, (a1 + (a2 - a1) * (n - 50)/(float)150));
+                       }else if (n < 800) {
+                               return (int)pow(2.0, (a2 + (a3 - a2) * (n - 200)/(float)600));
+                       }else if (n < 3200) {
+                               return (int)pow(2.0, (a3 + (a4 - a3) * (n - 800)/(float)2400));
+                       }else {
+                               return (int)pow((double)(200 + (n - 3200)), 0.2);
+                       }
+               }
+       
+               return 0;
+               
+       }catch(exception& e) {
+               m->errorOut(e, "MothurMetastats", "nkn");
+               exit(1);
+       }
+}
+/***********************************************************/
+
+
+