fixes while testing 1.33.0

[mothur.git] / linearalgebra.cpp
diff --git a/linearalgebra.cpp b/linearalgebra.cpp

index effb1ad44e04abd9c1bdd8523ca3b05f4253de94..bacf2c1c07bb420430d07416b6ca7354c650a79b 100644 (file)
--- a/linearalgebra.cpp
+++ b/linearalgebra.cpp
@@ -8,6 +8,9 @@
   */
  
  #include "linearalgebra.h"
+#include "wilcox.h"
+
+#define PI 3.1415926535897932384626433832795
  
  // This class references functions used from "Numerical Recipes in C++" //
  
@@ -111,7 +114,7 @@ double LinearAlgebra::gammp(const double a, const double x) {
  }
  /*********************************************************************************************************************************/
  //Numerical Recipes pg. 223
-double LinearAlgebra::gammq(const double a, const double x) {
+/*double LinearAlgebra::gammq(const double a, const double x) {
      try {
          double gamser,gammcf,gln;
          
@@ -127,11 +130,11 @@ double LinearAlgebra::gammq(const double a, const double x) {
          return 0;
      }
         catch(exception& e) {
-               m->errorOut(e, "LinearAlgebra", "gammp");
+               m->errorOut(e, "LinearAlgebra", "gammq");
                 exit(1);
         }
  }
-/*********************************************************************************************************************************/
+*********************************************************************************************************************************/
  //Numerical Recipes pg. 224
  double LinearAlgebra::gcf(double& gammcf, const double a, const double x, double& gln){
      try {
@@ -158,7 +161,7 @@ double LinearAlgebra::gcf(double& gammcf, const double a, const double x, double
              h *= del;
              if (fabs(del-1.0) <= EPS) break;
          }
-        if (i > ITMAX)  { m->mothurOut("[ERROR]: a too large, ITMAX too small in gcf\n"); m->control_pressed = true; }
+        if (i > ITMAX)  { m->mothurOut("[ERROR]: " + toString(a) + " too large, ITMAX=100 too small in gcf\n"); m->control_pressed = true; }
          gammcf=exp(-x+a*log(x)-gln)*h;
          
          return 0.0;
@@ -252,7 +255,7 @@ double LinearAlgebra::betacf(const double a, const double b, const double x) {
         }
  }
  /*********************************************************************************************************************************/
-
+//[3][4] * [4][5] - columns in first must match rows in second, returns matrix[3][5]
  vector<vector<double> > LinearAlgebra::matrix_mult(vector<vector<double> > first, vector<vector<double> > second){
         try {
                 vector<vector<double> > product;
@@ -286,7 +289,23 @@ vector<vector<double> > LinearAlgebra::matrix_mult(vector<vector<double> > first
         }
         
  }
+/*********************************************************************************************************************************/
  
+vector<vector<double> > LinearAlgebra::transpose(vector<vector<double> >matrix){
+       try {
+               vector<vector<double> > trans; trans.resize(matrix[0].size());
+        for (int i = 0; i < trans.size(); i++) {
+            for (int j = 0; j < matrix.size(); j++) { trans[i].push_back(matrix[j][i]); }
+        }
+                               
+               return trans;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "LinearAlgebra", "transpose");
+               exit(1);
+       }
+       
+}
  /*********************************************************************************************************************************/
  
  void LinearAlgebra::recenter(double offset, vector<vector<double> > D, vector<vector<double> >& G){
@@ -1228,7 +1247,259 @@ double LinearAlgebra::calcKendallSig(double n, double r){
                 exit(1);
         }
  }
+/*********************************************************************************************************************************/
+double LinearAlgebra::calcKruskalWallis(vector<spearmanRank>& values, double& pValue){
+       try {
+        double H;
+        set<string> treatments;
+        
+        //rank values
+        sort(values.begin(), values.end(), compareSpearman);
+        vector<spearmanRank*> ties;
+        int rankTotal = 0;
+        vector<int> TIES;
+        for (int j = 0; j < values.size(); j++) {
+            treatments.insert(values[j].name);
+            rankTotal += (j+1);
+            ties.push_back(&(values[j]));
+            
+            if (j != values.size()-1) { // you are not the last so you can look ahead
+                if (values[j].score != values[j+1].score) { // you are done with ties, rank them and continue
+                    if (ties.size() > 1) { TIES.push_back(ties.size()); }
+                    for (int k = 0; k < ties.size(); k++) {
+                        double thisrank = rankTotal / (double) ties.size();
+                        (*ties[k]).score = thisrank;
+                    }
+                    ties.clear();
+                    rankTotal = 0;
+                }
+            }else { // you are the last one
+                if (ties.size() > 1) { TIES.push_back(ties.size()); }
+                for (int k = 0; k < ties.size(); k++) {
+                    double thisrank = rankTotal / (double) ties.size();
+                    (*ties[k]).score = thisrank;
+                }
+            }
+        }
+        
+        
+        // H = 12/(N*(N+1)) * (sum Ti^2/n) - 3(N+1)
+        map<string, double> sums;
+        map<string, double> counts;
+        for (set<string>::iterator it = treatments.begin(); it != treatments.end(); it++) { sums[*it] = 0.0; counts[*it] = 0; }
+        
+        for (int j = 0; j < values.size(); j++) {
+            sums[values[j].name] += values[j].score;
+            counts[values[j].name]+= 1.0;
+        }
+        
+        double middleTerm = 0.0;
+        for (set<string>::iterator it = treatments.begin(); it != treatments.end(); it++) {
+            middleTerm += ((sums[*it]*sums[*it])/counts[*it]);
+        }
+        
+        double firstTerm = 12 / (double) (values.size()*(values.size()+1));
+        double lastTerm = 3 * (values.size()+1);
+        
+        H = firstTerm * middleTerm - lastTerm;
+       
+        //adjust for ties
+        if (TIES.size() != 0) {
+            double sum = 0.0;
+            for (int j = 0; j < TIES.size(); j++) { sum += ((TIES[j]*TIES[j]*TIES[j])-TIES[j]); }
+            double result = 1.0 - (sum / (double) ((values.size()*values.size()*values.size())-values.size()));
+            H /= result;
+        }
+        
+        if (isnan(H) || isinf(H)) { H = 0; }
+        
+        //Numerical Recipes pg221
+        pValue = 1.0 - (gammp(((treatments.size()-1)/(double)2.0), H/2.0));
+        
+        return H;
+    }
+       catch(exception& e) {
+               m->errorOut(e, "LinearAlgebra", "calcKruskalWallis");
+               exit(1);
+       }
+}
+/*********************************************************************************************************************************/
+double LinearAlgebra::normalvariate(double mean, double standardDeviation) {
+    try {
+        double u1 = ((double)(rand()) + 1.0 )/( (double)(RAND_MAX) + 1.0);
+        double u2 = ((double)(rand()) + 1.0 )/( (double)(RAND_MAX) + 1.0);
+        //double r = sqrt( -2.0*log(u1) );
+        //double theta = 2.0*PI*u2;
+        //cout << cos(8.*atan(1.)*u2)*sqrt(-2.*log(u1)) << endl;
+        return cos(8.*atan(1.)*u2)*sqrt(-2.*log(u1));
+    }
+       catch(exception& e) {
+               m->errorOut(e, "LinearAlgebra", "normalvariate");
+               exit(1);
+       }
+}
+/*********************************************************************************************************************************/
+//thanks http://www.johndcook.com/cpp_phi.html
+double LinearAlgebra::pnorm(double x){
+    try {
+        // constants
+        double a1 =  0.254829592;
+        double a2 = -0.284496736;
+        double a3 =  1.421413741;
+        double a4 = -1.453152027;
+        double a5 =  1.061405429;
+        double p  =  0.3275911;
+        
+        // Save the sign of x
+        int sign = 1;
+        if (x < 0)
+            sign = -1;
+        x = fabs(x)/sqrt(2.0);
+        
+        // A&S formula 7.1.26
+        double t = 1.0/(1.0 + p*x);
+        double y = 1.0 - (((((a5*t + a4)*t) + a3)*t + a2)*t + a1)*t*exp(-x*x);
+        
+        return 0.5*(1.0 + sign*y);
+        
+    }
+       catch(exception& e) {
+               m->errorOut(e, "LinearAlgebra", "pnorm");
+               exit(1);
+       }
+}
+
+/*********************************************************************************************************************************/
+double LinearAlgebra::calcWilcoxon(vector<double>& x, vector<double>& y, double& sig){
+       try {           
+               double W = 0.0;
+        sig = 0.0;
+        
+        vector<spearmanRank> ranks;
+        for (int i = 0; i < x.size(); i++) {
+            if (m->control_pressed) { return W; }
+            spearmanRank member("x", x[i]);
+            ranks.push_back(member);
+        }
+        
+        for (int i = 0; i < y.size(); i++) {
+            if (m->control_pressed) { return W; }
+            spearmanRank member("y", y[i]);
+            ranks.push_back(member);
+        }
+        
+        //sort values
+               sort(ranks.begin(), ranks.end(), compareSpearman);
+               
+               //convert scores to ranks of x
+               vector<spearmanRank*> ties;
+               int rankTotal = 0;
+        vector<int> TIES;
+               for (int j = 0; j < ranks.size(); j++) {
+            if (m->control_pressed) { return W; }
+                       rankTotal += (j+1);
+                       ties.push_back(&(ranks[j]));
+            
+                       if (j != ranks.size()-1) { // you are not the last so you can look ahead
+                               if (ranks[j].score != ranks[j+1].score) { // you are done with ties, rank them and continue
+                    if (ties.size() > 1) { TIES.push_back(ties.size()); }
+                                       for (int k = 0; k < ties.size(); k++) {
+                                               float thisrank = rankTotal / (float) ties.size();
+                                               (*ties[k]).score = thisrank;
+                                       }
+                                       ties.clear();
+                                       rankTotal = 0;
+                               }
+                       }else { // you are the last one
+                if (ties.size() > 1) { TIES.push_back(ties.size()); }
+                               for (int k = 0; k < ties.size(); k++) {
+                                       float thisrank = rankTotal / (float) ties.size();
+                                       (*ties[k]).score = thisrank;
+                               }
+                       }
+               }
+        
+        //from R wilcox.test function
+        //STATISTIC <- sum(r[seq_along(x)]) - n.x * (n.x + 1)/2
+        double sumRanks = 0.0;
+        for (int i = 0; i < ranks.size(); i++) {
+            if (m->control_pressed) { return W; }
+            if (ranks[i].name == "x") { sumRanks += ranks[i].score; }
+        }
+        
+        W = sumRanks - x.size() * ((double)(x.size() + 1)) / 2.0;
+        
+        //exact <- (n.x < 50) && (n.y < 50)
+        bool findExact = false;
+        if ((x.size() < 50) && (y.size() < 50)) { findExact = true; }
+        
+        
+        if (findExact && (TIES.size() == 0)) { //find exact and no ties
+            //PVAL <- switch(alternative, two.sided = {
+            //p <- if (STATISTIC > (n.x * n.y/2))
+            PWilcox wilcox;
+            double pval = 0.0;
+            if (W > ((double)x.size()*y.size()/2.0)) {
+                //pwilcox(STATISTIC-1, n.x, n.y, lower.tail = FALSE)
+                pval = wilcox.pwilcox(W-1, x.size(), y.size(), false);
+            }else {
+                //pwilcox(STATISTIC,n.x, n.y)
+                pval = wilcox.pwilcox(W, x.size(), y.size(), true);
+            }
+            sig = 2.0 * pval;
+            if (1.0 < sig) { sig = 1.0; }
+        }else {
+            //z <- STATISTIC - n.x * n.y/2
+            double z = W - (double)(x.size() * y.size()/2.0);
+            //NTIES <- table(r)
+            double sum = 0.0;
+            for (int j = 0; j < TIES.size(); j++) { sum += ((TIES[j]*TIES[j]*TIES[j])-TIES[j]); }
+           
+            //SIGMA <- sqrt((n.x * n.y/12) * ((n.x + n.y + 1) -
+                                            //sum(NTIES^3 - NTIES)/((n.x + n.y) * (n.x + n.y -
+                                                                            //1))))
+            double sigma = 0.0;
+            double firstTerm = (double)(x.size() * y.size()/12.0);
+            double secondTerm = (double)(x.size() + y.size() + 1) - sum / (double)((x.size() + y.size()) * (x.size() + y.size() - 1));
+            sigma = sqrt(firstTerm * secondTerm);
+            
+            //CORRECTION <- switch(alternative, two.sided = sign(z) * 0.5, greater = 0.5, less = -0.5)
+            double CORRECTION = 0.0;
+            if (z < 0) { CORRECTION = -1.0; }
+            else if (z > 0) { CORRECTION = 1.0; }
+            CORRECTION *= 0.5;
+            
+            z = (z - CORRECTION)/sigma;
+            
+            //PVAL <- switch(alternative,  two.sided = 2 * min(pnorm(z), pnorm(z, lower.tail = FALSE)))
+            sig = pnorm(z);
+            if ((1.0-sig) < sig) { sig = 1.0 - sig; }
+            sig *= 2;
+        }
+        
+        return W;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "LinearAlgebra", "calcWilcoxon");
+               exit(1);
+       }
+}
  
+/*********************************************************************************************************************************/
+double LinearAlgebra::choose(double n, double k){
+       try {
+        n = floor(n + 0.5);
+        k = floor(k + 0.5);
+        
+        double lchoose = gammln(n + 1.0) - gammln(k + 1.0) - gammln(n - k + 1.0);
+        
+        return (floor(exp(lchoose) + 0.5));
+    }
+       catch(exception& e) {
+               m->errorOut(e, "LinearAlgebra", "choose");
+               exit(1);
+       }
+}
  /*********************************************************************************************************************************/
  double LinearAlgebra::calcSpearman(vector<double>& x, vector<double>& y, double& sig){
         try {
@@ -1479,29 +1750,865 @@ double LinearAlgebra::calcPearsonSig(double n, double r){
  /*********************************************************************************************************************************/
  
  vector<vector<double> > LinearAlgebra::getObservedEuclideanDistance(vector<vector<double> >& relAbundData){
+    try {
  
-       int numSamples = relAbundData.size();
-       int numOTUs = relAbundData[0].size();
-       
-       vector<vector<double> > dMatrix(numSamples);
-       for(int i=0;i<numSamples;i++){
-               dMatrix[i].resize(numSamples);
+        int numSamples = relAbundData.size();
+        int numOTUs = relAbundData[0].size();
+        
+        vector<vector<double> > dMatrix(numSamples);
+        for(int i=0;i<numSamples;i++){
+            dMatrix[i].resize(numSamples);
+        }
+        
+        for(int i=0;i<numSamples;i++){
+            for(int j=0;j<numSamples;j++){
+                
+                if (m->control_pressed) { return dMatrix; }
+                
+                double d = 0;
+                for(int k=0;k<numOTUs;k++){
+                    d += pow((relAbundData[i][k] - relAbundData[j][k]), 2.0000);
+                }
+                dMatrix[i][j] = pow(d, 0.50000);
+                dMatrix[j][i] = dMatrix[i][j];
+                
+            }
+        }
+        return dMatrix;
         }
-       
-       for(int i=0;i<numSamples;i++){
-               for(int j=0;j<numSamples;j++){
-                       
-                       double d = 0;
-                       for(int k=0;k<numOTUs;k++){
-                               d += pow((relAbundData[i][k] - relAbundData[j][k]), 2.0000);
-                       }
-                       dMatrix[i][j] = pow(d, 0.50000);
-                       dMatrix[j][i] = dMatrix[i][j];
-                       
-               }
+       catch(exception& e) {
+               m->errorOut(e, "LinearAlgebra", "getObservedEuclideanDistance");
+               exit(1);
+       }
+}
+
+/*********************************************************************************************************************************/
+vector<double> LinearAlgebra::solveEquations(vector<vector<double> > A, vector<double> b){
+    try {
+        int length = (int)b.size();
+        vector<double> x(length, 0);
+        vector<int> index(length);
+        for(int i=0;i<length;i++){  index[i] = i;   }
+        double d;
+        
+        ludcmp(A, index, d);  if (m->control_pressed) { return b; }
+        lubksb(A, index, b);
+        
+        return b;
+    }
+       catch(exception& e) {
+               m->errorOut(e, "LinearAlgebra", "solveEquations");
+               exit(1);
+       }
+}
+/*********************************************************************************************************************************/
+vector<float> LinearAlgebra::solveEquations(vector<vector<float> > A, vector<float> b){
+    try {
+        int length = (int)b.size();
+        vector<double> x(length, 0);
+        vector<int> index(length);
+        for(int i=0;i<length;i++){  index[i] = i;   }
+        float d;
+        
+        ludcmp(A, index, d);  if (m->control_pressed) { return b; }
+        lubksb(A, index, b);
+        
+        return b;
+    }
+       catch(exception& e) {
+               m->errorOut(e, "LinearAlgebra", "solveEquations");
+               exit(1);
+       }
+}
+
+/*********************************************************************************************************************************/
+
+void LinearAlgebra::ludcmp(vector<vector<double> >& A, vector<int>& index, double& d){
+    try {
+        double tiny = 1e-20;
+        
+        int n = (int)A.size();
+        vector<double> vv(n, 0.0);
+        double temp;
+        int imax;
+        
+        d = 1.0;
+        
+        for(int i=0;i<n;i++){
+            double big = 0.0;
+            for(int j=0;j<n;j++){   if((temp=fabs(A[i][j])) > big ) big=temp;  }
+            if(big==0.0){   m->mothurOut("Singular matrix in routine ludcmp\n");    }
+            vv[i] = 1.0/big;
+        }
+        
+        for(int j=0;j<n;j++){
+            if (m->control_pressed) { break; }
+            for(int i=0;i<j;i++){
+                double sum = A[i][j];
+                for(int k=0;k<i;k++){   sum -= A[i][k] * A[k][j];   }
+                A[i][j] = sum;
+            }
+            
+            double big = 0.0;
+            for(int i=j;i<n;i++){
+                double sum = A[i][j];
+                for(int k=0;k<j;k++){   sum -= A[i][k] * A[k][j];   }
+                A[i][j] = sum;
+                double dum;
+                if((dum = vv[i] * fabs(sum)) >= big){
+                    big = dum;
+                    imax = i;
+                }
+            }
+            if(j != imax){
+                for(int k=0;k<n;k++){
+                    double dum = A[imax][k];
+                    A[imax][k] = A[j][k];
+                    A[j][k] = dum;
+                }
+                d = -d;
+                vv[imax] = vv[j];
+            }
+            index[j] = imax;
+            
+            if(A[j][j] == 0.0){ A[j][j] = tiny; }
+            
+            if(j != n-1){
+                double dum = 1.0/A[j][j];
+                for(int i=j+1;i<n;i++){ A[i][j] *= dum; }
+            }
+        }
+    }
+       catch(exception& e) {
+               m->errorOut(e, "LinearAlgebra", "ludcmp");
+               exit(1);
+       }
+}
+
+/*********************************************************************************************************************************/
+
+void LinearAlgebra::lubksb(vector<vector<double> >& A, vector<int>& index, vector<double>& b){
+    try {
+        double total;
+        int n = (int)A.size();
+        int ii = 0;
+        
+        for(int i=0;i<n;i++){
+            if (m->control_pressed) { break; }
+            int ip = index[i];
+            total = b[ip];
+            b[ip] = b[i];
+            
+            if (ii != 0) {
+                for(int j=ii-1;j<i;j++){
+                    total -= A[i][j] * b[j];
+                }
+            }
+            else if(total != 0){  ii = i+1;   }
+            b[i] = total;
+        }
+        for(int i=n-1;i>=0;i--){
+            total = b[i];
+            for(int j=i+1;j<n;j++){ total -= A[i][j] * b[j];  }
+            b[i] = total / A[i][i];
+        }
+    }
+       catch(exception& e) {
+               m->errorOut(e, "LinearAlgebra", "lubksb");
+               exit(1);
+       }
+}
+/*********************************************************************************************************************************/
+
+void LinearAlgebra::ludcmp(vector<vector<float> >& A, vector<int>& index, float& d){
+    try {
+        double tiny = 1e-20;
+        
+        int n = (int)A.size();
+        vector<float> vv(n, 0.0);
+        double temp;
+        int imax;
+        
+        d = 1.0;
+        
+        for(int i=0;i<n;i++){
+            float big = 0.0;
+            for(int j=0;j<n;j++){   if((temp=fabs(A[i][j])) > big ) big=temp;  }
+            if(big==0.0){   m->mothurOut("Singular matrix in routine ludcmp\n");    }
+            vv[i] = 1.0/big;
+        }
+        
+        for(int j=0;j<n;j++){
+            if (m->control_pressed) { break; }
+            for(int i=0;i<j;i++){
+                float sum = A[i][j];
+                for(int k=0;k<i;k++){   sum -= A[i][k] * A[k][j];   }
+                A[i][j] = sum;
+            }
+            
+            float big = 0.0;
+            for(int i=j;i<n;i++){
+                float sum = A[i][j];
+                for(int k=0;k<j;k++){   sum -= A[i][k] * A[k][j];   }
+                A[i][j] = sum;
+                float dum;
+                if((dum = vv[i] * fabs(sum)) >= big){
+                    big = dum;
+                    imax = i;
+                }
+            }
+            if(j != imax){
+                for(int k=0;k<n;k++){
+                    float dum = A[imax][k];
+                    A[imax][k] = A[j][k];
+                    A[j][k] = dum;
+                }
+                d = -d;
+                vv[imax] = vv[j];
+            }
+            index[j] = imax;
+            
+            if(A[j][j] == 0.0){ A[j][j] = tiny; }
+            
+            if(j != n-1){
+                float dum = 1.0/A[j][j];
+                for(int i=j+1;i<n;i++){ A[i][j] *= dum; }
+            }
+        }
+    }
+       catch(exception& e) {
+               m->errorOut(e, "LinearAlgebra", "ludcmp");
+               exit(1);
+       }
+}
+
+/*********************************************************************************************************************************/
+
+void LinearAlgebra::lubksb(vector<vector<float> >& A, vector<int>& index, vector<float>& b){
+    try {
+        float total;
+        int n = (int)A.size();
+        int ii = 0;
+        
+        for(int i=0;i<n;i++){
+            if (m->control_pressed) { break; }
+            int ip = index[i];
+            total = b[ip];
+            b[ip] = b[i];
+            
+            if (ii != 0) {
+                for(int j=ii-1;j<i;j++){
+                    total -= A[i][j] * b[j];
+                }
+            }
+            else if(total != 0){  ii = i+1;   }
+            b[i] = total;
+        }
+        for(int i=n-1;i>=0;i--){
+            total = b[i];
+            for(int j=i+1;j<n;j++){ total -= A[i][j] * b[j];  }
+            b[i] = total / A[i][i];
+        }
+    }
+       catch(exception& e) {
+               m->errorOut(e, "LinearAlgebra", "lubksb");
+               exit(1);
         }
-       return dMatrix;
-       
  }
  
  /*********************************************************************************************************************************/
+
+vector<vector<double> > LinearAlgebra::getInverse(vector<vector<double> > matrix){
+    try {
+        int n = (int)matrix.size();
+        
+        vector<vector<double> > inverse(n);
+        for(int i=0;i<n;i++){   inverse[i].assign(n, 0.0000);   }
+        
+        vector<double> column(n, 0.0000);
+        vector<int> index(n, 0);
+        double dummy;
+        
+        ludcmp(matrix, index, dummy);
+        
+        for(int j=0;j<n;j++){
+            if (m->control_pressed) { break; }
+            
+            column.assign(n, 0);
+            
+            column[j] = 1.0000;
+            
+            lubksb(matrix, index, column);
+            
+            for(int i=0;i<n;i++){   inverse[i][j] = column[i];  }
+        }
+        
+        return inverse;
+    }
+       catch(exception& e) {
+               m->errorOut(e, "LinearAlgebra", "getInverse");
+               exit(1);
+       }
+}
+/*********************************************************************************************************************************/
+//modelled R lda function - MASS:::lda.default
+vector< vector<double> > LinearAlgebra::lda(vector< vector<double> >& a, vector<string> groups, vector< vector<double> >& means, bool& ignore) {
+    try {
+        
+        set<string> uniqueGroups;
+        for (int i = 0; i < groups.size(); i++) { uniqueGroups.insert(groups[i]); }
+        int numGroups = uniqueGroups.size();
+        
+        map<string, int> quickIndex; //className to index. hoping to save counts, proportions and means in vectors to save time. This map will allow us to know index 0 in counts refers to group1.
+        int count = 0;
+        for (set<string>::iterator it = uniqueGroups.begin(); it != uniqueGroups.end(); it++) { quickIndex[*it] = count; count++; }
+        
+        int numSampled = groups.size(); //number of sampled groups
+        int numOtus = a.size(); //number of flagged bins
+        
+        //counts <- as.vector(table(g)) //number of samples from each class in random sampling
+        vector<int> counts; counts.resize(numGroups, 0);
+        for (int i = 0; i < groups.size(); i++) {
+            counts[quickIndex[groups[i]]]++;
+        }
+        
+        vector<double> proportions; proportions.resize(numGroups, 0.0);
+        for (int i = 0; i < numGroups; i++) {  proportions[i] = counts[i] / (double) numSampled; }
+        
+        means.clear(); //means[0] -> means[0][0] average for [group0][OTU0].
+        means.resize(numGroups); for (int i = 0; i < means.size(); i++) { means[i].resize(numOtus, 0.0); }
+        for (int j = 0; j < numSampled; j++) { //total for each class for each OTU
+            for (int i = 0; i < numOtus; i++) { means[quickIndex[groups[j]]][i] += a[i][j]; }
+        }
+        //average for each class for each OTU
+        for (int j = 0; j < numGroups; j++) { for (int i = 0; i < numOtus; i++) { means[j][i] /= counts[j]; }  }
+        
+        //randCov <- x - group.means[g, ]
+        vector< vector<double> > randCov; //randCov[0][0] -> (random sample value0 for OTU0 - average for samples group in OTU0). example OTU0, random sample 0.01 from class early. average of class early for OTU0 is 0.005. randCov[0][0] = (0.01-0.005)
+        for (int i = 0; i < numOtus; i++) { //for each flagged OTU
+            vector<double> tempRand;
+            for (int j = 0; j < numSampled; j++) { tempRand.push_back(a[i][j] - means[quickIndex[groups[j]]][i]);  }
+            randCov.push_back(tempRand);
+        }
+        
+        //find variance and std for each OTU
+        //f1 <- sqrt(diag(var(x - group.means[g, ])))
+        vector<double> stdF1;
+        vector<double> ave;
+        for (int i = 0; i < numOtus; i++) {
+            stdF1.push_back(0.0);
+            ave.push_back(m->getAverage(randCov[i]));
+        }
+        
+        for (int i = 0; i < numOtus; i++) {
+            for (int j = 0; j < numSampled; j++) { stdF1[i] += ((randCov[i][j] - ave[i]) * (randCov[i][j] - ave[i]));  }
+        }
+        
+        //fac <- 1/(n - ng)
+        double fac = 1 / (double) (numSampled-numGroups);
+        
+        for (int i = 0; i < stdF1.size(); i++) {
+            stdF1[i] /= (double) (numSampled-1);
+            stdF1[i] = sqrt(stdF1[i]);
+        }
+        
+        vector< vector<double> > scaling; //[numOTUS][numOTUS]
+        for (int i = 0; i < numOtus; i++) {
+            vector<double> temp;
+            for (int j = 0; j < numOtus; j++) {
+                if (i == j) { temp.push_back(1.0/stdF1[i]); }
+                else { temp.push_back(0.0); }
+                
+            }
+            scaling.push_back(temp);
+        }
+        /*
+         cout << "scaling = " << endl;
+         for (int i = 0; i < scaling.size(); i++) {
+         for (int j = 0; j < scaling[i].size(); j++) { cout << scaling[i][j] << '\t'; }
+         cout << endl;
+         }*/
+        
+        //X <- sqrt(fac) * ((x - group.means[g, ]) %*% scaling)
+        vector< vector<double> > X = randCov; //[numOTUS][numSampled]
+        //((x - group.means[g, ]) %*% scaling)
+        //matrix multiplication of randCov and scaling
+        LinearAlgebra linear;
+        X = linear.matrix_mult(scaling, randCov); //[numOTUS][numOTUS] * [numOTUS][numSampled] = [numOTUS][numSampled]
+        fac = sqrt(fac);
+        
+        for (int i = 0; i < X.size(); i++) {
+            for (int j = 0; j < X[i].size(); j++) { X[i][j] *= fac;  }
+        }
+        
+        vector<double> d;
+        vector< vector<double> > v;
+        vector< vector<double> > Xcopy; //X = [numOTUS][numSampled]
+        bool transpose = false; //svd requires rows < columns, so if they are not then I need to transpose and look for the results in v.
+        if (X.size() < X[0].size()) { Xcopy = linear.transpose(X); transpose=true; }
+        else                        { Xcopy = X;                    }
+        linear.svd(Xcopy, d, v); //Xcopy gets the results we want for v below, because R's version is [numSampled][numOTUS]
+        
+        /*cout << "Xcopy = " << endl;
+        for (int i = 0; i < Xcopy.size(); i++) {
+            for (int j = 0; j < Xcopy[i].size(); j++) { cout << Xcopy[i][j] << '\t'; }
+            cout << endl;
+        }
+        cout << "v = " << endl;
+        for (int i = 0; i < v.size(); i++) {
+            for (int j = 0; j < v[i].size(); j++) { cout << v[i][j] << '\t'; }
+            cout << endl;
+        }
+         */
+        
+        int rank = 0;
+        set<int> goodColumns;
+        //cout << "d = " << endl;
+        for (int i = 0; i < d.size(); i++) {  if (d[i] > 0.0000000001) { rank++; goodColumns.insert(i); } } //cout << d[i] << endl;
+        
+        if (rank == 0) {
+            ignore=true; //m->mothurOut("[ERROR]: rank = 0: variables are numerically const\n"); m->control_pressed = true;
+            return scaling; }
+        
+        //scaling <- scaling %*% X.s$v[, 1L:rank] %*% diag(1/X.s$d[1L:rank], , rank)
+        //X.s$v[, 1L:rank] = columns in Xcopy that correspond to "good" d values
+        //diag(1/X.s$d[1L:rank], , rank) = matrix size rank * rank where the diagonal is 1/"good" dvalues
+        /*example:
+         d
+         [1] 3.721545e+00 3.034607e+00 2.296649e+00 7.986927e-16 6.922408e-16
+         [6] 5.471102e-16
+         
+         $v
+         [,1]        [,2]        [,3]        [,4]        [,5]        [,6]
+         [1,]  0.31122175  0.10944725  0.20183340 -0.30136820  0.60786235 -0.13537095
+         [2,] -0.29563726 -0.20568893  0.11233366 -0.05073289  0.48234270  0.21965978
+         ...
+         
+         [1] "X.s$v[, 1L:rank]"
+         [,1]        [,2]        [,3]
+         [1,]  0.31122175  0.10944725  0.20183340
+         [2,] -0.29563726 -0.20568893  0.11233366
+         ...
+         [1] "1/X.s$d[1L:rank]"
+         [1] 0.2687056 0.3295320 0.4354170
+         
+         [1] "diag(1/X.s$d[1L:rank], , rank)"
+         [,1]     [,2]     [,3]
+         [1,] 0.2687056 0.000000 0.000000
+         [2,] 0.0000000 0.329532 0.000000
+         [3,] 0.0000000 0.000000 0.435417
+         */
+        if (transpose) {
+            Xcopy = linear.transpose(v);
+            /*
+            cout << "Xcopy = " << endl;
+            for (int i = 0; i < Xcopy.size(); i++) {
+                for (int j = 0; j < Xcopy[i].size(); j++) { cout << Xcopy[i][j] << '\t'; }
+                cout << endl;
+            }*/
+        }
+        v.clear(); //store "good" columns - X.s$v[, 1L:rank]
+        v.resize(Xcopy.size()); //[numOTUS]["good" columns]
+        for (set<int>::iterator it = goodColumns.begin(); it != goodColumns.end(); it++) {
+            for (int i = 0; i < Xcopy.size(); i++) {
+                v[i].push_back(Xcopy[i][*it]);
+            }
+        }
+        
+        vector< vector<double> > diagRanks; diagRanks.resize(rank);
+        for (int i = 0; i < rank; i++) { diagRanks[i].resize(rank, 0.0); }
+        count = 0;
+        for (set<int>::iterator it = goodColumns.begin(); it != goodColumns.end(); it++) {  diagRanks[count][count] = 1.0 / d[*it]; count++; }
+        
+        scaling = linear.matrix_mult(linear.matrix_mult(scaling, v), diagRanks); //([numOTUS][numOTUS]*[numOTUS]["good" columns]) = [numOTUS]["good" columns] then ([numOTUS]["good" columns] * ["good" columns]["good" columns] = scaling = [numOTUS]["good" columns]
+        
+        /*cout << "scaling = " << endl;
+        for (int i = 0; i < scaling.size(); i++) {
+            for (int j = 0; j < scaling[i].size(); j++) { cout << scaling[i][j] << '\t'; }
+            cout << endl;
+        }*/
+        
+        //Note: linear.matrix_mult [1][numGroups] * [numGroups][numOTUs] - columns in first must match rows in second, returns matrix[1][numOTUs]
+        vector< vector<double> > prior; prior.push_back(proportions);
+        vector< vector<double> >  xbar = linear.matrix_mult(prior, means);
+        vector<double> xBar = xbar[0]; //length numOTUs
+        
+        /*cout << "xbar" << endl;
+        for (int j = 0; j < numOtus; j++) {  cout << xBar[j] <<'\t'; }cout <<  endl;*/
+        //fac <- 1/(ng - 1)
+        fac = 1 / (double) (numGroups-1);
+        //scale(group.means, center = xbar, scale = FALSE) %*% scaling
+        vector< vector<double> > scaledMeans = means; //[numGroups][numOTUs]
+        for (int i = 0; i < numGroups; i++) {
+            for (int j = 0; j < numOtus; j++) {  scaledMeans[i][j] -= xBar[j]; }
+        }
+        scaledMeans = linear.matrix_mult(scaledMeans, scaling); //[numGroups][numOTUS]*[numOTUS]["good"columns] = [numGroups]["good"columns]
+        
+        
+        //sqrt((n * prior) * fac)
+        vector<double> temp = proportions; //[numGroups]
+        for (int i = 0; i < temp.size(); i++) { temp[i] *= numSampled * fac; temp[i] = sqrt(temp[i]);  }
+        
+        //X <- sqrt((n * prior) * fac) * (scale(group.means, center = xbar, scale = FALSE) %*% scaling)
+        //X <- temp * scaledMeans
+        X.clear(); X = scaledMeans; //[numGroups]["good"columns]
+        for (int i = 0; i < X.size(); i++) {
+            for (int j = 0; j < X[i].size(); j++) {  X[i][j] *= temp[j];  }
+        }
+        /*
+        cout << "X = " << endl;
+        for (int i = 0; i < X.size(); i++) {
+            for (int j = 0; j < X[i].size(); j++) { cout << X[i][j] << '\t'; }
+            cout << endl;
+        }
+        */
+        
+        d.clear(); v.clear();
+        //we want to transpose so results are in Xcopy, but if that makes rows > columns then we don't since svd requires rows < cols.
+        transpose=false;
+        if (X.size() > X[0].size()) {   Xcopy = X;  transpose=true;     }
+        else                        {   Xcopy = linear.transpose(X);    }
+        linear.svd(Xcopy, d, v); //Xcopy gets the results we want for v below
+        /*cout << "Xcopy = " << endl;
+        for (int i = 0; i < Xcopy.size(); i++) {
+            for (int j = 0; j < Xcopy[i].size(); j++) { cout << Xcopy[i][j] << '\t'; }
+            cout << endl;
+        }
+        
+        cout << "v = " << endl;
+        for (int i = 0; i < v.size(); i++) {
+            for (int j = 0; j < v[i].size(); j++) { cout << v[i][j] << '\t'; }
+            cout << endl;
+        }
+        
+        cout << "d = " << endl;
+        for (int i = 0; i < d.size(); i++) { cout << d[i] << endl; }*/
+        
+        //rank <- sum(X.s$d > tol * X.s$d[1L])
+        //X.s$d[1L] = larger value in d vector
+        double largeD = m->max(d);
+        rank = 0; goodColumns.clear();
+        for (int i = 0; i < d.size(); i++) { if (d[i] > (0.0000000001*largeD)) { rank++; goodColumns.insert(i); } }
+        
+        if (rank == 0) {
+            ignore=true;//m->mothurOut("[ERROR]: rank = 0: class means are numerically identical.\n"); m->control_pressed = true;
+            return scaling; }
+        
+        if (transpose) { Xcopy = linear.transpose(v);  }
+        //scaling <- scaling %*% X.s$v[, 1L:rank] - scaling * "good" columns
+        v.clear(); //store "good" columns - X.s$v[, 1L:rank]
+        v.resize(Xcopy.size()); //Xcopy = ["good"columns][numGroups]
+        for (set<int>::iterator it = goodColumns.begin(); it != goodColumns.end(); it++) {
+            for (int i = 0; i < Xcopy.size(); i++) {
+                v[i].push_back(Xcopy[i][*it]);
+            }
+        }
+        
+        scaling = linear.matrix_mult(scaling, v); //[numOTUS]["good" columns] * ["good"columns][new "good" columns]
+        
+        /*cout << "scaling = " << endl;
+        for (int i = 0; i < scaling.size(); i++) {
+            for (int j = 0; j < scaling[i].size(); j++) { cout << scaling[i][j] << '\t'; }
+            cout << endl;
+        }*/
+        ignore=false;
+        return scaling;
+    }
+       catch(exception& e) {
+               m->errorOut(e, "LinearAlgebra", "lda");
+               exit(1);
+       }
+}
+/*********************************************************************************************************************************/
+//Singular value decomposition (SVD) - adapted from http://svn.lirec.eu/libs/magicsquares/src/SVD.cpp
+/*
+ * svdcomp - SVD decomposition routine.
+ * Takes an mxn matrix a and decomposes it into udv, where u,v are
+ * left and right orthogonal transformation matrices, and d is a
+ * diagonal matrix of singular values.
+ *
+ * This routine is adapted from svdecomp.c in XLISP-STAT 2.1 which is
+ * code from Numerical Recipes adapted by Luke Tierney and David Betz.
+ *
+ * Input to dsvd is as follows:
+ *   a = mxn matrix to be decomposed, gets overwritten with u
+ *   m = row dimension of a
+ *   n = column dimension of a
+ *   w = returns the vector of singular values of a
+ *   v = returns the right orthogonal transformation matrix
+ */
+
+int LinearAlgebra::svd(vector< vector<double> >& a, vector<double>& w, vector< vector<double> >& v) {
+    try {
+        int flag, i, its, j, jj, k, l, nm;
+        double c, f, h, s, x, y, z;
+        double anorm = 0.0, g = 0.0, scale = 0.0;
+
+        int numRows = a.size(); if (numRows == 0) { return 0; }
+        int numCols = a[0].size();
+        w.resize(numCols, 0.0);
+        v.resize(numCols); for (int i = 0; i < numCols; i++) { v[i].resize(numRows, 0.0); }
+    
+        vector<double> rv1; rv1.resize(numCols, 0.0);
+        if (numRows < numCols){  m->mothurOut("[ERROR]: numRows < numCols\n"); m->control_pressed = true; return 0; }
+
+        /* Householder reduction to bidiagonal form */
+        for (i = 0; i < numCols; i++)
+        {
+            /* left-hand reduction */
+            l = i + 1;
+            rv1[i] = scale * g;
+            g = s = scale = 0.0;
+            if (i < numRows)
+            {
+                for (k = i; k < numRows; k++)
+                    scale += fabs((double)a[k][i]);
+                if (scale)
+                {
+                    for (k = i; k < numRows; k++)
+                    {
+                        a[k][i] = (double)((double)a[k][i]/scale);
+                        s += ((double)a[k][i] * (double)a[k][i]);
+                    }
+                    f = (double)a[i][i];
+                    g = -SIGN(sqrt(s), f);
+                    h = f * g - s;
+                    a[i][i] = (double)(f - g);
+                    if (i != numCols - 1)
+                    {
+                        for (j = l; j < numCols; j++)
+                        {
+                            for (s = 0.0, k = i; k < numRows; k++)
+                                s += ((double)a[k][i] * (double)a[k][j]);
+                            f = s / h;
+                            for (k = i; k < numRows; k++)
+                                a[k][j] += (double)(f * (double)a[k][i]);
+                        }
+                    }
+                    for (k = i; k < numRows; k++)
+                        a[k][i] = (double)((double)a[k][i]*scale);
+                }
+            }
+            w[i] = (double)(scale * g);
+            
+            /* right-hand reduction */
+            g = s = scale = 0.0;
+            if (i < numRows && i != numCols - 1)
+            {
+                for (k = l; k < numCols; k++)
+                    scale += fabs((double)a[i][k]);
+                if (scale)
+                {
+                    for (k = l; k < numCols; k++)
+                    {
+                        a[i][k] = (double)((double)a[i][k]/scale);
+                        s += ((double)a[i][k] * (double)a[i][k]);
+                    }
+                    f = (double)a[i][l];
+                    g = -SIGN(sqrt(s), f);
+                    h = f * g - s;
+                    a[i][l] = (double)(f - g);
+                    for (k = l; k < numCols; k++)
+                        rv1[k] = (double)a[i][k] / h;
+                    if (i != numRows - 1)
+                    {
+                        for (j = l; j < numRows; j++)
+                        {
+                            for (s = 0.0, k = l; k < numCols; k++)
+                                s += ((double)a[j][k] * (double)a[i][k]);
+                            for (k = l; k < numCols; k++)
+                                a[j][k] += (double)(s * rv1[k]);
+                        }
+                    }
+                    for (k = l; k < numCols; k++)
+                        a[i][k] = (double)((double)a[i][k]*scale);
+                }
+            }
+            anorm = max(anorm, (fabs((double)w[i]) + fabs(rv1[i])));
+        }
+        
+        /* accumulate the right-hand transformation */
+        for (i = numCols - 1; i >= 0; i--)
+        {
+            if (i < numCols - 1)
+            {
+                if (g)
+                {
+                    for (j = l; j < numCols; j++)
+                        v[j][i] = (double)(((double)a[i][j] / (double)a[i][l]) / g);
+                    /* double division to avoid underflow */
+                    for (j = l; j < numCols; j++)
+                    {
+                        for (s = 0.0, k = l; k < numCols; k++)
+                            s += ((double)a[i][k] * (double)v[k][j]);
+                        for (k = l; k < numCols; k++)
+                            v[k][j] += (double)(s * (double)v[k][i]);
+                    }
+                }
+                for (j = l; j < numCols; j++)
+                    v[i][j] = v[j][i] = 0.0;
+            }
+            v[i][i] = 1.0;
+            g = rv1[i];
+            l = i;
+        }
+        
+        /* accumulate the left-hand transformation */
+        for (i = numCols - 1; i >= 0; i--)
+        {
+            l = i + 1;
+            g = (double)w[i];
+            if (i < numCols - 1)
+                for (j = l; j < numCols; j++)
+                    a[i][j] = 0.0;
+            if (g)
+            {
+                g = 1.0 / g;
+                if (i != numCols - 1)
+                {
+                    for (j = l; j < numCols; j++)
+                    {
+                        for (s = 0.0, k = l; k < numRows; k++)
+                            s += ((double)a[k][i] * (double)a[k][j]);
+                        f = (s / (double)a[i][i]) * g;
+                        for (k = i; k < numRows; k++)
+                            a[k][j] += (double)(f * (double)a[k][i]);
+                    }
+                }
+                for (j = i; j < numRows; j++)
+                    a[j][i] = (double)((double)a[j][i]*g);
+            }
+            else
+            {
+                for (j = i; j < numRows; j++)
+                    a[j][i] = 0.0;
+            }
+            ++a[i][i];
+        }
+        
+        /* diagonalize the bidiagonal form */
+        for (k = numCols - 1; k >= 0; k--)
+        {                             /* loop over singular values */
+            for (its = 0; its < 30; its++)
+            {                         /* loop over allowed iterations */
+                flag = 1;
+                for (l = k; l >= 0; l--)
+                {                     /* test for splitting */
+                    nm = l - 1;
+                    if (fabs(rv1[l]) + anorm == anorm)
+                    {
+                        flag = 0;
+                        break;
+                    }
+                    if (fabs((double)w[nm]) + anorm == anorm)
+                        break;
+                }
+                if (flag)
+                {
+                    c = 0.0;
+                    s = 1.0;
+                    for (i = l; i <= k; i++)
+                    {
+                        f = s * rv1[i];
+                        if (fabs(f) + anorm != anorm)
+                        {
+                            g = (double)w[i];
+                            h = pythag(f, g);
+                            w[i] = (double)h;
+                            h = 1.0 / h;
+                            c = g * h;
+                            s = (- f * h);
+                            for (j = 0; j < numRows; j++)
+                            {
+                                y = (double)a[j][nm];
+                                z = (double)a[j][i];
+                                a[j][nm] = (double)(y * c + z * s);
+                                a[j][i] = (double)(z * c - y * s);
+                            }
+                        }
+                    }
+                }
+                z = (double)w[k];
+                if (l == k)
+                {                  /* convergence */
+                    if (z < 0.0)
+                    {              /* make singular value nonnegative */
+                        w[k] = (double)(-z);
+                        for (j = 0; j < numCols; j++)
+                            v[j][k] = (-v[j][k]);
+                    }
+                    break;
+                }
+                if (its >= 30) {
+                    m->mothurOut("No convergence after 30,000! iterations \n"); m->control_pressed = true;
+                    return(0);
+                }
+                
+                /* shift from bottom 2 x 2 minor */
+                x = (double)w[l];
+                nm = k - 1;
+                y = (double)w[nm];
+                g = rv1[nm];
+                h = rv1[k];
+                f = ((y - z) * (y + z) + (g - h) * (g + h)) / (2.0 * h * y);
+                g = pythag(f, 1.0);
+                f = ((x - z) * (x + z) + h * ((y / (f + SIGN(g, f))) - h)) / x;
+                
+                /* next QR transformation */
+                c = s = 1.0;
+                for (j = l; j <= nm; j++)
+                {
+                    i = j + 1;
+                    g = rv1[i];
+                    y = (double)w[i];
+                    h = s * g;
+                    g = c * g;
+                    z = pythag(f, h);
+                    rv1[j] = z;
+                    c = f / z;
+                    s = h / z;
+                    f = x * c + g * s;
+                    g = g * c - x * s;
+                    h = y * s;
+                    y = y * c;
+                    for (jj = 0; jj < numCols; jj++)
+                    {
+                        x = (double)v[jj][j];
+                        z = (double)v[jj][i];
+                        v[jj][j] = (float)(x * c + z * s);
+                        v[jj][i] = (float)(z * c - x * s);
+                    }
+                    z = pythag(f, h);
+                    w[j] = (float)z;
+                    if (z) 
+                    {
+                        z = 1.0 / z;
+                        c = f * z;
+                        s = h * z;
+                    }
+                    f = (c * g) + (s * y);
+                    x = (c * y) - (s * g);
+                    for (jj = 0; jj < numRows; jj++)
+                    {
+                        y = (double)a[jj][j];
+                        z = (double)a[jj][i];
+                        a[jj][j] = (double)(y * c + z * s);
+                        a[jj][i] = (double)(z * c - y * s);
+                    }
+                }
+                rv1[l] = 0.0;
+                rv1[k] = f;
+                w[k] = (double)x;
+            }
+        }
+        
+        return(0);
+        
+    }
+       catch(exception& e) {
+               m->errorOut(e, "LinearAlgebra", "svd");
+               exit(1);
+       }
+}
+
+