]> git.donarmstrong.com Git - mothur.git/blobdiff - calcsparcc.cpp
added sparcc command. fixed bug in remove.groups that removed all groups if you...
[mothur.git] / calcsparcc.cpp
diff --git a/calcsparcc.cpp b/calcsparcc.cpp
new file mode 100644 (file)
index 0000000..9d1d88a
--- /dev/null
@@ -0,0 +1,344 @@
+//
+//  runSparcc.cpp
+//  PDSSparCC
+//
+//  Created by Patrick Schloss on 10/31/12.
+//  Copyright (c) 2012 University of Michigan. All rights reserved.
+//
+
+#include "calcSparcc.h"
+#include "linearalgebra.h"
+
+/**************************************************************************************************/
+
+CalcSparcc::CalcSparcc(vector<vector<float> > sharedVector, int maxIterations, int numSamplings, string method){
+    try {
+        m = MothurOut::getInstance();
+        numOTUs = (int)sharedVector[0].size();
+        numGroups = (int)sharedVector.size();
+        normalizationMethod = method;
+        int numOTUs = (int)sharedVector[0].size();
+        
+        addPseudoCount(sharedVector);
+        
+        vector<vector<vector<float> > > allCorrelations(numSamplings);
+        
+        //    float cycClockStart = clock();
+        //    unsigned long long cycTimeStart = time(NULL);
+        
+        for(int i=0;i<numSamplings;i++){
+            if (m->control_pressed) { break; }
+            vector<float> logFractions =  getLogFractions(sharedVector, method);
+            getT_Matrix(logFractions);     //this step is slow...
+            getT_Vector();
+            getD_Matrix();
+            vector<float> basisVariances = getBasisVariances();     //this step is slow...
+            vector<vector<float> > correlation = getBasisCorrelations(basisVariances);
+            
+            excluded.resize(numOTUs);
+            for(int j=0;j<numOTUs;j++){ excluded[j].assign(numOTUs, 0); }
+            
+            float maxRho = 1;
+            int excludeRow = -1;
+            int excludeColumn = -1;
+            
+            int iter = 0;
+            while(maxRho > 0.10 && iter < maxIterations){
+                maxRho = getExcludedPairs(correlation, excludeRow, excludeColumn);
+                excludeValues(excludeRow, excludeColumn);
+                vector<float> excludedBasisVariances = getBasisVariances();
+                correlation = getBasisCorrelations(excludedBasisVariances);
+                iter++;
+            }
+            allCorrelations[i] = correlation;
+        }
+        
+        if (!m->control_pressed) {
+            if(numSamplings > 1){
+                getMedian(allCorrelations);
+            }
+            else{
+                median = allCorrelations[0];
+            }
+        }
+//    cout << median[0][3] << '\t' << median[0][6] << endl;
+    }
+    catch(exception& e) {
+        m->errorOut(e, "CalcSparcc", "CalcSparcc");
+        exit(1);
+    }
+}
+    
+/**************************************************************************************************/
+
+void CalcSparcc::addPseudoCount(vector<vector<float> >& sharedVector){
+    try {
+        for(int i=0;i<numGroups;i++){   //iterate across the groups
+            if (m->control_pressed) { return; }
+            for(int j=0;j<numOTUs;j++){
+                sharedVector[i][j] += 1;
+            }
+        }
+    }
+    catch(exception& e) {
+        m->errorOut(e, "CalcSparcc", "addPseudoCount");
+        exit(1);
+    }
+
+}
+
+/**************************************************************************************************/
+
+vector<float> CalcSparcc::getLogFractions(vector<vector<float> > sharedVector, string method){   //dirichlet by default
+    try {
+        vector<float> logSharedFractions(numGroups * numOTUs, 0);
+        
+        if(method == "dirichlet"){
+            vector<float> alphas(numGroups);
+            for(int i=0;i<numGroups;i++){   //iterate across the groups
+                if (m->control_pressed) { return logSharedFractions; }
+                alphas = RNG.randomDirichlet(sharedVector[i]);
+                
+                for(int j=0;j<numOTUs;j++){
+                    logSharedFractions[i * numOTUs + j] = alphas[j];
+                }
+            }
+        }
+        else if(method == "relabund"){
+            for(int i=0;i<numGroups;i++){
+                if (m->control_pressed) { return logSharedFractions; }
+                float total = 0.0;
+                for(int j=0;j<numOTUs;j++){
+                    total += sharedVector[i][j];
+                }
+                for(int j=0;j<numOTUs;j++){
+                    logSharedFractions[i * numOTUs + j] = sharedVector[i][j]/total;
+                }
+            }
+        }
+        
+        for(int i=0;i<logSharedFractions.size();i++){
+            logSharedFractions[i] = log(logSharedFractions[i]);
+        }
+        
+        return logSharedFractions;
+    }
+    catch(exception& e) {
+        m->errorOut(e, "CalcSparcc", "addPseudoCount");
+        exit(1);
+    }
+
+}
+
+/**************************************************************************************************/
+
+void CalcSparcc::getT_Matrix(vector<float> sharedFractions){
+    try {
+        tMatrix.resize(numOTUs * numOTUs, 0);
+        
+        vector<float> diff(numGroups);
+        
+        for(int j1=0;j1<numOTUs;j1++){
+            for(int j2=0;j2<j1;j2++){
+                if (m->control_pressed) { return; }
+                float mean = 0.0;
+                for(int i=0;i<numGroups;i++){
+                    diff[i] = sharedFractions[i * numOTUs + j1] - sharedFractions[i * numOTUs + j2];
+                    mean += diff[i];
+                }
+                mean /= float(numGroups);
+                
+                float variance = 0.0;
+                for(int i=0;i<numGroups;i++){
+                    variance += (diff[i] - mean) * (diff[i] - mean);
+                }
+                variance /= (float)(numGroups-1);
+                
+                tMatrix[j1 * numOTUs + j2] = variance;
+                tMatrix[j2 * numOTUs + j1] = tMatrix[j1 * numOTUs + j2];
+            }
+        }
+    }
+    catch(exception& e) {
+        m->errorOut(e, "CalcSparcc", "getT_Matrix");
+        exit(1);
+    }
+
+}
+
+/**************************************************************************************************/
+
+void CalcSparcc::getT_Vector(){
+    try {
+        tVector.assign(numOTUs, 0);
+        
+        for(int j1=0;j1<numOTUs;j1++){
+            if (m->control_pressed) { return; }
+            for(int j2=0;j2<numOTUs;j2++){
+                tVector[j1] += tMatrix[j1 * numOTUs + j2];
+            }
+        }
+    }
+    catch(exception& e) {
+        m->errorOut(e, "CalcSparcc", "getT_Vector");
+        exit(1);
+    }
+}
+
+/**************************************************************************************************/
+
+void CalcSparcc::getD_Matrix(){
+    try {
+        float d = numOTUs - 1.0;
+        
+        dMatrix.resize(numOTUs);
+        for(int i=0;i<numOTUs;i++){
+            if (m->control_pressed) { return; }
+            dMatrix[i].resize(numOTUs, 1);
+            dMatrix[i][i] = d;
+        }
+    }
+    catch(exception& e) {
+        m->errorOut(e, "CalcSparcc", "getD_Matrix");
+        exit(1);
+    }
+}
+
+/**************************************************************************************************/
+
+vector<float> CalcSparcc::getBasisVariances(){
+    try {
+        LinearAlgebra LA;
+        
+        vector<float> variances = LA.solveEquations(dMatrix, tVector);
+        
+        for(int i=0;i<variances.size();i++){
+            if (m->control_pressed) { return variances; }
+            if(variances[i] < 0){   variances[i] = 1e-4;    }
+        }
+        
+        return variances;
+    }
+    catch(exception& e) {
+        m->errorOut(e, "CalcSparcc", "getBasisVariances");
+        exit(1);
+    }
+}
+
+/**************************************************************************************************/
+
+vector<vector<float> > CalcSparcc::getBasisCorrelations(vector<float> basisVariance){
+    try {
+        vector<vector<float> > rho(numOTUs);
+        for(int i=0;i<numOTUs;i++){ rho[i].resize(numOTUs, 0);    }
+        
+        for(int i=0;i<numOTUs;i++){
+            float var_i = basisVariance[i];
+            float sqrt_var_i = sqrt(var_i);
+            
+            rho[i][i] = 1.00;
+            
+            for(int j=0;j<i;j++){
+                if (m->control_pressed) { return rho; }
+                float var_j = basisVariance[j];
+                
+                rho[i][j] = (var_i + var_j - tMatrix[i * numOTUs + j]) / (2.0 * sqrt_var_i * sqrt(var_j));
+                if(rho[i][j] > 1.0)         {   rho[i][j] = 1.0;   }
+                else if(rho[i][j] < -1.0)   {   rho[i][j] = -1.0;  }
+                
+                rho[j][i] = rho[i][j];
+                
+            }
+        }
+        
+        return rho;
+    }
+    catch(exception& e) {
+        m->errorOut(e, "CalcSparcc", "getBasisCorrelations");
+        exit(1);
+    }
+}
+
+/**************************************************************************************************/
+
+float CalcSparcc::getExcludedPairs(vector<vector<float> > rho, int& maxRow, int& maxColumn){
+    try {
+        float maxRho = 0;
+        maxRow = -1;
+        maxColumn = -1;
+        
+        for(int i=0;i<numOTUs;i++){
+            
+            for(int j=0;j<i;j++){
+                if (m->control_pressed) { return maxRho; }
+                float tester = abs(rho[i][j]);
+                
+                if(tester > maxRho && excluded[i][j] != 1){
+                    maxRho = tester;
+                    maxRow = i;
+                    maxColumn = j;
+                }
+            }
+            
+        }
+        
+        return maxRho;
+    }
+    catch(exception& e) {
+        m->errorOut(e, "CalcSparcc", "getExcludedPairs");
+        exit(1);
+    }
+}
+
+/**************************************************************************************************/
+
+void CalcSparcc::excludeValues(int excludeRow, int excludeColumn){
+    try { 
+        tVector[excludeRow] -= tMatrix[excludeRow * numOTUs + excludeColumn];
+        tVector[excludeColumn] -= tMatrix[excludeRow * numOTUs + excludeColumn];
+        
+        dMatrix[excludeRow][excludeColumn] = 0;
+        dMatrix[excludeColumn][excludeRow] = 0;
+        dMatrix[excludeRow][excludeRow]--;
+        dMatrix[excludeColumn][excludeColumn]--;
+        
+        excluded[excludeRow][excludeColumn] = 1;
+        excluded[excludeColumn][excludeRow] = 1;
+    }
+    catch(exception& e) {
+        m->errorOut(e, "CalcSparcc", "excludeValues");
+        exit(1);
+    }
+}
+
+/**************************************************************************************************/
+
+void CalcSparcc::getMedian(vector<vector<vector<float> > > allCorrelations){
+    try {
+        int numSamples = (int)allCorrelations.size();
+        median.resize(numOTUs);
+        for(int i=0;i<numOTUs;i++){ median[i].assign(numOTUs, 1);   }
+        
+        vector<float> hold(numSamples);
+        
+        for(int i=0;i<numOTUs;i++){
+            for(int j=0;j<i;j++){
+                if (m->control_pressed) { return; }
+                
+                for(int k=0;k<numSamples;k++){
+                    hold[k] = allCorrelations[k][i][j];
+                }
+                
+                sort(hold.begin(), hold.end());
+                median[i][j] = hold[int(numSamples * 0.5)];
+                median[j][i] = median[i][j];
+            }
+        }
+    }
+    catch(exception& e) {
+        m->errorOut(e, "CalcSparcc", "getMedian");
+        exit(1);
+    }
+}
+
+/**************************************************************************************************/