]> git.donarmstrong.com Git - mothur.git/commitdiff
added sparcc command. fixed bug in remove.groups that removed all groups if you...
authorSarah Westcott <mothur.westcott@gmail.com>
Mon, 13 May 2013 15:05:36 +0000 (11:05 -0400)
committerSarah Westcott <mothur.westcott@gmail.com>
Mon, 13 May 2013 15:05:36 +0000 (11:05 -0400)
16 files changed:
Mothur.xcodeproj/project.pbxproj
calcsparcc.cpp [new file with mode: 0644]
calcsparcc.h [new file with mode: 0644]
commandfactory.cpp
linearalgebra.cpp
linearalgebra.h
randomnumber.cpp [new file with mode: 0644]
randomnumber.h [new file with mode: 0644]
removegroupscommand.cpp
sffmultiplecommand.cpp
sharedrabundfloatvector.cpp
sharedrabundfloatvector.h
sparcccommand.cpp [new file with mode: 0644]
sparcccommand.h [new file with mode: 0644]
splitmatrix.cpp
treegroupscommand.cpp

index 284bb592f407196ae05100feb4f0c45160e68027..f670803d39b300ddc854194a429f76c406800ee3 100644 (file)
@@ -54,6 +54,9 @@
                A77410F614697C300098E6AC /* seqnoise.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A77410F414697C300098E6AC /* seqnoise.cpp */; };
                A778FE6B134CA6CA00C0BA33 /* getcommandinfocommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A778FE6A134CA6CA00C0BA33 /* getcommandinfocommand.cpp */; };
                A77A221F139001B600B0BE70 /* deuniquetreecommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A77A221E139001B600B0BE70 /* deuniquetreecommand.cpp */; };
+               A77B7185173D2240002163C2 /* sparcccommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A77B7184173D2240002163C2 /* sparcccommand.cpp */; };
+               A77B7188173D4042002163C2 /* randomnumber.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A77B7186173D4041002163C2 /* randomnumber.cpp */; };
+               A77B718B173D40E5002163C2 /* calcsparcc.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A77B7189173D40E4002163C2 /* calcsparcc.cpp */; };
                A77E1938161B201E00DB1A2A /* randomforest.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A77E1937161B201E00DB1A2A /* randomforest.cpp */; };
                A77E193B161B289600DB1A2A /* rftreenode.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A77E193A161B289600DB1A2A /* rftreenode.cpp */; };
                A77EBD2F1523709100ED407C /* createdatabasecommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A77EBD2E1523709100ED407C /* createdatabasecommand.cpp */; };
                A778FE6A134CA6CA00C0BA33 /* getcommandinfocommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = getcommandinfocommand.cpp; sourceTree = "<group>"; };
                A77A221D139001B600B0BE70 /* deuniquetreecommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = deuniquetreecommand.h; sourceTree = "<group>"; };
                A77A221E139001B600B0BE70 /* deuniquetreecommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = deuniquetreecommand.cpp; sourceTree = "<group>"; };
+               A77B7183173D222F002163C2 /* sparcccommand.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = sparcccommand.h; sourceTree = "<group>"; };
+               A77B7184173D2240002163C2 /* sparcccommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = sparcccommand.cpp; sourceTree = "<group>"; };
+               A77B7186173D4041002163C2 /* randomnumber.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = randomnumber.cpp; sourceTree = "<group>"; };
+               A77B7187173D4041002163C2 /* randomnumber.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = randomnumber.h; sourceTree = "<group>"; };
+               A77B7189173D40E4002163C2 /* calcsparcc.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = calcsparcc.cpp; sourceTree = "<group>"; };
+               A77B718A173D40E4002163C2 /* calcsparcc.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = calcsparcc.h; sourceTree = "<group>"; };
                A77E1937161B201E00DB1A2A /* randomforest.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = randomforest.cpp; sourceTree = "<group>"; };
                A77E193A161B289600DB1A2A /* rftreenode.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = rftreenode.cpp; sourceTree = "<group>"; };
                A77EBD2C1523707F00ED407C /* createdatabasecommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = createdatabasecommand.h; sourceTree = "<group>"; };
                                A7A61F1A130035C800E05B6B /* LICENSE */,
                                A70332B512D3A13400761E33 /* makefile */,
                                A7E9B65912D37EC300DA6239 /* averagelinkage.cpp */,
+                               A77B718A173D40E4002163C2 /* calcsparcc.h */,
+                               A77B7189173D40E4002163C2 /* calcsparcc.cpp */,
                                A7E9BA4F12D398D700DA6239 /* clearcut */,
                                A7E9B69812D37EC400DA6239 /* cluster.cpp */,
                                A7E9B69912D37EC400DA6239 /* cluster.hpp */,
                                A7E9B79C12D37EC400DA6239 /* progress.hpp */,
                                A7548FAF171440ED00B1F05A /* qFinderDMM.h */,
                                A7548FAE171440EC00B1F05A /* qFinderDMM.cpp */,
+                               A77B7187173D4041002163C2 /* randomnumber.h */,
+                               A77B7186173D4041002163C2 /* randomnumber.cpp */,
                                A7E9B7A512D37EC400DA6239 /* rarecalc.cpp */,
                                A7386C191619C9FB00651424 /* randomforest */,
                                A7E9B7A612D37EC400DA6239 /* rarecalc.h */,
                                A774101314695AF60098E6AC /* shhhseqscommand.cpp */,
                                A7A32DAC14DC43D10001D2E5 /* sortseqscommand.h */,
                                A7A32DA914DC43B00001D2E5 /* sortseqscommand.cpp */,
+                               A77B7183173D222F002163C2 /* sparcccommand.h */,
+                               A77B7184173D2240002163C2 /* sparcccommand.cpp */,
                                A7E9B84012D37EC400DA6239 /* splitabundcommand.h */,
                                A7E9B83F12D37EC400DA6239 /* splitabundcommand.cpp */,
                                A7E9B84212D37EC400DA6239 /* splitgroupscommand.h */,
                                A799314B16CBD0CD0017E888 /* mergetaxsummarycommand.cpp in Sources */,
                                A7548FAD17142EBC00B1F05A /* getmetacommunitycommand.cpp in Sources */,
                                A7548FB0171440ED00B1F05A /* qFinderDMM.cpp in Sources */,
+                               A77B7185173D2240002163C2 /* sparcccommand.cpp in Sources */,
+                               A77B7188173D4042002163C2 /* randomnumber.cpp in Sources */,
+                               A77B718B173D40E5002163C2 /* calcsparcc.cpp in Sources */,
                        );
                        runOnlyForDeploymentPostprocessing = 0;
                };
diff --git a/calcsparcc.cpp b/calcsparcc.cpp
new file mode 100644 (file)
index 0000000..9d1d88a
--- /dev/null
@@ -0,0 +1,344 @@
+//
+//  runSparcc.cpp
+//  PDSSparCC
+//
+//  Created by Patrick Schloss on 10/31/12.
+//  Copyright (c) 2012 University of Michigan. All rights reserved.
+//
+
+#include "calcSparcc.h"
+#include "linearalgebra.h"
+
+/**************************************************************************************************/
+
+CalcSparcc::CalcSparcc(vector<vector<float> > sharedVector, int maxIterations, int numSamplings, string method){
+    try {
+        m = MothurOut::getInstance();
+        numOTUs = (int)sharedVector[0].size();
+        numGroups = (int)sharedVector.size();
+        normalizationMethod = method;
+        int numOTUs = (int)sharedVector[0].size();
+        
+        addPseudoCount(sharedVector);
+        
+        vector<vector<vector<float> > > allCorrelations(numSamplings);
+        
+        //    float cycClockStart = clock();
+        //    unsigned long long cycTimeStart = time(NULL);
+        
+        for(int i=0;i<numSamplings;i++){
+            if (m->control_pressed) { break; }
+            vector<float> logFractions =  getLogFractions(sharedVector, method);
+            getT_Matrix(logFractions);     //this step is slow...
+            getT_Vector();
+            getD_Matrix();
+            vector<float> basisVariances = getBasisVariances();     //this step is slow...
+            vector<vector<float> > correlation = getBasisCorrelations(basisVariances);
+            
+            excluded.resize(numOTUs);
+            for(int j=0;j<numOTUs;j++){ excluded[j].assign(numOTUs, 0); }
+            
+            float maxRho = 1;
+            int excludeRow = -1;
+            int excludeColumn = -1;
+            
+            int iter = 0;
+            while(maxRho > 0.10 && iter < maxIterations){
+                maxRho = getExcludedPairs(correlation, excludeRow, excludeColumn);
+                excludeValues(excludeRow, excludeColumn);
+                vector<float> excludedBasisVariances = getBasisVariances();
+                correlation = getBasisCorrelations(excludedBasisVariances);
+                iter++;
+            }
+            allCorrelations[i] = correlation;
+        }
+        
+        if (!m->control_pressed) {
+            if(numSamplings > 1){
+                getMedian(allCorrelations);
+            }
+            else{
+                median = allCorrelations[0];
+            }
+        }
+//    cout << median[0][3] << '\t' << median[0][6] << endl;
+    }
+    catch(exception& e) {
+        m->errorOut(e, "CalcSparcc", "CalcSparcc");
+        exit(1);
+    }
+}
+    
+/**************************************************************************************************/
+
+void CalcSparcc::addPseudoCount(vector<vector<float> >& sharedVector){
+    try {
+        for(int i=0;i<numGroups;i++){   //iterate across the groups
+            if (m->control_pressed) { return; }
+            for(int j=0;j<numOTUs;j++){
+                sharedVector[i][j] += 1;
+            }
+        }
+    }
+    catch(exception& e) {
+        m->errorOut(e, "CalcSparcc", "addPseudoCount");
+        exit(1);
+    }
+
+}
+
+/**************************************************************************************************/
+
+vector<float> CalcSparcc::getLogFractions(vector<vector<float> > sharedVector, string method){   //dirichlet by default
+    try {
+        vector<float> logSharedFractions(numGroups * numOTUs, 0);
+        
+        if(method == "dirichlet"){
+            vector<float> alphas(numGroups);
+            for(int i=0;i<numGroups;i++){   //iterate across the groups
+                if (m->control_pressed) { return logSharedFractions; }
+                alphas = RNG.randomDirichlet(sharedVector[i]);
+                
+                for(int j=0;j<numOTUs;j++){
+                    logSharedFractions[i * numOTUs + j] = alphas[j];
+                }
+            }
+        }
+        else if(method == "relabund"){
+            for(int i=0;i<numGroups;i++){
+                if (m->control_pressed) { return logSharedFractions; }
+                float total = 0.0;
+                for(int j=0;j<numOTUs;j++){
+                    total += sharedVector[i][j];
+                }
+                for(int j=0;j<numOTUs;j++){
+                    logSharedFractions[i * numOTUs + j] = sharedVector[i][j]/total;
+                }
+            }
+        }
+        
+        for(int i=0;i<logSharedFractions.size();i++){
+            logSharedFractions[i] = log(logSharedFractions[i]);
+        }
+        
+        return logSharedFractions;
+    }
+    catch(exception& e) {
+        m->errorOut(e, "CalcSparcc", "addPseudoCount");
+        exit(1);
+    }
+
+}
+
+/**************************************************************************************************/
+
+void CalcSparcc::getT_Matrix(vector<float> sharedFractions){
+    try {
+        tMatrix.resize(numOTUs * numOTUs, 0);
+        
+        vector<float> diff(numGroups);
+        
+        for(int j1=0;j1<numOTUs;j1++){
+            for(int j2=0;j2<j1;j2++){
+                if (m->control_pressed) { return; }
+                float mean = 0.0;
+                for(int i=0;i<numGroups;i++){
+                    diff[i] = sharedFractions[i * numOTUs + j1] - sharedFractions[i * numOTUs + j2];
+                    mean += diff[i];
+                }
+                mean /= float(numGroups);
+                
+                float variance = 0.0;
+                for(int i=0;i<numGroups;i++){
+                    variance += (diff[i] - mean) * (diff[i] - mean);
+                }
+                variance /= (float)(numGroups-1);
+                
+                tMatrix[j1 * numOTUs + j2] = variance;
+                tMatrix[j2 * numOTUs + j1] = tMatrix[j1 * numOTUs + j2];
+            }
+        }
+    }
+    catch(exception& e) {
+        m->errorOut(e, "CalcSparcc", "getT_Matrix");
+        exit(1);
+    }
+
+}
+
+/**************************************************************************************************/
+
+void CalcSparcc::getT_Vector(){
+    try {
+        tVector.assign(numOTUs, 0);
+        
+        for(int j1=0;j1<numOTUs;j1++){
+            if (m->control_pressed) { return; }
+            for(int j2=0;j2<numOTUs;j2++){
+                tVector[j1] += tMatrix[j1 * numOTUs + j2];
+            }
+        }
+    }
+    catch(exception& e) {
+        m->errorOut(e, "CalcSparcc", "getT_Vector");
+        exit(1);
+    }
+}
+
+/**************************************************************************************************/
+
+void CalcSparcc::getD_Matrix(){
+    try {
+        float d = numOTUs - 1.0;
+        
+        dMatrix.resize(numOTUs);
+        for(int i=0;i<numOTUs;i++){
+            if (m->control_pressed) { return; }
+            dMatrix[i].resize(numOTUs, 1);
+            dMatrix[i][i] = d;
+        }
+    }
+    catch(exception& e) {
+        m->errorOut(e, "CalcSparcc", "getD_Matrix");
+        exit(1);
+    }
+}
+
+/**************************************************************************************************/
+
+vector<float> CalcSparcc::getBasisVariances(){
+    try {
+        LinearAlgebra LA;
+        
+        vector<float> variances = LA.solveEquations(dMatrix, tVector);
+        
+        for(int i=0;i<variances.size();i++){
+            if (m->control_pressed) { return variances; }
+            if(variances[i] < 0){   variances[i] = 1e-4;    }
+        }
+        
+        return variances;
+    }
+    catch(exception& e) {
+        m->errorOut(e, "CalcSparcc", "getBasisVariances");
+        exit(1);
+    }
+}
+
+/**************************************************************************************************/
+
+vector<vector<float> > CalcSparcc::getBasisCorrelations(vector<float> basisVariance){
+    try {
+        vector<vector<float> > rho(numOTUs);
+        for(int i=0;i<numOTUs;i++){ rho[i].resize(numOTUs, 0);    }
+        
+        for(int i=0;i<numOTUs;i++){
+            float var_i = basisVariance[i];
+            float sqrt_var_i = sqrt(var_i);
+            
+            rho[i][i] = 1.00;
+            
+            for(int j=0;j<i;j++){
+                if (m->control_pressed) { return rho; }
+                float var_j = basisVariance[j];
+                
+                rho[i][j] = (var_i + var_j - tMatrix[i * numOTUs + j]) / (2.0 * sqrt_var_i * sqrt(var_j));
+                if(rho[i][j] > 1.0)         {   rho[i][j] = 1.0;   }
+                else if(rho[i][j] < -1.0)   {   rho[i][j] = -1.0;  }
+                
+                rho[j][i] = rho[i][j];
+                
+            }
+        }
+        
+        return rho;
+    }
+    catch(exception& e) {
+        m->errorOut(e, "CalcSparcc", "getBasisCorrelations");
+        exit(1);
+    }
+}
+
+/**************************************************************************************************/
+
+float CalcSparcc::getExcludedPairs(vector<vector<float> > rho, int& maxRow, int& maxColumn){
+    try {
+        float maxRho = 0;
+        maxRow = -1;
+        maxColumn = -1;
+        
+        for(int i=0;i<numOTUs;i++){
+            
+            for(int j=0;j<i;j++){
+                if (m->control_pressed) { return maxRho; }
+                float tester = abs(rho[i][j]);
+                
+                if(tester > maxRho && excluded[i][j] != 1){
+                    maxRho = tester;
+                    maxRow = i;
+                    maxColumn = j;
+                }
+            }
+            
+        }
+        
+        return maxRho;
+    }
+    catch(exception& e) {
+        m->errorOut(e, "CalcSparcc", "getExcludedPairs");
+        exit(1);
+    }
+}
+
+/**************************************************************************************************/
+
+void CalcSparcc::excludeValues(int excludeRow, int excludeColumn){
+    try { 
+        tVector[excludeRow] -= tMatrix[excludeRow * numOTUs + excludeColumn];
+        tVector[excludeColumn] -= tMatrix[excludeRow * numOTUs + excludeColumn];
+        
+        dMatrix[excludeRow][excludeColumn] = 0;
+        dMatrix[excludeColumn][excludeRow] = 0;
+        dMatrix[excludeRow][excludeRow]--;
+        dMatrix[excludeColumn][excludeColumn]--;
+        
+        excluded[excludeRow][excludeColumn] = 1;
+        excluded[excludeColumn][excludeRow] = 1;
+    }
+    catch(exception& e) {
+        m->errorOut(e, "CalcSparcc", "excludeValues");
+        exit(1);
+    }
+}
+
+/**************************************************************************************************/
+
+void CalcSparcc::getMedian(vector<vector<vector<float> > > allCorrelations){
+    try {
+        int numSamples = (int)allCorrelations.size();
+        median.resize(numOTUs);
+        for(int i=0;i<numOTUs;i++){ median[i].assign(numOTUs, 1);   }
+        
+        vector<float> hold(numSamples);
+        
+        for(int i=0;i<numOTUs;i++){
+            for(int j=0;j<i;j++){
+                if (m->control_pressed) { return; }
+                
+                for(int k=0;k<numSamples;k++){
+                    hold[k] = allCorrelations[k][i][j];
+                }
+                
+                sort(hold.begin(), hold.end());
+                median[i][j] = hold[int(numSamples * 0.5)];
+                median[j][i] = median[i][j];
+            }
+        }
+    }
+    catch(exception& e) {
+        m->errorOut(e, "CalcSparcc", "getMedian");
+        exit(1);
+    }
+}
+
+/**************************************************************************************************/
diff --git a/calcsparcc.h b/calcsparcc.h
new file mode 100644 (file)
index 0000000..05eff85
--- /dev/null
@@ -0,0 +1,57 @@
+
+#ifndef PDSSparCC_runSparcc_h
+#define PDSSparCC_runSparcc_h
+
+//
+//  runSparcc.h
+//  PDSSparCC
+//
+//  Created by Patrick Schloss on 10/31/12.
+//  Copyright (c) 2012 University of Michigan. All rights reserved.
+//
+
+/**************************************************************************************************/
+
+//#include "sparcc.h"
+#include "randomnumber.h"
+#include "mothurout.h"
+
+/**************************************************************************************************/
+
+class CalcSparcc {
+       
+public:
+       CalcSparcc(vector<vector<float> >, int, int, string);
+    vector<vector<float> > getRho()    {   return median;  }
+private:
+    MothurOut* m;
+    void addPseudoCount(vector<vector<float> >&);
+    vector<float> getLogFractions(vector<vector<float> >, string);
+    void getT_Matrix(vector<float>);
+    
+    
+    void getT_Vector();
+    void getD_Matrix();
+    vector<float> getBasisVariances();
+    vector<vector<float> > getBasisCorrelations(vector<float>);
+    float getExcludedPairs(vector<vector<float> >, int&, int&);
+    void excludeValues(int, int);
+    void getMedian(vector<vector<vector<float> > >);
+
+    vector<float> tMatrix;
+
+    vector<vector<float> > dMatrix;
+    vector<float> tVector;
+    vector<vector<int> > excluded;
+    vector<vector<float> > median;
+    
+    int numOTUs;
+    int numGroups;
+    string normalizationMethod;
+    
+    RandomNumberGenerator RNG;    
+};
+
+#endif
+
+/**************************************************************************************************/
index 6c6bc17bb3dfd47f31b68facd01d6e0724cf3d8d..cf9984418a9099ef99d96f802d5328dddd5072d8 100644 (file)
 #include "removedistscommand.h"
 #include "mergetaxsummarycommand.h"
 #include "getmetacommunitycommand.h"
+#include "sparcccommand.h"
 
 /*******************************************************/
 
@@ -307,6 +308,7 @@ CommandFactory::CommandFactory(){
     commands["remove.dists"]        = "remove.dists";
     commands["merge.taxsummary"]    = "merge.taxsummary";
     commands["get.metacommunity"]   = "get.metacommunity";
+    commands["sparcc"]              = "sparcc";
     
 
 }
@@ -528,6 +530,7 @@ Command* CommandFactory::getCommand(string commandName, string optionString){
         else if(commandName == "remove.dists")          {      command = new RemoveDistsCommand(optionString);             }
         else if(commandName == "merge.taxsummary")      {      command = new MergeTaxSummaryCommand(optionString);         }
         else if(commandName == "get.metacommunity")     {      command = new GetMetaCommunityCommand(optionString);        }
+        else if(commandName == "sparcc")                {      command = new SparccCommand(optionString);                  }
                else                                                                                    {       command = new NoCommand(optionString);                                          }
 
                return command;
@@ -690,6 +693,7 @@ Command* CommandFactory::getCommand(string commandName, string optionString, str
         else if(commandName == "remove.dists")          {      pipecommand = new RemoveDistsCommand(optionString);             }
         else if(commandName == "merge.taxsummary")      {      pipecommand = new MergeTaxSummaryCommand(optionString);         }
         else if(commandName == "get.metacommunity")     {      pipecommand = new GetMetaCommunityCommand(optionString);        }
+        else if(commandName == "sparcc")                {      pipecommand = new SparccCommand(optionString);                  }
                else                                                                                    {       pipecommand = new NoCommand(optionString);                                              }
 
                return pipecommand;
@@ -838,6 +842,7 @@ Command* CommandFactory::getCommand(string commandName){
         else if(commandName == "remove.dists")          {      shellcommand = new RemoveDistsCommand();            }
         else if(commandName == "merge.taxsummary")      {      shellcommand = new MergeTaxSummaryCommand();        }
         else if(commandName == "get.metacommunity")     {      shellcommand = new GetMetaCommunityCommand();       }
+        else if(commandName == "sparcc")                {      shellcommand = new SparccCommand();                 }
                else                                                                                    {       shellcommand = new NoCommand();                                         }
 
                return shellcommand;
index 58f1acccfc7ed4e3aefc902ad199dd7a17a0516d..e1b9470fdb218d94fd9f5fe9f36ccb23ac31ea8a 100644 (file)
@@ -1530,6 +1530,25 @@ vector<double> LinearAlgebra::solveEquations(vector<vector<double> > A, vector<d
                exit(1);
        }
 }
+/*********************************************************************************************************************************/
+vector<float> LinearAlgebra::solveEquations(vector<vector<float> > A, vector<float> b){
+    try {
+        int length = (int)b.size();
+        vector<double> x(length, 0);
+        vector<int> index(length);
+        for(int i=0;i<length;i++){  index[i] = i;   }
+        float d;
+        
+        ludcmp(A, index, d);  if (m->control_pressed) { return b; }
+        lubksb(A, index, b);
+        
+        return b;
+    }
+       catch(exception& e) {
+               m->errorOut(e, "LinearAlgebra", "solveEquations");
+               exit(1);
+       }
+}
 
 /*********************************************************************************************************************************/
 
@@ -1628,6 +1647,104 @@ void LinearAlgebra::lubksb(vector<vector<double> >& A, vector<int>& index, vecto
                exit(1);
        }
 }
+/*********************************************************************************************************************************/
+
+void LinearAlgebra::ludcmp(vector<vector<float> >& A, vector<int>& index, float& d){
+    try {
+        double tiny = 1e-20;
+        
+        int n = (int)A.size();
+        vector<float> vv(n, 0.0);
+        double temp;
+        int imax;
+        
+        d = 1.0;
+        
+        for(int i=0;i<n;i++){
+            float big = 0.0;
+            for(int j=0;j<n;j++){   if((temp=fabs(A[i][j])) > big ) big=temp;  }
+            if(big==0.0){   m->mothurOut("Singular matrix in routine ludcmp\n");    }
+            vv[i] = 1.0/big;
+        }
+        
+        for(int j=0;j<n;j++){
+            if (m->control_pressed) { break; }
+            for(int i=0;i<j;i++){
+                float sum = A[i][j];
+                for(int k=0;k<i;k++){   sum -= A[i][k] * A[k][j];   }
+                A[i][j] = sum;
+            }
+            
+            float big = 0.0;
+            for(int i=j;i<n;i++){
+                float sum = A[i][j];
+                for(int k=0;k<j;k++){   sum -= A[i][k] * A[k][j];   }
+                A[i][j] = sum;
+                float dum;
+                if((dum = vv[i] * fabs(sum)) >= big){
+                    big = dum;
+                    imax = i;
+                }
+            }
+            if(j != imax){
+                for(int k=0;k<n;k++){
+                    float dum = A[imax][k];
+                    A[imax][k] = A[j][k];
+                    A[j][k] = dum;
+                }
+                d = -d;
+                vv[imax] = vv[j];
+            }
+            index[j] = imax;
+            
+            if(A[j][j] == 0.0){ A[j][j] = tiny; }
+            
+            if(j != n-1){
+                float dum = 1.0/A[j][j];
+                for(int i=j+1;i<n;i++){ A[i][j] *= dum; }
+            }
+        }
+    }
+       catch(exception& e) {
+               m->errorOut(e, "LinearAlgebra", "ludcmp");
+               exit(1);
+       }
+}
+
+/*********************************************************************************************************************************/
+
+void LinearAlgebra::lubksb(vector<vector<float> >& A, vector<int>& index, vector<float>& b){
+    try {
+        float total;
+        int n = (int)A.size();
+        int ii = 0;
+        
+        for(int i=0;i<n;i++){
+            if (m->control_pressed) { break; }
+            int ip = index[i];
+            total = b[ip];
+            b[ip] = b[i];
+            
+            if (ii != 0) {
+                for(int j=ii-1;j<i;j++){
+                    total -= A[i][j] * b[j];
+                }
+            }
+            else if(total != 0){  ii = i+1;   }
+            b[i] = total;
+        }
+        for(int i=n-1;i>=0;i--){
+            total = b[i];
+            for(int j=i+1;j<n;j++){ total -= A[i][j] * b[j];  }
+            b[i] = total / A[i][i];
+        }
+    }
+       catch(exception& e) {
+               m->errorOut(e, "LinearAlgebra", "lubksb");
+               exit(1);
+       }
+}
+
 
 /*********************************************************************************************************************************/
 
index ef9e03cde900b4e01bdd61a99083801c0d25b913..e0933ee52564915d35bcf6e9f91f73c2cea80d2d 100644 (file)
@@ -39,6 +39,7 @@ public:
     double calcKendallSig(double, double); //length, coeff.
     
     vector<double> solveEquations(vector<vector<double> >, vector<double>);
+    vector<float> solveEquations(vector<vector<float> >, vector<float>);
     vector<vector<double> > getInverse(vector<vector<double> >);
 
     
@@ -64,6 +65,9 @@ private:
     
     void ludcmp(vector<vector<double> >&, vector<int>&, double&);
     void lubksb(vector<vector<double> >&, vector<int>&, vector<double>&);
+    
+    void ludcmp(vector<vector<float> >&, vector<int>&, float&);
+    void lubksb(vector<vector<float> >&, vector<int>&, vector<float>&);
 
     
 };
diff --git a/randomnumber.cpp b/randomnumber.cpp
new file mode 100644 (file)
index 0000000..a558fd3
--- /dev/null
@@ -0,0 +1,315 @@
+#ifndef RANDOMNUMBER_H
+#define RANDOMNUMBER_H
+
+/*
+ *  randomnumber.cpp
+ *  
+ *
+ *  Created by Pat Schloss on 7/6/11.
+ *  Copyright 2011 Patrick D. Schloss. All rights reserved.
+ *
+ */
+
+#include "randomnumber.h"
+#include <cmath>
+
+/**************************************************************************************************/
+
+RandomNumberGenerator::RandomNumberGenerator(){
+    srand( (unsigned)time( NULL ) );
+}
+
+/**************************************************************************************************/
+
+float RandomNumberGenerator::randomUniform(){
+       
+       float randUnif = 0.0000;
+       
+       while(randUnif == 0.0000){
+               
+               randUnif = rand() / (float)RAND_MAX;
+               
+       }
+       
+       return randUnif;
+}
+
+/**************************************************************************************************/
+//
+//Code shamelessly swiped and modified from Numerical Recipes in C++
+//
+/**************************************************************************************************/
+
+float RandomNumberGenerator::randomExp(){
+       
+       float randExp = 0.0000;
+       
+       while(randExp == 0.0000){
+               
+               randExp = -log(randomUniform());
+               
+       }
+       
+       return randExp;
+}
+
+/**************************************************************************************************/
+//
+//Code shamelessly swiped and modified from Numerical Recipes in C++
+//
+/**************************************************************************************************/
+
+float RandomNumberGenerator::randomNorm(){
+       
+       float x, y, rsquare;
+       
+       do{
+               x = 2.0 * randomUniform() - 1.0;
+               y = 2.0 * randomUniform() - 1.0;
+       
+               rsquare = x * x + y * y;
+       } while(rsquare >= 1.0 || rsquare == 0.0);
+       
+       float fac = sqrt(-2.0 * log(rsquare)/rsquare);
+
+       return x * fac;
+}
+
+
+/**************************************************************************************************/
+/*
+ *     Slightly modified version of:
+ *
+ *  Mathlib : A C Library of Special Functions
+ *  Copyright (C) 1998 Ross Ihaka
+ *  Copyright (C) 2000-2005 The R Development Core Team
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, a copy is available at
+ *  http://www.r-project.org/Licenses/
+ *
+ *  SYNOPSIS
+ *
+ *    #include <Rmath.h>
+ *    float rgamma(float a, float scale);
+ *
+ *  DESCRIPTION
+ *
+ *    Random variates from the gamma distribution.
+ *
+ *  REFERENCES
+ *
+ *    [1] Shape parameter a >= 1.  Algorithm GD in:
+ *
+ *        Ahrens, J.H. and Dieter, U. (1982).
+ *        Generating gamma variates by a modified
+ *        rejection technique.
+ *        Comm. ACM, 25, 47-54.
+ *
+ *
+ *    [2] Shape parameter 0 < a < 1. Algorithm GS in:
+ *
+ *        Ahrens, J.H. and Dieter, U. (1974).
+ *        Computer methods for sampling from gamma, beta,
+ *        poisson and binomial distributions.
+ *        Computing, 12, 223-246.
+ *
+ *    Input: a = parameter (mean) of the standard gamma distribution.
+ *    Output: a variate from the gamma(a)-distribution
+ */
+
+float RandomNumberGenerator::randomGamma(float a)
+{
+       /* Constants : */
+    const static float sqrt32 = 5.656854;
+    const static float exp_m1 = 0.36787944117144232159;/* exp(-1) = 1/e */
+       float scale = 1.0;
+       
+    /* Coefficients q[k] - for q0 = sum(q[k]*a^(-k))
+     * Coefficients a[k] - for q = q0+(t*t/2)*sum(a[k]*v^k)
+     * Coefficients e[k] - for exp(q)-1 = sum(e[k]*q^k)
+     */
+    const static float q1 = 0.04166669;
+    const static float q2 = 0.02083148;
+    const static float q3 = 0.00801191;
+    const static float q4 = 0.00144121;
+    const static float q5 = -7.388e-5;
+    const static float q6 = 2.4511e-4;
+    const static float q7 = 2.424e-4;
+       
+    const static float a1 = 0.3333333;
+    const static float a2 = -0.250003;
+    const static float a3 = 0.2000062;
+    const static float a4 = -0.1662921;
+    const static float a5 = 0.1423657;
+    const static float a6 = -0.1367177;
+    const static float a7 = 0.1233795;
+       
+    /* State variables [FIXME for threading!] :*/
+    static float aa = 0.;
+    static float aaa = 0.;
+    static float s, s2, d;    /* no. 1 (step 1) */
+    static float q0, b, si, c;/* no. 2 (step 4) */
+       
+    float e, p, q, r, t, u, v, w, x, ret_val;
+       
+    if (a <= 0.0 || scale <= 0.0){     cout << "error alpha or scale parameter are less than zero." << endl; exit(1);  }
+               
+    if (a < 1.) { /* GS algorithm for parameters a < 1 */
+        e = 1.0 + exp_m1 * a;
+        for(;;) {
+            p = e * randomUniform();
+            if (p >= 1.0) {
+                x = -log((e - p) / a);
+                if (randomExp() >= (1.0 - a) * log(x))
+                    break;
+            } else {
+                x = exp(log(p) / a);
+                if (randomExp() >= x)
+                    break;
+            }
+        }
+        return scale * x;
+    }
+       
+    /* --- a >= 1 : GD algorithm --- */
+       
+    /* Step 1: Recalculations of s2, s, d if a has changed */
+    if (a != aa) {
+        aa = a;
+        s2 = a - 0.5;
+        s = sqrt(s2);
+        d = sqrt32 - s * 12.0;
+    }
+    /* Step 2: t = standard normal deviate,
+        x = (s,1/2) -normal deviate. */
+       
+    /* immediate acceptance (i) */
+    t = randomNorm();
+    x = s + 0.5 * t;
+    ret_val = x * x;
+    if (t >= 0.0)
+        return scale * ret_val;
+       
+    /* Step 3: u = 0,1 - uniform sample. squeeze acceptance (s) */
+    u = randomUniform();
+    if (d * u <= t * t * t)
+        return scale * ret_val;
+       
+    /* Step 4: recalculations of q0, b, si, c if necessary */
+       
+    if (a != aaa) {
+        aaa = a;
+        r = 1.0 / a;
+        q0 = ((((((q7 * r + q6) * r + q5) * r + q4) * r + q3) * r
+               + q2) * r + q1) * r;
+               
+        /* Approximation depending on size of parameter a */
+        /* The constants in the expressions for b, si and c */
+        /* were established by numerical experiments */
+               
+        if (a <= 3.686) {
+            b = 0.463 + s + 0.178 * s2;
+            si = 1.235;
+            c = 0.195 / s - 0.079 + 0.16 * s;
+        } else if (a <= 13.022) {
+            b = 1.654 + 0.0076 * s2;
+            si = 1.68 / s + 0.275;
+            c = 0.062 / s + 0.024;
+        } else {
+            b = 1.77;
+            si = 0.75;
+            c = 0.1515 / s;
+        }
+    }
+    /* Step 5: no quotient test if x not positive */
+       
+    if (x > 0.0) {
+        /* Step 6: calculation of v and quotient q */
+        v = t / (s + s);
+        if (fabs(v) <= 0.25)
+            q = q0 + 0.5 * t * t * ((((((a7 * v + a6) * v + a5) * v + a4) * v
+                                      + a3) * v + a2) * v + a1) * v;
+        else
+            q = q0 - s * t + 0.25 * t * t + (s2 + s2) * log(1.0 + v);
+               
+               
+        /* Step 7: quotient acceptance (q) */
+        if (log(1.0 - u) <= q)
+            return scale * ret_val;
+    }
+       
+    for(;;) {
+        /* Step 8: e = standard exponential deviate
+         *      u =  0,1 -uniform deviate
+         *      t = (b,si)-float exponential (laplace) sample */
+        e = randomExp();
+        u = randomUniform();
+        u = u + u - 1.0;
+        if (u < 0.0)
+            t = b - si * e;
+        else
+            t = b + si * e;
+        /* Step  9:  rejection if t < tau(1) = -0.71874483771719 */
+        if (t >= -0.71874483771719) {
+            /* Step 10:  calculation of v and quotient q */
+            v = t / (s + s);
+            if (fabs(v) <= 0.25)
+                q = q0 + 0.5 * t * t *
+                               ((((((a7 * v + a6) * v + a5) * v + a4) * v + a3) * v
+                                 + a2) * v + a1) * v;
+            else
+                q = q0 - s * t + 0.25 * t * t + (s2 + s2) * log(1.0 + v);
+            /* Step 11:  hat acceptance (h) */
+            /* (if q not positive go to step 8) */
+            if (q > 0.0) {
+                w = expm1(q);
+                /*  ^^^^^ original code had approximation with rel.err < 2e-7 */
+                /* if t is rejected sample again at step 8 */
+                if (c * fabs(u) <= w * exp(e - 0.5 * t * t))
+                    break;
+            }
+        }
+    } /* for(;;) .. until  `t' is accepted */
+    x = s + 0.5 * t;
+    return scale * x * x;
+}
+
+/**************************************************************************************************/
+//
+//     essentially swiped from http://en.wikipedia.org/wiki/Dirichlet_distribution#Random_number_generation
+//
+/**************************************************************************************************/
+
+vector<float> RandomNumberGenerator::randomDirichlet(vector<float> alphas){
+
+       int nAlphas = (int)alphas.size();
+       vector<float> dirs(nAlphas, 0.0000);
+       
+       float sum = 0.0000;
+       for(int i=0;i<nAlphas;i++){
+               dirs[i] = randomGamma(alphas[i]);
+               sum += dirs[i];
+       }
+       
+       for(int i=0;i<nAlphas;i++){
+               dirs[i] /= sum;
+       }
+       
+       return dirs;
+       
+}
+
+/**************************************************************************************************/
+
+#endif
diff --git a/randomnumber.h b/randomnumber.h
new file mode 100644 (file)
index 0000000..59f94de
--- /dev/null
@@ -0,0 +1,34 @@
+#ifndef RANDOMNUMBER
+#define RANDOMNUMBER
+
+/*
+ *  randomnumber.h
+ *  
+ *
+ *  Created by Pat Schloss on 7/6/11.
+ *  Copyright 2011 Patrick D. Schloss. All rights reserved.
+ *
+ */
+
+
+/**************************************************************************************************/
+
+#include "mothur.h"
+
+/**************************************************************************************************/
+
+class RandomNumberGenerator {
+       
+public:
+       RandomNumberGenerator();
+    float randomUniform();
+       float randomExp();
+       float randomNorm();
+       float randomGamma(float);
+       vector<float> randomDirichlet(vector<float> alphas);
+       
+};
+
+/**************************************************************************************************/
+
+#endif
index 1e5fe11de39f34b30f59249895bcf393506e32f9..64f9ab6822301310e975e6bbe58736e953eed4f2 100644 (file)
@@ -355,11 +355,19 @@ int RemoveGroupsCommand::execute(){
                        
                        //make sure groups are valid
                        //takes care of user setting groupNames that are invalid or setting groups=all
-                       SharedUtil* util = new SharedUtil();
                        vector<string> namesGroups = groupMap->getNamesOfGroups();
-                       util->setGroups(Groups, namesGroups);
-                       delete util;
-                       
+                       vector<string> checkedGroups;
+            for (int i = 0; i < Groups.size(); i++) {
+                if (m->inUsersGroups(Groups[i], namesGroups)) { checkedGroups.push_back(Groups[i]); }
+                else {  m->mothurOut("[WARNING]: " + Groups[i] + " is not a valid group in your groupfile, ignoring.\n"); }
+            }
+            
+            if (checkedGroups.size() == 0) { m->mothurOut("[ERROR]: no valid groups, aborting.\n"); delete groupMap; return 0; }
+                       else {
+                Groups = checkedGroups;
+                m->setGroups(Groups);
+            }
+            
                        //fill names with names of sequences that are from the groups we want to remove 
                        fillNames();
                        
index b6894b48fd1371cb1b963e5a0e1714dc6cdf24b8..eea8ffaeb36eef4dcbcc5a1b2bde6860478fc113 100644 (file)
@@ -233,11 +233,20 @@ SffMultipleCommand::SffMultipleCommand(string option)  {
                        m->setProcessors(temp);
                        m->mothurConvert(temp, processors);
             
-                       flowOrder = validParameter.validFile(parameters, "order", false);
-                       if (flowOrder == "not found"){ flowOrder = "TACG";              }
-                       else if(flowOrder.length() != 4){
-                               m->mothurOut("The value of the order option must be four bases long\n");
-                       }
+                       temp = validParameter.validFile(parameters, "order", false);  if (temp == "not found"){         temp = "A";     }
+            if (temp.length() > 1) {  m->mothurOut("[ERROR]: " + temp + " is not a valid option for order. order options are A, B, or I. A = TACG, B = TACGTACGTACGATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGC, and I = TACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGC.\n");  abort=true;
+            }
+            else {
+                if (toupper(temp[0]) == 'A') {  flowOrder = "TACG";   }
+                else if(toupper(temp[0]) == 'B'){
+                    flowOrder = "TACGTACGTACGATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGC";   }
+                else if(toupper(temp[0]) == 'I'){
+                    flowOrder = "TACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGC";   }
+                else {
+                    m->mothurOut("[ERROR]: " + temp + " is not a valid option for order. order options are A, B, or I. A = TACG, B = TACGTACGTACGATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGC, and I = TACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGC.\n");  abort=true;
+                }
+            }
+
             
             temp = validParameter.validFile(parameters, "cutoff", false);      if (temp == "not found"){       temp = "0.01";          }
                        m->mothurConvert(temp, cutoff); 
index fa32fc43a873d22482cfcb8e4172343f3516cbce..71c868b158ab41c2b8d72e7dac301998e2c39537 100644 (file)
@@ -191,6 +191,16 @@ float SharedRAbundFloatVector::getAbundance(int index){
        return data[index].abundance;   
 }
 /***********************************************************************/
+//returns vector of abundances
+vector<float> SharedRAbundFloatVector::getAbundances(){
+    vector<float> abunds;
+    for (int i = 0; i < data.size(); i++) {
+        abunds.push_back(data[i].abundance);
+    }
+    
+       return abunds;
+}
+/***********************************************************************/
 individualFloat SharedRAbundFloatVector::get(int index){
        return data[index];     
 }
index 965b03ae7aebaf3e499ef55264a0d9ebe421380b..f8d69a8a5a17d4b9cadcf0e5c500d93498f601fc 100644 (file)
@@ -45,6 +45,7 @@ public:
        individualFloat get(int);
        vector <individual> getData();
        float getAbundance(int);
+    vector<float> getAbundances();
        void push_front(float, int, string); //abundance, otu, groupname
        void insert(float, int, string); //abundance, otu, groupname
        void push_back(float, string);  //abundance, groupname
diff --git a/sparcccommand.cpp b/sparcccommand.cpp
new file mode 100644 (file)
index 0000000..a7f5d78
--- /dev/null
@@ -0,0 +1,579 @@
+//
+//  sparcccommand.cpp
+//  Mothur
+//
+//  Created by SarahsWork on 5/10/13.
+//  Copyright (c) 2013 Schloss Lab. All rights reserved.
+//
+
+#include "sparcccommand.h"
+
+
+//**********************************************************************************************************************
+vector<string> SparccCommand::setParameters(){
+       try {
+        CommandParameter pshared("shared", "InputTypes", "", "", "none", "none", "none","outputType",false,true); parameters.push_back(pshared);
+        CommandParameter pgroups("groups", "String", "", "", "", "", "","",false,false); parameters.push_back(pgroups);
+               CommandParameter plabel("label", "String", "", "", "", "", "","",false,false); parameters.push_back(plabel);
+               CommandParameter psamplings("samplings", "Number", "", "20", "", "", "","",false,false,false); parameters.push_back(psamplings);
+        CommandParameter piterations("iterations", "Number", "", "10", "", "", "","",false,false,false); parameters.push_back(piterations);
+        CommandParameter ppermutations("permutations", "Number", "", "1000", "", "", "","",false,false,false); parameters.push_back(ppermutations);
+        CommandParameter pmethod("method", "Multiple", "relabund-dirichlet", "dirichlet", "", "", "","",false,false); parameters.push_back(pmethod);
+        CommandParameter pprocessors("processors", "Number", "", "1", "", "", "","",false,false,true); parameters.push_back(pprocessors); 
+        //every command must have inputdir and outputdir.  This allows mothur users to redirect input and output files.
+               CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
+               CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
+               
+               vector<string> myArray;
+               for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
+               return myArray;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "SparccCommand", "setParameters");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+string SparccCommand::getHelpString(){
+       try {
+               string helpString = "";
+               helpString += "The sparcc command allows you to ....\n";
+               helpString += "The sparcc command parameters are: shared, groups, label, samplings, iterations, permutations, processors and method.\n";
+               helpString += "The samplings parameter is used to .... Default=20.\n";
+        helpString += "The iterations parameter is used to ....Default=10.\n";
+        helpString += "The permutations parameter is used to ....Default=1000.\n";
+        helpString += "The method parameter is used to ....Options are relabund and dirichlet. Default=dirichlet.\n";
+        helpString += "The default value for groups is all the groups in your sharedfile.\n";
+               helpString += "The label parameter is used to analyze specific labels in your shared file.\n";
+               helpString += "The sparcc command should be in the following format: sparcc(shared=yourSharedFile)\n";
+               helpString += "sparcc(shared=final.an.shared)\n";
+               return helpString;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "SparccCommand", "getHelpString");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+string SparccCommand::getOutputPattern(string type) {
+    try {
+        string pattern = "";
+        
+        if (type == "corr") {  pattern = "[filename],[distance],sparcc_correlation"; }
+        else if (type == "pvalue") {  pattern = "[filename],[distance],sparcc_pvalue"; }
+        else if (type == "sparccrelabund") {  pattern = "[filename],[distance],sparcc_relabund"; }
+        else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true;  }
+        
+        return pattern;
+    }
+    catch(exception& e) {
+        m->errorOut(e, "SparccCommand", "getOutputPattern");
+        exit(1);
+    }
+}
+//**********************************************************************************************************************
+SparccCommand::SparccCommand(){
+       try {
+               abort = true; calledHelp = true;
+               setParameters();
+        vector<string> tempOutNames;
+               outputTypes["corr"] = tempOutNames; 
+               outputTypes["pvalue"] = tempOutNames;
+               outputTypes["sparccrelabund"] = tempOutNames;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "SparccCommand", "SparccCommand");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+SparccCommand::SparccCommand(string option)  {
+       try {
+               abort = false; calledHelp = false;
+        allLines = 1;
+               
+               //allow user to run help
+               if(option == "help") { help(); abort = true; calledHelp = true; }
+               else if(option == "citation") { citation(); abort = true; calledHelp = true;}
+               
+               else {
+                       //valid paramters for this command
+                       vector<string> myArray = setParameters();
+                       
+                       OptionParser parser(option);
+                       map<string,string> parameters = parser.getParameters();
+                       
+                       ValidParameters validParameter;
+                       map<string,string>::iterator it;
+                       //check to make sure all parameters are valid for command
+                       for (it = parameters.begin(); it != parameters.end(); it++) {
+                               if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
+                       }
+                       
+            vector<string> tempOutNames;
+            outputTypes["corr"] = tempOutNames; //filetypes should be things like: shared, fasta, accnos...
+            outputTypes["pvalue"] = tempOutNames;
+            outputTypes["sparccrelabund"] = tempOutNames;
+            
+                       //if the user changes the input directory command factory will send this info to us in the output parameter
+                       string inputDir = validParameter.validFile(parameters, "inputdir", false);
+                       if (inputDir == "not found"){   inputDir = "";          }
+                       else {
+                 
+                               string path;
+                it = parameters.find("shared");
+                               //user has given a template file
+                               if(it != parameters.end()){
+                                       path = m->hasPath(it->second);
+                                       //if the user has not given a path then, add inputdir. else leave path alone.
+                                       if (path == "") {       parameters["shared"] = inputDir + it->second;           }
+                               }
+            }
+                        
+                       //check for parameters
+            //get shared file, it is required
+                       sharedfile = validParameter.validFile(parameters, "shared", true);
+                       if (sharedfile == "not open") { sharedfile = ""; abort = true; }
+                       else if (sharedfile == "not found") {
+                               //if there is a current shared file, use it
+                               sharedfile = m->getSharedFile();
+                               if (sharedfile != "") { m->mothurOut("Using " + sharedfile + " as input file for the shared parameter."); m->mothurOutEndLine(); }
+                               else {  m->mothurOut("You have no current sharedfile and the shared parameter is required."); m->mothurOutEndLine(); abort = true; }
+                       }else { m->setSharedFile(sharedfile); }
+            
+            //if the user changes the output directory command factory will send this info to us in the output parameter
+                       outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){
+                               outputDir = m->hasPath(sharedfile); //if user entered a file with a path then preserve it
+                       }
+            
+                       normalizeMethod = validParameter.validFile(parameters, "method", false);
+                       if (normalizeMethod == "not found") { normalizeMethod = "dirichlet"; }
+                       if ((normalizeMethod == "dirichlet") || (normalizeMethod == "relabund")) { }
+                       else { m->mothurOut(normalizeMethod + " is not a valid method.  Valid methods are dirichlet and relabund."); m->mothurOutEndLine(); abort = true; }
+            
+            
+            string temp = validParameter.validFile(parameters, "samplings", false);    if (temp == "not found"){       temp = "20";    }
+                       m->mothurConvert(temp, numSamplings);
+            
+            if(normalizeMethod == "relabund"){ numSamplings = 1; }
+            
+            temp = validParameter.validFile(parameters, "iterations", false);  if (temp == "not found"){       temp = "10";    }
+                       m->mothurConvert(temp, maxIterations);
+            
+            temp = validParameter.validFile(parameters, "permutations", false);        if (temp == "not found"){       temp = "1000";  }
+                       m->mothurConvert(temp, numPermutations);
+            
+            temp = validParameter.validFile(parameters, "processors", false);  if (temp == "not found"){       temp = m->getProcessors();      }
+                       m->setProcessors(temp);
+                       m->mothurConvert(temp, processors);
+    
+            string groups = validParameter.validFile(parameters, "groups", false);
+                       if (groups == "not found") { groups = ""; }
+                       else { m->splitAtDash(groups, Groups); }
+                       m->setGroups(Groups);
+            
+            string label = validParameter.validFile(parameters, "label", false);
+                       if (label == "not found") { label = ""; }
+                       else {
+                               if(label != "all") {  m->splitAtDash(label, labels);  allLines = 0;  }
+                               else { allLines = 1;  }
+                       }
+               }
+               
+       }
+       catch(exception& e) {
+               m->errorOut(e, "SparccCommand", "SparccCommand");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+int SparccCommand::execute(){
+       try {
+               
+               if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
+        
+        int start = time(NULL);
+        
+        InputData input(sharedfile, "sharedfile");
+        vector<SharedRAbundVector*> lookup = input.getSharedRAbundVectors();
+        string lastLabel = lookup[0]->getLabel();
+        
+        //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
+        set<string> processedLabels;
+        set<string> userLabels = labels;
+        
+        //as long as you are not at the end of the file or done wih the lines you want
+        while((lookup[0] != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
+            
+            if (m->control_pressed) { for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  }  for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); }return 0; }
+            
+            if(allLines == 1 || labels.count(lookup[0]->getLabel()) == 1){
+                
+                m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
+                
+                process(lookup);
+                
+                processedLabels.insert(lookup[0]->getLabel());
+                userLabels.erase(lookup[0]->getLabel());
+            }
+            
+            if ((m->anyLabelsToProcess(lookup[0]->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
+                string saveLabel = lookup[0]->getLabel();
+                
+                for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  }
+                lookup = input.getSharedRAbundVectors(lastLabel);
+                m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
+                
+                process(lookup);                
+                
+                processedLabels.insert(lookup[0]->getLabel());
+                userLabels.erase(lookup[0]->getLabel());
+                
+                //restore real lastlabel to save below
+                lookup[0]->setLabel(saveLabel);
+            }
+            
+            lastLabel = lookup[0]->getLabel();
+            //prevent memory leak
+            for (int i = 0; i < lookup.size(); i++) {  delete lookup[i]; lookup[i] = NULL; }
+            
+            if (m->control_pressed) { return 0; }
+            
+            //get next line to process
+            lookup = input.getSharedRAbundVectors();
+        }
+        
+        if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
+        
+        //output error messages about any remaining user labels
+        set<string>::iterator it;
+        bool needToRun = false;
+        for (it = userLabels.begin(); it != userLabels.end(); it++) {
+            m->mothurOut("Your file does not include the label " + *it);
+            if (processedLabels.count(lastLabel) != 1) {
+                m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
+                needToRun = true;
+            }else {
+                m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
+            }
+        }
+        
+        //run last label if you need to
+        if (needToRun == true)  {
+            for (int i = 0; i < lookup.size(); i++) { if (lookup[i] != NULL) { delete lookup[i]; } }
+            lookup = input.getSharedRAbundVectors(lastLabel);
+            
+            m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
+            
+            process(lookup);           
+            
+            for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  }
+        }
+        
+        if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
+        
+        m->mothurOut("It took " + toString(time(NULL) - start) + " seconds to process.");
+        m->mothurOutEndLine();
+        m->mothurOutEndLine();
+        
+        //output files created by command
+               m->mothurOutEndLine();
+               m->mothurOut("Output File Names: "); m->mothurOutEndLine();
+               for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
+               m->mothurOutEndLine();
+        return 0;
+               
+    }
+       catch(exception& e) {
+               m->errorOut(e, "SparccCommand", "execute");
+               exit(1);
+       }
+}
+/**************************************************************************************************/
+vector<vector<float> > SparccCommand::shuffleSharedVector(vector<vector<float> >& sharedVector){
+    try {
+        int numGroups = (int)sharedVector.size();
+        int numOTUs = (int)sharedVector[0].size();
+        
+        vector<vector<float> > shuffledVector = sharedVector;
+        
+        for(int i=0;i<numGroups;i++){
+            for(int j=0;j<numOTUs;j++){
+                shuffledVector[i][j] = sharedVector[rand()%numGroups][j];
+            }
+        }
+        
+        return shuffledVector;
+    }
+       catch(exception& e) {
+               m->errorOut(e, "SparccCommand", "execute");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+int SparccCommand::process(vector<SharedRAbundVector*>& lookup){
+       try {
+        cout.setf(ios::fixed, ios::floatfield);
+        cout.setf(ios::showpoint);
+        
+        vector<vector<float> > sharedVector;
+        vector<string> otuNames = m->currentBinLabels;
+        
+        //fill sharedVector to pass to CalcSparcc
+        for (int i = 0; i < lookup.size(); i++) {
+            vector<int> abunds = lookup[i]->getAbundances();
+            vector<float> temp;
+            for (int j = 0; j < abunds.size(); j++) { temp.push_back((float) abunds[j]); }
+            sharedVector.push_back(temp);
+        }
+        int numOTUs = (int)sharedVector[0].size();
+        int numGroups = lookup.size();
+        
+        map<string, string> variables;
+        variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(sharedfile));
+        variables["[distance]"] = lookup[0]->getLabel();
+        
+
+        string relAbundFileName = getOutputFileName("sparccrelabund", variables);
+        ofstream relAbundFile;
+        m->openOutputFile(relAbundFileName, relAbundFile);
+        outputNames.push_back(relAbundFileName);  outputTypes["sparccrelabund"].push_back(relAbundFileName);
+        
+        relAbundFile << "OTU\taveRelAbund\n";
+        for(int i=0;i<numOTUs;i++){
+            if (m->control_pressed) { relAbundFile.close(); return 0; }
+            
+            double relAbund = 0.0000;
+            for(int j=0;j<numGroups;j++){
+                relAbund += sharedVector[j][i]/(double)lookup[j]->getNumSeqs();
+            }
+            relAbundFile << otuNames[i] <<'\t' << relAbund / (double) numGroups << endl;
+        }
+        relAbundFile.close();
+        
+        CalcSparcc originalData(sharedVector, maxIterations, numSamplings, normalizeMethod);
+        vector<vector<float> > origCorrMatrix = originalData.getRho();
+        
+        string correlationFileName = getOutputFileName("corr", variables);
+        ofstream correlationFile;
+        m->openOutputFile(correlationFileName, correlationFile);
+        outputNames.push_back(correlationFileName);  outputTypes["corr"].push_back(correlationFileName);
+        correlationFile.setf(ios::fixed, ios::floatfield);
+        correlationFile.setf(ios::showpoint);
+        
+        for(int i=0;i<numOTUs;i++){ correlationFile << '\t' << otuNames[i];    }   correlationFile << endl;
+        for(int i=0;i<numOTUs;i++){
+            correlationFile << otuNames[i];
+            for(int j=0;j<numOTUs;j++){
+                correlationFile << '\t' << origCorrMatrix[i][j];
+            }
+            correlationFile << endl;
+        }
+        
+        
+        if(numPermutations != 0){
+            vector<vector<float> > pValues = createProcesses(sharedVector, origCorrMatrix);
+            
+            if (m->control_pressed) { return 0; }
+            
+            string pValueFileName = getOutputFileName("pvalue", variables);
+            ofstream pValueFile;
+            m->openOutputFile(pValueFileName, pValueFile);
+            outputNames.push_back(pValueFileName);  outputTypes["pvalue"].push_back(pValueFileName);
+            pValueFile.setf(ios::fixed, ios::floatfield);
+            pValueFile.setf(ios::showpoint);
+            
+            for(int i=0;i<numOTUs;i++){ pValueFile << '\t' << otuNames[i];    }   pValueFile << endl;
+            for(int i=0;i<numOTUs;i++){
+                pValueFile << otuNames[i];
+                for(int j=0;j<numOTUs;j++){
+                    pValueFile << '\t' << pValues[i][j];
+                }
+                pValueFile << endl;
+            }
+        }
+
+
+        return 0;
+    }
+       catch(exception& e) {
+               m->errorOut(e, "SparccCommand", "process");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+vector<vector<float> > SparccCommand::createProcesses(vector<vector<float> >& sharedVector, vector<vector<float> >& origCorrMatrix){
+       try {
+        int numOTUs = sharedVector[0].size();
+        vector<vector<float> > pValues;
+        
+        if(processors == 1){
+                       pValues = driver(sharedVector, origCorrMatrix, numPermutations);
+               }else{
+            //divide iters between processors
+                       vector<int> procIters;
+            int numItersPerProcessor = numPermutations / processors;
+            
+            //divide iters between processes
+            for (int h = 0; h < processors; h++) {
+                if(h == processors - 1){ numItersPerProcessor = numPermutations - h * numItersPerProcessor; }
+                procIters.push_back(numItersPerProcessor);
+            }
+            
+            vector<int> processIDS;
+            int process = 1;
+                       
+#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
+            
+                       //loop through and create all the processes you want
+                       while (process != processors) {
+                               int pid = fork();
+                               
+                               if (pid > 0) {
+                                       processIDS.push_back(pid);  //create map from line number to pid so you can append files in correct order later
+                                       process++;
+                               }else if (pid == 0){
+                                       pValues = driver(sharedVector, origCorrMatrix, procIters[process]);
+                                       
+                                       //pass pvalues to parent
+                                       ofstream out;
+                                       string tempFile = toString(getpid()) + ".pvalues.temp";
+                                       m->openOutputFile(tempFile, out);
+                                       
+                                       //pass values
+                                       for (int i = 0; i < pValues.size(); i++) {
+                        for (int j = 0; j < pValues[i].size(); j++) {
+                            out << pValues[i][j] << '\t';
+                        }
+                        out << endl;
+                                       }
+                                       out << endl;
+                                       
+                                       out.close();
+                                       
+                                       exit(0);
+                               }else {
+                                       m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine();
+                                       for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
+                                       exit(0);
+                               }
+                       }
+                       
+                       //do my part
+                       pValues = driver(sharedVector, origCorrMatrix, procIters[0]);
+                       
+                       //force parent to wait until all the processes are done
+                       for (int i=0;i<processIDS.size();i++) {
+                               int temp = processIDS[i];
+                               wait(&temp);
+                       }
+                       
+                       //combine results
+                       for (int i = 0; i < processIDS.size(); i++) {
+                               ifstream in;
+                               string tempFile =  toString(processIDS[i]) + ".pvalues.temp";
+                               m->openInputFile(tempFile, in);
+                               
+                               ////// to do ///////////
+                               int numTemp; numTemp = 0;
+                               
+                for (int j = 0; j < pValues.size(); j++) {
+                    for (int k = 0; k < pValues.size(); k++) {
+                        in >> numTemp; m->gobble(in);
+                        pValues[j][k] += numTemp;
+                    }
+                    m->gobble(in);
+                               }
+                               in.close(); m->mothurRemove(tempFile);
+                       }
+#else
+            
+            //fill in functions
+            vector<sparccData*> pDataArray;
+            DWORD   dwThreadIdArray[processors-1];
+            HANDLE  hThreadArray[processors-1];
+            
+            //Create processor worker threads.
+            for( int i=1; i<processors; i++ ){
+                
+                //make copy so we don't get access violations
+                vector< vector<float> > copySharedVector = sharedVector;
+                vector< vector<float> > copyOrig = origCorrMatrix;
+                
+                sparccData* temp = new sparccData(m, procIters[i], copySharedVector, copyOrig, numSamplings, maxIterations, numPermutations, normalizeMethod);
+                pDataArray.push_back(temp);
+                processIDS.push_back(i);
+                
+                hThreadArray[i-1] = CreateThread(NULL, 0, MySparccThreadFunction, pDataArray[i-1], 0, &dwThreadIdArray[i-1]);
+            }
+            
+            //do my part
+                       pValues = driver(sharedVector, origCorrMatrix, procIters[0]);
+            
+            //Wait until all threads have terminated.
+            WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE);
+            
+            //Close all thread handles and free memory allocations.
+            for(int i=0; i < pDataArray.size(); i++){
+                for (int j = 0; j < pDataArray[i]->pValues.size(); j++) {
+                    for (int k = 0; k < pDataArray[i]->pValues[j].size(); k++) {
+                        pValues[j][k] += pDataArray[i]->pValues[j][k];
+                    }
+                }
+                
+                CloseHandle(hThreadArray[i]);
+                delete pDataArray[i];
+            }
+#endif
+               }
+
+        for(int i=0;i<numOTUs;i++){
+            pValues[i][i] = 1;
+            for(int j=0;j<i;j++){
+                pValues[i][j] /= (double)numPermutations;
+                pValues[j][i] = pValues[i][j];
+            }
+        }
+        
+        return pValues;
+    }
+       catch(exception& e) {
+               m->errorOut(e, "SparccCommand", "createProcesses");
+               exit(1);
+       }
+}
+
+//**********************************************************************************************************************
+vector<vector<float> > SparccCommand::driver(vector<vector<float> >& sharedVector, vector<vector<float> >& origCorrMatrix, int numPerms){
+       try {
+        int numOTUs = sharedVector[0].size();
+        vector<vector<float> > sharedShuffled = sharedVector;
+        vector<vector<float> > pValues(numOTUs);
+        for(int i=0;i<numOTUs;i++){ pValues[i].assign(numOTUs, 0);  }
+
+        for(int i=0;i<numPerms;i++){
+            if (m->control_pressed) { return pValues; }
+            sharedShuffled = shuffleSharedVector(sharedVector);
+            CalcSparcc permutedData(sharedShuffled, maxIterations, numSamplings, normalizeMethod);
+            vector<vector<float> > permuteCorrMatrix = permutedData.getRho();
+            
+            for(int j=0;j<numOTUs;j++){
+                for(int k=0;k<j;k++){
+                    double randValue = permuteCorrMatrix[j][k];
+                    double observedValue = origCorrMatrix[j][k];
+                    if(observedValue >= 0 &&  randValue > observedValue)   { pValues[j][k]++; }//this method seems to deflate the
+                    else if(observedValue < 0 && randValue < observedValue){ pValues[j][k]++; }//pvalues of small rho values
+                }
+            }
+            if((i+1) % (int)(numPermutations * 0.05) == 0){ cout << i+1 << endl;  }
+        }
+        
+        return pValues;
+    }
+       catch(exception& e) {
+               m->errorOut(e, "SparccCommand", "driver");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+
diff --git a/sparcccommand.h b/sparcccommand.h
new file mode 100644 (file)
index 0000000..8add3ed
--- /dev/null
@@ -0,0 +1,133 @@
+//
+//  sparcccommand.h
+//  Mothur
+//
+//  Created by SarahsWork on 5/10/13.
+//  Copyright (c) 2013 Schloss Lab. All rights reserved.
+//
+
+#ifndef Mothur_sparcccommand_h
+#define Mothur_sparcccommand_h
+
+#include "command.hpp"
+#include "inputdata.h"
+#include "calcsparcc.h"
+
+/**************************************************************************************************/
+
+class SparccCommand : public Command {
+public:
+    SparccCommand(string);
+    SparccCommand();
+    ~SparccCommand(){}
+    
+    vector<string> setParameters();
+    string getCommandName()                    { return "sparcc";                      }
+    string getCommandCategory()                { return "OTU-Based Approaches";                }
+    
+    string getOutputPattern(string);
+    //commmand category choices: Sequence Processing, OTU-Based Approaches, Hypothesis Testing, Phylotype Analysis, General, Clustering and Hidden
+       string getHelpString();
+    string getCitation() { return "Friedman J, Alm EJ (2012) Inferring Correlation Networks from Genomic Survey Data. PLoS Comput Biol 8(9): e1002687. doi:10.1371/journal.pcbi.1002687 http://www.mothur.org/wiki/Sparcc"; }
+    string getDescription()            { return "brief description"; }
+    
+    int execute();
+    void help() { m->mothurOut(getHelpString()); }
+    
+private:
+    bool abort, allLines;
+    string outputDir, sharedfile, normalizeMethod;
+    int numSamplings, maxIterations, numPermutations, processors;
+    set<string> labels;
+    vector<string> Groups;
+    vector<string> outputNames;
+    
+    int process(vector<SharedRAbundVector*>&);
+    vector<vector<float> > createProcesses(vector<vector<float> >&, vector<vector<float> >&);
+    vector<vector<float> > driver(vector<vector<float> >&, vector<vector<float> >&, int);
+    vector<vector<float> > shuffleSharedVector(vector<vector<float> >&);
+};
+
+/**************************************************************************************************/
+
+struct sparccData {
+       MothurOut* m;
+    int numPerms;
+    vector< vector<float> > sharedVector;
+    vector< vector<float> > origCorrMatrix;
+    vector<vector<float> > pValues;
+    int numSamplings, maxIterations, numPermutations;
+    string normalizeMethod;
+       
+       sparccData(){}
+       sparccData(MothurOut* mout, int it, vector< vector<float> > cs, vector< vector<float> > co, int ns, int mi, int np, string nm) {
+               m = mout;
+        numPerms = it;
+        sharedVector = cs;
+        origCorrMatrix = co;
+        numSamplings = ns;
+        maxIterations = mi;
+        numPermutations = np;
+        normalizeMethod = nm;
+    }
+};
+/**************************************************************************************************/
+#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
+#else
+static DWORD WINAPI MySparccThreadFunction(LPVOID lpParam){
+       sparccData* pDataArray;
+       pDataArray = (sparccData*)lpParam;
+       
+       try {
+        
+        int numOTUs = pDataArray->sharedVector[0].size();
+        vector<vector<float> > sharedShuffled = pDataArray->sharedVector;
+        pDataArray->pValues.resize(numOTUs);
+        for(int i=0;i<numOTUs;i++){ pDataArray->pValues[i].assign(numOTUs, 0);  }
+        
+        for(int i=0;i<pDataArray->numPerms;i++){
+            if (pDataArray->m->control_pressed) { return 0; }
+            
+            //sharedShuffled = shuffleSharedVector(sharedVector);
+            //////////////////////////////////////////////////////////
+            int numGroups = (int)pDataArray->sharedVector.size();
+            sharedShuffled = pDataArray->sharedVector;
+            
+            for(int k=0;k<numGroups;k++){
+                for(int j=0;j<numOTUs;j++){
+                    sharedShuffled[k][j] = pDataArray->sharedVector[rand()%numGroups][j];
+                }
+            }
+            /////////////////////////////////////////////////////////
+            
+            CalcSparcc permutedData(sharedShuffled, pDataArray->maxIterations, pDataArray->numSamplings, pDataArray->normalizeMethod);
+            vector<vector<float> > permuteCorrMatrix = permutedData.getRho();
+            
+            for(int j=0;j<numOTUs;j++){
+                for(int k=0;k<j;k++){
+                    double randValue = permuteCorrMatrix[j][k];
+                    double observedValue = pDataArray->origCorrMatrix[j][k];
+                    if(observedValue >= 0 &&  randValue > observedValue)   { pDataArray->pValues[j][k]++; }//this method seems to deflate the
+                    else if(observedValue < 0 && randValue < observedValue){ pDataArray->pValues[j][k]++; }//pvalues of small rho values
+                }
+            }
+            if((i+1) % (int)(pDataArray->numPermutations * 0.05) == 0){ cout << i+1 << endl;  }
+        }
+        
+        return 0;
+               
+       }
+       catch(exception& e) {
+               pDataArray->m->errorOut(e, "SparccCommand", "MySparccThreadFunction");
+               exit(1);
+       }
+}
+#endif
+
+
+/**************************************************************************************************/
+
+
+
+
+#endif
index 2e8b9055eb117f02596864cd35289d92ea3a4a87..f6b5c4d81cf5446adc5e5253808380be7ebbf28d 100644 (file)
@@ -182,7 +182,7 @@ int SplitMatrix::createDistanceFilesFromTax(map<string, int>& seqGroup, int numG
                }
                
                copyGroups.clear();
-               
+        
                //process each distance file
                for (int i = 0; i < numGroups; i++) { 
                        
@@ -207,6 +207,9 @@ int SplitMatrix::createDistanceFilesFromTax(map<string, int>& seqGroup, int numG
             else { m->mothurRemove((countfile + "." + toString(i) + ".temp")); }
                }
         
+        //restore old fasta file name since dist.seqs overwrites it with the temp files
+        m->setFastaFile(fastafile);
+        
         vector<string> tempDistFiles;    
         for(int i=0;i<numGroups;i++){
             if (outputDir == "") { outputDir = m->hasPath(fastafile); }
index 0119741988c010e0d3b3d3df1bbc4c052f5bba8f..80698f1b4b0317719421dbe9c4af3c0583bd9d97 100644 (file)
@@ -662,8 +662,18 @@ int TreeGroupCommand::makeSimsShared() {
             
             if (lookup.size() < 2) { m->mothurOut("You have not provided enough valid groups.  I cannot run the command."); m->mothurOutEndLine(); m->control_pressed = true; return 0; }
         }
-        
         numGroups = lookup.size();
+        
+        //sanity check to make sure processors < numComparisions
+        int numDists = 0;
+        for(int i=0;i<numGroups;i++){
+            for(int j=0;j<i;j++){
+                numDists++;
+                if (numDists > processors) { break; }
+            }
+        }
+        if (numDists < processors) { processors = numDists; }
+        
                lines.resize(processors);
                for (int i = 0; i < processors; i++) {
                        lines[i].start = int (sqrt(float(i)/float(processors)) * numGroups);
@@ -918,6 +928,8 @@ int TreeGroupCommand::process(vector<SharedRAbundVector*> thisLookup) {
                 thisItersLookup.clear();
                 for (int i = 0; i < calcDists.size(); i++) {  calcDists[i].clear(); }
             }
+            
+            if (m->debug) {  m->mothurOut("[DEBUG]: iter = " + toString(thisIter) + ".\n"); }
                }
         
                if (m->debug) {  m->mothurOut("[DEBUG]: done with iters.\n"); }