]> git.donarmstrong.com Git - mothur.git/commitdiff
Merge branch 'master' into dev-ml
authorAbu Zaher Md. Faridee <zaher14@gmail.com>
Thu, 23 May 2013 21:17:06 +0000 (03:17 +0600)
committerAbu Zaher Md. Faridee <zaher14@gmail.com>
Thu, 23 May 2013 21:17:06 +0000 (03:17 +0600)
51 files changed:
Mothur.xcodeproj/project.pbxproj
calcsparcc.cpp [new file with mode: 0644]
calcsparcc.h [new file with mode: 0644]
clustersplitcommand.cpp
commandfactory.cpp
createdatabasecommand.cpp
createdatabasecommand.h
flowdata.cpp
getlineagecommand.cpp
indicatorcommand.cpp
indicatorcommand.h
linearalgebra.cpp
linearalgebra.h
makefile
makelookupcommand.cpp [new file with mode: 0644]
makelookupcommand.h [new file with mode: 0644]
mothurout.cpp
mothurout.h
parsefastaqcommand.cpp
parsimony.cpp
parsimony.h
phylodiversitycommand.cpp
phylodiversitycommand.h
prcseqscommand.cpp
preclustercommand.cpp
qualityscores.cpp
randomnumber.cpp [new file with mode: 0644]
randomnumber.h [new file with mode: 0644]
raredisplay.cpp
raredisplay.h
readcolumn.cpp
removegroupscommand.cpp
sensspeccommand.cpp
sensspeccommand.h
seqerrorcommand.cpp
sequence.cpp
setdircommand.cpp
setdircommand.h
sffmultiplecommand.cpp
sharedrabundfloatvector.cpp
sharedrabundfloatvector.h
shhhercommand.cpp
sparcccommand.cpp [new file with mode: 0644]
sparcccommand.h [new file with mode: 0644]
splitmatrix.cpp
summarysharedcommand.cpp
treegroupscommand.cpp
trimflowscommand.cpp
trimseqscommand.cpp
unweighted.cpp
unweighted.h

index b2d579f7b492326ca6db86ea25c99fa284432438..0ac7ba85a278ffde691f78cf49f7101dd5aa51b1 100644 (file)
@@ -53,6 +53,9 @@
                A77410F614697C300098E6AC /* seqnoise.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A77410F414697C300098E6AC /* seqnoise.cpp */; };
                A778FE6B134CA6CA00C0BA33 /* getcommandinfocommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A778FE6A134CA6CA00C0BA33 /* getcommandinfocommand.cpp */; };
                A77A221F139001B600B0BE70 /* deuniquetreecommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A77A221E139001B600B0BE70 /* deuniquetreecommand.cpp */; };
+               A77B7185173D2240002163C2 /* sparcccommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A77B7184173D2240002163C2 /* sparcccommand.cpp */; };
+               A77B7188173D4042002163C2 /* randomnumber.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A77B7186173D4041002163C2 /* randomnumber.cpp */; };
+               A77B718B173D40E5002163C2 /* calcsparcc.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A77B7189173D40E4002163C2 /* calcsparcc.cpp */; };
                A77E1938161B201E00DB1A2A /* randomforest.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A77E1937161B201E00DB1A2A /* randomforest.cpp */; };
                A77E193B161B289600DB1A2A /* rftreenode.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A77E193A161B289600DB1A2A /* rftreenode.cpp */; };
                A77EBD2F1523709100ED407C /* createdatabasecommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A77EBD2E1523709100ED407C /* createdatabasecommand.cpp */; };
@@ -75,6 +78,7 @@
                A7C7DAB915DA758B0059B0CF /* sffmultiplecommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7C7DAB815DA758B0059B0CF /* sffmultiplecommand.cpp */; };
                A7D755DA1535F679009BF21A /* treereader.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7D755D91535F679009BF21A /* treereader.cpp */; };
                A7E0243D15B4520A00A5F046 /* sparsedistancematrix.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E0243C15B4520A00A5F046 /* sparsedistancematrix.cpp */; };
+               A7E6F69E17427D06006775E2 /* makelookupcommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E6F69D17427D06006775E2 /* makelookupcommand.cpp */; };
                A7E9B88112D37EC400DA6239 /* ace.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B64F12D37EC300DA6239 /* ace.cpp */; };
                A7E9B88212D37EC400DA6239 /* aligncommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B65112D37EC300DA6239 /* aligncommand.cpp */; };
                A7E9B88312D37EC400DA6239 /* alignment.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B65312D37EC300DA6239 /* alignment.cpp */; };
                A778FE6A134CA6CA00C0BA33 /* getcommandinfocommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = getcommandinfocommand.cpp; sourceTree = "<group>"; };
                A77A221D139001B600B0BE70 /* deuniquetreecommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = deuniquetreecommand.h; sourceTree = "<group>"; };
                A77A221E139001B600B0BE70 /* deuniquetreecommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = deuniquetreecommand.cpp; sourceTree = "<group>"; };
+               A77B7183173D222F002163C2 /* sparcccommand.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = sparcccommand.h; sourceTree = "<group>"; };
+               A77B7184173D2240002163C2 /* sparcccommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = sparcccommand.cpp; sourceTree = "<group>"; };
+               A77B7186173D4041002163C2 /* randomnumber.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = randomnumber.cpp; sourceTree = "<group>"; };
+               A77B7187173D4041002163C2 /* randomnumber.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = randomnumber.h; sourceTree = "<group>"; };
+               A77B7189173D40E4002163C2 /* calcsparcc.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = calcsparcc.cpp; sourceTree = "<group>"; };
+               A77B718A173D40E4002163C2 /* calcsparcc.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = calcsparcc.h; sourceTree = "<group>"; };
                A77E1937161B201E00DB1A2A /* randomforest.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = randomforest.cpp; sourceTree = "<group>"; };
                A77E193A161B289600DB1A2A /* rftreenode.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = rftreenode.cpp; sourceTree = "<group>"; };
                A77EBD2C1523707F00ED407C /* createdatabasecommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = createdatabasecommand.h; sourceTree = "<group>"; };
                A7DAAFA3133A254E003956EB /* commandparameter.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = commandparameter.h; sourceTree = "<group>"; };
                A7E0243C15B4520A00A5F046 /* sparsedistancematrix.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = sparsedistancematrix.cpp; sourceTree = "<group>"; };
                A7E0243F15B4522000A5F046 /* sparsedistancematrix.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = sparsedistancematrix.h; sourceTree = "<group>"; };
+               A7E6F69C17427CF2006775E2 /* makelookupcommand.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = makelookupcommand.h; sourceTree = "<group>"; };
+               A7E6F69D17427D06006775E2 /* makelookupcommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = makelookupcommand.cpp; sourceTree = "<group>"; };
                A7E9B64F12D37EC300DA6239 /* ace.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = ace.cpp; sourceTree = "<group>"; };
                A7E9B65012D37EC300DA6239 /* ace.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = ace.h; sourceTree = "<group>"; };
                A7E9B65112D37EC300DA6239 /* aligncommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = aligncommand.cpp; sourceTree = "<group>"; };
                                A7A61F1A130035C800E05B6B /* LICENSE */,
                                A70332B512D3A13400761E33 /* makefile */,
                                A7E9B65912D37EC300DA6239 /* averagelinkage.cpp */,
+                               A77B718A173D40E4002163C2 /* calcsparcc.h */,
+                               A77B7189173D40E4002163C2 /* calcsparcc.cpp */,
                                A7E9BA4F12D398D700DA6239 /* clearcut */,
                                A7E9B69812D37EC400DA6239 /* cluster.cpp */,
                                A7E9B69912D37EC400DA6239 /* cluster.hpp */,
                                A7E9B79C12D37EC400DA6239 /* progress.hpp */,
                                A7548FAF171440ED00B1F05A /* qFinderDMM.h */,
                                A7548FAE171440EC00B1F05A /* qFinderDMM.cpp */,
+                               A77B7187173D4041002163C2 /* randomnumber.h */,
+                               A77B7186173D4041002163C2 /* randomnumber.cpp */,
                                A7E9B7A512D37EC400DA6239 /* rarecalc.cpp */,
                                A7386C191619C9FB00651424 /* randomforest */,
                                A7E9B7A612D37EC400DA6239 /* rarecalc.h */,
                                A799F5B81309A3E000AEEFA0 /* makefastqcommand.cpp */,
                                A7E9B74412D37EC400DA6239 /* makegroupcommand.h */,
                                A7E9B74312D37EC400DA6239 /* makegroupcommand.cpp */,
+                               A7E6F69C17427CF2006775E2 /* makelookupcommand.h */,
+                               A7E6F69D17427D06006775E2 /* makelookupcommand.cpp */,
                                A7E9B74A12D37EC400DA6239 /* matrixoutputcommand.h */,
                                A7E9B74912D37EC400DA6239 /* matrixoutputcommand.cpp */,
                                A7E9B75412D37EC400DA6239 /* mergefilecommand.h */,
                                A774101314695AF60098E6AC /* shhhseqscommand.cpp */,
                                A7A32DAC14DC43D10001D2E5 /* sortseqscommand.h */,
                                A7A32DA914DC43B00001D2E5 /* sortseqscommand.cpp */,
+                               A77B7183173D222F002163C2 /* sparcccommand.h */,
+                               A77B7184173D2240002163C2 /* sparcccommand.cpp */,
                                A7E9B84012D37EC400DA6239 /* splitabundcommand.h */,
                                A7E9B83F12D37EC400DA6239 /* splitabundcommand.cpp */,
                                A7E9B84212D37EC400DA6239 /* splitgroupscommand.h */,
                                A799314B16CBD0CD0017E888 /* mergetaxsummarycommand.cpp in Sources */,
                                A7548FAD17142EBC00B1F05A /* getmetacommunitycommand.cpp in Sources */,
                                A7548FB0171440ED00B1F05A /* qFinderDMM.cpp in Sources */,
+                               A77B7185173D2240002163C2 /* sparcccommand.cpp in Sources */,
+                               A77B7188173D4042002163C2 /* randomnumber.cpp in Sources */,
+                               A77B718B173D40E5002163C2 /* calcsparcc.cpp in Sources */,
+                               A7E6F69E17427D06006775E2 /* makelookupcommand.cpp in Sources */,
                        );
                        runOnlyForDeploymentPostprocessing = 0;
                };
diff --git a/calcsparcc.cpp b/calcsparcc.cpp
new file mode 100644 (file)
index 0000000..9d1d88a
--- /dev/null
@@ -0,0 +1,344 @@
+//
+//  runSparcc.cpp
+//  PDSSparCC
+//
+//  Created by Patrick Schloss on 10/31/12.
+//  Copyright (c) 2012 University of Michigan. All rights reserved.
+//
+
+#include "calcSparcc.h"
+#include "linearalgebra.h"
+
+/**************************************************************************************************/
+
+CalcSparcc::CalcSparcc(vector<vector<float> > sharedVector, int maxIterations, int numSamplings, string method){
+    try {
+        m = MothurOut::getInstance();
+        numOTUs = (int)sharedVector[0].size();
+        numGroups = (int)sharedVector.size();
+        normalizationMethod = method;
+        int numOTUs = (int)sharedVector[0].size();
+        
+        addPseudoCount(sharedVector);
+        
+        vector<vector<vector<float> > > allCorrelations(numSamplings);
+        
+        //    float cycClockStart = clock();
+        //    unsigned long long cycTimeStart = time(NULL);
+        
+        for(int i=0;i<numSamplings;i++){
+            if (m->control_pressed) { break; }
+            vector<float> logFractions =  getLogFractions(sharedVector, method);
+            getT_Matrix(logFractions);     //this step is slow...
+            getT_Vector();
+            getD_Matrix();
+            vector<float> basisVariances = getBasisVariances();     //this step is slow...
+            vector<vector<float> > correlation = getBasisCorrelations(basisVariances);
+            
+            excluded.resize(numOTUs);
+            for(int j=0;j<numOTUs;j++){ excluded[j].assign(numOTUs, 0); }
+            
+            float maxRho = 1;
+            int excludeRow = -1;
+            int excludeColumn = -1;
+            
+            int iter = 0;
+            while(maxRho > 0.10 && iter < maxIterations){
+                maxRho = getExcludedPairs(correlation, excludeRow, excludeColumn);
+                excludeValues(excludeRow, excludeColumn);
+                vector<float> excludedBasisVariances = getBasisVariances();
+                correlation = getBasisCorrelations(excludedBasisVariances);
+                iter++;
+            }
+            allCorrelations[i] = correlation;
+        }
+        
+        if (!m->control_pressed) {
+            if(numSamplings > 1){
+                getMedian(allCorrelations);
+            }
+            else{
+                median = allCorrelations[0];
+            }
+        }
+//    cout << median[0][3] << '\t' << median[0][6] << endl;
+    }
+    catch(exception& e) {
+        m->errorOut(e, "CalcSparcc", "CalcSparcc");
+        exit(1);
+    }
+}
+    
+/**************************************************************************************************/
+
+void CalcSparcc::addPseudoCount(vector<vector<float> >& sharedVector){
+    try {
+        for(int i=0;i<numGroups;i++){   //iterate across the groups
+            if (m->control_pressed) { return; }
+            for(int j=0;j<numOTUs;j++){
+                sharedVector[i][j] += 1;
+            }
+        }
+    }
+    catch(exception& e) {
+        m->errorOut(e, "CalcSparcc", "addPseudoCount");
+        exit(1);
+    }
+
+}
+
+/**************************************************************************************************/
+
+vector<float> CalcSparcc::getLogFractions(vector<vector<float> > sharedVector, string method){   //dirichlet by default
+    try {
+        vector<float> logSharedFractions(numGroups * numOTUs, 0);
+        
+        if(method == "dirichlet"){
+            vector<float> alphas(numGroups);
+            for(int i=0;i<numGroups;i++){   //iterate across the groups
+                if (m->control_pressed) { return logSharedFractions; }
+                alphas = RNG.randomDirichlet(sharedVector[i]);
+                
+                for(int j=0;j<numOTUs;j++){
+                    logSharedFractions[i * numOTUs + j] = alphas[j];
+                }
+            }
+        }
+        else if(method == "relabund"){
+            for(int i=0;i<numGroups;i++){
+                if (m->control_pressed) { return logSharedFractions; }
+                float total = 0.0;
+                for(int j=0;j<numOTUs;j++){
+                    total += sharedVector[i][j];
+                }
+                for(int j=0;j<numOTUs;j++){
+                    logSharedFractions[i * numOTUs + j] = sharedVector[i][j]/total;
+                }
+            }
+        }
+        
+        for(int i=0;i<logSharedFractions.size();i++){
+            logSharedFractions[i] = log(logSharedFractions[i]);
+        }
+        
+        return logSharedFractions;
+    }
+    catch(exception& e) {
+        m->errorOut(e, "CalcSparcc", "addPseudoCount");
+        exit(1);
+    }
+
+}
+
+/**************************************************************************************************/
+
+void CalcSparcc::getT_Matrix(vector<float> sharedFractions){
+    try {
+        tMatrix.resize(numOTUs * numOTUs, 0);
+        
+        vector<float> diff(numGroups);
+        
+        for(int j1=0;j1<numOTUs;j1++){
+            for(int j2=0;j2<j1;j2++){
+                if (m->control_pressed) { return; }
+                float mean = 0.0;
+                for(int i=0;i<numGroups;i++){
+                    diff[i] = sharedFractions[i * numOTUs + j1] - sharedFractions[i * numOTUs + j2];
+                    mean += diff[i];
+                }
+                mean /= float(numGroups);
+                
+                float variance = 0.0;
+                for(int i=0;i<numGroups;i++){
+                    variance += (diff[i] - mean) * (diff[i] - mean);
+                }
+                variance /= (float)(numGroups-1);
+                
+                tMatrix[j1 * numOTUs + j2] = variance;
+                tMatrix[j2 * numOTUs + j1] = tMatrix[j1 * numOTUs + j2];
+            }
+        }
+    }
+    catch(exception& e) {
+        m->errorOut(e, "CalcSparcc", "getT_Matrix");
+        exit(1);
+    }
+
+}
+
+/**************************************************************************************************/
+
+void CalcSparcc::getT_Vector(){
+    try {
+        tVector.assign(numOTUs, 0);
+        
+        for(int j1=0;j1<numOTUs;j1++){
+            if (m->control_pressed) { return; }
+            for(int j2=0;j2<numOTUs;j2++){
+                tVector[j1] += tMatrix[j1 * numOTUs + j2];
+            }
+        }
+    }
+    catch(exception& e) {
+        m->errorOut(e, "CalcSparcc", "getT_Vector");
+        exit(1);
+    }
+}
+
+/**************************************************************************************************/
+
+void CalcSparcc::getD_Matrix(){
+    try {
+        float d = numOTUs - 1.0;
+        
+        dMatrix.resize(numOTUs);
+        for(int i=0;i<numOTUs;i++){
+            if (m->control_pressed) { return; }
+            dMatrix[i].resize(numOTUs, 1);
+            dMatrix[i][i] = d;
+        }
+    }
+    catch(exception& e) {
+        m->errorOut(e, "CalcSparcc", "getD_Matrix");
+        exit(1);
+    }
+}
+
+/**************************************************************************************************/
+
+vector<float> CalcSparcc::getBasisVariances(){
+    try {
+        LinearAlgebra LA;
+        
+        vector<float> variances = LA.solveEquations(dMatrix, tVector);
+        
+        for(int i=0;i<variances.size();i++){
+            if (m->control_pressed) { return variances; }
+            if(variances[i] < 0){   variances[i] = 1e-4;    }
+        }
+        
+        return variances;
+    }
+    catch(exception& e) {
+        m->errorOut(e, "CalcSparcc", "getBasisVariances");
+        exit(1);
+    }
+}
+
+/**************************************************************************************************/
+
+vector<vector<float> > CalcSparcc::getBasisCorrelations(vector<float> basisVariance){
+    try {
+        vector<vector<float> > rho(numOTUs);
+        for(int i=0;i<numOTUs;i++){ rho[i].resize(numOTUs, 0);    }
+        
+        for(int i=0;i<numOTUs;i++){
+            float var_i = basisVariance[i];
+            float sqrt_var_i = sqrt(var_i);
+            
+            rho[i][i] = 1.00;
+            
+            for(int j=0;j<i;j++){
+                if (m->control_pressed) { return rho; }
+                float var_j = basisVariance[j];
+                
+                rho[i][j] = (var_i + var_j - tMatrix[i * numOTUs + j]) / (2.0 * sqrt_var_i * sqrt(var_j));
+                if(rho[i][j] > 1.0)         {   rho[i][j] = 1.0;   }
+                else if(rho[i][j] < -1.0)   {   rho[i][j] = -1.0;  }
+                
+                rho[j][i] = rho[i][j];
+                
+            }
+        }
+        
+        return rho;
+    }
+    catch(exception& e) {
+        m->errorOut(e, "CalcSparcc", "getBasisCorrelations");
+        exit(1);
+    }
+}
+
+/**************************************************************************************************/
+
+float CalcSparcc::getExcludedPairs(vector<vector<float> > rho, int& maxRow, int& maxColumn){
+    try {
+        float maxRho = 0;
+        maxRow = -1;
+        maxColumn = -1;
+        
+        for(int i=0;i<numOTUs;i++){
+            
+            for(int j=0;j<i;j++){
+                if (m->control_pressed) { return maxRho; }
+                float tester = abs(rho[i][j]);
+                
+                if(tester > maxRho && excluded[i][j] != 1){
+                    maxRho = tester;
+                    maxRow = i;
+                    maxColumn = j;
+                }
+            }
+            
+        }
+        
+        return maxRho;
+    }
+    catch(exception& e) {
+        m->errorOut(e, "CalcSparcc", "getExcludedPairs");
+        exit(1);
+    }
+}
+
+/**************************************************************************************************/
+
+void CalcSparcc::excludeValues(int excludeRow, int excludeColumn){
+    try { 
+        tVector[excludeRow] -= tMatrix[excludeRow * numOTUs + excludeColumn];
+        tVector[excludeColumn] -= tMatrix[excludeRow * numOTUs + excludeColumn];
+        
+        dMatrix[excludeRow][excludeColumn] = 0;
+        dMatrix[excludeColumn][excludeRow] = 0;
+        dMatrix[excludeRow][excludeRow]--;
+        dMatrix[excludeColumn][excludeColumn]--;
+        
+        excluded[excludeRow][excludeColumn] = 1;
+        excluded[excludeColumn][excludeRow] = 1;
+    }
+    catch(exception& e) {
+        m->errorOut(e, "CalcSparcc", "excludeValues");
+        exit(1);
+    }
+}
+
+/**************************************************************************************************/
+
+void CalcSparcc::getMedian(vector<vector<vector<float> > > allCorrelations){
+    try {
+        int numSamples = (int)allCorrelations.size();
+        median.resize(numOTUs);
+        for(int i=0;i<numOTUs;i++){ median[i].assign(numOTUs, 1);   }
+        
+        vector<float> hold(numSamples);
+        
+        for(int i=0;i<numOTUs;i++){
+            for(int j=0;j<i;j++){
+                if (m->control_pressed) { return; }
+                
+                for(int k=0;k<numSamples;k++){
+                    hold[k] = allCorrelations[k][i][j];
+                }
+                
+                sort(hold.begin(), hold.end());
+                median[i][j] = hold[int(numSamples * 0.5)];
+                median[j][i] = median[i][j];
+            }
+        }
+    }
+    catch(exception& e) {
+        m->errorOut(e, "CalcSparcc", "getMedian");
+        exit(1);
+    }
+}
+
+/**************************************************************************************************/
diff --git a/calcsparcc.h b/calcsparcc.h
new file mode 100644 (file)
index 0000000..05eff85
--- /dev/null
@@ -0,0 +1,57 @@
+
+#ifndef PDSSparCC_runSparcc_h
+#define PDSSparCC_runSparcc_h
+
+//
+//  runSparcc.h
+//  PDSSparCC
+//
+//  Created by Patrick Schloss on 10/31/12.
+//  Copyright (c) 2012 University of Michigan. All rights reserved.
+//
+
+/**************************************************************************************************/
+
+//#include "sparcc.h"
+#include "randomnumber.h"
+#include "mothurout.h"
+
+/**************************************************************************************************/
+
+class CalcSparcc {
+       
+public:
+       CalcSparcc(vector<vector<float> >, int, int, string);
+    vector<vector<float> > getRho()    {   return median;  }
+private:
+    MothurOut* m;
+    void addPseudoCount(vector<vector<float> >&);
+    vector<float> getLogFractions(vector<vector<float> >, string);
+    void getT_Matrix(vector<float>);
+    
+    
+    void getT_Vector();
+    void getD_Matrix();
+    vector<float> getBasisVariances();
+    vector<vector<float> > getBasisCorrelations(vector<float>);
+    float getExcludedPairs(vector<vector<float> >, int&, int&);
+    void excludeValues(int, int);
+    void getMedian(vector<vector<vector<float> > >);
+
+    vector<float> tMatrix;
+
+    vector<vector<float> > dMatrix;
+    vector<float> tVector;
+    vector<vector<int> > excluded;
+    vector<vector<float> > median;
+    
+    int numOTUs;
+    int numGroups;
+    string normalizationMethod;
+    
+    RandomNumberGenerator RNG;    
+};
+
+#endif
+
+/**************************************************************************************************/
index 74867ebdeb0ec271d07d727b7cf9c8d19aa13284..270ea6208d8ead0683d8cf46fb8d7a417469c243 100644 (file)
@@ -843,9 +843,12 @@ int ClusterSplitCommand::mergeLists(vector<string> listNames, map<float, int> us
                        if (itLabel->first == -1) { thisLabel = "unique"; }
                        else { thisLabel = toString(itLabel->first,  length-1);  } 
                        
-                       outList << thisLabel << '\t' << itLabel->second << '\t';
+                       //outList << thisLabel << '\t' << itLabel->second << '\t';
             
             RAbundVector* rabund = NULL;
+            ListVector completeList;
+            completeList.setLabel(thisLabel);
+            
             if (countfile == "") {
                 rabund = new RAbundVector();
                 rabund->setLabel(thisLabel);
@@ -854,7 +857,8 @@ int ClusterSplitCommand::mergeLists(vector<string> listNames, map<float, int> us
                        //add in singletons
                        if (listSingle != NULL) {
                                for (int j = 0; j < listSingle->getNumBins(); j++) {
-                                       outList << listSingle->get(j) << '\t';
+                                       //outList << listSingle->get(j) << '\t';
+                    completeList.push_back(listSingle->get(j));
                                        if (countfile == "") { rabund->push_back(m->getNumNames(listSingle->get(j))); }
                                }
                        }
@@ -871,7 +875,8 @@ int ClusterSplitCommand::mergeLists(vector<string> listNames, map<float, int> us
                                if (list == NULL) { m->mothurOut("Error merging listvectors in file " + listNames[k]); m->mothurOutEndLine();  }        
                                else {          
                                        for (int j = 0; j < list->getNumBins(); j++) {
-                                               outList << list->get(j) << '\t';
+                                               //outList << list->get(j) << '\t';
+                        completeList.push_back(list->get(j));
                                                if (countfile == "") { rabund->push_back(m->getNumNames(list->get(j))); }
                                        }
                                        delete list;
@@ -884,7 +889,8 @@ int ClusterSplitCommand::mergeLists(vector<string> listNames, map<float, int> us
                 sabund.print(outSabund);
                 rabund->print(outRabund);
             }
-                       outList << endl;
+                       //outList << endl;
+            completeList.print(outList);
                        
                        if (rabund != NULL) { delete rabund; }
                }
index 6c6bc17bb3dfd47f31b68facd01d6e0724cf3d8d..387ee10833323300a66a0e6d1161a64bff6fb9ee 100644 (file)
 #include "removedistscommand.h"
 #include "mergetaxsummarycommand.h"
 #include "getmetacommunitycommand.h"
+#include "sparcccommand.h"
+#include "makelookupcommand.h"
 
 /*******************************************************/
 
@@ -307,6 +309,8 @@ CommandFactory::CommandFactory(){
     commands["remove.dists"]        = "remove.dists";
     commands["merge.taxsummary"]    = "merge.taxsummary";
     commands["get.metacommunity"]   = "get.metacommunity";
+    commands["sparcc"]              = "sparcc";
+    commands["make.lookup"]         = "make.lookup";
     
 
 }
@@ -528,6 +532,8 @@ Command* CommandFactory::getCommand(string commandName, string optionString){
         else if(commandName == "remove.dists")          {      command = new RemoveDistsCommand(optionString);             }
         else if(commandName == "merge.taxsummary")      {      command = new MergeTaxSummaryCommand(optionString);         }
         else if(commandName == "get.metacommunity")     {      command = new GetMetaCommunityCommand(optionString);        }
+        else if(commandName == "sparcc")                {      command = new SparccCommand(optionString);                  }
+        else if(commandName == "make.lookup")                  {       command = new MakeLookupCommand(optionString);                          }
                else                                                                                    {       command = new NoCommand(optionString);                                          }
 
                return command;
@@ -690,6 +696,8 @@ Command* CommandFactory::getCommand(string commandName, string optionString, str
         else if(commandName == "remove.dists")          {      pipecommand = new RemoveDistsCommand(optionString);             }
         else if(commandName == "merge.taxsummary")      {      pipecommand = new MergeTaxSummaryCommand(optionString);         }
         else if(commandName == "get.metacommunity")     {      pipecommand = new GetMetaCommunityCommand(optionString);        }
+        else if(commandName == "sparcc")                {      pipecommand = new SparccCommand(optionString);                  }
+        else if(commandName == "make.lookup")                  {       pipecommand = new MakeLookupCommand(optionString);                              }
                else                                                                                    {       pipecommand = new NoCommand(optionString);                                              }
 
                return pipecommand;
@@ -838,6 +846,8 @@ Command* CommandFactory::getCommand(string commandName){
         else if(commandName == "remove.dists")          {      shellcommand = new RemoveDistsCommand();            }
         else if(commandName == "merge.taxsummary")      {      shellcommand = new MergeTaxSummaryCommand();        }
         else if(commandName == "get.metacommunity")     {      shellcommand = new GetMetaCommunityCommand();       }
+        else if(commandName == "sparcc")                {      shellcommand = new SparccCommand();                 }
+        else if(commandName == "make.lookup")                  {       shellcommand = new MakeLookupCommand();                         }
                else                                                                                    {       shellcommand = new NoCommand();                                         }
 
                return shellcommand;
index 51211395356228a5e3b9b3f554a399da3a558c30..58799e7a33e840580654d006b7a085ec02e7c5f5 100644 (file)
 vector<string> CreateDatabaseCommand::setParameters(){ 
        try {
                CommandParameter pfasta("repfasta", "InputTypes", "", "", "none", "none", "none","database",false,true,true); parameters.push_back(pfasta);
-               CommandParameter pname("repname", "InputTypes", "", "", "none", "none", "none","",false,true,true); parameters.push_back(pname);
+               CommandParameter pname("repname", "InputTypes", "", "", "NameCount", "NameCount", "none","",false,false,true); parameters.push_back(pname);
+        CommandParameter pcount("count", "InputTypes", "", "", "NameCount-CountGroup", "NameCount", "none","",false,false,true); parameters.push_back(pcount);
+               CommandParameter pgroup("group", "InputTypes", "", "", "CountGroup", "none", "none","",false,false,true); parameters.push_back(pgroup);
                CommandParameter pcontaxonomy("contaxonomy", "InputTypes", "", "", "none", "none", "none","",false,true,true); parameters.push_back(pcontaxonomy);
                CommandParameter plist("list", "InputTypes", "", "", "ListShared", "ListShared", "none","",false,false,true); parameters.push_back(plist);
         CommandParameter pshared("shared", "InputTypes", "", "", "ListShared", "ListShared", "none","",false,false,true); parameters.push_back(pshared);
-               CommandParameter pgroup("group", "InputTypes", "", "", "none", "none", "none","",false,false); parameters.push_back(pgroup);
                CommandParameter plabel("label", "String", "", "", "", "", "","",false,false); parameters.push_back(plabel);
                CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
                CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
@@ -35,17 +36,18 @@ vector<string> CreateDatabaseCommand::setParameters(){
 string CreateDatabaseCommand::getHelpString(){ 
        try {
                string helpString = "";
-               helpString += "The create.database command reads a list file or a shared file, *.cons.taxonomy, *.rep.fasta, *.rep.names and optional groupfile, and creates a database file.\n";
-               helpString += "The create.database command parameters are repfasta, list, shared, repname, contaxonomy, group and label. List, repfasta, repnames, and contaxonomy are required.\n";
+               helpString += "The create.database command reads a list file or a shared file, *.cons.taxonomy, *.rep.fasta, *.rep.names and optional groupfile, or count file and creates a database file.\n";
+               helpString += "The create.database command parameters are repfasta, list, shared, repname, contaxonomy, group, count and label. List, repfasta, repnames or count, and contaxonomy are required.\n";
         helpString += "The repfasta file is fasta file outputted by get.oturep(fasta=yourFastaFile, list=yourListfile, column=yourDistFile, name=yourNameFile).\n";
         helpString += "The repname file is the name file outputted by get.oturep(fasta=yourFastaFile, list=yourListfile, column=yourDistFile, name=yourNameFile).\n";
-        helpString += "The contaxonomy file is the taxonomy file outputted by classify.otu(list=yourListfile, taxonomy=yourTaxonomyFile).\n";
+        helpString += "The count file is the count file outputted by get.oturep(fasta=yourFastaFile, list=yourListfile, column=yourDistFile, count=yourCountFile). If it includes group info, mothur will give you the abundance breakdown by group. \n";
+        helpString += "The contaxonomy file is the taxonomy file outputted by classify.otu(list=yourListfile, taxonomy=yourTaxonomyFile, name=yourNameFile).\n";
         helpString += "The group file is optional and will just give you the abundance breakdown by group.\n";
         helpString += "The label parameter allows you to specify a label to be used from your listfile.\n";
         helpString += "NOTE: Make SURE the repfasta, repnames and contaxonomy are for the same label as the listfile.\n";
         helpString += "The create.database command should be in the following format: \n";
                helpString += "create.database(repfasta=yourFastaFileFromGetOTURep, repname=yourNameFileFromGetOTURep, contaxonomy=yourConTaxFileFromClassifyOTU, list=yourListFile) \n";       
-               helpString += "Example: create.database(repfasta=final.an.0.03.rep.fasta, name=final.an.0.03.rep.names, list=fina.an.list, label=0.03, contaxonomy=final.an.0.03.cons.taxonomy) \n";
+               helpString += "Example: create.database(repfasta=final.an.0.03.rep.fasta, repname=final.an.0.03.rep.names, list=final.an.list, label=0.03, contaxonomy=final.an.0.03.cons.taxonomy) \n";
                helpString += "Note: No spaces between parameter labels (i.e. repfasta), '=' and parameters (i.e.yourFastaFileFromGetOTURep).\n";       
                return helpString;
        }
@@ -155,6 +157,14 @@ CreateDatabaseCommand::CreateDatabaseCommand(string option)  {
                                        if (path == "") {       parameters["group"] = inputDir + it->second;            }
                                }
                 
+                it = parameters.find("count");
+                               //user has given a template file
+                               if(it != parameters.end()){
+                                       path = m->hasPath(it->second);
+                                       //if the user has not given a path then, add inputdir. else leave path alone.
+                                       if (path == "") {       parameters["count"] = inputDir + it->second;            }
+                               }
+                
                 it = parameters.find("shared");
                                //user has given a template file
                                if(it != parameters.end()){ 
@@ -211,10 +221,23 @@ CreateDatabaseCommand::CreateDatabaseCommand(string option)  {
                        else if (repfastafile == "not open") { repfastafile = ""; abort = true; }
 
             repnamesfile = validParameter.validFile(parameters, "repname", true);
-                       if (repnamesfile == "not found") {  //if there is a current list file, use it
-                repnamesfile = "";  m->mothurOut("The repnames parameter is required, aborting."); m->mothurOutEndLine(); abort = true; 
-                       }
+                       if (repnamesfile == "not found") {  repnamesfile = "";                          }
                        else if (repnamesfile == "not open") { repnamesfile = ""; abort = true; }
+            
+            countfile = validParameter.validFile(parameters, "count", true);
+                       if (countfile == "not found") {  countfile = "";                        }
+                       else if (countfile == "not open") { countfile = ""; abort = true; }
+            
+            if ((countfile == "") && (repnamesfile == "")) {
+                //if there is a current name file, use it, else look for current count file
+                string repnamesfile = m->getNameFile();
+                               if (repnamesfile != "") {  m->mothurOut("Using " + repnamesfile + " as input file for the repname parameter."); m->mothurOutEndLine(); }
+                               else {
+                    countfile = m->getCountTableFile();
+                    if (countfile != "") {  m->mothurOut("Using " + countfile + " as input file for the count parameter."); m->mothurOutEndLine(); }
+                    else { m->mothurOut("[ERROR]: You must provide a count or repname file."); m->mothurOutEndLine(); abort = true;  }
+                }
+            }
 
                        groupfile = validParameter.validFile(parameters, "group", true);
                        if (groupfile == "not open") { groupfile = ""; abort = true; }  
@@ -253,7 +276,32 @@ int CreateDatabaseCommand::execute(){
         
         //names redundants to uniques. backwards to how we normally do it, but each bin is the list file will be a key entry in the map.
         map<string, string> repNames;
-        int numUniqueNamesFile = m->readNames(repnamesfile, repNames, 1);
+        map<string, int> nameMap;
+        int numUniqueNamesFile = 0;
+        CountTable ct;
+        if (countfile == "") {
+            numUniqueNamesFile = m->readNames(repnamesfile, repNames, 1);
+            //the repnames file does not have the same order as the list file bins so we need to sort and reassemble for the search below
+            map<string, string> tempRepNames;
+            for (map<string, string>::iterator it = repNames.begin(); it != repNames.end();) {
+                string bin = it->first;
+                vector<string> temp;
+                m->splitAtChar(bin, temp, ',');
+                sort(temp.begin(), temp.end());
+                bin = "";
+                for (int i = 0; i < temp.size()-1; i++) {
+                    bin += temp[i] + ',';
+                }
+                bin += temp[temp.size()-1];
+                tempRepNames[bin] = it->second;
+                repNames.erase(it++);
+            }
+            repNames = tempRepNames;
+        }else {
+            ct.readTable(countfile, true);
+            numUniqueNamesFile = ct.getNumUniqueSeqs();
+            nameMap = ct.getNameMap();
+        }
         
         //are there the same number of otus in the fasta and name files
         if (repOtusSizes.size() != numUniqueNamesFile) { m->mothurOut("[ERROR]: you have " + toString(numUniqueNamesFile) + " unique seqs in your repname file, but " + toString(repOtusSizes.size()) + " seqs in your repfasta file.  These should match.\n"); m->control_pressed = true; }
@@ -307,6 +355,11 @@ int CreateDatabaseCommand::execute(){
             if (groupfile != "") { 
                 header = "OTUNumber\t";
                 for (int i = 0; i < groupmap->getNamesOfGroups().size(); i++) { header += (groupmap->getNamesOfGroups())[i] + '\t'; }
+            }else if (countfile != "") {
+                if (ct.hasGroupInfo()) {
+                    header = "OTUNumber\t";
+                    for (int i = 0; i < ct.getNamesOfGroups().size(); i++) { header += (ct.getNamesOfGroups())[i] + '\t'; }
+                }
             }
             header += "repSeqName\trepSeq\tOTUConTaxonomy";
             out << header << endl;
@@ -319,17 +372,46 @@ int CreateDatabaseCommand::execute(){
                 
                 vector<string> binNames;
                 string bin = list->get(i);
-                
-                map<string, string>::iterator it = repNames.find(bin);
-                if (it == repNames.end()) {
-                    m->mothurOut("[ERROR: OTU " + otuLabels[i] + " is not in the repnames file. Make sure you are using files for the same distance.\n"); m->control_pressed = true;   break;
-                }
-                
                 m->splitAtComma(bin, binNames);
                 
-                //sanity check
-                if (binNames.size() != classifyOtuSizes[i]) {
-                    m->mothurOut("[ERROR: OTU " + otuLabels[i] + " contains " + toString(binNames.size()) + " sequence, but the rep and taxonomy files indicated this OTU should have " + toString(classifyOtuSizes[i]) + ". Make sure you are using files for the same distance.\n"); m->control_pressed = true;   break;
+                string seqRepName = "";
+                int numSeqsRep = 0;
+                
+                if (countfile == "") {
+                    sort(binNames.begin(), binNames.end());
+                    bin = "";
+                    for (int i = 0; i < binNames.size()-1; i++) {
+                        bin += binNames[i] + ',';
+                    }
+                    bin += binNames[binNames.size()-1];
+                    map<string, string>::iterator it = repNames.find(bin);
+                    
+                    if (it == repNames.end()) {
+                        m->mothurOut("[ERROR: OTU " + otuLabels[i] + " is not in the repnames file. Make sure you are using files for the same distance.\n"); m->control_pressed = true;   break;
+                    }else { seqRepName = it->second;  numSeqsRep = binNames.size(); }
+                    
+                    //sanity check
+                    if (binNames.size() != classifyOtuSizes[i]) {
+                        m->mothurOut("[ERROR: OTU " + otuLabels[i] + " contains " + toString(binNames.size()) + " sequence, but the rep and taxonomy files indicated this OTU should have " + toString(classifyOtuSizes[i]) + ". Make sure you are using files for the same distance.\n"); m->control_pressed = true;   break;
+                    }
+                }else {
+                    //find rep sequence in bin
+                    for (int j = 0; j < binNames.size(); j++) {
+                        map<string, int>::iterator itNameMap = nameMap.find(binNames[j]); //if you are in the counttable you must be the rep. because get.oturep with a countfile only includes the rep sequences in the rep.count file.
+                        if (itNameMap != nameMap.end()) {
+                            seqRepName = itNameMap->first;
+                            numSeqsRep = itNameMap->second;
+                            j += binNames.size();
+                        }
+                    }
+                    
+                    if (seqRepName == "") {
+                        m->mothurOut("[ERROR: OTU " + otuLabels[i] + " is not in the count file. Make sure you are using files for the same distance.\n"); m->control_pressed = true;   break;
+                    }
+                    
+                    if (numSeqsRep != classifyOtuSizes[i]) {
+                        m->mothurOut("[ERROR: OTU " + otuLabels[i] + " contains " + toString(numSeqsRep) + " sequence, but the rep and taxonomy files indicated this OTU should have " + toString(classifyOtuSizes[i]) + ". Make sure you are using files for the same distance.\n"); m->control_pressed = true;   break;
+                    }
                 }
                 
                 //output abundances
@@ -353,10 +435,15 @@ int CreateDatabaseCommand::execute(){
                     for (int j = 0; j < groupmap->getNamesOfGroups().size(); j++) { out << counts[(groupmap->getNamesOfGroups())[j]] << '\t';  }
                     
                     if (error) { m->control_pressed = true; }
-                }else { out << binNames.size() << '\t'; }
+                }else if (countfile != "") {
+                    if (ct.hasGroupInfo()) {
+                        vector<int> groupCounts = ct.getGroupCounts(seqRepName);
+                        for (int j = 0; j < groupCounts.size(); j++) { out << groupCounts[j] << '\t';  }
+                    }else { out << numSeqsRep << '\t'; }
+                }else { out << numSeqsRep << '\t'; }
                 
                 //output repSeq
-                out << it->second << '\t' << seqs[i].getAligned() << '\t' << taxonomies[i] << endl;
+                out << seqRepName << '\t' << seqs[i].getAligned() << '\t' << taxonomies[i] << endl;
             }
             
             
@@ -471,6 +558,7 @@ vector<int> CreateDatabaseCommand::readFasta(vector<Sequence>& seqs){
         ifstream in;
         m->openInputFile(repfastafile, in);
         
+        set<int> sanity;
         while (!in.eof()) {
             
             if (m->control_pressed) { break; }
@@ -481,11 +569,20 @@ vector<int> CreateDatabaseCommand::readFasta(vector<Sequence>& seqs){
             //the binInfo should look like - binNumber|size ie. 1|200 if it is binNumber|size|group then the user gave us the wrong repfasta file
             vector<string> info;
             m->splitAtChar(binInfo, info, '|');
-            if (info.size() != 2) { m->mothurOut("[ERROR]: your repfasta file is not the right format.  The create database command is designed to be used with the output from get.oturep.  When running get.oturep you can not use a group file, because mothur is only expecting one representative sequence per OTU and when you use a group file with get.oturep a representative is found for each group.\n");  m->control_pressed = true; break;}
+            //if (info.size() != 2) { m->mothurOut("[ERROR]: your repfasta file is not the right format.  The create database command is designed to be used with the output from get.oturep.  When running get.oturep you can not use a group file, because mothur is only expecting one representative sequence per OTU and when you use a group file with get.oturep a representative is found for each group.\n");  m->control_pressed = true; break;}
             
             int size = 0;
             m->mothurConvert(info[1], size);
             
+            int binNumber = 0;
+            string temp = "";
+            for (int i = 0; i < info[0].size(); i++) { if (isspace(info[0][i])) {;}else{temp +=info[0][i]; } }
+            m->mothurConvert(temp, binNumber);
+            set<int>::iterator it = sanity.find(binNumber);
+            if (it != sanity.end()) {
+                m->mothurOut("[ERROR]: your repfasta file is not the right format.  The create database command is designed to be used with the output from get.oturep.  When running get.oturep you can not use a group file, because mothur is only expecting one representative sequence per OTU and when you use a group file with get.oturep a representative is found for each group.\n");  m->control_pressed = true; break;
+            }else { sanity.insert(binNumber); }
+            
             sizes.push_back(size);
             seqs.push_back(seq);
         }
index ca47b7a3e1ce1e3b8dafcdffe7280e1d0510418c..79b5646b864ddf13f59f69c3adb205d5df74fc16 100644 (file)
@@ -35,7 +35,7 @@ public:
 private:
        
        bool abort;
-       string sharedfile, listfile, groupfile, repfastafile, repnamesfile, contaxonomyfile, label, outputDir;
+       string sharedfile, listfile, groupfile, repfastafile, repnamesfile, contaxonomyfile, label, outputDir, countfile;
        
        vector<string> outputNames;
                
index b2e856c28e5c939a00102e19d34a97799c11ae08..66261e33b922f17be93e3f929582ca9ea7d9208f 100644 (file)
@@ -68,9 +68,7 @@ string FlowData::getSequenceName(ifstream& flowFile) {
         flowFile >> name;
                
                if (name.length() != 0) { 
-            for (int i = 0; i < name.length(); i++) {
-                if (name[i] == ':') { name[i] = '_'; m->changedSeqNames = true; }
-            }
+            m->checkName(name);
         }else{ m->mothurOut("Error in reading your flowfile, at position " + toString(flowFile.tellg()) + ". Blank name."); m->mothurOutEndLine(); m->control_pressed = true;  }
         
                return name;
index 25a68be75c950de1ebad3afe6955cf308ad0ce0b..005a89f2ebbee9e1b4c37567b6ce382979c90226 100644 (file)
@@ -318,7 +318,7 @@ GetLineageCommand::GetLineageCommand(string option)  {
             
             if ((sharedfile != "") || (listfile != "")) {
                 label = validParameter.validFile(parameters, "label", false);
-                if (label == "not found") { label = ""; m->mothurOut("You did not provide a label, I will use the first label in your inputfile."); m->mothurOutEndLine(); label=""; }
+                if (label == "not found") { label = ""; m->mothurOut("[WARNING]: You did not provide a label, I will use the first label in your inputfile."); m->mothurOutEndLine(); }
             }
             
             if (countfile == "") {
index 8b9a88cc8116d679d5096be848f7b6a2c29b5215..fd818acb44c4c5280c6a54971d3d39413307861d 100644 (file)
@@ -453,7 +453,7 @@ int IndicatorCommand::GetIndicatorSpecies(){
                                
                        indicatorValues = getValues(groupings, indicatorGroups, randomGroupingsMap);
                        
-                       pValues = getPValues(groupings, randomGroupingsMap, lookup.size(), indicatorValues);                            
+                       pValues = getPValues(groupings, lookup.size(), indicatorValues);                                
                }else {
                        vector< vector<SharedRAbundFloatVector*> > groupings;
                        set<string> groupsAlreadyAdded;
@@ -476,7 +476,7 @@ int IndicatorCommand::GetIndicatorSpecies(){
                        
                        indicatorValues = getValues(groupings, indicatorGroups, randomGroupingsMap);
                        
-                       pValues = getPValues(groupings, randomGroupingsMap, lookupFloat.size(), indicatorValues);
+                       pValues = getPValues(groupings, lookupFloat.size(), indicatorValues);
                }
                        
                if (m->control_pressed) { out.close(); return 0; }
@@ -627,7 +627,7 @@ int IndicatorCommand::GetIndicatorSpecies(Tree*& T){
                                                                
                                indicatorValues = getValues(groupings, indicatorGroups, randomGroupingsMap);
                                
-                               pValues = getPValues(groupings, randomGroupingsMap, lookup.size(), indicatorValues);                            
+                               pValues = getPValues(groupings, lookup.size(), indicatorValues);                                
                        }else {
                                vector< vector<SharedRAbundFloatVector*> > groupings;
                                
@@ -676,7 +676,7 @@ int IndicatorCommand::GetIndicatorSpecies(Tree*& T){
                                
                                indicatorValues = getValues(groupings, indicatorGroups, randomGroupingsMap);
                                
-                               pValues = getPValues(groupings, randomGroupingsMap, lookupFloat.size(), indicatorValues);
+                               pValues = getPValues(groupings, lookupFloat.size(), indicatorValues);
                        }
                        
                        if (m->control_pressed) { out.close(); return 0; }
@@ -1142,7 +1142,7 @@ int IndicatorCommand::getSharedFloat(){
        }
 }
 //**********************************************************************************************************************
-vector<float> IndicatorCommand::driver(vector< vector<SharedRAbundFloatVector*> >& groupings, map< vector<int>, vector<int> > groupingsMap, int num, vector<float> indicatorValues, int numIters){
+vector<float> IndicatorCommand::driver(vector< vector<SharedRAbundFloatVector*> >& groupings, int num, vector<float> indicatorValues, int numIters){
        try {
                vector<float> pvalues;
                pvalues.resize(indicatorValues.size(), 0);
@@ -1150,7 +1150,7 @@ vector<float> IndicatorCommand::driver(vector< vector<SharedRAbundFloatVector*>
                
                for(int i=0;i<numIters;i++){
                        if (m->control_pressed) { break; }
-                       groupingsMap = randomizeGroupings(groupings, num);
+                       map< vector<int>, vector<int> > groupingsMap = randomizeGroupings(groupings, num);
                        vector<float> randomIndicatorValues = getValues(groupings, notUsedGroupings, groupingsMap);
                        
                        for (int j = 0; j < indicatorValues.size(); j++) {
@@ -1166,23 +1166,29 @@ vector<float> IndicatorCommand::driver(vector< vector<SharedRAbundFloatVector*>
        }
 }
 //**********************************************************************************************************************
-vector<float> IndicatorCommand::getPValues(vector< vector<SharedRAbundFloatVector*> >& groupings, map< vector<int>, vector<int> > groupingsMap, int num, vector<float> indicatorValues){
+vector<float> IndicatorCommand::getPValues(vector< vector<SharedRAbundFloatVector*> >& groupings, int num, vector<float> indicatorValues){
        try {
                vector<float> pvalues;
-               
-#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
+
                if(processors == 1){
-                       pvalues = driver(groupings, groupingsMap, num, indicatorValues, iters);
-                       for (int i = 0; i < pvalues.size(); i++) { pvalues[i] /= (double)iters; }
+                       pvalues = driver(groupings, num, indicatorValues, iters);
+            for (int i = 0; i < pvalues.size(); i++) { pvalues[i] /= (double)iters; }
                }else{
+            //divide iters between processors
+                       vector<int> procIters;
+            int numItersPerProcessor = iters / processors;
+            
+            //divide iters between processes
+            for (int h = 0; h < processors; h++) {
+                if(h == processors - 1){ numItersPerProcessor = iters - h * numItersPerProcessor; }
+                procIters.push_back(numItersPerProcessor);
+            }
+            
+            vector<int> processIDS;
+            int process = 1;
                        
-                       //divide iters between processors
-                       int numItersPerProcessor = iters / processors;
-                       
-                       vector<int> processIDS;
-                       int process = 1;
-                       int num = 0;
-                       
+#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
+                                               
                        //loop through and create all the processes you want
                        while (process != processors) {
                                int pid = fork();
@@ -1191,7 +1197,7 @@ vector<float> IndicatorCommand::getPValues(vector< vector<SharedRAbundFloatVecto
                                        processIDS.push_back(pid);  //create map from line number to pid so you can append files in correct order later
                                        process++;
                                }else if (pid == 0){
-                                       pvalues = driver(groupings, groupingsMap, num, indicatorValues, numItersPerProcessor);
+                                       pvalues = driver(groupings, num, indicatorValues, procIters[process]);
                                        
                                        //pass pvalues to parent
                                        ofstream out;
@@ -1215,9 +1221,7 @@ vector<float> IndicatorCommand::getPValues(vector< vector<SharedRAbundFloatVecto
                        }
                        
                        //do my part
-                       //special case for last processor in case it doesn't divide evenly
-                       numItersPerProcessor = iters - ((processors-1) * numItersPerProcessor);         
-                       pvalues = driver(groupings, groupingsMap, num, indicatorValues, numItersPerProcessor);
+                       pvalues = driver(groupings, num, indicatorValues, procIters[0]);
                        
                        //force parent to wait until all the processes are done
                        for (int i=0;i<processIDS.size();i++) { 
@@ -1239,12 +1243,71 @@ vector<float> IndicatorCommand::getPValues(vector< vector<SharedRAbundFloatVecto
                                }
                                in.close(); m->mothurRemove(tempFile);
                        }
-                       for (int i = 0; i < pvalues.size(); i++) { pvalues[i] /= (double)iters; } 
-               }
+                       for (int i = 0; i < pvalues.size(); i++) { pvalues[i] /= (double)iters; }
 #else
-               pvalues = driver(groupings, groupingsMap, num, indicatorValues, iters);
-               for (int i = 0; i < pvalues.size(); i++) { pvalues[i] /= (double)iters; }
+                       
+            //fill in functions
+            vector<indicatorData*> pDataArray;
+            DWORD   dwThreadIdArray[processors-1];
+            HANDLE  hThreadArray[processors-1];
+            
+            //Create processor worker threads.
+            for( int i=1; i<processors; i++ ){
+                
+                //make copy of lookup so we don't get access violations
+                vector< vector<SharedRAbundFloatVector*> > newGroupings;
+
+                for (int k = 0; k < groupings.size(); k++) {
+                    vector<SharedRAbundFloatVector*> newLookup;
+                    for (int l = 0; l < groupings[k].size(); l++) {
+                        SharedRAbundFloatVector* temp = new SharedRAbundFloatVector();
+                        temp->setLabel(groupings[k][l]->getLabel());
+                        temp->setGroup(groupings[k][l]->getGroup());
+                        newLookup.push_back(temp);
+                    }
+                    newGroupings.push_back(newLookup);
+                }
+                
+                //for each bin
+                for (int l = 0; l < groupings.size(); l++) {
+                    for (int k = 0; k < groupings[l][0]->getNumBins(); k++) {
+                        if (m->control_pressed) { for (int j = 0; j < newGroupings.size(); j++) { for (int u = 0; u < newGroupings[j].size(); u++) { delete newGroupings[j][u];  } } return pvalues; }
+                        
+                        for (int j = 0; j < groupings[l].size(); j++) { newGroupings[l][j]->push_back(groupings[l][j]->getAbundance(k), groupings[l][j]->getGroup()); }
+                    }
+                }
+        
+                vector<float> copyIValues = indicatorValues;
+
+                indicatorData* temp = new indicatorData(m, procIters[i], newGroupings, num, copyIValues);
+                pDataArray.push_back(temp);
+                processIDS.push_back(i);
+                
+                hThreadArray[i-1] = CreateThread(NULL, 0, MyIndicatorThreadFunction, pDataArray[i-1], 0, &dwThreadIdArray[i-1]);
+            }
+            
+            //do my part
+                       pvalues = driver(groupings, num, indicatorValues, procIters[0]);
+           
+            //Wait until all threads have terminated.
+            WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE);
+            
+            //Close all thread handles and free memory allocations.
+            for(int i=0; i < pDataArray.size(); i++){
+                for (int j = 0; j < pDataArray[i]->pvalues.size(); j++) { pvalues[j] += pDataArray[i]->pvalues[j];  }
+                
+                for (int l = 0; l < pDataArray[i]->groupings.size(); l++) {
+                    for (int j = 0; j < pDataArray[i]->groupings[l].size(); j++) { delete pDataArray[i]->groupings[l][j]; }
+                }
+                
+                CloseHandle(hThreadArray[i]);
+                delete pDataArray[i];
+            }
+            
+            for (int i = 0; i < pvalues.size(); i++) { pvalues[i] /= (double)iters; }
 #endif
+               }
+
                
                return pvalues;
        }
@@ -1256,7 +1319,7 @@ vector<float> IndicatorCommand::getPValues(vector< vector<SharedRAbundFloatVecto
 
 //**********************************************************************************************************************
 //same as above, just data type difference
-vector<float> IndicatorCommand::driver(vector< vector<SharedRAbundVector*> >& groupings, map< vector<int>, vector<int> > groupingsMap, int num, vector<float> indicatorValues, int numIters){
+vector<float> IndicatorCommand::driver(vector< vector<SharedRAbundVector*> >& groupings, int num, vector<float> indicatorValues, int numIters){
        try {
                vector<float> pvalues;
                pvalues.resize(indicatorValues.size(), 0);
@@ -1264,7 +1327,7 @@ vector<float> IndicatorCommand::driver(vector< vector<SharedRAbundVector*> >& gr
                
                for(int i=0;i<numIters;i++){
                        if (m->control_pressed) { break; }
-                       groupingsMap = randomizeGroupings(groupings, num);
+                       map< vector<int>, vector<int> > groupingsMap = randomizeGroupings(groupings, num);
                        vector<float> randomIndicatorValues = getValues(groupings, notUsedGroupings, groupingsMap);
                        
                        for (int j = 0; j < indicatorValues.size(); j++) {
@@ -1281,22 +1344,29 @@ vector<float> IndicatorCommand::driver(vector< vector<SharedRAbundVector*> >& gr
 }
 //**********************************************************************************************************************
 //same as above, just data type difference
-vector<float> IndicatorCommand::getPValues(vector< vector<SharedRAbundVector*> >& groupings, map< vector<int>, vector<int> > groupingsMap, int num, vector<float> indicatorValues){
+vector<float> IndicatorCommand::getPValues(vector< vector<SharedRAbundVector*> >& groupings, int num, vector<float> indicatorValues){
        try {
                vector<float> pvalues;
-               
-#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
+        
                if(processors == 1){
-                       pvalues = driver(groupings, groupingsMap, num, indicatorValues, iters);
-                       for (int i = 0; i < pvalues.size(); i++) { pvalues[i] /= (double)iters; }
+                       pvalues = driver(groupings, num, indicatorValues, iters);
+            for (int i = 0; i < pvalues.size(); i++) { pvalues[i] /= (double)iters; }
                }else{
+            //divide iters between processors
+                       vector<int> procIters;
+            int numItersPerProcessor = iters / processors;
+            
+            //divide iters between processes
+            for (int h = 0; h < processors; h++) {
+                if(h == processors - 1){ numItersPerProcessor = iters - h * numItersPerProcessor; }
+                procIters.push_back(numItersPerProcessor);
+            }
+            
+            vector<int> processIDS;
+            int process = 1;
                        
-                       //divide iters between processors
-                       int numItersPerProcessor = iters / processors;
-                       
-                       vector<int> processIDS;
-                       int process = 1;
-                       
+#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
+            
                        //loop through and create all the processes you want
                        while (process != processors) {
                                int pid = fork();
@@ -1305,7 +1375,7 @@ vector<float> IndicatorCommand::getPValues(vector< vector<SharedRAbundVector*> >
                                        processIDS.push_back(pid);  //create map from line number to pid so you can append files in correct order later
                                        process++;
                                }else if (pid == 0){
-                                       pvalues = driver(groupings, groupingsMap, num, indicatorValues, numItersPerProcessor);
+                                       pvalues = driver(groupings, num, indicatorValues, procIters[process]);
                                        
                                        //pass pvalues to parent
                                        ofstream out;
@@ -1321,49 +1391,106 @@ vector<float> IndicatorCommand::getPValues(vector< vector<SharedRAbundVector*> >
                                        out.close();
                                        
                                        exit(0);
-                               }else { 
-                                       m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine(); 
+                               }else {
+                                       m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine();
                                        for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
                                        exit(0);
                                }
                        }
                        
                        //do my part
-                       //special case for last processor in case it doesn't divide evenly
-                       numItersPerProcessor = iters - ((processors-1) * numItersPerProcessor);         
-                       pvalues = driver(groupings, groupingsMap, num, indicatorValues, numItersPerProcessor);
+                       pvalues = driver(groupings, num, indicatorValues, procIters[0]);
                        
                        //force parent to wait until all the processes are done
-                       for (int i=0;i<processIDS.size();i++) { 
+                       for (int i=0;i<processIDS.size();i++) {
                                int temp = processIDS[i];
                                wait(&temp);
                        }
                        
-                       //combine results                       
+                       //combine results
                        for (int i = 0; i < processIDS.size(); i++) {
                                ifstream in;
                                string tempFile =  toString(processIDS[i]) + ".pvalues.temp";
                                m->openInputFile(tempFile, in);
                                
                                ////// to do ///////////
-                               int numTemp; numTemp = 0; 
+                               int numTemp; numTemp = 0;
                                for (int j = 0; j < pvalues.size(); j++) {
                                        in >> numTemp; m->gobble(in);
                                        pvalues[j] += numTemp;
                                }
                                in.close(); m->mothurRemove(tempFile);
                        }
-                       for (int i = 0; i < pvalues.size(); i++) { pvalues[i] /= (double)iters; } 
-               }
+                       for (int i = 0; i < pvalues.size(); i++) { pvalues[i] /= (double)iters; }
 #else
-               pvalues = driver(groupings, groupingsMap, num, indicatorValues, iters);
-               for (int i = 0; i < pvalues.size(); i++) { pvalues[i] /= (double)iters; }
+            
+            //fill in functions
+            vector<indicatorData*> pDataArray;
+            DWORD   dwThreadIdArray[processors-1];
+            HANDLE  hThreadArray[processors-1];
+            
+            //Create processor worker threads.
+            for( int i=1; i<processors; i++ ){
+                
+                //make copy of lookup so we don't get access violations
+                vector< vector<SharedRAbundFloatVector*> > newGroupings;
+                
+                for (int k = 0; k < groupings.size(); k++) {
+                    vector<SharedRAbundFloatVector*> newLookup;
+                    for (int l = 0; l < groupings[k].size(); l++) {
+                        SharedRAbundFloatVector* temp = new SharedRAbundFloatVector();
+                        temp->setLabel(groupings[k][l]->getLabel());
+                        temp->setGroup(groupings[k][l]->getGroup());
+                        newLookup.push_back(temp);
+                    }
+                    newGroupings.push_back(newLookup);
+                }
+                
+                //for each bin
+                for (int l = 0; l < groupings.size(); l++) {
+                    for (int k = 0; k < groupings[l][0]->getNumBins(); k++) {
+                        if (m->control_pressed) { for (int j = 0; j < newGroupings.size(); j++) { for (int u = 0; u < newGroupings[j].size(); u++) { delete newGroupings[j][u];  } } return pvalues; }
+                        
+                        for (int j = 0; j < groupings[l].size(); j++) { newGroupings[l][j]->push_back((float)(groupings[l][j]->getAbundance(k)), groupings[l][j]->getGroup()); }
+                    }
+                }
+                
+                vector<float> copyIValues = indicatorValues;
+                
+                indicatorData* temp = new indicatorData(m, procIters[i], newGroupings, num, copyIValues);
+                pDataArray.push_back(temp);
+                processIDS.push_back(i);
+                
+                hThreadArray[i-1] = CreateThread(NULL, 0, MyIndicatorThreadFunction, pDataArray[i-1], 0, &dwThreadIdArray[i-1]);
+            }
+            
+            //do my part
+                       pvalues = driver(groupings, num, indicatorValues, procIters[0]);
+            
+            //Wait until all threads have terminated.
+            WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE);
+            
+            //Close all thread handles and free memory allocations.
+            for(int i=0; i < pDataArray.size(); i++){
+                for (int j = 0; j < pDataArray[i]->pvalues.size(); j++) { pvalues[j] += pDataArray[i]->pvalues[j];  }
+                
+                for (int l = 0; l < pDataArray[i]->groupings.size(); l++) {
+                    for (int j = 0; j < pDataArray[i]->groupings[l].size(); j++) { delete pDataArray[i]->groupings[l][j]; }
+                }
+                
+                CloseHandle(hThreadArray[i]);
+                delete pDataArray[i];
+            }
+            
+            for (int i = 0; i < pvalues.size(); i++) { pvalues[i] /= (double)iters; }
 #endif
+               }
+        
                
                return pvalues;
        }
        catch(exception& e) {
-               m->errorOut(e, "IndicatorCommand", "getPValues");       
+               m->errorOut(e, "IndicatorCommand", "getPValues");
                exit(1);
        }
 }
index 1f46081dd464c32e599cf1d173d1eba60df3db46..61bd565a302e8e8ecbfd77b4d723215c7c9cb485 100644 (file)
@@ -58,15 +58,144 @@ private:
        map< vector<int>, vector<int> > randomizeGroupings(vector< vector<SharedRAbundVector*> >&, int);
        map< vector<int>, vector<int> > randomizeGroupings(vector< vector<SharedRAbundFloatVector*> >&, int);
     
-       vector<float> driver(vector< vector<SharedRAbundFloatVector*> >&, map< vector<int>, vector<int> >, int, vector<float>, int);
-       vector<float> driver(vector< vector<SharedRAbundVector*> >&, map< vector<int>, vector<int> >, int, vector<float>, int);
+       vector<float> driver(vector< vector<SharedRAbundFloatVector*> >&, int, vector<float>, int);
+       vector<float> driver(vector< vector<SharedRAbundVector*> >&, int, vector<float>, int);
     
-       vector<float> getPValues(vector< vector<SharedRAbundFloatVector*> >&, map< vector<int>, vector<int> >, int, vector<float>);
-       vector<float> getPValues(vector< vector<SharedRAbundVector*> >&, map< vector<int>, vector<int> >, int, vector<float>);
+       vector<float> getPValues(vector< vector<SharedRAbundFloatVector*> >&, int, vector<float>);
+       vector<float> getPValues(vector< vector<SharedRAbundVector*> >&, int, vector<float>);
 
        
 };
 
+/**************************************************************************************************/
+
+struct indicatorData {
+    vector< vector<SharedRAbundFloatVector*> > groupings;
+       MothurOut* m;
+    int iters, num;
+    vector<float> indicatorValues;
+    vector<float> pvalues;
+       
+       indicatorData(){}
+       indicatorData(MothurOut* mout, int it, vector< vector<SharedRAbundFloatVector*> > ng, int n, vector<float> iv) {
+               m = mout;
+        iters = it;
+        groupings = ng;
+        indicatorValues = iv;
+        num = n;
+    }
+};
+/**************************************************************************************************/
+#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
+#else
+static DWORD WINAPI MyIndicatorThreadFunction(LPVOID lpParam){
+       indicatorData* pDataArray;
+       pDataArray = (indicatorData*)lpParam;
+       
+       try {
+        
+               pDataArray->pvalues.resize(pDataArray->indicatorValues.size(), 0);
+               
+               for(int i=0;i<pDataArray->iters;i++){
+                       if (pDataArray->m->control_pressed) { break; }
+            
+                       //groupingsMap = randomizeGroupings(groupings, num);
+            ///////////////////////////////////////////////////////////////////////
+            map< vector<int>, vector<int> > randomGroupings;
+            
+            for (int j = 0; j < pDataArray->num; j++) {
+                
+                //get random groups to swap to switch with
+                //generate random int between 0 and groupings.size()-1
+                int z = pDataArray->m->getRandomIndex(pDataArray->groupings.size()-1);
+                int x = pDataArray->m->getRandomIndex(pDataArray->groupings.size()-1);
+                int a = pDataArray->m->getRandomIndex(pDataArray->groupings[z].size()-1);
+                int b = pDataArray->m->getRandomIndex(pDataArray->groupings[x].size()-1);
+                //cout << i << '\t' << z << '\t' << x << '\t' << a << '\t' << b << endl;
+                
+                vector<int> from;
+                vector<int> to;
+                
+                from.push_back(z); from.push_back(a);
+                to.push_back(x); to.push_back(b);
+                
+                randomGroupings[from] = to;
+            }
+            ///////////////////////////////////////////////////////////////////////
+            
+                       //vector<float> randomIndicatorValues = getValues(groupings, notUsedGroupings, randomGroupings);
+            ///////////////////////////////////////////////////////////////////////
+            vector<float> randomIndicatorValues;
+            map< vector<int>, vector<int> >::iterator it;
+            
+            //for each otu
+            for (int i = 0; i < pDataArray->groupings[0][0]->getNumBins(); i++) {
+                
+                if (pDataArray->m->control_pressed) { return 0; }
+                
+                vector<float> terms;
+                float AijDenominator = 0.0;
+                vector<float> Bij;
+                
+                //get overall abundance of each grouping
+                for (int j = 0; j < pDataArray->groupings.size(); j++) {
+                    
+                    float totalAbund = 0;
+                    int numNotZero = 0;
+                    for (int k = 0; k < pDataArray->groupings[j].size(); k++) {
+                        vector<int> temp; temp.push_back(j); temp.push_back(k);
+                        it = randomGroupings.find(temp);
+                        
+                        if (it == randomGroupings.end()) { //this one didnt get moved
+                            totalAbund += pDataArray->groupings[j][k]->getAbundance(i);
+                            if (pDataArray->groupings[j][k]->getAbundance(i) != 0.0) { numNotZero++; }
+                        }else {
+                            totalAbund += pDataArray->groupings[(it->second)[0]][(it->second)[1]]->getAbundance(i);
+                            if (pDataArray->groupings[(it->second)[0]][(it->second)[1]]->getAbundance(i) != 0.0) { numNotZero++; }
+                        }
+                        
+                    }
+                    
+                    //mean abundance
+                    float Aij = (totalAbund / (float) pDataArray->groupings[j].size());
+                    terms.push_back(Aij);
+                    
+                    //percentage of sites represented
+                    Bij.push_back(numNotZero / (float) pDataArray->groupings[j].size());
+                    
+                    AijDenominator += Aij;
+                }
+                
+                float maxIndVal = 0.0;
+                for (int j = 0; j < terms.size(); j++) { 
+                    float thisAij = (terms[j] / AijDenominator); //relative abundance
+                    float thisValue = thisAij * Bij[j] * 100.0;
+                    
+                    //save largest
+                    if (thisValue > maxIndVal) { maxIndVal = thisValue; }
+                }
+                
+                randomIndicatorValues.push_back(maxIndVal);
+            }
+
+            ///////////////////////////////////////////////////////////////////////
+                       
+                       for (int j = 0; j < pDataArray->indicatorValues.size(); j++) {
+                               if (randomIndicatorValues[j] >= pDataArray->indicatorValues[j]) { pDataArray->pvalues[j]++; }
+                       }
+               }
+
+        return 0;
+               
+       }
+       catch(exception& e) {
+               pDataArray->m->errorOut(e, "IndicatorCommand", "MyIndicatorThreadFunction");
+               exit(1);
+       }
+}
+#endif
+
+
 
 #endif
 
index 58f1acccfc7ed4e3aefc902ad199dd7a17a0516d..e1b9470fdb218d94fd9f5fe9f36ccb23ac31ea8a 100644 (file)
@@ -1530,6 +1530,25 @@ vector<double> LinearAlgebra::solveEquations(vector<vector<double> > A, vector<d
                exit(1);
        }
 }
+/*********************************************************************************************************************************/
+vector<float> LinearAlgebra::solveEquations(vector<vector<float> > A, vector<float> b){
+    try {
+        int length = (int)b.size();
+        vector<double> x(length, 0);
+        vector<int> index(length);
+        for(int i=0;i<length;i++){  index[i] = i;   }
+        float d;
+        
+        ludcmp(A, index, d);  if (m->control_pressed) { return b; }
+        lubksb(A, index, b);
+        
+        return b;
+    }
+       catch(exception& e) {
+               m->errorOut(e, "LinearAlgebra", "solveEquations");
+               exit(1);
+       }
+}
 
 /*********************************************************************************************************************************/
 
@@ -1628,6 +1647,104 @@ void LinearAlgebra::lubksb(vector<vector<double> >& A, vector<int>& index, vecto
                exit(1);
        }
 }
+/*********************************************************************************************************************************/
+
+void LinearAlgebra::ludcmp(vector<vector<float> >& A, vector<int>& index, float& d){
+    try {
+        double tiny = 1e-20;
+        
+        int n = (int)A.size();
+        vector<float> vv(n, 0.0);
+        double temp;
+        int imax;
+        
+        d = 1.0;
+        
+        for(int i=0;i<n;i++){
+            float big = 0.0;
+            for(int j=0;j<n;j++){   if((temp=fabs(A[i][j])) > big ) big=temp;  }
+            if(big==0.0){   m->mothurOut("Singular matrix in routine ludcmp\n");    }
+            vv[i] = 1.0/big;
+        }
+        
+        for(int j=0;j<n;j++){
+            if (m->control_pressed) { break; }
+            for(int i=0;i<j;i++){
+                float sum = A[i][j];
+                for(int k=0;k<i;k++){   sum -= A[i][k] * A[k][j];   }
+                A[i][j] = sum;
+            }
+            
+            float big = 0.0;
+            for(int i=j;i<n;i++){
+                float sum = A[i][j];
+                for(int k=0;k<j;k++){   sum -= A[i][k] * A[k][j];   }
+                A[i][j] = sum;
+                float dum;
+                if((dum = vv[i] * fabs(sum)) >= big){
+                    big = dum;
+                    imax = i;
+                }
+            }
+            if(j != imax){
+                for(int k=0;k<n;k++){
+                    float dum = A[imax][k];
+                    A[imax][k] = A[j][k];
+                    A[j][k] = dum;
+                }
+                d = -d;
+                vv[imax] = vv[j];
+            }
+            index[j] = imax;
+            
+            if(A[j][j] == 0.0){ A[j][j] = tiny; }
+            
+            if(j != n-1){
+                float dum = 1.0/A[j][j];
+                for(int i=j+1;i<n;i++){ A[i][j] *= dum; }
+            }
+        }
+    }
+       catch(exception& e) {
+               m->errorOut(e, "LinearAlgebra", "ludcmp");
+               exit(1);
+       }
+}
+
+/*********************************************************************************************************************************/
+
+void LinearAlgebra::lubksb(vector<vector<float> >& A, vector<int>& index, vector<float>& b){
+    try {
+        float total;
+        int n = (int)A.size();
+        int ii = 0;
+        
+        for(int i=0;i<n;i++){
+            if (m->control_pressed) { break; }
+            int ip = index[i];
+            total = b[ip];
+            b[ip] = b[i];
+            
+            if (ii != 0) {
+                for(int j=ii-1;j<i;j++){
+                    total -= A[i][j] * b[j];
+                }
+            }
+            else if(total != 0){  ii = i+1;   }
+            b[i] = total;
+        }
+        for(int i=n-1;i>=0;i--){
+            total = b[i];
+            for(int j=i+1;j<n;j++){ total -= A[i][j] * b[j];  }
+            b[i] = total / A[i][i];
+        }
+    }
+       catch(exception& e) {
+               m->errorOut(e, "LinearAlgebra", "lubksb");
+               exit(1);
+       }
+}
+
 
 /*********************************************************************************************************************************/
 
index ef9e03cde900b4e01bdd61a99083801c0d25b913..e0933ee52564915d35bcf6e9f91f73c2cea80d2d 100644 (file)
@@ -39,6 +39,7 @@ public:
     double calcKendallSig(double, double); //length, coeff.
     
     vector<double> solveEquations(vector<vector<double> >, vector<double>);
+    vector<float> solveEquations(vector<vector<float> >, vector<float>);
     vector<vector<double> > getInverse(vector<vector<double> >);
 
     
@@ -64,6 +65,9 @@ private:
     
     void ludcmp(vector<vector<double> >&, vector<int>&, double&);
     void lubksb(vector<vector<double> >&, vector<int>&, vector<double>&);
+    
+    void ludcmp(vector<vector<float> >&, vector<int>&, float&);
+    void lubksb(vector<vector<float> >&, vector<int>&, vector<float>&);
 
     
 };
index 3ce1686327126b12c45afb51599b2e5a066e19aa..de21717ce03df122f7df9ddc014cc49900768250 100644 (file)
--- a/makefile
+++ b/makefile
@@ -15,8 +15,8 @@ USEREADLINE ?= yes
 CYGWIN_BUILD ?= no
 USECOMPRESSION ?= no
 MOTHUR_FILES="\"Enter_your_default_path_here\""
-RELEASE_DATE = "\"2/12/2013\""
-VERSION = "\"1.29.2\""
+RELEASE_DATE = "\"5/24/2013\""
+VERSION = "\"1.31.0\""
 FORTAN_COMPILER = gfortran
 FORTRAN_FLAGS = 
 
diff --git a/makelookupcommand.cpp b/makelookupcommand.cpp
new file mode 100644 (file)
index 0000000..5b280f5
--- /dev/null
@@ -0,0 +1,611 @@
+//
+//  makelookupcommand.cpp
+//  Mothur
+//
+//  Created by SarahsWork on 5/14/13.
+//  Copyright (c) 2013 Schloss Lab. All rights reserved.
+//
+
+#include "makelookupcommand.h"
+
+//**********************************************************************************************************************
+vector<string> MakeLookupCommand::setParameters(){
+       try {
+               CommandParameter ptemplate("reference", "InputTypes", "", "", "none", "none", "none","",false,true,true); parameters.push_back(ptemplate);
+        CommandParameter pflow("flow", "InputTypes", "", "", "none", "none", "none","lookup",false,true,true); parameters.push_back(pflow);
+        CommandParameter perrors("error", "InputTypes", "", "", "none", "none", "none","none",false,true,true); parameters.push_back(perrors);
+        CommandParameter pbarcode("barcode", "String", "", "AACCGTGTC", "", "", "","",false,false); parameters.push_back(pbarcode);
+               CommandParameter pkey("key", "String", "", "TCAG", "", "", "","",false,false); parameters.push_back(pkey);
+        CommandParameter pthreshold("threshold", "Number", "", "10000", "", "", "","",false,false); parameters.push_back(pthreshold);
+        CommandParameter porder("order", "Multiple", "A-B-I", "A", "", "", "","",false,false, true); parameters.push_back(porder);
+               CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
+               CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
+               
+               vector<string> myArray;
+               for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
+               return myArray;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "MakeLookupCommand", "setParameters");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+string MakeLookupCommand::getHelpString(){
+       try {
+               string helpString = "";
+               helpString += "The make.lookup command allows you to create custom lookup files for use with shhh.flows.\n";
+               helpString += "The make.lookup command parameters are: reference, flow, error, barcode, key, threshold and order.\n";
+               helpString += "The reference file needs to be in the same direction as the flow data and it must start with the forward primer sequence. It is required.\n";
+        helpString += "The flow parameter is used to provide the flow data. It is required.\n";
+        helpString += "The error parameter is used to provide the error summary. It is required.\n";
+        helpString += "The barcode parameter is used to provide the barcode sequence. Default=AACCGTGTC.\n";
+        helpString += "The key parameter is used to provide the key sequence. Default=TCAG.\n";
+        helpString += "The threshold parameter is ....Default=10000.\n";
+        helpString += "The order parameter options are A, B or I.  Default=A. A = TACG and B = TACGTACGTACGATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGC and I = TACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGC.\n";
+               helpString += "The make.lookup should be in the following format: make.lookup(reference=HMP_MOCK.v53.fasta, flow=H3YD4Z101.mock3.flow_450.flow, error=H3YD4Z101.mock3.flow_450.error.summary, barcode=AACCTGGC)\n";
+               helpString += "new(...)\n";
+               return helpString;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "MakeLookupCommand", "getHelpString");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+string MakeLookupCommand::getOutputPattern(string type) {
+    try {
+        string pattern = "";
+        
+        if (type == "lookup") {  pattern = "[filename],lookup"; }
+        else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true;  }
+        
+        return pattern;
+    }
+    catch(exception& e) {
+        m->errorOut(e, "MakeLookupCommand", "getOutputPattern");
+        exit(1);
+    }
+}
+//**********************************************************************************************************************
+MakeLookupCommand::MakeLookupCommand(){
+       try {
+               abort = true; calledHelp = true;
+               setParameters();
+        vector<string> tempOutNames;
+               outputTypes["lookup"] = tempOutNames; 
+    }
+       catch(exception& e) {
+               m->errorOut(e, "MakeLookupCommand", "MakeLookupCommand");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+MakeLookupCommand::MakeLookupCommand(string option)  {
+       try {
+               abort = false; calledHelp = false;
+               
+               //allow user to run help
+               if(option == "help") { help(); abort = true; calledHelp = true; }
+               else if(option == "citation") { citation(); abort = true; calledHelp = true;}
+               
+               else {
+                       //valid paramters for this command
+                       vector<string> myArray = setParameters();
+                       
+                       OptionParser parser(option);
+                       map<string,string> parameters = parser.getParameters();
+                       
+                       ValidParameters validParameter;
+                       map<string,string>::iterator it;
+                       //check to make sure all parameters are valid for command
+                       for (it = parameters.begin(); it != parameters.end(); it++) {
+                               if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
+                       }
+                       
+            vector<string> tempOutNames;
+            outputTypes["lookup"] = tempOutNames; 
+                       
+                       //if the user changes the input directory command factory will send this info to us in the output parameter
+                       string inputDir = validParameter.validFile(parameters, "inputdir", false);
+                       if (inputDir == "not found"){   inputDir = "";          }
+                       else {
+                string path;
+                               it = parameters.find("flow");
+                               //user has given a template file
+                               if(it != parameters.end()){
+                                       path = m->hasPath(it->second);
+                                       //if the user has not given a path then, add inputdir. else leave path alone.
+                                       if (path == "") {       parameters["flow"] = inputDir + it->second;             }
+                               }
+                               
+                               it = parameters.find("error");
+                               //user has given a template file
+                               if(it != parameters.end()){
+                                       path = m->hasPath(it->second);
+                                       //if the user has not given a path then, add inputdir. else leave path alone.
+                                       if (path == "") {       parameters["error"] = inputDir + it->second;            }
+                               }
+                               
+                               it = parameters.find("reference");
+                               //user has given a template file
+                               if(it != parameters.end()){
+                                       path = m->hasPath(it->second);
+                                       //if the user has not given a path then, add inputdir. else leave path alone.
+                                       if (path == "") {       parameters["reference"] = inputDir + it->second;                }
+                               }
+                
+            }
+                        
+                       //check for parameters
+            errorFileName = validParameter.validFile(parameters, "error", true);
+                       if (errorFileName == "not open") { errorFileName = ""; abort = true; }
+                       else if (errorFileName == "not found") { errorFileName = ""; m->mothurOut("[ERROR]: error parameter is required."); m->mothurOutEndLine();  abort = true; }
+                       
+                       flowFileName = validParameter.validFile(parameters, "flow", true);
+                       if (flowFileName == "not open") { flowFileName = ""; abort = true; }
+                       else if (flowFileName == "not found") { flowFileName = ""; m->mothurOut("[ERROR]: flow parameter is required."); m->mothurOutEndLine();  abort = true; }
+                       else {   m->setFlowFile(flowFileName);  }
+                       
+                       refFastaFileName = validParameter.validFile(parameters, "reference", true);
+                       if (refFastaFileName == "not open") { abort = true; }
+                       else if (refFastaFileName == "not found") { refFastaFileName = ""; m->mothurOut("[ERROR]: reference parameter is required."); m->mothurOutEndLine();  abort = true; }
+                      
+            //if the user changes the output directory command factory will send this info to us in the output parameter
+                       outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){
+                               outputDir = m->hasPath(flowFileName); //if user entered a file with a path then preserve it
+                       }
+            
+            string temp = validParameter.validFile(parameters, "threshold", false);    if (temp == "not found"){       temp = "10000"; }
+                       m->mothurConvert(temp, thresholdCount);
+            
+            barcodeSequence = validParameter.validFile(parameters, "barcode", false);  if (barcodeSequence == "not found"){    barcodeSequence = "AACCGTGTC";  }
+            
+            keySequence = validParameter.validFile(parameters, "key", false);  if (keySequence == "not found"){        keySequence = "TCAG";   }
+            
+            temp = validParameter.validFile(parameters, "order", false);  if (temp == "not found"){    temp = "A";     }
+            if (temp.length() > 1) {  m->mothurOut("[ERROR]: " + temp + " is not a valid option for order. order options are A, B, or I. A = TACG, B = TACGTACGTACGATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGC, and I = TACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGC.\n");  abort=true;
+            }
+            else {
+                if (toupper(temp[0]) == 'A') {  flowOrder = "TACG";   }
+                else if(toupper(temp[0]) == 'B'){
+                    flowOrder = "TACGTACGTACGATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGC";   }
+                else if(toupper(temp[0]) == 'I'){
+                    flowOrder = "TACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGC";   }
+                else {
+                    m->mothurOut("[ERROR]: " + temp + " is not a valid option for order. order options are A, B, or I. A = TACG, B = TACGTACGTACGATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGC, and I = TACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGC.\n");  abort=true;
+                }
+            }
+                       
+               }
+               
+       }
+       catch(exception& e) {
+               m->errorOut(e, "MakeLookupCommand", "MakeLookupCommand");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+
+int MakeLookupCommand::execute(){
+       try {
+        
+               if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
+        
+        cout.setf(ios::fixed, ios::floatfield);
+        cout.setf(ios::showpoint);
+        
+        double gapOpening = 10;
+        int maxHomoP = 101;
+        vector<vector<double> > penaltyMatrix; penaltyMatrix.resize(maxHomoP);
+        for(int i=0;i<maxHomoP;i++){
+            penaltyMatrix[i].resize(maxHomoP, 5);
+            penaltyMatrix[i][i] = 0;
+        }
+        
+        //Create flows for reference sequences...
+        ifstream refFASTA;
+        m->openInputFile(refFastaFileName, refFASTA);                        //  *   open reference sequence file
+        map<string, vector<double> > refFlowgrams;
+        
+        while(!refFASTA.eof()){
+            if (m->control_pressed) { refFASTA.close(); return 0; }
+            
+            Sequence seq(refFASTA);  m->gobble(refFASTA);
+            
+            if (m->debug) { m->mothurOut("[DEBUG]: seq = " + seq.getName() + ".\n"); }
+            
+            string fullSequence = keySequence + barcodeSequence + seq.getAligned(); //  *   concatenate the keySequence, barcodeSequence, and
+            //      referenceSequences
+            refFlowgrams[seq.getName()] = convertSeqToFlow(fullSequence, flowOrder); //  *   translate concatenated sequences into flowgram
+        }
+        refFASTA.close();
+        
+        vector<vector<double> > lookupTable; lookupTable.resize(1000);
+        for(int i=0;i<1000;i++){
+            lookupTable[i].resize(11, 0);
+        }
+        
+        if (m->debug) { m->mothurOut("[DEBUG]: here .\n"); }
+        //Loop through each sequence in the flow file and the error summary file.
+        ifstream flowFile;
+        m->openInputFile(flowFileName, flowFile);
+        int numFlows;
+        flowFile >> numFlows;
+        
+        if (m->debug) { m->mothurOut("[DEBUG]: numflows = " + toString(numFlows) +  ".\n"); }
+        
+        ifstream errorFile;
+        m->openInputFile(errorFileName, errorFile);
+        m->getline(errorFile); //grab headers
+        
+        string errorQuery, flowQuery, referenceName, dummy;
+        string chimera;
+        float intensity;
+        
+        vector<double> std;  std.resize(11, 0);
+        
+        while(errorFile && flowFile){
+            
+            if (m->control_pressed) { errorFile.close(); flowFile.close(); return 0; }
+            
+            //  * if it's chimeric, chuck it
+            errorFile >> errorQuery >> referenceName;
+            for(int i=2;i<40;i++){
+                errorFile >> dummy;
+            }
+            errorFile >> chimera;
+           
+            
+            if(chimera == "2"){
+                m->getline(flowFile);
+            }
+            else{
+                
+                flowFile >> flowQuery >> dummy;
+                if(flowQuery != errorQuery){    cout << flowQuery << " != " << errorQuery << endl;  }
+                
+                map<string, vector<double> >::iterator it = refFlowgrams.find(referenceName);       //  * compare sequence to its closest reference
+                if (it == refFlowgrams.end()) {
+                    m->mothurOut("[WARNING]: missing reference flow " + referenceName + ", ignoring flow " + flowQuery + ".\n");
+                    m->getline(flowFile); m->gobble(flowFile);
+                }else {
+                    vector<double> refFlow = it->second;
+                    vector<double> flowgram; flowgram.resize(numFlows);
+                    
+                    if (m->debug) { m->mothurOut("[DEBUG]: flowQuery = " + flowQuery +  ".\t" + "refName " + referenceName+  ".\n"); }
+                    
+                    for(int i=0;i<numFlows;i++){
+                        flowFile >> intensity;
+                        flowgram[i] = intensity;// (int)round(100 * intensity);
+                    }
+                    m->gobble(flowFile);
+                    
+                    if (m->debug) { m->mothurOut("[DEBUG]: before align.\n"); }
+                    
+                    alignFlowGrams(flowgram, refFlow, gapOpening, penaltyMatrix, flowOrder);
+                    
+                    if (m->debug) { m->mothurOut("[DEBUG]: after align.\n"); }
+                    
+                    if (m->control_pressed) { errorFile.close(); flowFile.close(); return 0; }
+                    
+                    for(int i=0;i<flowgram.size();i++){
+                        int count = (int)round(100*flowgram[i]);
+                        if(count > 1000){count = 999;}
+                        if(abs(flowgram[i]-refFlow[i])<=0.50){
+                            lookupTable[count][int(refFlow[i])]++;               //  * build table
+                            std[int(refFlow[i])] += (100*refFlow[i]-count)*(100*refFlow[i]-count);
+                        }
+                    }
+                }
+                
+            }
+            m->gobble(errorFile);
+            m->gobble(flowFile);
+        }
+        errorFile.close(); flowFile.close();
+        
+        
+        //get probabilities
+        vector<int> counts; counts.resize(11, 0);
+        int totalCount = 0;
+        for(int i=0;i<1000;i++){
+            for(int j=0;j<11;j++){
+                counts[j] += lookupTable[i][j];
+                totalCount += lookupTable[i][j];
+            }
+        }
+        
+        int N = 11;
+        for(int i=0;i<11;i++){
+            if(counts[i] < thresholdCount){ N = i; break; }  //bring back
+            std[i] = sqrt(std[i]/(double)(counts[i]));  //bring back
+        }
+        
+        regress(std, N);  //bring back
+        
+        if (m->control_pressed) { return 0; }
+        
+        double minProbability = 0.1 / (double)totalCount;
+        
+        //calculate the negative log probabilities of each intensity given the actual homopolymer length; impute with a guassian when counts are too low
+        double sqrtTwoPi = 2.50662827463;//pow(2.0 * 3.14159, 0.5);
+        
+        for(int i=0;i<1000;i++){
+            if (m->control_pressed) { return 0; }
+            
+            for(int j=0;j<N;j++){
+                if(lookupTable[i][j] == 0){
+                    lookupTable[i][j] = 1;  //bring back
+                }
+                lookupTable[i][j] = -log(lookupTable[i][j]/double(counts[j]));  //bring back
+            }
+            
+            for(int j=N;j<11;j++){  //bring back
+                double normalProbability = 1.0/((double)std[j] * sqrtTwoPi) * exp(-double(i - j*100)* double(i - j*100)/ double(2*std[j]*std[j]));
+                if(normalProbability > minProbability){
+                    lookupTable[i][j] = -log(normalProbability);
+                }
+                else{
+                    lookupTable[i][j] = -log(minProbability);
+                }
+            }
+        }
+        
+        
+        //calculate the probability of each homopolymer length
+        vector<double> negLogHomoProb; negLogHomoProb.resize(11, 0.00);  //bring back
+        for(int i=0;i<N;i++){
+            negLogHomoProb[i] = -log(counts[i] / (double)totalCount);
+        }
+        regress(negLogHomoProb, N);
+        
+        if (m->control_pressed) { return 0; }
+        
+        //output data table.  column one is the probability of each homopolymer length
+        map<string, string> variables;
+        variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(flowFileName));
+               string outputFile = getOutputFileName("lookup",variables);
+               outputNames.push_back(outputFile); outputTypes["lookup"].push_back(outputFile);
+        
+        ofstream lookupFile;
+        m->openOutputFile(outputFile, lookupFile);
+        lookupFile.precision(8);
+        
+        for(int j=0;j<11;j++){
+            //        lookupFile << counts[j];
+            lookupFile << showpoint << negLogHomoProb[j]; //bring back
+            for(int i=0;i<1000;i++){
+                lookupFile << '\t' << lookupTable[i][j];
+            }
+            lookupFile << endl;
+        }
+        lookupFile.close();
+        
+        m->mothurOut("\nData for homopolymer lengths of " + toString(N) + " and longer were imputed for this analysis\n\n");
+         
+        if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {       m->mothurRemove(outputNames[i]); } return 0; }
+               
+               m->mothurOutEndLine();
+               m->mothurOut("Output File Names: "); m->mothurOutEndLine();
+               for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
+               m->mothurOutEndLine();
+        
+        return 0;
+    }
+       catch(exception& e) {
+               m->errorOut(e, "MakeLookupCommand", "execute");
+               exit(1);
+       }
+}
+//******************************************************************************************************************************
+
+vector<double> MakeLookupCommand::convertSeqToFlow(string sequence, string order){
+    try {
+        int seqLength = (int)sequence.length();
+        int numFlows = (int)order.length();
+        vector<double> flowgram;
+        
+        int orderIndex = 0;
+        int sequenceIndex = 0;
+        
+        while(orderIndex < numFlows && sequenceIndex < seqLength){
+            
+            if (m->control_pressed) { return flowgram; }
+            
+            int homopolymerLength = 1;
+            
+            char base = sequence[sequenceIndex];
+            
+            while(base == sequence[sequenceIndex+1] && sequenceIndex < seqLength){
+                homopolymerLength++;
+                sequenceIndex++;
+            }
+            
+            sequenceIndex++;
+            
+            for(int i=orderIndex; i<orderIndex+numFlows;i++){
+                if(order[i%numFlows] == base){
+                    //flowgram[i] = homopolymerLength;
+                    orderIndex = i%numFlows;
+                    break;
+                }else {  flowgram.push_back(0); }
+            }
+            
+            //flowgram[orderIndex] = homopolymerLength;
+            flowgram.push_back(homopolymerLength);
+            
+            orderIndex++;
+            orderIndex = orderIndex % numFlows;
+        }
+        
+        return flowgram;
+    }
+       catch(exception& e) {
+               m->errorOut(e, "MakeLookupCommand", "convertSeqToFlow");
+               exit(1);
+       }
+}
+//******************************************************************************************************************************
+
+int MakeLookupCommand::alignFlowGrams(vector<double>& flowgram, vector<double>& refFlow, double gapOpening, vector<vector<double> > penaltyMatrix, string flowOrder){
+    try {
+        int numQueryFlows = (int)flowgram.size();
+        int numRefFlows = (int)refFlow.size();
+        
+            //cout << numQueryFlows << '\t' << numRefFlows << endl;
+        
+        vector<vector<double> > scoreMatrix; scoreMatrix.resize(numQueryFlows+1);
+        vector<vector<char> > directMatrix; directMatrix.resize(numQueryFlows+1);
+        
+        for(int i=0;i<=numQueryFlows;i++){
+            if (m->control_pressed) { return 0; }
+            scoreMatrix[i].resize(numRefFlows+1, 0.00);
+            directMatrix[i].resize(numRefFlows+1, 'x');
+            
+            scoreMatrix[i][0] = i * gapOpening;
+            directMatrix[i][0] = 'u';
+        }
+        
+            //cout << numQueryFlows << '\t' << numRefFlows << endl;
+        
+        
+        for(int i=0;i<=numRefFlows;i++){
+            scoreMatrix[0][i] = i * gapOpening;
+            directMatrix[0][i] = 'l';
+        }
+        
+        
+        for(int i=1;i<=numQueryFlows;i++){
+            for(int j=1;j<=numRefFlows;j++){
+                if (m->control_pressed) { return 0; }
+                double diagonal = 1000000000;
+                if(flowOrder[i%flowOrder.length()] == flowOrder[j%flowOrder.length()]){
+                    diagonal = scoreMatrix[i-1][j-1] + penaltyMatrix[round(flowgram[i-1])][refFlow[j-1]];
+                }
+                double up = scoreMatrix[i-1][j] + gapOpening;
+                double left = scoreMatrix[i][j-1] + gapOpening;
+                
+                double minScore = diagonal;
+                char direction = 'd';
+                
+                if(left < diagonal && left < up){
+                    minScore = left;
+                    direction = 'l';
+                }
+                else if(up < diagonal && up < left){
+                    minScore = up;
+                    direction = 'u';
+                }
+                
+                scoreMatrix[i][j] = minScore;
+                directMatrix[i][j] = direction;
+                
+            }
+        }
+        
+        int minRowIndex = numQueryFlows;
+        double minRowScore = scoreMatrix[numQueryFlows][numRefFlows];
+        for(int i=0;i<numQueryFlows;i++){
+            if (m->control_pressed) { return 0; }
+            if(scoreMatrix[i][numRefFlows] < minRowScore){
+                minRowScore = scoreMatrix[i][numRefFlows];
+                minRowIndex = i;
+            }
+        }
+        
+        int minColumnIndex = numRefFlows;
+        double minColumnScore = scoreMatrix[numQueryFlows][numRefFlows];
+        for(int i=0;i<numRefFlows;i++){
+            if (m->control_pressed) { return 0; }
+            if(scoreMatrix[numQueryFlows][i] < minColumnScore){
+                minColumnScore = scoreMatrix[numQueryFlows][i];
+                minColumnIndex = i;
+            }
+        }
+        
+        
+        int i=minRowIndex;
+        int j= minColumnIndex;
+        
+        vector<double> newFlowgram;
+        vector<double> newRefFlowgram;
+       
+        while(i > 0 && j > 0){
+            if (m->control_pressed) { return 0; }
+            if(directMatrix[i][j] == 'd'){
+                newFlowgram.push_back(flowgram[i-1]);
+                newRefFlowgram.push_back(refFlow[j-1]);
+                
+                i--;
+                j--;
+            }
+            else if(directMatrix[i][j] == 'l'){
+                newFlowgram.push_back(0);
+                newRefFlowgram.push_back(refFlow[j-1]);
+                
+                j--;
+            }
+            else if(directMatrix[i][j] == 'u'){
+                newFlowgram.push_back(flowgram[i-1]);
+                newRefFlowgram.push_back(0);
+                
+                i--;
+            }
+        }
+        
+        flowgram = newFlowgram;
+        refFlow = newRefFlowgram;
+        
+        return 0;
+    
+    }
+       catch(exception& e) {
+               m->errorOut(e, "MakeLookupCommand", "alignFlowGrams");
+               exit(1);
+       }
+}
+
+//******************************************************************************************************************************
+
+int MakeLookupCommand::regress(vector<double>& data, int N){
+    try {
+        //fit data for larger values of N
+        double xMean = 0;
+        double yMean = 0;
+        
+        for(int i=1;i<N;i++){
+            if (m->control_pressed) { return 0; }
+            xMean += i;
+            yMean += data[i];
+        }
+        xMean /= (N-1);
+        yMean /= (N-1);
+        
+        double numerator = 0;
+        double denomenator = 0;
+        for(int i=1;i<N;i++){
+            if (m->control_pressed) { return 0; }
+            numerator += (i-xMean)*(data[i] - yMean);
+            denomenator += (i-xMean) * (i-xMean);
+        }
+        double slope = numerator / denomenator;
+        double intercept = yMean - slope * xMean;
+        
+        for(int i=N;i<11;i++){
+            data[i] = intercept + i * slope;
+        }
+        
+        return 0;
+    }
+       catch(exception& e) {
+               m->errorOut(e, "MakeLookupCommand", "regress");
+               exit(1);
+       }
+}
+
+//******************************************************************************************************************************
+
+//**********************************************************************************************************************
+
+
diff --git a/makelookupcommand.h b/makelookupcommand.h
new file mode 100644 (file)
index 0000000..743e756
--- /dev/null
@@ -0,0 +1,51 @@
+//
+//  makelookupcommand.h
+//  Mothur
+//
+//  Created by SarahsWork on 5/14/13.
+//  Copyright (c) 2013 Schloss Lab. All rights reserved.
+//
+
+#ifndef Mothur_makelookupcommand_h
+#define Mothur_makelookupcommand_h
+
+#include "command.hpp"
+#include "sequence.hpp"
+
+/**************************************************************************************************/
+
+class MakeLookupCommand : public Command {
+public:
+    MakeLookupCommand(string);
+    MakeLookupCommand();
+    ~MakeLookupCommand(){}
+    
+    vector<string> setParameters();
+    string getCommandName()                    { return "make.lookup";                 }
+    string getCommandCategory()                { return "Sequence Processing";         }
+    
+    string getOutputPattern(string);
+       string getHelpString();
+    string getCitation() { return "http://www.mothur.org/wiki/Make.lookup"; }
+    string getDescription()            { return "create custom lookup files for use with shhh.flows"; }
+    
+    int execute();
+    void help() { m->mothurOut(getHelpString()); }
+    
+private:
+    bool abort;
+    string outputDir, flowFileName, errorFileName, flowOrder, refFastaFileName, barcodeSequence, keySequence;
+    vector<string> outputNames;
+    int thresholdCount;
+    
+    vector<double> convertSeqToFlow(string sequence, string order);
+    int alignFlowGrams(vector<double>& flowgram, vector<double>& refFlow, double gapOpening, vector<vector<double> > penaltyMatrix, string flowOrder);
+    int regress(vector<double>& data, int N);
+};
+
+/**************************************************************************************************/
+
+
+
+
+#endif
index fc16d204b142e5a6f55783f56bc52aafb8e6a4d1..8a3b6313072a158f611e1a69a84d0a9713add18c 100644 (file)
@@ -2242,9 +2242,11 @@ map<string, int> MothurOut::readNames(string namefile, unsigned long int& numSeq
 /************************************************************/
 int MothurOut::checkName(string& name) {
     try {
-        for (int i = 0; i < name.length(); i++) {
-            if (name[i] == ':') { name[i] = '_'; changedSeqNames = true; }
-        }        
+        if (modifyNames) {
+            for (int i = 0; i < name.length(); i++) {
+                if (name[i] == ':') { name[i] = '_'; changedSeqNames = true; }
+            }
+        }
         return 0;
     }
        catch(exception& e) {
@@ -3270,6 +3272,26 @@ vector<double> MothurOut::getAverages(vector< vector<double> >& dists) {
                exit(1);
        }
 }
+/**************************************************************************************************/
+double MothurOut::getAverage(vector<double> dists) {
+       try{
+        double average = 0;
+        
+        for (int i = 0; i < dists.size(); i++) {
+            average += dists[i];
+        }
+       
+        //finds average.
+        average /= (double) dists.size(); 
+        
+        return average;
+    }
+       catch(exception& e) {
+               errorOut(e, "MothurOut", "getAverage");
+               exit(1);
+       }
+}
+
 /**************************************************************************************************/
 vector<double> MothurOut::getStandardDeviation(vector< vector<double> >& dists) {
        try{
index 643ecd58a149aebc1ac88f500d4658de98d5e145..f99eac1917cbe2ce5a1ce278c66d27d1df71737e 100644 (file)
@@ -70,7 +70,7 @@ class MothurOut {
                vector<string> binLabelsInFile;
                vector<string> currentBinLabels;
                string saveNextLabel, argv, sharedHeaderMode, groupMode;
-               bool printedHeaders, commandInputsConvertError, changedSeqNames;
+               bool printedHeaders, commandInputsConvertError, changedSeqNames, modifyNames;
                
                //functions from mothur.h
                //file operations
@@ -163,6 +163,7 @@ class MothurOut {
         vector<double> getStandardDeviation(vector< vector<double> >&);
         vector<double> getStandardDeviation(vector< vector<double> >&, vector<double>&);
         vector<double> getAverages(vector< vector<double> >&);
+        double getAverage(vector<double>);
         vector< vector<seqDist> > getStandardDeviation(vector< vector< vector<seqDist> > >&);
         vector< vector<seqDist> > getStandardDeviation(vector< vector< vector<seqDist> > >&, vector< vector<seqDist> >&);
         vector< vector<seqDist> > getAverages(vector< vector< vector<seqDist> > >&, string);
@@ -265,6 +266,7 @@ class MothurOut {
                        sharedHeaderMode = "";
             groupMode = "group";
             changedSeqNames = false;
+            modifyNames = true;
                }
                ~MothurOut();
 
index 74e3e2bc9f0bdedc88f90e7f923eafdb511470d6..051c1dfb057c0148c3f0053f94707b7e6d034dc0 100644 (file)
@@ -191,7 +191,7 @@ int ParseFastaQCommand::execute(){
                        else if (name[0] != '@') { m->mothurOut("[ERROR]: reading " + name + " expected a name with @ as a leading character."); m->mothurOutEndLine(); m->control_pressed = true; break; }
                        else { 
                 name = name.substr(1); 
-                for (int i = 0; i < name.length(); i++) { if (name[i] == ':') { name[i] = '_'; m->changedSeqNames = true; } }
+                m->checkName(name);
             }
                        
                        //read sequence
@@ -204,7 +204,7 @@ int ParseFastaQCommand::execute(){
                        else if (name2[0] != '+') { m->mothurOut("[ERROR]: reading " + name2 + " expected a name with + as a leading character."); m->mothurOutEndLine(); m->control_pressed = true; break; }
                        else { 
                 name2 = name2.substr(1);  
-                for (int i = 0; i < name2.length(); i++) { if (name2[i] == ':') { name2[i] = '_'; m->changedSeqNames = true; } }
+                m->checkName(name2);
             }
                        
                        //read quality scores
index 6a0485c133b2868670e49e5873186e5d10df474d..b12ca1fe742240a4c80e96d0d82bcb2ef612fa18 100644 (file)
@@ -54,31 +54,18 @@ EstOutput Parsimony::getValues(Tree* t, int p, string o) {
                                namesOfGroupCombos.push_back(groups);
                        }
                }
-               
-       #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
-               if(processors == 1){
-                       data = driver(t, namesOfGroupCombos, 0, namesOfGroupCombos.size(), ct);
-               }else{
-                       lines.clear();
-                       int numPairs = namesOfGroupCombos.size();
-                       
-                       int numPairsPerProcessor = numPairs / processors;
-                       
-                       for (int i = 0; i < processors; i++) {
-                               int startPos = i * numPairsPerProcessor;
-                               
-                               if(i == processors - 1){
-                                       numPairsPerProcessor = numPairs - i * numPairsPerProcessor;
-                               }
-                               
-                               lines.push_back(linePair(startPos, numPairsPerProcessor));
-                       }
-                       
-                       data = createProcesses(t, namesOfGroupCombos, ct);
-               }
-       #else
-               data = driver(t, namesOfGroupCombos, 0, namesOfGroupCombos.size(), ct);
-       #endif
+        
+        lines.clear();
+        int numPairs = namesOfGroupCombos.size();
+        int numPairsPerProcessor = numPairs / processors;
+        
+        for (int i = 0; i < processors; i++) {
+            int startPos = i * numPairsPerProcessor;
+            if(i == processors - 1){ numPairsPerProcessor = numPairs - i * numPairsPerProcessor; }
+            lines.push_back(linePair(startPos, numPairsPerProcessor));
+        }
+        
+        data = createProcesses(t, namesOfGroupCombos, ct);
                
                return data;
                
@@ -92,12 +79,13 @@ EstOutput Parsimony::getValues(Tree* t, int p, string o) {
 
 EstOutput Parsimony::createProcesses(Tree* t, vector< vector<string> > namesOfGroupCombos, CountTable* ct) {
        try {
-#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
-               int process = 1;
+        int process = 1;
                vector<int> processIDS;
                
                EstOutput results;
-               
+
+#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
+                               
                //loop through and create all the processes you want
                while (process != processors) {
                        int pid = fork();
@@ -160,9 +148,47 @@ EstOutput Parsimony::createProcesses(Tree* t, vector< vector<string> > namesOfGr
                        in.close();
                        m->mothurRemove(s);
                }
+#else
+        //fill in functions
+        vector<parsData*> pDataArray;
+               DWORD   dwThreadIdArray[processors-1];
+               HANDLE  hThreadArray[processors-1];
+        vector<CountTable*> cts;
+        vector<Tree*> trees;
+               
+               //Create processor worker threads.
+               for( int i=1; i<processors; i++ ){
+            CountTable* copyCount = new CountTable();
+            copyCount->copy(ct);
+            Tree* copyTree = new Tree(copyCount);
+            copyTree->getCopy(t);
+            
+            cts.push_back(copyCount);
+            trees.push_back(copyTree);
+            
+            parsData* temppars = new parsData(m, lines[i].start, lines[i].num, namesOfGroupCombos, copyTree, copyCount);
+                       pDataArray.push_back(temppars);
+                       processIDS.push_back(i);
+            
+                       hThreadArray[i-1] = CreateThread(NULL, 0, MyParsimonyThreadFunction, pDataArray[i-1], 0, &dwThreadIdArray[i-1]);
+               }
+               
+               results = driver(t, namesOfGroupCombos, lines[0].start, lines[0].num, ct);
+               
+               //Wait until all threads have terminated.
+               WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE);
+               
+               //Close all thread handles and free memory allocations.
+               for(int i=0; i < pDataArray.size(); i++){
+            for (int j = 0; j < pDataArray[i]->results.size(); j++) {  results.push_back(pDataArray[i]->results[j]);  }
+                       delete cts[i];
+            delete trees[i];
+                       CloseHandle(hThreadArray[i]);
+                       delete pDataArray[i];
+               }
                
-               return results;
 #endif         
+        return results;
        }
        catch(exception& e) {
                m->errorOut(e, "Parsimony", "createProcesses");
index bf0e0d4f90198ef353c2c1899414f699454c8a7e..51c0496b1405c43793ba2c47c7557c6b0c43630f 100644 (file)
@@ -38,7 +38,91 @@ class Parsimony : public TreeCalculator  {
                EstOutput driver(Tree*, vector< vector<string> >, int, int, CountTable*); 
                EstOutput createProcesses(Tree*, vector< vector<string> >, CountTable*);
 };
-
 /***********************************************************************/
+struct parsData {
+    int start;
+       int num;
+       MothurOut* m;
+    EstOutput results;
+    vector< vector<string> > namesOfGroupCombos;
+    Tree* t;
+    CountTable* ct;
+    
+       parsData(){}
+       parsData(MothurOut* mout, int st, int en, vector< vector<string> > ngc, Tree* tree, CountTable* count) {
+        m = mout;
+               start = st;
+               num = en;
+        namesOfGroupCombos = ngc;
+        t = tree;
+        ct = count;
+       }
+};
+
+/**************************************************************************************************/
+#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
+#else
+static DWORD WINAPI MyParsimonyThreadFunction(LPVOID lpParam){
+       parsData* pDataArray;
+       pDataArray = (parsData*)lpParam;
+       try {
+        
+        pDataArray->results.resize(pDataArray->num);
+               
+               Tree* copyTree = new Tree(pDataArray->ct);
+               int count = 0;
+               
+               for (int h = pDataArray->start; h < (pDataArray->start+pDataArray->num); h++) {
+            
+                       if (pDataArray->m->control_pressed) { delete copyTree; return 0; }
+            
+                       int score = 0;
+                       
+                       //groups in this combo
+                       vector<string> groups = pDataArray->namesOfGroupCombos[h];
+                       
+                       //copy users tree so that you can redo pgroups
+                       copyTree->getCopy(pDataArray->t);
+                       
+                       //create pgroups that reflect the groups the user want to use
+                       for(int i=copyTree->getNumLeaves();i<copyTree->getNumNodes();i++){
+                               copyTree->tree[i].pGroups = (copyTree->mergeUserGroups(i, groups));
+                       }
+                       
+                       for(int i=copyTree->getNumLeaves();i<copyTree->getNumNodes();i++){
+                               
+                               if (pDataArray->m->control_pressed) { return 0; }
+                               
+                               int lc = copyTree->tree[i].getLChild();
+                               int rc = copyTree->tree[i].getRChild();
+                               
+                               int iSize = copyTree->tree[i].pGroups.size();
+                               int rcSize = copyTree->tree[rc].pGroups.size();
+                               int lcSize = copyTree->tree[lc].pGroups.size();
+                               
+                               //if isize are 0 then that branch is to be ignored
+                               if (iSize == 0) { }
+                               else if ((rcSize == 0) || (lcSize == 0)) { }
+                               //if you have more groups than either of your kids then theres been a change.
+                               else if(iSize > rcSize || iSize > lcSize){
+                                       score++;
+                               }
+                       }
+                       
+                       pDataArray->results[count] = score;
+                       count++;
+               }
+        
+               delete copyTree;
+        
+        return 0;
+        
+    }
+       catch(exception& e) {
+               pDataArray->m->errorOut(e, "Parsimony", "MyParsimonyThreadFunction");
+               exit(1);
+       }
+}
+#endif
 
 #endif
index 002c577798cd3977fa68ba1557b196cf90bf716b..339649c7dcc814ac409e5b8ec1dfd432b544e43d 100644 (file)
@@ -362,34 +362,22 @@ int PhyloDiversityCommand::execute(){
                                if (numSampledList.count(diversity[mGroups[j]].size()-1) == 0) {  numSampledList.insert(diversity[mGroups[j]].size()-1); }
                        }
                        
-                       #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
-                               if(processors == 1){
-                                       driver(trees[i], diversity, sumDiversity, iters, increment, randomLeaf, numSampledList, outCollect, outSum, true);      
-                               }else{
-                                       if (rarefy) {
-                                               vector<int> procIters;
-                                               
-                                               int numItersPerProcessor = iters / processors;
-                                               
-                                               //divide iters between processes
-                                               for (int h = 0; h < processors; h++) {
-                                                       if(h == processors - 1){
-                                                               numItersPerProcessor = iters - h * numItersPerProcessor;
-                                                       }
-                                                       procIters.push_back(numItersPerProcessor);
-                                               }
-                                               
-                                               createProcesses(procIters, trees[i], diversity, sumDiversity, iters, increment, randomLeaf, numSampledList, outCollect, outSum); 
-                                               
-                                       }else{ //no need to paralellize if you dont want to rarefy
-                                               driver(trees[i], diversity, sumDiversity, iters, increment, randomLeaf, numSampledList, outCollect, outSum, true);      
-                                       }
-                               }
-
-                       #else
-                               driver(trees[i], diversity, sumDiversity, iters, increment, randomLeaf, numSampledList, outCollect, outSum, true);      
-                       #endif
-
+            if (rarefy) {
+                vector<int> procIters;
+                int numItersPerProcessor = iters / processors;
+                
+                //divide iters between processes
+                for (int h = 0; h < processors; h++) {
+                    if(h == processors - 1){ numItersPerProcessor = iters - h * numItersPerProcessor; }
+                    procIters.push_back(numItersPerProcessor);
+                }
+                
+                createProcesses(procIters, trees[i], diversity, sumDiversity, iters, increment, randomLeaf, numSampledList, outCollect, outSum);
+                
+            }else{ //no need to paralellize if you dont want to rarefy
+                driver(trees[i], diversity, sumDiversity, iters, increment, randomLeaf, numSampledList, outCollect, outSum, true);     
+            }
+                               
                        if (rarefy) {   printData(numSampledList, sumDiversity, outRare, iters);        }
                }
                
@@ -415,12 +403,13 @@ int PhyloDiversityCommand::execute(){
 //**********************************************************************************************************************
 int PhyloDiversityCommand::createProcesses(vector<int>& procIters, Tree* t, map< string, vector<float> >& div, map<string, vector<float> >& sumDiv, int numIters, int increment, vector<int>& randomLeaf, set<int>& numSampledList, ofstream& outCollect, ofstream& outSum){
        try {
-               #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
-               int process = 1;
+        int process = 1;
                
                vector<int> processIDS;
                map< string, vector<float> >::iterator itSum;
-               
+
+               #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
+                               
                //loop through and create all the processes you want
                while (process != processors) {
                        int pid = fork();
@@ -489,6 +478,56 @@ int PhyloDiversityCommand::createProcesses(vector<int>& procIters, Tree* t, map<
                        in.close();
                        m->mothurRemove(inTemp);
                }
+#else
+        
+        //fill in functions
+        vector<phylodivData*> pDataArray;
+               DWORD   dwThreadIdArray[processors-1];
+               HANDLE  hThreadArray[processors-1];
+        vector<CountTable*> cts;
+        vector<Tree*> trees;
+        map<string, int> rootForGroup = getRootForGroups(t);
+               
+               //Create processor worker threads.
+               for( int i=1; i<processors; i++ ){
+            CountTable* copyCount = new CountTable();
+            copyCount->copy(ct);
+            Tree* copyTree = new Tree(copyCount);
+            copyTree->getCopy(t);
+            
+            cts.push_back(copyCount);
+            trees.push_back(copyTree);
+            
+            map<string, vector<float> > copydiv = div;
+            map<string, vector<float> > copysumDiv = sumDiv;
+            vector<int> copyrandomLeaf = randomLeaf;
+            set<int> copynumSampledList = numSampledList;
+            map<string, int> copyRootForGrouping = rootForGroup;
+            
+            phylodivData* temp = new phylodivData(m, procIters[i], copydiv, copysumDiv, copyTree, copyCount, increment, copyrandomLeaf, copynumSampledList, copyRootForGrouping);
+                       pDataArray.push_back(temp);
+                       processIDS.push_back(i);
+            
+                       hThreadArray[i-1] = CreateThread(NULL, 0, MyPhyloDivThreadFunction, pDataArray[i-1], 0, &dwThreadIdArray[i-1]);
+               }
+               
+               driver(t, div, sumDiv, procIters[0], increment, randomLeaf, numSampledList, outCollect, outSum, true);
+               
+               //Wait until all threads have terminated.
+               WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE);
+               
+               //Close all thread handles and free memory allocations.
+               for(int i=0; i < pDataArray.size(); i++){
+            for (itSum = pDataArray[i]->sumDiv.begin(); itSum != pDataArray[i]->sumDiv.end(); itSum++) {
+                for (int k = 0; k < (itSum->second).size(); k++) {
+                    sumDiv[itSum->first][k] += (itSum->second)[k];
+                }
+            }
+                       delete cts[i];
+            delete trees[i];
+                       CloseHandle(hThreadArray[i]);
+                       delete pDataArray[i];
+               }
                
 #endif
 
@@ -594,7 +633,7 @@ void PhyloDiversityCommand::printSumData(map< string, vector<float> >& div, ofst
                        else            {       score = div[mGroups[j]][numSampled] / (float)numIters;  }
                                
                        out << setprecision(4) << score << endl;
-            cout << mGroups[j] << '\t' << numSampled << '\t'<< setprecision(4) << score << endl;
+            //cout << mGroups[j] << '\t' << numSampled << '\t'<< setprecision(4) << score << endl;
                }
                                        
                out.close();
index ec372ba20acac93709ca4fff0b58c8a42dadf048..045d2067c8bc12a0b3482b5cd50c8ea4a3fd9556 100644 (file)
@@ -51,5 +51,144 @@ private:
 
 };
 
+/***********************************************************************/
+struct phylodivData {
+    int numIters;
+       MothurOut* m;
+    map< string, vector<float> > div;
+    map<string, vector<float> > sumDiv;
+    map<string, int> rootForGroup;
+    vector<int> randomLeaf;
+    set<int> numSampledList;
+    int increment;
+    Tree* t;
+    CountTable* ct;
+    bool includeRoot;
+       
+   
+       phylodivData(){}
+       phylodivData(MothurOut* mout, int ni,  map< string, vector<float> > cd, map< string, vector<float> > csd, Tree* tree, CountTable* count, int incre, vector<int> crl, set<int> nsl, map<string, int> rfg) {
+        m = mout;
+        t = tree;
+        ct = count;
+        div = cd;
+        numIters = ni;
+        sumDiv = csd;
+        increment = incre;
+        randomLeaf = crl;
+        numSampledList = nsl;
+        rootForGroup = rfg;
+       }
+};
+
+/**************************************************************************************************/
+#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
+#else
+static DWORD WINAPI MyPhyloDivThreadFunction(LPVOID lpParam){
+       phylodivData* pDataArray;
+       pDataArray = (phylodivData*)lpParam;
+       try {
+        int numLeafNodes = pDataArray->randomLeaf.size();
+               vector<string> mGroups = pDataArray->m->getGroups();
+        
+               for (int l = 0; l < pDataArray->numIters; l++) {
+            random_shuffle(pDataArray->randomLeaf.begin(), pDataArray->randomLeaf.end());
+            
+            //initialize counts
+            map<string, int> counts;
+            vector< map<string, bool> > countedBranch;
+            for (int i = 0; i < pDataArray->t->getNumNodes(); i++) {
+                map<string, bool> temp;
+                for (int j = 0; j < mGroups.size(); j++) { temp[mGroups[j]] = false; }
+                countedBranch.push_back(temp);
+            }
+            
+            for (int j = 0; j < mGroups.size(); j++) {  counts[mGroups[j]] = 0;   }
+            
+            for(int k = 0; k < numLeafNodes; k++){
+                
+                if (pDataArray->m->control_pressed) { return 0; }
+                
+                //calc branch length of randomLeaf k
+                //vector<float> br = calcBranchLength(t, randomLeaf[k], countedBranch, rootForGroup);
+                //(Tree* t, int leaf, vector< map<string, bool> >& counted, map<string, int> roots
+                /////////////////////////////////////////////////////////////////////////////////////
+                vector<float> br;
+                int index = pDataArray->randomLeaf[k];
+                
+                vector<string> groups = pDataArray->t->tree[pDataArray->randomLeaf[k]].getGroup();
+                br.resize(groups.size(), 0.0);
+
+                //you are a leaf
+                if(pDataArray->t->tree[index].getBranchLength() != -1){
+                    for (int k = 0; k < groups.size(); k++) {
+                        br[k] += abs(pDataArray->t->tree[index].getBranchLength());
+                    }
+                }
+
+                index = pDataArray->t->tree[index].getParent();
+                
+                //while you aren't at root
+                while(pDataArray->t->tree[index].getParent() != -1){
+                    
+                    if (pDataArray->m->control_pressed) {  return 0; }
+                    
+                    for (int k = 0; k < groups.size(); k++) {
+                        
+                        if (index >= pDataArray->rootForGroup[groups[k]]) { countedBranch[index][groups[k]] = true; } //if you are at this groups "root", then say we are done
+                        
+                        if (!countedBranch[index][groups[k]]){ //if counted[index][groups[k] is true this groups has already added all br from here to root, so quit early
+                            if (pDataArray->t->tree[index].getBranchLength() != -1) {
+                                br[k] += abs(pDataArray->t->tree[index].getBranchLength());
+                            }
+                            countedBranch[index][groups[k]] = true;
+                        }
+                    }
+                    index = pDataArray->t->tree[index].getParent();    
+                }
+                /////////////////////////////////////////////////////////////////////////////////////
+                
+                //for each group in the groups update the total branch length accounting for the names file
+                groups = pDataArray->t->tree[pDataArray->randomLeaf[k]].getGroup();
+                
+                for (int j = 0; j < groups.size(); j++) {
+                    
+                    if (pDataArray->m->inUsersGroups(groups[j], mGroups)) {
+                        int numSeqsInGroupJ = 0;
+                        map<string, int>::iterator it;
+                        it = pDataArray->t->tree[pDataArray->randomLeaf[k]].pcount.find(groups[j]);
+                        if (it != pDataArray->t->tree[pDataArray->randomLeaf[k]].pcount.end()) { //this leaf node contains seqs from group j
+                            numSeqsInGroupJ = it->second;
+                        }
+                        
+                        if (numSeqsInGroupJ != 0) {    pDataArray->div[groups[j]][(counts[groups[j]]+1)] = pDataArray->div[groups[j]][counts[groups[j]]] + br[j];  }
+                        
+                        for (int s = (counts[groups[j]]+2); s <= (counts[groups[j]]+numSeqsInGroupJ); s++) {
+                            pDataArray->div[groups[j]][s] = pDataArray->div[groups[j]][s-1];  //update counts, but don't add in redundant branch lengths
+                        }
+                        counts[groups[j]] += numSeqsInGroupJ;
+                    }
+                }
+            }
+            
+            
+            //add this diversity to the sum
+            for (int j = 0; j < mGroups.size(); j++) {
+                for (int g = 0; g < pDataArray->div[mGroups[j]].size(); g++) {
+                    pDataArray->sumDiv[mGroups[j]][g] += pDataArray->div[mGroups[j]][g];
+                }
+            }
+            
+        }
+
+        return 0;
+    }
+       catch(exception& e) {
+               pDataArray->m->errorOut(e, "PhyloDiversityCommand", "MyPhyloDivThreadFunction");
+               exit(1);
+       }
+}
+#endif
+
 #endif
 
index d31d687c0af871ed4d5bbbde157c671900aa67d2..8a8db37612937ee86d7d11adf1b9973d8cba634d 100644 (file)
@@ -974,14 +974,35 @@ bool PcrSeqsCommand::readOligos(){
                         else if (c == 32 || c == 9){;} //space or tab
                                        } 
                                        primers[oligo] = primerCount; primerCount++;
+                    //cout << "for oligo = " << oligo  << endl;
                 }else if(type == "REVERSE"){
                     string oligoRC = reverseOligo(oligo);
                     revPrimer.push_back(oligoRC);
-                    //cout << "oligo = " << oligo << " reverse = " << oligoRC << endl;
+                    //cout << "rev oligo = " << oligo << " reverse = " << oligoRC << endl;
                                }else if(type == "BARCODE"){
-                                       inOligos >> group;
+                    inOligos >> group;
+                }else if(type == "PRIMER"){
+                                       m->gobble(inOligos);
+                    primers[oligo] = primerCount; primerCount++;
+                                       
+                    string roligo="";
+                    inOligos >> roligo;
+                    
+                    for(int i=0;i<roligo.length();i++){
+                        roligo[i] = toupper(roligo[i]);
+                        if(roligo[i] == 'U')   {       roligo[i] = 'T';        }
+                    }
+                    revPrimer.push_back(reverseOligo(roligo));
+                    
+                    // get rest of line in case there is a primer name
+                                       while (!inOligos.eof()) {
+                        char c = inOligos.get();
+                        if (c == 10 || c == 13 || c == -1){    break;  }
+                        else if (c == 32 || c == 9){;} //space or tab
+                                       }
+                    //cout << "prim oligo = " << oligo << " reverse = " << roligo << endl;
                                }else if((type == "LINKER")||(type == "SPACER")) {;}
-                               else{   m->mothurOut(type + " is not recognized as a valid type. Choices are forward, reverse, linker, spacer and barcode. Ignoring " + oligo + "."); m->mothurOutEndLine(); m->control_pressed = true; }
+                               else{   m->mothurOut(type + " is not recognized as a valid type. Choices are primer, forward, reverse, linker, spacer and barcode. Ignoring " + oligo + "."); m->mothurOutEndLine(); m->control_pressed = true; }
                        }
                        m->gobble(inOligos);
                }       
index 24d22845c5a4fc33d109888e5f2007d25134672e..bd3f172c2bcaa3e13ced0b5c57d92e39c9073969 100644 (file)
@@ -903,15 +903,9 @@ void PreClusterCommand::readNameFile(){
                while (!in.eof()) {
                        in >> firstCol >> secondCol; m->gobble(in);
             
-            for (int i = 0; i < firstCol.length(); i++) {
-                if (firstCol[i] == ':') { firstCol[i] = '_'; m->changedSeqNames = true; }
-            }
-            
-            int size = 1;
-            for (int i = 0; i < secondCol.length(); i++) {
-                if (secondCol[i] == ':') { secondCol[i] = '_'; m->changedSeqNames = true; }
-                else if(secondCol[i] == ','){  size++; }
-            }
+            m->checkName(firstCol);
+            m->checkName(secondCol);
+            int size = m->getNumNames(secondCol);
             
                        names[firstCol] = secondCol;
             sizes[firstCol] = size;
index 33ca1728052e4db59372fc2d8129e5bcbaf729c5..26492245e2b9805144125052d57045a834bc3f95 100644 (file)
@@ -89,9 +89,7 @@ string QualityScores::getSequenceName(ifstream& qFile) {
             
                        name = name.substr(1); 
             
-            for (int i = 0; i < name.length(); i++) {
-                if (name[i] == ':') { name[i] = '_'; m->changedSeqNames = true; }
-            }
+            m->checkName(name);
             
         }else{ m->mothurOut("Error in reading your qfile, at position " + toString(qFile.tellg()) + ". Blank name."); m->mothurOutEndLine(); m->control_pressed = true;  }
         
@@ -106,10 +104,7 @@ string QualityScores::getSequenceName(ifstream& qFile) {
 void QualityScores::setName(string name) {
        try {
       
-        for (int i = 0; i < name.length(); i++) {
-            if (name[i] == ':') { name[i] = '_'; m->changedSeqNames = true; }
-        }     
-    
+        m->checkName(name);   
         seqName = name;
        }
        catch(exception& e) {
diff --git a/randomnumber.cpp b/randomnumber.cpp
new file mode 100644 (file)
index 0000000..a558fd3
--- /dev/null
@@ -0,0 +1,315 @@
+#ifndef RANDOMNUMBER_H
+#define RANDOMNUMBER_H
+
+/*
+ *  randomnumber.cpp
+ *  
+ *
+ *  Created by Pat Schloss on 7/6/11.
+ *  Copyright 2011 Patrick D. Schloss. All rights reserved.
+ *
+ */
+
+#include "randomnumber.h"
+#include <cmath>
+
+/**************************************************************************************************/
+
+RandomNumberGenerator::RandomNumberGenerator(){
+    srand( (unsigned)time( NULL ) );
+}
+
+/**************************************************************************************************/
+
+float RandomNumberGenerator::randomUniform(){
+       
+       float randUnif = 0.0000;
+       
+       while(randUnif == 0.0000){
+               
+               randUnif = rand() / (float)RAND_MAX;
+               
+       }
+       
+       return randUnif;
+}
+
+/**************************************************************************************************/
+//
+//Code shamelessly swiped and modified from Numerical Recipes in C++
+//
+/**************************************************************************************************/
+
+float RandomNumberGenerator::randomExp(){
+       
+       float randExp = 0.0000;
+       
+       while(randExp == 0.0000){
+               
+               randExp = -log(randomUniform());
+               
+       }
+       
+       return randExp;
+}
+
+/**************************************************************************************************/
+//
+//Code shamelessly swiped and modified from Numerical Recipes in C++
+//
+/**************************************************************************************************/
+
+float RandomNumberGenerator::randomNorm(){
+       
+       float x, y, rsquare;
+       
+       do{
+               x = 2.0 * randomUniform() - 1.0;
+               y = 2.0 * randomUniform() - 1.0;
+       
+               rsquare = x * x + y * y;
+       } while(rsquare >= 1.0 || rsquare == 0.0);
+       
+       float fac = sqrt(-2.0 * log(rsquare)/rsquare);
+
+       return x * fac;
+}
+
+
+/**************************************************************************************************/
+/*
+ *     Slightly modified version of:
+ *
+ *  Mathlib : A C Library of Special Functions
+ *  Copyright (C) 1998 Ross Ihaka
+ *  Copyright (C) 2000-2005 The R Development Core Team
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, a copy is available at
+ *  http://www.r-project.org/Licenses/
+ *
+ *  SYNOPSIS
+ *
+ *    #include <Rmath.h>
+ *    float rgamma(float a, float scale);
+ *
+ *  DESCRIPTION
+ *
+ *    Random variates from the gamma distribution.
+ *
+ *  REFERENCES
+ *
+ *    [1] Shape parameter a >= 1.  Algorithm GD in:
+ *
+ *        Ahrens, J.H. and Dieter, U. (1982).
+ *        Generating gamma variates by a modified
+ *        rejection technique.
+ *        Comm. ACM, 25, 47-54.
+ *
+ *
+ *    [2] Shape parameter 0 < a < 1. Algorithm GS in:
+ *
+ *        Ahrens, J.H. and Dieter, U. (1974).
+ *        Computer methods for sampling from gamma, beta,
+ *        poisson and binomial distributions.
+ *        Computing, 12, 223-246.
+ *
+ *    Input: a = parameter (mean) of the standard gamma distribution.
+ *    Output: a variate from the gamma(a)-distribution
+ */
+
+float RandomNumberGenerator::randomGamma(float a)
+{
+       /* Constants : */
+    const static float sqrt32 = 5.656854;
+    const static float exp_m1 = 0.36787944117144232159;/* exp(-1) = 1/e */
+       float scale = 1.0;
+       
+    /* Coefficients q[k] - for q0 = sum(q[k]*a^(-k))
+     * Coefficients a[k] - for q = q0+(t*t/2)*sum(a[k]*v^k)
+     * Coefficients e[k] - for exp(q)-1 = sum(e[k]*q^k)
+     */
+    const static float q1 = 0.04166669;
+    const static float q2 = 0.02083148;
+    const static float q3 = 0.00801191;
+    const static float q4 = 0.00144121;
+    const static float q5 = -7.388e-5;
+    const static float q6 = 2.4511e-4;
+    const static float q7 = 2.424e-4;
+       
+    const static float a1 = 0.3333333;
+    const static float a2 = -0.250003;
+    const static float a3 = 0.2000062;
+    const static float a4 = -0.1662921;
+    const static float a5 = 0.1423657;
+    const static float a6 = -0.1367177;
+    const static float a7 = 0.1233795;
+       
+    /* State variables [FIXME for threading!] :*/
+    static float aa = 0.;
+    static float aaa = 0.;
+    static float s, s2, d;    /* no. 1 (step 1) */
+    static float q0, b, si, c;/* no. 2 (step 4) */
+       
+    float e, p, q, r, t, u, v, w, x, ret_val;
+       
+    if (a <= 0.0 || scale <= 0.0){     cout << "error alpha or scale parameter are less than zero." << endl; exit(1);  }
+               
+    if (a < 1.) { /* GS algorithm for parameters a < 1 */
+        e = 1.0 + exp_m1 * a;
+        for(;;) {
+            p = e * randomUniform();
+            if (p >= 1.0) {
+                x = -log((e - p) / a);
+                if (randomExp() >= (1.0 - a) * log(x))
+                    break;
+            } else {
+                x = exp(log(p) / a);
+                if (randomExp() >= x)
+                    break;
+            }
+        }
+        return scale * x;
+    }
+       
+    /* --- a >= 1 : GD algorithm --- */
+       
+    /* Step 1: Recalculations of s2, s, d if a has changed */
+    if (a != aa) {
+        aa = a;
+        s2 = a - 0.5;
+        s = sqrt(s2);
+        d = sqrt32 - s * 12.0;
+    }
+    /* Step 2: t = standard normal deviate,
+        x = (s,1/2) -normal deviate. */
+       
+    /* immediate acceptance (i) */
+    t = randomNorm();
+    x = s + 0.5 * t;
+    ret_val = x * x;
+    if (t >= 0.0)
+        return scale * ret_val;
+       
+    /* Step 3: u = 0,1 - uniform sample. squeeze acceptance (s) */
+    u = randomUniform();
+    if (d * u <= t * t * t)
+        return scale * ret_val;
+       
+    /* Step 4: recalculations of q0, b, si, c if necessary */
+       
+    if (a != aaa) {
+        aaa = a;
+        r = 1.0 / a;
+        q0 = ((((((q7 * r + q6) * r + q5) * r + q4) * r + q3) * r
+               + q2) * r + q1) * r;
+               
+        /* Approximation depending on size of parameter a */
+        /* The constants in the expressions for b, si and c */
+        /* were established by numerical experiments */
+               
+        if (a <= 3.686) {
+            b = 0.463 + s + 0.178 * s2;
+            si = 1.235;
+            c = 0.195 / s - 0.079 + 0.16 * s;
+        } else if (a <= 13.022) {
+            b = 1.654 + 0.0076 * s2;
+            si = 1.68 / s + 0.275;
+            c = 0.062 / s + 0.024;
+        } else {
+            b = 1.77;
+            si = 0.75;
+            c = 0.1515 / s;
+        }
+    }
+    /* Step 5: no quotient test if x not positive */
+       
+    if (x > 0.0) {
+        /* Step 6: calculation of v and quotient q */
+        v = t / (s + s);
+        if (fabs(v) <= 0.25)
+            q = q0 + 0.5 * t * t * ((((((a7 * v + a6) * v + a5) * v + a4) * v
+                                      + a3) * v + a2) * v + a1) * v;
+        else
+            q = q0 - s * t + 0.25 * t * t + (s2 + s2) * log(1.0 + v);
+               
+               
+        /* Step 7: quotient acceptance (q) */
+        if (log(1.0 - u) <= q)
+            return scale * ret_val;
+    }
+       
+    for(;;) {
+        /* Step 8: e = standard exponential deviate
+         *      u =  0,1 -uniform deviate
+         *      t = (b,si)-float exponential (laplace) sample */
+        e = randomExp();
+        u = randomUniform();
+        u = u + u - 1.0;
+        if (u < 0.0)
+            t = b - si * e;
+        else
+            t = b + si * e;
+        /* Step  9:  rejection if t < tau(1) = -0.71874483771719 */
+        if (t >= -0.71874483771719) {
+            /* Step 10:  calculation of v and quotient q */
+            v = t / (s + s);
+            if (fabs(v) <= 0.25)
+                q = q0 + 0.5 * t * t *
+                               ((((((a7 * v + a6) * v + a5) * v + a4) * v + a3) * v
+                                 + a2) * v + a1) * v;
+            else
+                q = q0 - s * t + 0.25 * t * t + (s2 + s2) * log(1.0 + v);
+            /* Step 11:  hat acceptance (h) */
+            /* (if q not positive go to step 8) */
+            if (q > 0.0) {
+                w = expm1(q);
+                /*  ^^^^^ original code had approximation with rel.err < 2e-7 */
+                /* if t is rejected sample again at step 8 */
+                if (c * fabs(u) <= w * exp(e - 0.5 * t * t))
+                    break;
+            }
+        }
+    } /* for(;;) .. until  `t' is accepted */
+    x = s + 0.5 * t;
+    return scale * x * x;
+}
+
+/**************************************************************************************************/
+//
+//     essentially swiped from http://en.wikipedia.org/wiki/Dirichlet_distribution#Random_number_generation
+//
+/**************************************************************************************************/
+
+vector<float> RandomNumberGenerator::randomDirichlet(vector<float> alphas){
+
+       int nAlphas = (int)alphas.size();
+       vector<float> dirs(nAlphas, 0.0000);
+       
+       float sum = 0.0000;
+       for(int i=0;i<nAlphas;i++){
+               dirs[i] = randomGamma(alphas[i]);
+               sum += dirs[i];
+       }
+       
+       for(int i=0;i<nAlphas;i++){
+               dirs[i] /= sum;
+       }
+       
+       return dirs;
+       
+}
+
+/**************************************************************************************************/
+
+#endif
diff --git a/randomnumber.h b/randomnumber.h
new file mode 100644 (file)
index 0000000..59f94de
--- /dev/null
@@ -0,0 +1,34 @@
+#ifndef RANDOMNUMBER
+#define RANDOMNUMBER
+
+/*
+ *  randomnumber.h
+ *  
+ *
+ *  Created by Pat Schloss on 7/6/11.
+ *  Copyright 2011 Patrick D. Schloss. All rights reserved.
+ *
+ */
+
+
+/**************************************************************************************************/
+
+#include "mothur.h"
+
+/**************************************************************************************************/
+
+class RandomNumberGenerator {
+       
+public:
+       RandomNumberGenerator();
+    float randomUniform();
+       float randomExp();
+       float randomNorm();
+       float randomGamma(float);
+       vector<float> randomDirichlet(vector<float> alphas);
+       
+};
+
+/**************************************************************************************************/
+
+#endif
index b82127c51e6a64b50480a0f9b359cda3cd8e75f0..da22e74e90731d5294c6250d4c19f1a84045c06d 100644 (file)
@@ -28,25 +28,14 @@ void RareDisplay::update(SAbundVector* rank){
                int newNSeqs = rank->getNumSeqs();
                vector<double> data = estimate->getValues(rank);
 
-               if(nIters != 1){
-
-                       double oldS = var[index] * ( nIters - 2 );
-                       double delta = data[0] - results[index];
-                       double newMean = results[index] + delta / nIters;
-                       double newS = oldS + delta * ( data[0] - newMean );
-                       double newVar = newS / ( nIters - 1 );
-
-                       seqs[index] = newNSeqs;
-                       results[index] = newMean; 
-                       var[index] = newVar;
-                       
-                       index++;  
-               }
-               else{
-                       seqs.push_back(newNSeqs); 
-                       results.push_back(data[0]);
-                       var.push_back(0.0);
-               }
+               map<int, vector<double> >::iterator it = results.find(newNSeqs);
+        if (it == results.end()) { //first iter for this count
+            vector<double> temp;
+            temp.push_back(data[0]);
+            results[newNSeqs] = temp;
+        }else {
+            it->second.push_back(data[0]);
+        }
        }
        catch(exception& e) {
                m->errorOut(e, "RareDisplay", "update");
@@ -58,28 +47,15 @@ void RareDisplay::update(SAbundVector* rank){
 void RareDisplay::update(vector<SharedRAbundVector*> shared, int numSeqs, int numGroupComb) {
        try {
                vector<double> data = estimate->getValues(shared); 
-               double newNSeqs = data[0];
                
-               if(nIters != 1){
-               
-                       double oldS = var[index] * ( nIters - 2 );
-                       double delta = data[0] - results[index];
-                       double newMean = results[index] + delta / nIters;
-                       double newS = oldS + delta * ( data[0] - newMean );
-                       double newVar = newS / ( nIters - 1 );
-                       seqs[index] = newNSeqs;
-                       results[index] = newMean; 
-                       var[index] = newVar;
-                       
-                       index++;  
-               }
-               else{
-                       
-                       seqs.push_back(newNSeqs); 
-                       results.push_back(data[0]);
-                       var.push_back(0.0);
-
-               }
+               map<int, vector<double> >::iterator it = results.find(numSeqs);
+        if (it == results.end()) { //first iter for this count
+            vector<double> temp;
+            temp.push_back(data[0]);
+            results[numSeqs] = temp;
+        }else {
+            it->second.push_back(data[0]);
+        }
        }
        catch(exception& e) {
                m->errorOut(e, "RareDisplay", "update");
@@ -92,7 +68,6 @@ void RareDisplay::update(vector<SharedRAbundVector*> shared, int numSeqs, int nu
 void RareDisplay::reset(){
        try {
                nIters++;
-               index = 0;
        }
        catch(exception& e) {
                m->errorOut(e, "RareDisplay", "reset");
@@ -104,29 +79,28 @@ void RareDisplay::reset(){
 
 void RareDisplay::close(){
        try {
-               
                output->initFile(label);
        
-               for (int i = 0; i < seqs.size(); i++) {
+               for (map<int, vector<double> >::iterator it = results.begin(); it != results.end(); it++) {
                
                        vector<double> data(3,0);
-                       double variance = var[i];
-                       
-                       data[0] = results[i];
-                       
-                       double ci = 1.96 * pow(variance, 0.5);
-                       data[1] = data[0] - ci;
-                       data[2] = data[0] + ci;
+            
+            sort((it->second).begin(), (it->second).end());
+            
+            //lci=results[int(0.025*iter)] and hci=results[int(0.975*iter)]
+                       data[0] = (it->second)[(int)(0.50*(nIters-1))];
+            //data[0] = m->getAverage(it->second);
+                       data[1] = (it->second)[(int)(0.025*(nIters-1))];
+                       data[2] = (it->second)[(int)(0.975*(nIters-1))];
+            //cout << nIters << '\t' << (int)(0.025*(nIters-1)) << '\t' << (int)(0.975*(nIters-1)) << endl;
+            
+            //cout << it->first << '\t' << data[1] << '\t' << data[2] << endl;
                
-                       output->output(seqs[i], data);
+                       output->output(it->first, data);
                }
                
                nIters = 1;
-               index = 0;
-               
-               seqs.clear();
-               results.clear();
-               var.clear();
+        results.clear();
                
                output->resetFile();
        }
@@ -142,17 +116,29 @@ void RareDisplay::inputTempFiles(string filename){
                ifstream in;
                m->openInputFile(filename, in);
                
-               int thisIters;
-               in >> thisIters; m->gobble(in);
+               int thisIters, size;
+               in >> thisIters >> size; m->gobble(in);
+        nIters += thisIters;
                
-               for (int i = 0; i < seqs.size(); i++) {
-                       double tempresult, tempvar;
-                       in >> tempresult >> tempvar; m->gobble(in);
-                       
-                       //find weighted result
-                       results[i] = ((nIters * results[i]) + (thisIters * tempresult)) / (float)(nIters + thisIters);
-                       
-                       var[i] = ((nIters * var[i]) + (thisIters * tempvar)) / (float)(nIters + thisIters);
+               for (int i = 0; i < size; i++) {
+                       int tempCount;
+            in >> tempCount; m->gobble(in);
+            map<int, vector<double> >::iterator it = results.find(tempCount);
+            if (it != results.end()) {
+                for (int j = 0; j < thisIters; j++) {
+                    double value;
+                    in >> value; m->gobble(in);
+                    (it->second).push_back(value);
+                }
+            }else {
+                vector<double> tempValues;
+                for (int j = 0; j < thisIters; j++) {
+                    double value;
+                    in >> value; m->gobble(in);
+                    tempValues.push_back(value);
+                }
+                results[tempCount] = tempValues;
+            }
                }
                
                in.close();
@@ -170,10 +156,14 @@ void RareDisplay::outputTempFiles(string filename){
                ofstream out;
                m->openOutputFile(filename, out);
                
-               out << nIters << endl;
+               out << nIters-1 << '\t' << results.size() << endl;
                
-               for (int i = 0; i < seqs.size(); i++) {
-                       out << results[i] << '\t' << var[i] << endl;
+               for (map<int, vector<double> >::iterator it = results.begin(); it != results.end(); it++) {
+            out << it->first << '\t';
+            for(int i = 0; i < (it->second).size(); i++) {
+                out << (it->second)[i] << '\t';
+            }
+            out << endl;
                }
                
                out.close();
index a87596225b818c525fdd97156f5baf64b1dceb71..6d07efc55ab9ab677bb9f3604f46862d02984e16 100644 (file)
@@ -11,7 +11,7 @@
 class RareDisplay : public Display {
        
 public:
-       RareDisplay(Calculator* calc, FileOutput* file) : estimate(calc), output(file), nIters(1), index(0) {};
+       RareDisplay(Calculator* calc, FileOutput* file) : estimate(calc), output(file), nIters(1) {};
        ~RareDisplay()                                  {       delete estimate; delete output;         };
        void init(string);
        void reset();
@@ -27,10 +27,8 @@ private:
        Calculator* estimate;
        FileOutput* output;
        string label;
-       vector<int> seqs;  
-       vector<double> results;
-       vector<double> var;
-       int index, nIters;
+       map<int, vector<double> > results; //maps seqCount to results for that number of sequences
+       int nIters;
 };
 
 #endif
index 83d11c92221bcb3ffbb46ea097cff26b29cd0b6c..e14125cc6d94f563e37d5ea1cea8235a5c8df04b 100644 (file)
@@ -48,7 +48,11 @@ int ReadColumnMatrix::read(NameAssignment* nameMap){
                while(fileHandle && lt == 1){  //let's assume it's a triangular matrix...
 
                
-                       fileHandle >> firstName >> secondName >> distance;      // get the row and column names and distance
+                       fileHandle >> firstName; m->gobble(fileHandle);
+            fileHandle >> secondName; m->gobble(fileHandle);
+            fileHandle >> distance;    // get the row and column names and distance
+            
+            if (m->debug) { cout << firstName << '\t' << secondName << '\t' << distance << endl; }
                        
                        if (m->control_pressed) {  fileHandle.close();  delete reading; return 0; }
        
@@ -106,7 +110,9 @@ int ReadColumnMatrix::read(NameAssignment* nameMap){
                        m->openInputFile(distFile, fileHandle);  //let's start over
 
                        while(fileHandle){
-                               fileHandle >> firstName >> secondName >> distance;
+                               fileHandle >> firstName; m->gobble(fileHandle);
+                fileHandle >> secondName; m->gobble(fileHandle);
+                fileHandle >> distance;        // get the row and column names and distance
                                
                                if (m->control_pressed) {  fileHandle.close();  delete reading; return 0; }
                
@@ -167,8 +173,10 @@ int ReadColumnMatrix::read(CountTable* countTable){
                while(fileHandle && lt == 1){  //let's assume it's a triangular matrix...
             
             
-                       fileHandle >> firstName >> secondName >> distance;      // get the row and column names and distance
-                       
+                       fileHandle >> firstName; m->gobble(fileHandle);
+            fileHandle >> secondName; m->gobble(fileHandle);
+            fileHandle >> distance;    // get the row and column names and distance
+            
                        if (m->control_pressed) {  fileHandle.close();  delete reading; return 0; }
             
                        int itA = countTable->get(firstName);
@@ -224,7 +232,9 @@ int ReadColumnMatrix::read(CountTable* countTable){
                        m->openInputFile(distFile, fileHandle);  //let's start over
             
                        while(fileHandle){
-                               fileHandle >> firstName >> secondName >> distance;
+                               fileHandle >> firstName; m->gobble(fileHandle);
+                fileHandle >> secondName; m->gobble(fileHandle);
+                fileHandle >> distance;        // get the row and column names and distance
                                
                                if (m->control_pressed) {  fileHandle.close();  delete reading; return 0; }
                 
index 1e5fe11de39f34b30f59249895bcf393506e32f9..64f9ab6822301310e975e6bbe58736e953eed4f2 100644 (file)
@@ -355,11 +355,19 @@ int RemoveGroupsCommand::execute(){
                        
                        //make sure groups are valid
                        //takes care of user setting groupNames that are invalid or setting groups=all
-                       SharedUtil* util = new SharedUtil();
                        vector<string> namesGroups = groupMap->getNamesOfGroups();
-                       util->setGroups(Groups, namesGroups);
-                       delete util;
-                       
+                       vector<string> checkedGroups;
+            for (int i = 0; i < Groups.size(); i++) {
+                if (m->inUsersGroups(Groups[i], namesGroups)) { checkedGroups.push_back(Groups[i]); }
+                else {  m->mothurOut("[WARNING]: " + Groups[i] + " is not a valid group in your groupfile, ignoring.\n"); }
+            }
+            
+            if (checkedGroups.size() == 0) { m->mothurOut("[ERROR]: no valid groups, aborting.\n"); delete groupMap; return 0; }
+                       else {
+                Groups = checkedGroups;
+                m->setGroups(Groups);
+            }
+            
                        //fill names with names of sequences that are from the groups we want to remove 
                        fillNames();
                        
index f61232a26c133fb9d0cebfa9cdcca31fdfe2f0dc..12786ca41c848d967ffda72e2d06f382881d8eed 100644 (file)
@@ -243,16 +243,49 @@ int SensSpecCommand::execute(){
                exit(1);
        }
 }
+//***************************************************************************************************************
+bool SensSpecCommand::testFile(){
+       try{
+        ifstream fileHandle;
+        m->openInputFile(phylipfile, fileHandle);
+        
+        bool square = false;
+        string numTest, name;
+        fileHandle >> numTest >> name;
+
+        if (!m->isContainingOnlyDigits(numTest)) { m->mothurOut("[ERROR]: expected a number and got " + numTest + ", quitting."); m->mothurOutEndLine(); exit(1); }
+
+        char d;
+        while((d=fileHandle.get()) != EOF){
+            if(isalnum(d)){
+                square = true;
+                break;
+            }
+            if(d == '\n'){
+                square = false;
+                break;
+            }
+        }
+        fileHandle.close();
+        
+        return square;
+    }
+       catch(exception& e) {
+               m->errorOut(e, "SensSpecCommand", "testFile");
+               exit(1);
+       }
+}
 
 //***************************************************************************************************************
 
 int SensSpecCommand::processPhylip(){
        try{
                //probably need some checking to confirm that the names in the distance matrix are the same as those in the list file
-               string origCutoff = "";
+        square = testFile();
+               string origCutoff = "";
                bool getCutoff = 0;
                if(cutoff == -1.00)     {       getCutoff = 1;                                                                                                                  }
-               else                            {       origCutoff = toString(cutoff);  cutoff += (0.49 / double(precision));   }               
+               else                            {       origCutoff = toString(cutoff);  cutoff += (0.49 / double(precision));   }
                
                map<string, int> seqMap;
                string seqList;
@@ -265,7 +298,6 @@ int SensSpecCommand::processPhylip(){
                set<string> processedLabels;
                set<string> userLabels = labels;
                
-               
                while((list != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
                        
                        if(m->control_pressed){
@@ -461,6 +493,9 @@ int SensSpecCommand::process(map<string, int>& seqMap, string label, bool& getCu
                                        else                                                            {       trueNegatives++;        }
                                }
                        }
+            
+            if (square) { m->getline(phylipFile); } //get rest of line - redundant distances
+            m->gobble(phylipFile);
                }
                phylipFile.close();
                
@@ -707,28 +742,18 @@ string SensSpecCommand::preProcessList(){
             m->openInputFile(distFile, phylipFile);
             string numTest;
             int pNumSeqs;
-                       phylipFile >> numTest;
+                       phylipFile >> numTest; m->gobble(phylipFile);
                        
                        if (!m->isContainingOnlyDigits(numTest)) { m->mothurOut("[ERROR]: expected a number and got " + numTest + ", quitting."); m->mothurOutEndLine(); exit(1); }
             else {
                 m->mothurConvert(numTest, pNumSeqs);
             }
-            phylipFile >> pNumSeqs; m->gobble(phylipFile);
             
             string seqName;
-            double distance;
-            
             for(int i=0;i<pNumSeqs;i++){
-                
                 if (m->control_pressed) { return ""; }
-                
-                phylipFile >> seqName; 
+                phylipFile >> seqName;  m->getline(phylipFile);  m->gobble(phylipFile);
                 uniqueNames.insert(seqName);
-                
-                for(int j=0;j<i;j++){
-                    phylipFile >> distance;
-                }
-                m->gobble(phylipFile);
             }
             phylipFile.close();
         }else {
@@ -778,8 +803,8 @@ string SensSpecCommand::preProcessList(){
                 m->splitAtComma(binnames, bnames);
                                
                                string newNames = "";
-                for (int i = 0; i < bnames.size(); i++) {
-                                       string name = bnames[i];
+                for (int j = 0; j < bnames.size(); j++) {
+                                       string name = bnames[j];
                                        //if that name is in the .accnos file, add it
                                        if (uniqueNames.count(name) != 0) {  newNames += name + ",";  }
                                }
@@ -803,6 +828,8 @@ string SensSpecCommand::preProcessList(){
                out.close();
 
         if (wroteSomething) { return newListFile; }
+        else { m->mothurRemove(newListFile); }
+        
         return ""; 
     }
     catch(exception& e) {
index 7d85a39bea25ced837551589cf21e2c431c8d7ca..bfa329131f341e6a0750a318cc237ead77bbd8a4 100644 (file)
@@ -48,7 +48,7 @@ private:
        set<string> labels; //holds labels to be used
 
        long int truePositives, falsePositives, trueNegatives, falseNegatives;
-       bool abort, allLines;
+       bool abort, allLines, square;
        bool hard;
        //string lineLabel;
        double cutoff;
@@ -59,6 +59,7 @@ private:
        int process(map<string, int>&, string, bool&, string&);
        int process(set<string>&, string, bool&, string&, int);
     string preProcessList();
+    bool testFile();
 
 };
 
index 54d3dcc5c9b6c93f267478e6491126b3aedff772..5879241372dd3b60b34683683a0c1edbcbd64d57 100644 (file)
@@ -1412,9 +1412,7 @@ int SeqErrorCommand::setLines(string filename, string qfilename, string rfilenam
                                        string sname = "";  nameStream >> sname;
                                        sname = sname.substr(1);
                     
-                    for (int i = 0; i < sname.length(); i++) {
-                        if (sname[i] == ':') { sname[i] = '_'; m->changedSeqNames = true; }
-                    }
+                    m->checkName(sname);
                                        
                                        map<string, int>::iterator it = firstSeqNames.find(sname);
                                        
@@ -1475,9 +1473,7 @@ int SeqErrorCommand::setLines(string filename, string qfilename, string rfilenam
                     istringstream nameStream(input);
                     string sname = "";  nameStream >> sname;
                     
-                    for (int i = 0; i < sname.length(); i++) {
-                        if (sname[i] == ':') { sname[i] = '_'; m->changedSeqNames = true; }
-                    }
+                    m->checkName(sname);
                     
                     map<string, int>::iterator it = firstSeqNamesReport.find(sname);
                 
index ddc7d4c5a410ca99a415ee1e15d7c60b2199060f..d6073d75da3ed630b27b626bbba8615d79b58858 100644 (file)
@@ -21,9 +21,7 @@ Sequence::Sequence(string newName, string sequence) {
                initialize();   
                name = newName;
         
-        for (int i = 0; i < name.length(); i++) {
-            if (name[i] == ':') { name[i] = '_'; m->changedSeqNames = true; }
-        }
+        m->checkName(name);
                
                //setUnaligned removes any gap characters for us
                setUnaligned(sequence);
@@ -41,9 +39,7 @@ Sequence::Sequence(string newName, string sequence, string justUnAligned) {
                initialize();   
                name = newName;
         
-        for (int i = 0; i < name.length(); i++) {
-            if (name[i] == ':') { name[i] = '_'; m->changedSeqNames = true; }
-        }
+        m->checkName(name);
                
                //setUnaligned removes any gap characters for us
                setUnaligned(sequence);
@@ -295,9 +291,7 @@ string Sequence::getSequenceName(ifstream& fastaFile) {
             
                        name = name.substr(1); 
             
-            for (int i = 0; i < name.length(); i++) {
-                if (name[i] == ':') { name[i] = '_'; m->changedSeqNames = true; }
-            }
+            m->checkName(name);
             
         }else{ m->mothurOut("Error in reading your fastafile, at position " + toString(fastaFile.tellg()) + ". Blank name."); m->mothurOutEndLine(); m->control_pressed = true;  }
         
@@ -319,9 +313,7 @@ string Sequence::getSequenceName(istringstream& fastaFile) {
             
                        name = name.substr(1); 
             
-            for (int i = 0; i < name.length(); i++) {
-                if (name[i] == ':') { name[i] = '_'; m->changedSeqNames = true; }
-            }
+            m->checkName(name);
             
         }else{ m->mothurOut("Error in reading your fastafile, at position " + toString(fastaFile.tellg()) + ". Blank name."); m->mothurOutEndLine(); m->control_pressed = true;  }
         
index 4536d9516688fe6db1d9c10e709e789c40abe93c..3e50f4a508d2608ef1b51b9445fd3a388d1f4dec 100644 (file)
@@ -14,6 +14,7 @@ vector<string> SetDirectoryCommand::setParameters(){
        try {
                CommandParameter ptempdefault("tempdefault", "String", "", "", "", "", "","",false,false); parameters.push_back(ptempdefault);
         CommandParameter pdebug("debug", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(pdebug);
+        CommandParameter pmodnames("modifynames", "Boolean", "", "T", "", "", "","",false,false); parameters.push_back(pmodnames);
                CommandParameter pinput("input", "String", "", "", "", "", "","",false,false,true); parameters.push_back(pinput);
                CommandParameter poutput("output", "String", "", "", "", "", "","",false,false,true); parameters.push_back(poutput);
                CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
@@ -36,6 +37,7 @@ string SetDirectoryCommand::getHelpString(){
                helpString += "The set.dir command can also be used to specify the directory where your input files are located, the directory must exist.\n";
                helpString += "The set.dir command can also be used to override or set the default location mothur will look for files if it is unable to find them, the directory must exist.\n";
         helpString += "The set.dir command can also be used to run mothur in debug mode.\n";
+        helpString += "The set.dir command can also be used to set the modifynames parameter. Default=t, meaning if your sequence names contain ':' change them to '_' to avoid issues while making trees.  modifynames=F will leave sequence names as they are.\n";
                helpString += "The set.dir command parameters are input, output, tempdefault and debug and one is required.\n";
         helpString += "To run mothur in debug mode set debug=true. Default debug=false.\n";
                helpString += "To return the output to the same directory as the input files you may enter: output=clear.\n";
@@ -93,10 +95,17 @@ SetDirectoryCommand::SetDirectoryCommand(string option)  {
             else {  debug = m->isTrue(temp); }
             m->debug = debug;
             
+            bool nomod = false;
+            temp = validParameter.validFile(parameters, "modifynames", false);
+                       if (temp == "not found") {  modifyNames = true;  nomod=true; }
+            else {  modifyNames = m->isTrue(temp); }
+            m->modifyNames = modifyNames;
+            
             if (debug) { m->mothurOut("Setting [DEBUG] flag.\n"); }
+            
                                
-                       if ((input == "") && (output == "") && (tempdefault == "") && nodebug) {        
-                               m->mothurOut("You must provide either an input, output, tempdefault or debug for the set.outdir command."); m->mothurOutEndLine(); abort = true;
+                       if ((input == "") && (output == "") && (tempdefault == "") && nodebug && nomod) {
+                               m->mothurOut("You must provide either an input, output, tempdefault, debug or modifynames for the set.outdir command."); m->mothurOutEndLine(); abort = true;
                        }else if((input == "") && (output == "") && (tempdefault == "")) { debugOnly = true; }
                }
        }
index ab174c2b783eca5c3a7fc0d74a8a2f6c5abf4a70..36753edeb57f8601dd0a1f5309b9c1cf67109429 100644 (file)
@@ -37,7 +37,7 @@ public:
 private:
        CommandFactory* commandFactory;
        string output, input, tempdefault;
-       bool abort, debugOnly;
+       bool abort, debugOnly, modifyNames;
        vector<string> outputNames;
        
                
index b6894b48fd1371cb1b963e5a0e1714dc6cdf24b8..00f23ed24cc83e8eddf66f328a1fe3ce765e122d 100644 (file)
@@ -29,8 +29,7 @@ vector<string> SffMultipleCommand::setParameters(){
         CommandParameter ptdiffs("tdiffs", "Number", "", "0", "", "", "","",false,false); parameters.push_back(ptdiffs);
                CommandParameter psignal("signal", "Number", "", "0.50", "", "", "","",false,false); parameters.push_back(psignal);
                CommandParameter pnoise("noise", "Number", "", "0.70", "", "", "","",false,false); parameters.push_back(pnoise);
-               CommandParameter porder("order", "String", "", "TACG", "", "", "","",false,false); parameters.push_back(porder);
-
+               CommandParameter porder("order", "Multiple", "A-B-I", "A", "", "", "","",false,false, true); parameters.push_back(porder);
         //shhh.flows
         CommandParameter plookup("lookup", "InputTypes", "", "", "none", "none", "none","",false,false,true); parameters.push_back(plookup);
                CommandParameter pcutoff("cutoff", "Number", "", "0.01", "", "", "","",false,false); parameters.push_back(pcutoff);
@@ -89,7 +88,7 @@ string SffMultipleCommand::getHelpString(){
                helpString += "The keepforward parameter allows you to indicate whether you want the forward primer removed or not. The default is F, meaning remove the forward primer.\n";
                helpString += "The keepfirst parameter trims the sequence to the first keepfirst number of bases after the barcode or primers are removed, before the sequence is checked to see if it meets the other requirements. \n";
                helpString += "The removelast removes the last removelast number of bases after the barcode or primers are removed, before the sequence is checked to see if it meets the other requirements.\n";
-
+        helpString += "The order parameter options are A, B or I.  Default=A. A = TACG and B = TACGTACGTACGATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGC and I = TACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGC.\n";
                helpString += "Example sff.multiple(file=mySffOligosFile.txt, trim=F).\n";
                helpString += "Note: No spaces between parameter labels (i.e. file), '=' and parameters (i.e.mySffOligosFile.txt).\n";
                return helpString;
@@ -233,11 +232,20 @@ SffMultipleCommand::SffMultipleCommand(string option)  {
                        m->setProcessors(temp);
                        m->mothurConvert(temp, processors);
             
-                       flowOrder = validParameter.validFile(parameters, "order", false);
-                       if (flowOrder == "not found"){ flowOrder = "TACG";              }
-                       else if(flowOrder.length() != 4){
-                               m->mothurOut("The value of the order option must be four bases long\n");
-                       }
+                       temp = validParameter.validFile(parameters, "order", false);  if (temp == "not found"){         temp = "A";     }
+            if (temp.length() > 1) {  m->mothurOut("[ERROR]: " + temp + " is not a valid option for order. order options are A, B, or I. A = TACG, B = TACGTACGTACGATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGC, and I = TACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGC.\n");  abort=true;
+            }
+            else {
+                if (toupper(temp[0]) == 'A') {  flowOrder = "A";   }
+                else if(toupper(temp[0]) == 'B'){
+                    flowOrder = "B";   }
+                else if(toupper(temp[0]) == 'I'){
+                    flowOrder = "I";   }
+                else {
+                    m->mothurOut("[ERROR]: " + temp + " is not a valid option for order. order options are A, B, or I. A = TACG, B = TACGTACGTACGATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGC, and I = TACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGC.\n");  abort=true;
+                }
+            }
+
             
             temp = validParameter.validFile(parameters, "cutoff", false);      if (temp == "not found"){       temp = "0.01";          }
                        m->mothurConvert(temp, cutoff); 
@@ -370,9 +378,9 @@ int SffMultipleCommand::execute(){
         string fileroot = outputDir + m->getRootName(m->getSimpleName(filename));
         map<string, string> variables; 
                variables["[filename]"] = fileroot;
-        string fasta = fileroot + getOutputFileName("fasta",variables);
-        string name = fileroot + getOutputFileName("name",variables);
-        string group = fileroot + getOutputFileName("group",variables);
+        string fasta = getOutputFileName("fasta",variables);
+        string name = getOutputFileName("name",variables);
+        string group = getOutputFileName("group",variables);
         
         if (m->control_pressed) { return 0; }
         
index fa32fc43a873d22482cfcb8e4172343f3516cbce..71c868b158ab41c2b8d72e7dac301998e2c39537 100644 (file)
@@ -191,6 +191,16 @@ float SharedRAbundFloatVector::getAbundance(int index){
        return data[index].abundance;   
 }
 /***********************************************************************/
+//returns vector of abundances
+vector<float> SharedRAbundFloatVector::getAbundances(){
+    vector<float> abunds;
+    for (int i = 0; i < data.size(); i++) {
+        abunds.push_back(data[i].abundance);
+    }
+    
+       return abunds;
+}
+/***********************************************************************/
 individualFloat SharedRAbundFloatVector::get(int index){
        return data[index];     
 }
index 965b03ae7aebaf3e499ef55264a0d9ebe421380b..f8d69a8a5a17d4b9cadcf0e5c500d93498f601fc 100644 (file)
@@ -45,6 +45,7 @@ public:
        individualFloat get(int);
        vector <individual> getData();
        float getAbundance(int);
+    vector<float> getAbundances();
        void push_front(float, int, string); //abundance, otu, groupname
        void insert(float, int, string); //abundance, otu, groupname
        void push_back(float, string);  //abundance, groupname
index e970bab34f9276a680bce6e29faa9cf829684e12..9bd437ad3cb7b2a75b69770769c8ddbc942f4a35 100644 (file)
@@ -21,7 +21,7 @@ vector<string> ShhherCommand::setParameters(){
         CommandParameter plarge("large", "Number", "", "-1", "", "", "","",false,false); parameters.push_back(plarge);
                CommandParameter psigma("sigma", "Number", "", "60", "", "", "","",false,false); parameters.push_back(psigma);
                CommandParameter pmindelta("mindelta", "Number", "", "0.000001", "", "", "","",false,false); parameters.push_back(pmindelta);
-        CommandParameter porder("order", "Multiple", "A-B", "A", "", "", "","",false,false, true); parameters.push_back(porder);               CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
+        CommandParameter porder("order", "Multiple", "A-B-I", "A", "", "", "","",false,false, true); parameters.push_back(porder);             CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
                CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
                
                vector<string> myArray;
@@ -42,7 +42,7 @@ string ShhherCommand::getHelpString(){
         helpString += "The flow parameter is used to input your flow file.\n";
         helpString += "The file parameter is used to input the *flow.files file created by trim.flows.\n";
         helpString += "The lookup parameter is used specify the lookup file you would like to use. http://www.mothur.org/wiki/Lookup_files.\n";
-        helpString += "The order parameter options are A or B.  A = TACG and B = TACGTACGTACGATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGC.\n";
+        helpString += "The order parameter options are A, B or I.  Default=A. A = TACG and B = TACGTACGTACGATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGC and I = TACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGC.\n";
                return helpString;
        }
        catch(exception& e) {
@@ -362,14 +362,16 @@ ShhherCommand::ShhherCommand(string option) {
                        m->mothurConvert(temp, sigma); 
                        
                        temp = validParameter.validFile(parameters, "order", false);  if (temp == "not found"){         temp = "A";     }
-            if (temp.length() > 1) {  m->mothurOut("[ERROR]: " + temp + " is not a valid option for order. order options are A or B. A = TACG and B = TACGTACGTACGATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGC.\n");  abort=true;
+            if (temp.length() > 1) {  m->mothurOut("[ERROR]: " + temp + " is not a valid option for order. order options are A, B, or I. A = TACG, B = TACGTACGTACGATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGC, and I = TACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGC.\n");  abort=true;
             }
             else {
                 if (toupper(temp[0]) == 'A') {  flowOrder = "TACG";   }
-                else if(toupper(temp[0]) == 'B'){ 
+                else if(toupper(temp[0]) == 'B'){
                     flowOrder = "TACGTACGTACGATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGC";   }
+                else if(toupper(temp[0]) == 'I'){
+                    flowOrder = "TACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGC";   }
                 else {
-                    m->mothurOut("[ERROR]: " + temp + " is not a valid option for order. order options are A or B. A = TACG and B = TACGTACGTACGATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGC.\n");  abort=true;
+                    m->mothurOut("[ERROR]: " + temp + " is not a valid option for order. order options are A, B, or I. A = TACG, B = TACGTACGTACGATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGC, and I = TACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGC.\n");  abort=true;
                 }
             }
 
diff --git a/sparcccommand.cpp b/sparcccommand.cpp
new file mode 100644 (file)
index 0000000..a7f5d78
--- /dev/null
@@ -0,0 +1,579 @@
+//
+//  sparcccommand.cpp
+//  Mothur
+//
+//  Created by SarahsWork on 5/10/13.
+//  Copyright (c) 2013 Schloss Lab. All rights reserved.
+//
+
+#include "sparcccommand.h"
+
+
+//**********************************************************************************************************************
+vector<string> SparccCommand::setParameters(){
+       try {
+        CommandParameter pshared("shared", "InputTypes", "", "", "none", "none", "none","outputType",false,true); parameters.push_back(pshared);
+        CommandParameter pgroups("groups", "String", "", "", "", "", "","",false,false); parameters.push_back(pgroups);
+               CommandParameter plabel("label", "String", "", "", "", "", "","",false,false); parameters.push_back(plabel);
+               CommandParameter psamplings("samplings", "Number", "", "20", "", "", "","",false,false,false); parameters.push_back(psamplings);
+        CommandParameter piterations("iterations", "Number", "", "10", "", "", "","",false,false,false); parameters.push_back(piterations);
+        CommandParameter ppermutations("permutations", "Number", "", "1000", "", "", "","",false,false,false); parameters.push_back(ppermutations);
+        CommandParameter pmethod("method", "Multiple", "relabund-dirichlet", "dirichlet", "", "", "","",false,false); parameters.push_back(pmethod);
+        CommandParameter pprocessors("processors", "Number", "", "1", "", "", "","",false,false,true); parameters.push_back(pprocessors); 
+        //every command must have inputdir and outputdir.  This allows mothur users to redirect input and output files.
+               CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
+               CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
+               
+               vector<string> myArray;
+               for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
+               return myArray;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "SparccCommand", "setParameters");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+string SparccCommand::getHelpString(){
+       try {
+               string helpString = "";
+               helpString += "The sparcc command allows you to ....\n";
+               helpString += "The sparcc command parameters are: shared, groups, label, samplings, iterations, permutations, processors and method.\n";
+               helpString += "The samplings parameter is used to .... Default=20.\n";
+        helpString += "The iterations parameter is used to ....Default=10.\n";
+        helpString += "The permutations parameter is used to ....Default=1000.\n";
+        helpString += "The method parameter is used to ....Options are relabund and dirichlet. Default=dirichlet.\n";
+        helpString += "The default value for groups is all the groups in your sharedfile.\n";
+               helpString += "The label parameter is used to analyze specific labels in your shared file.\n";
+               helpString += "The sparcc command should be in the following format: sparcc(shared=yourSharedFile)\n";
+               helpString += "sparcc(shared=final.an.shared)\n";
+               return helpString;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "SparccCommand", "getHelpString");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+string SparccCommand::getOutputPattern(string type) {
+    try {
+        string pattern = "";
+        
+        if (type == "corr") {  pattern = "[filename],[distance],sparcc_correlation"; }
+        else if (type == "pvalue") {  pattern = "[filename],[distance],sparcc_pvalue"; }
+        else if (type == "sparccrelabund") {  pattern = "[filename],[distance],sparcc_relabund"; }
+        else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true;  }
+        
+        return pattern;
+    }
+    catch(exception& e) {
+        m->errorOut(e, "SparccCommand", "getOutputPattern");
+        exit(1);
+    }
+}
+//**********************************************************************************************************************
+SparccCommand::SparccCommand(){
+       try {
+               abort = true; calledHelp = true;
+               setParameters();
+        vector<string> tempOutNames;
+               outputTypes["corr"] = tempOutNames; 
+               outputTypes["pvalue"] = tempOutNames;
+               outputTypes["sparccrelabund"] = tempOutNames;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "SparccCommand", "SparccCommand");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+SparccCommand::SparccCommand(string option)  {
+       try {
+               abort = false; calledHelp = false;
+        allLines = 1;
+               
+               //allow user to run help
+               if(option == "help") { help(); abort = true; calledHelp = true; }
+               else if(option == "citation") { citation(); abort = true; calledHelp = true;}
+               
+               else {
+                       //valid paramters for this command
+                       vector<string> myArray = setParameters();
+                       
+                       OptionParser parser(option);
+                       map<string,string> parameters = parser.getParameters();
+                       
+                       ValidParameters validParameter;
+                       map<string,string>::iterator it;
+                       //check to make sure all parameters are valid for command
+                       for (it = parameters.begin(); it != parameters.end(); it++) {
+                               if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
+                       }
+                       
+            vector<string> tempOutNames;
+            outputTypes["corr"] = tempOutNames; //filetypes should be things like: shared, fasta, accnos...
+            outputTypes["pvalue"] = tempOutNames;
+            outputTypes["sparccrelabund"] = tempOutNames;
+            
+                       //if the user changes the input directory command factory will send this info to us in the output parameter
+                       string inputDir = validParameter.validFile(parameters, "inputdir", false);
+                       if (inputDir == "not found"){   inputDir = "";          }
+                       else {
+                 
+                               string path;
+                it = parameters.find("shared");
+                               //user has given a template file
+                               if(it != parameters.end()){
+                                       path = m->hasPath(it->second);
+                                       //if the user has not given a path then, add inputdir. else leave path alone.
+                                       if (path == "") {       parameters["shared"] = inputDir + it->second;           }
+                               }
+            }
+                        
+                       //check for parameters
+            //get shared file, it is required
+                       sharedfile = validParameter.validFile(parameters, "shared", true);
+                       if (sharedfile == "not open") { sharedfile = ""; abort = true; }
+                       else if (sharedfile == "not found") {
+                               //if there is a current shared file, use it
+                               sharedfile = m->getSharedFile();
+                               if (sharedfile != "") { m->mothurOut("Using " + sharedfile + " as input file for the shared parameter."); m->mothurOutEndLine(); }
+                               else {  m->mothurOut("You have no current sharedfile and the shared parameter is required."); m->mothurOutEndLine(); abort = true; }
+                       }else { m->setSharedFile(sharedfile); }
+            
+            //if the user changes the output directory command factory will send this info to us in the output parameter
+                       outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){
+                               outputDir = m->hasPath(sharedfile); //if user entered a file with a path then preserve it
+                       }
+            
+                       normalizeMethod = validParameter.validFile(parameters, "method", false);
+                       if (normalizeMethod == "not found") { normalizeMethod = "dirichlet"; }
+                       if ((normalizeMethod == "dirichlet") || (normalizeMethod == "relabund")) { }
+                       else { m->mothurOut(normalizeMethod + " is not a valid method.  Valid methods are dirichlet and relabund."); m->mothurOutEndLine(); abort = true; }
+            
+            
+            string temp = validParameter.validFile(parameters, "samplings", false);    if (temp == "not found"){       temp = "20";    }
+                       m->mothurConvert(temp, numSamplings);
+            
+            if(normalizeMethod == "relabund"){ numSamplings = 1; }
+            
+            temp = validParameter.validFile(parameters, "iterations", false);  if (temp == "not found"){       temp = "10";    }
+                       m->mothurConvert(temp, maxIterations);
+            
+            temp = validParameter.validFile(parameters, "permutations", false);        if (temp == "not found"){       temp = "1000";  }
+                       m->mothurConvert(temp, numPermutations);
+            
+            temp = validParameter.validFile(parameters, "processors", false);  if (temp == "not found"){       temp = m->getProcessors();      }
+                       m->setProcessors(temp);
+                       m->mothurConvert(temp, processors);
+    
+            string groups = validParameter.validFile(parameters, "groups", false);
+                       if (groups == "not found") { groups = ""; }
+                       else { m->splitAtDash(groups, Groups); }
+                       m->setGroups(Groups);
+            
+            string label = validParameter.validFile(parameters, "label", false);
+                       if (label == "not found") { label = ""; }
+                       else {
+                               if(label != "all") {  m->splitAtDash(label, labels);  allLines = 0;  }
+                               else { allLines = 1;  }
+                       }
+               }
+               
+       }
+       catch(exception& e) {
+               m->errorOut(e, "SparccCommand", "SparccCommand");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+int SparccCommand::execute(){
+       try {
+               
+               if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
+        
+        int start = time(NULL);
+        
+        InputData input(sharedfile, "sharedfile");
+        vector<SharedRAbundVector*> lookup = input.getSharedRAbundVectors();
+        string lastLabel = lookup[0]->getLabel();
+        
+        //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
+        set<string> processedLabels;
+        set<string> userLabels = labels;
+        
+        //as long as you are not at the end of the file or done wih the lines you want
+        while((lookup[0] != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
+            
+            if (m->control_pressed) { for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  }  for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); }return 0; }
+            
+            if(allLines == 1 || labels.count(lookup[0]->getLabel()) == 1){
+                
+                m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
+                
+                process(lookup);
+                
+                processedLabels.insert(lookup[0]->getLabel());
+                userLabels.erase(lookup[0]->getLabel());
+            }
+            
+            if ((m->anyLabelsToProcess(lookup[0]->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
+                string saveLabel = lookup[0]->getLabel();
+                
+                for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  }
+                lookup = input.getSharedRAbundVectors(lastLabel);
+                m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
+                
+                process(lookup);                
+                
+                processedLabels.insert(lookup[0]->getLabel());
+                userLabels.erase(lookup[0]->getLabel());
+                
+                //restore real lastlabel to save below
+                lookup[0]->setLabel(saveLabel);
+            }
+            
+            lastLabel = lookup[0]->getLabel();
+            //prevent memory leak
+            for (int i = 0; i < lookup.size(); i++) {  delete lookup[i]; lookup[i] = NULL; }
+            
+            if (m->control_pressed) { return 0; }
+            
+            //get next line to process
+            lookup = input.getSharedRAbundVectors();
+        }
+        
+        if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
+        
+        //output error messages about any remaining user labels
+        set<string>::iterator it;
+        bool needToRun = false;
+        for (it = userLabels.begin(); it != userLabels.end(); it++) {
+            m->mothurOut("Your file does not include the label " + *it);
+            if (processedLabels.count(lastLabel) != 1) {
+                m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
+                needToRun = true;
+            }else {
+                m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
+            }
+        }
+        
+        //run last label if you need to
+        if (needToRun == true)  {
+            for (int i = 0; i < lookup.size(); i++) { if (lookup[i] != NULL) { delete lookup[i]; } }
+            lookup = input.getSharedRAbundVectors(lastLabel);
+            
+            m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
+            
+            process(lookup);           
+            
+            for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  }
+        }
+        
+        if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
+        
+        m->mothurOut("It took " + toString(time(NULL) - start) + " seconds to process.");
+        m->mothurOutEndLine();
+        m->mothurOutEndLine();
+        
+        //output files created by command
+               m->mothurOutEndLine();
+               m->mothurOut("Output File Names: "); m->mothurOutEndLine();
+               for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
+               m->mothurOutEndLine();
+        return 0;
+               
+    }
+       catch(exception& e) {
+               m->errorOut(e, "SparccCommand", "execute");
+               exit(1);
+       }
+}
+/**************************************************************************************************/
+vector<vector<float> > SparccCommand::shuffleSharedVector(vector<vector<float> >& sharedVector){
+    try {
+        int numGroups = (int)sharedVector.size();
+        int numOTUs = (int)sharedVector[0].size();
+        
+        vector<vector<float> > shuffledVector = sharedVector;
+        
+        for(int i=0;i<numGroups;i++){
+            for(int j=0;j<numOTUs;j++){
+                shuffledVector[i][j] = sharedVector[rand()%numGroups][j];
+            }
+        }
+        
+        return shuffledVector;
+    }
+       catch(exception& e) {
+               m->errorOut(e, "SparccCommand", "execute");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+int SparccCommand::process(vector<SharedRAbundVector*>& lookup){
+       try {
+        cout.setf(ios::fixed, ios::floatfield);
+        cout.setf(ios::showpoint);
+        
+        vector<vector<float> > sharedVector;
+        vector<string> otuNames = m->currentBinLabels;
+        
+        //fill sharedVector to pass to CalcSparcc
+        for (int i = 0; i < lookup.size(); i++) {
+            vector<int> abunds = lookup[i]->getAbundances();
+            vector<float> temp;
+            for (int j = 0; j < abunds.size(); j++) { temp.push_back((float) abunds[j]); }
+            sharedVector.push_back(temp);
+        }
+        int numOTUs = (int)sharedVector[0].size();
+        int numGroups = lookup.size();
+        
+        map<string, string> variables;
+        variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(sharedfile));
+        variables["[distance]"] = lookup[0]->getLabel();
+        
+
+        string relAbundFileName = getOutputFileName("sparccrelabund", variables);
+        ofstream relAbundFile;
+        m->openOutputFile(relAbundFileName, relAbundFile);
+        outputNames.push_back(relAbundFileName);  outputTypes["sparccrelabund"].push_back(relAbundFileName);
+        
+        relAbundFile << "OTU\taveRelAbund\n";
+        for(int i=0;i<numOTUs;i++){
+            if (m->control_pressed) { relAbundFile.close(); return 0; }
+            
+            double relAbund = 0.0000;
+            for(int j=0;j<numGroups;j++){
+                relAbund += sharedVector[j][i]/(double)lookup[j]->getNumSeqs();
+            }
+            relAbundFile << otuNames[i] <<'\t' << relAbund / (double) numGroups << endl;
+        }
+        relAbundFile.close();
+        
+        CalcSparcc originalData(sharedVector, maxIterations, numSamplings, normalizeMethod);
+        vector<vector<float> > origCorrMatrix = originalData.getRho();
+        
+        string correlationFileName = getOutputFileName("corr", variables);
+        ofstream correlationFile;
+        m->openOutputFile(correlationFileName, correlationFile);
+        outputNames.push_back(correlationFileName);  outputTypes["corr"].push_back(correlationFileName);
+        correlationFile.setf(ios::fixed, ios::floatfield);
+        correlationFile.setf(ios::showpoint);
+        
+        for(int i=0;i<numOTUs;i++){ correlationFile << '\t' << otuNames[i];    }   correlationFile << endl;
+        for(int i=0;i<numOTUs;i++){
+            correlationFile << otuNames[i];
+            for(int j=0;j<numOTUs;j++){
+                correlationFile << '\t' << origCorrMatrix[i][j];
+            }
+            correlationFile << endl;
+        }
+        
+        
+        if(numPermutations != 0){
+            vector<vector<float> > pValues = createProcesses(sharedVector, origCorrMatrix);
+            
+            if (m->control_pressed) { return 0; }
+            
+            string pValueFileName = getOutputFileName("pvalue", variables);
+            ofstream pValueFile;
+            m->openOutputFile(pValueFileName, pValueFile);
+            outputNames.push_back(pValueFileName);  outputTypes["pvalue"].push_back(pValueFileName);
+            pValueFile.setf(ios::fixed, ios::floatfield);
+            pValueFile.setf(ios::showpoint);
+            
+            for(int i=0;i<numOTUs;i++){ pValueFile << '\t' << otuNames[i];    }   pValueFile << endl;
+            for(int i=0;i<numOTUs;i++){
+                pValueFile << otuNames[i];
+                for(int j=0;j<numOTUs;j++){
+                    pValueFile << '\t' << pValues[i][j];
+                }
+                pValueFile << endl;
+            }
+        }
+
+
+        return 0;
+    }
+       catch(exception& e) {
+               m->errorOut(e, "SparccCommand", "process");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+vector<vector<float> > SparccCommand::createProcesses(vector<vector<float> >& sharedVector, vector<vector<float> >& origCorrMatrix){
+       try {
+        int numOTUs = sharedVector[0].size();
+        vector<vector<float> > pValues;
+        
+        if(processors == 1){
+                       pValues = driver(sharedVector, origCorrMatrix, numPermutations);
+               }else{
+            //divide iters between processors
+                       vector<int> procIters;
+            int numItersPerProcessor = numPermutations / processors;
+            
+            //divide iters between processes
+            for (int h = 0; h < processors; h++) {
+                if(h == processors - 1){ numItersPerProcessor = numPermutations - h * numItersPerProcessor; }
+                procIters.push_back(numItersPerProcessor);
+            }
+            
+            vector<int> processIDS;
+            int process = 1;
+                       
+#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
+            
+                       //loop through and create all the processes you want
+                       while (process != processors) {
+                               int pid = fork();
+                               
+                               if (pid > 0) {
+                                       processIDS.push_back(pid);  //create map from line number to pid so you can append files in correct order later
+                                       process++;
+                               }else if (pid == 0){
+                                       pValues = driver(sharedVector, origCorrMatrix, procIters[process]);
+                                       
+                                       //pass pvalues to parent
+                                       ofstream out;
+                                       string tempFile = toString(getpid()) + ".pvalues.temp";
+                                       m->openOutputFile(tempFile, out);
+                                       
+                                       //pass values
+                                       for (int i = 0; i < pValues.size(); i++) {
+                        for (int j = 0; j < pValues[i].size(); j++) {
+                            out << pValues[i][j] << '\t';
+                        }
+                        out << endl;
+                                       }
+                                       out << endl;
+                                       
+                                       out.close();
+                                       
+                                       exit(0);
+                               }else {
+                                       m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine();
+                                       for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
+                                       exit(0);
+                               }
+                       }
+                       
+                       //do my part
+                       pValues = driver(sharedVector, origCorrMatrix, procIters[0]);
+                       
+                       //force parent to wait until all the processes are done
+                       for (int i=0;i<processIDS.size();i++) {
+                               int temp = processIDS[i];
+                               wait(&temp);
+                       }
+                       
+                       //combine results
+                       for (int i = 0; i < processIDS.size(); i++) {
+                               ifstream in;
+                               string tempFile =  toString(processIDS[i]) + ".pvalues.temp";
+                               m->openInputFile(tempFile, in);
+                               
+                               ////// to do ///////////
+                               int numTemp; numTemp = 0;
+                               
+                for (int j = 0; j < pValues.size(); j++) {
+                    for (int k = 0; k < pValues.size(); k++) {
+                        in >> numTemp; m->gobble(in);
+                        pValues[j][k] += numTemp;
+                    }
+                    m->gobble(in);
+                               }
+                               in.close(); m->mothurRemove(tempFile);
+                       }
+#else
+            
+            //fill in functions
+            vector<sparccData*> pDataArray;
+            DWORD   dwThreadIdArray[processors-1];
+            HANDLE  hThreadArray[processors-1];
+            
+            //Create processor worker threads.
+            for( int i=1; i<processors; i++ ){
+                
+                //make copy so we don't get access violations
+                vector< vector<float> > copySharedVector = sharedVector;
+                vector< vector<float> > copyOrig = origCorrMatrix;
+                
+                sparccData* temp = new sparccData(m, procIters[i], copySharedVector, copyOrig, numSamplings, maxIterations, numPermutations, normalizeMethod);
+                pDataArray.push_back(temp);
+                processIDS.push_back(i);
+                
+                hThreadArray[i-1] = CreateThread(NULL, 0, MySparccThreadFunction, pDataArray[i-1], 0, &dwThreadIdArray[i-1]);
+            }
+            
+            //do my part
+                       pValues = driver(sharedVector, origCorrMatrix, procIters[0]);
+            
+            //Wait until all threads have terminated.
+            WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE);
+            
+            //Close all thread handles and free memory allocations.
+            for(int i=0; i < pDataArray.size(); i++){
+                for (int j = 0; j < pDataArray[i]->pValues.size(); j++) {
+                    for (int k = 0; k < pDataArray[i]->pValues[j].size(); k++) {
+                        pValues[j][k] += pDataArray[i]->pValues[j][k];
+                    }
+                }
+                
+                CloseHandle(hThreadArray[i]);
+                delete pDataArray[i];
+            }
+#endif
+               }
+
+        for(int i=0;i<numOTUs;i++){
+            pValues[i][i] = 1;
+            for(int j=0;j<i;j++){
+                pValues[i][j] /= (double)numPermutations;
+                pValues[j][i] = pValues[i][j];
+            }
+        }
+        
+        return pValues;
+    }
+       catch(exception& e) {
+               m->errorOut(e, "SparccCommand", "createProcesses");
+               exit(1);
+       }
+}
+
+//**********************************************************************************************************************
+vector<vector<float> > SparccCommand::driver(vector<vector<float> >& sharedVector, vector<vector<float> >& origCorrMatrix, int numPerms){
+       try {
+        int numOTUs = sharedVector[0].size();
+        vector<vector<float> > sharedShuffled = sharedVector;
+        vector<vector<float> > pValues(numOTUs);
+        for(int i=0;i<numOTUs;i++){ pValues[i].assign(numOTUs, 0);  }
+
+        for(int i=0;i<numPerms;i++){
+            if (m->control_pressed) { return pValues; }
+            sharedShuffled = shuffleSharedVector(sharedVector);
+            CalcSparcc permutedData(sharedShuffled, maxIterations, numSamplings, normalizeMethod);
+            vector<vector<float> > permuteCorrMatrix = permutedData.getRho();
+            
+            for(int j=0;j<numOTUs;j++){
+                for(int k=0;k<j;k++){
+                    double randValue = permuteCorrMatrix[j][k];
+                    double observedValue = origCorrMatrix[j][k];
+                    if(observedValue >= 0 &&  randValue > observedValue)   { pValues[j][k]++; }//this method seems to deflate the
+                    else if(observedValue < 0 && randValue < observedValue){ pValues[j][k]++; }//pvalues of small rho values
+                }
+            }
+            if((i+1) % (int)(numPermutations * 0.05) == 0){ cout << i+1 << endl;  }
+        }
+        
+        return pValues;
+    }
+       catch(exception& e) {
+               m->errorOut(e, "SparccCommand", "driver");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+
diff --git a/sparcccommand.h b/sparcccommand.h
new file mode 100644 (file)
index 0000000..8add3ed
--- /dev/null
@@ -0,0 +1,133 @@
+//
+//  sparcccommand.h
+//  Mothur
+//
+//  Created by SarahsWork on 5/10/13.
+//  Copyright (c) 2013 Schloss Lab. All rights reserved.
+//
+
+#ifndef Mothur_sparcccommand_h
+#define Mothur_sparcccommand_h
+
+#include "command.hpp"
+#include "inputdata.h"
+#include "calcsparcc.h"
+
+/**************************************************************************************************/
+
+class SparccCommand : public Command {
+public:
+    SparccCommand(string);
+    SparccCommand();
+    ~SparccCommand(){}
+    
+    vector<string> setParameters();
+    string getCommandName()                    { return "sparcc";                      }
+    string getCommandCategory()                { return "OTU-Based Approaches";                }
+    
+    string getOutputPattern(string);
+    //commmand category choices: Sequence Processing, OTU-Based Approaches, Hypothesis Testing, Phylotype Analysis, General, Clustering and Hidden
+       string getHelpString();
+    string getCitation() { return "Friedman J, Alm EJ (2012) Inferring Correlation Networks from Genomic Survey Data. PLoS Comput Biol 8(9): e1002687. doi:10.1371/journal.pcbi.1002687 http://www.mothur.org/wiki/Sparcc"; }
+    string getDescription()            { return "brief description"; }
+    
+    int execute();
+    void help() { m->mothurOut(getHelpString()); }
+    
+private:
+    bool abort, allLines;
+    string outputDir, sharedfile, normalizeMethod;
+    int numSamplings, maxIterations, numPermutations, processors;
+    set<string> labels;
+    vector<string> Groups;
+    vector<string> outputNames;
+    
+    int process(vector<SharedRAbundVector*>&);
+    vector<vector<float> > createProcesses(vector<vector<float> >&, vector<vector<float> >&);
+    vector<vector<float> > driver(vector<vector<float> >&, vector<vector<float> >&, int);
+    vector<vector<float> > shuffleSharedVector(vector<vector<float> >&);
+};
+
+/**************************************************************************************************/
+
+struct sparccData {
+       MothurOut* m;
+    int numPerms;
+    vector< vector<float> > sharedVector;
+    vector< vector<float> > origCorrMatrix;
+    vector<vector<float> > pValues;
+    int numSamplings, maxIterations, numPermutations;
+    string normalizeMethod;
+       
+       sparccData(){}
+       sparccData(MothurOut* mout, int it, vector< vector<float> > cs, vector< vector<float> > co, int ns, int mi, int np, string nm) {
+               m = mout;
+        numPerms = it;
+        sharedVector = cs;
+        origCorrMatrix = co;
+        numSamplings = ns;
+        maxIterations = mi;
+        numPermutations = np;
+        normalizeMethod = nm;
+    }
+};
+/**************************************************************************************************/
+#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
+#else
+static DWORD WINAPI MySparccThreadFunction(LPVOID lpParam){
+       sparccData* pDataArray;
+       pDataArray = (sparccData*)lpParam;
+       
+       try {
+        
+        int numOTUs = pDataArray->sharedVector[0].size();
+        vector<vector<float> > sharedShuffled = pDataArray->sharedVector;
+        pDataArray->pValues.resize(numOTUs);
+        for(int i=0;i<numOTUs;i++){ pDataArray->pValues[i].assign(numOTUs, 0);  }
+        
+        for(int i=0;i<pDataArray->numPerms;i++){
+            if (pDataArray->m->control_pressed) { return 0; }
+            
+            //sharedShuffled = shuffleSharedVector(sharedVector);
+            //////////////////////////////////////////////////////////
+            int numGroups = (int)pDataArray->sharedVector.size();
+            sharedShuffled = pDataArray->sharedVector;
+            
+            for(int k=0;k<numGroups;k++){
+                for(int j=0;j<numOTUs;j++){
+                    sharedShuffled[k][j] = pDataArray->sharedVector[rand()%numGroups][j];
+                }
+            }
+            /////////////////////////////////////////////////////////
+            
+            CalcSparcc permutedData(sharedShuffled, pDataArray->maxIterations, pDataArray->numSamplings, pDataArray->normalizeMethod);
+            vector<vector<float> > permuteCorrMatrix = permutedData.getRho();
+            
+            for(int j=0;j<numOTUs;j++){
+                for(int k=0;k<j;k++){
+                    double randValue = permuteCorrMatrix[j][k];
+                    double observedValue = pDataArray->origCorrMatrix[j][k];
+                    if(observedValue >= 0 &&  randValue > observedValue)   { pDataArray->pValues[j][k]++; }//this method seems to deflate the
+                    else if(observedValue < 0 && randValue < observedValue){ pDataArray->pValues[j][k]++; }//pvalues of small rho values
+                }
+            }
+            if((i+1) % (int)(pDataArray->numPermutations * 0.05) == 0){ cout << i+1 << endl;  }
+        }
+        
+        return 0;
+               
+       }
+       catch(exception& e) {
+               pDataArray->m->errorOut(e, "SparccCommand", "MySparccThreadFunction");
+               exit(1);
+       }
+}
+#endif
+
+
+/**************************************************************************************************/
+
+
+
+
+#endif
index a5ae3cbda79fd52a9358464eab1941c96a69bd63..f6b5c4d81cf5446adc5e5253808380be7ebbf28d 100644 (file)
@@ -182,7 +182,7 @@ int SplitMatrix::createDistanceFilesFromTax(map<string, int>& seqGroup, int numG
                }
                
                copyGroups.clear();
-               
+        
                //process each distance file
                for (int i = 0; i < numGroups; i++) { 
                        
@@ -207,6 +207,9 @@ int SplitMatrix::createDistanceFilesFromTax(map<string, int>& seqGroup, int numG
             else { m->mothurRemove((countfile + "." + toString(i) + ".temp")); }
                }
         
+        //restore old fasta file name since dist.seqs overwrites it with the temp files
+        m->setFastaFile(fastafile);
+        
         vector<string> tempDistFiles;    
         for(int i=0;i<numGroups;i++){
             if (outputDir == "") { outputDir = m->hasPath(fastafile); }
@@ -332,7 +335,7 @@ int SplitMatrix::splitDistanceLarge(){
                
                int numGroups = 0;
 
-               ofstream outFile;
+               //ofstream outFile;
                ifstream dFile;
                m->openInputFile(distFile, dFile);
        
@@ -408,6 +411,7 @@ int SplitMatrix::splitDistanceLarge(){
                                                                                        
                                        //have we reached the max buffer size
                                        if (numOutputs[groupID] > 60) { //write out sequence
+                        ofstream outFile;
                                                outFile.open(fileName.c_str(), ios::app);
                                                outFile << outputs[groupID] << seqA << '\t' << seqB << '\t' << dist << endl;
                                                outFile.close();
@@ -434,7 +438,7 @@ int SplitMatrix::splitDistanceLarge(){
                                                        //if groupB is written to file it is above buffer size so read and write to new merged file
                                                        if (wroteOutPut[groupIDB]) {
                                                                string fileName2 = distFile + "." + toString(groupIDB) + ".temp";
-                                                               ifstream fileB(fileName2.c_str(), ios::ate);
+                                                               /*ifstream fileB(fileName2.c_str(), ios::ate);
                                                                
                                                                outFile.open(fileName.c_str(), ios::app);
                                                                
@@ -469,17 +473,22 @@ int SplitMatrix::splitDistanceLarge(){
                                                                outFile << temp.substr(0, lastRead);
                                                                delete memblock;
                                                                
-                                                               fileB.close();
+                                                               fileB.close();*/
+                                m->appendFiles(fileName2, fileName);
                                                                m->mothurRemove(fileName2);
+                        
                                                                
                                                                //write out the merged memory
                                                                if (numOutputs[groupID] > 60) {
-                                                                       outFile << outputs[groupID];
+                                    ofstream tempOut;
+                                    m->openOutputFile(fileName, tempOut);
+                                                                       tempOut << outputs[groupID];
                                                                        outputs[groupID] = "";
                                                                        numOutputs[groupID] = 0;
+                                    tempOut.close();
                                                                }
                                                                
-                                                               outFile.close();
+                                                               //outFile.close();
                                                                
                                                                wroteOutPut[groupID] = true;
                                                                wroteOutPut[groupIDB] = false;
@@ -494,7 +503,7 @@ int SplitMatrix::splitDistanceLarge(){
                                                        
                                                        if (wroteOutPut[groupIDA]) {
                                                                string fileName2 = distFile + "." + toString(groupIDA) + ".temp";
-                                                               ifstream fileB(fileName2.c_str(), ios::ate);
+                                                               /*ifstream fileB(fileName2.c_str(), ios::ate);
                                                                
                                                                outFile.open(fileName.c_str(), ios::app);
                                                                
@@ -529,17 +538,21 @@ int SplitMatrix::splitDistanceLarge(){
                                                                        
                                                                delete memblock;
                                                                
-                                                               fileB.close();
+                                                               fileB.close();*/
+                                m->appendFiles(fileName2, fileName);
                                                                m->mothurRemove(fileName2);
                                                                
                                                                //write out the merged memory
                                                                if (numOutputs[groupID] > 60) {
-                                                                       outFile << outputs[groupID];
+                                    ofstream tempOut;
+                                    m->openOutputFile(fileName, tempOut);
+                                                                       tempOut << outputs[groupID];
                                                                        outputs[groupID] = "";
                                                                        numOutputs[groupID] = 0;
+                                    tempOut.close();
                                                                }
                                                                
-                                                               outFile.close();
+                                                               //outFile.close();
                                                                
                                                                wroteOutPut[groupID] = true;
                                                                wroteOutPut[groupIDA] = false;
@@ -559,6 +572,7 @@ int SplitMatrix::splitDistanceLarge(){
             //remove old names files just in case
                        
                        if (numOutputs[i] > 0) {
+                ofstream outFile;
                                outFile.open(fileName.c_str(), ios::app);
                                outFile << outputs[i];
                                outFile.close();
index e82d1f6600e0b83f79154634ddfbda43d7542d91..d93fb6e775392b5436b6dadbb99c88761e862de8 100644 (file)
@@ -703,7 +703,7 @@ int SummarySharedCommand::process(vector<SharedRAbundVector*> thisLookup, string
                 
                 vector<summarySharedData*> pDataArray; 
                 DWORD   dwThreadIdArray[processors-1];
-                HANDLE  hThreadArray[processors-1]; 
+                HANDLE  hThreadArray[processors-1];
                 
                 //Create processor worker threads.
                 for( int i=1; i<processors; i++ ){
@@ -716,6 +716,7 @@ int SummarySharedCommand::process(vector<SharedRAbundVector*> thisLookup, string
                         temp->setGroup(thisLookup[k]->getGroup());
                         newLookup.push_back(temp);
                     }
+                
                     
                     //for each bin
                     for (int k = 0; k < thisLookup[0]->getNumBins(); k++) {
index 0119741988c010e0d3b3d3df1bbc4c052f5bba8f..80698f1b4b0317719421dbe9c4af3c0583bd9d97 100644 (file)
@@ -662,8 +662,18 @@ int TreeGroupCommand::makeSimsShared() {
             
             if (lookup.size() < 2) { m->mothurOut("You have not provided enough valid groups.  I cannot run the command."); m->mothurOutEndLine(); m->control_pressed = true; return 0; }
         }
-        
         numGroups = lookup.size();
+        
+        //sanity check to make sure processors < numComparisions
+        int numDists = 0;
+        for(int i=0;i<numGroups;i++){
+            for(int j=0;j<i;j++){
+                numDists++;
+                if (numDists > processors) { break; }
+            }
+        }
+        if (numDists < processors) { processors = numDists; }
+        
                lines.resize(processors);
                for (int i = 0; i < processors; i++) {
                        lines[i].start = int (sqrt(float(i)/float(processors)) * numGroups);
@@ -918,6 +928,8 @@ int TreeGroupCommand::process(vector<SharedRAbundVector*> thisLookup) {
                 thisItersLookup.clear();
                 for (int i = 0; i < calcDists.size(); i++) {  calcDists[i].clear(); }
             }
+            
+            if (m->debug) {  m->mothurOut("[DEBUG]: iter = " + toString(thisIter) + ".\n"); }
                }
         
                if (m->debug) {  m->mothurOut("[DEBUG]: done with iters.\n"); }
index 2da240e21b5103d4d77b6eb24d0f0c335f3a7d3c..aebb16b42a6455693b7c491f83b1740c58acd0e0 100644 (file)
@@ -47,6 +47,7 @@ string TrimFlowsCommand::getHelpString(){
        try {
                string helpString = "";
                helpString += "The trim.flows command reads a flowgram file and creates .....\n";
+        helpString += "The order parameter options are A, B or I.  Default=A. A = TACG and B = TACGTACGTACGATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGC and I = TACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGC.\n";
                helpString += "Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFasta).\n";
                helpString += "For more details please check out the wiki http://www.mothur.org/wiki/Trim.flows.\n";
                return helpString;
index 81b9111b17dd8bba332ad8a8267cc12bf16e235a..16d83de2596b9cf84ab4069789917878fa89559e 100644 (file)
@@ -1391,9 +1391,7 @@ int TrimSeqsCommand::setLines(string filename, string qfilename) {
                         string sname = "";  nameStream >> sname;
                         sname = sname.substr(1);
                         
-                        for (int i = 0; i < sname.length(); i++) {
-                            if (sname[i] == ':') { sname[i] = '_'; m->changedSeqNames = true; }
-                        }
+                        m->checkName(sname);
                         
                         map<string, int>::iterator it = firstSeqNames.find(sname);
                         
index 56694e32e3a977b08df087dfc8deeabb2524564c..a845f9bc2fa54a5c27b0d74aed20929f1aeb756f 100644 (file)
@@ -49,31 +49,21 @@ EstOutput Unweighted::getValues(Tree* t, int p, string o) {
                                namesOfGroupCombos.push_back(groups);
                        }
                }
+        
+        lines.clear();
+        int numPairs = namesOfGroupCombos.size();
+        int numPairsPerProcessor = numPairs / processors;
+        
+        for (int i = 0; i < processors; i++) {
+            int startPos = i * numPairsPerProcessor;
+            if(i == processors - 1){ numPairsPerProcessor = numPairs - i * numPairsPerProcessor; }
+            lines.push_back(linePair(startPos, numPairsPerProcessor));
+        }
+        
+        data = createProcesses(t, namesOfGroupCombos, ct);
 
-               #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
-                       if(processors == 1){
-                               data = driver(t, namesOfGroupCombos, 0, namesOfGroupCombos.size(), ct);
-                       }else{
-                               int numPairs = namesOfGroupCombos.size();
-                               
-                               int numPairsPerProcessor = numPairs / processors;
-                               
-                               for (int i = 0; i < processors; i++) {
-                                       int startPos = i * numPairsPerProcessor;
-                       
-                                       if(i == processors - 1){
-                                               numPairsPerProcessor = numPairs - i * numPairsPerProcessor;
-                                       }
-               
-                                       lines.push_back(linePair(startPos, numPairsPerProcessor));
-                               }
-                               data = createProcesses(t, namesOfGroupCombos, ct);
-                               lines.clear();
-                       }
-               #else
-                       data = driver(t, namesOfGroupCombos, 0, namesOfGroupCombos.size(), ct);
-               #endif
-               
+        lines.clear();
+        
                return data;
        }
        catch(exception& e) {
@@ -85,11 +75,12 @@ EstOutput Unweighted::getValues(Tree* t, int p, string o) {
 
 EstOutput Unweighted::createProcesses(Tree* t, vector< vector<string> > namesOfGroupCombos, CountTable* ct) {
        try {
-#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
-               int process = 1;
+        int process = 1;
                vector<int> processIDS;
                
                EstOutput results;
+#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
+               
                
                //loop through and create all the processes you want
                while (process != processors) {
@@ -155,11 +146,47 @@ EstOutput Unweighted::createProcesses(Tree* t, vector< vector<string> > namesOfG
                        in.close();
                        m->mothurRemove(s);
                }
+#else
+               //fill in functions
+        vector<unweightedData*> pDataArray;
+               DWORD   dwThreadIdArray[processors-1];
+               HANDLE  hThreadArray[processors-1];
+        vector<CountTable*> cts;
+        vector<Tree*> trees;
+               
+               //Create processor worker threads.
+               for( int i=1; i<processors; i++ ){
+            CountTable* copyCount = new CountTable();
+            copyCount->copy(ct);
+            Tree* copyTree = new Tree(copyCount);
+            copyTree->getCopy(t);
+            
+            cts.push_back(copyCount);
+            trees.push_back(copyTree);
+            
+            unweightedData* tempweighted = new unweightedData(m, lines[i].start, lines[i].num, namesOfGroupCombos, copyTree, copyCount, includeRoot);
+                       pDataArray.push_back(tempweighted);
+                       processIDS.push_back(i);
+            
+                       hThreadArray[i-1] = CreateThread(NULL, 0, MyUnWeightedThreadFunction, pDataArray[i-1], 0, &dwThreadIdArray[i-1]);
+               }
+               
+               results = driver(t, namesOfGroupCombos, lines[0].start, lines[0].num, ct);
                
-               //m->mothurOut("DONE."); m->mothurOutEndLine(); m->mothurOutEndLine();
+               //Wait until all threads have terminated.
+               WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE);
                
-               return results;
-#endif         
+               //Close all thread handles and free memory allocations.
+               for(int i=0; i < pDataArray.size(); i++){
+            for (int j = 0; j < pDataArray[i]->results.size(); j++) {  results.push_back(pDataArray[i]->results[j]);  }
+                       delete cts[i];
+            delete trees[i];
+                       CloseHandle(hThreadArray[i]);
+                       delete pDataArray[i];
+               }
+
+#endif 
+        return results;
        }
        catch(exception& e) {
                m->errorOut(e, "Unweighted", "createProcesses");
@@ -175,10 +202,7 @@ EstOutput Unweighted::driver(Tree* t, vector< vector<string> > namesOfGroupCombo
                
                int count = 0;
                int total = num;
-               int twentyPercent = (total * 0.20);
-               if (twentyPercent == 0) { twentyPercent = 1; }
-        
-                       
+                                       
                for (int h = start; h < (start+num); h++) {
                                
                        if (m->control_pressed) { return results; }
@@ -240,12 +264,7 @@ EstOutput Unweighted::driver(Tree* t, vector< vector<string> > namesOfGroupCombo
                        }
                        count++;
 
-                       //report progress
-                       //if((count % twentyPercent) == 0) {    float tempOut = (count / (float)total); if (isnan(tempOut) || isinf(tempOut)) { tempOut = 0.0; } m->mothurOut("Percentage complete: " + toString((int(tempOut) * 100.0))); m->mothurOutEndLine();       }
-               }
-               
-               //report progress
-               //if((count % twentyPercent) != 0) {    float tempOut = (count / (float)total); if (isnan(tempOut) || isinf(tempOut)) { tempOut = 0.0; } m->mothurOut("Percentage complete: " + toString((int(tempOut) * 100.0))); m->mothurOutEndLine();       }
+        }
                
                return results; 
        }
@@ -294,31 +313,20 @@ EstOutput Unweighted::getValues(Tree* t, string groupA, string groupB, int p, st
                                namesOfGroupCombos.push_back(groups);
                        }
                }
+     
+        lines.clear();
+        int numPairs = namesOfGroupCombos.size();
+        int numPairsPerProcessor = numPairs / processors;
+     
+        for (int i = 0; i < processors; i++) {
+            int startPos = i * numPairsPerProcessor;
+            if(i == processors - 1){ numPairsPerProcessor = numPairs - i * numPairsPerProcessor; }
+            lines.push_back(linePair(startPos, numPairsPerProcessor));
+        }
+     
+        data = createProcesses(t, namesOfGroupCombos, true, ct);
+        lines.clear();
 
-               #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
-                       if(processors == 1){
-                               data = driver(t, namesOfGroupCombos, 0, namesOfGroupCombos.size(), true, ct);
-                       }else{
-                               int numPairs = namesOfGroupCombos.size();
-                               
-                               int numPairsPerProcessor = numPairs / processors;
-                               
-                               for (int i = 0; i < processors; i++) {
-                                       int startPos = i * numPairsPerProcessor;
-                                       if(i == processors - 1){
-                                               numPairsPerProcessor = numPairs - i * numPairsPerProcessor;
-                                       }
-                                       lines.push_back(linePair(startPos, numPairsPerProcessor));
-                               }
-                                       
-                               data = createProcesses(t, namesOfGroupCombos, true, ct);
-                               
-                               lines.clear();
-                       }
-               #else
-                       data = driver(t, namesOfGroupCombos, 0, namesOfGroupCombos.size(), true, ct);
-               #endif
-       
                return data;
        }
        catch(exception& e) {
@@ -330,12 +338,12 @@ EstOutput Unweighted::getValues(Tree* t, string groupA, string groupB, int p, st
 
 EstOutput Unweighted::createProcesses(Tree* t, vector< vector<string> > namesOfGroupCombos, bool usingGroups, CountTable* ct) {
        try {
-#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
-               int process = 1;
+        int process = 1;
                vector<int> processIDS;
                
                EstOutput results;
-               
+#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
+
                //loop through and create all the processes you want
                while (process != processors) {
                        int pid = fork();
@@ -399,9 +407,51 @@ EstOutput Unweighted::createProcesses(Tree* t, vector< vector<string> > namesOfG
                        in.close();
                        m->mothurRemove(s);
                }
+#else
+       //for some reason it doesn't seem to be calculating hte random trees scores.  all scores are the same even though copytree appears to be randomized.
+        
+        /*
+        //fill in functions
+        vector<unweightedData*> pDataArray;
+               DWORD   dwThreadIdArray[processors-1];
+               HANDLE  hThreadArray[processors-1];
+        vector<CountTable*> cts;
+        vector<Tree*> trees;
+               
+               //Create processor worker threads.
+               for( int i=1; i<processors; i++ ){
+            CountTable* copyCount = new CountTable();
+            copyCount->copy(ct);
+            Tree* copyTree = new Tree(copyCount);
+            copyTree->getCopy(t);
+            
+            cts.push_back(copyCount);
+            trees.push_back(copyTree);
+            
+            unweightedData* tempweighted = new unweightedData(m, lines[i].start, lines[i].num, namesOfGroupCombos, copyTree, copyCount, includeRoot);
+                       pDataArray.push_back(tempweighted);
+                       processIDS.push_back(i);
+            
+                       hThreadArray[i-1] = CreateThread(NULL, 0, MyUnWeightedRandomThreadFunction, pDataArray[i-1], 0, &dwThreadIdArray[i-1]);
+               }
+               
+               results = driver(t, namesOfGroupCombos, lines[0].start, lines[0].num, usingGroups, ct);
                
-               return results;
-#endif         
+               //Wait until all threads have terminated.
+               WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE);
+               
+               //Close all thread handles and free memory allocations.
+               for(int i=0; i < pDataArray.size(); i++){
+            for (int j = 0; j < pDataArray[i]->results.size(); j++) {  results.push_back(pDataArray[i]->results[j]);  }
+                       delete cts[i];
+            delete trees[i];
+                       CloseHandle(hThreadArray[i]);
+                       delete pDataArray[i];
+               }       */
+        
+        results = driver(t, namesOfGroupCombos, 0, namesOfGroupCombos.size(), usingGroups, ct);
+#endif
+        return results;
        }
        catch(exception& e) {
                m->errorOut(e, "Unweighted", "createProcesses");
@@ -481,7 +531,7 @@ EstOutput Unweighted::driver(Tree* t, vector< vector<string> > namesOfGroupCombo
                                if (isnan(UW) || isinf(UW)) { UW = 0; }
                                
                                results[count] = UW;
-                       }
+            }
                        count++;
                        
                }
index b136b007b517e56645be544ee6556f83a45de649..0291853ce5929141c06d8b5fb2fe3fe992fa189f 100644 (file)
@@ -46,5 +46,307 @@ class Unweighted : public TreeCalculator  {
 };
 
 /***********************************************************************/
+struct unweightedData {
+    int start;
+       int num;
+       MothurOut* m;
+    EstOutput results;
+    vector< vector<string> > namesOfGroupCombos;
+    Tree* t;
+    CountTable* ct;
+    bool includeRoot;
+    
+       unweightedData(){}
+       unweightedData(MothurOut* mout, int st, int en, vector< vector<string> > ngc, Tree* tree, CountTable* count, bool ir) {
+        m = mout;
+               start = st;
+               num = en;
+        namesOfGroupCombos = ngc;
+        t = tree;
+        ct = count;
+        includeRoot = ir;
+       }
+};
+
+/**************************************************************************************************/
+#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
+#else
+static DWORD WINAPI MyUnWeightedThreadFunction(LPVOID lpParam){
+       unweightedData* pDataArray;
+       pDataArray = (unweightedData*)lpParam;
+       try {
+        pDataArray->results.resize(pDataArray->num);
+        map< vector<string>, set<int> > rootForGrouping;
+               
+               int count = 0;
+                       
+               for (int h = pDataArray->start; h < (pDataArray->start+pDataArray->num); h++) {
+                       if (pDataArray->m->control_pressed) { return 0; }
+            
+                       double UniqueBL=0.0000;  //a branch length is unique if it's chidren are from the same group
+                       double totalBL = 0.00;  //all branch lengths
+                       double UW = 0.00;               //Unweighted Value = UniqueBL / totalBL;
+            
+                       //find a node that belongs to one of the groups in this combo
+                       int nodeBelonging = -1;
+                       for (int g = 0; g < pDataArray->namesOfGroupCombos[h].size(); g++) {
+                               if (pDataArray->t->groupNodeInfo[pDataArray->namesOfGroupCombos[h][g]].size() != 0) { nodeBelonging = pDataArray->t->groupNodeInfo[pDataArray->namesOfGroupCombos[h][g]][0]; break; }
+                       }
+                       
+                       //sanity check
+                       if (nodeBelonging == -1) {
+                               pDataArray->m->mothurOut("[WARNING]: cannot find a nodes in the tree from grouping ");
+                               for (int g = 0; g < pDataArray->namesOfGroupCombos[h].size()-1; g++) { pDataArray->m->mothurOut(pDataArray->namesOfGroupCombos[h][g] + "-"); }
+                               pDataArray->m->mothurOut(pDataArray->namesOfGroupCombos[h][pDataArray->namesOfGroupCombos[h].size()-1]);
+                               pDataArray->m->mothurOut(", skipping."); pDataArray->m->mothurOutEndLine(); pDataArray->results[count] = UW;
+                       }else{
+                               
+                               //if including the root this clears rootForGrouping[namesOfGroupCombos[h]]
+                               //getRoot(t, nodeBelonging, namesOfGroupCombos[h]);
+                               /////////////////////////////////////////////////////////////////////////////
+                //you are a leaf so get your parent
+                vector<string> grouping = pDataArray->namesOfGroupCombos[h];
+                int index = pDataArray->t->tree[nodeBelonging].getParent();
+                
+                if (pDataArray->includeRoot) {
+                    rootForGrouping[grouping].clear();
+                }else {
+                    
+                    //my parent is a potential root
+                    rootForGrouping[grouping].insert(index);
+                    
+                    //while you aren't at root
+                    while(pDataArray->t->tree[index].getParent() != -1){
+                        //cout << index << endl;
+                        if (pDataArray->m->control_pressed) {  return 0; }
+                        
+                        //am I the root for this grouping? if so I want to stop "early"
+                        //does my sibling have descendants from the users groups?
+                        //if so I am not the root
+                        int parent = pDataArray->t->tree[index].getParent();
+                        int lc = pDataArray->t->tree[parent].getLChild();
+                        int rc = pDataArray->t->tree[parent].getRChild();
+                        
+                        int sib = lc;
+                        if (lc == index) { sib = rc; }
+                        
+                        map<string, int>::iterator itGroup;
+                        int pcountSize = 0;
+                        for (int j = 0; j < grouping.size(); j++) {
+                            map<string, int>::iterator itGroup = pDataArray->t->tree[sib].pcount.find(grouping[j]);
+                            if (itGroup != pDataArray->t->tree[sib].pcount.end()) { pcountSize++; if (pcountSize > 1) { break; } }
+                        }
+                        
+                        //if yes, I am not the root
+                        if (pcountSize != 0) {
+                            rootForGrouping[grouping].clear();
+                            rootForGrouping[grouping].insert(parent);
+                        }
+                        
+                        index = parent;        
+                    }
+                    
+                    //get all nodes above the root to add so we don't add their u values above
+                    index = *(rootForGrouping[grouping].begin());
+                    while(pDataArray->t->tree[index].getParent() != -1){
+                        int parent = pDataArray->t->tree[index].getParent();
+                        rootForGrouping[grouping].insert(parent);
+                        //cout << parent << " in root" << endl;
+                        index = parent;
+                    }
+                }
+                /////////////////////////////////////////////////////////////////////////////
+                
+                               for(int i=0;i<pDataArray->t->getNumNodes();i++){
+                                       
+                                       if (pDataArray->m->control_pressed) {  return 0; }
+                                       //cout << i << endl;
+                                       //pcountSize = 0, they are from a branch that is entirely from a group the user doesn't want
+                                       //pcountSize = 2, not unique to one group
+                                       //pcountSize = 1, unique to one group
+                                       
+                                       int pcountSize = 0;
+                                       for (int j = 0; j < pDataArray->namesOfGroupCombos[h].size(); j++) {
+                                               map<string, int>::iterator itGroup = pDataArray->t->tree[i].pcount.find(pDataArray->namesOfGroupCombos[h][j]);
+                                               if (itGroup != pDataArray->t->tree[i].pcount.end()) { pcountSize++; if (pcountSize > 1) { break; } }
+                                       }
+                                       
+                                       
+                                       //unique calc
+                                       if (pcountSize == 0) { }
+                                       else if ((pDataArray->t->tree[i].getBranchLength() != -1) && (pcountSize == 1) && (rootForGrouping[pDataArray->namesOfGroupCombos[h]].count(i) == 0)) { //you have a unique branch length and you are not the root
+                                               UniqueBL += abs(pDataArray->t->tree[i].getBranchLength());
+                                       }
+                    
+                                       //total calc
+                                       if (pcountSize == 0) { }
+                                       else if ((pDataArray->t->tree[i].getBranchLength() != -1) && (pcountSize != 0) && (rootForGrouping[pDataArray->namesOfGroupCombos[h]].count(i) == 0)) { //you have a branch length and you are not the root
+                                               totalBL += abs(pDataArray->t->tree[i].getBranchLength());
+                                       }
+                               }
+                //cout << UniqueBL << '\t' << totalBL << endl;
+                               UW = (UniqueBL / totalBL);
+                
+                               if (isnan(UW) || isinf(UW)) { UW = 0; }
+                
+                               pDataArray->results[count] = UW;
+                       }
+                       count++;
+               }
+               
+               return 0;
+        
+    }
+       catch(exception& e) {
+               pDataArray->m->errorOut(e, "UnWeighted", "MyUnWeightedThreadFunction");
+               exit(1);
+       }
+}
+/**************************************************************************************************/
+
+static DWORD WINAPI MyUnWeightedRandomThreadFunction(LPVOID lpParam){
+       unweightedData* pDataArray;
+       pDataArray = (unweightedData*)lpParam;
+       try {
+        pDataArray->results.resize(pDataArray->num);
+               
+               int count = 0;
+               
+               Tree* copyTree = new Tree(pDataArray->ct);
+               
+               for (int h = pDataArray->start; h < (pDataArray->start+pDataArray->num); h++) {
+            
+                       if (pDataArray->m->control_pressed) { return 0; }
+            
+            map< vector<string>, set<int> > rootForGrouping;
+            
+                       //copy random tree passed in
+                       copyTree->getCopy(pDataArray->t);
+            
+                       //swap labels in the groups you want to compare
+                       copyTree->assembleRandomUnifracTree(pDataArray->namesOfGroupCombos[h]);
+                       
+                       double UniqueBL=0.0000;  //a branch length is unique if it's chidren are from the same group
+                       double totalBL = 0.00;  //all branch lengths
+                       double UW = 0.00;               //Unweighted Value = UniqueBL / totalBL;
+                       //find a node that belongs to one of the groups in this combo
+                       int nodeBelonging = -1;
+                       for (int g = 0; g < pDataArray->namesOfGroupCombos[h].size(); g++) {
+                               if (copyTree->groupNodeInfo[pDataArray->namesOfGroupCombos[h][g]].size() != 0) { nodeBelonging = copyTree->groupNodeInfo[pDataArray->namesOfGroupCombos[h][g]][0]; break; }
+                       }
+                       
+                       //sanity check
+                       if (nodeBelonging == -1) {
+                               pDataArray->m->mothurOut("[WARNING]: cannot find a nodes in the tree from grouping ");
+                               for (int g = 0; g < pDataArray->namesOfGroupCombos[h].size()-1; g++) { pDataArray->m->mothurOut(pDataArray->namesOfGroupCombos[h][g] + "-"); }
+                               pDataArray->m->mothurOut(pDataArray->namesOfGroupCombos[h][pDataArray->namesOfGroupCombos[h].size()-1]);
+                               pDataArray->m->mothurOut(", skipping."); pDataArray->m->mothurOutEndLine(); pDataArray->results[count] = UW;
+                       }else{
+                               
+                               //if including the root this clears rootForGrouping[namesOfGroupCombos[h]]
+                               //getRoot(copyTree, nodeBelonging, namesOfGroupCombos[h]);
+                /////////////////////////////////////////////////////////////////////////////
+                //you are a leaf so get your parent
+                vector<string> grouping = pDataArray->namesOfGroupCombos[h];
+                int index = copyTree->tree[nodeBelonging].getParent();
+                
+                if (pDataArray->includeRoot) {
+                    rootForGrouping[grouping].clear();
+                }else {
+                    
+                    //my parent is a potential root
+                    rootForGrouping[grouping].insert(index);
+                    
+                    //while you aren't at root
+                    while(copyTree->tree[index].getParent() != -1){
+                        //cout << index << endl;
+                        if (pDataArray->m->control_pressed) {  return 0; }
+                        
+                        //am I the root for this grouping? if so I want to stop "early"
+                        //does my sibling have descendants from the users groups?
+                        //if so I am not the root
+                        int parent = copyTree->tree[index].getParent();
+                        int lc = copyTree->tree[parent].getLChild();
+                        int rc = copyTree->tree[parent].getRChild();
+                        
+                        int sib = lc;
+                        if (lc == index) { sib = rc; }
+                        
+                        map<string, int>::iterator itGroup;
+                        int pcountSize = 0;
+                        for (int j = 0; j < grouping.size(); j++) {
+                            map<string, int>::iterator itGroup = copyTree->tree[sib].pcount.find(grouping[j]);
+                            if (itGroup != copyTree->tree[sib].pcount.end()) { pcountSize++; if (pcountSize > 1) { break; } }
+                        }
+                        
+                        //if yes, I am not the root
+                        if (pcountSize != 0) {
+                            rootForGrouping[grouping].clear();
+                            rootForGrouping[grouping].insert(parent);
+                        }
+                        
+                        index = parent;
+                    }
+                    
+                    //get all nodes above the root to add so we don't add their u values above
+                    index = *(rootForGrouping[grouping].begin());
+                    while(copyTree->tree[index].getParent() != -1){
+                        int parent = copyTree->tree[index].getParent();
+                        rootForGrouping[grouping].insert(parent);
+                        //cout << parent << " in root" << endl;
+                        index = parent;
+                    }
+                }
+                /////////////////////////////////////////////////////////////////////////////
+                               for(int i=0;i<copyTree->getNumNodes();i++){
+                                       
+                                       if (pDataArray->m->control_pressed) {  return 0; }
+                                       
+                                       //pcountSize = 0, they are from a branch that is entirely from a group the user doesn't want
+                                       //pcountSize = 2, not unique to one group
+                                       //pcountSize = 1, unique to one group
+                    int pcountSize = 0;
+                                       for (int j = 0; j < pDataArray->namesOfGroupCombos[h].size(); j++) {
+                                               map<string, int>::iterator itGroup = copyTree->tree[i].pcount.find(pDataArray->namesOfGroupCombos[h][j]);
+                                               if (itGroup != copyTree->tree[i].pcount.end()) { pcountSize++; if (pcountSize > 1) { break; } }
+                                       }
+                                       
+                                       //unique calc
+                                       if (pcountSize == 0) { }
+                                       else if ((copyTree->tree[i].getBranchLength() != -1) && (pcountSize == 1) && (rootForGrouping[pDataArray->namesOfGroupCombos[h]].count(i) == 0)) { //you have a unique branch length and you are not the root
+                                               UniqueBL += abs(copyTree->tree[i].getBranchLength());
+                                       }
+                                       
+                                       //total calc
+                                       if (pcountSize == 0) { }
+                                       else if ((copyTree->tree[i].getBranchLength() != -1) && (pcountSize != 0) && (rootForGrouping[pDataArray->namesOfGroupCombos[h]].count(i) == 0)) { //you have a branch length and you are not the root
+                                               totalBL += abs(copyTree->tree[i].getBranchLength());
+                                       }
+                                       
+                               }
+                               cout << h << '\t' << UniqueBL << '\t' << totalBL << endl;
+                               UW = (UniqueBL / totalBL);
+                               
+                               if (isnan(UW) || isinf(UW)) { UW = 0; }
+                               
+                               pDataArray->results[count] = UW;
+                cout << h << '\t' << UW << endl;
+                       }
+                       count++;
+                       
+               }
+               
+               delete copyTree;
+               
+               return 0;
+    }
+       catch(exception& e) {
+               pDataArray->m->errorOut(e, "UnWeighted", "MyUnWeightedRandomThreadFunction");
+               exit(1);
+       }
+}
+
+#endif
+
 
 #endif