]> git.donarmstrong.com Git - mothur.git/commitdiff
removed unused copy constructors and comments within comments that were causing...
authorSarah Westcott <mothur.westcott@gmail.com>
Mon, 20 Feb 2012 20:33:22 +0000 (15:33 -0500)
committerSarah Westcott <mothur.westcott@gmail.com>
Mon, 20 Feb 2012 20:33:22 +0000 (15:33 -0500)
51 files changed:
Mothur.xcodeproj/project.pbxproj
alignmentdb.cpp
alignmentdb.h
bayesian.cpp
blastdb.hpp
chimeraperseuscommand.cpp
chimeraperseuscommand.h
chopseqscommand.cpp
classifyseqscommand.cpp
classifyseqscommand.h
classifytreecommand.cpp [new file with mode: 0644]
classifytreecommand.h [new file with mode: 0644]
clusterclassic.cpp
clustersplitcommand.cpp
clustersplitcommand.h
commandfactory.cpp
countseqscommand.cpp
database.hpp
decalc.cpp
distancecommand.cpp
distancedb.cpp
distancedb.hpp
eachgapdist.h
hcluster.cpp
ignoregaps.h
kmerdb.hpp
maligner.cpp
mothurmetastats.cpp
mothurout.cpp
mothurout.h
myPerseus.cpp
onegapdist.h
onegapignore.h
phylodiversity.cpp
preclustercommand.cpp
referencedb.cpp
seqnoise.cpp
sequence.hpp
sharedrabundfloatvector.cpp
sharedrabundvector.cpp
shhhercommand.cpp
shhhercommand.h
suffixdb.hpp
suffixnodes.hpp
suffixtree.cpp
suffixtree.hpp
tree.cpp
trimoligos.cpp
trimoligos.h
trimseqscommand.cpp
trimseqscommand.h

index 2e61594d5719fa20176872c5c35140882901a649..702e3bb8471e855b9a3a0e1e2546e564e7e0d135 100644 (file)
                A7E9B98D12D37EC400DA6239 /* weighted.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B87C12D37EC400DA6239 /* weighted.cpp */; };
                A7E9B98E12D37EC400DA6239 /* weightedlinkage.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B87E12D37EC400DA6239 /* weightedlinkage.cpp */; };
                A7E9B98F12D37EC400DA6239 /* whittaker.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B87F12D37EC400DA6239 /* whittaker.cpp */; };
+               A7EEB0F514F29BFE00344B83 /* classifytreecommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7EEB0F414F29BFD00344B83 /* classifytreecommand.cpp */; };
                A7F9F5CF141A5E500032F693 /* sequenceparser.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7F9F5CE141A5E500032F693 /* sequenceparser.cpp */; };
                A7FA10021302E097003860FE /* mantelcommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7FA10011302E096003860FE /* mantelcommand.cpp */; };
                A7FA2AC714A0E881007C09A6 /* bsplvb.f in Sources */ = {isa = PBXBuildFile; fileRef = A7FA2ABC14A0E881007C09A6 /* bsplvb.f */; };
                A7E9B87E12D37EC400DA6239 /* weightedlinkage.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = weightedlinkage.cpp; sourceTree = SOURCE_ROOT; };
                A7E9B87F12D37EC400DA6239 /* whittaker.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = whittaker.cpp; sourceTree = "<group>"; };
                A7E9B88012D37EC400DA6239 /* whittaker.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = whittaker.h; sourceTree = "<group>"; };
+               A7EEB0F414F29BFD00344B83 /* classifytreecommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = classifytreecommand.cpp; sourceTree = "<group>"; };
+               A7EEB0F714F29C1B00344B83 /* classifytreecommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = classifytreecommand.h; sourceTree = "<group>"; };
                A7F9F5CD141A5E500032F693 /* sequenceparser.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = sequenceparser.h; sourceTree = "<group>"; };
                A7F9F5CE141A5E500032F693 /* sequenceparser.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = sequenceparser.cpp; sourceTree = "<group>"; };
                A7FA10001302E096003860FE /* mantelcommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = mantelcommand.h; sourceTree = "<group>"; };
                                A7E9B69012D37EC400DA6239 /* classifyotucommand.cpp */,
                                A7E9B69312D37EC400DA6239 /* classifyseqscommand.h */,
                                A7E9B69212D37EC400DA6239 /* classifyseqscommand.cpp */,
+                               A7EEB0F714F29C1B00344B83 /* classifytreecommand.h */,
+                               A7EEB0F414F29BFD00344B83 /* classifytreecommand.cpp */,
                                A7E9B69712D37EC400DA6239 /* clearcutcommand.h */,
                                A7E9B69612D37EC400DA6239 /* clearcutcommand.cpp */,
                                A73DDBB813C4A0D1006AAE38 /* clearmemorycommand.h */,
                                A7FA2B5B14A0F0C2007C09A6 /* intrv.f in Sources */,
                                A7A3C8C914D041AD00B1BFBE /* otuassociationcommand.cpp in Sources */,
                                A7A32DAA14DC43B00001D2E5 /* sortseqscommand.cpp in Sources */,
+                               A7EEB0F514F29BFE00344B83 /* classifytreecommand.cpp in Sources */,
                        );
                        runOnlyForDeploymentPostprocessing = 0;
                };
                                GCC_DYNAMIC_NO_PIC = NO;
                                GCC_ENABLE_FIX_AND_CONTINUE = YES;
                                GCC_MODEL_TUNING = G5;
-                               GCC_OPTIMIZATION_LEVEL = 3;
+                               GCC_OPTIMIZATION_LEVEL = 0;
                                INSTALL_PATH = /usr/local/bin;
                                PRODUCT_NAME = Mothur;
                                SDKROOT = macosx10.6;
                                GCC_ENABLE_SSE3_EXTENSIONS = NO;
                                GCC_ENABLE_SSE41_EXTENSIONS = NO;
                                GCC_ENABLE_SSE42_EXTENSIONS = NO;
-                               GCC_OPTIMIZATION_LEVEL = 3;
+                               GCC_OPTIMIZATION_LEVEL = 0;
                                GCC_PREPROCESSOR_DEFINITIONS = (
                                        "MOTHUR_FILES=\"\\\"../release\\\"\"",
                                        "VERSION=\"\\\"1.23.0\\\"\"",
                                GCC_C_LANGUAGE_STANDARD = gnu99;
                                GCC_GENERATE_DEBUGGING_SYMBOLS = NO;
                                GCC_MODEL_TUNING = "";
-                               GCC_OPTIMIZATION_LEVEL = 3;
+                               GCC_OPTIMIZATION_LEVEL = 0;
                                GCC_PREPROCESSOR_DEFINITIONS = (
                                        "VERSION=\"\\\"1.19.0\\\"\"",
                                        "RELEASE_DATE=\"\\\"5/9/2011\\\"\"",
index df4eaec783dbb81e33c09c3d97473c02425ba1ce..f59c5db7d6e6ef2d039ccdda8b48a6ea3fb6428a 100644 (file)
 #include "blastdb.hpp"
 #include "referencedb.h"
 
-/**************************************************************************************************/
-//deep copy
-AlignmentDB::AlignmentDB(const AlignmentDB& adb) : numSeqs(adb.numSeqs), longest(adb.longest), method(adb.method), emptySequence(adb.emptySequence), threadID(adb.threadID) {
-       try {
-               
-               m = MothurOut::getInstance();
-               if (adb.method == "blast") {
-                       search = new BlastDB(*((BlastDB*)adb.search));
-               }else if(adb.method == "kmer") {
-                       search = new KmerDB(*((KmerDB*)adb.search));
-               }else if(adb.method == "suffix") {
-                       search = new SuffixDB(*((SuffixDB*)adb.search));
-               }else {
-                       m->mothurOut("[ERROR]: cannot create copy of alignment database, unrecognized method - " + adb.method); m->mothurOutEndLine();
-               }
-               
-               for (int i = 0; i < adb.templateSequences.size(); i++) {
-                       Sequence temp(adb.templateSequences[i]);
-                       templateSequences.push_back(temp);
-               }
-       }
-       catch(exception& e) {
-               m->errorOut(e, "AlignmentDB", "AlignmentDB");
-               exit(1);
-       }
-       
-}
 /**************************************************************************************************/
 AlignmentDB::AlignmentDB(string fastaFileName, string s, int kmerSize, float gapOpen, float gapExtend, float match, float misMatch, int tid){          //      This assumes that the template database is in fasta format, may 
        try {                                                                                   //      need to alter this in the future?
index 900aadc6eb8be36b9c0193f8d255a2fdc8312269..537af8d21548e118a8d7470133be1acd4aab11fb 100644 (file)
@@ -22,7 +22,6 @@ public:
 
        AlignmentDB(string, string, int, float, float, float, float, int);  //reads fastafile passed in and stores sequences
        AlignmentDB(string);
-       AlignmentDB(const AlignmentDB& adb);
        ~AlignmentDB();
        
        Sequence findClosestSequence(Sequence*);
index f7ea6e4351868a20a191169b995e94faff6fa053..eca63b1a9168613a92232777e520efcaa586f328 100644 (file)
@@ -111,10 +111,9 @@ Classify(), kmerSize(ksize), confidenceThreshold(cutoff), iters(i) {
                                //initialze probabilities
                                wordGenusProb.resize(numKmers);
                                WordPairDiffArr.resize(numKmers);
-                       //cout << numKmers << '\t' << genusNodes.size() << endl;
+                       
                                for (int j = 0; j < wordGenusProb.size(); j++) {        wordGenusProb[j].resize(genusNodes.size());             }
-                       //cout << numKmers << '\t' << genusNodes.size() << endl;        
-                               ofstream out;
+                    ofstream out;
                                ofstream out2;
                                
                                #ifdef USE_MPI
@@ -505,7 +504,7 @@ map<string, int> Bayesian::parseTaxMap(string newTax) {
                exit(1);
        }
 }
-/**************************************************************************************************/
+**************************************************************************************************/
 void Bayesian::readProbFile(ifstream& in, ifstream& inNum, string inName, string inNumName) {
        try{
                
index e2f4f57180560ae287c644a527553abde89e45e6..50a8379b68ec89130b7df1892aab4538daa77a1e 100644 (file)
@@ -18,8 +18,6 @@ class BlastDB : public Database {
 public:
        BlastDB(string, float, float, float, float, string, int);
        BlastDB(string, int);
-       BlastDB(const BlastDB& bdb) : dbFileName(bdb.dbFileName), queryFileName(bdb.queryFileName), blastFileName(bdb.blastFileName), path(bdb.path),
-                                                                       count(bdb.count), gapOpen(bdb.gapOpen), gapExtend(bdb.gapExtend), match(bdb.match), misMatch(bdb.misMatch), Database(bdb) {}
        ~BlastDB();
        
        void generateDB();
index 8eaf536040a5703bbd7d9f4b93329f79cf6c6830..76f0103c38ec64a5ebc21f3175df441a15ccae0e 100644 (file)
@@ -533,6 +533,7 @@ vector<seqData> ChimeraPerseusCommand::loadSequences(SequenceParser& parser, str
                
                vector<seqData> sequences;
                bool error = false;
+        alignLength = 0;
                
                for (int i = 0; i < thisGroupsSeqs.size(); i++) {
                
@@ -543,6 +544,7 @@ vector<seqData> ChimeraPerseusCommand::loadSequences(SequenceParser& parser, str
                        else {
                                int num = m->getNumNames(it->second);
                                sequences.push_back(seqData(thisGroupsSeqs[i].getName(), thisGroupsSeqs[i].getUnaligned(), num));
+                if (thisGroupsSeqs[i].getUnaligned().length() > alignLength) { alignLength = thisGroupsSeqs[i].getUnaligned().length(); }
                        }
                }
                
@@ -570,7 +572,8 @@ vector<seqData> ChimeraPerseusCommand::readFiles(string inputFile, string name){
                bool error = false;
                ifstream in;
                m->openInputFile(inputFile, in);
-               
+               alignLength = 0;
+        
                while (!in.eof()) {
                        
                        if (m->control_pressed) { in.close(); return sequences; }
@@ -581,6 +584,7 @@ vector<seqData> ChimeraPerseusCommand::readFiles(string inputFile, string name){
                        if (it == nameMap.end()) { error = true; m->mothurOut("[ERROR]: " + temp.getName() + " is in your fasta file and not in your namefile, please correct."); m->mothurOutEndLine(); }
                        else {
                                sequences.push_back(seqData(temp.getName(), temp.getUnaligned(), it->second));
+                if (temp.getUnaligned().length() > alignLength) { alignLength = temp.getUnaligned().length(); }
                        }
                }
                in.close();
@@ -625,7 +629,7 @@ int ChimeraPerseusCommand::driver(string chimeraFileName, vector<seqData>& seque
                }
                
                int numSeqs = sequences.size();
-               int alignLength = sequences[0].sequence.size();
+               //int alignLength = sequences[0].sequence.size();
                
                ofstream chimeraFile;
                ofstream accnosFile;
@@ -641,7 +645,7 @@ int ChimeraPerseusCommand::driver(string chimeraFileName, vector<seqData>& seque
                
                for(int i=0;i<numSeqs;i++){     
                        if (m->control_pressed) { chimeraFile.close(); m->mothurRemove(chimeraFileName); accnosFile.close(); m->mothurRemove(accnosFileName); return 0; }
-                       
+    
                        vector<bool> restricted = chimeras;
                        
                        vector<vector<int> > leftDiffs(numSeqs);
@@ -662,7 +666,9 @@ int ChimeraPerseusCommand::driver(string chimeraFileName, vector<seqData>& seque
                        
                        string dummyA, dummyB;
                        
-                       if(comparisons >= 2){   
+            if (sequences[i].sequence.size() < 3) { 
+                chimeraFile << i << '\t' << sequences[i].seqName << "\t0\t0\tNull\t0\t0\t0\tNull\tNull\t0.0\t0.0\t0.0\t0\t0\t0\t0.0\t0.0\tgood" << endl;
+            }else if(comparisons >= 2){        
                                minMismatchToChimera = myPerseus.getChimera(sequences, leftDiffs, rightDiffs, leftParentBi, rightParentBi, breakPointBi, singleLeft, bestLeft, singleRight, bestRight, restricted);
                                if (m->control_pressed) { chimeraFile.close(); m->mothurRemove(chimeraFileName); accnosFile.close(); m->mothurRemove(accnosFileName); return 0; }
 
index 84563ac6ecc006ca7150f7f5e025b340da964fdf..45627500f6f8f9944aee508db3ffbbd8960956c1 100644 (file)
@@ -44,7 +44,7 @@ private:
        
        bool abort;
        string fastafile, groupfile, outputDir, namefile;
-       int processors;
+       int processors, alignLength;
        double cutoff, alpha, beta;
        
        vector<string> outputNames;
index 68576cdb521efc994bedf2d3b2d40cc98a740b9c..4e06201cd352104b5ee9c9d1c41764d16473003e 100644 (file)
@@ -233,7 +233,7 @@ string ChopSeqsCommand::getChopped(Sequence seq) {
                                        
                                        for (int i = 0; i < temp.length(); i++) {
                                                //eliminate N's
-                                               if (toupper(temp[i]) == 'N') { temp[i] == '.'; }
+                                               if (toupper(temp[i]) == 'N') { temp[i] = '.'; }
                                                
                                                numBasesCounted++; 
                                                
@@ -255,7 +255,7 @@ string ChopSeqsCommand::getChopped(Sequence seq) {
                                        
                                        for (int i = (temp.length()-1); i >= 0; i--) {
                                                //eliminate N's
-                                               if (toupper(temp[i]) == 'N') { temp[i] == '.'; }
+                                               if (toupper(temp[i]) == 'N') { temp[i] = '.'; }
                                                
                                                numBasesCounted++; 
 
@@ -283,7 +283,7 @@ string ChopSeqsCommand::getChopped(Sequence seq) {
                                        for (int i = 0; i < temp.length(); i++) {
                                                //eliminate N's
                                                if (toupper(temp[i]) == 'N') { 
-                                                       temp[i] == '.'; 
+                                                       temp[i] = '.'; 
                                                        tempLength--;
                                                        if (tempLength < numbases) { stopSpot = 0; break; }
                                                }
@@ -309,7 +309,7 @@ string ChopSeqsCommand::getChopped(Sequence seq) {
                                        for (int i = (temp.length()-1); i >= 0; i--) {
                                                //eliminate N's
                                                if (toupper(temp[i]) == 'N') { 
-                                                       temp[i] == '.'; 
+                                                       temp[i] = '.'; 
                                                        tempLength--;
                                                        if (tempLength < numbases) { stopSpot = 0; break; }
                                                }
index 328cd58f1f32bc265de857de4ede5aa557c2cd94..794b961dcc29ecdc79bf063d116887b4cf8c30ac 100644 (file)
@@ -881,7 +881,7 @@ int ClassifySeqsCommand::createProcesses(string taxFileName, string tempTaxFile,
                        string extension = "";
                        if (i != 0) { extension = toString(i) + ".temp"; processIDS.push_back(i); }
                        
-                       classifyData* tempclass = new classifyData((accnos + extension), probs, method, templateFileName, taxonomyFileName, (taxFileName + extension), (tempTaxFile + extension), filename, search, kmerSize, iters, numWanted, m, lines[i]->start, lines[i]->end, match, misMatch, gapOpen, gapExtend, cutoff, i, flipThreshold);
+                       classifyData* tempclass = new classifyData((accnos + extension), probs, method, templateFileName, taxonomyFileName, (taxFileName + extension), (tempTaxFile + extension), filename, search, kmerSize, iters, numWanted, m, lines[i]->start, lines[i]->end, match, misMatch, gapOpen, gapExtend, cutoff, i, flip);
                        pDataArray.push_back(tempclass);
                        
                        //MySeqSumThreadFunction is in header. It must be global or static to work with the threads.
index 0bf4a9154412080d28fa065ab33789279832fbeb..f0c67ba3d1ff812cef4026c43a81503b9379eec4 100644 (file)
@@ -163,7 +163,7 @@ static DWORD WINAPI MyClassThreadFunction(LPVOID lpParam){
                //make classify
                Classify* myclassify;
                if(pDataArray->method == "bayesian"){   myclassify = new Bayesian(pDataArray->taxonomyFileName, pDataArray->templateFileName, pDataArray->search, pDataArray->kmerSize, pDataArray->cutoff, pDataArray->iters, pDataArray->threadID, pDataArray->flip);         }
-               else if(pDataArray->method == "knn"){   myclassify = new Knn(pDataArray->taxonomyFileName, pDataArray->templateFileName, pDataArray->search, pDataArray->kmerSize, pDataArray->gapOpen, pDataArray->gapExtend, pDataArray->match, pDataArray->misMatch, pDataArray->numWanted, pDataArray->threadID, pDataArray->flipThreshold);                                }
+               else if(pDataArray->method == "knn"){   myclassify = new Knn(pDataArray->taxonomyFileName, pDataArray->templateFileName, pDataArray->search, pDataArray->kmerSize, pDataArray->gapOpen, pDataArray->gapExtend, pDataArray->match, pDataArray->misMatch, pDataArray->numWanted, pDataArray->threadID);                           }
                else {
                        pDataArray->m->mothurOut(pDataArray->search + " is not a valid method option. I will run the command using bayesian.");
                        pDataArray->m->mothurOutEndLine();
diff --git a/classifytreecommand.cpp b/classifytreecommand.cpp
new file mode 100644 (file)
index 0000000..e1ef6d3
--- /dev/null
@@ -0,0 +1,579 @@
+//
+//  classifytreecommand.cpp
+//  Mothur
+//
+//  Created by Sarah Westcott on 2/20/12.
+//  Copyright (c) 2012 Schloss Lab. All rights reserved.
+//
+
+#include "classifytreecommand.h"
+#include "phylotree.h"
+
+//**********************************************************************************************************************
+vector<string> ClassifyTreeCommand::setParameters(){   
+       try {
+               CommandParameter ptree("tree", "InputTypes", "", "", "", "", "none",false,true); parameters.push_back(ptree);
+        CommandParameter ptaxonomy("taxonomy", "InputTypes", "", "", "", "", "none",false,true); parameters.push_back(ptaxonomy);
+        CommandParameter pname("name", "InputTypes", "", "", "", "", "none",false,false); parameters.push_back(pname);
+        CommandParameter pgroup("group", "InputTypes", "", "", "", "", "none",false,false); parameters.push_back(pgroup);
+        CommandParameter pcutoff("cutoff", "Number", "", "51", "", "", "",false,true); parameters.push_back(pcutoff);
+               CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
+               CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
+               
+               vector<string> myArray;
+               for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
+               return myArray;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "ClassifyTreeCommand", "setParameters");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+string ClassifyTreeCommand::getHelpString(){   
+       try {
+               string helpString = "";
+               helpString += "The classify.tree command reads a tree and taxonomy file and output the concensus taxonomy for each node on the tree. \n";
+               helpString += "If you provide a group file, the concensus for each group will also be provided. \n";
+               helpString += "The new tree contains labels at each internal node.  The label is the node number so you can relate the tree to the summary file.\n";
+               helpString += "The summary file lists the concensus taxonomy for the descendants of each node.\n";
+               helpString += "The classify.tree command parameters are tree, group, name and taxonomy. The tree and taxonomy files are required.\n";
+        helpString += "The cutoff parameter allows you to specify a consensus confidence threshold for your taxonomy.  The default is 51, meaning 51%. Cutoff cannot be below 51.\n";
+        helpString += "The classify.tree command should be used in the following format: classify.tree(tree=test.tre, group=test.group, taxonomy=test.taxonomy)\n";
+               helpString += "Note: No spaces between parameter labels (i.e. tree), '=' and parameters (i.e.yourTreefile).\n"; 
+               return helpString;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "ClassifyTreeCommand", "getHelpString");
+               exit(1);
+       }
+}
+
+//**********************************************************************************************************************
+ClassifyTreeCommand::ClassifyTreeCommand(){    
+       try {
+               abort = true; calledHelp = true; 
+               setParameters();
+               vector<string> tempOutNames;
+               outputTypes["tree"] = tempOutNames;
+               outputTypes["summary"] = tempOutNames;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "ClassifyTreeCommand", "ClassifyTreeCommand");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+ClassifyTreeCommand::ClassifyTreeCommand(string option)  {
+       try {
+               abort = false; calledHelp = false;   
+               
+               //allow user to run help
+               if(option == "help") { help(); abort = true; calledHelp = true; }
+               else if(option == "citation") { citation(); abort = true; calledHelp = true;}
+               
+               else {
+                       vector<string> myArray = setParameters();
+                       
+                       OptionParser parser(option);
+                       map<string, string> parameters = parser.getParameters();
+                       
+                       ValidParameters validParameter;
+                       map<string, string>::iterator it;
+                       
+                       //check to make sure all parameters are valid for command
+                       for (it = parameters.begin(); it != parameters.end(); it++) { 
+                               if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
+                       }
+                       
+                       m->runParse = true;
+                       m->clearGroups();
+                       m->clearAllGroups();
+                       m->Treenames.clear();
+                       m->names.clear();
+                       
+                       vector<string> tempOutNames;
+                       outputTypes["tree"] = tempOutNames;
+                       outputTypes["summary"] = tempOutNames;
+                       
+                       //if the user changes the input directory command factory will send this info to us in the output parameter 
+                       string inputDir = validParameter.validFile(parameters, "inputdir", false);              
+                       if (inputDir == "not found"){   inputDir = "";          }
+                       else {
+                               string path;
+                               it = parameters.find("tree");
+                               //user has given a template file
+                               if(it != parameters.end()){ 
+                                       path = m->hasPath(it->second);
+                                       //if the user has not given a path then, add inputdir. else leave path alone.
+                                       if (path == "") {       parameters["tree"] = inputDir + it->second;             }
+                               }
+                               
+                               it = parameters.find("name");
+                               //user has given a template file
+                               if(it != parameters.end()){ 
+                                       path = m->hasPath(it->second);
+                                       //if the user has not given a path then, add inputdir. else leave path alone.
+                                       if (path == "") {       parameters["name"] = inputDir + it->second;             }
+                               }
+                               
+                               it = parameters.find("group");
+                               //user has given a template file
+                               if(it != parameters.end()){ 
+                                       path = m->hasPath(it->second);
+                                       //if the user has not given a path then, add inputdir. else leave path alone.
+                                       if (path == "") {       parameters["group"] = inputDir + it->second;            }
+                               }
+                               
+                               it = parameters.find("taxonomy");
+                               //user has given a template file
+                               if(it != parameters.end()){ 
+                                       path = m->hasPath(it->second);
+                                       //if the user has not given a path then, add inputdir. else leave path alone.
+                                       if (path == "") {       parameters["taxonomy"] = inputDir + it->second;         }
+                               }
+                       }
+                       
+                       outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = ""; }
+            
+                       //check for required parameters
+                       treefile = validParameter.validFile(parameters, "tree", true);
+                       if (treefile == "not open") { treefile = ""; abort = true; }
+                       else if (treefile == "not found") { treefile = ""; 
+                treefile = m->getTreeFile(); 
+                if (treefile != "") {  m->mothurOut("Using " + treefile + " as input file for the tree parameter."); m->mothurOutEndLine(); }
+                else { m->mothurOut("No valid current files. You must provide a tree file."); m->mothurOutEndLine(); abort = true; }
+            }else { m->setTreeFile(treefile); }        
+            
+            taxonomyfile = validParameter.validFile(parameters, "taxonomy", true);
+                       if (taxonomyfile == "not open") { taxonomyfile = ""; abort = true; }
+                       else if (taxonomyfile == "not found") { taxonomyfile = ""; 
+                taxonomyfile = m->getTaxonomyFile(); 
+                if (taxonomyfile != "") {  m->mothurOut("Using " + taxonomyfile + " as input file for the taxonomy parameter."); m->mothurOutEndLine(); }
+                else { m->mothurOut("No valid current files. You must provide a taxonomy file."); m->mothurOutEndLine(); abort = true; }
+            }else { m->setTaxonomyFile(taxonomyfile); }        
+                       
+                       namefile = validParameter.validFile(parameters, "name", true);
+                       if (namefile == "not open") { namefile = ""; abort = true; }
+                       else if (namefile == "not found") { namefile = ""; }
+                       else { m->setNameFile(namefile); }
+                       
+                       groupfile = validParameter.validFile(parameters, "group", true);
+                       if (groupfile == "not open") { groupfile = ""; abort = true; }
+                       else if (groupfile == "not found") { groupfile = ""; }
+                       else { m->setGroupFile(groupfile); }
+            
+            string temp = validParameter.validFile(parameters, "cutoff", false);                       if (temp == "not found") { temp = "51"; }
+                       m->mothurConvert(temp, cutoff); 
+                       
+                       if ((cutoff < 51) || (cutoff > 100)) { m->mothurOut("cutoff must be above 50, and no greater than 100."); m->mothurOutEndLine(); abort = true;  }
+            
+            if (namefile == "") {
+                               vector<string> files; files.push_back(treefile);
+                               parser.getNameFile(files);
+                       }
+                       
+               }
+       }
+       catch(exception& e) {
+               m->errorOut(e, "ClassifyTreeCommand", "ClassifyTreeCommand");           
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+
+// Entry point for the classify.tree command. Builds a TreeMap from the group
+// file (or fakes one group when none is given), reads the newick tree, reads
+// the optional name file and the taxonomy file, then labels every internal
+// node with a consensus taxonomy via getClassifications().
+// Returns 0 on success or user interrupt, 2 when setup flagged abort.
+int ClassifyTreeCommand::execute(){
+       try {
+               
+               if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
+               
+               cout.setf(ios::fixed, ios::floatfield); cout.setf(ios::showpoint);
+               
+               int start = time(NULL);
+        
+               /***************************************************/
+               //    reading tree info                                                    //
+               /***************************************************/
+        m->setTreeFile(treefile);
+        if (groupfile != "") {
+                       //read in group map info.
+                       tmap = new TreeMap(groupfile);
+                       tmap->readMap();
+               }else{ //fake out by putting everyone in one group
+                       Tree* tree = new Tree(treefile); delete tree;  //extracts names from tree to make faked out groupmap
+                       tmap = new TreeMap();
+                       
+                       for (int i = 0; i < m->Treenames.size(); i++) { tmap->addSeq(m->Treenames[i], "Group1"); }
+               }
+               
+               if (namefile != "") { readNamesFile(); }
+               
+               read = new ReadNewickTree(treefile);
+               int readOk = read->read(tmap); 
+               
+               if (readOk != 0) { m->mothurOut("Read Terminated."); m->mothurOutEndLine(); delete tmap; delete read; return 0; }
+               
+               read->AssembleTrees();
+               vector<Tree*> T = read->getTrees();
+        Tree* outputTree = T[0]; 
+               delete read;
+               
+               //make sure all files match
+               //if you provide a namefile we will use the numNames in the namefile as long as the number of unique match the tree names size.
+               int numNamesInTree;
+               if (namefile != "")  {  
+                       if (numUniquesInName == m->Treenames.size()) {  numNamesInTree = nameMap.size();  }
+                       else {   numNamesInTree = m->Treenames.size();  }
+               }else {  numNamesInTree = m->Treenames.size();  }
+               
+               
+               //output any names that are in group file but not in tree
+               if (numNamesInTree < tmap->getNumSeqs()) {
+                       for (int i = 0; i < tmap->namesOfSeqs.size(); i++) {
+                               //is that name in the tree?
+                               int count = 0;
+                               for (int j = 0; j < m->Treenames.size(); j++) {
+                                       if (tmap->namesOfSeqs[i] == m->Treenames[j]) { break; } //found it
+                                       count++;
+                               }
+                               
+                               if (m->control_pressed) { 
+                                       delete tmap; for (int i = 0; i < T.size(); i++) { delete T[i]; }
+                                       for (int i = 0; i < outputNames.size(); i++) {  m->mothurRemove(outputNames[i]); } outputTypes.clear();
+                                       m->clearGroups();
+                                       return 0;
+                               }
+                               
+                               //then you did not find it so report it 
+                               if (count == m->Treenames.size()) { 
+                                       //if it is in your namefile then don't remove
+                                       map<string, string>::iterator it = nameMap.find(tmap->namesOfSeqs[i]);
+                                       
+                                       if (it == nameMap.end()) {
+                                               m->mothurOut(tmap->namesOfSeqs[i] + " is in your groupfile and not in your tree. It will be disregarded."); m->mothurOutEndLine();
+                                               tmap->removeSeq(tmap->namesOfSeqs[i]);
+                                               i--; //need this because removeSeq removes name from namesOfSeqs
+                                       }
+                               }
+                       }
+               }
+                        
+        // NOTE(review): only T[0] is freed on this path and below; if getTrees()
+        // ever returns more than one tree the extras leak -- confirm the
+        // single-tree assumption for this command.
+        if (m->control_pressed) { delete outputTree; delete tmap;  return 0; }
+               
+        readTaxonomyFile();
+        
+        
+        /***************************************************/
+        //             get consensus taxonomies                    //
+        /***************************************************/
+        getClassifications(outputTree);
+        delete outputTree; delete tmap;
+                       
+               if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {        m->mothurRemove(outputNames[i]);        } return 0; }
+               
+               //set tree file as new current treefile
+               if (treefile != "") {
+                       string current = "";
+                       itTypes = outputTypes.find("tree");
+                       if (itTypes != outputTypes.end()) {
+                               if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setTreeFile(current); }
+                       }
+               }
+               
+               m->mothurOutEndLine(); m->mothurOutEndLine(); m->mothurOut("It took " + toString(time(NULL) - start) + " secs to find the concensus taxonomies."); m->mothurOutEndLine();
+               m->mothurOutEndLine();
+               m->mothurOut("Output File Names: "); m->mothurOutEndLine();
+               for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
+               m->mothurOutEndLine();
+        
+               return 0;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "ClassifyTreeCommand", "execute");       
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+//traverse the tree, finding the consensus taxonomy at each internal node
+//label each node with a number that relates it to the output summary file
+//report all consensus taxonomies to file 
+// For every non-leaf node of T, compute the consensus taxonomy of its
+// descendant leaves (per group when a group file was supplied, otherwise for
+// the pooled "AllGroups" set), write one summary line per node/group, relabel
+// each internal node with its 1-based index, and print the labeled tree.
+// Returns 0; returns early (output files possibly incomplete) on ctrl-c.
+int ClassifyTreeCommand::getClassifications(Tree*& T){
+       try {
+               
+               string thisOutputDir = outputDir;
+               if (outputDir == "") {  thisOutputDir += m->hasPath(treefile);  }
+               string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(treefile)) + "taxonomy.summary";
+               outputNames.push_back(outputFileName); outputTypes["summary"].push_back(outputFileName);
+               
+               ofstream out;
+               m->openOutputFile(outputFileName, out);
+               out.setf(ios::fixed, ios::floatfield); out.setf(ios::showpoint);
+               
+               //print headings
+               out << "TreeNode\t";
+               if (groupfile != "") { out << "Group\t"; } 
+        out << "NumRep\tTaxonomy" << endl; 
+               
+               string treeOutputDir = outputDir;
+               if (outputDir == "") {  treeOutputDir += m->hasPath(treefile);  }
+               string outputTreeFileName = treeOutputDir + m->getRootName(m->getSimpleName(treefile)) + "taxonomy.tre";
+               
+               //create a map from tree node index to names of descendants, save time later
+               map<int, map<string, set<string> > > nodeToDescendants; //node# -> (groupName -> groupMembers)
+               for (int i = 0; i < T->getNumNodes(); i++) {
+                       if (m->control_pressed) { return 0; }
+                       
+                       // assumes a child's index is always smaller than its parent's, so the
+                       // descendant sets for lc/rc are already filled in -- TODO confirm Tree node ordering
+                       nodeToDescendants[i] = getDescendantList(T, i, nodeToDescendants);
+               }
+               
+               //for each node
+               for (int i = T->getNumLeaves(); i < T->getNumNodes(); i++) {
+                       
+                       if (m->control_pressed) { out.close(); return 0; }
+            
+                       string tax = "not classifed";
+            int size;
+            if (groupfile != "") {
+                for (map<string, set<string> >::iterator itGroups = nodeToDescendants[i].begin(); itGroups != nodeToDescendants[i].end(); itGroups++) {
+                    if (itGroups->first != "AllGroups") {
+                        tax = getTaxonomy(itGroups->second, size);
+                        out << (i+1) << '\t' << itGroups->first << '\t' << size << '\t' << tax << endl;
+                    }
+                }
+            }else {
+                string group = "AllGroups";
+                tax = getTaxonomy(nodeToDescendants[i][group], size);
+                out << (i+1) << '\t' << size << '\t' << tax << endl;
+            }
+                               
+                       T->tree[i].setLabel((i+1));
+               }
+               out.close();
+        
+               ofstream outTree;
+               m->openOutputFile(outputTreeFileName, outTree);
+               outputNames.push_back(outputTreeFileName); outputTypes["tree"].push_back(outputTreeFileName);
+               T->print(outTree, "both");
+               outTree.close();
+        
+               return 0;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "ClassifyTreeCommand", "GetConcensusTaxonomies");        
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+// Compute the consensus taxonomy for a set of sequence names using taxMap
+// (and nameMap/nameCount when a name file was supplied). 'size' is set to the
+// number of sequences actually folded into the consensus. At each taxonomic
+// level the child holding the most sequences is kept while its confidence
+// (percent of 'size') stays >= cutoff; remaining levels are padded with
+// "unclassified;". Returns "no_consensus;" when no level met the cutoff.
+string ClassifyTreeCommand::getTaxonomy(set<string> names, int& size) {
+       try{
+               string conTax = "";
+        size = 0;
+                       
+               //create a tree containing sequences from this bin
+               PhyloTree* phylo = new PhyloTree();
+               
+               for (set<string>::iterator it = names.begin(); it != names.end(); it++) {
+            
+            
+                       //if namesfile include the names
+                       if (namefile != "") {
+                
+                               //is this sequence in the name file - namemap maps seqName -> repSeqName
+                               map<string, string>::iterator it2 = nameMap.find(*it);
+                               
+                               if (it2 == nameMap.end()) { //this name is not in name file, skip it
+                                       m->mothurOut((*it) + " is not in your name file.  I will not include it in the consensus."); m->mothurOutEndLine();
+                               }else{
+                                       
+                                       //is this sequence in the taxonomy file - look for repSeqName since we are assuming the taxonomy file is unique
+                                       map<string, string>::iterator itTax = taxMap.find((it2->second));
+                    
+                                       if (itTax == taxMap.end()) { //this name is not in taxonomy file, skip it
+                        
+                                               if ((*it) != (it2->second)) { m->mothurOut((*it) + " is represented by " +  it2->second + " and is not in your taxonomy file.  I will not include it in the consensus."); m->mothurOutEndLine(); }
+                                               else {  m->mothurOut((*it) + " is not in your taxonomy file.  I will not include it in the consensus."); m->mothurOutEndLine(); }
+                                       }else{
+                                               //add seq to tree
+                        // NOTE(review): nameCount[*it] default-inserts 0 when *it is a
+                        // non-representative name; relies on nameMap/nameCount being
+                        // keyed consistently by readNamesFile -- confirm.
+                        int num = nameCount[(*it)]; // we know its there since we found it in nameMap
+                                               for (int i = 0; i < num; i++) {  phylo->addSeqToTree((*it)+toString(i), it2->second);  }
+                        size += num;
+                                       }
+                               }
+                               
+                       }else{
+                               //is this sequence in the taxonomy file - look for repSeqName since we are assuming the taxonomy file is unique
+                               map<string, string>::iterator itTax = taxMap.find((*it));
+                
+                               if (itTax == taxMap.end()) { //this name is not in taxonomy file, skip it
+                                       m->mothurOut((*it) + " is not in your taxonomy file.  I will not include it in the consensus."); m->mothurOutEndLine();
+                               }else{
+                                       //add seq to tree
+                                       phylo->addSeqToTree((*it), itTax->second);
+                    size++;
+                               }
+                       }
+            
+                       if (m->control_pressed) { delete phylo; return conTax; }
+                       
+               }
+               
+               //build tree
+               phylo->assignHeirarchyIDs(0);
+               
+               TaxNode currentNode = phylo->get(0);
+               int myLevel = 0;        
+               //at each level
+               while (currentNode.children.size() != 0) { //you still have more to explore
+            
+                       // NOTE(review): bestChild stays default-constructed if every child
+                       // reports accessions.size() == 0 -- presumably impossible once seqs
+                       // were added, but worth confirming.
+                       TaxNode bestChild;
+                       int bestChildSize = 0;
+                       
+                       //go through children
+                       for (map<string, int>::iterator itChild = currentNode.children.begin(); itChild != currentNode.children.end(); itChild++) {
+                               
+                               TaxNode temp = phylo->get(itChild->second);
+                               
+                               //select child with largest accesions - most seqs assigned to it
+                               // (unsigned size() compared against signed bestChildSize -- fine while counts stay small)
+                               if (temp.accessions.size() > bestChildSize) {
+                                       bestChild = phylo->get(itChild->second);
+                                       bestChildSize = temp.accessions.size();
+                               }
+                               
+                       }
+            
+                       //is this taxonomy above cutoff
+                       int consensusConfidence = ceil((bestChildSize / (float) size) * 100);
+                       
+                       if (consensusConfidence >= cutoff) { //if yes, add it
+                conTax += bestChild.name + "(" + toString(consensusConfidence) + ");";
+                               myLevel++;
+                       }else{ //if no, quit
+                               break;
+                       }
+                       
+                       //move down a level
+                       currentNode = bestChild;
+               }
+               
+               if (myLevel != phylo->getMaxLevel()) {
+                       while (myLevel != phylo->getMaxLevel()) {
+                               conTax += "unclassified;";
+                               myLevel++;
+                       }
+               }               
+               if (conTax == "") {  conTax = "no_consensus;";  }
+               
+               delete phylo;   
+        
+        return conTax;
+        
+       }
+       catch(exception& e) {
+               m->errorOut(e, "ClassifyTreeCommand", "getTaxonomy");
+               exit(1);
+       }
+}
+
+//**********************************************************************************************************************
+// Build the (groupName -> descendant leaf names) map for node i. A leaf maps
+// its own name under its group and under the pooled "AllGroups" key; an
+// internal node is the union of its two children's maps, which the caller is
+// expected to have computed already in 'descendants'.
+// NOTE(review): 'descendants' is passed by value, copying the whole
+// accumulated map on every call (O(n^2) copying over the tree); a const
+// reference would avoid this but requires the matching change in the header
+// declaration.
+map<string, set<string> > ClassifyTreeCommand::getDescendantList(Tree*& T, int i, map<int, map<string, set<string> > > descendants){
+       try {
+               map<string ,set<string> > names;
+               
+               map<string ,set<string> >::iterator it;
+        map<string ,set<string> >::iterator it2;
+               
+               int lc = T->tree[i].getLChild();
+               int rc = T->tree[i].getRChild();
+               
+               if (lc == -1) { //you are a leaf your only descendant is yourself
+            string group = tmap->getGroup(T->tree[i].getName());
+            set<string> mynames; mynames.insert(T->tree[i].getName());
+            names[group] = mynames; //mygroup -> me
+            names["AllGroups"] = mynames;
+               }else{ //your descedants are the combination of your childrens descendants
+                       names = descendants[lc];
+                       for (it = descendants[rc].begin(); it != descendants[rc].end(); it++) {
+                it2 = names.find(it->first); //do we already have this group
+                if (it2 == names.end()) { //nope, so add it
+                    names[it->first] = it->second;
+                }else {
+                    for (set<string>::iterator it3 = (it->second).begin(); it3 != (it->second).end(); it3++) {
+                        names[it->first].insert(*it3);
+                    }
+                }
+                               
+                       }
+               }
+               
+               return names;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "ClassifyTreeCommand", "getDescendantList");     
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+// Load taxonomyfile into taxMap (seqName -> taxonomy string), stripping any
+// "(confidence)" annotations. Returns 0; clears taxMap and returns early on
+// ctrl-c.
+int ClassifyTreeCommand::readTaxonomyFile() {
+       try {
+               
+               ifstream in;
+               m->openInputFile(taxonomyfile, in);
+               
+               string name, tax;
+        
+               while(!in.eof()){
+                       in >> name >> tax;              
+                       m->gobble(in);
+                       
+                       //are there confidence scores, if so remove them
+                       // NOTE(review): find_first_of returns string::size_type; comparing to
+                       // -1 works only via the implicit conversion to npos -- prefer
+                       // string::npos for clarity.
+                       if (tax.find_first_of('(') != -1) {  m->removeConfidences(tax); }
+                       
+                       taxMap[name] = tax;
+                       
+                       if (m->control_pressed) { in.close(); taxMap.clear(); return 0; }
+               }
+               in.close();
+               
+               return 0;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "ClassifyTreeCommand", "readTaxonomyFile");
+               exit(1);
+       }
+}
+
+/*****************************************************************/
+// Load namefile: each line is "repName<TAB>name1,name2,...". Fills
+// nameMap (each listed name -> its representative) and nameCount
+// (representative -> number of names it stands for). Returns 0; clears
+// nameMap and returns early on ctrl-c.
+int ClassifyTreeCommand::readNamesFile() {
+       try {
+               ifstream inNames;
+               m->openInputFile(namefile, inNames);
+               
+               string name, names;
+        
+               while(!inNames.eof()){
+                       inNames >> name;                        //read from first column  A
+                       inNames >> names;               //read from second column  A,B,C,D
+                       m->gobble(inNames);
+                       
+                       //parse names into vector
+                       vector<string> theseNames;
+                       m->splitAtComma(names, theseNames);
+            
+                       for (int i = 0; i < theseNames.size(); i++) {  nameMap[theseNames[i]] = name;  }
+            nameCount[name] = theseNames.size();
+                       
+                       if (m->control_pressed) { inNames.close(); nameMap.clear(); return 0; }
+               }
+               inNames.close();
+               
+               return 0;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "ClassifyTreeCommand", "readNamesFile");
+               exit(1);
+       }
+}
+
+/*****************************************************************/
+
+
diff --git a/classifytreecommand.h b/classifytreecommand.h
new file mode 100644 (file)
index 0000000..bd0e1ce
--- /dev/null
@@ -0,0 +1,53 @@
+#ifndef Mothur_classifytreecommand_h
+#define Mothur_classifytreecommand_h
+
+//
+//  classifytreecommand.h
+//  Mothur
+//
+//  Created by Sarah Westcott on 2/20/12.
+//  Copyright (c) 2012 Schloss Lab. All rights reserved.
+//
+
+#include "command.hpp"
+#include "readtree.h"
+#include "treemap.h"
+
+// classify.tree: assigns a consensus taxonomy to the descendants of every
+// internal node of a newick tree, using a taxonomy file plus optional
+// name/group files. Produces a *.taxonomy.summary file and a node-labeled
+// *.taxonomy.tre file.
+class ClassifyTreeCommand : public Command {
+public:
+       ClassifyTreeCommand(string);
+       ClassifyTreeCommand();
+       ~ClassifyTreeCommand(){}
+       
+       vector<string> setParameters();
+       string getCommandName()                 { return "classify.tree";                               }
+       string getCommandCategory()             { return "Phylotype Analysis";          }
+       string getHelpString(); 
+       string getCitation() { return "http://www.mothur.org/wiki/Classify.tree"; }
+       string getDescription()         { return "Find the concensus taxonomy for the descendant of each tree node"; }
+    
+       int execute();
+       void help() { m->mothurOut(getHelpString()); }  
+       
+private:
+       // newick reader; owned transiently inside execute()
+       ReadTree* read;
+       // group membership for tree leaves (faked single group when no group file)
+    TreeMap* tmap;
+       string treefile, taxonomyfile, groupfile, namefile, outputDir;
+       bool abort;
+       vector<string> outputNames;
+       // cutoff: minimum consensus confidence percent (51-100) accepted per level
+    int numUniquesInName, cutoff;
+       // nameMap: each sequence name -> its representative (from name file)
+    map<string, string> nameMap;
+       // nameCount: representative -> number of sequences it represents
+    map<string, int> nameCount;
+       // taxMap: sequence name -> taxonomy string (confidences stripped)
+    map<string, string> taxMap;
+       
+       int getClassifications(Tree*&);
+       map<string, set<string> > getDescendantList(Tree*&, int, map<int, map<string, set<string> > >);
+    string getTaxonomy(set<string>, int&);
+    int readNamesFile(); 
+    int readTaxonomyFile();
+       
+};
+
+
+
+#endif
index 0048dc6053d0bab06130eeaa6ab5d10b576aeb48..1ce81c4f2affda469164709f862bb36ff023c163 100644 (file)
@@ -434,11 +434,11 @@ void ClusterClassic::print() {
 try {
                //update location of seqs in smallRow since they move to smallCol now
                for (int i = 0; i < dMatrix.size(); i++) {
-                       cout << "row = " << i << '\t';
+                       m->mothurOut("row = " + toString(i) + "\t");
                        for (int j = 0; j < dMatrix[i].size(); j++) {
-                               cout << dMatrix[i][j] << '\t';
+                               m->mothurOut(toString(dMatrix[i][j]) + "\t");
                        }
-                       cout << endl;
+                       m->mothurOutEndLine();
                }
        }
        catch(exception& e) {
index 34caf654124886f9cd23638a149b2b3487ca53e2..bb9296fe93b25e663d8f0f226dfaf8b4fadc4b54 100644 (file)
@@ -8,12 +8,7 @@
  */
 
 #include "clustersplitcommand.h"
-#include "readcluster.h"
-#include "splitmatrix.h"
-#include "readphylip.h"
-#include "readcolumn.h"
-#include "readmatrix.hpp"
-#include "inputdata.h"
+
 
 
 //**********************************************************************************************************************
@@ -555,7 +550,7 @@ int ClusterSplitCommand::execute(){
                MPI_Barrier(MPI_COMM_WORLD);
                
        #else
-               
+               ///////////////////// WINDOWS CAN ONLY USE 1 PROCESSOR - ACCESS VIOLATION UNRESOLVED ///////////////////////
                //sanity check
                if (processors > distName.size()) { processors = distName.size(); }
                
@@ -563,66 +558,8 @@ int ClusterSplitCommand::execute(){
                                if(processors == 1){
                                        listFileNames = cluster(distName, labels); //clusters individual files and returns names of list files
                                }else{
-                                       
-                                       //cout << processors << '\t' << distName.size() << endl;
-                                       vector < vector < map<string, string> > > dividedNames; //distNames[1] = vector of filenames for process 1...
-                                       dividedNames.resize(processors);
-                                       
-                                       //for each file group figure out which process will complete it
-                                       //want to divide the load intelligently so the big files are spread between processes
-                                       for (int i = 0; i < distName.size(); i++) { 
-                                               //cout << i << endl;
-                                               int processToAssign = (i+1) % processors; 
-                                               if (processToAssign == 0) { processToAssign = processors; }
-                                               
-                                               dividedNames[(processToAssign-1)].push_back(distName[i]);
-                                       }
-                                       
-                                       //not lets reverse the order of ever other process, so we balance big files running with little ones
-                                       for (int i = 0; i < processors; i++) {
-                                               //cout << i << endl;
-                                               int remainder = ((i+1) % processors);
-                                               if (remainder) {  reverse(dividedNames[i].begin(), dividedNames[i].end());  }
-                                       }
-                                       
-                                       createProcesses(dividedNames);
-                                                       
-                                       if (m->control_pressed) { return 0; }
-
-                                       //get list of list file names from each process
-                                       for(int i=0;i<processors;i++){
-                                               string filename = toString(processIDS[i]) + ".temp";
-                                               ifstream in;
-                                               m->openInputFile(filename, in);
-                                               
-                                               in >> tag; m->gobble(in);
-                                               
-                                               while(!in.eof()) {
-                                                       string tempName;
-                                                       in >> tempName; m->gobble(in);
-                                                       listFileNames.push_back(tempName);
-                                               }
-                                               in.close();
-                                               m->mothurRemove((toString(processIDS[i]) + ".temp"));
-                                               
-                                               //get labels
-                                               filename = toString(processIDS[i]) + ".temp.labels";
-                                               ifstream in2;
-                                               m->openInputFile(filename, in2);
-                                               
-                                               float tempCutoff;
-                                               in2 >> tempCutoff; m->gobble(in2);
-                                               if (tempCutoff < cutoff) { cutoff = tempCutoff; }
-                                               
-                                               while(!in2.eof()) {
-                                                       string tempName;
-                                                       in2 >> tempName; m->gobble(in2);
-                                                       if (labels.count(tempName) == 0) { labels.insert(tempName); }
-                                               }
-                                               in2.close();
-                                               m->mothurRemove((toString(processIDS[i]) + ".temp.labels"));
-                                       }
-                               }
+                                       listFileNames = createProcesses(distName, labels);
+                }
                #else
                                listFileNames = cluster(distName, labels); //clusters individual files and returns names of list files
                #endif
@@ -904,12 +841,35 @@ void ClusterSplitCommand::printData(ListVector* oldList){
        }
 }
 //**********************************************************************************************************************
-int ClusterSplitCommand::createProcesses(vector < vector < map<string, string> > > dividedNames){
+vector<string>  ClusterSplitCommand::createProcesses(vector< map<string, string> > distName, set<string>& labels){
        try {
+        
+        vector<string> listFiles;
+        vector < vector < map<string, string> > > dividedNames; //distNames[1] = vector of filenames for process 1...
+        dividedNames.resize(processors);
+        
+        //for each file group figure out which process will complete it
+        //want to divide the load intelligently so the big files are spread between processes
+        for (int i = 0; i < distName.size(); i++) { 
+            //cout << i << endl;
+            int processToAssign = (i+1) % processors; 
+            if (processToAssign == 0) { processToAssign = processors; }
+            
+            dividedNames[(processToAssign-1)].push_back(distName[i]);
+            if ((processToAssign-1) == 1) { m->mothurOut(distName[i].begin()->first + "\n"); }
+        }
+        
+        //now let's reverse the order of every other process, so we balance big files running with little ones
+        for (int i = 0; i < processors; i++) {
+            //cout << i << endl;
+            int remainder = ((i+1) % processors);
+            if (remainder) {  reverse(dividedNames[i].begin(), dividedNames[i].end());  }
+        }
+        
+        if (m->control_pressed) { return listFiles; }
        
        #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
-               int process = 0;
-               int exitCommand = 1;
+               int process = 1;
                processIDS.clear();
                
                //loop through and create all the processes you want
@@ -950,14 +910,99 @@ int ClusterSplitCommand::createProcesses(vector < vector < map<string, string> >
                        }
                }
                
+        //do your part
+        listFiles = cluster(dividedNames[0], labels);
+        
                //force parent to wait until all the processes are done
-               for (int i=0;i<processors;i++) { 
+               for (int i=0;i< processIDS.size();i++) { 
                        int temp = processIDS[i];
                        wait(&temp);
                }
+        
+        //get list of list file names from each process
+        for(int i=0;i<processIDS.size();i++){
+            string filename = toString(processIDS[i]) + ".temp";
+            ifstream in;
+            m->openInputFile(filename, in);
+            
+            in >> tag; m->gobble(in);
+            
+            while(!in.eof()) {
+                string tempName;
+                in >> tempName; m->gobble(in);
+                listFiles.push_back(tempName);
+            }
+            in.close();
+            m->mothurRemove((toString(processIDS[i]) + ".temp"));
+            
+            //get labels
+            filename = toString(processIDS[i]) + ".temp.labels";
+            ifstream in2;
+            m->openInputFile(filename, in2);
+            
+            float tempCutoff;
+            in2 >> tempCutoff; m->gobble(in2);
+            if (tempCutoff < cutoff) { cutoff = tempCutoff; }
+            
+            while(!in2.eof()) {
+                string tempName;
+                in2 >> tempName; m->gobble(in2);
+                if (labels.count(tempName) == 0) { labels.insert(tempName); }
+            }
+            in2.close();
+            m->mothurRemove((toString(processIDS[i]) + ".temp.labels"));
+        }
+        
+
+    #else
+       
+        //////////////////////////////////////////////////////////////////////////////////////////////////////
+               //Windows version uses shared memory, so be careful when passing variables through the clusterData struct. 
+               //Above, fork() will clone, so memory is separate, but that's not the case with Windows. 
+               //Taking advantage of shared memory to allow both threads to add labels.
+               //////////////////////////////////////////////////////////////////////////////////////////////////////
                
-               return exitCommand;
+               vector<clusterData*> pDataArray; 
+               DWORD   dwThreadIdArray[processors-1];
+               HANDLE  hThreadArray[processors-1]; 
+               
+               //Create processor worker threads.
+               for( int i=1; i<processors; i++ ){
+                       // Allocate memory for thread data.
+                       clusterData* tempCluster = new clusterData(dividedNames[i], m, cutoff, method, outputDir, hard, precision, length, i);
+                       pDataArray.push_back(tempCluster);
+                       processIDS.push_back(i);
+            
+                       //MyClusterThreadFunction is in header. It must be global or static to work with the threads.
+                       //default security attributes, thread function name, argument to thread function, use default creation flags, returns the thread identifier
+                       hThreadArray[i-1] = CreateThread(NULL, 0, MyClusterThreadFunction, pDataArray[i-1], 0, &dwThreadIdArray[i-1]);  
+            
+               }
+        
+        //do your part
+        listFiles = cluster(dividedNames[0], labels);
+        
+               //Wait until all threads have terminated.
+               WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE);
+               
+               //Close all thread handles and free memory allocations.
+               for(int i=0; i < pDataArray.size(); i++){
+            //get tag
+            tag = pDataArray[i]->tag;
+            //get listfiles created
+            for(int j=0; j < pDataArray[i]->listFiles.size(); j++){ listFiles.push_back(pDataArray[i]->listFiles[j]); }
+            //get labels
+            set<string>::iterator it;
+            for(it = pDataArray[i]->labels.begin(); it != pDataArray[i]->labels.end(); it++){ labels.insert(*it); }
+                       //check cutoff
+            if (pDataArray[i]->cutoff < cutoff) { cutoff = pDataArray[i]->cutoff; }
+                       CloseHandle(hThreadArray[i]);
+                       delete pDataArray[i];
+               }
+
        #endif          
+        
+        return listFiles;
        
        }
        catch(exception& e) {
@@ -969,18 +1014,19 @@ int ClusterSplitCommand::createProcesses(vector < vector < map<string, string> >
 
 vector<string> ClusterSplitCommand::cluster(vector< map<string, string> > distNames, set<string>& labels){
        try {
-               Cluster* cluster;
-               SparseMatrix* matrix;
-               ListVector* list;
-               ListVector oldList;
-               RAbundVector* rabund;
                
                vector<string> listFileNames;
-               
                double smallestCutoff = cutoff;
                
                //cluster each distance file
                for (int i = 0; i < distNames.size(); i++) {
+            
+            Cluster* cluster = NULL;
+            SparseMatrix* matrix = NULL;
+            ListVector* list = NULL;
+            ListVector oldList;
+            RAbundVector* rabund = NULL;
+            
                        if (m->control_pressed) { return listFileNames; }
                        
                        string thisNamefile = distNames[i].begin()->second;
@@ -1011,8 +1057,8 @@ vector<string> ClusterSplitCommand::cluster(vector< map<string, string> > distNa
                        oldList = *list;
                        matrix = read->getMatrix();
                        
-                       delete read; 
-                       delete nameMap; 
+                       delete read;  read = NULL;
+                       delete nameMap; nameMap = NULL;
                        
                        
                        #ifdef USE_MPI
@@ -1097,6 +1143,7 @@ vector<string> ClusterSplitCommand::cluster(vector< map<string, string> > distNa
                        }
        
                        delete matrix; delete list;     delete cluster; delete rabund; 
+            matrix = NULL; list = NULL; cluster = NULL; rabund = NULL;
                        listFile.close();
                        
                        if (m->control_pressed) { //clean up
index c4b236cb12dbfc535c1de5da7840b39f21176fb7..cf52ff27d01c28d61f90aa0a8c514d4a04b2136d 100644 (file)
 #include "listvector.hpp"
 #include "cluster.hpp"
 #include "sparsematrix.hpp"
-
+#include "readcluster.h"
+#include "splitmatrix.h"
+#include "readphylip.h"
+#include "readcolumn.h"
+#include "readmatrix.hpp"
+#include "inputdata.h"
+#include "clustercommand.h"
 
 class ClusterSplitCommand : public Command {
        
@@ -47,12 +53,208 @@ private:
        ofstream outList, outRabund, outSabund;
        
        void printData(ListVector*);
-       int createProcesses(vector < vector < map<string, string> > >);
+       vector<string> createProcesses(vector< map<string, string> >, set<string>&);
        vector<string> cluster(vector< map<string, string> >, set<string>&);
        int mergeLists(vector<string>, map<float, int>, ListVector*);
        map<float, int> completeListFile(vector<string>, string, set<string>&, ListVector*&);
        int createMergedDistanceFile(vector< map<string, string> >);
 };
 
+/////////////////not working for Windows////////////////////////////////////////////////////////////
+// getting an access violation error.  This is most likely caused by the 
+// threads stepping on each other's structures, as I can run the thread function and the cluster function 
+// separately without errors occurring.  I suspect it may be in the use of the
+// static class mothurOut, but I can't pinpoint the problem.  All other objects are made new
+// within the thread.  MothurOut is used by almost all the classes in mothur, so if this was 
+// really the cause I would expect to see all the windows threaded commands to have issues, but not 
+// all do. So far, shhh.flows and trim.flows have similar problems. Other thoughts, could it have 
+// anything to do with mothur's use of copy constructors in many of our data structures. ie. listvector 
+// is copied by nameassignment and passed to read which passes to the thread?  -westcott 2-8-12
+////////////////////////////////////////////////////////////////////////////////////////////////////
+/**************************************************************************************************/
+//custom data structure for threads to use.
+// This is passed by void pointer so it can be any data type
+// that can be passed using a single void pointer (LPVOID).
+struct clusterData {
+       set<string> labels;
+       vector < map<string, string> > distNames; 
+       string method; 
+    MothurOut* m;
+       double cutoff, precision;
+    string tag, outputDir;
+    vector<string> listFiles;
+    bool hard;
+    int length, threadID;
+       
+       
+       clusterData(){}
+       clusterData(vector < map<string, string> > dv, MothurOut* mout, double cu, string me, string ou, bool hd, double pre, int len, int th) {
+               distNames = dv;
+               m = mout;
+               cutoff = cu;
+        method = me;
+               outputDir = ou;
+        hard = hd;
+        precision = pre;
+        length = len;
+        threadID = th;
+       }
+};
+
+/**************************************************************************************************/
+#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
+#else
+static DWORD WINAPI MyClusterThreadFunction(LPVOID lpParam){ 
+       clusterData* pDataArray;
+       pDataArray = (clusterData*)lpParam;
+       
+       try {
+               cout << "starting " << endl;            
+               
+               double smallestCutoff = pDataArray->cutoff;
+               
+               //cluster each distance file
+               for (int i = 0; i < pDataArray->distNames.size(); i++) {
+            
+            Cluster* mycluster = NULL;
+            SparseMatrix* mymatrix = NULL;
+            ListVector* mylist = NULL;
+            ListVector myoldList;
+            RAbundVector* myrabund = NULL;
+                        
+                       if (pDataArray->m->control_pressed) { break; }
+                       
+                       string thisNamefile = pDataArray->distNames[i].begin()->second;
+                       string thisDistFile = pDataArray->distNames[i].begin()->first;
+            cout << thisNamefile << '\t' << thisDistFile << endl;      
+                       pDataArray->m->mothurOutEndLine(); pDataArray->m->mothurOut("Reading " + thisDistFile); pDataArray->m->mothurOutEndLine();
+                       
+                       ReadMatrix* myread = new ReadColumnMatrix(thisDistFile);        
+                       myread->setCutoff(pDataArray->cutoff);
+                       NameAssignment* mynameMap = new NameAssignment(thisNamefile);
+                       mynameMap->readMap();
+            cout << "done reading " << thisNamefile << endl;  
+                       myread->read(mynameMap);
+                       cout << "done reading " << thisDistFile << endl;  
+                       if (pDataArray->m->control_pressed) {  delete myread; delete mynameMap; break; }
+            
+                       mylist = myread->getListVector();
+                       myoldList = *mylist;
+                       mymatrix = myread->getMatrix();
+            cout << "here" << endl;    
+                       delete myread; myread = NULL;
+                       delete mynameMap; mynameMap = NULL;
+                       
+            pDataArray->m->mothurOutEndLine(); pDataArray->m->mothurOut("Clustering " + thisDistFile); pDataArray->m->mothurOutEndLine();
+            
+                       myrabund = new RAbundVector(mylist->getRAbundVector());
+                        cout << "here" << endl;        
+                       //create cluster
+                       if (pDataArray->method == "furthest")   {       mycluster = new CompleteLinkage(myrabund, mylist, mymatrix, pDataArray->cutoff, pDataArray->method); }
+                       else if(pDataArray->method == "nearest"){       mycluster = new SingleLinkage(myrabund, mylist, mymatrix, pDataArray->cutoff, pDataArray->method); }
+                       else if(pDataArray->method == "average"){       mycluster = new AverageLinkage(myrabund, mylist, mymatrix, pDataArray->cutoff, pDataArray->method);     }
+                       pDataArray->tag = mycluster->getTag();
+             cout << "here" << endl;   
+                       if (pDataArray->outputDir == "") { pDataArray->outputDir += pDataArray->m->hasPath(thisDistFile); }
+                       string fileroot = pDataArray->outputDir + pDataArray->m->getRootName(pDataArray->m->getSimpleName(thisDistFile));
+                        cout << "here" << endl;        
+                       ofstream listFile;
+                       pDataArray->m->openOutputFile(fileroot+ pDataArray->tag + ".list",      listFile);
+             cout << "here" << endl;   
+                       pDataArray->listFiles.push_back(fileroot+ pDataArray->tag + ".list");
+            
+                       float previousDist = 0.00000;
+                       float rndPreviousDist = 0.00000;
+                       
+                       myoldList = *mylist;
+        
+                       bool print_start = true;
+                       int start = time(NULL);
+                       double saveCutoff = pDataArray->cutoff;
+            
+                       while (mymatrix->getSmallDist() < pDataArray->cutoff && mymatrix->getNNodes() > 0){
+                
+                               if (pDataArray->m->control_pressed) { //clean up
+                                       delete mymatrix; delete mylist; delete mycluster; delete myrabund;
+                                       listFile.close();
+                                       for (int i = 0; i < pDataArray->listFiles.size(); i++) {        pDataArray->m->mothurRemove(pDataArray->listFiles[i]);  }
+                                       pDataArray->listFiles.clear(); break;
+                               }
+                
+                               mycluster->update(saveCutoff);
+                
+                               float dist = mymatrix->getSmallDist();
+                               float rndDist;
+                               if (pDataArray->hard) {
+                                       rndDist = pDataArray->m->ceilDist(dist, pDataArray->precision); 
+                               }else{
+                                       rndDist = pDataArray->m->roundDist(dist, pDataArray->precision); 
+                               }
+                
+                               if(previousDist <= 0.0000 && dist != previousDist){
+                                       myoldList.setLabel("unique");
+                                       myoldList.print(listFile);
+                                       if (pDataArray->labels.count("unique") == 0) {  pDataArray->labels.insert("unique");  }
+                               }
+                               else if(rndDist != rndPreviousDist){
+                                       myoldList.setLabel(toString(rndPreviousDist,  pDataArray->length-1));
+                                       myoldList.print(listFile);
+                                       if (pDataArray->labels.count(toString(rndPreviousDist,  pDataArray->length-1)) == 0) { pDataArray->labels.insert(toString(rndPreviousDist,  pDataArray->length-1)); }
+                               }
+                       
+                               previousDist = dist;
+                               rndPreviousDist = rndDist;
+                               myoldList = *mylist;
+                       }
+            
+             cout << "here2" << endl;  
+                       if(previousDist <= 0.0000){
+                               myoldList.setLabel("unique");
+                               myoldList.print(listFile);
+                               if (pDataArray->labels.count("unique") == 0) { pDataArray->labels.insert("unique"); }
+                       }
+                       else if(rndPreviousDist<pDataArray->cutoff){
+                               myoldList.setLabel(toString(rndPreviousDist,  pDataArray->length-1));
+                               myoldList.print(listFile);
+                               if (pDataArray->labels.count(toString(rndPreviousDist,  pDataArray->length-1)) == 0) { pDataArray->labels.insert(toString(rndPreviousDist,  pDataArray->length-1)); }
+                       }
+            
+                       delete mymatrix; delete mylist; delete mycluster; delete myrabund; 
+            mymatrix = NULL; mylist = NULL; mycluster = NULL; myrabund = NULL;
+                       listFile.close();
+                       
+                       if (pDataArray->m->control_pressed) { //clean up
+                               for (int i = 0; i < pDataArray->listFiles.size(); i++) {        pDataArray->m->mothurRemove(pDataArray->listFiles[i]);  }
+                               pDataArray->listFiles.clear(); break;
+                       }
+                        cout << "here3" << endl;       
+                       pDataArray->m->mothurRemove(thisDistFile);
+                       pDataArray->m->mothurRemove(thisNamefile);
+                        cout << "here4" << endl;       
+                       if (saveCutoff != pDataArray->cutoff) { 
+                               if (pDataArray->hard)   {  saveCutoff = pDataArray->m->ceilDist(saveCutoff, pDataArray->precision);     }
+                               else            {       saveCutoff = pDataArray->m->roundDist(saveCutoff, pDataArray->precision);  }
+                
+                               pDataArray->m->mothurOut("Cutoff was " + toString(pDataArray->cutoff) + " changed cutoff to " + toString(saveCutoff)); pDataArray->m->mothurOutEndLine();  
+                       }
+                        cout << "here5" << endl;       
+                       if (saveCutoff < smallestCutoff) { smallestCutoff = saveCutoff;  }
+               }
+               
+               pDataArray->cutoff = smallestCutoff;
+               
+               return 0;
+               
+       }
+       catch(exception& e) {
+               pDataArray->m->errorOut(e, "ClusterSplitCommand", "MyClusterThreadFunction");
+               exit(1);
+       }
+} 
+#endif
+
+
+
+
 #endif
 
index 75fad856be3231809c6aecbaa782865df30e3177..303bf084c15b6d2f525ed70152348374b00f03b0 100644 (file)
 #include "summaryqualcommand.h"
 #include "otuassociationcommand.h"
 #include "sortseqscommand.h"
+#include "classifytreecommand.h"
 
 /*******************************************************/
 
@@ -277,6 +278,7 @@ CommandFactory::CommandFactory(){
        commands["shhh.seqs"]                   = "shhh.seqs";
        commands["otu.association"]             = "otu.association";
     commands["sort.seqs"]           = "sort.seqs";
+    commands["classify.tree"]       = "classify.tree";
        commands["quit"]                                = "MPIEnabled"; 
 
 }
@@ -439,6 +441,7 @@ Command* CommandFactory::getCommand(string commandName, string optionString){
                else if(commandName == "shhh.seqs")                             {       command = new ShhhSeqsCommand(optionString);                            }
                else if(commandName == "otu.association")               {       command = new OTUAssociationCommand(optionString);                      }
         else if(commandName == "sort.seqs")             {      command = new SortSeqsCommand(optionString);                }
+        else if(commandName == "classify.tree")         {      command = new ClassifyTreeCommand(optionString);            }
                else                                                                                    {       command = new NoCommand(optionString);                                          }
 
                return command;
@@ -585,6 +588,7 @@ Command* CommandFactory::getCommand(string commandName, string optionString, str
                else if(commandName == "shhh.seqs")                             {       pipecommand = new ShhhSeqsCommand(optionString);                                }
                else if(commandName == "otu.association")               {       pipecommand = new OTUAssociationCommand(optionString);                  }
         else if(commandName == "sort.seqs")             {      pipecommand = new SortSeqsCommand(optionString);                }
+        else if(commandName == "classify.tree")         {      pipecommand = new ClassifyTreeCommand(optionString);            }
                else                                                                                    {       pipecommand = new NoCommand(optionString);                                              }
 
                return pipecommand;
@@ -719,6 +723,7 @@ Command* CommandFactory::getCommand(string commandName){
                else if(commandName == "shhh.seqs")                             {       shellcommand = new ShhhSeqsCommand();                           }
                else if(commandName == "otu.association")               {       shellcommand = new OTUAssociationCommand();                     }
         else if(commandName == "sort.seqs")             {      shellcommand = new SortSeqsCommand();               }
+        else if(commandName == "classify.tree")         {      shellcommand = new ClassifyTreeCommand();           }
                else                                                                                    {       shellcommand = new NoCommand();                                         }
 
                return shellcommand;
@@ -744,7 +749,7 @@ Command* CommandFactory::getCommand(){
                exit(1);
        }
 }
-/***********************************************************************/
+***********************************************************************/
 bool CommandFactory::isValidCommand(string command) {
        try {   
        
index 9cdd033ac29609d6f763f52597a4809fea204326..e83c6035731aa5509655dcd25a7a45995c1a0004 100644 (file)
@@ -174,7 +174,8 @@ int CountSeqsCommand::execute(){
                //open input file
                ifstream in;
                m->openInputFile(namefile, in);
-               
+        
+               int total = 0;
                while (!in.eof()) {
                        if (m->control_pressed) { break; }
                        
@@ -217,7 +218,7 @@ int CountSeqsCommand::execute(){
                                out << firstCol << '\t' << names.size() << endl;
                        }
                        
-                       
+                       total += names.size();
                }
                in.close();
                
@@ -225,6 +226,8 @@ int CountSeqsCommand::execute(){
                
                if (m->control_pressed) { m->mothurRemove(outputFileName); return 0; }
                
+        m->mothurOutEndLine();
+               m->mothurOut("Total number of sequences: " + toString(total)); m->mothurOutEndLine();
                m->mothurOutEndLine();
                m->mothurOut("Output File Name: "); m->mothurOutEndLine();
                m->mothurOut(outputFileName); m->mothurOutEndLine();    
index b2817a779dc1b95070ef77fbe6e8bf5b11159a3c..49f39035e0aa95d6739616c0a6804858b53cdf9e 100644 (file)
@@ -45,7 +45,6 @@ class Database {
 
 public:
        Database();
-       Database(const Database& db) : numSeqs(db.numSeqs), longest(db.longest), searchScore(db.searchScore), results(db.results), Scores(db.Scores) { m = MothurOut::getInstance(); }
        virtual ~Database();
        virtual void generateDB() = 0; 
        virtual void addSequence(Sequence) = 0;  //add sequence to search engine
index 973340ca0589c87dcf36176638f99f959cd484cc..98545f8e24a72dca332bf7571fc0e5e9e6936988 100644 (file)
@@ -599,7 +599,7 @@ cout << largest->second << '\t' << largest->first->score << '\t' << largest->fir
        }
 }
 
-//***************************************************************************************************************
+***************************************************************************************************************
 //used by removeObviousOutliers which was attempt to increase sensitivity of chimera detection...not currently used...
 int DeCalculator::findLargestContrib(vector<int> seen) {
        try{
@@ -624,7 +624,7 @@ int DeCalculator::findLargestContrib(vector<int> seen) {
                exit(1);
        }
 }
-//***************************************************************************************************************
+***************************************************************************************************************
 void DeCalculator::removeContrib(int bad, vector<quanMember>& quan) {
        try{
        
index 65edcf75366d4096b1256266b05296146c30167b..0ac17877b2bed9bfc441a70dcece14cd99081727 100644 (file)
@@ -1094,7 +1094,7 @@ int DistanceCommand::convertMatrix(string outputFile) {
                exit(1);
        }
 }
-/**************************************************************************************************
+**************************************************************************************************
 int DistanceCommand::convertToLowerTriangle(string outputFile) {
        try{
 
@@ -1188,7 +1188,7 @@ int DistanceCommand::convertToLowerTriangle(string outputFile) {
                exit(1);
        }
 }
-/**************************************************************************************************/
+**************************************************************************************************/
 //its okay if the column file does not contain all the names in the fasta file, since some distance may have been above a cutoff,
 //but no sequences can be in the column file that are not in oldfasta. also, if a distance is above the cutoff given then remove it.
 //also check to make sure the 2 files have the same alignment length.
index 2cbf5cad6edf5df5b62b7c345cf09032eaf7e10f..27e278574493c89d0197d99cbe71c0c3dd03d0d9 100644 (file)
 #include "onegapignore.h"
 
 
-/**************************************************************************************************/
-DistanceDB::DistanceDB(const DistanceDB& ddb) : data(ddb.data), templateSeqsLength(ddb.templateSeqsLength), templateAligned(ddb.templateAligned), Database(ddb) { 
-       distCalculator = new oneGapIgnoreTermGapDist(); 
-}
 /**************************************************************************************************/
 DistanceDB::DistanceDB() : Database() { 
        try {
index d7e05db85afed819b9f2d26c99d06c1426ca19f9..2624d6d6440190520e02af09c43264bf365c9f33 100644 (file)
@@ -19,7 +19,6 @@ class DistanceDB : public Database {
 public:
        
        DistanceDB();
-       DistanceDB(const DistanceDB& ddb); 
        ~DistanceDB() { delete distCalculator; }
        
        void generateDB() {} //doesn't generate a search db 
index fe9bc3099d7a1dcdd347bfe98a23e1bceb44768a..1aca10c4bd208f6cf6c0dbfd9f97fdde97afe576 100644 (file)
@@ -19,7 +19,6 @@ class eachGapDist : public Dist {
 public:
        
        eachGapDist() {}
-       eachGapDist(const eachGapDist& ddb) {}
        
        void calcDist(Sequence A, Sequence B){          
                int diff = 0;
index f0514655a35212215a49fbdb7b1c14c5a3415352..6cd45314194fccce192c7654e2d8994aff6766c5 100644 (file)
@@ -752,7 +752,7 @@ seqDist HCluster::getNextDist(char* buffer, int& index, int size){
                exit(1);
        }
 }
-/***********************************************************************/
+***********************************************************************/
 int HCluster::processFile() {
        try {
                string firstName, secondName;
index ccc6b96ee9d6f0e5f3761f9e969741f5e08defdd..894f92d9644182ece0a7d18e491384d2058f2d54 100644 (file)
@@ -21,7 +21,6 @@ class ignoreGaps : public Dist {
 public:
        
        ignoreGaps() {}
-       ignoreGaps(const ignoreGaps& ddb) {}
        
        void calcDist(Sequence A, Sequence B){          
                int diff = 0;
index 4ae00b91311677488c35961186dfbf072ba005f3..ead3d7e0b20d7b0ee070caebaad74aedeaae7dd3 100644 (file)
@@ -26,7 +26,6 @@ class KmerDB : public Database {
        
 public:
        KmerDB(string, int);
-       KmerDB(const KmerDB& kdb) : kmerSize(kdb.kmerSize), maxKmer(kdb.maxKmer), count(kdb.count), kmerDBName(kdb.kmerDBName), kmerLocations(kdb.kmerLocations), Database(kdb) {}
        KmerDB();
        ~KmerDB();
        
index f31e4821fe52cb4160af0c6dbc30c0bf989a1cf6..3d1d1c9fa5ac995a65265b62ed4c59f25ed6679d 100644 (file)
@@ -612,7 +612,7 @@ vector<trace_struct> Maligner::extractHighestPath(vector<vector<score_struct> >
        }
 }
 
-//***************************************************************************************************************
+***************************************************************************************************************
 
 vector<trace_struct> Maligner::mapTraceRegionsToAlignment(vector<score_struct> path, vector<Sequence*> seqs) {
        try {
index ae822254c01ccdf7359ca319c44a2db5005f1fc4..ae3ab802b2e4380c93655f2d6a6010beaf16f151 100644 (file)
@@ -217,7 +217,7 @@ int MothurMetastats::runMetastats(string outputFileName, vector< vector<double>
                        if(qvalues[i] < threshold){
                                m->mothurOut("Feature " + toString((i+1)) + " is significant, q = "); 
                                cout << qvalues[i];
-                               m->mothurOutJustToLog(toString(pvalues[i])); m->mothurOutEndLine();
+                               m->mothurOutJustToLog(toString(qvalues[i])); m->mothurOutEndLine();
                        }       
                }
                
@@ -492,6 +492,12 @@ int MothurMetastats::calc_twosample_ts(vector<double>& Pmatrix, int secondGroupi
 vector<double> MothurMetastats::calc_qvalues(vector<double>& pValues) {
        try {
                
+       /* cout << "x <- c(" << pValues[0];
+        for (int l = 1; l < pValues.size(); l++){
+            cout << ", " << pValues[l];
+        }
+        cout << ")\n";*/
+        
                int numRows = pValues.size();
                vector<double> qvalues(numRows, 0.0);
 
index 20a7b5235029b6d7908ca0f23f119c3c38d095f1..25e534e43880497955512ce7dc2240adf99157e9 100644 (file)
@@ -212,8 +212,8 @@ void MothurOut::mothurOut(string output) {
                        if (pid == 0) { //only one process should output to screen
                #endif
                
-               cout << output;
                out << output;
+        logger() << output;
                
                #ifdef USE_MPI
                        }
@@ -234,8 +234,8 @@ void MothurOut::mothurOutEndLine() {
                        if (pid == 0) { //only one process should output to screen
                #endif
                
-               cout << endl;
                out << endl;
+        logger() << endl;
                
                #ifdef USE_MPI
                        }
@@ -257,13 +257,15 @@ void MothurOut::mothurOut(string output, ofstream& outputFile) {
                if (pid == 0) { //only one process should output to screen
 #endif
                        
-                       cout << output;
+                       
                        out << output;
                        outputFile << output;
+            logger() << output;
                        
 #ifdef USE_MPI
                }
 #endif
+        
        }
        catch(exception& e) {
                errorOut(e, "MothurOut", "MothurOut");
@@ -280,9 +282,9 @@ void MothurOut::mothurOutEndLine(ofstream& outputFile) {
                if (pid == 0) { //only one process should output to screen
 #endif
                        
-                       cout << endl;
                        out << endl;
                        outputFile << endl;
+            logger() << endl;
                        
 #ifdef USE_MPI
                }
@@ -726,7 +728,7 @@ string MothurOut::getFullPathName(string fileName){
                                        }else if (path[(pos-1)] == '/') { //you want the current working dir ./
                                                path = path.substr(0, pos);
                                        }else if (pos == 1) { break;  //you are at the end
-                                       }else { cout << "cannot resolve path for " <<  fileName << endl; return fileName; }
+                                       }else { mothurOut("cannot resolve path for " +  fileName + "\n"); return fileName; }
                                }
                        
                                for (int i = index; i >= 0; i--) {
@@ -772,7 +774,7 @@ string MothurOut::getFullPathName(string fileName){
                                        }else if (path[(pos-1)] == '\\') { //you want the current working dir ./
                                                path = path.substr(0, pos);
                                        }else if (pos == 1) { break;  //you are at the end
-                                       }else { cout << "cannot resolve path for " <<  fileName << endl; return fileName; }
+                                       }else { mothurOut("cannot resolve path for " +  fileName + "\n"); return fileName; }
                                }
                        
                                for (int i = index; i >= 0; i--) {
@@ -1151,7 +1153,7 @@ vector<unsigned long long> MothurOut::setFilePosEachLine(string filename, int& n
                                        while(isspace(d) && (d != in.eof()))            { d=in.get(); count++;}
                                }
                                positions.push_back(count-1);
-                               cout << count-1 << endl;
+                               //cout << count-1 << endl;
                        }
                        in.close();
                
index d2a36b351902f3bc25d7c2132fb2c20f15421b23..12de987e231247a4fb195aef20054bd94dacd375 100644 (file)
 
 #include "mothur.h"
 
+/***********************************************/
+struct logger {
+    
+    logger() {}
+    ~logger() {}
+    
+    template< class T >
+    logger& operator <<( const T& o ) {
+        cout << o; return *this;
+    }
+    
+    logger& operator<<(ostream& (*m)(ostream&) ) {
+        cout << m; return *this;
+    }
+    
+}; 
 /***********************************************/
 
 class MothurOut {
index 3cf38cdb9d752652cc70a865ce6b1b87e1da939b..f7cb65ec43c741a1d4ae5be194fd49fa75b2fc9e 100644 (file)
@@ -515,7 +515,7 @@ int Perseus::getChimera(vector<seqData> sequences,
                        for(int i=0;i<numRefSeqs;i++){
                                
                                if(restricted[i] == 0){
-                                       if(leftDiffs[i][l] < singleLeft[l] && sequences[i].frequency || (leftDiffs[i][l] == singleLeft[l] && sequences[i].frequency > sequences[bestLeft[l]].frequency)){
+                                       if(((leftDiffs[i][l] < singleLeft[l]) && sequences[i].frequency) || ((leftDiffs[i][l] == singleLeft[l]) && (sequences[i].frequency > sequences[bestLeft[l]].frequency))){
                                                singleLeft[l] = leftDiffs[i][l];
                                                bestLeft[l] = i;
                                        }
@@ -533,7 +533,7 @@ int Perseus::getChimera(vector<seqData> sequences,
                        for(int i=0;i<numRefSeqs;i++){
                                
                                if(restricted[i] == 0){
-                                       if(rightDiffs[i][l] < singleRight[l] && sequences[i].frequency || (rightDiffs[i][l] == singleRight[l] && sequences[i].frequency > sequences[bestRight[l]].frequency)){
+                                       if((rightDiffs[i][l] < singleRight[l] && sequences[i].frequency) || ((rightDiffs[i][l] == singleRight[l] && sequences[i].frequency > sequences[bestRight[l]].frequency))){
                                                singleRight[l] = rightDiffs[i][l];
                                                bestRight[l] = i;
                                        }
@@ -649,7 +649,7 @@ int Perseus::getTrimera(vector<seqData>& sequences,
                                        if(restricted[i] == 0){
                                                int delta = leftDiffs[i][y] - leftDiffs[i][x];
 
-                                               if(delta < minDelta[x][y] || delta == minDelta[x][y] && sequences[i].frequency > sequences[minDeltaSeq[x][y]].frequency){
+                                               if(delta < minDelta[x][y] || (delta == minDelta[x][y] && sequences[i].frequency > sequences[minDeltaSeq[x][y]].frequency)){
                                                        minDelta[x][y] = delta;
                                                        minDeltaSeq[x][y] = i;                                  
                                                }                               
index 971e6b0ebbad92014347872ee92f4222c044db72..3e5b6c72f1d862ab88698360566a84e6a87953de 100644 (file)
@@ -19,7 +19,6 @@ class oneGapDist : public Dist {
 public:
        
        oneGapDist() {}
-       oneGapDist(const oneGapDist& ddb) {}
        
        void calcDist(Sequence A, Sequence B){
                
index dba3f38292ced957330631ece5572901ee3438ae..fdbc196e9f2f7ff539a9b61bf35a8120483ac66b 100644 (file)
@@ -19,7 +19,6 @@ class oneGapIgnoreTermGapDist : public Dist {
 public:
        
        oneGapIgnoreTermGapDist() {}
-       oneGapIgnoreTermGapDist(const oneGapIgnoreTermGapDist& ddb) {}
        
        void calcDist(Sequence A, Sequence B){
                
index 86dc539b8e1a6e1385d178ab0473bcc966a71705..d5ccaf948feaef51156f3a77346fe0908e4598ec 100644 (file)
@@ -20,7 +20,7 @@ EstOutput PhyloDiversity::getValues(Tree* t, vector<int> treeNodes, vector< vect
                //initialize Dscore
                for (int i=0; i<globaldata->Groups.size(); i++) {               DScore[globaldata->Groups[i]] = 0.0;    }
        
-               /********************************************************
+               ********************************************************
                //calculate a D value for each group 
                for(int v=0;v<treeNodes.size();v++){
                                
@@ -75,7 +75,7 @@ EstOutput PhyloDiversity::getValues(Tree* t, vector<int> treeNodes, vector< vect
                exit(1);
        }
 }
-/**************************************************************************************************/
+**************************************************************************************************/
 
 
 
index 582da493e4dc5cdf5267d65606181d1594f56a87..8e4f3274a490679c85f992f8584605c877c84850 100644 (file)
@@ -514,7 +514,7 @@ int PreClusterCommand::readFASTA(){
                m->openInputFile(fastafile, inFasta);
                
                //string firstCol, secondCol, nameString;
-               length = 0;
+               set<int> lengths;
                
                while (!inFasta.eof()) {
                        
@@ -540,17 +540,20 @@ int PreClusterCommand::readFASTA(){
                                        else{
                                                seqPNode tempNode(itSize->second, seq, names[seq.getName()]);
                                                alignSeqs.push_back(tempNode);
-                                               if (seq.getAligned().length() > length) {  length = seq.getAligned().length();  }
+                                               lengths.insert(seq.getAligned().length());
                                        }       
                                }else { //no names file, you are identical to yourself 
                                        seqPNode tempNode(1, seq, seq.getName());
                                        alignSeqs.push_back(tempNode);
-                                       if (seq.getAligned().length() > length) {  length = seq.getAligned().length();  }
+                                       lengths.insert(seq.getAligned().length());
                                }
                        }
                }
                inFasta.close();
                //inNames.close();
+        
+        if (lengths.size() > 1) { m->control_pressed = true; m->mothurOut("[ERROR]: your sequences are not all the same length. pre.cluster requires sequences to be aligned."); m->mothurOutEndLine(); }
+        
                return alignSeqs.size();
        }
        
@@ -562,7 +565,7 @@ int PreClusterCommand::readFASTA(){
 /**************************************************************************************************/
 int PreClusterCommand::loadSeqs(map<string, string>& thisName, vector<Sequence>& thisSeqs){
        try {
-               length = 0;
+               set<int> lengths;
                alignSeqs.clear();
                map<string, string>::iterator it;
                bool error = false;
@@ -585,15 +588,17 @@ int PreClusterCommand::loadSeqs(map<string, string>& thisName, vector<Sequence>&
                                        
                                        seqPNode tempNode(numReps, thisSeqs[i], it->second);
                                        alignSeqs.push_back(tempNode);
-                                       if (thisSeqs[i].getAligned().length() > length) {  length = thisSeqs[i].getAligned().length();  }
+                    lengths.insert(thisSeqs[i].getAligned().length());
                                }       
                        }else { //no names file, you are identical to yourself 
                                seqPNode tempNode(1, thisSeqs[i], thisSeqs[i].getName());
                                alignSeqs.push_back(tempNode);
-                               if (thisSeqs[i].getAligned().length() > length) {  length = thisSeqs[i].getAligned().length();  }
+                               lengths.insert(thisSeqs[i].getAligned().length());
                        }
                }
                
+        if (lengths.size() > 1) { error = true; m->mothurOut("[ERROR]: your sequences are not all the same length. pre.cluster requires sequences to be aligned."); m->mothurOutEndLine(); }
+        
                //sanity check
                if (error) { m->control_pressed = true; }
                
index 36400c1324380cda8e5570c028ac48aa4beaa2c9..d91490edc47ec4e175d90b1dff556da66c8c79df 100644 (file)
@@ -27,5 +27,5 @@ void ReferenceDB::clearMemory()  {
 }
 /*******************************************************
 ReferenceDB::~ReferenceDB() { myInstance = NULL; }
-/*******************************************************/
+*******************************************************/
 
index 9ee80106b4346391c66d8fc75650b3f71e337062..8e7a4395fbf20009d135502fde3774b2592a9440 100644 (file)
@@ -1016,4 +1016,4 @@ int main(int argc, char *argv[]){
        return 0;
 }
 
-/**************************************************************************************************/
+**************************************************************************************************/
index 6a50cb0b8aa31e4a3a3b6c9c97e77416f53eeeb1..af0cba814baece3ae9ce4abae02b3fa7370769cf 100644 (file)
@@ -28,9 +28,6 @@ public:
        Sequence(string, string);
        Sequence(ifstream&);
        Sequence(istringstream&);
-       Sequence(const Sequence& se) : name(se.name), unaligned(se.unaligned), aligned(se.aligned), pairwise(se.pairwise), numBases(se.numBases), startPos(se.startPos), endPos(se.endPos),
-                                                                       alignmentLength(se.alignmentLength), isAligned(se.isAligned), longHomoPolymer(se.longHomoPolymer), ambigBases(se.ambigBases) { m = MothurOut::getInstance(); }
-       
        //these constructors just set the unaligned string to save space
        Sequence(string, string, string);  
        Sequence(ifstream&, string);
index bea2f8c83808b287428bc309f513b8b64d548366..8abf92023620096dd820dbf2f16fcfb1fe34464c 100644 (file)
@@ -348,7 +348,7 @@ SharedRAbundVector SharedRAbundFloatVector::getSharedRAbundVector(){
                exit(1);
        }               
 }
-/***********************************************************************/
+***********************************************************************/
 vector<SharedRAbundFloatVector*> SharedRAbundFloatVector::getSharedRAbundFloatVectors(){
        try {
                SharedUtil* util;
@@ -419,7 +419,7 @@ SharedSAbundVector SharedRAbundVector::getSharedSAbundVector(){
                exit(1);
        }
 }
-/***********************************************************************/
+***********************************************************************/
 
 SAbundVector SharedRAbundFloatVector::getSAbundVector() {
        try {
@@ -461,7 +461,7 @@ SharedOrderVector SharedRAbundFloatVector::getSharedOrderVector() {
                exit(1);
        }
 }
-/***********************************************************************/
+***********************************************************************/
 //this is not functional, not sure how to handle it yet, but I need the stub because it is a pure function
 OrderVector SharedRAbundFloatVector::getOrderVector(map<string,int>* nameMap = NULL) {
        try {
index 5ad4b2d7e70d2620c037b498a66b4d16cef62371..70b09603be0f4cd85081f9f1b530c5ec0b61040e 100644 (file)
@@ -55,7 +55,7 @@ SharedRAbundVector::SharedRAbundVector(string id, vector<individual> rav) : Data
 }
 
 
-/***********************************************************************/
+***********************************************************************/
 //reads a shared file
 SharedRAbundVector::SharedRAbundVector(ifstream& f) : DataVector(), maxRank(0), numBins(0), numSeqs(0) {
        try {
index 537821134edebcf9dcad70f8c1f38be0da39b121..5d8263c86e2a1f3eae24da047897e2c642d243eb 100644 (file)
@@ -9,15 +9,6 @@
 
 #include "shhhercommand.h"
 
-#include "readcolumn.h"
-#include "readmatrix.hpp"
-#include "rabundvector.hpp"
-#include "sabundvector.hpp"
-#include "listvector.hpp"
-#include "cluster.hpp"
-#include "sparsematrix.hpp"
-#include <cfloat>
-
 //**********************************************************************************************************************
 vector<string> ShhherCommand::setParameters(){ 
        try {
@@ -76,18 +67,8 @@ ShhherCommand::ShhherCommand(){
 
 ShhherCommand::ShhherCommand(string option) {
        try {
-
-#ifdef USE_MPI
-               MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
-               MPI_Comm_size(MPI_COMM_WORLD, &ncpus);
-
-               if(pid == 0){
-#endif
-               
-               
                abort = false; calledHelp = false;   
                
-               
                //allow user to run help
                if(option == "help") { help(); abort = true; calledHelp = true; }
                else if(option == "citation") { citation(); abort = true; calledHelp = true;}
@@ -168,6 +149,67 @@ ShhherCommand::ShhherCommand(string option) {
                                m->openOutputFile(compositeNamesFileName, temp);
                                temp.close();
                        }
+            
+            if(flowFilesFileName != "not found"){
+                string fName;
+                
+                ifstream flowFilesFile;
+                m->openInputFile(flowFilesFileName, flowFilesFile);
+                while(flowFilesFile){
+                    fName = m->getline(flowFilesFile);
+                    
+                    //test if file is valid
+                    ifstream in;
+                    int ableToOpen = m->openInputFile(fName, in, "noerror");
+                    in.close();        
+                    if (ableToOpen == 1) {
+                        if (inputDir != "") { //default path is set
+                            string tryPath = inputDir + fName;
+                            m->mothurOut("Unable to open " + fName + ". Trying input directory " + tryPath); m->mothurOutEndLine();
+                            ifstream in2;
+                            ableToOpen = m->openInputFile(tryPath, in2, "noerror");
+                            in2.close();
+                            fName = tryPath;
+                        }
+                    }
+                    
+                    if (ableToOpen == 1) {
+                        if (m->getDefaultPath() != "") { //default path is set
+                            string tryPath = m->getDefaultPath() + m->getSimpleName(fName);
+                            m->mothurOut("Unable to open " + fName + ". Trying default " + tryPath); m->mothurOutEndLine();
+                            ifstream in2;
+                            ableToOpen = m->openInputFile(tryPath, in2, "noerror");
+                            in2.close();
+                            fName = tryPath;
+                        }
+                    }
+                    
+                    //if you can't open it, it's not in the current working directory or inputDir; try mothur's executable location
+                    if (ableToOpen == 1) {
+                        string exepath = m->argv;
+                        string tempPath = exepath;
+                        for (int i = 0; i < exepath.length(); i++) { tempPath[i] = tolower(exepath[i]); }
+                        exepath = exepath.substr(0, (tempPath.find_last_of('m')));
+                        
+                        string tryPath = m->getFullPathName(exepath) + m->getSimpleName(fName);
+                        m->mothurOut("Unable to open " + fName + ". Trying mothur's executable location " + tryPath); m->mothurOutEndLine();
+                        ifstream in2;
+                        ableToOpen = m->openInputFile(tryPath, in2, "noerror");
+                        in2.close();
+                        fName = tryPath;
+                    }
+                    
+                    if (ableToOpen == 1) {  m->mothurOut("Unable to open " + fName + ". Disregarding. "); m->mothurOutEndLine();  }
+                    else { flowFileVector.push_back(fName); }
+                    m->gobble(flowFilesFile);
+                }
+                flowFilesFile.close();
+                if (flowFileVector.size() == 0) {  m->mothurOut("[ERROR]: no valid files."); m->mothurOutEndLine(); abort = true; }
+            }
+            else{
+                flowFileVector.push_back(flowFileName);
+            }
+
                        
                        //if the user changes the output directory command factory will send this info to us in the output parameter 
                        outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  
@@ -272,11 +314,6 @@ ShhherCommand::ShhherCommand(string option) {
                        }
                        
                }
-                       
-#ifdef USE_MPI
-               }                               
-#endif
-                               
        }
        catch(exception& e) {
                m->errorOut(e, "ShhherCommand", "ShhherCommand");
@@ -291,7 +328,9 @@ int ShhherCommand::execute(){
                
                int tag = 1976;
                MPI_Status status; 
-
+               MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
+               MPI_Comm_size(MPI_COMM_WORLD, &ncpus);
+        
                if(pid == 0){
 
                        for(int i=1;i<ncpus;i++){
@@ -305,21 +344,6 @@ int ShhherCommand::execute(){
                        getSingleLookUp();      if (m->control_pressed) { return 0; }
                        getJointLookUp();       if (m->control_pressed) { return 0; }
                        
-                       vector<string> flowFileVector;
-                       if(flowFilesFileName != "not found"){
-                               string fName;
-
-                               ifstream flowFilesFile;
-                               m->openInputFile(flowFilesFileName, flowFilesFile);
-                               while(flowFilesFile){
-                                       fName = m->getline(flowFilesFile);
-                                       flowFileVector.push_back(fName);
-                                       m->gobble(flowFilesFile);
-                               }
-                       }
-                       else{
-                               flowFileVector.push_back(flowFileName);
-                       }
                        int numFiles = flowFileVector.size();
 
                        for(int i=1;i<ncpus;i++){
@@ -694,7 +718,6 @@ int ShhherCommand::execute(){
                exit(1);
        }
 }
-
 /**************************************************************************************************/
 
 string ShhherCommand::flowDistMPI(int startSeq, int stopSeq){
@@ -741,10 +764,1050 @@ string ShhherCommand::flowDistMPI(int startSeq, int stopSeq){
                return fDistFileName;
        }
        catch(exception& e) {
-               m->errorOut(e, "ShhherCommand", "flowDistParentFork");
+               m->errorOut(e, "ShhherCommand", "flowDistMPI");
                exit(1);
        }
 }
+/**************************************************************************************************/
+void ShhherCommand::getOTUData(string listFileName){
+    try {
+        
+        ifstream listFile;
+        m->openInputFile(listFileName, listFile);
+        string label;
+        
+        listFile >> label >> numOTUs;
+        
+        otuData.assign(numSeqs, 0);
+        cumNumSeqs.assign(numOTUs, 0);
+        nSeqsPerOTU.assign(numOTUs, 0);
+        aaP.clear();aaP.resize(numOTUs);
+        
+        seqNumber.clear();
+        aaI.clear();
+        seqIndex.clear();
+        
+        string singleOTU = "";
+        
+        for(int i=0;i<numOTUs;i++){
+            
+            if (m->control_pressed) { break; }
+            
+            listFile >> singleOTU;
+            
+            istringstream otuString(singleOTU);
+            
+            while(otuString){
+                
+                string seqName = "";
+                
+                for(int j=0;j<singleOTU.length();j++){
+                    char letter = otuString.get();
+                    
+                    if(letter != ','){
+                        seqName += letter;
+                    }
+                    else{
+                        map<string,int>::iterator nmIt = nameMap.find(seqName);
+                        int index = nmIt->second;
+                        
+                        nameMap.erase(nmIt);
+                        
+                        otuData[index] = i;
+                        nSeqsPerOTU[i]++;
+                        aaP[i].push_back(index);
+                        seqName = "";
+                    }
+                }
+                
+                map<string,int>::iterator nmIt = nameMap.find(seqName);
+                
+                int index = nmIt->second;
+                nameMap.erase(nmIt);
+                
+                otuData[index] = i;
+                nSeqsPerOTU[i]++;
+                aaP[i].push_back(index);       
+                
+                otuString.get();
+            }
+            
+            sort(aaP[i].begin(), aaP[i].end());
+            for(int j=0;j<nSeqsPerOTU[i];j++){
+                seqNumber.push_back(aaP[i][j]);
+            }
+            for(int j=nSeqsPerOTU[i];j<numSeqs;j++){
+                aaP[i].push_back(0);
+            }
+            
+            
+        }
+        
+        for(int i=1;i<numOTUs;i++){
+            cumNumSeqs[i] = cumNumSeqs[i-1] + nSeqsPerOTU[i-1];
+        }
+        aaI = aaP;
+        seqIndex = seqNumber;
+        
+        listFile.close();      
+        
+    }
+    catch(exception& e) {
+        m->errorOut(e, "ShhherCommand", "getOTUData");
+        exit(1);       
+    }          
+}
+
+/**************************************************************************************************/
+
+void ShhherCommand::initPyroCluster(){                          
+    try{
+        if (numOTUs < processors) { processors = 1; }
+        
+        dist.assign(numSeqs * numOTUs, 0);
+        change.assign(numOTUs, 1);
+        centroids.assign(numOTUs, -1);
+        weight.assign(numOTUs, 0);
+        singleTau.assign(numSeqs, 1.0);
+        
+        nSeqsBreaks.assign(processors+1, 0);
+        nOTUsBreaks.assign(processors+1, 0);
+        
+        nSeqsBreaks[0] = 0;
+        for(int i=0;i<processors;i++){
+            nSeqsBreaks[i+1] = nSeqsBreaks[i] + (int)((double) numSeqs / (double) processors);
+            nOTUsBreaks[i+1] = nOTUsBreaks[i] + (int)((double) numOTUs / (double) processors);
+        }
+        nSeqsBreaks[processors] = numSeqs;
+        nOTUsBreaks[processors] = numOTUs;
+    }
+    catch(exception& e) {
+        m->errorOut(e, "ShhherCommand", "initPyroCluster");
+        exit(1);       
+    }
+}
+
+/**************************************************************************************************/
+
+void ShhherCommand::fill(){
+    try {
+        int index = 0;
+        for(int i=0;i<numOTUs;i++){
+            
+            if (m->control_pressed) { break; }
+            
+            cumNumSeqs[i] = index;
+            for(int j=0;j<nSeqsPerOTU[i];j++){
+                seqNumber[index] = aaP[i][j];
+                seqIndex[index] = aaI[i][j];
+                
+                index++;
+            }
+        }
+    }
+    catch(exception& e) {
+        m->errorOut(e, "ShhherCommand", "fill");
+        exit(1);       
+    }          
+}
+
+/**************************************************************************************************/
+void ShhherCommand::getFlowData(){
+    try{
+        ifstream flowFile;
+        m->openInputFile(flowFileName, flowFile);
+        
+        string seqName;
+        seqNameVector.clear();
+        lengths.clear();
+        flowDataIntI.clear();
+        nameMap.clear();
+        
+        
+        int currentNumFlowCells;
+        
+        float intensity;
+        
+        flowFile >> numFlowCells;
+        int index = 0;//pcluster
+        while(!flowFile.eof()){
+            
+            if (m->control_pressed) { break; }
+            
+            flowFile >> seqName >> currentNumFlowCells;
+            lengths.push_back(currentNumFlowCells);
+            
+            seqNameVector.push_back(seqName);
+            nameMap[seqName] = index++;//pcluster
+            
+            for(int i=0;i<numFlowCells;i++){
+                flowFile >> intensity;
+                if(intensity > 9.99)   {       intensity = 9.99;       }
+                int intI = int(100 * intensity + 0.0001);
+                flowDataIntI.push_back(intI);
+            }
+            m->gobble(flowFile);
+        }
+        flowFile.close();
+        
+        numSeqs = seqNameVector.size();                
+        
+        for(int i=0;i<numSeqs;i++){
+            
+            if (m->control_pressed) { break; }
+            
+            int iNumFlowCells = i * numFlowCells;
+            for(int j=lengths[i];j<numFlowCells;j++){
+                flowDataIntI[iNumFlowCells + j] = 0;
+            }
+        }
+        
+    }
+    catch(exception& e) {
+        m->errorOut(e, "ShhherCommand", "getFlowData");
+        exit(1);
+    }
+}
+/**************************************************************************************************/
+void ShhherCommand::calcNewDistancesChildMPI(int startSeq, int stopSeq, vector<int>& otuIndex){
+       
+       try{
+               vector<double> newTau(numOTUs,0);
+               vector<double> norms(numSeqs, 0);
+               otuIndex.clear();
+               seqIndex.clear();
+               singleTau.clear();
+               
+               for(int i=startSeq;i<stopSeq;i++){
+                       
+                       if (m->control_pressed) { break; }
+                       
+                       double offset = 1e8;
+                       int indexOffset = i * numOTUs;
+                       
+                       for(int j=0;j<numOTUs;j++){
+                               
+                               if(weight[j] > MIN_WEIGHT && change[j] == 1){
+                                       dist[indexOffset + j] = getDistToCentroid(centroids[j], i, lengths[i]);
+                               }
+                               if(weight[j] > MIN_WEIGHT && dist[indexOffset + j] < offset){
+                                       offset = dist[indexOffset + j];
+                               }
+                       }
+                       
+                       for(int j=0;j<numOTUs;j++){
+                               if(weight[j] > MIN_WEIGHT){
+                                       newTau[j] = exp(sigma * (-dist[indexOffset + j] + offset)) * weight[j];
+                                       norms[i] += newTau[j];
+                               }
+                               else{
+                                       newTau[j] = 0.0;
+                               }
+                       }
+                       
+                       for(int j=0;j<numOTUs;j++){
+                
+                               newTau[j] /= norms[i];
+                               
+                               if(newTau[j] > MIN_TAU){
+                                       otuIndex.push_back(j);
+                                       seqIndex.push_back(i);
+                                       singleTau.push_back(newTau[j]);
+                               }
+                       }
+                       
+               }
+       }
+       catch(exception& e) {
+               m->errorOut(e, "ShhherCommand", "calcNewDistancesChildMPI");
+               exit(1);        
+       }               
+}
+
+/**************************************************************************************************/
+
+// Recomputes, for sequences [startSeq, stopSeq), the distance from each sequence to every
+// OTU centroid and the per-sequence OTU responsibilities (tau).  Normalized tau values above
+// MIN_TAU are appended to the sparse arrays singleTau/seqNumber/seqIndex and recorded in the
+// per-OTU membership tables aaP (index into singleTau) and aaI (sequence id); nSeqsPerOTU is
+// rebuilt from scratch.
+void ShhherCommand::calcNewDistancesParent(int startSeq, int stopSeq){
+    
+    try{
+        
+        int total = 0;
+        vector<double> newTau(numOTUs,0);
+        vector<double> norms(numSeqs, 0);
+        nSeqsPerOTU.assign(numOTUs, 0);
+        
+        for(int i=startSeq;i<stopSeq;i++){
+            
+            if (m->control_pressed) { break; }
+            
+            int indexOffset = i * numOTUs;
+            
+            // offset tracks the minimum distance over live OTUs; subtracting it below keeps
+            // the exp() arguments near zero and avoids underflow in the soft assignment.
+            double offset = 1e8;
+            
+            for(int j=0;j<numOTUs;j++){
+                
+                // Only recompute the distance when the OTU's centroid changed this iteration.
+                if(weight[j] > MIN_WEIGHT && change[j] == 1){
+                    dist[indexOffset + j] = getDistToCentroid(centroids[j], i, lengths[i]);
+                }
+                
+                if(weight[j] > MIN_WEIGHT && dist[indexOffset + j] < offset){
+                    offset = dist[indexOffset + j];
+                }
+            }
+            
+            // Unnormalized responsibilities; OTUs below MIN_WEIGHT are excluded.
+            for(int j=0;j<numOTUs;j++){
+                if(weight[j] > MIN_WEIGHT){
+                    newTau[j] = exp(sigma * (-dist[indexOffset + j] + offset)) * weight[j];
+                    norms[i] += newTau[j];
+                }
+                else{
+                    newTau[j] = 0.0;
+                }
+            }
+            
+            for(int j=0;j<numOTUs;j++){
+                newTau[j] /= norms[i];
+            }
+            
+            // Append significant (tau > MIN_TAU) entries to the sparse representation.
+            // NOTE(review): seqIndex is resized here but never assigned the sequence id i,
+            // unlike the child/MPI variant which pushes seqIndex — confirm this is intended
+            // (the parent path may rely on aaI instead).
+            for(int j=0;j<numOTUs;j++){
+                if(newTau[j] > MIN_TAU){
+                    
+                    int oldTotal = total;
+                    
+                    total++;
+                    
+                    singleTau.resize(total, 0);
+                    seqNumber.resize(total, 0);
+                    seqIndex.resize(total, 0);
+                    
+                    singleTau[oldTotal] = newTau[j];
+                    
+                    aaP[j][nSeqsPerOTU[j]] = oldTotal;
+                    aaI[j][nSeqsPerOTU[j]] = i;
+                    nSeqsPerOTU[j]++;
+                }
+            }
+            
+        }
+        
+    }
+    catch(exception& e) {
+        m->errorOut(e, "ShhherCommand", "calcNewDistancesParent");
+        exit(1);       
+    }          
+}
+
+/**************************************************************************************************/
+
+// Hard-assigns every sequence to the single OTU with its maximum responsibility (tau),
+// then rebuilds the membership tables as if each sequence belonged to exactly one OTU
+// (tau = 1, dist = 0).  Finishes by calling fill() to refresh the derived indices.
+void ShhherCommand::setOTUs(){
+    
+    try {
+        // Densify the sparse tau entries into a numSeqs x numOTUs matrix so the per-sequence
+        // maximum can be found with a straight row scan.
+        vector<double> bigTauMatrix(numOTUs * numSeqs, 0.0000);
+        
+        for(int i=0;i<numOTUs;i++){
+            
+            if (m->control_pressed) { break; }
+            
+            for(int j=0;j<nSeqsPerOTU[i];j++){
+                int index = cumNumSeqs[i] + j;
+                double tauValue = singleTau[seqNumber[index]];
+                int sIndex = seqIndex[index];
+                bigTauMatrix[sIndex * numOTUs + i] = tauValue;                         
+            }
+        }
+        
+        // For each sequence, pick the OTU with the largest tau (argmax over the row).
+        for(int i=0;i<numSeqs;i++){
+            double maxTau = -1.0000;
+            int maxOTU = -1;
+            for(int j=0;j<numOTUs;j++){
+                if(bigTauMatrix[i * numOTUs + j] > maxTau){
+                    maxTau = bigTauMatrix[i * numOTUs + j];
+                    maxOTU = j;
+                }
+            }
+            
+            otuData[i] = maxOTU;
+        }
+        
+        nSeqsPerOTU.assign(numOTUs, 0);                
+        
+        // Rebuild membership with one entry per sequence: full responsibility, zero distance.
+        for(int i=0;i<numSeqs;i++){
+            int index = otuData[i];
+            
+            singleTau[i] = 1.0000;
+            dist[i] = 0.0000;
+            
+            aaP[index][nSeqsPerOTU[index]] = i;
+            aaI[index][nSeqsPerOTU[index]] = i;
+            
+            nSeqsPerOTU[index]++;
+        }
+        fill();        
+    }
+    catch(exception& e) {
+        m->errorOut(e, "ShhherCommand", "setOTUs");
+        exit(1);       
+    }          
+}
+
+/**************************************************************************************************/
+// Deduplicates flowgrams: sequences whose rounded flow values agree over their shared length
+// are collapsed onto one "unique" representative.  Populates mapSeqToUnique / mapUniqueToSeq,
+// per-unique counts and lengths, the rounded uniqueFlowgrams table, and finally converts raw
+// flow intensities into probabilities (flowDataPrI) via getProbIntensity.
+void ShhherCommand::getUniques(){
+    try{
+        
+        
+        numUniques = 0;
+        uniqueFlowgrams.assign(numFlowCells * numSeqs, -1);
+        uniqueCount.assign(numSeqs, 0);                                                        //      anWeights
+        uniqueLengths.assign(numSeqs, 0);
+        mapSeqToUnique.assign(numSeqs, -1);
+        mapUniqueToSeq.assign(numSeqs, -1);
+        
+        vector<short> uniqueFlowDataIntI(numFlowCells * numSeqs, -1);
+        
+        for(int i=0;i<numSeqs;i++){
+            
+            if (m->control_pressed) { break; }
+            
+            int index = 0;
+            
+            // Round each intensity (stored x100) to its nearest homopolymer call.
+            vector<short> current(numFlowCells);
+            for(int j=0;j<numFlowCells;j++){
+                current[j] = short(((flowDataIntI[i * numFlowCells + j] + 50.0)/100.0));
+            }
+            
+            // Linear scan of existing uniques; match requires agreement over the shorter length.
+            for(int j=0;j<numUniques;j++){
+                int offset = j * numFlowCells;
+                bool toEnd = 1;
+                
+                int shorterLength;
+                if(lengths[i] < uniqueLengths[j])      {       shorterLength = lengths[i];                     }
+                else                                                           {       shorterLength = uniqueLengths[j];       }
+                
+                for(int k=0;k<shorterLength;k++){
+                    if(current[k] != uniqueFlowgrams[offset + k]){
+                        toEnd = 0;
+                        break;
+                    }
+                }
+                
+                if(toEnd){
+                    // Duplicate found: fold this sequence in and extend the unique's length.
+                    mapSeqToUnique[i] = j;
+                    uniqueCount[j]++;
+                    index = j;
+                    if(lengths[i] > uniqueLengths[j])  {       uniqueLengths[j] = lengths[i];  }
+                    break;
+                }
+                index++;
+            }
+            
+            // No match: register a brand-new unique flowgram.
+            if(index == numUniques){
+                uniqueLengths[numUniques] = lengths[i];
+                uniqueCount[numUniques] = 1;
+                mapSeqToUnique[i] = numUniques;//anMap
+                mapUniqueToSeq[numUniques] = i;//anF
+                
+                for(int k=0;k<numFlowCells;k++){
+                    uniqueFlowgrams[numUniques * numFlowCells + k] = current[k];
+                    uniqueFlowDataIntI[numUniques * numFlowCells + k] = flowDataIntI[i * numFlowCells + k];
+                }
+                
+                numUniques++;
+            }
+        }
+        uniqueFlowDataIntI.resize(numFlowCells * numUniques);
+        uniqueLengths.resize(numUniques);      
+        
+        // Precompute the probability of each observed intensity for all sequences.
+        flowDataPrI.resize(numSeqs * numFlowCells, 0);
+        for(int i=0;i<flowDataPrI.size();i++)  {       if (m->control_pressed) { break; } flowDataPrI[i] = getProbIntensity(flowDataIntI[i]);          }
+    }
+    catch(exception& e) {
+        m->errorOut(e, "ShhherCommand", "getUniques");
+        exit(1);
+    }
+}
+
+/**************************************************************************************************/
+
+// Average per-flow distance between two sequences' flowgrams over their shared prefix,
+// using the precomputed joint/individual intensity log-probability tables.
+// NOTE(review): the minimum-length logic is asymmetric — it compares lengths[seqB] but then
+// assigns lengths[mapSeqToUnique[seqB]]; confirm whether both sides should index through
+// mapSeqToUnique (as the initializer for seqA does).
+float ShhherCommand::calcPairwiseDist(int seqA, int seqB){
+    try{
+        int minLength = lengths[mapSeqToUnique[seqA]];
+        if(lengths[seqB] < minLength){ minLength = lengths[mapSeqToUnique[seqB]];      }
+        
+        int ANumFlowCells = seqA * numFlowCells;
+        int BNumFlowCells = seqB * numFlowCells;
+        
+        float dist = 0;
+        
+        for(int i=0;i<minLength;i++){
+            
+            if (m->control_pressed) { break; }
+            
+            int flowAIntI = flowDataIntI[ANumFlowCells + i];
+            float flowAPrI = flowDataPrI[ANumFlowCells + i];
+            
+            int flowBIntI = flowDataIntI[BNumFlowCells + i];
+            float flowBPrI = flowDataPrI[BNumFlowCells + i];
+            // Joint log-probability minus the two marginals at this flow position.
+            dist += jointLookUp[flowAIntI * NUMBINS + flowBIntI] - flowAPrI - flowBPrI;
+        }
+        
+        dist /= (float) minLength;
+        return dist;
+    }
+    catch(exception& e) {
+        m->errorOut(e, "ShhherCommand", "calcPairwiseDist");
+        exit(1);
+    }
+}
+
+//**********************************************************************************************************************/
+
+// Runs furthest-neighbor (complete linkage) clustering on a column-format distance file,
+// writes the resulting list file next to the flow file, and returns the list file's path.
+// Owns and deletes every object it allocates (read, name map, matrix, cluster, rabund, list).
+string ShhherCommand::cluster(string distFileName, string namesFileName){
+    try {
+        
+        ReadMatrix* read = new ReadColumnMatrix(distFileName);         
+        read->setCutoff(cutoff);
+        
+        NameAssignment* clusterNameMap = new NameAssignment(namesFileName);
+        clusterNameMap->readMap();
+        read->read(clusterNameMap);
+        
+        // The reader yields the list vector and sparse matrix; after that the reader and
+        // the name map are no longer needed.
+        ListVector* list = read->getListVector();
+        SparseMatrix* matrix = read->getMatrix();
+        
+        delete read; 
+        delete clusterNameMap; 
+        
+        RAbundVector* rabund = new RAbundVector(list->getRAbundVector());
+        
+        Cluster* cluster = new CompleteLinkage(rabund, list, matrix, cutoff, "furthest"); 
+        string tag = cluster->getTag();
+        
+        // Merge until the smallest remaining distance exceeds the cutoff or the matrix empties.
+        double clusterCutoff = cutoff;
+        while (matrix->getSmallDist() <= clusterCutoff && matrix->getNNodes() > 0){
+            
+            if (m->control_pressed) { break; }
+            
+            cluster->update(clusterCutoff);
+        }
+        
+        list->setLabel(toString(cutoff));
+        
+        // Output: <flow root>.shhh.list — written even after a cancel, so callers get a file.
+        string listFileName = flowFileName.substr(0,flowFileName.find_last_of('.')) + ".shhh.list";
+        ofstream listFile;
+        m->openOutputFile(listFileName, listFile);
+        list->print(listFile);
+        listFile.close();
+        
+        delete matrix; delete cluster; delete rabund; delete list;
+        
+        return listFileName;
+    }
+    catch(exception& e) {
+        m->errorOut(e, "ShhherCommand", "cluster");
+        exit(1);       
+    }          
+}
+
+/**************************************************************************************************/
+
+void ShhherCommand::calcCentroidsDriver(int start, int finish){                          
+    
+    //this function gets the most likely homopolymer length at a flow position for a group of sequences
+    //within an otu
+    
+    // For each OTU in [start, finish): pick as the new centroid the member's unique flowgram
+    // that minimizes the tau-weighted sum of distances to all OTU members.  Sets change[i]
+    // when the centroid moves so downstream distance updates can skip unchanged OTUs.
+    try{
+        
+        for(int i=start;i<finish;i++){
+            
+            if (m->control_pressed) { break; }
+            
+            double count = 0;
+            int position = 0;
+            int minFlowGram = 100000000;
+            double minFlowValue = 1e8;
+            change[i] = 0; //FALSE
+            
+            // Total responsibility mass in this OTU; tiny OTUs (<= MIN_COUNT) are retired below.
+            for(int j=0;j<nSeqsPerOTU[i];j++){
+                count += singleTau[seqNumber[cumNumSeqs[i] + j]];
+            }
+            
+            if(nSeqsPerOTU[i] > 0 && count > MIN_COUNT){
+                vector<double> adF(nSeqsPerOTU[i]);
+                vector<int> anL(nSeqsPerOTU[i]);
+                
+                // Collect the distinct unique-flowgram ids among members (candidate centroids);
+                // anL holds the candidates, position is how many were found.
+                for(int j=0;j<nSeqsPerOTU[i];j++){
+                    int index = cumNumSeqs[i] + j;
+                    int nI = seqIndex[index];
+                    int nIU = mapSeqToUnique[nI];
+                    
+                    int k;
+                    for(k=0;k<position;k++){
+                        if(nIU == anL[k]){
+                            break;
+                        }
+                    }
+                    if(k == position){
+                        anL[position] = nIU;
+                        adF[position] = 0.0000;
+                        position++;
+                    }                                          
+                }
+                
+                // Accumulate each candidate's tau-weighted distance to every member.
+                for(int j=0;j<nSeqsPerOTU[i];j++){
+                    int index = cumNumSeqs[i] + j;
+                    int nI = seqIndex[index];
+                    
+                    double tauValue = singleTau[seqNumber[index]];
+                    
+                    for(int k=0;k<position;k++){
+                        double dist = getDistToCentroid(anL[k], nI, lengths[nI]);
+                        adF[k] += dist * tauValue;
+                    }
+                }
+                
+                // Choose the candidate with the smallest accumulated distance.
+                for(int j=0;j<position;j++){
+                    if(adF[j] < minFlowValue){
+                        minFlowGram = j;
+                        minFlowValue = adF[j];
+                    }
+                }
+                
+                if(centroids[i] != anL[minFlowGram]){
+                    change[i] = 1;
+                    centroids[i] = anL[minFlowGram];
+                }
+            }
+            else if(centroids[i] != -1){
+                // OTU is effectively empty: drop its centroid and flag the change.
+                change[i] = 1;
+                centroids[i] = -1;                     
+            }
+        }
+    }
+    catch(exception& e) {
+        m->errorOut(e, "ShhherCommand", "calcCentroidsDriver");
+        exit(1);       
+    }          
+}
+
+/**************************************************************************************************/
+
+double ShhherCommand::getDistToCentroid(int cent, int flow, int length){
+    try{
+        
+        int flowAValue = cent * numFlowCells;
+        int flowBValue = flow * numFlowCells;
+        
+        double dist = 0;
+        
+        for(int i=0;i<length;i++){
+            dist += singleLookUp[uniqueFlowgrams[flowAValue] * NUMBINS + flowDataIntI[flowBValue]];
+            flowAValue++;
+            flowBValue++;
+        }
+        
+        return dist / (double)length;
+    }
+    catch(exception& e) {
+        m->errorOut(e, "ShhherCommand", "getDistToCentroid");
+        exit(1);       
+    }          
+}
+
+/**************************************************************************************************/
+
+double ShhherCommand::getNewWeights(){
+    try{
+        
+        double maxChange = 0;
+        
+        for(int i=0;i<numOTUs;i++){
+            
+            if (m->control_pressed) { break; }
+            
+            double difference = weight[i];
+            weight[i] = 0;
+            
+            for(int j=0;j<nSeqsPerOTU[i];j++){
+                int index = cumNumSeqs[i] + j;
+                double tauValue = singleTau[seqNumber[index]];
+                weight[i] += tauValue;
+            }
+            
+            difference = fabs(weight[i] - difference);
+            if(difference > maxChange){        maxChange = difference; }
+        }
+        return maxChange;
+    }
+    catch(exception& e) {
+        m->errorOut(e, "ShhherCommand", "getNewWeights");
+        exit(1);       
+    }          
+}
+ /**************************************************************************************************/
+double ShhherCommand::getLikelihood(){
+    
+    try{
+        
+        vector<long double> P(numSeqs, 0);
+        int effNumOTUs = 0;
+        
+        for(int i=0;i<numOTUs;i++){
+            if(weight[i] > MIN_WEIGHT){
+                effNumOTUs++;
+            }
+        }
+        
+        string hold;
+        for(int i=0;i<numOTUs;i++){
+            
+            if (m->control_pressed) { break; }
+            
+            for(int j=0;j<nSeqsPerOTU[i];j++){
+                int index = cumNumSeqs[i] + j;
+                int nI = seqIndex[index];
+                double singleDist = dist[seqNumber[index]];
+                
+                P[nI] += weight[i] * exp(-singleDist * sigma);
+            }
+        }
+        double nLL = 0.00;
+        for(int i=0;i<numSeqs;i++){
+            if(P[i] == 0){     P[i] = DBL_EPSILON;     }
+            
+            nLL += -log(P[i]);
+        }
+        
+        nLL = nLL -(double)numSeqs * log(sigma);
+        
+        return nLL; 
+    }
+    catch(exception& e) {
+        m->errorOut(e, "ShhherCommand", "getNewWeights");
+        exit(1);       
+    }          
+}
+
+/**************************************************************************************************/
+
+void ShhherCommand::checkCentroids(){
+    try{
+        vector<int> unique(numOTUs, 1);
+        
+        for(int i=0;i<numOTUs;i++){
+            if(centroids[i] == -1 || weight[i] < MIN_WEIGHT){
+                unique[i] = -1;
+            }
+        }
+        
+        for(int i=0;i<numOTUs;i++){
+            
+            if (m->control_pressed) { break; }
+            
+            if(unique[i] == 1){
+                for(int j=i+1;j<numOTUs;j++){
+                    if(unique[j] == 1){
+                        
+                        if(centroids[j] == centroids[i]){
+                            unique[j] = 0;
+                            centroids[j] = -1;
+                            
+                            weight[i] += weight[j];
+                            weight[j] = 0.0;
+                        }
+                    }
+                }
+            }
+        }
+    }
+    catch(exception& e) {
+        m->errorOut(e, "ShhherCommand", "checkCentroids");
+        exit(1);       
+    }          
+}
+ /**************************************************************************************************/
+
+
+// Writes a .shhh.qual file with Phred-style quality scores for each surviving OTU's
+// denoised sequence.  Qualities are derived from the tau-weighted posterior over
+// homopolymer lengths at each flow position; the first four flow bases are skipped
+// on output (TCAG key).
+void ShhherCommand::writeQualities(vector<int> otuCounts){
+    
+    try {
+        string thisOutputDir = outputDir;
+        if (outputDir == "") {  thisOutputDir += m->hasPath(flowFileName);  }
+        string qualityFileName = thisOutputDir + m->getRootName(m->getSimpleName(flowFileName)) + "shhh.qual";
+        
+        ofstream qualityFile;
+        m->openOutputFile(qualityFileName, qualityFile);
+        
+        qualityFile.setf(ios::fixed, ios::floatfield);
+        qualityFile.setf(ios::showpoint);
+        qualityFile << setprecision(6);
+        
+        vector<vector<int> > qualities(numOTUs);
+        vector<double> pr(HOMOPS, 0);
+        
+        
+        for(int i=0;i<numOTUs;i++){
+            
+            if (m->control_pressed) { break; }
+            
+            int index = 0;
+            int base = 0;
+            
+            if(nSeqsPerOTU[i] > 0){
+                // -1 sentinel marks the end of the written qualities for this OTU.
+                qualities[i].assign(1024, -1);
+                
+                while(index < numFlowCells){
+                    double maxPrValue = 1e8;
+                    short maxPrIndex = -1;
+                    double count = 0.0000;
+                    
+                    pr.assign(HOMOPS, 0);
+                    
+                    // Tau-weighted log-probability of each homopolymer length at this flow,
+                    // accumulated over all OTU members' observed intensities.
+                    for(int j=0;j<nSeqsPerOTU[i];j++){
+                        int lIndex = cumNumSeqs[i] + j;
+                        double tauValue = singleTau[seqNumber[lIndex]];
+                        int sequenceIndex = aaI[i][j];
+                        short intensity = flowDataIntI[sequenceIndex * numFlowCells + index];
+                        
+                        count += tauValue;
+                        
+                        for(int s=0;s<HOMOPS;s++){
+                            pr[s] += tauValue * singleLookUp[s * NUMBINS + intensity];
+                        }
+                    }
+                    
+                    // The called homopolymer length comes from the centroid's flowgram.
+                    maxPrIndex = uniqueFlowgrams[centroids[i] * numFlowCells + index];
+                    maxPrValue = pr[maxPrIndex];
+                    
+                    if(count > MIN_COUNT){
+                        double U = 0.0000;
+                        double norm = 0.0000;
+                        
+                        for(int s=0;s<HOMOPS;s++){
+                            norm += exp(-(pr[s] - maxPrValue));
+                        }
+                        
+                        // One quality per called base: Phred-style -10*log10 of the cumulative
+                        // probability mass below each length, capped at 100.
+                        for(int s=1;s<=maxPrIndex;s++){
+                            int value = 0;
+                            double temp = 0.0000;
+                            
+                            U += exp(-(pr[s-1]-maxPrValue))/norm;
+                            
+                            if(U>0.00){
+                                temp = log10(U);
+                            }
+                            else{
+                                temp = -10.1;
+                            }
+                            temp = floor(-10 * temp);
+                            value = (int)floor(temp);
+                            if(value > 100){   value = 100;    }
+                            
+                            qualities[i][base] = (int)value;
+                            base++;
+                        }
+                    }
+                    
+                    index++;
+                }
+            }
+            
+            
+            if(otuCounts[i] > 0){
+                // NOTE(review): header name is seqNameVector[mapUniqueToSeq[i]] with i an OTU
+                // index, while writeSequences uses seqNameVector[aaI[i][0]] — confirm these
+                // agree for matching fasta/qual records.
+                qualityFile << '>' << seqNameVector[mapUniqueToSeq[i]] << endl;
+                
+                int j=4;       //need to get past the first four bases
+                while(qualities[i][j] != -1){
+                    qualityFile << qualities[i][j] << ' ';
+                    j++;
+                }
+                qualityFile << endl;
+            }
+        }
+        qualityFile.close();
+        outputNames.push_back(qualityFileName);
+        
+    }
+    catch(exception& e) {
+        m->errorOut(e, "ShhherCommand", "writeQualities");
+        exit(1);       
+    }          
+}
+
+/**************************************************************************************************/
+
+// Writes a .shhh.fasta file containing each surviving OTU's denoised sequence, translated
+// from the centroid flowgram (flow order repeated every 4 flows, first 4 key bases dropped).
+// Appends the result to the composite fasta when one is being built.
+void ShhherCommand::writeSequences(vector<int> otuCounts){
+    try {
+        string thisOutputDir = outputDir;
+        if (outputDir == "") {  thisOutputDir += m->hasPath(flowFileName);  }
+        string fastaFileName = thisOutputDir + m->getRootName(m->getSimpleName(flowFileName)) + "shhh.fasta";
+        ofstream fastaFile;
+        m->openOutputFile(fastaFileName, fastaFile);
+        
+        vector<string> names(numOTUs, "");
+        
+        for(int i=0;i<numOTUs;i++){
+            
+            if (m->control_pressed) { break; }
+            
+            int index = centroids[i];
+            
+            if(otuCounts[i] > 0){
+                // OTU is labeled with its first member's sequence name.
+                fastaFile << '>' << seqNameVector[aaI[i][0]] << endl;
+                
+                string newSeq = "";
+                
+                // Expand the centroid flowgram: emit each flow's base repeated by its
+                // called homopolymer length.
+                for(int j=0;j<numFlowCells;j++){
+                    
+                    char base = flowOrder[j % 4];
+                    for(int k=0;k<uniqueFlowgrams[index * numFlowCells + j];k++){
+                        newSeq += base;
+                    }
+                }
+                
+                fastaFile << newSeq.substr(4) << endl;
+            }
+        }
+        fastaFile.close();
+        
+        outputNames.push_back(fastaFileName);
+        
+        if(compositeFASTAFileName != ""){
+            m->appendFiles(fastaFileName, compositeFASTAFileName);
+        }
+    }
+    catch(exception& e) {
+        m->errorOut(e, "ShhherCommand", "writeSequences");
+        exit(1);       
+    }          
+}
+
+/**************************************************************************************************/
+
+// Writes a mothur .shhh.names file: one line per surviving OTU, with the representative
+// (first member) name in column 1 and the comma-separated member names in column 2.
+// Appends to the composite names file when one is being built.
+void ShhherCommand::writeNames(vector<int> otuCounts){
+    try {
+        string thisOutputDir = outputDir;
+        if (outputDir == "") {  thisOutputDir += m->hasPath(flowFileName);  }
+        string nameFileName = thisOutputDir + m->getRootName(m->getSimpleName(flowFileName)) + "shhh.names";
+        ofstream nameFile;
+        m->openOutputFile(nameFileName, nameFile);
+        
+        for(int i=0;i<numOTUs;i++){
+            
+            if (m->control_pressed) { break; }
+            
+            if(otuCounts[i] > 0){
+                // Representative name appears twice: as the key and as the first member.
+                nameFile << seqNameVector[aaI[i][0]] << '\t' << seqNameVector[aaI[i][0]];
+                
+                for(int j=1;j<nSeqsPerOTU[i];j++){
+                    nameFile << ',' << seqNameVector[aaI[i][j]];
+                }
+                
+                nameFile << endl;
+            }
+        }
+        nameFile.close();
+        outputNames.push_back(nameFileName);
+        
+        
+        if(compositeNamesFileName != ""){
+            m->appendFiles(nameFileName, compositeNamesFileName);
+        }              
+    }
+    catch(exception& e) {
+        m->errorOut(e, "ShhherCommand", "writeNames");
+        exit(1);       
+    }          
+}
+
+/**************************************************************************************************/
+
+void ShhherCommand::writeGroups(){
+    try {
+        string thisOutputDir = outputDir;
+        if (outputDir == "") {  thisOutputDir += m->hasPath(flowFileName);  }
+        string fileRoot = thisOutputDir + m->getRootName(m->getSimpleName(flowFileName));
+        string groupFileName = fileRoot + "shhh.groups";
+        ofstream groupFile;
+        m->openOutputFile(groupFileName, groupFile);
+        
+        for(int i=0;i<numSeqs;i++){
+            if (m->control_pressed) { break; }
+            groupFile << seqNameVector[i] << '\t' << fileRoot << endl;
+        }
+        groupFile.close();
+        outputNames.push_back(groupFileName);
+        
+    }
+    catch(exception& e) {
+        m->errorOut(e, "ShhherCommand", "writeGroups");
+        exit(1);       
+    }          
+}
+
+/**************************************************************************************************/
+
+// Writes a .shhh.counts diagnostic file: for each surviving OTU, the translated centroid
+// ("ideal") sequence followed by every member's rounded-flow translation, so the denoised
+// call can be compared against the raw reads.
+void ShhherCommand::writeClusters(vector<int> otuCounts){
+    try {
+        string thisOutputDir = outputDir;
+        if (outputDir == "") {  thisOutputDir += m->hasPath(flowFileName);  }
+        string otuCountsFileName = thisOutputDir + m->getRootName(m->getSimpleName(flowFileName)) + "shhh.counts";
+        ofstream otuCountsFile;
+        m->openOutputFile(otuCountsFileName, otuCountsFile);
+        
+        string bases = flowOrder;
+        
+        for(int i=0;i<numOTUs;i++){
+            
+            if (m->control_pressed) {
+                break;
+            }
+            //output the translated version of the centroid sequence for the otu
+            if(otuCounts[i] > 0){
+                int index = centroids[i];
+                
+                // "ideal" line: centroid flowgram expanded to bases, skipping the 8 key flows.
+                otuCountsFile << "ideal\t";
+                for(int j=8;j<numFlowCells;j++){
+                    char base = bases[j % 4];
+                    for(int s=0;s<uniqueFlowgrams[index * numFlowCells + j];s++){
+                        otuCountsFile << base;
+                    }
+                }
+                otuCountsFile << endl;
+                
+                // One line per member: raw intensities rounded to homopolymer lengths and
+                // expanded to bases, with the first four key bases trimmed.
+                for(int j=0;j<nSeqsPerOTU[i];j++){
+                    int sequence = aaI[i][j];
+                    otuCountsFile << seqNameVector[sequence] << '\t';
+                    
+                    string newSeq = "";
+                    
+                    for(int k=0;k<lengths[sequence];k++){
+                        char base = bases[k % 4];
+                        // Intensities are stored x100; +0.5 rounds to the nearest call.
+                        int freq = int(0.01 * (double)flowDataIntI[sequence * numFlowCells + k] + 0.5);
+                        
+                        for(int s=0;s<freq;s++){
+                            newSeq += base;
+                            //otuCountsFile << base;
+                        }
+                    }
+                    otuCountsFile << newSeq.substr(4) << endl;
+                }
+                otuCountsFile << endl;
+            }
+        }
+        otuCountsFile.close();
+        outputNames.push_back(otuCountsFileName);
+        
+    }
+    catch(exception& e) {
+        m->errorOut(e, "ShhherCommand", "writeClusters");
+        exit(1);       
+    }          
+}
 
 #else
 //**********************************************************************************************************************
@@ -755,55 +1818,213 @@ int ShhherCommand::execute(){
                
                getSingleLookUp();      if (m->control_pressed) { return 0; }
                getJointLookUp();       if (m->control_pressed) { return 0; }
-                               
                
-               vector<string> flowFileVector;
-               if(flowFilesFileName != "not found"){
-                       string fName;
+        int numFiles = flowFileVector.size();
+               
+        if (numFiles < processors) { processors = numFiles; }
+        
+#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
+        if (processors == 1) { driver(flowFileVector, compositeFASTAFileName, compositeNamesFileName, 0, flowFileVector.size()); }
+        else { createProcesses(flowFileVector); } //each processor processes one file
+#else
+        driver(flowFileVector, compositeFASTAFileName, compositeNamesFileName, 0, flowFileVector.size());
+#endif
+        
+               if(compositeFASTAFileName != ""){
+                       outputNames.push_back(compositeFASTAFileName);
+                       outputNames.push_back(compositeNamesFileName);
+               }
+
+               m->mothurOutEndLine();
+               m->mothurOut("Output File Names: "); m->mothurOutEndLine();
+               for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
+               m->mothurOutEndLine();
+               
+               return 0;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "ShhherCommand", "execute");
+               exit(1);
+       }
+}
+#endif
+/**************************************************************************************************/
+
+int ShhherCommand::createProcesses(vector<string> filenames){
+    try {
+        vector<int> processIDS;
+               int process = 1;
+               int num = 0;
+               
+               //sanity check
+               if (filenames.size() < processors) { processors = filenames.size(); }
+               
+               //divide the groups between the processors
+               vector<linePair> lines;
+               int numFilesPerProcessor = filenames.size() / processors;
+               for (int i = 0; i < processors; i++) {
+                       int startIndex =  i * numFilesPerProcessor;
+                       int endIndex = (i+1) * numFilesPerProcessor;
+                       if(i == (processors - 1)){      endIndex = filenames.size();    }
+                       lines.push_back(linePair(startIndex, endIndex));
+               }
+               
+        #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)          
+               
+               //loop through and create all the processes you want
+               while (process != processors) {
+                       int pid = fork();
                        
-                       ifstream flowFilesFile;
-                       m->openInputFile(flowFilesFileName, flowFilesFile);
-                       while(flowFilesFile){
-                               fName = m->getline(flowFilesFile);
-                               flowFileVector.push_back(fName);
-                               m->gobble(flowFilesFile);
+                       if (pid > 0) {
+                               processIDS.push_back(pid);  //create map from line number to pid so you can append files in correct order later
+                               process++;
+                       }else if (pid == 0){
+                               num = driver(filenames, compositeFASTAFileName + toString(getpid()) + ".temp", compositeNamesFileName  + toString(getpid()) + ".temp", lines[process].start, lines[process].end);
+                               exit(0);
+                       }else { 
+                               m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine(); 
+                               for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
+                               exit(0);
                        }
                }
-               else{
-                       flowFileVector.push_back(flowFileName);
+               
+               //do my part
+               driver(filenames, compositeFASTAFileName, compositeNamesFileName, lines[0].start, lines[0].end);
+               
+               //force parent to wait until all the processes are done
+               for (int i=0;i<processIDS.size();i++) { 
+                       int temp = processIDS[i];
+                       wait(&temp);
+               }
+        
+        #else
+        
+        //////////////////////////////////////////////////////////////////////////////////////////////////////
+        
+        /////////////////////// NOT WORKING, ACCESS VIOLATION ON READ OF FLOWGRAMS IN THREAD /////////////////
+        
+        //////////////////////////////////////////////////////////////////////////////////////////////////////
+               //Windows version shared memory, so be careful when passing variables through the shhhFlowsData struct. 
+               //Above fork() will clone, so memory is separate, but that's not the case with windows, 
+               //////////////////////////////////////////////////////////////////////////////////////////////////////
+               
+               vector<shhhFlowsData*> pDataArray; 
+               DWORD   dwThreadIdArray[processors-1];
+               HANDLE  hThreadArray[processors-1]; 
+               
+               //Create processor worker threads.
+               for( int i=0; i<processors-1; i++ ){
+                       // Allocate memory for thread data.
+                       string extension = "";
+                       if (i != 0) { extension = toString(i) + ".temp"; }
+                       
+            shhhFlowsData* tempFlow = new shhhFlowsData(filenames, (compositeFASTAFileName + extension), (compositeNamesFileName + extension), outputDir, flowOrder, jointLookUp, singleLookUp, m, lines[i].start, lines[i].end, cutoff, sigma, minDelta, maxIters, i);
+                       pDataArray.push_back(tempFlow);
+                       processIDS.push_back(i);
+            
+                       hThreadArray[i] = CreateThread(NULL, 0, ShhhFlowsThreadFunction, pDataArray[i], 0, &dwThreadIdArray[i]);   
                }
-               int numFiles = flowFileVector.size();
                
+               //using the main process as a worker saves time and memory
+               //do my part
+               driver(filenames, compositeFASTAFileName, compositeNamesFileName, lines[processors-1].start, lines[processors-1].end);
+               
+               //Wait until all threads have terminated.
+               WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE);
+               
+               //Close all thread handles and free memory allocations.
+               for(int i=0; i < pDataArray.size(); i++){
+                       for(int j=0; j < pDataArray[i]->outputNames.size(); j++){ outputNames.push_back(pDataArray[i]->outputNames[j]); }
+                       CloseHandle(hThreadArray[i]);
+                       delete pDataArray[i];
+               }
                
-               for(int i=0;i<numFiles;i++){
+        #endif
+        
+        for (int i=0;i<processIDS.size();i++) { 
+            if (compositeFASTAFileName != "") {
+                m->appendFiles((compositeFASTAFileName + toString(processIDS[i]) + ".temp"), compositeFASTAFileName);
+                m->appendFiles((compositeNamesFileName + toString(processIDS[i]) + ".temp"), compositeNamesFileName);
+                m->mothurRemove((compositeFASTAFileName + toString(processIDS[i]) + ".temp"));
+                m->mothurRemove((compositeNamesFileName + toString(processIDS[i]) + ".temp"));
+            }
+        }
+        
+        return 0;
+        
+    }
+       catch(exception& e) {
+               m->errorOut(e, "ShhherCommand", "createProcesses");
+               exit(1);
+       }
+}
+/**************************************************************************************************/
+
+int ShhherCommand::driver(vector<string> filenames, string thisCompositeFASTAFileName, string thisCompositeNamesFileName, int start, int end){
+    try {
+        
+        for(int i=start;i<end;i++){
                        
                        if (m->control_pressed) { break; }
                        
-                       flowFileName = flowFileVector[i];
-
-                       m->mothurOut("\n>>>>>\tProcessing " + flowFileName + " (file " + toString(i+1) + " of " + toString(numFiles) + ")\t<<<<<\n");
+                       string flowFileName = filenames[i];
+            
+                       m->mothurOut("\n>>>>>\tProcessing " + flowFileName + " (file " + toString(i+1) + " of " + toString(filenames.size()) + ")\t<<<<<\n");
                        m->mothurOut("Reading flowgrams...\n");
-                       getFlowData();
+                       
+            vector<string> seqNameVector;
+            vector<int> lengths;
+            vector<short> flowDataIntI;
+            vector<double> flowDataPrI;
+            map<string, int> nameMap;
+            vector<short> uniqueFlowgrams;
+            vector<int> uniqueCount;
+            vector<int> mapSeqToUnique;
+            vector<int> mapUniqueToSeq;
+            vector<int> uniqueLengths;
+            int numFlowCells;
+            
+            int numSeqs = getFlowData(flowFileName, seqNameVector, lengths, flowDataIntI, nameMap, numFlowCells);
                        
                        if (m->control_pressed) { break; }
                        
                        m->mothurOut("Identifying unique flowgrams...\n");
-                       getUniques();
+                       int numUniques = getUniques(numSeqs, numFlowCells, uniqueFlowgrams, uniqueCount, uniqueLengths, mapSeqToUnique, mapUniqueToSeq, lengths, flowDataPrI, flowDataIntI);
                        
                        if (m->control_pressed) { break; }
                        
                        m->mothurOut("Calculating distances between flowgrams...\n");
-                       string distFileName = createDistFile(processors);
-                       string namesFileName = createNamesFile();
+            string distFileName = flowFileName.substr(0,flowFileName.find_last_of('.')) + ".shhh.dist";
+            unsigned long long begTime = time(NULL);
+            double begClock = clock();
+        
+            flowDistParentFork(numFlowCells, distFileName, numUniques, mapUniqueToSeq, mapSeqToUnique, lengths, flowDataPrI, flowDataIntI);    
+            
+            m->mothurOutEndLine();
+            m->mothurOut("Total time: " + toString(time(NULL) - begTime) + '\t' + toString((clock() - begClock)/CLOCKS_PER_SEC) + '\n');
+
+            
+                       string namesFileName = flowFileName.substr(0,flowFileName.find_last_of('.')) + ".shhh.names";
+                       createNamesFile(numSeqs, numUniques, namesFileName, seqNameVector, mapSeqToUnique, mapUniqueToSeq);
                        
                        if (m->control_pressed) { break; }
                        
                        m->mothurOut("\nClustering flowgrams...\n");
-                       string listFileName = cluster(distFileName, namesFileName);
+            string listFileName = flowFileName.substr(0,flowFileName.find_last_of('.')) + ".shhh.list";
+                       cluster(listFileName, distFileName, namesFileName);
                        
                        if (m->control_pressed) { break; }
+            
+            vector<int> otuData;
+            vector<int> cumNumSeqs;
+            vector<int> nSeqsPerOTU;
+            vector<vector<int> > aaP;  //tMaster->aanP:        each row is a different otu / each col contains the sequence indices
+            vector<vector<int> > aaI;  //tMaster->aanI:        that are in each otu - can't differentiate between aaP and aaI 
+            vector<int> seqNumber;             //tMaster->anP:         the sequence id number sorted by OTU
+            vector<int> seqIndex;              //tMaster->anI;         the index that corresponds to seqNumber
+
                        
-                       getOTUData(listFileName);
+                       int numOTUs = getOTUData(numSeqs, listFileName, otuData, cumNumSeqs, nSeqsPerOTU, aaP, aaI, seqNumber, seqIndex, nameMap);
                        
                        if (m->control_pressed) { break; }
                        
@@ -811,16 +2032,35 @@ int ShhherCommand::execute(){
                        m->mothurRemove(namesFileName);
                        m->mothurRemove(listFileName);
                        
-                       initPyroCluster();
+            vector<double> dist;               //adDist - distance of sequences to centroids
+            vector<short> change;              //did the centroid sequence change? 0 = no; 1 = yes
+            vector<int> centroids;             //the representative flowgram for each cluster m
+            vector<double> weight;
+            vector<double> singleTau;  //tMaster->adTau:       1-D Tau vector (1xnumSeqs)
+            vector<int> nSeqsBreaks;
+            vector<int> nOTUsBreaks;
+            
+                       dist.assign(numSeqs * numOTUs, 0);
+            change.assign(numOTUs, 1);
+            centroids.assign(numOTUs, -1);
+            weight.assign(numOTUs, 0);
+            singleTau.assign(numSeqs, 1.0);
+            
+            nSeqsBreaks.assign(2, 0);
+            nOTUsBreaks.assign(2, 0);
+            
+            nSeqsBreaks[0] = 0;
+            nSeqsBreaks[1] = numSeqs;
+            nOTUsBreaks[1] = numOTUs;
                        
                        if (m->control_pressed) { break; }
                        
                        double maxDelta = 0;
                        int iter = 0;
                        
-                       double begClock = clock();
-                       unsigned long long begTime = time(NULL);
-
+                       begClock = clock();
+                       begTime = time(NULL);
+            
                        m->mothurOut("\nDenoising flowgrams...\n");
                        m->mothurOut("iter\tmaxDelta\tnLL\t\tcycletime\n");
                        
@@ -830,92 +2070,88 @@ int ShhherCommand::execute(){
                                
                                double cycClock = clock();
                                unsigned long long cycTime = time(NULL);
-                               fill();
+                               fill(numOTUs, seqNumber, seqIndex, cumNumSeqs, nSeqsPerOTU, aaP, aaI);
                                
                                if (m->control_pressed) { break; }
-
-                               calcCentroids();
+                
+                               calcCentroidsDriver(numOTUs, cumNumSeqs, nSeqsPerOTU, seqIndex, change, centroids, singleTau, mapSeqToUnique, uniqueFlowgrams, flowDataIntI, lengths, numFlowCells, seqNumber);
                                
                                if (m->control_pressed) { break; }
-
-                               maxDelta = getNewWeights();  if (m->control_pressed) { break; }
-                               double nLL = getLikelihood(); if (m->control_pressed) { break; }
-                               checkCentroids();
+                
+                               maxDelta = getNewWeights(numOTUs, cumNumSeqs, nSeqsPerOTU, singleTau, seqNumber, weight);  
+                
+                if (m->control_pressed) { break; }
+                
+                               double nLL = getLikelihood(numSeqs, numOTUs, nSeqsPerOTU, seqNumber, cumNumSeqs, seqIndex, dist, weight); 
+                
+                if (m->control_pressed) { break; }
+                
+                               checkCentroids(numOTUs, centroids, weight);
                                
                                if (m->control_pressed) { break; }
                                
-                               calcNewDistances();
+                               calcNewDistances(numSeqs, numOTUs, nSeqsPerOTU,  dist, weight, change, centroids, aaP, singleTau, aaI, seqNumber, seqIndex, uniqueFlowgrams, flowDataIntI, numFlowCells, lengths);
                                
                                if (m->control_pressed) { break; }
                                
                                iter++;
                                
                                m->mothurOut(toString(iter) + '\t' + toString(maxDelta) + '\t' + toString(nLL) + '\t' + toString(time(NULL) - cycTime) + '\t' + toString((clock() - cycClock)/(double)CLOCKS_PER_SEC) + '\n');
-
+                
                        }       
                        
                        if (m->control_pressed) { break; }
                        
                        m->mothurOut("\nFinalizing...\n");
-                       fill();
+                       fill(numOTUs, seqNumber, seqIndex, cumNumSeqs, nSeqsPerOTU, aaP, aaI);
                        
                        if (m->control_pressed) { break; }
                        
-                       setOTUs();
+                       setOTUs(numOTUs, numSeqs, seqNumber, seqIndex, cumNumSeqs, nSeqsPerOTU, otuData, singleTau, dist, aaP, aaI);
                        
                        if (m->control_pressed) { break; }
                        
                        vector<int> otuCounts(numOTUs, 0);
                        for(int i=0;i<numSeqs;i++)      {       otuCounts[otuData[i]]++;        }
                        
-                       calcCentroidsDriver(0, numOTUs);        if (m->control_pressed) { break; }
-                       writeQualities(otuCounts);                      if (m->control_pressed) { break; }
-                       writeSequences(otuCounts);                      if (m->control_pressed) { break; }
-                       writeNames(otuCounts);                          if (m->control_pressed) { break; }
-                       writeClusters(otuCounts);                       if (m->control_pressed) { break; }
-                       writeGroups();                                          if (m->control_pressed) { break; }
+                       calcCentroidsDriver(numOTUs, cumNumSeqs, nSeqsPerOTU, seqIndex, change, centroids, singleTau, mapSeqToUnique, uniqueFlowgrams, flowDataIntI, lengths, numFlowCells, seqNumber); 
+            
+            if (m->control_pressed) { break; }
+            
+                       writeQualities(numOTUs, numFlowCells, flowFileName, otuCounts, nSeqsPerOTU, seqNumber, singleTau, flowDataIntI, uniqueFlowgrams, cumNumSeqs, mapUniqueToSeq, seqNameVector, centroids, aaI); if (m->control_pressed) { break; }
+                       writeSequences(thisCompositeFASTAFileName, numOTUs, numFlowCells, flowFileName, otuCounts, uniqueFlowgrams, seqNameVector, aaI, centroids);if (m->control_pressed) { break; }
+                       writeNames(thisCompositeNamesFileName, numOTUs, flowFileName, otuCounts, seqNameVector, aaI, nSeqsPerOTU);                              if (m->control_pressed) { break; }
+                       writeClusters(flowFileName, numOTUs, numFlowCells,otuCounts, centroids, uniqueFlowgrams, seqNameVector, aaI, nSeqsPerOTU, lengths, flowDataIntI);                       if (m->control_pressed) { break; }
+                       writeGroups(flowFileName, numSeqs, seqNameVector);                                              if (m->control_pressed) { break; }
                        
                        m->mothurOut("Total time to process " + flowFileName + ":\t" + toString(time(NULL) - begTime) + '\t' + toString((clock() - begClock)/(double)CLOCKS_PER_SEC) + '\n');
                }
                
-               if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
-
-               
-               if(compositeFASTAFileName != ""){
-                       outputNames.push_back(compositeFASTAFileName);
-                       outputNames.push_back(compositeNamesFileName);
-               }
-
-               m->mothurOutEndLine();
-               m->mothurOut("Output File Names: "); m->mothurOutEndLine();
-               for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
-               m->mothurOutEndLine();
-               
-               return 0;
-       }
-       catch(exception& e) {
-               m->errorOut(e, "ShhherCommand", "execute");
-               exit(1);
-       }
+        if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
+        
+        return 0;
+        
+    }catch(exception& e) {
+            m->errorOut(e, "ShhherCommand", "driver");
+            exit(1);
+    }
 }
-#endif
-/**************************************************************************************************/
 
-void ShhherCommand::getFlowData(){
+/**************************************************************************************************/
+int ShhherCommand::getFlowData(string filename, vector<string>& thisSeqNameVector, vector<int>& thisLengths, vector<short>& thisFlowDataIntI, map<string, int>& thisNameMap, int& numFlowCells){
        try{
+       
                ifstream flowFile;
-               m->openInputFile(flowFileName, flowFile);
+       
+               m->openInputFile(filename, flowFile);
                
                string seqName;
-               seqNameVector.clear();
-               lengths.clear();
-               flowDataIntI.clear();
-               nameMap.clear();
-               
-               
                int currentNumFlowCells;
-               
                float intensity;
+        thisSeqNameVector.clear();
+               thisLengths.clear();
+               thisFlowDataIntI.clear();
+               thisNameMap.clear();
                
                flowFile >> numFlowCells;
                int index = 0;//pcluster
@@ -924,32 +2160,34 @@ void ShhherCommand::getFlowData(){
                        if (m->control_pressed) { break; }
                        
                        flowFile >> seqName >> currentNumFlowCells;
-                       lengths.push_back(currentNumFlowCells);
-
-                       seqNameVector.push_back(seqName);
-                       nameMap[seqName] = index++;//pcluster
+                       thisLengths.push_back(currentNumFlowCells);
+           
+                       thisSeqNameVector.push_back(seqName);
+                       thisNameMap[seqName] = index++;//pcluster
 
                        for(int i=0;i<numFlowCells;i++){
                                flowFile >> intensity;
                                if(intensity > 9.99)    {       intensity = 9.99;       }
                                int intI = int(100 * intensity + 0.0001);
-                               flowDataIntI.push_back(intI);
+                               thisFlowDataIntI.push_back(intI);
                        }
                        m->gobble(flowFile);
                }
                flowFile.close();
                
-               numSeqs = seqNameVector.size();         
+               int numSeqs = thisSeqNameVector.size();         
                
                for(int i=0;i<numSeqs;i++){
                        
                        if (m->control_pressed) { break; }
                        
                        int iNumFlowCells = i * numFlowCells;
-                       for(int j=lengths[i];j<numFlowCells;j++){
-                               flowDataIntI[iNumFlowCells + j] = 0;
+                       for(int j=thisLengths[i];j<numFlowCells;j++){
+                               thisFlowDataIntI[iNumFlowCells + j] = 0;
                        }
                }
+        
+        return numSeqs;
                
        }
        catch(exception& e) {
@@ -957,100 +2195,97 @@ void ShhherCommand::getFlowData(){
                exit(1);
        }
 }
-
 /**************************************************************************************************/
 
-void ShhherCommand::getSingleLookUp(){
-       try{
-               //      these are the -log probabilities that a signal corresponds to a particular homopolymer length
-               singleLookUp.assign(HOMOPS * NUMBINS, 0);
+int ShhherCommand::flowDistParentFork(int numFlowCells, string distFileName, int stopSeq, vector<int>& mapUniqueToSeq, vector<int>& mapSeqToUnique, vector<int>& lengths, vector<double>& flowDataPrI, vector<short>& flowDataIntI){
+       try{            
+        
+               ostringstream outStream;
+               outStream.setf(ios::fixed, ios::floatfield);
+               outStream.setf(ios::dec, ios::basefield);
+               outStream.setf(ios::showpoint);
+               outStream.precision(6);
                
-               int index = 0;
-               ifstream lookUpFile;
-               m->openInputFile(lookupFileName, lookUpFile);
-               
-               for(int i=0;i<HOMOPS;i++){
+               int begTime = time(NULL);
+               double begClock = clock();
+        
+               for(int i=0;i<stopSeq;i++){
                        
                        if (m->control_pressed) { break; }
                        
-                       float logFracFreq;
-                       lookUpFile >> logFracFreq;
-                       
-                       for(int j=0;j<NUMBINS;j++)      {
-                               lookUpFile >> singleLookUp[index];
-                               index++;
+                       for(int j=0;j<i;j++){
+                               float flowDistance = calcPairwiseDist(numFlowCells, mapUniqueToSeq[i], mapUniqueToSeq[j], mapSeqToUnique, lengths, flowDataPrI, flowDataIntI);
+                
+                               if(flowDistance < 1e-6){
+                                       outStream << mapUniqueToSeq[i] << '\t' << mapUniqueToSeq[j] << '\t' << 0.000000 << endl;
+                               }
+                               else if(flowDistance <= cutoff){
+                                       outStream << mapUniqueToSeq[i] << '\t' << mapUniqueToSeq[j] << '\t' << flowDistance << endl;
+                               }
                        }
-               }       
-               lookUpFile.close();
-       }
-       catch(exception& e) {
-               m->errorOut(e, "ShhherCommand", "getSingleLookUp");
-               exit(1);
-       }
-}
-
-/**************************************************************************************************/
-
-void ShhherCommand::getJointLookUp(){
-       try{
+                       if(i % 100 == 0){
+                               m->mothurOut(toString(i) + "\t" + toString(time(NULL) - begTime));
+                               m->mothurOut("\t" + toString((clock()-begClock)/CLOCKS_PER_SEC));
+                               m->mothurOutEndLine();
+                       }
+               }
                
-               //      the most likely joint probability (-log) that two intenities have the same polymer length
-               jointLookUp.resize(NUMBINS * NUMBINS, 0);
+               ofstream distFile(distFileName.c_str());
+               distFile << outStream.str();            
+               distFile.close();
                
-               for(int i=0;i<NUMBINS;i++){
-                       
-                       if (m->control_pressed) { break; }
-                       
-                       for(int j=0;j<NUMBINS;j++){             
-                               
-                               double minSum = 100000000;
-                               
-                               for(int k=0;k<HOMOPS;k++){
-                                       double sum = singleLookUp[k * NUMBINS + i] + singleLookUp[k * NUMBINS + j];
-                                       
-                                       if(sum < minSum)        {       minSum = sum;           }
-                               }       
-                               jointLookUp[i * NUMBINS + j] = minSum;
-                       }
+               if (m->control_pressed) {}
+               else {
+                       m->mothurOut(toString(stopSeq-1) + "\t" + toString(time(NULL) - begTime));
+                       m->mothurOut("\t" + toString((clock()-begClock)/CLOCKS_PER_SEC));
+                       m->mothurOutEndLine();
                }
+        
+        return 0;
        }
        catch(exception& e) {
-               m->errorOut(e, "ShhherCommand", "getJointLookUp");
+               m->errorOut(e, "ShhherCommand", "flowDistParentFork");
                exit(1);
        }
 }
-
 /**************************************************************************************************/
 
-double ShhherCommand::getProbIntensity(int intIntensity){                          
+float ShhherCommand::calcPairwiseDist(int numFlowCells, int seqA, int seqB, vector<int>& mapSeqToUnique, vector<int>& lengths, vector<double>& flowDataPrI, vector<short>& flowDataIntI){
        try{
-
-               double minNegLogProb = 100000000; 
-
+               int minLength = lengths[mapSeqToUnique[seqA]];
+               if(lengths[seqB] < minLength){  minLength = lengths[mapSeqToUnique[seqB]];      }
                
-               for(int i=0;i<HOMOPS;i++){//loop signal strength
+               int ANumFlowCells = seqA * numFlowCells;
+               int BNumFlowCells = seqB * numFlowCells;
+               
+               float dist = 0;
+               
+               for(int i=0;i<minLength;i++){
                        
                        if (m->control_pressed) { break; }
                        
-                       float negLogProb = singleLookUp[i * NUMBINS + intIntensity];
-                       if(negLogProb < minNegLogProb)  {       minNegLogProb = negLogProb; }
+                       int flowAIntI = flowDataIntI[ANumFlowCells + i];
+                       float flowAPrI = flowDataPrI[ANumFlowCells + i];
+                       
+                       int flowBIntI = flowDataIntI[BNumFlowCells + i];
+                       float flowBPrI = flowDataPrI[BNumFlowCells + i];
+                       dist += jointLookUp[flowAIntI * NUMBINS + flowBIntI] - flowAPrI - flowBPrI;
                }
                
-               return minNegLogProb;
+               dist /= (float) minLength;
+               return dist;
        }
        catch(exception& e) {
-               m->errorOut(e, "ShhherCommand", "getProbIntensity");
+               m->errorOut(e, "ShhherCommand", "calcPairwiseDist");
                exit(1);
        }
 }
 
 /**************************************************************************************************/
 
-void ShhherCommand::getUniques(){
+int ShhherCommand::getUniques(int numSeqs, int numFlowCells, vector<short>& uniqueFlowgrams, vector<int>& uniqueCount, vector<int>& uniqueLengths, vector<int>& mapSeqToUnique, vector<int>& mapUniqueToSeq, vector<int>& lengths, vector<double>& flowDataPrI, vector<short>& flowDataIntI){
        try{
-               
-               
-               numUniques = 0;
+               int numUniques = 0;
                uniqueFlowgrams.assign(numFlowCells * numSeqs, -1);
                uniqueCount.assign(numSeqs, 0);                                                 //      anWeights
                uniqueLengths.assign(numSeqs, 0);
@@ -1069,7 +2304,7 @@ void ShhherCommand::getUniques(){
                        for(int j=0;j<numFlowCells;j++){
                                current[j] = short(((flowDataIntI[i * numFlowCells + j] + 50.0)/100.0));
                        }
-                                               
+            
                        for(int j=0;j<numUniques;j++){
                                int offset = j * numFlowCells;
                                bool toEnd = 1;
@@ -1077,7 +2312,7 @@ void ShhherCommand::getUniques(){
                                int shorterLength;
                                if(lengths[i] < uniqueLengths[j])       {       shorterLength = lengths[i];                     }
                                else                                                            {       shorterLength = uniqueLengths[j];       }
-
+                
                                for(int k=0;k<shorterLength;k++){
                                        if(current[k] != uniqueFlowgrams[offset + k]){
                                                toEnd = 0;
@@ -1114,220 +2349,16 @@ void ShhherCommand::getUniques(){
                
                flowDataPrI.resize(numSeqs * numFlowCells, 0);
                for(int i=0;i<flowDataPrI.size();i++)   {       if (m->control_pressed) { break; } flowDataPrI[i] = getProbIntensity(flowDataIntI[i]);          }
+        
+        return numUniques;
        }
        catch(exception& e) {
                m->errorOut(e, "ShhherCommand", "getUniques");
                exit(1);
        }
 }
-
 /**************************************************************************************************/
-
-float ShhherCommand::calcPairwiseDist(int seqA, int seqB){
-       try{
-               int minLength = lengths[mapSeqToUnique[seqA]];
-               if(lengths[seqB] < minLength){  minLength = lengths[mapSeqToUnique[seqB]];      }
-               
-               int ANumFlowCells = seqA * numFlowCells;
-               int BNumFlowCells = seqB * numFlowCells;
-               
-               float dist = 0;
-               
-               for(int i=0;i<minLength;i++){
-                       
-                       if (m->control_pressed) { break; }
-                       
-                       int flowAIntI = flowDataIntI[ANumFlowCells + i];
-                       float flowAPrI = flowDataPrI[ANumFlowCells + i];
-                       
-                       int flowBIntI = flowDataIntI[BNumFlowCells + i];
-                       float flowBPrI = flowDataPrI[BNumFlowCells + i];
-                       dist += jointLookUp[flowAIntI * NUMBINS + flowBIntI] - flowAPrI - flowBPrI;
-               }
-               
-               dist /= (float) minLength;
-               return dist;
-       }
-       catch(exception& e) {
-               m->errorOut(e, "ShhherCommand", "calcPairwiseDist");
-               exit(1);
-       }
-}
-
-/**************************************************************************************************/
-
-void ShhherCommand::flowDistParentFork(string distFileName, int startSeq, int stopSeq){
-       try{            
-
-               ostringstream outStream;
-               outStream.setf(ios::fixed, ios::floatfield);
-               outStream.setf(ios::dec, ios::basefield);
-               outStream.setf(ios::showpoint);
-               outStream.precision(6);
-               
-               int begTime = time(NULL);
-               double begClock = clock();
-
-               for(int i=startSeq;i<stopSeq;i++){
-                       
-                       if (m->control_pressed) { break; }
-                       
-                       for(int j=0;j<i;j++){
-                               float flowDistance = calcPairwiseDist(mapUniqueToSeq[i], mapUniqueToSeq[j]);
-
-                               if(flowDistance < 1e-6){
-                                       outStream << mapUniqueToSeq[i] << '\t' << mapUniqueToSeq[j] << '\t' << 0.000000 << endl;
-                               }
-                               else if(flowDistance <= cutoff){
-                                       outStream << mapUniqueToSeq[i] << '\t' << mapUniqueToSeq[j] << '\t' << flowDistance << endl;
-                               }
-                       }
-                       if(i % 100 == 0){
-                               m->mothurOut(toString(i) + "\t" + toString(time(NULL) - begTime));
-                               m->mothurOut("\t" + toString((clock()-begClock)/CLOCKS_PER_SEC));
-                               m->mothurOutEndLine();
-                       }
-               }
-               
-               ofstream distFile(distFileName.c_str());
-               distFile << outStream.str();            
-               distFile.close();
-               
-               if (m->control_pressed) {}
-               else {
-                       m->mothurOut(toString(stopSeq-1) + "\t" + toString(time(NULL) - begTime));
-                       m->mothurOut("\t" + toString((clock()-begClock)/CLOCKS_PER_SEC));
-                       m->mothurOutEndLine();
-               }
-       }
-       catch(exception& e) {
-               m->errorOut(e, "ShhherCommand", "flowDistParentFork");
-               exit(1);
-       }
-}
-
-/**************************************************************************************************/
-
-string ShhherCommand::createDistFile(int processors){
-       try{
-//////////////////////// until I figure out the shared memory issue //////////////////////             
-#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
-#else
-               processors=1;
-#endif
-//////////////////////// until I figure out the shared memory issue //////////////////////             
-               
-               string fDistFileName = flowFileName.substr(0,flowFileName.find_last_of('.')) + ".shhh.dist";
-                               
-               unsigned long long begTime = time(NULL);
-               double begClock = clock();
-               
-               if (numSeqs < processors){      processors = 1; }
-               
-               if(processors == 1)     {       flowDistParentFork(fDistFileName, 0, numUniques);               }
-               
-               else{ //you have multiple processors
-                       
-                       vector<int> start(processors, 0);
-                       vector<int> end(processors, 0);
-                       
-                       int process = 1;
-                       vector<int> processIDs;
-                       
-                       for (int i = 0; i < processors; i++) {
-                               start[i] = int(sqrt(float(i)/float(processors)) * numUniques);
-                               end[i] = int(sqrt(float(i+1)/float(processors)) * numUniques);
-                       }
-                       
-#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
-               
-                       //loop through and create all the processes you want
-                       while (process != processors) {
-                               int pid = fork();
-                               
-                               if (pid > 0) {
-                                       processIDs.push_back(pid);  //create map from line number to pid so you can append files in correct order later
-                                       process++;
-                               }else if (pid == 0){
-                                       flowDistParentFork(fDistFileName + toString(getpid()) + ".temp", start[process], end[process]);
-                                       exit(0);
-                               }else { 
-                                       m->mothurOut("[ERROR]: unable to spawn the necessary processes. Error code: " + toString(pid)); m->mothurOutEndLine(); 
-                                       perror(" : ");
-                                       for (int i=0;i<processIDs.size();i++) {  int temp = processIDs[i]; kill (temp, SIGINT); }
-                                       exit(0);
-                               }
-                       }
-                       
-                       //parent does its part
-                       flowDistParentFork(fDistFileName, start[0], end[0]);
-                       
-                       //force parent to wait until all the processes are done
-                       for (int i=0;i<processIDs.size();i++) { 
-                               int temp = processIDs[i];
-                               wait(&temp);
-                       }
-#else
-                       //////////////////////////////////////////////////////////////////////////////////////////////////////
-                       //Windows version shared memory, so be careful when passing variables through the flowDistParentForkData struct. 
-                       //Above fork() will clone, so memory is separate, but that's not the case with windows, 
-                       //////////////////////////////////////////////////////////////////////////////////////////////////////
-                       
-                       vector<flowDistParentForkData*> pDataArray; 
-                       DWORD   dwThreadIdArray[processors-1];
-                       HANDLE  hThreadArray[processors-1]; 
-                       
-                       //Create processor worker threads.
-                       for(int i = 0; i < processors-1; i++){
-                               // Allocate memory for thread data.
-                               string extension = extension = toString(i) + ".temp"; 
-                               
-                               flowDistParentForkData* tempdist = new flowDistParentForkData((fDistFileName + extension), mapUniqueToSeq, mapSeqToUnique, lengths, flowDataIntI, flowDataPrI, jointLookUp, m, start[i+1], end[i+1], numFlowCells, cutoff, i);
-                               pDataArray.push_back(tempdist);
-                               processIDs.push_back(i);
-                               
-                               //MySeqSumThreadFunction is in header. It must be global or static to work with the threads.
-                               //default security attributes, thread function name, argument to thread function, use default creation flags, returns the thread identifier
-                               hThreadArray[i] = CreateThread(NULL, 0, MyflowDistParentForkThreadFunction, pDataArray[i], 0, &dwThreadIdArray[i]);   
-                       }
-                       
-                       //parent does its part
-                       flowDistParentFork(fDistFileName, start[0], end[0]);
-                       
-                       //Wait until all threads have terminated.
-                       WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE);
-                       
-                       //Close all thread handles and free memory allocations.
-                       for(int i=0; i < pDataArray.size(); i++){
-                               CloseHandle(hThreadArray[i]);
-                               delete pDataArray[i];
-                       }
-                       
-#endif
-                       
-                       //append and remove temp files
-                       for (int i=0;i<processIDs.size();i++) { 
-                               m->appendFiles((fDistFileName + toString(processIDs[i]) + ".temp"), fDistFileName);
-                               m->mothurRemove((fDistFileName + toString(processIDs[i]) + ".temp"));
-                       }
-                       
-               }
-               
-               m->mothurOutEndLine();
-               m->mothurOut("Total time: " + toString(time(NULL) - begTime) + '\t' + toString((clock() - begClock)/CLOCKS_PER_SEC) + '\n');
-               
-               return fDistFileName;
-       }
-       catch(exception& e) {
-               m->errorOut(e, "ShhherCommand", "createDistFile");
-               exit(1);
-       }
-       
-}
-
-/**************************************************************************************************/
-
-string ShhherCommand::createNamesFile(){
+int ShhherCommand::createNamesFile(int numSeqs, int numUniques, string filename, vector<string>& seqNameVector, vector<int>& mapSeqToUnique, vector<int>& mapUniqueToSeq){
        try{
                
                vector<string> duplicateNames(numUniques, "");
@@ -1335,31 +2366,29 @@ string ShhherCommand::createNamesFile(){
                        duplicateNames[mapSeqToUnique[i]] += seqNameVector[i] + ',';
                }
                
-               string nameFileName = flowFileName.substr(0,flowFileName.find_last_of('.')) + ".shhh.names";
-               
                ofstream nameFile;
-               m->openOutputFile(nameFileName, nameFile);
+               m->openOutputFile(filename, nameFile);
                
                for(int i=0;i<numUniques;i++){
                        
                        if (m->control_pressed) { break; }
                        
-//                     nameFile << seqNameVector[mapUniqueToSeq[i]] << '\t' << duplicateNames[i].substr(0, duplicateNames[i].find_last_of(',')) << endl;
+            //                 nameFile << seqNameVector[mapUniqueToSeq[i]] << '\t' << duplicateNames[i].substr(0, duplicateNames[i].find_last_of(',')) << endl;
                        nameFile << mapUniqueToSeq[i] << '\t' << duplicateNames[i].substr(0, duplicateNames[i].find_last_of(',')) << endl;
                }
                
                nameFile.close();
-               return  nameFileName;
+        
+               return 0;
        }
        catch(exception& e) {
                m->errorOut(e, "ShhherCommand", "createNamesFile");
                exit(1);
        }
 }
-
 //**********************************************************************************************************************
 
-string ShhherCommand::cluster(string distFileName, string namesFileName){
+int ShhherCommand::cluster(string filename, string distFileName, string namesFileName){
        try {
                
                ReadMatrix* read = new ReadColumnMatrix(distFileName);  
@@ -1374,7 +2403,7 @@ string ShhherCommand::cluster(string distFileName, string namesFileName){
                
                delete read; 
                delete clusterNameMap; 
-                               
+        
                RAbundVector* rabund = new RAbundVector(list->getRAbundVector());
                
                Cluster* cluster = new CompleteLinkage(rabund, list, matrix, cutoff, "furthest"); 
@@ -1390,33 +2419,39 @@ string ShhherCommand::cluster(string distFileName, string namesFileName){
                
                list->setLabel(toString(cutoff));
                
-               string listFileName = flowFileName.substr(0,flowFileName.find_last_of('.')) + ".shhh.list";
                ofstream listFile;
-               m->openOutputFile(listFileName, listFile);
+               m->openOutputFile(filename, listFile);
                list->print(listFile);
                listFile.close();
                
                delete matrix;  delete cluster; delete rabund; delete list;
-       
-               return listFileName;
+        
+               return 0;
        }
        catch(exception& e) {
                m->errorOut(e, "ShhherCommand", "cluster");
                exit(1);        
        }               
 }
-
 /**************************************************************************************************/
 
-void ShhherCommand::getOTUData(string listFileName){
+int ShhherCommand::getOTUData(int numSeqs, string fileName,  vector<int>& otuData,
+                               vector<int>& cumNumSeqs,
+                               vector<int>& nSeqsPerOTU,
+                               vector<vector<int> >& aaP,      //tMaster->aanP:        each row is a different otu / each col contains the sequence indices
+                               vector<vector<int> >& aaI,      //tMaster->aanI:        that are in each otu - can't differentiate between aaP and aaI 
+                               vector<int>& seqNumber,         //tMaster->anP:         the sequence id number sorted by OTU
+                               vector<int>& seqIndex,
+                               map<string, int>& nameMap){
        try {
-
+        
                ifstream listFile;
-               m->openInputFile(listFileName, listFile);
+               m->openInputFile(fileName, listFile);
                string label;
+        int numOTUs;
                
                listFile >> label >> numOTUs;
-
+        
                otuData.assign(numSeqs, 0);
                cumNumSeqs.assign(numOTUs, 0);
                nSeqsPerOTU.assign(numOTUs, 0);
@@ -1431,11 +2466,11 @@ void ShhherCommand::getOTUData(string listFileName){
                for(int i=0;i<numOTUs;i++){
                        
                        if (m->control_pressed) { break; }
-
+            
                        listFile >> singleOTU;
                        
                        istringstream otuString(singleOTU);
-
+            
                        while(otuString){
                                
                                string seqName = "";
@@ -1460,10 +2495,10 @@ void ShhherCommand::getOTUData(string listFileName){
                                }
                                
                                map<string,int>::iterator nmIt = nameMap.find(seqName);
-
+                
                                int index = nmIt->second;
                                nameMap.erase(nmIt);
-
+                
                                otuData[index] = i;
                                nSeqsPerOTU[i]++;
                                aaP[i].push_back(index);        
@@ -1489,6 +2524,8 @@ void ShhherCommand::getOTUData(string listFileName){
                seqIndex = seqNumber;
                
                listFile.close();       
+        
+        return numOTUs;
                
        }
        catch(exception& e) {
@@ -1496,124 +2533,28 @@ void ShhherCommand::getOTUData(string listFileName){
                exit(1);        
        }               
 }
-
-/**************************************************************************************************/
-
-void ShhherCommand::initPyroCluster(){                          
-       try{
-               if (numOTUs < processors) { processors = 1; }
-
-               dist.assign(numSeqs * numOTUs, 0);
-               change.assign(numOTUs, 1);
-               centroids.assign(numOTUs, -1);
-               weight.assign(numOTUs, 0);
-               singleTau.assign(numSeqs, 1.0);
-               
-               nSeqsBreaks.assign(processors+1, 0);
-               nOTUsBreaks.assign(processors+1, 0);
-
-               nSeqsBreaks[0] = 0;
-               for(int i=0;i<processors;i++){
-                       nSeqsBreaks[i+1] = nSeqsBreaks[i] + (int)((double) numSeqs / (double) processors);
-                       nOTUsBreaks[i+1] = nOTUsBreaks[i] + (int)((double) numOTUs / (double) processors);
-               }
-               nSeqsBreaks[processors] = numSeqs;
-               nOTUsBreaks[processors] = numOTUs;
-       }
-       catch(exception& e) {
-               m->errorOut(e, "ShhherCommand", "initPyroCluster");
-               exit(1);        
-       }
-}
-
-/**************************************************************************************************/
-
-void ShhherCommand::fill(){
-       try {
-               int index = 0;
-               for(int i=0;i<numOTUs;i++){
-                       
-                       if (m->control_pressed) { break; }
-                       
-                       cumNumSeqs[i] = index;
-                       for(int j=0;j<nSeqsPerOTU[i];j++){
-                               seqNumber[index] = aaP[i][j];
-                               seqIndex[index] = aaI[i][j];
-                               
-                               index++;
-                       }
-               }
-       }
-       catch(exception& e) {
-               m->errorOut(e, "ShhherCommand", "fill");
-               exit(1);        
-       }               
-}
-
 /**************************************************************************************************/
 
-void ShhherCommand::calcCentroids(){                          
-       try{
-               
-#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
-               
-               if(processors == 1)     {
-                       calcCentroidsDriver(0, numOTUs);                
-               }
-               else{ //you have multiple processors
-                       if (numOTUs < processors){      processors = 1; }
-                       
-                       int process = 1;
-                       vector<int> processIDs;
-                       
-                       //loop through and create all the processes you want
-                       while (process != processors) {
-                               int pid = vfork();
-                               
-                               if (pid > 0) {
-                                       processIDs.push_back(pid);  //create map from line number to pid so you can append files in correct order later
-                                       process++;
-                               }else if (pid == 0){
-                                       calcCentroidsDriver(nOTUsBreaks[process], nOTUsBreaks[process+1]);
-                                       exit(0);
-                               }else { 
-                                       m->mothurOut("[ERROR]: unable to spawn the necessary processes. Error code: " + toString(pid)); m->mothurOutEndLine(); 
-                                       perror(" : ");
-                                       for (int i=0;i<processIDs.size();i++) {  int temp = processIDs[i]; kill (temp, SIGINT); }
-                                       exit(0);
-                               }
-                       }
-                       
-                       //parent does its part
-                       calcCentroidsDriver(nOTUsBreaks[0], nOTUsBreaks[1]);
-
-                       //force parent to wait until all the processes are done
-                       for (int i=0;i<processIDs.size();i++) { 
-                               int temp = processIDs[i];
-                               wait(&temp);
-                       }
-               }
-               
-#else
-               calcCentroidsDriver(0, numOTUs);
-#endif         
-       }
-       catch(exception& e) {
-               m->errorOut(e, "ShhherCommand", "calcCentroidsDriver");
-               exit(1);        
-       }               
-}
-
-/**************************************************************************************************/
-
-void ShhherCommand::calcCentroidsDriver(int start, int finish){                          
+int ShhherCommand::calcCentroidsDriver(int numOTUs, 
+                                          vector<int>& cumNumSeqs,
+                                          vector<int>& nSeqsPerOTU,
+                                          vector<int>& seqIndex,
+                                          vector<short>& change,               //did the centroid sequence change? 0 = no; 1 = yes
+                                          vector<int>& centroids,              //the representative flowgram for each cluster m
+                                          vector<double>& singleTau,   //tMaster->adTau:       1-D Tau vector (1xnumSeqs)
+                                          vector<int>& mapSeqToUnique,
+                                          vector<short>& uniqueFlowgrams,
+                                          vector<short>& flowDataIntI,
+                                          vector<int>& lengths,
+                                          int numFlowCells,
+                                          vector<int>& seqNumber){                          
        
        //this function gets the most likely homopolymer length at a flow position for a group of sequences
        //within an otu
        
        try{
                
-               for(int i=start;i<finish;i++){
+               for(int i=0;i<numOTUs;i++){
                        
                        if (m->control_pressed) { break; }
                        
@@ -1626,7 +2567,7 @@ void ShhherCommand::calcCentroidsDriver(int start, int finish){
                        for(int j=0;j<nSeqsPerOTU[i];j++){
                                count += singleTau[seqNumber[cumNumSeqs[i] + j]];
                        }
-
+            
                        if(nSeqsPerOTU[i] > 0 && count > MIN_COUNT){
                                vector<double> adF(nSeqsPerOTU[i]);
                                vector<int> anL(nSeqsPerOTU[i]);
@@ -1656,7 +2597,7 @@ void ShhherCommand::calcCentroidsDriver(int start, int finish){
                                        double tauValue = singleTau[seqNumber[index]];
                                        
                                        for(int k=0;k<position;k++){
-                                               double dist = getDistToCentroid(anL[k], nI, lengths[nI]);
+                                               double dist = getDistToCentroid(anL[k], nI, lengths[nI], uniqueFlowgrams, flowDataIntI, numFlowCells);
                                                adF[k] += dist * tauValue;
                                        }
                                }
@@ -1678,23 +2619,25 @@ void ShhherCommand::calcCentroidsDriver(int start, int finish){
                                centroids[i] = -1;                      
                        }
                }
+        
+        return 0;
        }
        catch(exception& e) {
                m->errorOut(e, "ShhherCommand", "calcCentroidsDriver");
                exit(1);        
        }               
 }
-
 /**************************************************************************************************/
 
-double ShhherCommand::getDistToCentroid(int cent, int flow, int length){
+double ShhherCommand::getDistToCentroid(int cent, int flow, int length, vector<short>& uniqueFlowgrams,
+                                        vector<short>& flowDataIntI, int numFlowCells){
        try{
                
                int flowAValue = cent * numFlowCells;
                int flowBValue = flow * numFlowCells;
                
                double dist = 0;
-
+        
                for(int i=0;i<length;i++){
                        dist += singleLookUp[uniqueFlowgrams[flowAValue] * NUMBINS + flowDataIntI[flowBValue]];
                        flowAValue++;
@@ -1708,321 +2651,129 @@ double ShhherCommand::getDistToCentroid(int cent, int flow, int length){
                exit(1);        
        }               
 }
-
-/**************************************************************************************************/
-
-double ShhherCommand::getNewWeights(){
-       try{
-               
-               double maxChange = 0;
-               
-               for(int i=0;i<numOTUs;i++){
-                       
-                       if (m->control_pressed) { break; }
-                       
-                       double difference = weight[i];
-                       weight[i] = 0;
-                       
-                       for(int j=0;j<nSeqsPerOTU[i];j++){
-                               int index = cumNumSeqs[i] + j;
-                               double tauValue = singleTau[seqNumber[index]];
-                               weight[i] += tauValue;
-                       }
-                       
-                       difference = fabs(weight[i] - difference);
-                       if(difference > maxChange){     maxChange = difference; }
-               }
-               return maxChange;
-       }
-       catch(exception& e) {
-               m->errorOut(e, "ShhherCommand", "getNewWeights");
-               exit(1);        
-       }               
-}
-
 /**************************************************************************************************/
 
-double ShhherCommand::getLikelihood(){
-       
-       try{
-               
-               vector<long double> P(numSeqs, 0);
-               int effNumOTUs = 0;
-               
-               for(int i=0;i<numOTUs;i++){
-                       if(weight[i] > MIN_WEIGHT){
-                               effNumOTUs++;
-                       }
-               }
-               
-               string hold;
-               for(int i=0;i<numOTUs;i++){
-                       
-                       if (m->control_pressed) { break; }
-                       
-                       for(int j=0;j<nSeqsPerOTU[i];j++){
-                               int index = cumNumSeqs[i] + j;
-                               int nI = seqIndex[index];
-                               double singleDist = dist[seqNumber[index]];
-                               
-                               P[nI] += weight[i] * exp(-singleDist * sigma);
-                       }
-               }
-               double nLL = 0.00;
-               for(int i=0;i<numSeqs;i++){
-                       if(P[i] == 0){  P[i] = DBL_EPSILON;     }
-
-                       nLL += -log(P[i]);
-               }
-               
-               nLL = nLL -(double)numSeqs * log(sigma);
-
-               return nLL; 
-       }
-       catch(exception& e) {
-               m->errorOut(e, "ShhherCommand", "getNewWeights");
-               exit(1);        
-       }               
-}
-
-/**************************************************************************************************/
-
-void ShhherCommand::checkCentroids(){
-       try{
-               vector<int> unique(numOTUs, 1);
-               
-               for(int i=0;i<numOTUs;i++){
-                       if(centroids[i] == -1 || weight[i] < MIN_WEIGHT){
-                               unique[i] = -1;
-                       }
-               }
-               
-               for(int i=0;i<numOTUs;i++){
-                       
-                       if (m->control_pressed) { break; }
-                       
-                       if(unique[i] == 1){
-                               for(int j=i+1;j<numOTUs;j++){
-                                       if(unique[j] == 1){
-                                               
-                                               if(centroids[j] == centroids[i]){
-                                                       unique[j] = 0;
-                                                       centroids[j] = -1;
-                                                       
-                                                       weight[i] += weight[j];
-                                                       weight[j] = 0.0;
-                                               }
-                                       }
-                               }
-                       }
-               }
-       }
-       catch(exception& e) {
-               m->errorOut(e, "ShhherCommand", "checkCentroids");
-               exit(1);        
-       }               
-}
-
-/**************************************************************************************************/
-
-void ShhherCommand::calcNewDistances(){                          
-       try{
-               
-#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
-
-               if(processors == 1)     {
-                       calcNewDistancesParent(0, numSeqs);             
-               }
-               else{ //you have multiple processors
-                       if (numSeqs < processors){      processors = 1; }
-                       
-                       vector<vector<int> > child_otuIndex(processors);
-                       vector<vector<int> > child_seqIndex(processors);
-                       vector<vector<double> > child_singleTau(processors);                    
-                       vector<int> totals(processors);
-                       
-                       int process = 1;
-                       vector<int> processIDs;
-
-                       //loop through and create all the processes you want
-                       while (process != processors) {
-                               int pid = vfork();
-                               
-                               if (pid > 0) {
-                                       processIDs.push_back(pid);  //create map from line number to pid so you can append files in correct order later
-                                       process++;
-                               }else if (pid == 0){
-                                       calcNewDistancesChild(nSeqsBreaks[process], nSeqsBreaks[process+1], child_otuIndex[process], child_seqIndex[process], child_singleTau[process]);
-                                       totals[process] = child_otuIndex[process].size();
-
-                                       exit(0);
-                               }else { 
-                                       m->mothurOut("[ERROR]: unable to spawn the necessary processes. Error code: " + toString(pid)); m->mothurOutEndLine(); 
-                                       perror(" : ");
-                                       for (int i=0;i<processIDs.size();i++) {  int temp = processIDs[i]; kill (temp, SIGINT); }
-                                       exit(0);
-                               }
-                       }
-                               
-                       //parent does its part
-                       calcNewDistancesParent(nSeqsBreaks[0], nSeqsBreaks[1]);
-                       int total = seqIndex.size();
-
-                       //force parent to wait until all the processes are done
-                       for (int i=0;i<processIDs.size();i++) { 
-                               int temp = processIDs[i];
-                               wait(&temp);
-                       }
-
-                       for(int i=1;i<processors;i++){
-                               int oldTotal = total;
-                               total += totals[i];
-
-                               singleTau.resize(total, 0);
-                               seqIndex.resize(total, 0);
-                               seqNumber.resize(total, 0);
-                               
-                               int childIndex = 0;
-                               
-                               for(int j=oldTotal;j<total;j++){
-                                       int otuI = child_otuIndex[i][childIndex];
-                                       int seqI = child_seqIndex[i][childIndex];
-
-                                       singleTau[j] = child_singleTau[i][childIndex];
-                                       aaP[otuI][nSeqsPerOTU[otuI]] = j;
-                                       aaI[otuI][nSeqsPerOTU[otuI]] = seqI;
-                                       nSeqsPerOTU[otuI]++;
-
-                                       childIndex++;
-                               }
-                       }
-               }
-#else
-               calcNewDistancesParent(0, numSeqs);             
-#endif         
-       }
-       catch(exception& e) {
-               m->errorOut(e, "ShhherCommand", "calcNewDistances");
-               exit(1);        
-       }               
-}
-
-/**************************************************************************************************/
-#ifdef USE_MPI
-void ShhherCommand::calcNewDistancesChildMPI(int startSeq, int stopSeq, vector<int>& otuIndex){
-       
-       try{
-               vector<double> newTau(numOTUs,0);
-               vector<double> norms(numSeqs, 0);
-               otuIndex.clear();
-               seqIndex.clear();
-               singleTau.clear();
-               
-               for(int i=startSeq;i<stopSeq;i++){
-                       
-                       if (m->control_pressed) { break; }
-                       
-                       double offset = 1e8;
-                       int indexOffset = i * numOTUs;
-                       
-                       for(int j=0;j<numOTUs;j++){
-                               
-                               if(weight[j] > MIN_WEIGHT && change[j] == 1){
-                                       dist[indexOffset + j] = getDistToCentroid(centroids[j], i, lengths[i]);
-                               }
-                               if(weight[j] > MIN_WEIGHT && dist[indexOffset + j] < offset){
-                                       offset = dist[indexOffset + j];
-                               }
-                       }
-                       
-                       for(int j=0;j<numOTUs;j++){
-                               if(weight[j] > MIN_WEIGHT){
-                                       newTau[j] = exp(sigma * (-dist[indexOffset + j] + offset)) * weight[j];
-                                       norms[i] += newTau[j];
-                               }
-                               else{
-                                       newTau[j] = 0.0;
-                               }
-                       }
-                       
-                       for(int j=0;j<numOTUs;j++){
-
-                               newTau[j] /= norms[i];
-                               
-                               if(newTau[j] > MIN_TAU){
-                                       otuIndex.push_back(j);
-                                       seqIndex.push_back(i);
-                                       singleTau.push_back(newTau[j]);
-                               }
+double ShhherCommand::getNewWeights(int numOTUs, vector<int>& cumNumSeqs, vector<int>& nSeqsPerOTU, vector<double>& singleTau, vector<int>& seqNumber, vector<double>& weight){
+       try{
+               
+               double maxChange = 0;
+               
+               for(int i=0;i<numOTUs;i++){
+                       
+                       if (m->control_pressed) { break; }
+                       
+                       double difference = weight[i];
+                       weight[i] = 0;
+                       
+                       for(int j=0;j<nSeqsPerOTU[i];j++){
+                               int index = cumNumSeqs[i] + j;
+                               double tauValue = singleTau[seqNumber[index]];
+                               weight[i] += tauValue;
                        }
                        
+                       difference = fabs(weight[i] - difference);
+                       if(difference > maxChange){     maxChange = difference; }
                }
+               return maxChange;
        }
        catch(exception& e) {
-               m->errorOut(e, "ShhherCommand", "calcNewDistancesChildMPI");
+               m->errorOut(e, "ShhherCommand", "getNewWeights");
                exit(1);        
        }               
 }
-#endif
+
 /**************************************************************************************************/
 
-void ShhherCommand::calcNewDistancesChild(int startSeq, int stopSeq, vector<int>& child_otuIndex, vector<int>& child_seqIndex, vector<double>& child_singleTau){
+double ShhherCommand::getLikelihood(int numSeqs, int numOTUs, vector<int>& nSeqsPerOTU, vector<int>& seqNumber, vector<int>& cumNumSeqs, vector<int>& seqIndex, vector<double>& dist, vector<double>& weight){
        
        try{
-               vector<double> newTau(numOTUs,0);
-               vector<double> norms(numSeqs, 0);
-               child_otuIndex.resize(0);
-               child_seqIndex.resize(0);
-               child_singleTau.resize(0);
                
-               for(int i=startSeq;i<stopSeq;i++){
+               vector<long double> P(numSeqs, 0);
+               int effNumOTUs = 0;
+               
+               for(int i=0;i<numOTUs;i++){
+                       if(weight[i] > MIN_WEIGHT){
+                               effNumOTUs++;
+                       }
+               }
+               
+               string hold;
+               for(int i=0;i<numOTUs;i++){
                        
                        if (m->control_pressed) { break; }
                        
-                       double offset = 1e8;
-                       int indexOffset = i * numOTUs;
-                       
-                       
-                       for(int j=0;j<numOTUs;j++){
-                               if(weight[j] > MIN_WEIGHT && change[j] == 1){
-                                       dist[indexOffset + j] = getDistToCentroid(centroids[j], i, lengths[i]);
-                               }
+                       for(int j=0;j<nSeqsPerOTU[i];j++){
+                               int index = cumNumSeqs[i] + j;
+                               int nI = seqIndex[index];
+                               double singleDist = dist[seqNumber[index]];
                                
-                               if(weight[j] > MIN_WEIGHT && dist[indexOffset + j] < offset){
-                                       offset = dist[indexOffset + j];
-                               }
+                               P[nI] += weight[i] * exp(-singleDist * sigma);
                        }
-                       
-                       for(int j=0;j<numOTUs;j++){
-                               if(weight[j] > MIN_WEIGHT){
-                                       newTau[j] = exp(sigma * (-dist[indexOffset + j] + offset)) * weight[j];
-                                       norms[i] += newTau[j];
-                               }
-                               else{
-                                       newTau[j] = 0.0;
-                               }
+               }
+               double nLL = 0.00;
+               for(int i=0;i<numSeqs;i++){
+                       if(P[i] == 0){  P[i] = DBL_EPSILON;     }
+            
+                       nLL += -log(P[i]);
+               }
+               
+               nLL = nLL -(double)numSeqs * log(sigma);
+        
+               return nLL; 
+       }
+       catch(exception& e) {
+               m->errorOut(e, "ShhherCommand", "getNewWeights");
+               exit(1);        
+       }               
+}
+
+/**************************************************************************************************/
+
+int ShhherCommand::checkCentroids(int numOTUs, vector<int>& centroids, vector<double>& weight){
+       try{
+               vector<int> unique(numOTUs, 1);
+               
+               for(int i=0;i<numOTUs;i++){
+                       if(centroids[i] == -1 || weight[i] < MIN_WEIGHT){
+                               unique[i] = -1;
                        }
+               }
+               
+               for(int i=0;i<numOTUs;i++){
                        
-                       for(int j=0;j<numOTUs;j++){
-                               newTau[j] /= norms[i];
-                               
-                               if(newTau[j] > MIN_TAU){
-                                       child_otuIndex.push_back(j);
-                                       child_seqIndex.push_back(i);
-                                       child_singleTau.push_back(newTau[j]);
+                       if (m->control_pressed) { break; }
+                       
+                       if(unique[i] == 1){
+                               for(int j=i+1;j<numOTUs;j++){
+                                       if(unique[j] == 1){
+                                               
+                                               if(centroids[j] == centroids[i]){
+                                                       unique[j] = 0;
+                                                       centroids[j] = -1;
+                                                       
+                                                       weight[i] += weight[j];
+                                                       weight[j] = 0.0;
+                                               }
+                                       }
                                }
                        }
                }
+        
+        return 0;
        }
        catch(exception& e) {
-               m->errorOut(e, "ShhherCommand", "calcNewDistancesChild");
+               m->errorOut(e, "ShhherCommand", "checkCentroids");
                exit(1);        
        }               
 }
-
 /**************************************************************************************************/
 
-void ShhherCommand::calcNewDistancesParent(int startSeq, int stopSeq){
+void ShhherCommand::calcNewDistances(int numSeqs, int numOTUs, vector<int>& nSeqsPerOTU, vector<double>& dist, 
+                                     vector<double>& weight, vector<short>& change, vector<int>& centroids,
+                                     vector<vector<int> >& aaP,        vector<double>& singleTau, vector<vector<int> >& aaI,   
+                                     vector<int>& seqNumber, vector<int>& seqIndex,
+                                     vector<short>& uniqueFlowgrams,
+                                     vector<short>& flowDataIntI, int numFlowCells, vector<int>& lengths){
        
        try{
                
@@ -2030,26 +2781,26 @@ void ShhherCommand::calcNewDistancesParent(int startSeq, int stopSeq){
                vector<double> newTau(numOTUs,0);
                vector<double> norms(numSeqs, 0);
                nSeqsPerOTU.assign(numOTUs, 0);
-
-               for(int i=startSeq;i<stopSeq;i++){
+        
+               for(int i=0;i<numSeqs;i++){
                        
                        if (m->control_pressed) { break; }
                        
                        int indexOffset = i * numOTUs;
-
+            
                        double offset = 1e8;
                        
                        for(int j=0;j<numOTUs;j++){
-
+                
                                if(weight[j] > MIN_WEIGHT && change[j] == 1){
-                                       dist[indexOffset + j] = getDistToCentroid(centroids[j], i, lengths[i]);
+                                       dist[indexOffset + j] = getDistToCentroid(centroids[j], i, lengths[i], uniqueFlowgrams, flowDataIntI, numFlowCells);
                                }
-       
+                
                                if(weight[j] > MIN_WEIGHT && dist[indexOffset + j] < offset){
                                        offset = dist[indexOffset + j];
                                }
                        }
-
+            
                        for(int j=0;j<numOTUs;j++){
                                if(weight[j] > MIN_WEIGHT){
                                        newTau[j] = exp(sigma * (-dist[indexOffset + j] + offset)) * weight[j];
@@ -2059,11 +2810,11 @@ void ShhherCommand::calcNewDistancesParent(int startSeq, int stopSeq){
                                        newTau[j] = 0.0;
                                }
                        }
-
+            
                        for(int j=0;j<numOTUs;j++){
                                newTau[j] /= norms[i];
                        }
-       
+            
                        for(int j=0;j<numOTUs;j++){
                                if(newTau[j] > MIN_TAU){
                                        
@@ -2082,19 +2833,44 @@ void ShhherCommand::calcNewDistancesParent(int startSeq, int stopSeq){
                                        nSeqsPerOTU[j]++;
                                }
                        }
-
+            
                }
-
+        
        }
        catch(exception& e) {
-               m->errorOut(e, "ShhherCommand", "calcNewDistancesParent");
+               m->errorOut(e, "ShhherCommand", "calcNewDistances");
                exit(1);        
        }               
 }
+/**************************************************************************************************/
 
+int ShhherCommand::fill(int numOTUs, vector<int>& seqNumber, vector<int>& seqIndex, vector<int>& cumNumSeqs, vector<int>& nSeqsPerOTU, vector<vector<int> >& aaP, vector<vector<int> >& aaI){
+       try {
+               int index = 0;
+               for(int i=0;i<numOTUs;i++){
+                       
+                       if (m->control_pressed) { return 0; }
+                       
+                       cumNumSeqs[i] = index;
+                       for(int j=0;j<nSeqsPerOTU[i];j++){
+                               seqNumber[index] = aaP[i][j];
+                               seqIndex[index] = aaI[i][j];
+                               
+                               index++;
+                       }
+               }
+        
+        return 0; 
+       }
+       catch(exception& e) {
+               m->errorOut(e, "ShhherCommand", "fill");
+               exit(1);        
+       }               
+}
 /**************************************************************************************************/
 
-void ShhherCommand::setOTUs(){
+void ShhherCommand::setOTUs(int numOTUs, int numSeqs, vector<int>& seqNumber, vector<int>& seqIndex, vector<int>& cumNumSeqs, vector<int>& nSeqsPerOTU,
+                            vector<int>& otuData, vector<double>& singleTau, vector<double>& dist, vector<vector<int> >& aaP, vector<vector<int> >& aaI){
        
        try {
                vector<double> bigTauMatrix(numOTUs * numSeqs, 0.0000);
@@ -2137,26 +2913,28 @@ void ShhherCommand::setOTUs(){
                        
                        nSeqsPerOTU[index]++;
                }
-               fill(); 
+        
+               fill(numOTUs, seqNumber, seqIndex, cumNumSeqs, nSeqsPerOTU, aaP, aaI);  
        }
        catch(exception& e) {
-               m->errorOut(e, "ShhherCommand", "calcNewDistances");
+               m->errorOut(e, "ShhherCommand", "setOTUs");
                exit(1);        
        }               
 }
-
 /**************************************************************************************************/
 
-void ShhherCommand::writeQualities(vector<int> otuCounts){
+void ShhherCommand::writeQualities(int numOTUs, int numFlowCells, string filename, vector<int> otuCounts, vector<int>& nSeqsPerOTU, vector<int>& seqNumber,
+                                   vector<double>& singleTau, vector<short>& flowDataIntI, vector<short>& uniqueFlowgrams, vector<int>& cumNumSeqs,
+                                   vector<int>& mapUniqueToSeq, vector<string>& seqNameVector, vector<int>& centroids, vector<vector<int> >& aaI){
        
        try {
                string thisOutputDir = outputDir;
-               if (outputDir == "") {  thisOutputDir += m->hasPath(flowFileName);  }
-               string qualityFileName = thisOutputDir + m->getRootName(m->getSimpleName(flowFileName)) + ".shhh.qual";
-
+               if (outputDir == "") {  thisOutputDir += m->hasPath(filename);  }
+               string qualityFileName = thisOutputDir + m->getRootName(m->getSimpleName(filename)) + "shhh.qual";
+        
                ofstream qualityFile;
                m->openOutputFile(qualityFileName, qualityFile);
-
+        
                qualityFile.setf(ios::fixed, ios::floatfield);
                qualityFile.setf(ios::showpoint);
                qualityFile << setprecision(6);
@@ -2245,7 +3023,7 @@ void ShhherCommand::writeQualities(vector<int> otuCounts){
                }
                qualityFile.close();
                outputNames.push_back(qualityFileName);
-
+        
        }
        catch(exception& e) {
                m->errorOut(e, "ShhherCommand", "writeQualities");
@@ -2255,11 +3033,11 @@ void ShhherCommand::writeQualities(vector<int> otuCounts){
 
 /**************************************************************************************************/
 
-void ShhherCommand::writeSequences(vector<int> otuCounts){
+void ShhherCommand::writeSequences(string thisCompositeFASTAFileName, int numOTUs, int numFlowCells, string filename, vector<int> otuCounts, vector<short>& uniqueFlowgrams, vector<string>& seqNameVector, vector<vector<int> >& aaI, vector<int>& centroids){
        try {
                string thisOutputDir = outputDir;
-               if (outputDir == "") {  thisOutputDir += m->hasPath(flowFileName);  }
-               string fastaFileName = thisOutputDir + m->getRootName(m->getSimpleName(flowFileName)) + ".shhh.fasta";
+               if (outputDir == "") {  thisOutputDir += m->hasPath(filename);  }
+               string fastaFileName = thisOutputDir + m->getRootName(m->getSimpleName(filename)) + "shhh.fasta";
                ofstream fastaFile;
                m->openOutputFile(fastaFileName, fastaFile);
                
@@ -2288,11 +3066,11 @@ void ShhherCommand::writeSequences(vector<int> otuCounts){
                        }
                }
                fastaFile.close();
-
+        
                outputNames.push_back(fastaFileName);
-
-               if(compositeFASTAFileName != ""){
-                       m->appendFiles(fastaFileName, compositeFASTAFileName);
+        
+               if(thisCompositeFASTAFileName != ""){
+                       m->appendFiles(fastaFileName, thisCompositeFASTAFileName);
                }
        }
        catch(exception& e) {
@@ -2303,11 +3081,11 @@ void ShhherCommand::writeSequences(vector<int> otuCounts){
 
 /**************************************************************************************************/
 
-void ShhherCommand::writeNames(vector<int> otuCounts){
+void ShhherCommand::writeNames(string thisCompositeNamesFileName, int numOTUs, string filename, vector<int> otuCounts, vector<string>& seqNameVector, vector<vector<int> >& aaI, vector<int>& nSeqsPerOTU){
        try {
                string thisOutputDir = outputDir;
-               if (outputDir == "") {  thisOutputDir += m->hasPath(flowFileName);  }
-               string nameFileName = thisOutputDir + m->getRootName(m->getSimpleName(flowFileName)) + ".shhh.names";
+               if (outputDir == "") {  thisOutputDir += m->hasPath(filename);  }
+               string nameFileName = thisOutputDir + m->getRootName(m->getSimpleName(filename)) + "shhh.names";
                ofstream nameFile;
                m->openOutputFile(nameFileName, nameFile);
                
@@ -2329,8 +3107,8 @@ void ShhherCommand::writeNames(vector<int> otuCounts){
                outputNames.push_back(nameFileName);
                
                
-               if(compositeNamesFileName != ""){
-                       m->appendFiles(nameFileName, compositeNamesFileName);
+               if(thisCompositeNamesFileName != ""){
+                       m->appendFiles(nameFileName, thisCompositeNamesFileName);
                }               
        }
        catch(exception& e) {
@@ -2341,12 +3119,12 @@ void ShhherCommand::writeNames(vector<int> otuCounts){
 
 /**************************************************************************************************/
 
-void ShhherCommand::writeGroups(){
+void ShhherCommand::writeGroups(string filename, int numSeqs, vector<string>& seqNameVector){
        try {
                string thisOutputDir = outputDir;
-               if (outputDir == "") {  thisOutputDir += m->hasPath(flowFileName);  }
-               string fileRoot = thisOutputDir + m->getRootName(m->getSimpleName(flowFileName));
-               string groupFileName = fileRoot + ".shhh.groups";
+               if (outputDir == "") {  thisOutputDir += m->hasPath(filename);  }
+               string fileRoot = thisOutputDir + m->getRootName(m->getSimpleName(filename));
+               string groupFileName = fileRoot + "shhh.groups";
                ofstream groupFile;
                m->openOutputFile(groupFileName, groupFile);
                
@@ -2356,7 +3134,7 @@ void ShhherCommand::writeGroups(){
                }
                groupFile.close();
                outputNames.push_back(groupFileName);
-
+        
        }
        catch(exception& e) {
                m->errorOut(e, "ShhherCommand", "writeGroups");
@@ -2366,11 +3144,11 @@ void ShhherCommand::writeGroups(){
 
 /**************************************************************************************************/
 
-void ShhherCommand::writeClusters(vector<int> otuCounts){
+void ShhherCommand::writeClusters(string filename, int numOTUs, int numFlowCells, vector<int> otuCounts, vector<int>& centroids, vector<short>& uniqueFlowgrams, vector<string>& seqNameVector, vector<vector<int> >& aaI, vector<int>& nSeqsPerOTU, vector<int>& lengths, vector<short>& flowDataIntI){
        try {
                string thisOutputDir = outputDir;
-               if (outputDir == "") {  thisOutputDir += m->hasPath(flowFileName);  }
-               string otuCountsFileName = thisOutputDir + m->getRootName(m->getSimpleName(flowFileName)) + ".shhh.counts";
+               if (outputDir == "") {  thisOutputDir += m->hasPath(filename);  }
+               string otuCountsFileName = thisOutputDir + m->getRootName(m->getSimpleName(filename)) + "shhh.counts";
                ofstream otuCountsFile;
                m->openOutputFile(otuCountsFileName, otuCountsFile);
                
@@ -2403,7 +3181,7 @@ void ShhherCommand::writeClusters(vector<int> otuCounts){
                                        for(int k=0;k<lengths[sequence];k++){
                                                char base = bases[k % 4];
                                                int freq = int(0.01 * (double)flowDataIntI[sequence * numFlowCells + k] + 0.5);
-                                                       
+                        
                                                for(int s=0;s<freq;s++){
                                                        newSeq += base;
                                                        //otuCountsFile << base;
@@ -2416,7 +3194,7 @@ void ShhherCommand::writeClusters(vector<int> otuCounts){
                }
                otuCountsFile.close();
                outputNames.push_back(otuCountsFileName);
-
+        
        }
        catch(exception& e) {
                m->errorOut(e, "ShhherCommand", "writeClusters");
@@ -2424,4 +3202,92 @@ void ShhherCommand::writeClusters(vector<int> otuCounts){
        }               
 }
 
-//**********************************************************************************************************************
+/**************************************************************************************************/
+
+//     Reads the intensity lookup table from lookupFileName into singleLookUp.
+//     The file holds HOMOPS rows; each row starts with a frequency term followed by
+//     NUMBINS values: the -log probability that an observed (binned) signal intensity
+//     was produced by that homopolymer length. Must run before getJointLookUp().
+void ShhherCommand::getSingleLookUp(){
+       try{
+               //      these are the -log probabilities that a signal corresponds to a particular homopolymer length
+               singleLookUp.assign(HOMOPS * NUMBINS, 0);
+               
+               int index = 0;
+               ifstream lookUpFile;
+               m->openInputFile(lookupFileName, lookUpFile);
+               
+               for(int i=0;i<HOMOPS;i++){
+                       
+                       if (m->control_pressed) { break; }      // user interrupt: stop reading early
+                       
+                       float logFracFreq;
+                       lookUpFile >> logFracFreq;      // row-leading frequency term; read and discarded
+                       
+                       for(int j=0;j<NUMBINS;j++)      {
+                               lookUpFile >> singleLookUp[index];
+                               index++;
+                       }
+               }       
+               lookUpFile.close();
+       }
+       catch(exception& e) {
+               m->errorOut(e, "ShhherCommand", "getSingleLookUp");
+               exit(1);
+       }
+}
+
+/**************************************************************************************************/
+
+//     Builds jointLookUp, an NUMBINS x NUMBINS table where entry (i,j) is the minimum,
+//     over all homopolymer lengths k, of singleLookUp[k][i] + singleLookUp[k][j] --
+//     i.e. the best joint -log probability that two intensities share a polymer length.
+//     Requires singleLookUp to be populated (getSingleLookUp) first.
+void ShhherCommand::getJointLookUp(){
+       try{
+               
+               //      the most likely joint probability (-log) that two intensities have the same polymer length
+               jointLookUp.resize(NUMBINS * NUMBINS, 0);
+               
+               for(int i=0;i<NUMBINS;i++){
+                       
+                       if (m->control_pressed) { break; }      // user interrupt: leave table partially filled
+                       
+                       for(int j=0;j<NUMBINS;j++){             
+                               
+                               double minSum = 100000000;      // sentinel larger than any real -log prob sum
+                               
+                               for(int k=0;k<HOMOPS;k++){
+                                       double sum = singleLookUp[k * NUMBINS + i] + singleLookUp[k * NUMBINS + j];
+                                       
+                                       if(sum < minSum)        {       minSum = sum;           }
+                               }       
+                               jointLookUp[i * NUMBINS + j] = minSum;
+                       }
+               }
+       }
+       catch(exception& e) {
+               m->errorOut(e, "ShhherCommand", "getJointLookUp");
+               exit(1);
+       }
+}
+
+/**************************************************************************************************/
+
+//     Returns the smallest -log probability across all homopolymer lengths for the
+//     given binned intensity, i.e. the likelihood of the best-fitting polymer length.
+//     intIntensity is assumed to be a valid bin index in [0, NUMBINS) -- TODO confirm
+//     callers clamp it; an out-of-range value would read past a row of singleLookUp.
+double ShhherCommand::getProbIntensity(int intIntensity){                          
+       try{
+
+               double minNegLogProb = 100000000;       // sentinel larger than any table entry
+
+               
+               for(int i=0;i<HOMOPS;i++){//loop signal strength
+                       
+                       if (m->control_pressed) { break; }      // user interrupt: return best-so-far
+                       
+                       float negLogProb = singleLookUp[i * NUMBINS + intIntensity];
+                       if(negLogProb < minNegLogProb)  {       minNegLogProb = negLogProb; }
+               }
+               
+               return minNegLogProb;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "ShhherCommand", "getProbIntensity");
+               exit(1);
+       }
+}
+
+
+
+
index b3a0071079bdbcbdb6a1069cf6bc1d2f53f8fad9..a7af7da4f1ce8a7d0c0e6daa809a58f95f2164fb 100644 (file)
 
 #include "mothur.h"
 #include "command.hpp"
+#include "readcolumn.h"
+#include "readmatrix.hpp"
+#include "rabundvector.hpp"
+#include "sabundvector.hpp"
+#include "listvector.hpp"
+#include "cluster.hpp"
+#include "sparsematrix.hpp"
+#include <cfloat>
 
 //**********************************************************************************************************************
 
@@ -42,20 +50,92 @@ public:
        void help() { m->mothurOut(getHelpString()); }          
 private:
        
+    struct linePair {
+               int start;
+               int end;
+               linePair(int i, int j) : start(i), end(j) {}
+       };
+    
        int abort;
-       
        string outputDir, flowFileName, flowFilesFileName, lookupFileName, compositeFASTAFileName, compositeNamesFileName;
 
        int processors, maxIters;
        float cutoff, sigma, minDelta;
        string flowOrder;
-       
-       vector<int> nSeqsBreaks;
-       vector<int> nOTUsBreaks;
+    
+    vector<string> outputNames;
        vector<double> singleLookUp;
        vector<double> jointLookUp;
+    vector<string> flowFileVector;
+       
+    int driver(vector<string>, string, string, int, int);
+    int createProcesses(vector<string>);
+    int getFlowData(string, vector<string>&, vector<int>&, vector<short>&, map<string, int>&, int&);
+    int getUniques(int, int, vector<short>&, vector<int>&, vector<int>&, vector<int>&, vector<int>&, vector<int>&, vector<double>&, vector<short>&);
+    int flowDistParentFork(int, string, int, vector<int>&, vector<int>&, vector<int>&, vector<double>&, vector<short>&);
+    float calcPairwiseDist(int, int, int, vector<int>&, vector<int>&, vector<double>&, vector<short>&);
+    int createNamesFile(int, int, string, vector<string>&, vector<int>&, vector<int>&);
+    int cluster(string, string, string);
+    int getOTUData(int numSeqs, string,  vector<int>&, vector<int>&, vector<int>&, vector<vector<int> >&, vector<vector<int> >&, vector<int>&, vector<int>&,map<string, int>&);
+    int calcCentroidsDriver(int numOTUs, vector<int>&, vector<int>&, vector<int>&, vector<short>&, vector<int>&, vector<double>&, vector<int>&, vector<short>&, vector<short>&, vector<int>&, int, vector<int>&);
+    double getDistToCentroid(int, int, int, vector<short>&, vector<short>&, int);
+    double getNewWeights(int, vector<int>&, vector<int>&, vector<double>&, vector<int>&, vector<double>&);
+    
+    double getLikelihood(int, int, vector<int>&, vector<int>&, vector<int>&, vector<int>&, vector<double>&, vector<double>&);
+    int checkCentroids(int, vector<int>&, vector<double>&);
+    void calcNewDistances(int, int, vector<int>& , vector<double>&,vector<double>& , vector<short>& change, vector<int>&,vector<vector<int> >&,        vector<double>&, vector<vector<int> >&, vector<int>&, vector<int>&, vector<short>&, vector<short>&, int, vector<int>&);
+    int fill(int, vector<int>&, vector<int>&, vector<int>&, vector<int>&, vector<vector<int> >&, vector<vector<int> >&);
+    void setOTUs(int, int, vector<int>&, vector<int>&, vector<int>&, vector<int>&,
+                 vector<int>&, vector<double>&, vector<double>&, vector<vector<int> >&, vector<vector<int> >&);
+    void writeQualities(int, int, string, vector<int>, vector<int>&, vector<int>&, vector<double>&, vector<short>&, vector<short>&, vector<int>&, vector<int>&, vector<string>&, vector<int>&, vector<vector<int> >&);
+    void writeSequences(string, int, int, string, vector<int>, vector<short>&, vector<string>&, vector<vector<int> >&, vector<int>&);
+    void writeNames(string, int, string, vector<int>, vector<string>&, vector<vector<int> >&, vector<int>&);
+    void writeGroups(string, int, vector<string>&);
+    void writeClusters(string, int, int, vector<int>, vector<int>&, vector<short>&, vector<string>&, vector<vector<int> >&, vector<int>&, vector<int>&, vector<short>&);
+    
+       void getSingleLookUp();
+       void getJointLookUp();
+    double getProbIntensity(int);
        
-       vector<string> seqNameVector;
+       
+#ifdef USE_MPI
+       string flowDistMPI(int, int);
+       void calcNewDistancesChildMPI(int, int, vector<int>&);
+
+       int pid, ncpus; 
+    
+     void getFlowData();
+     void getUniques();
+     
+     float calcPairwiseDist(int, int);
+     void flowDistParentFork(string, int, int);
+     
+     string createDistFile(int);
+     string createNamesFile();
+     string cluster(string, string);
+     
+     void getOTUData(string);
+     void initPyroCluster();
+     void fill();
+     void calcCentroids();
+     void calcCentroidsDriver(int, int);
+     double getDistToCentroid(int, int, int);
+     double getNewWeights();
+     double getLikelihood();
+     void checkCentroids();
+     void calcNewDistances();
+     void calcNewDistancesParent(int, int);
+     void calcNewDistancesChild(int, int, vector<int>&, vector<int>&, vector<double>&);
+     
+     
+     void setOTUs();
+     void writeQualities(vector<int>);
+     void writeSequences(vector<int>);
+     void writeNames(vector<int>);
+     void writeGroups();
+     void writeClusters(vector<int>);
+    
+    vector<string> seqNameVector;
        vector<int> lengths;
        vector<short> flowDataIntI;
        vector<double> flowDataPrI;
@@ -77,50 +157,10 @@ private:
        vector<int> mapSeqToUnique;
        vector<int> mapUniqueToSeq;
        vector<int> uniqueLengths;
+    int numSeqs, numUniques, numOTUs, numFlowCells;
+    vector<int> nSeqsBreaks;
+       vector<int> nOTUsBreaks;
 
-       vector<string> outputNames;
-
-       int numSeqs, numUniques, numOTUs, numFlowCells;
-       
-       void getSingleLookUp();
-       void getJointLookUp();
-       void getFlowData();
-       void getUniques();
-       double getProbIntensity(int);
-       float calcPairwiseDist(int, int);
-       void flowDistParentFork(string, int, int);
-       
-       string createDistFile(int);
-       string createNamesFile();
-       string cluster(string, string);
-       
-       void getOTUData(string);
-       void initPyroCluster();
-       void fill();
-       void calcCentroids();
-       void calcCentroidsDriver(int, int);
-       double getDistToCentroid(int, int, int);
-       double getNewWeights();
-       double getLikelihood();
-       void checkCentroids();
-       void calcNewDistances();
-       void calcNewDistancesParent(int, int);
-       void calcNewDistancesChild(int, int, vector<int>&, vector<int>&, vector<double>&);
-
-
-       void setOTUs();
-       void writeQualities(vector<int>);
-       void writeSequences(vector<int>);
-       void writeNames(vector<int>);
-       void writeGroups();
-       void writeClusters(vector<int>);
-
-       
-#ifdef USE_MPI
-       string flowDistMPI(int, int);
-       void calcNewDistancesChildMPI(int, int, vector<int>&);
-
-       int pid, ncpus; 
 #endif
        
 };
@@ -129,32 +169,34 @@ private:
 //custom data structure for threads to use.
 // This is passed by void pointer so it can be any data type
 // that can be passed using a single void pointer (LPVOID).
-struct flowDistParentForkData {
-       string distFileName; 
-       vector<int> mapUniqueToSeq;
-       vector<int> mapSeqToUnique;
-       vector<int> lengths;
-       vector<short> flowDataIntI;
-       vector<double> flowDataPrI;
+// Per-thread payload for shhh.flows: bundles the tuning parameters, lookup tables,
+// and the [start, stop) slice of flow files a worker thread processes independently.
+// Vectors are copied into the struct so each thread owns its data (no sharing races);
+// only the MothurOut* m is shared across threads.
+struct shhhFlowsData {
+       int threadID, maxIters;
+       float cutoff, sigma, minDelta;
+       string flowOrder;
+       vector<double> singleLookUp;
        vector<double> jointLookUp;
+    vector<string> filenames;
+    vector<string> outputNames;
+    string thisCompositeFASTAFileName, thisCompositeNameFileName, outputDir;
+    int start, stop;
        MothurOut* m;
-       int threadID, startSeq, stopSeq, numFlowCells;
-       float cutoff;
        
-       flowDistParentForkData(){}
-       flowDistParentForkData(string d, vector<int> mapU, vector<int> mapS, vector<int> l, vector<short> flowD, vector<double> flowDa, vector<double> j, MothurOut* mout, int st, int sp, int n, float cut, int tid) {
-               distFileName = d;
-               mapUniqueToSeq = mapU;
-               mapSeqToUnique = mapS;
-               lengths = l;
-               flowDataIntI = flowD;
-               flowDataPrI = flowDa;
-               jointLookUp = j;
+       shhhFlowsData(){}
+       // Populates every field; [st, sp) is this thread's range into filenames.
+       shhhFlowsData(vector<string> f, string cf, string cn, string ou, string flor, vector<double> jl, vector<double> sl, MothurOut* mout, int st, int sp, float cut, float si, float mD, int mx, int tid) {
+               filenames = f;
+        thisCompositeFASTAFileName = cf;
+        thisCompositeNameFileName = cn;
+        outputDir = ou;
+        flowOrder = flor;
                m = mout;
-               startSeq = st;
-               stopSeq = sp;
-               numFlowCells = n;
+               start = st;
+               stop = sp;
                cutoff= cut;
+        sigma = si;
+        minDelta = mD;
+        maxIters = mx;
+        jointLookUp = jl;
+        singleLookUp = sl;
                threadID = tid;
        }
 };
@@ -162,83 +204,1158 @@ struct flowDistParentForkData {
 /**************************************************************************************************/
 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
 #else
-static DWORD WINAPI MyflowDistParentForkThreadFunction(LPVOID lpParam){ 
-       flowDistParentForkData* pDataArray;
-       pDataArray = (flowDistParentForkData*)lpParam;
+static DWORD WINAPI ShhhFlowsThreadFunction(LPVOID lpParam){ 
+       shhhFlowsData* pDataArray;
+       pDataArray = (shhhFlowsData*)lpParam;
        
        try {
-               ostringstream outStream;
-               outStream.setf(ios::fixed, ios::floatfield);
-               outStream.setf(ios::dec, ios::basefield);
-               outStream.setf(ios::showpoint);
-               outStream.precision(6);
-               
-               int begTime = time(NULL);
-               double begClock = clock();
-               string tempOut = "start and end = " + toString(pDataArray->startSeq) +'\t' + toString(pDataArray->stopSeq) + "-";
-               cout << tempOut << endl;
+        
+               for(int l=pDataArray->start;l<pDataArray->stop;l++){
+                       
+                       if (pDataArray->m->control_pressed) { break; }
+                       
+                       string flowFileName = pDataArray->filenames[l];
+            
+                       pDataArray->m->mothurOut("\n>>>>>\tProcessing " + flowFileName + " (file " + toString(l+1) + " of " + toString(pDataArray->filenames.size()) + ")\t<<<<<\n");
+                       pDataArray->m->mothurOut("Reading flowgrams...\n");
+                       
+            vector<string> seqNameVector;
+            vector<int> lengths;
+            vector<short> flowDataIntI;
+            vector<double> flowDataPrI;
+            map<string, int> nameMap;
+            vector<short> uniqueFlowgrams;
+            vector<int> uniqueCount;
+            vector<int> mapSeqToUnique;
+            vector<int> mapUniqueToSeq;
+            vector<int> uniqueLengths;
+            int numFlowCells;
+            
+            //int numSeqs = getFlowData(flowFileName, seqNameVector, lengths, flowDataIntI, nameMap, numFlowCells);
+            /*****************************************************************************************************/
+            
+            ifstream flowFile;
+           // cout << "herethread " << flowFileName << '\t' << &flowFile << endl;
+            pDataArray->m->openInputFile(flowFileName, flowFile);
+            
+           // cout << "herethread " << flowFileName << endl;
+            string seqName;
+            int currentNumFlowCells;
+            float intensity;
+                        
+            flowFile >> numFlowCells;
+            int index = 0;//pcluster
+            while(!flowFile.eof()){
+                
+                if (pDataArray->m->control_pressed) { flowFile.close(); return 0; }
+                
+                flowFile >> seqName >> currentNumFlowCells;
+                lengths.push_back(currentNumFlowCells);
+             //  cout << "herethread " << seqName << endl;  
+                seqNameVector.push_back(seqName);
+                nameMap[seqName] = index++;//pcluster
+                
+                for(int i=0;i<numFlowCells;i++){
+                    flowFile >> intensity;
+                    if(intensity > 9.99)       {       intensity = 9.99;       }
+                    int intI = int(100 * intensity + 0.0001);
+                    flowDataIntI.push_back(intI);
+                }
+                pDataArray->m->gobble(flowFile);
+            }
+            flowFile.close();
+            
+            int numSeqs = seqNameVector.size();                
+          //  cout << numSeqs << endl;   
+            for(int i=0;i<numSeqs;i++){
+                
+                if (pDataArray->m->control_pressed) { return 0; }
+                
+                int iNumFlowCells = i * numFlowCells;
+                for(int j=lengths[i];j<numFlowCells;j++){
+                    flowDataIntI[iNumFlowCells + j] = 0;
+                }
+            }
+          //  cout << "here" << endl; 
+            /*****************************************************************************************************/
        
-               for(int i=pDataArray->startSeq;i<pDataArray->stopSeq;i++){
+                       if (pDataArray->m->control_pressed) { return 0; }
+                       
+                       pDataArray->m->mothurOut("Identifying unique flowgrams...\n");
+                       //int numUniques = getUniques(numSeqs, numFlowCells, uniqueFlowgrams, uniqueCount, uniqueLengths, mapSeqToUnique, mapUniqueToSeq, lengths, flowDataPrI, flowDataIntI);
+            /*****************************************************************************************************/
+            int numUniques = 0;
+            uniqueFlowgrams.assign(numFlowCells * numSeqs, -1);
+            uniqueCount.assign(numSeqs, 0);                                                    //      anWeights
+            uniqueLengths.assign(numSeqs, 0);
+            mapSeqToUnique.assign(numSeqs, -1);
+            mapUniqueToSeq.assign(numSeqs, -1);
+            
+            vector<short> uniqueFlowDataIntI(numFlowCells * numSeqs, -1);
+            
+            for(int i=0;i<numSeqs;i++){
+                
+                if (pDataArray->m->control_pressed) { return 0; }
+                
+                int index = 0;
+                
+                vector<short> current(numFlowCells);
+                for(int j=0;j<numFlowCells;j++){
+                    current[j] = short(((flowDataIntI[i * numFlowCells + j] + 50.0)/100.0));
+                }
+                
+                for(int j=0;j<numUniques;j++){
+                    int offset = j * numFlowCells;
+                    bool toEnd = 1;
+                    
+                    int shorterLength;
+                    if(lengths[i] < uniqueLengths[j])  {       shorterLength = lengths[i];                     }
+                    else                                                               {       shorterLength = uniqueLengths[j];       }
+                    
+                    for(int k=0;k<shorterLength;k++){
+                        if(current[k] != uniqueFlowgrams[offset + k]){
+                            toEnd = 0;
+                            break;
+                        }
+                    }
+                    
+                    if(toEnd){
+                        mapSeqToUnique[i] = j;
+                        uniqueCount[j]++;
+                        index = j;
+                        if(lengths[i] > uniqueLengths[j])      {       uniqueLengths[j] = lengths[i];  }
+                        break;
+                    }
+                    index++;
+                }
+                
+                if(index == numUniques){
+                    uniqueLengths[numUniques] = lengths[i];
+                    uniqueCount[numUniques] = 1;
+                    mapSeqToUnique[i] = numUniques;//anMap
+                    mapUniqueToSeq[numUniques] = i;//anF
+                    
+                    for(int k=0;k<numFlowCells;k++){
+                        uniqueFlowgrams[numUniques * numFlowCells + k] = current[k];
+                        uniqueFlowDataIntI[numUniques * numFlowCells + k] = flowDataIntI[i * numFlowCells + k];
+                    }
+                    
+                    numUniques++;
+                }
+            }
+            uniqueFlowDataIntI.resize(numFlowCells * numUniques);
+            uniqueLengths.resize(numUniques);  
+            
+            flowDataPrI.resize(numSeqs * numFlowCells, 0);
+            for(int i=0;i<flowDataPrI.size();i++)      {       
+                if (pDataArray->m->control_pressed) { return 0; } 
+                //flowDataPrI[i] = getProbIntensity(flowDataIntI[i]);  
+                
+                flowDataPrI[i] = 100000000; 
+            
+                for(int j=0;j<HOMOPS;j++){//loop signal strength
+                    
+                    if (pDataArray->m->control_pressed) { return 0; }
+                    
+                    float negLogProb = pDataArray->singleLookUp[j * NUMBINS + flowDataIntI[i]];
+                    if(negLogProb < flowDataPrI[i])    {       flowDataPrI[i] = negLogProb; }
+                }
+            }            
+            
+            /*****************************************************************************************************/
+                       
+                       if (pDataArray->m->control_pressed) { return 0; }
+                       
+                       pDataArray->m->mothurOut("Calculating distances between flowgrams...\n");
+            string distFileName = flowFileName.substr(0,flowFileName.find_last_of('.')) + ".shhh.dist";
+            unsigned long long begTime = time(NULL);
+            double begClock = clock();
+            
+            //flowDistParentFork(numFlowCells, distFileName, numUniques, mapUniqueToSeq, mapSeqToUnique, lengths, flowDataPrI, flowDataIntI);  
+            /*****************************************************************************************************/
+            ostringstream outStream;
+            outStream.setf(ios::fixed, ios::floatfield);
+            outStream.setf(ios::dec, ios::basefield);
+            outStream.setf(ios::showpoint);
+            outStream.precision(6);
+            
+            int thisbegTime = time(NULL);
+            double thisbegClock = clock();
+            
+            for(int i=0;i<numUniques;i++){
+                
+                if (pDataArray->m->control_pressed) { break; }
+                
+                for(int j=0;j<i;j++){
+                    //float flowDistance = calcPairwiseDist(numFlowCells, mapUniqueToSeq[i], mapUniqueToSeq[j], mapSeqToUnique, lengths, flowDataPrI, flowDataIntI);
+                    /*****************************************************************************************************/
+                    int seqA = mapUniqueToSeq[i]; int seqB = mapUniqueToSeq[j];
+                    int minLength = lengths[mapSeqToUnique[seqA]];
+                    if(lengths[seqB] < minLength){     minLength = lengths[mapSeqToUnique[seqB]];      }
+                    
+                    int ANumFlowCells = seqA * numFlowCells;
+                    int BNumFlowCells = seqB * numFlowCells;
+                    
+                    float flowDistance = 0;
+                    
+                    for(int k=0;k<minLength;k++){
+                        
+                        if (pDataArray->m->control_pressed) { break; }
+                        
+                        int flowAIntI = flowDataIntI[ANumFlowCells + k];
+                        float flowAPrI = flowDataPrI[ANumFlowCells + k];
+                        
+                        int flowBIntI = flowDataIntI[BNumFlowCells + k];
+                        float flowBPrI = flowDataPrI[BNumFlowCells + k];
+                        flowDistance += pDataArray->jointLookUp[flowAIntI * NUMBINS + flowBIntI] - flowAPrI - flowBPrI;
+                    }
+                    
+                    flowDistance /= (float) minLength;
+                    /*****************************************************************************************************/
+
+                    if(flowDistance < 1e-6){
+                        outStream << mapUniqueToSeq[i] << '\t' << mapUniqueToSeq[j] << '\t' << 0.000000 << endl;
+                    }
+                    else if(flowDistance <= pDataArray->cutoff){
+                        outStream << mapUniqueToSeq[i] << '\t' << mapUniqueToSeq[j] << '\t' << flowDistance << endl;
+                    }
+                }
+                if(i % 100 == 0){
+                    pDataArray->m->mothurOut(toString(i) + "\t" + toString(time(NULL) - thisbegTime));
+                    pDataArray->m->mothurOut("\t" + toString((clock()-thisbegClock)/CLOCKS_PER_SEC));
+                    pDataArray->m->mothurOutEndLine();
+                }
+            }
+            
+            ofstream distFile(distFileName.c_str());
+            distFile << outStream.str();               
+            distFile.close();
+            
+            if (pDataArray->m->control_pressed) {}
+            else {
+                pDataArray->m->mothurOut(toString(numUniques-1) + "\t" + toString(time(NULL) - thisbegTime));
+                pDataArray->m->mothurOut("\t" + toString((clock()-thisbegClock)/CLOCKS_PER_SEC));
+                pDataArray->m->mothurOutEndLine();
+            }
+            /*****************************************************************************************************/
+
+            pDataArray->m->mothurOutEndLine();
+            pDataArray->m->mothurOut("Total time: " + toString(time(NULL) - begTime) + '\t' + toString((clock() - begClock)/CLOCKS_PER_SEC) + '\n');
+            
+                       string namesFileName = flowFileName.substr(0,flowFileName.find_last_of('.')) + ".shhh.names";
+                       //createNamesFile(numSeqs, numUniques, namesFileName, seqNameVector, mapSeqToUnique, mapUniqueToSeq);
+            /*****************************************************************************************************/
+            vector<string> duplicateNames(numUniques, "");
+            for(int i=0;i<numSeqs;i++){
+                duplicateNames[mapSeqToUnique[i]] += seqNameVector[i] + ',';
+            }
+            
+            ofstream nameFile;
+            pDataArray->m->openOutputFile(namesFileName, nameFile);
+            
+            for(int i=0;i<numUniques;i++){
+                if (pDataArray->m->control_pressed) { nameFile.close(); return 0; }
+                nameFile << mapUniqueToSeq[i] << '\t' << duplicateNames[i].substr(0, duplicateNames[i].find_last_of(',')) << endl;
+            }
+            nameFile.close();
+            /*****************************************************************************************************/
+
+                       if (pDataArray->m->control_pressed) { return 0; }
+                       
+                       pDataArray->m->mothurOut("\nClustering flowgrams...\n");
+            string listFileName = flowFileName.substr(0,flowFileName.find_last_of('.')) + ".shhh.list";
+                       //cluster(listFileName, distFileName, namesFileName);
+            /*****************************************************************************************************/
+            ReadMatrix* read = new ReadColumnMatrix(distFileName);     
+            read->setCutoff(pDataArray->cutoff);
+            
+            NameAssignment* clusterNameMap = new NameAssignment(namesFileName);
+            clusterNameMap->readMap();
+            read->read(clusterNameMap);
+            
+            ListVector* list = read->getListVector();
+            SparseMatrix* matrix = read->getMatrix();
+            
+            delete read; 
+            delete clusterNameMap; 
+            
+            RAbundVector* rabund = new RAbundVector(list->getRAbundVector());
+            
+            Cluster* cluster = new CompleteLinkage(rabund, list, matrix, pDataArray->cutoff, "furthest"); 
+            string tag = cluster->getTag();
+            
+            double clusterCutoff = pDataArray->cutoff;
+            while (matrix->getSmallDist() <= clusterCutoff && matrix->getNNodes() > 0){
+                
+                if (pDataArray->m->control_pressed) { break; }
+                
+                cluster->update(clusterCutoff);
+            }
+            
+            list->setLabel(toString(pDataArray->cutoff));
+            
+            ofstream listFileOut;
+            pDataArray->m->openOutputFile(listFileName, listFileOut);
+            list->print(listFileOut);
+            listFileOut.close();
+            
+            delete matrix;     delete cluster; delete rabund; delete list;
+            /*****************************************************************************************************/
+
+                       if (pDataArray->m->control_pressed) { return 0; }
+            
+            vector<int> otuData;
+            vector<int> cumNumSeqs;
+            vector<int> nSeqsPerOTU;
+            vector<vector<int> > aaP;  //tMaster->aanP:        each row is a different otu / each col contains the sequence indices
+            vector<vector<int> > aaI;  //tMaster->aanI:        that are in each otu - can't differentiate between aaP and aaI 
+            vector<int> seqNumber;             //tMaster->anP:         the sequence id number sorted by OTU
+            vector<int> seqIndex;              //tMaster->anI;         the index that corresponds to seqNumber
+            
+                       
+                       //int numOTUs = getOTUData(numSeqs, listFileName, otuData, cumNumSeqs, nSeqsPerOTU, aaP, aaI, seqNumber, seqIndex, nameMap);
+                       /*****************************************************************************************************/
+            ifstream listFile;
+            pDataArray->m->openInputFile(listFileName, listFile);
+            string label;
+            int numOTUs;
+            
+            listFile >> label >> numOTUs;
+            
+            otuData.assign(numSeqs, 0);
+            cumNumSeqs.assign(numOTUs, 0);
+            nSeqsPerOTU.assign(numOTUs, 0);
+            aaP.clear();aaP.resize(numOTUs);
+            
+            seqNumber.clear();
+            aaI.clear();
+            seqIndex.clear();
+            
+            string singleOTU = "";
+            
+            for(int i=0;i<numOTUs;i++){
+                
+                if (pDataArray->m->control_pressed) { break; }
+                
+                listFile >> singleOTU;
+                
+                istringstream otuString(singleOTU);
+                
+                while(otuString){
+                    
+                    string seqName = "";
+                    
+                    for(int j=0;j<singleOTU.length();j++){
+                        char letter = otuString.get();
+                        
+                        if(letter != ','){
+                            seqName += letter;
+                        }
+                        else{
+                            map<string,int>::iterator nmIt = nameMap.find(seqName);
+                            int index = nmIt->second;
+                            
+                            nameMap.erase(nmIt);
+                            
+                            otuData[index] = i;
+                            nSeqsPerOTU[i]++;
+                            aaP[i].push_back(index);
+                            seqName = "";
+                        }
+                    }
+                    
+                    map<string,int>::iterator nmIt = nameMap.find(seqName);
+                    
+                    int index = nmIt->second;
+                    nameMap.erase(nmIt);
+                    
+                    otuData[index] = i;
+                    nSeqsPerOTU[i]++;
+                    aaP[i].push_back(index);   
+                    
+                    otuString.get();
+                }
+                
+                sort(aaP[i].begin(), aaP[i].end());
+                for(int j=0;j<nSeqsPerOTU[i];j++){
+                    seqNumber.push_back(aaP[i][j]);
+                }
+                for(int j=nSeqsPerOTU[i];j<numSeqs;j++){
+                    aaP[i].push_back(0);
+                }
+                
+                
+            }
+            
+            for(int i=1;i<numOTUs;i++){
+                cumNumSeqs[i] = cumNumSeqs[i-1] + nSeqsPerOTU[i-1];
+            }
+            aaI = aaP;
+            seqIndex = seqNumber;
+            
+            listFile.close();      
+            /*****************************************************************************************************/
+
+                       if (pDataArray->m->control_pressed) { return 0; }
+                       
+                       pDataArray->m->mothurRemove(distFileName);
+                       pDataArray->m->mothurRemove(namesFileName);
+                       pDataArray->m->mothurRemove(listFileName);
+                       
+            vector<double> dist;               //adDist - distance of sequences to centroids
+            vector<short> change;              //did the centroid sequence change? 0 = no; 1 = yes
+            vector<int> centroids;             //the representative flowgram for each cluster m
+            vector<double> weight;
+            vector<double> singleTau;  //tMaster->adTau:       1-D Tau vector (1xnumSeqs)
+            vector<int> nSeqsBreaks;
+            vector<int> nOTUsBreaks;
+            
+                       dist.assign(numSeqs * numOTUs, 0);
+            change.assign(numOTUs, 1);
+            centroids.assign(numOTUs, -1);
+            weight.assign(numOTUs, 0);
+            singleTau.assign(numSeqs, 1.0);
+            
+            nSeqsBreaks.assign(2, 0);
+            nOTUsBreaks.assign(2, 0);
+            
+            nSeqsBreaks[0] = 0;
+            nSeqsBreaks[1] = numSeqs;
+            nOTUsBreaks[1] = numOTUs;
                        
                        if (pDataArray->m->control_pressed) { break; }
-                       cout << "thread i = " << i << endl;
-                       for(int j=0;j<i;j++){
+                       
+                       double maxDelta = 0;
+                       int iter = 0;
+                       
+                       begClock = clock();
+                       begTime = time(NULL);
+            
+                       pDataArray->m->mothurOut("\nDenoising flowgrams...\n");
+                       pDataArray->m->mothurOut("iter\tmaxDelta\tnLL\t\tcycletime\n");
+                       
+                       while((pDataArray->maxIters == 0 && maxDelta > pDataArray->minDelta) || iter < MIN_ITER || (maxDelta > pDataArray->minDelta && iter < pDataArray->maxIters)){
+                               
+                               if (pDataArray->m->control_pressed) { break; }
                                
-                               cout << "thread j = " << j << endl;
-                               float flowDistance = 0.0;
-                               ////////////////// calcPairwiseDist ///////////////////
-                               //needed because this is a static global function that can't see the classes internal functions
+                               double cycClock = clock();
+                               unsigned long long cycTime = time(NULL);
+                               //fill(numOTUs, seqNumber, seqIndex, cumNumSeqs, nSeqsPerOTU, aaP, aaI);
+                /*****************************************************************************************************/
+                int indexFill = 0;
+                for(int i=0;i<numOTUs;i++){
+                    
+                    if (pDataArray->m->control_pressed) { return 0; }
+                    
+                    cumNumSeqs[i] = indexFill;
+                    for(int j=0;j<nSeqsPerOTU[i];j++){
+                        seqNumber[indexFill] = aaP[i][j];
+                        seqIndex[indexFill] = aaI[i][j];
+                        
+                        indexFill++;
+                    }
+                }
+                /*****************************************************************************************************/
+
                                
-                               int minLength = pDataArray->lengths[pDataArray->mapSeqToUnique[pDataArray->mapUniqueToSeq[i]]];
-                               if(pDataArray->lengths[pDataArray->mapUniqueToSeq[j]] < minLength){     minLength = pDataArray->lengths[pDataArray->mapSeqToUnique[pDataArray->mapUniqueToSeq[j]]];     }
+                               if (pDataArray->m->control_pressed) { break; }
+                
+                               //calcCentroidsDriver(numOTUs, cumNumSeqs, nSeqsPerOTU, seqIndex, change, centroids, singleTau, mapSeqToUnique, uniqueFlowgrams, flowDataIntI, lengths, numFlowCells, seqNumber);
+                /*****************************************************************************************************/
+                for(int i=0;i<numOTUs;i++){
+                    
+                    if (pDataArray->m->control_pressed) { break; }
+                    
+                    double count = 0;
+                    int position = 0;
+                    int minFlowGram = 100000000;
+                    double minFlowValue = 1e8;
+                    change[i] = 0; //FALSE
+                    
+                    for(int j=0;j<nSeqsPerOTU[i];j++){
+                        count += singleTau[seqNumber[cumNumSeqs[i] + j]];
+                    }
+                    
+                    if(nSeqsPerOTU[i] > 0 && count > MIN_COUNT){
+                        vector<double> adF(nSeqsPerOTU[i]);
+                        vector<int> anL(nSeqsPerOTU[i]);
+                        
+                        for(int j=0;j<nSeqsPerOTU[i];j++){
+                            int index = cumNumSeqs[i] + j;
+                            int nI = seqIndex[index];
+                            int nIU = mapSeqToUnique[nI];
+                            
+                            int k;
+                            for(k=0;k<position;k++){
+                                if(nIU == anL[k]){
+                                    break;
+                                }
+                            }
+                            if(k == position){
+                                anL[position] = nIU;
+                                adF[position] = 0.0000;
+                                position++;
+                            }                                          
+                        }
+                        
+                        for(int j=0;j<nSeqsPerOTU[i];j++){
+                            int index = cumNumSeqs[i] + j;
+                            int nI = seqIndex[index];
+                            
+                            double tauValue = singleTau[seqNumber[index]];
+                            
+                            for(int k=0;k<position;k++){
+                               // double dist = getDistToCentroid(anL[k], nI, lengths[nI], uniqueFlowgrams, flowDataIntI, numFlowCells);
+                                /*****************************************************************************************************/
+                                int flowAValue = anL[k] * numFlowCells;
+                                int flowBValue = nI * numFlowCells;
+                                
+                                double dist = 0;
+                                
+                                for(int l=0;l<lengths[nI];l++){
+                                    dist += pDataArray->singleLookUp[uniqueFlowgrams[flowAValue] * NUMBINS + flowDataIntI[flowBValue]];
+                                    flowAValue++;
+                                    flowBValue++;
+                                }
+                                
+                                dist = dist / (double)lengths[nI];
+                                /*****************************************************************************************************/
+                                adF[k] += dist * tauValue;
+                            }
+                        }
+                        
+                        for(int j=0;j<position;j++){
+                            if(adF[j] < minFlowValue){
+                                minFlowGram = j;
+                                minFlowValue = adF[j];
+                            }
+                        }
+                        
+                        if(centroids[i] != anL[minFlowGram]){
+                            change[i] = 1;
+                            centroids[i] = anL[minFlowGram];
+                        }
+                    }
+                    else if(centroids[i] != -1){
+                        change[i] = 1;
+                        centroids[i] = -1;                     
+                    }
+                }
+                /*****************************************************************************************************/
+
+                               if (pDataArray->m->control_pressed) { break; }
+                
+                               //maxDelta = getNewWeights(numOTUs, cumNumSeqs, nSeqsPerOTU, singleTau, seqNumber, weight);  
+                /*****************************************************************************************************/
+                double maxChange = 0;
+                
+                for(int i=0;i<numOTUs;i++){
+                    
+                    if (pDataArray->m->control_pressed) { break; }
+                    
+                    double difference = weight[i];
+                    weight[i] = 0;
+                    
+                    for(int j=0;j<nSeqsPerOTU[i];j++){
+                        int index = cumNumSeqs[i] + j;
+                        double tauValue = singleTau[seqNumber[index]];
+                        weight[i] += tauValue;
+                    }
+                    
+                    difference = fabs(weight[i] - difference);
+                    if(difference > maxChange){        maxChange = difference; }
+                }
+                maxDelta = maxChange;
+                /*****************************************************************************************************/
+
+                if (pDataArray->m->control_pressed) { break; }
+                
+                               //double nLL = getLikelihood(numSeqs, numOTUs, nSeqsPerOTU, seqNumber, cumNumSeqs, seqIndex, dist, weight); 
+                /*****************************************************************************************************/
+                vector<long double> P(numSeqs, 0);
+                int effNumOTUs = 0;
+                
+                for(int i=0;i<numOTUs;i++){
+                    if(weight[i] > MIN_WEIGHT){
+                        effNumOTUs++;
+                    }
+                }
+                
+                string hold;
+                for(int i=0;i<numOTUs;i++){
+                    
+                    if (pDataArray->m->control_pressed) { break; }
+                    
+                    for(int j=0;j<nSeqsPerOTU[i];j++){
+                        int index = cumNumSeqs[i] + j;
+                        int nI = seqIndex[index];
+                        double singleDist = dist[seqNumber[index]];
+                        
+                        P[nI] += weight[i] * exp(-singleDist * pDataArray->sigma);
+                    }
+                }
+                double nLL = 0.00;
+                for(int i=0;i<numSeqs;i++){
+                    if(P[i] == 0){     P[i] = DBL_EPSILON;     }
+                    
+                    nLL += -log(P[i]);
+                }
+                
+                nLL = nLL -(double)numSeqs * log(pDataArray->sigma);
+                /*****************************************************************************************************/
+
+                if (pDataArray->m->control_pressed) { break; }
+                
+                               //checkCentroids(numOTUs, centroids, weight);
+                /*****************************************************************************************************/
+                vector<int> unique(numOTUs, 1);
+                
+                for(int i=0;i<numOTUs;i++){
+                    if(centroids[i] == -1 || weight[i] < MIN_WEIGHT){
+                        unique[i] = -1;
+                    }
+                }
+                
+                for(int i=0;i<numOTUs;i++){
+                    
+                    if (pDataArray->m->control_pressed) { break; }
+                    
+                    if(unique[i] == 1){
+                        for(int j=i+1;j<numOTUs;j++){
+                            if(unique[j] == 1){
+                                
+                                if(centroids[j] == centroids[i]){
+                                    unique[j] = 0;
+                                    centroids[j] = -1;
+                                    
+                                    weight[i] += weight[j];
+                                    weight[j] = 0.0;
+                                }
+                            }
+                        }
+                    }
+                }
+                /*****************************************************************************************************/
+
+                               if (pDataArray->m->control_pressed) { break; }
                                
-                               int ANumFlowCells = pDataArray->mapUniqueToSeq[i] * pDataArray->numFlowCells;
-                               int BNumFlowCells = pDataArray->mapUniqueToSeq[j] * pDataArray->numFlowCells;
+                               //calcNewDistances(numSeqs, numOTUs, nSeqsPerOTU,  dist, weight, change, centroids, aaP, singleTau, aaI, seqNumber, seqIndex, uniqueFlowgrams, flowDataIntI, numFlowCells, lengths);
+                /*****************************************************************************************************/
+                int total = 0;
+                vector<double> newTau(numOTUs,0);
+                vector<double> norms(numSeqs, 0);
+                nSeqsPerOTU.assign(numOTUs, 0);
+                
+                for(int i=0;i<numSeqs;i++){
+                    
+                    if (pDataArray->m->control_pressed) { break; }
+                    
+                    int indexOffset = i * numOTUs;
+                    
+                    double offset = 1e8;
+                    
+                    for(int j=0;j<numOTUs;j++){
+                        
+                        if(weight[j] > MIN_WEIGHT && change[j] == 1){
+                            //dist[indexOffset + j] = getDistToCentroid(centroids[j], i, lengths[i], uniqueFlowgrams, flowDataIntI, numFlowCells);
+                            /*****************************************************************************************************/
+                            int flowAValue = centroids[j] * numFlowCells;
+                            int flowBValue = i * numFlowCells;
+                            
+                            double distTemp = 0;
+                            
+                            for(int l=0;l<lengths[i];l++){
+                                distTemp += pDataArray->singleLookUp[uniqueFlowgrams[flowAValue] * NUMBINS + flowDataIntI[flowBValue]];
+                                flowAValue++;
+                                flowBValue++;
+                            }
+                            
+                            dist[indexOffset + j] = distTemp / (double)lengths[i];
+                            /*****************************************************************************************************/
+
+                        }
+                        
+                        if(weight[j] > MIN_WEIGHT && dist[indexOffset + j] < offset){
+                            offset = dist[indexOffset + j];
+                        }
+                    }
+                    
+                    for(int j=0;j<numOTUs;j++){
+                        if(weight[j] > MIN_WEIGHT){
+                            newTau[j] = exp(pDataArray->sigma * (-dist[indexOffset + j] + offset)) * weight[j];
+                            norms[i] += newTau[j];
+                        }
+                        else{
+                            newTau[j] = 0.0;
+                        }
+                    }
+                    
+                    for(int j=0;j<numOTUs;j++){
+                        newTau[j] /= norms[i];
+                    }
+                    
+                    for(int j=0;j<numOTUs;j++){
+                        if(newTau[j] > MIN_TAU){
+                            
+                            int oldTotal = total;
+                            
+                            total++;
+                            
+                            singleTau.resize(total, 0);
+                            seqNumber.resize(total, 0);
+                            seqIndex.resize(total, 0);
+                            
+                            singleTau[oldTotal] = newTau[j];
+                            
+                            aaP[j][nSeqsPerOTU[j]] = oldTotal;
+                            aaI[j][nSeqsPerOTU[j]] = i;
+                            nSeqsPerOTU[j]++;
+                        }
+                    }
+                    
+                }
+
+                /*****************************************************************************************************/
+
+                               if (pDataArray->m->control_pressed) { break; }
                                
-                               for(int k=0;k<minLength;k++){
-                                       
-                                       if (pDataArray->m->control_pressed) { break; }
-                                       
-                                       int flowAIntI = pDataArray->flowDataIntI[ANumFlowCells + k];
-                                       float flowAPrI = pDataArray->flowDataPrI[ANumFlowCells + k];
-                                       
-                                       int flowBIntI = pDataArray->flowDataIntI[BNumFlowCells + k];
-                                       float flowBPrI = pDataArray->flowDataPrI[BNumFlowCells + k];
-                                       flowDistance += pDataArray->jointLookUp[flowAIntI * NUMBINS + flowBIntI] - flowAPrI - flowBPrI;
-                               }
+                               iter++;
                                
-                               flowDistance /= (float) minLength;
-                               //cout << flowDistance << endl;
-                               ////////////////// end of calcPairwiseDist ///////////////////
-                                                               
-                               if(flowDistance < 1e-6){
-                                       outStream << pDataArray->mapUniqueToSeq[i] << '\t' << pDataArray->mapUniqueToSeq[j] << '\t' << 0.000000 << endl;
-                               }
-                               else if(flowDistance <= pDataArray->cutoff){
-                                       outStream << pDataArray->mapUniqueToSeq[i] << '\t' << pDataArray->mapUniqueToSeq[j] << '\t' << flowDistance << endl;
-                               }
-                       }
-                       if(i % 100 == 0){
-                               pDataArray->m->mothurOut(toString(i) + "\t" + toString(time(NULL) - begTime));
-                               pDataArray->m->mothurOut("\t" + toString((clock()-begClock)/CLOCKS_PER_SEC));
-                               pDataArray->m->mothurOutEndLine();
-                       }
+                               pDataArray->m->mothurOut(toString(iter) + '\t' + toString(maxDelta) + '\t' + toString(nLL) + '\t' + toString(time(NULL) - cycTime) + '\t' + toString((clock() - cycClock)/(double)CLOCKS_PER_SEC) + '\n');
+                
+                       }       
+                       
+                       if (pDataArray->m->control_pressed) { break; }
+                       
+                       pDataArray->m->mothurOut("\nFinalizing...\n");
+                       //fill(numOTUs, seqNumber, seqIndex, cumNumSeqs, nSeqsPerOTU, aaP, aaI);
+            /*****************************************************************************************************/
+            int indexFill = 0;
+            for(int i=0;i<numOTUs;i++){
+                
+                if (pDataArray->m->control_pressed) { return 0; }
+                
+                cumNumSeqs[i] = indexFill;
+                for(int j=0;j<nSeqsPerOTU[i];j++){
+                    seqNumber[indexFill] = aaP[i][j];
+                    seqIndex[indexFill] = aaI[i][j];
+                    
+                    indexFill++;
+                }
+            }
+            /*****************************************************************************************************/
+
+                       if (pDataArray->m->control_pressed) { break; }
+                       
+                       //setOTUs(numOTUs, numSeqs, seqNumber, seqIndex, cumNumSeqs, nSeqsPerOTU, otuData, singleTau, dist, aaP, aaI);
+            /*****************************************************************************************************/
+            vector<double> bigTauMatrix(numOTUs * numSeqs, 0.0000);
+            
+            for(int i=0;i<numOTUs;i++){
+                
+                if (pDataArray->m->control_pressed) { break; }
+                
+                for(int j=0;j<nSeqsPerOTU[i];j++){
+                    int index = cumNumSeqs[i] + j;
+                    double tauValue = singleTau[seqNumber[index]];
+                    int sIndex = seqIndex[index];
+                    bigTauMatrix[sIndex * numOTUs + i] = tauValue;                             
+                }
+            }
+            
+            for(int i=0;i<numSeqs;i++){
+                double maxTau = -1.0000;
+                int maxOTU = -1;
+                for(int j=0;j<numOTUs;j++){
+                    if(bigTauMatrix[i * numOTUs + j] > maxTau){
+                        maxTau = bigTauMatrix[i * numOTUs + j];
+                        maxOTU = j;
+                    }
+                }
+                
+                otuData[i] = maxOTU;
+            }
+            
+            nSeqsPerOTU.assign(numOTUs, 0);            
+            
+            for(int i=0;i<numSeqs;i++){
+                int index = otuData[i];
+                
+                singleTau[i] = 1.0000;
+                dist[i] = 0.0000;
+                
+                aaP[index][nSeqsPerOTU[index]] = i;
+                aaI[index][nSeqsPerOTU[index]] = i;
+                
+                nSeqsPerOTU[index]++;
+            }
+            
+            //fill(numOTUs, seqNumber, seqIndex, cumNumSeqs, nSeqsPerOTU, aaP, aaI);   
+            /*****************************************************************************************************/
+            indexFill = 0;
+            for(int i=0;i<numOTUs;i++){
+                
+                if (pDataArray->m->control_pressed) { return 0; }
+                
+                cumNumSeqs[i] = indexFill;
+                for(int j=0;j<nSeqsPerOTU[i];j++){
+                    seqNumber[indexFill] = aaP[i][j];
+                    seqIndex[indexFill] = aaI[i][j];
+                    
+                    indexFill++;
+                }
+            }
+            /*****************************************************************************************************/
+
+            /*****************************************************************************************************/
+
+                       if (pDataArray->m->control_pressed) { break; }
+                       
+                       vector<int> otuCounts(numOTUs, 0);
+                       for(int i=0;i<numSeqs;i++)      {       otuCounts[otuData[i]]++;        }
+                       
+                       //calcCentroidsDriver(numOTUs, cumNumSeqs, nSeqsPerOTU, seqIndex, change, centroids, singleTau, mapSeqToUnique, uniqueFlowgrams, flowDataIntI, lengths, numFlowCells, seqNumber);       
+            /*****************************************************************************************************/
+            for(int i=0;i<numOTUs;i++){
+                
+                if (pDataArray->m->control_pressed) { break; }
+                
+                double count = 0;
+                int position = 0;
+                int minFlowGram = 100000000;
+                double minFlowValue = 1e8;
+                change[i] = 0; //FALSE
+                
+                for(int j=0;j<nSeqsPerOTU[i];j++){
+                    count += singleTau[seqNumber[cumNumSeqs[i] + j]];
+                }
+                
+                if(nSeqsPerOTU[i] > 0 && count > MIN_COUNT){
+                    vector<double> adF(nSeqsPerOTU[i]);
+                    vector<int> anL(nSeqsPerOTU[i]);
+                    
+                    for(int j=0;j<nSeqsPerOTU[i];j++){
+                        int index = cumNumSeqs[i] + j;
+                        int nI = seqIndex[index];
+                        int nIU = mapSeqToUnique[nI];
+                        
+                        int k;
+                        for(k=0;k<position;k++){
+                            if(nIU == anL[k]){
+                                break;
+                            }
+                        }
+                        if(k == position){
+                            anL[position] = nIU;
+                            adF[position] = 0.0000;
+                            position++;
+                        }                                              
+                    }
+                    
+                    for(int j=0;j<nSeqsPerOTU[i];j++){
+                        int index = cumNumSeqs[i] + j;
+                        int nI = seqIndex[index];
+                        
+                        double tauValue = singleTau[seqNumber[index]];
+                        
+                        for(int k=0;k<position;k++){
+                            // double dist = getDistToCentroid(anL[k], nI, lengths[nI], uniqueFlowgrams, flowDataIntI, numFlowCells);
+                            /*****************************************************************************************************/
+                            int flowAValue = anL[k] * numFlowCells;
+                            int flowBValue = nI * numFlowCells;
+                            
+                            double dist = 0;
+                            
+                            for(int l=0;l<lengths[nI];l++){
+                                dist += pDataArray->singleLookUp[uniqueFlowgrams[flowAValue] * NUMBINS + flowDataIntI[flowBValue]];
+                                flowAValue++;
+                                flowBValue++;
+                            }
+                            
+                            dist = dist / (double)lengths[nI];
+                            /*****************************************************************************************************/
+                            adF[k] += dist * tauValue;
+                        }
+                    }
+                    
+                    for(int j=0;j<position;j++){
+                        if(adF[j] < minFlowValue){
+                            minFlowGram = j;
+                            minFlowValue = adF[j];
+                        }
+                    }
+                    
+                    if(centroids[i] != anL[minFlowGram]){
+                        change[i] = 1;
+                        centroids[i] = anL[minFlowGram];
+                    }
+                }
+                else if(centroids[i] != -1){
+                    change[i] = 1;
+                    centroids[i] = -1;                 
+                }
+            }
+
+            /*****************************************************************************************************/
+
+            if (pDataArray->m->control_pressed) { break; }
+            
+                       //writeQualities(numOTUs, numFlowCells, flowFileName, otuCounts, nSeqsPerOTU, seqNumber, singleTau, flowDataIntI, uniqueFlowgrams, cumNumSeqs, mapUniqueToSeq, seqNameVector, centroids, aaI); 
+            if (pDataArray->m->control_pressed) { break; }
+            /*****************************************************************************************************/
+            string thisOutputDir = pDataArray->outputDir;
+            if (pDataArray->outputDir == "") {  thisOutputDir += pDataArray->m->hasPath(flowFileName);  }
+            string qualityFileName = thisOutputDir + pDataArray->m->getRootName(pDataArray->m->getSimpleName(flowFileName)) + "shhh.qual";
+            
+            ofstream qualityFile;
+            pDataArray->m->openOutputFile(qualityFileName, qualityFile);
+            
+            qualityFile.setf(ios::fixed, ios::floatfield);
+            qualityFile.setf(ios::showpoint);
+            qualityFile << setprecision(6);
+            
+            vector<vector<int> > qualities(numOTUs);
+            vector<double> pr(HOMOPS, 0);
+            
+            
+            for(int i=0;i<numOTUs;i++){
+                
+                if (pDataArray->m->control_pressed) { break; }
+                
+                int index = 0;
+                int base = 0;
+                
+                if(nSeqsPerOTU[i] > 0){
+                    qualities[i].assign(1024, -1);
+                    
+                    while(index < numFlowCells){
+                        double maxPrValue = 1e8;
+                        short maxPrIndex = -1;
+                        double count = 0.0000;
+                        
+                        pr.assign(HOMOPS, 0);
+                        
+                        for(int j=0;j<nSeqsPerOTU[i];j++){
+                            int lIndex = cumNumSeqs[i] + j;
+                            double tauValue = singleTau[seqNumber[lIndex]];
+                            int sequenceIndex = aaI[i][j];
+                            short intensity = flowDataIntI[sequenceIndex * numFlowCells + index];
+                            
+                            count += tauValue;
+                            
+                            for(int s=0;s<HOMOPS;s++){
+                                pr[s] += tauValue * pDataArray->singleLookUp[s * NUMBINS + intensity];
+                            }
+                        }
+                        
+                        maxPrIndex = uniqueFlowgrams[centroids[i] * numFlowCells + index];
+                        maxPrValue = pr[maxPrIndex];
+                        
+                        if(count > MIN_COUNT){
+                            double U = 0.0000;
+                            double norm = 0.0000;
+                            
+                            for(int s=0;s<HOMOPS;s++){
+                                norm += exp(-(pr[s] - maxPrValue));
+                            }
+                            
+                            for(int s=1;s<=maxPrIndex;s++){
+                                int value = 0;
+                                double temp = 0.0000;
+                                
+                                U += exp(-(pr[s-1]-maxPrValue))/norm;
+                                
+                                if(U>0.00){
+                                    temp = log10(U);
+                                }
+                                else{
+                                    temp = -10.1;
+                                }
+                                temp = floor(-10 * temp);
+                                value = (int)floor(temp);
+                                if(value > 100){       value = 100;    }
+                                
+                                qualities[i][base] = (int)value;
+                                base++;
+                            }
+                        }
+                        
+                        index++;
+                    }
+                }
+                
+                
+                if(otuCounts[i] > 0){
+                    qualityFile << '>' << seqNameVector[mapUniqueToSeq[i]] << endl;
+                    
+                    int j=4;   //need to get past the first four bases
+                    while(qualities[i][j] != -1){
+                        qualityFile << qualities[i][j] << ' ';
+                        j++;
+                    }
+                    qualityFile << endl;
+                }
+            }
+            qualityFile.close();
+            pDataArray->outputNames.push_back(qualityFileName);
+            /*****************************************************************************************************/
+
+           // writeSequences(thisCompositeFASTAFileName, numOTUs, numFlowCells, flowFileName, otuCounts, uniqueFlowgrams, seqNameVector, aaI, centroids);
+            if (pDataArray->m->control_pressed) { break; }
+            /*****************************************************************************************************/
+            thisOutputDir = pDataArray->outputDir;
+            if (pDataArray->outputDir == "") {  thisOutputDir += pDataArray->m->hasPath(flowFileName);  }
+            string fastaFileName = thisOutputDir + pDataArray->m->getRootName(pDataArray->m->getSimpleName(flowFileName)) + "shhh.fasta";
+            ofstream fastaFile;
+            pDataArray->m->openOutputFile(fastaFileName, fastaFile);
+            
+            vector<string> names(numOTUs, "");
+            
+            for(int i=0;i<numOTUs;i++){
+                
+                if (pDataArray->m->control_pressed) { break; }
+                
+                int index = centroids[i];
+                
+                if(otuCounts[i] > 0){
+                    fastaFile << '>' << seqNameVector[aaI[i][0]] << endl;
+                    
+                    string newSeq = "";
+                    
+                    for(int j=0;j<numFlowCells;j++){
+                        
+                        char base = pDataArray->flowOrder[j % 4];
+                        for(int k=0;k<uniqueFlowgrams[index * numFlowCells + j];k++){
+                            newSeq += base;
+                        }
+                    }
+                    
+                    fastaFile << newSeq.substr(4) << endl;
+                }
+            }
+            fastaFile.close();
+            
+            pDataArray->outputNames.push_back(fastaFileName);
+            
+            if(pDataArray->thisCompositeFASTAFileName != ""){
+                pDataArray->m->appendFiles(fastaFileName, pDataArray->thisCompositeFASTAFileName);
+            }
+
+            /*****************************************************************************************************/
+
+            //writeNames(thisCompositeNamesFileName, numOTUs, flowFileName, otuCounts, seqNameVector, aaI, nSeqsPerOTU);                               
+            if (pDataArray->m->control_pressed) { break; }
+            /*****************************************************************************************************/
+            thisOutputDir = pDataArray->outputDir;
+            if (pDataArray->outputDir == "") {  thisOutputDir += pDataArray->m->hasPath(flowFileName);  }
+            string nameFileName = thisOutputDir + pDataArray->m->getRootName(pDataArray->m->getSimpleName(flowFileName)) + "shhh.names";
+            ofstream nameFileOut;
+            pDataArray->m->openOutputFile(nameFileName, nameFileOut);
+            
+            for(int i=0;i<numOTUs;i++){
+                
+                if (pDataArray->m->control_pressed) { break; }
+                
+                if(otuCounts[i] > 0){
+                    nameFileOut << seqNameVector[aaI[i][0]] << '\t' << seqNameVector[aaI[i][0]];
+                    
+                    for(int j=1;j<nSeqsPerOTU[i];j++){
+                        nameFileOut << ',' << seqNameVector[aaI[i][j]];
+                    }
+                    
+                    nameFileOut << endl;
+                }
+            }
+            nameFileOut.close();
+            pDataArray->outputNames.push_back(nameFileName);
+            
+            
+            if(pDataArray->thisCompositeNameFileName != ""){
+                pDataArray->m->appendFiles(nameFileName, pDataArray->thisCompositeNameFileName);
+            }          
+            /*****************************************************************************************************/
+
+            //writeClusters(flowFileName, numOTUs, numFlowCells,otuCounts, centroids, uniqueFlowgrams, seqNameVector, aaI, nSeqsPerOTU, lengths, flowDataIntI);                        
+            if (pDataArray->m->control_pressed) { break; }
+            /*****************************************************************************************************/
+            thisOutputDir = pDataArray->outputDir;
+            if (pDataArray->outputDir == "") {  thisOutputDir += pDataArray->m->hasPath(flowFileName);  }
+            string otuCountsFileName = thisOutputDir + pDataArray->m->getRootName(pDataArray->m->getSimpleName(flowFileName)) + "shhh.counts";
+            ofstream otuCountsFile;
+            pDataArray->m->openOutputFile(otuCountsFileName, otuCountsFile);
+            
+            string bases = pDataArray->flowOrder;
+            
+            for(int i=0;i<numOTUs;i++){
+                
+                if (pDataArray->m->control_pressed) {
+                    break;
+                }
+                //output the translated version of the centroid sequence for the otu
+                if(otuCounts[i] > 0){
+                    int index = centroids[i];
+                    
+                    otuCountsFile << "ideal\t";
+                    for(int j=8;j<numFlowCells;j++){
+                        char base = bases[j % 4];
+                        for(int s=0;s<uniqueFlowgrams[index * numFlowCells + j];s++){
+                            otuCountsFile << base;
+                        }
+                    }
+                    otuCountsFile << endl;
+                    
+                    for(int j=0;j<nSeqsPerOTU[i];j++){
+                        int sequence = aaI[i][j];
+                        otuCountsFile << seqNameVector[sequence] << '\t';
+                        
+                        string newSeq = "";
+                        
+                        for(int k=0;k<lengths[sequence];k++){
+                            char base = bases[k % 4];
+                            int freq = int(0.01 * (double)flowDataIntI[sequence * numFlowCells + k] + 0.5);
+                            
+                            for(int s=0;s<freq;s++){
+                                newSeq += base;
+                                //otuCountsFile << base;
+                            }
+                        }
+                        otuCountsFile << newSeq.substr(4) << endl;
+                    }
+                    otuCountsFile << endl;
+                }
+            }
+            otuCountsFile.close();
+            pDataArray->outputNames.push_back(otuCountsFileName);
+            /*****************************************************************************************************/
+
+            //writeGroups(flowFileName, numSeqs, seqNameVector);                                               
+            if (pDataArray->m->control_pressed) { break; }
+            /*****************************************************************************************************/
+            thisOutputDir = pDataArray->outputDir;
+            if (pDataArray->outputDir == "") {  thisOutputDir += pDataArray->m->hasPath(flowFileName);  }
+            string fileRoot = thisOutputDir + pDataArray->m->getRootName(pDataArray->m->getSimpleName(flowFileName));
+            string groupFileName = fileRoot + "shhh.groups";
+            ofstream groupFile;
+            pDataArray->m->openOutputFile(groupFileName, groupFile);
+            
+            for(int i=0;i<numSeqs;i++){
+                if (pDataArray->m->control_pressed) { break; }
+                groupFile << seqNameVector[i] << '\t' << fileRoot << endl;
+            }
+            groupFile.close();
+            pDataArray->outputNames.push_back(groupFileName);
+            /*****************************************************************************************************/
+
+            pDataArray->m->mothurOut("Total time to process " + flowFileName + ":\t" + toString(time(NULL) - begTime) + '\t' + toString((clock() - begClock)/(double)CLOCKS_PER_SEC) + '\n');
                }
                
-               ofstream distFile(pDataArray->distFileName.c_str());
-               distFile << outStream.str();            
-               distFile.close();
-               
-               if (pDataArray->m->control_pressed) {}
-               else {
-                       pDataArray->m->mothurOut(toString(pDataArray->stopSeq-1) + "\t" + toString(time(NULL) - begTime));
-                       pDataArray->m->mothurOut("\t" + toString((clock()-begClock)/CLOCKS_PER_SEC));
-                       pDataArray->m->mothurOutEndLine();
-               }               
+        if (pDataArray->m->control_pressed) { for (int i = 0; i < pDataArray->outputNames.size(); i++) { pDataArray->m->mothurRemove(pDataArray->outputNames[i]); } return 0; }
+        
+        return 0;
                
        }
        catch(exception& e) {
-               pDataArray->m->errorOut(e, "ShhherCommand", "MyflowDistParentForkThreadFunction");
+               pDataArray->m->errorOut(e, "ShhherCommand", "ShhhFlowsThreadFunction");
                exit(1);
        }
 } 
index ca1ff42a11c6bcbd689e0e49e2cdfb36604e0807..3796801a73c852bd61b6785d453d1cce825a0e66 100644 (file)
 #include "mothur.h"
 #include "database.hpp"
 #include "suffixtree.hpp"
-//class SuffixTree;
 
 class SuffixDB : public Database {
        
 public:
        SuffixDB(int);
        SuffixDB();
-       SuffixDB(const SuffixDB& sdb) : count(sdb.count), Database(sdb) {
-               for (int i = 0; i < sdb.suffixForest.size(); i++) {
-                       SuffixTree temp(sdb.suffixForest[i]);
-                       suffixForest.push_back(temp);
-               }
-       }
        ~SuffixDB();
        
        void generateDB() {}; //adding sequences generates the db
index 1e078a63a759a99b4b47ed123526da752e2404bd..6a22c4d4359b8ee6120c3e683b2d14318fd0c05c 100644 (file)
@@ -25,7 +25,6 @@ class SuffixNode {
        
 public:
        SuffixNode(int, int, int);
-       SuffixNode(const SuffixNode& sn) : parentNode(sn.parentNode), startCharPosition(sn.startCharPosition), endCharPosition(sn.endCharPosition) {m = MothurOut::getInstance();}
        virtual ~SuffixNode() {}
        virtual void print(string, int) = 0;
        virtual void setChildren(char, int);
@@ -63,7 +62,6 @@ class SuffixBranch : public SuffixNode {
        
 public:
        SuffixBranch(int, int, int);
-       SuffixBranch(const SuffixBranch& sb) : suffixNode(sb.suffixNode), childNodes(sb.childNodes), SuffixNode(sb.parentNode, sb.startCharPosition, sb.endCharPosition) {}
        ~SuffixBranch() {}
        void print(string, int);                //      need a special method for printing the node because there are children
        void eraseChild(char);                  //      need a special method for erasing the children
index fd18109513bea9f59590e16a1c705128f61ca43e..9cd835185c31a61bd024590f5a3fc7396d31610f 100644 (file)
@@ -33,25 +33,6 @@ inline bool compareParents(SuffixNode* left, SuffixNode* right){//   this is neces
        return (left->getParentNode() < right->getParentNode());        //      nodes in order of their parent
 }
 
-//********************************************************************************************************************
-SuffixTree::SuffixTree(const SuffixTree& st) : root(st.root), activeEndPosition(st.activeEndPosition), activeStartPosition(st.activeStartPosition), activeNode(st.activeNode),
-                                                                                               nodeCounter(st.nodeCounter), seqName(st.seqName), sequence(st.sequence) { 
-       try {
-               m = MothurOut::getInstance(); 
-               
-               for (int i = 0; i < st.nodeVector.size(); i++) {
-                       SuffixNode* temp = new SuffixBranch(*((SuffixBranch*)st.nodeVector[i]));
-                       nodeVector.push_back(temp);
-               }
-               
-               
-       }catch(exception& e) {
-               m->errorOut(e, "SuffixTree", "SuffixTree");
-               exit(1);
-       }
-}
 //********************************************************************************************************************
 
 SuffixTree::SuffixTree(){ m = MothurOut::getInstance(); }
index 492db54772ddcc1b2e2d60ea46c1700529453e22..d2b69e42325777d508a23000b0bc0924e0cc34f9 100644 (file)
@@ -36,8 +36,6 @@ class SuffixTree {
 public:
        SuffixTree();
        ~SuffixTree();
-//     SuffixTree(string, string);
-       SuffixTree(const SuffixTree&);
 
        void loadSequence(Sequence);
        string getSeqName();
index b99373510ba4d6e9d3e0d323021178227ede92e7..d9d71aea2dd1859d6e198302325fb51d2fdf9367 100644 (file)
--- a/tree.cpp
+++ b/tree.cpp
@@ -706,7 +706,7 @@ void Tree::randomLabels(string groupA, string groupB) {
                exit(1);
        }
 }
-/**************************************************************************************************/
+**************************************************************************************************/
 void Tree::randomBlengths()  {
        try {
                for(int i=numNodes-1;i>=0;i--){
index f0b5a80880240e78674d6d97198ba7d47c8793ab..245cfdcac8819d076782b85795300c0dfb034462 100644 (file)
 #include "needlemanoverlap.hpp"
 
 
+/********************************************************************/
+//strip, pdiffs, bdiffs, primers, barcodes, revPrimers
+TrimOligos::TrimOligos(int p, int b, int l, int s, map<string, int> pr, map<string, int> br, vector<string> r, vector<string> lk, vector<string> sp){
+       try {
+               m = MothurOut::getInstance();
+               
+               pdiffs = p;
+               bdiffs = b;
+        ldiffs = l;
+        sdiffs = s;
+               
+               barcodes = br;
+               primers = pr;
+               revPrimer = r;
+        linker = lk;
+        spacer = sp;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "TrimOligos", "TrimOligos");
+               exit(1);
+       }
+}
 /********************************************************************/
 //strip, pdiffs, bdiffs, primers, barcodes, revPrimers
 TrimOligos::TrimOligos(int p, int b, map<string, int> pr, map<string, int> br, vector<string> r){
@@ -69,9 +91,9 @@ int TrimOligos::stripBarcode(Sequence& seq, QualityScores& qual, int& group){
                        
                        Alignment* alignment;
                        if (barcodes.size() > 0) {
-                               map<string,int>::iterator it=barcodes.begin();
+                               map<string,int>::iterator it;
                                
-                               for(it;it!=barcodes.end();it++){
+                               for(it=barcodes.begin();it!=barcodes.end();it++){
                                        if(it->first.length() > maxLength){
                                                maxLength = it->first.length();
                                        }
@@ -296,9 +318,9 @@ int TrimOligos::stripForward(Sequence& seq, int& group){
                        
                        Alignment* alignment;
                        if (primers.size() > 0) {
-                               map<string,int>::iterator it=primers.begin();
+                               map<string,int>::iterator it;
                                
-                               for(it;it!=primers.end();it++){
+                               for(it=primers.begin();it!=primers.end();it++){
                                        if(it->first.length() > maxLength){
                                                maxLength = it->first.length();
                                        }
@@ -375,7 +397,7 @@ int TrimOligos::stripForward(Sequence& seq, int& group){
        }
 }
 //*******************************************************************/
-int TrimOligos::stripForward(Sequence& seq, QualityScores& qual, int& group){
+int TrimOligos::stripForward(Sequence& seq, QualityScores& qual, int& group, bool keepForward){
        try {
                string rawSequence = seq.getUnaligned();
                int success = pdiffs + 1;       //guilty until proven innocent
@@ -390,9 +412,9 @@ int TrimOligos::stripForward(Sequence& seq, QualityScores& qual, int& group){
                        
                        if(compareDNASeq(oligo, rawSequence.substr(0,oligo.length()))){
                                group = it->second;
-                               seq.setUnaligned(rawSequence.substr(oligo.length()));
+                               if (!keepForward) { seq.setUnaligned(rawSequence.substr(oligo.length())); }
                                if(qual.getName() != ""){
-                                       qual.trimQScores(oligo.length(), -1);
+                                       if (!keepForward) {  qual.trimQScores(oligo.length(), -1); }
                                }
                                success = 0;
                                break;
@@ -408,9 +430,9 @@ int TrimOligos::stripForward(Sequence& seq, QualityScores& qual, int& group){
                        
                        Alignment* alignment;
                        if (primers.size() > 0) {
-                               map<string,int>::iterator it=primers.begin();
+                               map<string,int>::iterator it;
                                
-                               for(it;it!=primers.end();it++){
+                               for(it=primers.begin();it!=primers.end();it++){
                                        if(it->first.length() > maxLength){
                                                maxLength = it->first.length();
                                        }
@@ -470,9 +492,9 @@ int TrimOligos::stripForward(Sequence& seq, QualityScores& qual, int& group){
                        else if(minCount > 1)   {       success = pdiffs + 10;  }       //can't tell the difference between multiple primers
                        else{                                                                                                   //use the best match
                                group = minGroup;
-                               seq.setUnaligned(rawSequence.substr(minPos));
+                               if (!keepForward) { seq.setUnaligned(rawSequence.substr(minPos)); }
                                if(qual.getName() != ""){
-                                       qual.trimQScores(minPos, -1);
+                                       if (!keepForward) { qual.trimQScores(minPos, -1); }
                                }
                                success = minDiff;
                        }
@@ -550,6 +572,129 @@ bool TrimOligos::stripReverse(Sequence& seq){
                exit(1);
        }
 }
+//******************************************************************/
+bool TrimOligos::stripLinker(Sequence& seq, QualityScores& qual){
+       try {
+               string rawSequence = seq.getUnaligned();
+               bool success = 0;       //guilty until proven innocent
+               
+               for(int i=0;i<linker.size();i++){
+                       string oligo = linker[i];
+                       
+                       if(rawSequence.length() < oligo.length()){
+                               success = 0;
+                               break;
+                       }
+                       
+                       if(compareDNASeq(oligo, rawSequence.substr(rawSequence.length()-oligo.length(),oligo.length()))){
+                               seq.setUnaligned(rawSequence.substr(0,rawSequence.length()-oligo.length()));
+                               if(qual.getName() != ""){
+                                       qual.trimQScores(-1, rawSequence.length()-oligo.length());
+                               }
+                               success = 1;
+                               break;
+                       }
+               }       
+               return success;
+               
+       }
+       catch(exception& e) {
+               m->errorOut(e, "TrimOligos", "stripLinker");
+               exit(1);
+       }
+}
+//******************************************************************/
+bool TrimOligos::stripLinker(Sequence& seq){
+       try {
+               
+               string rawSequence = seq.getUnaligned();
+               bool success = 0;       //guilty until proven innocent
+               
+               for(int i=0;i<linker.size();i++){
+                       string oligo = linker[i];
+                       
+                       if(rawSequence.length() < oligo.length()){
+                               success = 0;
+                               break;
+                       }
+                       
+                       if(compareDNASeq(oligo, rawSequence.substr(rawSequence.length()-oligo.length(),oligo.length()))){
+                               seq.setUnaligned(rawSequence.substr(0,rawSequence.length()-oligo.length()));
+                               success = 1;
+                               break;
+                       }
+               }       
+               
+               return success;
+               
+       }
+       catch(exception& e) {
+               m->errorOut(e, "TrimOligos", "stripLinker");
+               exit(1);
+       }
+}
+
+//******************************************************************/
+bool TrimOligos::stripSpacer(Sequence& seq, QualityScores& qual){
+       try {
+               string rawSequence = seq.getUnaligned();
+               bool success = 0;       //guilty until proven innocent
+               
+               for(int i=0;i<spacer.size();i++){
+                       string oligo = spacer[i];
+                       
+                       if(rawSequence.length() < oligo.length()){
+                               success = 0;
+                               break;
+                       }
+                       
+                       if(compareDNASeq(oligo, rawSequence.substr(rawSequence.length()-oligo.length(),oligo.length()))){
+                               seq.setUnaligned(rawSequence.substr(0,rawSequence.length()-oligo.length()));
+                               if(qual.getName() != ""){
+                                       qual.trimQScores(-1, rawSequence.length()-oligo.length());
+                               }
+                               success = 1;
+                               break;
+                       }
+               }       
+               return success;
+               
+       }
+       catch(exception& e) {
+               m->errorOut(e, "TrimOligos", "stripSpacer");
+               exit(1);
+       }
+}
+//******************************************************************/
+bool TrimOligos::stripSpacer(Sequence& seq){
+       try {
+               
+               string rawSequence = seq.getUnaligned();
+               bool success = 0;       //guilty until proven innocent
+               
+               for(int i=0;i<spacer.size();i++){
+                       string oligo = spacer[i];
+                       
+                       if(rawSequence.length() < oligo.length()){
+                               success = 0;
+                               break;
+                       }
+                       
+                       if(compareDNASeq(oligo, rawSequence.substr(rawSequence.length()-oligo.length(),oligo.length()))){
+                               seq.setUnaligned(rawSequence.substr(0,rawSequence.length()-oligo.length()));
+                               success = 1;
+                               break;
+                       }
+               }       
+               
+               return success;
+               
+       }
+       catch(exception& e) {
+               m->errorOut(e, "TrimOligos", "stripSpacer");
+               exit(1);
+       }
+}
 
 //******************************************************************/
 bool TrimOligos::compareDNASeq(string oligo, string seq){
index 8830dff57af8985c4f0e0bd4464f0860a46e8b81..e3ea7d55537e323f3869a6157ff3aed4bbd99eea 100644 (file)
 class TrimOligos {
        
        public:
-               TrimOligos(int,int, map<string, int>, map<string, int>, vector<string>); //pdiffs, bdiffs, primers, barcodes, revPrimers
+        TrimOligos(int,int, map<string, int>, map<string, int>, vector<string>); //pdiffs, bdiffs, primers, barcodes, revPrimers
+               TrimOligos(int,int, int, int, map<string, int>, map<string, int>, vector<string>, vector<string>, vector<string>); //pdiffs, bdiffs, ldiffs, sdiffs, primers, barcodes, revPrimers, linker, spacer
                ~TrimOligos();
        
                int stripBarcode(Sequence&, int&);      
                int stripBarcode(Sequence&, QualityScores&, int&);
        
                int stripForward(Sequence&, int&);
-               int stripForward(Sequence&, QualityScores&, int&);
+               int stripForward(Sequence&, QualityScores&, int&, bool);
        
                bool stripReverse(Sequence&);
                bool stripReverse(Sequence&, QualityScores&);
+    
+        bool stripLinker(Sequence&);
+        bool stripLinker(Sequence&, QualityScores&);
+    
+        bool stripSpacer(Sequence&);
+        bool stripSpacer(Sequence&, QualityScores&);
                                
        
        private:
-               int pdiffs, bdiffs;
+               int pdiffs, bdiffs, ldiffs, sdiffs;
        
                map<string, int> barcodes;
                map<string, int> primers;
                vector<string> revPrimer;
+        vector<string> linker;
+        vector<string> spacer;
        
                MothurOut* m;
        
index dd3427b9a9733285e5e793f78cabe5bdc3ed1cf3..ae9f707aa2fe5ace59e59d1697a8bf6e50319421 100644 (file)
@@ -25,9 +25,12 @@ vector<string> TrimSeqsCommand::setParameters(){
                CommandParameter pmaxlength("maxlength", "Number", "", "0", "", "", "",false,false); parameters.push_back(pmaxlength);
                CommandParameter ppdiffs("pdiffs", "Number", "", "0", "", "", "",false,false); parameters.push_back(ppdiffs);
                CommandParameter pbdiffs("bdiffs", "Number", "", "0", "", "", "",false,false); parameters.push_back(pbdiffs);
-               CommandParameter ptdiffs("tdiffs", "Number", "", "0", "", "", "",false,false); parameters.push_back(ptdiffs);
+        CommandParameter pldiffs("ldiffs", "Number", "", "0", "", "", "",false,false); parameters.push_back(pldiffs);
+               CommandParameter psdiffs("sdiffs", "Number", "", "0", "", "", "",false,false); parameters.push_back(psdiffs);
+        CommandParameter ptdiffs("tdiffs", "Number", "", "0", "", "", "",false,false); parameters.push_back(ptdiffs);
                CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors);
                CommandParameter pallfiles("allfiles", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pallfiles);
+               CommandParameter pkeepforward("keepforward", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pkeepforward);
                CommandParameter pqtrim("qtrim", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(pqtrim);
                CommandParameter pqthreshold("qthreshold", "Number", "", "0", "", "", "",false,false); parameters.push_back(pqthreshold);
                CommandParameter pqaverage("qaverage", "Number", "", "0", "", "", "",false,false); parameters.push_back(pqaverage);
@@ -64,9 +67,11 @@ string TrimSeqsCommand::getHelpString(){
                helpString += "The maxhomop parameter allows you to set a maximum homopolymer length. \n";
                helpString += "The minlength parameter allows you to set and minimum sequence length. \n";
                helpString += "The maxlength parameter allows you to set and maximum sequence length. \n";
-               helpString += "The tdiffs parameter is used to specify the total number of differences allowed in the sequence. The default is pdiffs + bdiffs.\n";
+               helpString += "The tdiffs parameter is used to specify the total number of differences allowed in the sequence. The default is pdiffs + bdiffs + sdiffs + ldiffs.\n";
                helpString += "The bdiffs parameter is used to specify the number of differences allowed in the barcode. The default is 0.\n";
                helpString += "The pdiffs parameter is used to specify the number of differences allowed in the primer. The default is 0.\n";
+        helpString += "The ldiffs parameter is used to specify the number of differences allowed in the linker. The default is 0.\n";
+               helpString += "The sdiffs parameter is used to specify the number of differences allowed in the spacer. The default is 0.\n";
                helpString += "The qfile parameter allows you to provide a quality file.\n";
                helpString += "The qthreshold parameter allows you to set a minimum quality score allowed. \n";
                helpString += "The qaverage parameter allows you to set a minimum average quality score allowed. \n";
@@ -75,6 +80,7 @@ string TrimSeqsCommand::getHelpString(){
                helpString += "The rollaverage parameter allows you to set a minimum rolling average quality score allowed over a window. \n";
                helpString += "The qstepsize parameter allows you to set a number of bases to move the window over. Default=1.\n";
                helpString += "The allfiles parameter will create separate group and fasta file for each grouping. The default is F.\n";
+               helpString += "The keepforward parameter allows you to indicate whether you want the forward primer removed or not. The default is F, meaning remove the forward primer.\n";
                helpString += "The qtrim parameter will trim sequence from the point that they fall below the qthreshold and put it in the .trim file if set to true. The default is T.\n";
                helpString += "The keepfirst parameter trims the sequence to the first keepfirst number of bases after the barcode or primers are removed, before the sequence is checked to see if it meets the other requirements. \n";
                helpString += "The removelast removes the last removelast number of bases after the barcode or primers are removed, before the sequence is checked to see if it meets the other requirements.\n";
@@ -229,11 +235,17 @@ TrimSeqsCommand::TrimSeqsCommand(string option)  {
                        
                        temp = validParameter.validFile(parameters, "pdiffs", false);           if (temp == "not found") { temp = "0"; }
                        m->mothurConvert(temp, pdiffs);
+            
+            temp = validParameter.validFile(parameters, "ldiffs", false);              if (temp == "not found") { temp = "0"; }
+                       m->mothurConvert(temp, ldiffs);
+            
+            temp = validParameter.validFile(parameters, "sdiffs", false);              if (temp == "not found") { temp = "0"; }
+                       m->mothurConvert(temp, sdiffs);
                        
-                       temp = validParameter.validFile(parameters, "tdiffs", false);           if (temp == "not found") { int tempTotal = pdiffs + bdiffs;  temp = toString(tempTotal); }
+                       temp = validParameter.validFile(parameters, "tdiffs", false);           if (temp == "not found") { int tempTotal = pdiffs + bdiffs + ldiffs + sdiffs;  temp = toString(tempTotal); }
                        m->mothurConvert(temp, tdiffs);
                        
-                       if(tdiffs == 0){        tdiffs = bdiffs + pdiffs;       }
+                       if(tdiffs == 0){        tdiffs = bdiffs + pdiffs + ldiffs + sdiffs;     }
                        
                        temp = validParameter.validFile(parameters, "qfile", true);     
                        if (temp == "not found")        {       qFileName = "";         }
@@ -274,6 +286,9 @@ TrimSeqsCommand::TrimSeqsCommand(string option)  {
                        
                        temp = validParameter.validFile(parameters, "allfiles", false);         if (temp == "not found") { temp = "F"; }
                        allFiles = m->isTrue(temp);
+            
+            temp = validParameter.validFile(parameters, "keepforward", false);         if (temp == "not found") { temp = "F"; }
+                       keepforward = m->isTrue(temp);
                        
                        temp = validParameter.validFile(parameters, "processors", false);       if (temp == "not found"){       temp = m->getProcessors();      }
                        m->setProcessors(temp);
@@ -545,7 +560,7 @@ int TrimSeqsCommand::driverCreateTrim(string filename, string qFileName, string
                
                int count = 0;
                bool moreSeqs = 1;
-               TrimOligos trimOligos(pdiffs, bdiffs, primers, barcodes, revPrimer);
+               TrimOligos trimOligos(pdiffs, bdiffs, ldiffs, sdiffs, primers, barcodes, revPrimer, linker, spacer);
        
                while (moreSeqs) {
                                
@@ -578,14 +593,24 @@ int TrimSeqsCommand::driverCreateTrim(string filename, string qFileName, string
                                int barcodeIndex = 0;
                                int primerIndex = 0;
                                
+                if(numLinkers != 0){
+                                       success = trimOligos.stripLinker(currSeq, currQual);
+                                       if(!success)                            {       trashCode += 'k';       }
+                               }
+                
                                if(barcodes.size() != 0){
                                        success = trimOligos.stripBarcode(currSeq, currQual, barcodeIndex);
                                        if(success > bdiffs)            {       trashCode += 'b';       }
                                        else{ currentSeqsDiffs += success;  }
                                }
                                
+                if(numSpacers != 0){
+                                       success = trimOligos.stripSpacer(currSeq, currQual);
+                                       if(!success)                            {       trashCode += 's';       }
+                               }
+                
                                if(numFPrimers != 0){
-                                       success = trimOligos.stripForward(currSeq, currQual, primerIndex);
+                                       success = trimOligos.stripForward(currSeq, currQual, primerIndex, keepforward);
                                        if(success > pdiffs)            {       trashCode += 'f';       }
                                        else{ currentSeqsDiffs += success;  }
                                }
@@ -1101,6 +1126,10 @@ bool TrimSeqsCommand::getOligos(vector<vector<string> >& fastaFileNames, vector<
                                                
                                        barcodes[oligo]=indexBarcode; indexBarcode++;
                                        barcodeNameVector.push_back(group);
+                               }else if(type == "LINKER"){
+                                       linker.push_back(oligo);
+                               }else if(type == "SPACER"){
+                                       spacer.push_back(oligo);
                                }
                                else{   m->mothurOut(type + " is not recognized as a valid type. Choices are forward, reverse, and barcode. Ignoring " + oligo + "."); m->mothurOutEndLine();  }
                        }
@@ -1192,6 +1221,8 @@ bool TrimSeqsCommand::getOligos(vector<vector<string> >& fastaFileNames, vector<
                }
                numFPrimers = primers.size();
                numRPrimers = revPrimer.size();
+        numLinkers = linker.size();
+        numSpacers = spacer.size();
                
                bool allBlank = true;
                for (int i = 0; i < barcodeNameVector.size(); i++) {
index 137cb735719fba42f0d103af842de80c9930cc3e..b6050f2ee58a3db6f56714e7f313fa9ed93b77b5 100644 (file)
@@ -52,8 +52,8 @@ private:
        bool abort, createGroup;
        string fastaFile, oligoFile, qFileName, groupfile, nameFile, outputDir;
        
-       bool flip, allFiles, qtrim;
-       int numFPrimers, numRPrimers, maxAmbig, maxHomoP, minLength, maxLength, processors, tdiffs, bdiffs, pdiffs, comboStarts;
+       bool flip, allFiles, qtrim, keepforward;
+       int numFPrimers, numRPrimers, numLinkers, numSpacers, maxAmbig, maxHomoP, minLength, maxLength, processors, tdiffs, bdiffs, pdiffs, ldiffs, sdiffs, comboStarts;
        int qWindowSize, qWindowStep, keepFirst, removeLast;
        double qRollAverage, qThreshold, qWindowAverage, qAverage;
        vector<string> revPrimer, outputNames;
@@ -61,6 +61,8 @@ private:
        map<string, int> barcodes;
        vector<string> groupVector;
        map<string, int> primers;
+    vector<string>  linker;
+    vector<string>  spacer;
        map<string, int> combos;
        map<string, int> groupToIndex;
        vector<string> primerNameVector;        //needed here?