]> git.donarmstrong.com Git - mothur.git/commitdiff
added shhh.seqs command
authorpschloss <pschloss>
Fri, 31 Dec 2010 16:35:11 +0000 (16:35 +0000)
committerpschloss <pschloss>
Fri, 31 Dec 2010 16:35:11 +0000 (16:35 +0000)
13 files changed:
Mothur.xcodeproj/project.pbxproj
commandfactory.cpp
distancecommand.cpp
flowdata.cpp
flowdata.h
makefile
mothur
sffinfocommand.cpp
shhhercommand.cpp [new file with mode: 0644]
shhhercommand.h [new file with mode: 0644]
trimflowscommand.cpp
trimflowscommand.h
trimseqscommand.cpp

index febae68390a4245b6c1dcb0ad37bceda1962f39e..72be7732a88856925aa54c085ed92389c1272d0a 100644 (file)
                7E85BD1D11EB5E9B00FD37C0 /* qualityscores.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = qualityscores.cpp; sourceTree = "<group>"; };
                7E962A40121F76B1007464B5 /* invsimpson.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = invsimpson.h; sourceTree = "<group>"; };
                7E962A41121F76B1007464B5 /* invsimpson.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = invsimpson.cpp; sourceTree = "<group>"; };
+               7E99CA1312C8B0970041246B /* shhhercommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = shhhercommand.h; sourceTree = "<group>"; };
+               7E99CA1412C8B0970041246B /* shhhercommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = shhhercommand.cpp; sourceTree = "<group>"; };
+               7E99D77C12CBAD780041246B /* untitled.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = untitled.h; path = ../untitled.h; sourceTree = SOURCE_ROOT; };
+               7E99D77D12CBAD780041246B /* untitled.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = untitled.cpp; path = ../untitled.cpp; sourceTree = SOURCE_ROOT; };
                7EA299BA11E384940022D8D3 /* sensspeccommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = sensspeccommand.h; sourceTree = "<group>"; };
                7EA299BB11E384940022D8D3 /* sensspeccommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = sensspeccommand.cpp; sourceTree = "<group>"; };
                7EC61A0911BEA6AF00F668D9 /* weightedlinkage.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = weightedlinkage.cpp; sourceTree = "<group>"; };
                08FB7794FE84155DC02AAC07 /* mothur */ = {
                        isa = PBXGroup;
                        children = (
+                               7E99D77C12CBAD780041246B /* untitled.h */,
+                               7E99D77D12CBAD780041246B /* untitled.cpp */,
                                A768D95D1248FEAA008AB1D0 /* mothur */,
                                A7639F8D1175DF35008F5578 /* makefile */,
                                A7DA1FF0113FECD400BF472F /* alignment.cpp */,
                A7CB593B11402EF90010EB83 /* calculators */ = {
                        isa = PBXGroup;
                        children = (
+                               7E99CA1312C8B0970041246B /* shhhercommand.h */,
+                               7E99CA1412C8B0970041246B /* shhhercommand.cpp */,
                                A7DA200B113FECD400BF472F /* calculator.cpp */,
                                A7DA200C113FECD400BF472F /* calculator.h */,
                                A7DA1FED113FECD400BF472F /* ace.h */,
index 5d9679a498f70eea896664bad38c057710fd01bd..7d47491f7e32a3dc2adf518aa5d97cc2265f7121 100644 (file)
 #include "consensusseqscommand.h"
 #include "trimflowscommand.h"
 #include "corraxescommand.h"
+#include "shhhercommand.h"
 
 /*******************************************************/
 
@@ -229,6 +230,7 @@ CommandFactory::CommandFactory(){
        commands["screen.seqs"]                 = "MPIEnabled";
        commands["summary.seqs"]                = "MPIEnabled";
        commands["cluster.split"]               = "MPIEnabled";
+       commands["shhh.seqs"]                   = "MPIEnabled";
        commands["sens.spec"]                   = "sens.spec";
        commands["seq.error"]                   = "seq.error";
        commands["quit"]                                = "MPIEnabled"; 
@@ -313,6 +315,7 @@ Command* CommandFactory::getCommand(string commandName, string optionString){
                else if(commandName == "reverse.seqs")                  {       command = new ReverseSeqsCommand(optionString);                         }
                else if(commandName == "trim.seqs")                             {       command = new TrimSeqsCommand(optionString);                            }
                else if(commandName == "trim.flows")                    {       command = new TrimFlowsCommand(optionString);                           }
+               else if(commandName == "shhh.seqs")                             {       command = new ShhherCommand(optionString);                                      }
                else if(commandName == "chimera.seqs")                  {       command = new ChimeraSeqsCommand(optionString);                         }
                else if(commandName == "list.seqs")                             {       command = new ListSeqsCommand(optionString);                            }
                else if(commandName == "get.seqs")                              {       command = new GetSeqsCommand(optionString);                                     }
@@ -437,6 +440,7 @@ Command* CommandFactory::getCommand(string commandName, string optionString, str
                else if(commandName == "reverse.seqs")                  {       pipecommand = new ReverseSeqsCommand(optionString);                             }
                else if(commandName == "trim.seqs")                             {       pipecommand = new TrimSeqsCommand(optionString);                                }
                else if(commandName == "trim.flows")                    {       pipecommand = new TrimFlowsCommand(optionString);                               }
+               else if(commandName == "shhh.seqs")                             {       pipecommand = new ShhherCommand(optionString);                                  }
                else if(commandName == "chimera.seqs")                  {       pipecommand = new ChimeraSeqsCommand(optionString);                             }
                else if(commandName == "list.seqs")                             {       pipecommand = new ListSeqsCommand(optionString);                                }
                else if(commandName == "get.seqs")                              {       pipecommand = new GetSeqsCommand(optionString);                                 }
@@ -548,6 +552,7 @@ Command* CommandFactory::getCommand(string commandName){
                else if(commandName == "reverse.seqs")                  {       shellcommand = new ReverseSeqsCommand();                        }
                else if(commandName == "trim.seqs")                             {       shellcommand = new TrimSeqsCommand();                           }
                else if(commandName == "trim.flows")                    {       shellcommand = new TrimFlowsCommand();                          }
+               else if(commandName == "shhh.seqs")                             {       shellcommand = new ShhherCommand();                                     }
                else if(commandName == "chimera.seqs")                  {       shellcommand = new ChimeraSeqsCommand();                        }
                else if(commandName == "list.seqs")                             {       shellcommand = new ListSeqsCommand();                           }
                else if(commandName == "get.seqs")                              {       shellcommand = new GetSeqsCommand();                            }
index 4cb98419fd23f540e174b068dd9a832b79fb3036..3711a1d52c796006a354e3a341e4ba250fd8cf20 100644 (file)
@@ -314,7 +314,7 @@ int DistanceCommand::execute(){
                        //delete filename;
 
                        if (pid == 0) { //you are the root process 
-                       
+                               
                                //do your part
                                string outputMyPart;
                                
index f40fdd64b21aa0ad2dc2beae7e01e942ba46dc23..a172a03337303f287c64ed0f8e7352cdfde3db57 100644 (file)
@@ -15,43 +15,49 @@ FlowData::FlowData(){}
 
 //**********************************************************************************************************************
 
-FlowData::FlowData(ifstream& flowFile, float signal, float noise, int maxHomoP){
+FlowData::~FlowData(){ /*      do nothing      */      }
+
+//**********************************************************************************************************************
+
+FlowData::FlowData(int numFlows, float signal, float noise, int maxHomoP) : 
+                       numFlows(numFlows), signalIntensity(signal), noiseIntensity(noise), maxHomoP(maxHomoP){
 
        try {
                m = MothurOut::getInstance();
 
+               flowData.assign(numFlows, 0);
                baseFlow = "TACG";
                seqName = "";
-               numFlows = 0;
                locationString = "";
-               seqLength = 0;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "FlowData", "FlowData");
+               exit(1);
+       }
+       
+}
+
+//**********************************************************************************************************************
+
+bool FlowData::getNext(ifstream& flowFile){
+       
+       try {
                
                string lengthString;
                string flowString;
                
-               flowFile >> seqName >> locationString >> lengthString >> flowString;
-
-               convert(lengthString.substr(7), seqLength);
-               convert(flowString.substr(9), numFlows);
-
-               flowData.resize(numFlows);
+               flowFile >> seqName >> endFlow;         
+               for(int i=0;i<numFlows;i++)     {       flowFile >> flowData[i];        }
                
-               if (seqName == "") {
-                       m->mothurOut("Error reading quality file, name blank at position, " + toString(flowFile.tellg()));
-                       m->mothurOutEndLine(); 
-               }
-               else{
-                       seqName = seqName.substr(1);
-                       for(int i=0;i<numFlows;i++)     {       flowFile >> flowData[i];        }
-               }
-               
-               findDeadSpot(signal, noise, maxHomoP);
+               updateEndFlow();
                translateFlow();
                
                m->gobble(flowFile);
+               if(flowFile){   return 1;       }
+               else            {       return 0;       }
        }
        catch(exception& e) {
-               m->errorOut(e, "FlowData", "FlowData");
+               m->errorOut(e, "FlowData", "getNext");
                exit(1);
        }
        
@@ -59,19 +65,20 @@ FlowData::FlowData(ifstream& flowFile, float signal, float noise, int maxHomoP){
 
 //**********************************************************************************************************************
 
-void FlowData::findDeadSpot(float signalIntensity, float noiseIntensity, int maxHomoP){
+void FlowData::updateEndFlow(){
        try{
                
                int currLength = 0;
                float maxIntensity = (float) maxHomoP + 0.49;
                
-               deadSpot = 0;
-               while(currLength < seqLength + 4){
+               int deadSpot = 0;
+                               
+               while(deadSpot < endFlow){
                        int signal = 0;
                        int noise = 0;
                        
                        for(int i=0;i<4;i++){
-                               float intensity = flowData[i + 4 * deadSpot];
+                               float intensity = flowData[i + deadSpot];
                                if(intensity > signalIntensity){
                                        signal++;
 
@@ -79,18 +86,16 @@ void FlowData::findDeadSpot(float signalIntensity, float noiseIntensity, int max
                                                noise++;
                                        }
                                }
-                               currLength += (int)(intensity+0.5);
                        }
 
                        if(noise > 0 || signal == 0){
                                break;
                        }
                
-                       deadSpot++;
+                       deadSpot += 4;
                }
-               deadSpot *= 4;
-               seqLength = currLength;
-               
+               endFlow = deadSpot;
+
        }
        catch(exception& e) {
                m->errorOut(e, "FlowData", "findDeadSpot");
@@ -104,7 +109,7 @@ void FlowData::translateFlow(){
        
        try{
                sequence = "";
-               for(int i=0;i<deadSpot;i++){
+               for(int i=0;i<endFlow;i++){
                        int intensity = (int)(flowData[i] + 0.5);
                        char base = baseFlow[i % 4];
                        
@@ -128,12 +133,12 @@ void FlowData::translateFlow(){
 
 //**********************************************************************************************************************
 
-void FlowData::capFlows(int maxFlows){
+void FlowData::capFlows(int mF){
        
        try{
                
-               numFlows = maxFlows;
-               if(deadSpot > maxFlows){        deadSpot = maxFlows;    }
+               maxFlows = mF;
+               if(endFlow > maxFlows){ endFlow = maxFlows;     }               
                
        }
        catch(exception& e) {
@@ -148,8 +153,8 @@ bool FlowData::hasMinFlows(int minFlows){
        
        try{
                bool pastMin = 0;
-               
-               if(deadSpot >= minFlows){       pastMin = 1;    }
+               if(endFlow >= minFlows){        pastMin = 1;    }
+
                return pastMin;
        }
        catch(exception& e) {
@@ -173,25 +178,12 @@ Sequence FlowData::getSequence(){
 
 //**********************************************************************************************************************
 
-int FlowData::getSeqLength(){
-       
-       try{
-               return seqLength;               
-       }
-       catch(exception& e) {
-               m->errorOut(e, "FlowData", "getSeqLength");
-               exit(1);
-       }
-}
-
-//**********************************************************************************************************************
-
 void FlowData::printFlows(ofstream& outFlowFile){
        try{
-       //      outFlowFile << '>' << seqName << locationString << " length=" << seqLength << " numflows=" << maxFlows << endl;
-               outFlowFile << seqName << ' ' << deadSpot << ' ' << setprecision(2);
+//     outFlowFile << '>' << seqName << locationString << " length=" << seqLength << " numflows=" << maxFlows << endl;
+               outFlowFile << seqName << ' ' << endFlow << ' ' << setprecision(2);
 
-               for(int i=0;i<numFlows;i++){
+               for(int i=0;i<maxFlows;i++){
                        outFlowFile << flowData[i] << ' ';
                }
                outFlowFile << endl;
@@ -206,10 +198,7 @@ void FlowData::printFlows(ofstream& outFlowFile){
 
 void FlowData::printFlows(ofstream& outFlowFile, string scrapCode){
        try{
-       
-       //      outFlowFile << '>' << seqName << locationString << " length=" << seqLength << " numflows=" << maxFlows << endl;
-
-               outFlowFile << seqName << '|' << scrapCode << ' ' << deadSpot << ' ' << setprecision(2);
+               outFlowFile << seqName << '|' << scrapCode << ' ' << endFlow << ' ' << setprecision(2);
                
                for(int i=0;i<numFlows;i++){
                        outFlowFile << flowData[i] << ' ';
@@ -222,20 +211,4 @@ void FlowData::printFlows(ofstream& outFlowFile, string scrapCode){
        }
 }
 
-//**********************************************************************************************************************
-
-void FlowData::printFASTA(ofstream& outFASTA){
-       try{
-               
-               outFASTA << '>' << seqName << endl;
-               outFASTA << sequence << endl;
-
-       }
-       catch(exception& e) {
-               m->errorOut(e, "FlowData", "printFlows");
-               exit(1);
-       }
-}
-
-
 //**********************************************************************************************************************
index dafeab3e034b419ba7d4efcc998095cd984e014a..09426f957f9a4ddee425fedd117d03786c9869ad 100644 (file)
@@ -18,24 +18,25 @@ class FlowData {
 
 public:
        FlowData();
-       FlowData(ifstream&, float, float, int);
-       ~FlowData(){};
+       FlowData(int, float, float, int);
+       ~FlowData();
+       bool getNext(ifstream&);
+
        void capFlows(int);
        bool hasMinFlows(int);
        Sequence getSequence();
-       
-       int getSeqLength();
+
        void printFlows(ofstream&);
        void printFlows(ofstream&, string);
-       void printFASTA(ofstream&);
 private:
        MothurOut* m;
-
-       void findDeadSpot(float, float, int);
-       void translateFlow();
        
+       void updateEndFlow();
+       void translateFlow();
+       float signalIntensity, noiseIntensity;
+       int maxHomoP;
        string seqName, locationString, sequence, baseFlow;
-       int numFlows, seqLength, deadSpot;
+       int numFlows, maxFlows, endFlow;
        vector<float> flowData;
 };
 
index aa20f8ca67d6594234f9de806556770fa79576ee..5eb8daaab6d8163b1b48852ccef7e21a406b28dc 100644 (file)
--- a/makefile
+++ b/makefile
@@ -58,7 +58,7 @@ ifeq  ($(strip $(USEREADLINE)),yes)
       -lncurses
 endif
 
-USEMPI ?= no
+USEMPI ?= yes
 
 ifeq  ($(strip $(USEMPI)),yes)
     CXX = mpic++
@@ -93,7 +93,7 @@ mothur : $(OBJECTS)
        strip mothur
 
 install : mothur
-       cp mothur ../Release/mothur
+#      cp mothur ../Release/mothur
        
 %.o : %.c %.h
        $(COMPILE.c) $(OUTPUT_OPTION) $<
diff --git a/mothur b/mothur
index 83751b3a43d400365e7d141effd70b43c6cb13bb..9feafa6ce8149074822fb2a5fd70c16fa5477311 100755 (executable)
Binary files a/mothur and b/mothur differ
index 1a9b73f18c5abd36a0bb6ca98ce9658bda37665a..3562dcc3e88265a93c23af38fa69b3b2c776e4c1 100644 (file)
@@ -331,8 +331,9 @@ int SffInfoCommand::extractSffInfo(string input, string accnos){
                if (header.version != "0001") { m->mothurOut("Version is not supported, only support version 0001."); m->mothurOutEndLine(); return count; }
        
                //print common header
-               if (sfftxt) { printCommonHeader(outSfftxt, header); }
-       
+               if (sfftxt) {   printCommonHeader(outSfftxt, header);           }
+               if (flow)       {       outFlow << header.numFlowsPerRead << endl;      }
+                       
                //read through the sff file
                while (!in.eof()) {
                        
@@ -779,8 +780,12 @@ int SffInfoCommand::printQualSeqData(ofstream& out, seqRead& read, Header& heade
 int SffInfoCommand::printFlowSeqData(ofstream& out, seqRead& read, Header& header) {
        try {
                if(header.clipQualRight > header.clipQualLeft){
-                       out << ">" << header.name << " xy=" << header.xy << " length=" << (header.clipQualRight-header.clipQualLeft) << " numflows=" << read.flowgram.size() << endl;
-                       for (int i = 0; i < read.flowgram.size(); i++) { out << setprecision(2) << (read.flowgram[i]/(float)100) << ' ';  }
+                       
+                       int rightIndex = 0;
+                       for (int i = 0; i < header.clipQualRight; i++) {  rightIndex +=  read.flowIndex[i];     }
+
+                       out << header.name << ' ' << rightIndex;
+                       for (int i = 0; i < read.flowgram.size(); i++) { out << setprecision(2) << ' ' << (read.flowgram[i]/(float)100);  }
                        out << endl;
                }
                
diff --git a/shhhercommand.cpp b/shhhercommand.cpp
new file mode 100644 (file)
index 0000000..24301a2
--- /dev/null
@@ -0,0 +1,2122 @@
+/*
+ *  shhher.cpp
+ *  Mothur
+ *
+ *  Created by Pat Schloss on 12/27/10.
+ *  Copyright 2010 Schloss Lab. All rights reserved.
+ *
+ */
+
+#include "shhhercommand.h"
+
+#include "readcolumn.h"
+#include "readmatrix.hpp"
+#include "rabundvector.hpp"
+#include "sabundvector.hpp"
+#include "listvector.hpp"
+#include "cluster.hpp"
+#include "sparsematrix.hpp"
+
+//**********************************************************************************************************************
+
+#define NUMBINS 1000
+#define HOMOPS 10
+#define MIN_COUNT 0.1
+#define MIN_WEIGHT 0.1
+#define MIN_TAU 0.0001
+#define MIN_ITER 10
+
+//**********************************************************************************************************************
+
+vector<string> ShhherCommand::getValidParameters(){    
+       try {
+               string Array[] =  {     
+                       "file", "flow", "lookup", "cutoff", "sigma", "outputdir","inputdir", "processors"       
+               };
+               
+               vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
+               return myArray;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "ShhherCommand", "getValidParameters");
+               exit(1);
+       }
+}
+
+//**********************************************************************************************************************
+
+ShhherCommand::ShhherCommand(){        
+       try {
+               abort = true;
+               
+               //initialize outputTypes
+               vector<string> tempOutNames;
+               outputTypes["pn.dist"] = tempOutNames;
+
+       }
+       catch(exception& e) {
+               m->errorOut(e, "ShhherCommand", "ShhherCommand");
+               exit(1);
+       }
+}
+
+//**********************************************************************************************************************
+
+vector<string> ShhherCommand::getRequiredParameters(){ 
+       try {
+               string Array[] =  {"flow"};
+               vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
+               return myArray;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "ShhherCommand", "getRequiredParameters");
+               exit(1);
+       }
+}
+
+//**********************************************************************************************************************
+
+vector<string> ShhherCommand::getRequiredFiles(){      
+       try {
+               vector<string> myArray;
+               return myArray;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "ShhherCommand", "getRequiredFiles");
+               exit(1);
+       }
+}
+
+//**********************************************************************************************************************
+
+ShhherCommand::ShhherCommand(string option) {
+       try {
+
+#ifdef USE_MPI
+               MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
+               MPI_Comm_size(MPI_COMM_WORLD, &ncpus);
+
+               if(pid == 0){
+#endif
+               
+               
+               abort = false;
+               
+               
+               //allow user to run help
+               if(option == "help") { help(); abort = true; }
+               
+               else {
+                       
+                       //valid paramters for this command
+                       string AlignArray[] =  {
+                               "file", "flow", "lookup", "cutoff", "sigma", "outputdir","inputdir", "processors"       
+                       };
+                       
+                       vector<string> myArray (AlignArray, AlignArray+(sizeof(AlignArray)/sizeof(string)));
+                       
+                       OptionParser parser(option);
+                       map<string,string> parameters = parser.getParameters();
+                       
+                       ValidParameters validParameter;
+                       map<string,string>::iterator it;
+                       
+                       //check to make sure all parameters are valid for command
+                       for (it = parameters.begin(); it != parameters.end(); it++) { 
+                               if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
+                       }
+                       
+                       //initialize outputTypes
+                       vector<string> tempOutNames;
+                       outputTypes["pn.dist"] = tempOutNames;
+                       //                      outputTypes["fasta"] = tempOutNames;
+                       
+                       //if the user changes the input directory command factory will send this info to us in the output parameter 
+                       string inputDir = validParameter.validFile(parameters, "inputdir", false);              
+                       if (inputDir == "not found"){   inputDir = "";          }
+                       else {
+                               string path;
+                               it = parameters.find("flow");
+                               //user has given a template file
+                               if(it != parameters.end()){ 
+                                       path = m->hasPath(it->second);
+                                       //if the user has not given a path then, add inputdir. else leave path alone.
+                                       if (path == "") {       parameters["flow"] = inputDir + it->second;             }
+                               }
+                               
+                               it = parameters.find("lookup");
+                               //user has given a template file
+                               if(it != parameters.end()){ 
+                                       path = m->hasPath(it->second);
+                                       //if the user has not given a path then, add inputdir. else leave path alone.
+                                       if (path == "") {       parameters["lookup"] = inputDir + it->second;           }
+                               }
+
+                               it = parameters.find("file");
+                               //user has given a template file
+                               if(it != parameters.end()){ 
+                                       path = m->hasPath(it->second);
+                                       //if the user has not given a path then, add inputdir. else leave path alone.
+                                       if (path == "") {       parameters["file"] = inputDir + it->second;             }
+                               }
+                       }
+                       
+                       
+                       //check for required parameters
+                       flowFileName = validParameter.validFile(parameters, "flow", true);
+                       flowFilesFileName = validParameter.validFile(parameters, "file", true);
+                       if (flowFileName == "not found" && flowFilesFileName == "not found") {
+                               m->mothurOut("values for either flow or file must be provided for the shhh.seqs command.");
+                               m->mothurOutEndLine();
+                               abort = true; 
+                       }
+                       else if (flowFileName == "not open" || flowFilesFileName == "not open") { abort = true; }       
+                       
+                       //if the user changes the output directory command factory will send this info to us in the output parameter 
+                       outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  
+                               outputDir = ""; 
+                               outputDir += m->hasPath(flowFileName); //if user entered a file with a path then preserve it    
+                       }
+                       
+                       
+                       //check for optional parameter and set defaults
+                       // ...at some point should added some additional type checking...
+                       string temp;
+                       temp = validParameter.validFile(parameters, "lookup", true);
+                       if (temp == "not found")        {       lookupFileName = "LookUp_E123.pat";     }
+                       else if(temp == "not open")     {       abort = true;                   } 
+                       else                                            {       lookupFileName = temp;  }
+                       
+                       temp = validParameter.validFile(parameters, "processors", false);if (temp == "not found"){      temp = "1";                     }
+                       convert(temp, processors); 
+
+                       temp = validParameter.validFile(parameters, "cutoff", false);   if (temp == "not found"){       temp = "0.01";          }
+                       convert(temp, cutoff); 
+                       
+                       temp = validParameter.validFile(parameters, "mindelta", false); if (temp == "not found"){       temp = "0.000001";      }
+                       convert(temp, minDelta); 
+
+                       temp = validParameter.validFile(parameters, "maxiter", false);  if (temp == "not found"){       temp = "1000";          }
+                       convert(temp, maxIters); 
+
+                       temp = validParameter.validFile(parameters, "sigma", false);if (temp == "not found")    {       temp = "60";            }
+                       convert(temp, sigma); 
+                       
+                       globaldata = GlobalData::getInstance();
+               }
+                       
+#ifdef USE_MPI
+               }                               
+#endif
+                               
+       }
+       catch(exception& e) {
+               m->errorOut(e, "ShhherCommand", "ShhherCommand");
+               exit(1);
+       }
+}
+
+//**********************************************************************************************************************
+
+ShhherCommand::~ShhherCommand(){}
+
+//**********************************************************************************************************************
+
+void ShhherCommand::help(){
+       try {
+               m->mothurOut("The shhher command reads a file containing flowgrams and creates a file of corrected sequences.\n");
+       }
+       catch(exception& e) {
+               m->errorOut(e, "ShhherCommand", "help");
+               exit(1);
+       }
+}
+
+//**********************************************************************************************************************
+#ifdef USE_MPI
+int ShhherCommand::execute(){
+       try {
+               int tag = 1976;
+               MPI_Status status; 
+
+               double begClock = clock();
+               unsigned long int begTime = time(NULL);
+
+               cout.setf(ios::fixed, ios::floatfield);
+               cout.setf(ios::showpoint);
+               cout << setprecision(2);
+               
+               
+               if(pid == 0){
+
+                       for(int i=1;i<ncpus;i++){
+                               MPI_Send(&abort, 1, MPI_INT, i, tag, MPI_COMM_WORLD);
+                       }
+                       if(abort == 1){ return 0;       }
+
+                       processors = ncpus;
+                       
+                       cout << "\nGetting preliminary data..." << endl;
+                       getSingleLookUp();
+                       getJointLookUp();
+                                               
+                       vector<string> flowFileVector;
+                       if(flowFilesFileName != "not found"){
+                               string fName;
+
+                               ifstream flowFilesFile;
+                               m->openInputFile(flowFilesFileName, flowFilesFile);
+                               while(flowFilesFile){
+                                       flowFilesFile >> fName;
+                                       flowFileVector.push_back(fName);
+                                       m->gobble(flowFilesFile);
+                               }
+                       }
+                       else{
+                               flowFileVector.push_back(flowFileName);
+                       }
+                       int numFiles = flowFileVector.size();
+
+                       for(int i=1;i<ncpus;i++){
+                               MPI_Send(&numFiles, 1, MPI_INT, i, tag, MPI_COMM_WORLD);
+                       }
+                       
+                       for(int i=0;i<numFiles;i++){
+                               flowFileName = flowFileVector[i];
+                       
+                               cout << "\n>>>>>\tProcessing " << flowFileName << " (file " << i+1 << " of " << numFiles << ")\t<<<<<" << endl;
+                               cout << "Reading flowgrams..." << endl;
+                               getFlowData();
+                               cout << "Identifying unique flowgrams..." << endl;
+                               getUniques();
+
+                               cout << "Calculating distances between flowgrams..." << endl;
+                               char fileName[1024];
+                               strcpy(fileName, flowFileName.c_str());
+
+                               for(int i=1;i<ncpus;i++){
+                                       MPI_Send(&fileName[0], 1024, MPI_CHAR, i, tag, MPI_COMM_WORLD);
+
+                                       MPI_Send(&numSeqs, 1, MPI_INT, i, tag, MPI_COMM_WORLD);
+                                       MPI_Send(&numUniques, 1, MPI_INT, i, tag, MPI_COMM_WORLD);
+                                       MPI_Send(&numFlowCells, 1, MPI_INT, i, tag, MPI_COMM_WORLD);
+                                       MPI_Send(&flowDataIntI[0], numSeqs * numFlowCells, MPI_SHORT, i, tag, MPI_COMM_WORLD);
+                                       MPI_Send(&flowDataPrI[0], numSeqs * numFlowCells, MPI_DOUBLE, i, tag, MPI_COMM_WORLD);
+                                       MPI_Send(&mapUniqueToSeq[0], numSeqs, MPI_INT, i, tag, MPI_COMM_WORLD);
+                                       MPI_Send(&mapSeqToUnique[0], numSeqs, MPI_INT, i, tag, MPI_COMM_WORLD);
+                                       MPI_Send(&lengths[0], numSeqs, MPI_INT, i, tag, MPI_COMM_WORLD);
+                                       MPI_Send(&jointLookUp[0], NUMBINS * NUMBINS, MPI_DOUBLE, i, tag, MPI_COMM_WORLD);
+                                       MPI_Send(&cutoff, 1, MPI_DOUBLE, i, tag, MPI_COMM_WORLD);
+                               }                               
+                                                       
+                               string distFileName = flowDistMPI(0, int(sqrt(1.0/float(ncpus)) * numUniques));
+                               
+                               int done;
+                               for(int i=1;i<ncpus;i++){
+                                       MPI_Recv(&done, 1, MPI_INT, i, tag, MPI_COMM_WORLD, &status);
+                                       
+                                       m->appendFiles((distFileName + ".temp." + toString(i)), distFileName);
+                                       remove((distFileName + ".temp." + toString(i)).c_str());
+                               }
+
+                               string namesFileName = createNamesFile();
+                               
+                               cout << "\nClustering flowgrams..." << endl;
+                               string listFileName = cluster(distFileName, namesFileName);
+       //                      string listFileName = "PriestPot_C7.pn.list";
+       //                      string listFileName = "test.mock_rep3.v69.pn.list";
+                       
+                               getOTUData(listFileName);
+                               initPyroCluster();
+
+                               for(int i=1;i<ncpus;i++){
+                                       MPI_Send(&numOTUs, 1, MPI_INT, i, tag, MPI_COMM_WORLD);
+                                       MPI_Send(&singleLookUp[0], singleLookUp.size(), MPI_DOUBLE, i, tag, MPI_COMM_WORLD);
+                                       MPI_Send(&uniqueFlowgrams[0], numFlowCells * numUniques, MPI_SHORT, i, tag, MPI_COMM_WORLD);
+                                       MPI_Send(&sigma, 1, MPI_DOUBLE, i, tag, MPI_COMM_WORLD);
+                               }
+                               
+                               
+                               double maxDelta = 0;
+                               int iter = 0;
+                               
+                               int numOTUsOnCPU = numOTUs / ncpus;
+                               int numSeqsOnCPU = numSeqs / ncpus;
+                               
+                               cout << "\nDenoising flowgrams..." << endl;
+                               cout << "iter\tmaxDelta\tnLL\t\tcycletime" << endl;
+                               
+                               while((maxIters == 0 && maxDelta > minDelta) || iter < MIN_ITER || (maxDelta > minDelta && iter < maxIters)){
+
+                                       double cycClock = clock();
+                                       unsigned long int cycTime = time(NULL);
+                                       fill();
+
+                                       int total = singleTau.size();
+                                       for(int i=1;i<ncpus;i++){
+                                               MPI_Send(&total, 1, MPI_INT, i, tag, MPI_COMM_WORLD);
+                                               MPI_Send(&change[0], numOTUs, MPI_SHORT, i, tag, MPI_COMM_WORLD);
+                                               MPI_Send(&centroids[0], numOTUs, MPI_INT, i, tag, MPI_COMM_WORLD);
+                                               
+                                               MPI_Send(&singleTau[0], total, MPI_DOUBLE, i, tag, MPI_COMM_WORLD);
+                                               MPI_Send(&seqNumber[0], total, MPI_INT, i, tag, MPI_COMM_WORLD);
+                                               MPI_Send(&seqIndex[0], total, MPI_INT, i, tag, MPI_COMM_WORLD);
+                                               MPI_Send(&nSeqsPerOTU[0], numOTUs, MPI_INT, i, tag, MPI_COMM_WORLD);
+                                               MPI_Send(&cumNumSeqs[0], numOTUs, MPI_INT, i, tag, MPI_COMM_WORLD);
+                                       }
+                               
+                                       calcCentroidsDriver(0, numOTUsOnCPU);
+                                       
+                                       for(int i=1;i<ncpus;i++){
+                                               int otuStart = i * numOTUs / ncpus;
+                                               int otuStop = (i + 1) * numOTUs / ncpus;
+                                               
+                                               vector<int> tempCentroids(numOTUs, 0);
+                                               vector<short> tempChange(numOTUs, 0);
+                                               
+                                               MPI_Recv(&tempCentroids[0], numOTUs, MPI_INT, i, tag, MPI_COMM_WORLD, &status);
+                                               MPI_Recv(&tempChange[0], numOTUs, MPI_SHORT, i, tag, MPI_COMM_WORLD, &status);
+                                               
+                                               for(int j=otuStart;j<otuStop;j++){
+                                                       centroids[j] = tempCentroids[j];
+                                                       change[j] = tempChange[j];
+                                               }
+                                       }
+                                                                       
+                                       maxDelta = getNewWeights();
+                                       double nLL = getLikelihood();
+                                       checkCentroids();
+                                       
+                                       for(int i=1;i<ncpus;i++){
+                                               MPI_Send(&centroids[0], numOTUs, MPI_INT, i, tag, MPI_COMM_WORLD);
+                                               MPI_Send(&weight[0], numOTUs, MPI_DOUBLE, i, tag, MPI_COMM_WORLD);
+                                               MPI_Send(&change[0], numOTUs, MPI_SHORT, i, tag, MPI_COMM_WORLD);
+                                       }
+                                       
+                                       calcNewDistancesParent(0, numSeqsOnCPU);
+
+                                       total = singleTau.size();
+
+                                       for(int i=1;i<ncpus;i++){
+                                               int childTotal;
+                                               int seqStart = i * numSeqs / ncpus;
+                                               int seqStop = (i + 1) * numSeqs / ncpus;
+                                               
+                                               MPI_Recv(&childTotal, 1, MPI_INT, i, tag, MPI_COMM_WORLD, &status);
+
+                                               vector<int> childSeqIndex(childTotal, 0);
+                                               vector<double> childSingleTau(childTotal, 0);
+                                               vector<double> childDist(numSeqs * numOTUs, 0);
+                                               vector<int> otuIndex(childTotal, 0);
+                                               
+                                               MPI_Recv(&childSeqIndex[0], childTotal, MPI_INT, i, tag, MPI_COMM_WORLD, &status);
+                                               MPI_Recv(&childSingleTau[0], childTotal, MPI_DOUBLE, i, tag, MPI_COMM_WORLD, &status);
+                                               MPI_Recv(&childDist[0], numOTUs * numSeqs, MPI_DOUBLE, i, tag, MPI_COMM_WORLD, &status);
+                                               MPI_Recv(&otuIndex[0], childTotal, MPI_INT, i, tag, MPI_COMM_WORLD, &status);
+
+                                               int oldTotal = total;
+                                               total += childTotal;
+                                               singleTau.resize(total, 0);
+                                               seqIndex.resize(total, 0);
+                                               seqNumber.resize(total, 0);
+                                               
+                                               int childIndex = 0;
+                                               
+                                               for(int j=oldTotal;j<total;j++){
+                                                       int otuI = otuIndex[childIndex];
+                                                       int seqI = childSeqIndex[childIndex];
+                                                       
+                                                       singleTau[j] = childSingleTau[childIndex];
+                                                       
+                                                       aaP[otuI][nSeqsPerOTU[otuI]] = j;
+                                                       aaI[otuI][nSeqsPerOTU[otuI]] = seqI;
+                                                       nSeqsPerOTU[otuI]++;
+                                                       childIndex++;
+                                               }
+                                               
+                                               int index = seqStart * numOTUs;
+                                               for(int j=seqStart;j<seqStop;j++){
+                                                       for(int k=0;k<numOTUs;k++){
+                                                               dist[index] = childDist[index];
+                                                               index++;
+                                                       }
+                                               }                                       
+                                       }
+                                       
+                                       iter++;
+                                       
+                                       cout << iter << '\t' << maxDelta << '\t' << setprecision(2) << nLL << '\t' << time(NULL) - cycTime << '\t' << setprecision(6) << (clock() - cycClock)/(double)CLOCKS_PER_SEC << endl;                   
+
+                                       if((maxIters == 0 && maxDelta > minDelta) || iter < MIN_ITER || (maxDelta > minDelta && iter < maxIters)){
+                                               int live = 1;
+                                               for(int i=1;i<ncpus;i++){
+                                                       MPI_Send(&live, 1, MPI_INT, i, tag, MPI_COMM_WORLD);
+                                               }
+                                       }
+                                       else{
+                                               int live = 0;
+                                               for(int i=1;i<ncpus;i++){
+                                                       MPI_Send(&live, 1, MPI_INT, i, tag, MPI_COMM_WORLD); //send kill command
+                                               }
+                                       }
+                                       
+                               }       
+                               
+                               cout << "\nFinalizing..." << endl;
+                               fill();
+                               setOTUs();
+                               vector<int> otuCounts(numOTUs, 0);
+                               for(int i=0;i<numSeqs;i++)      {       otuCounts[otuData[i]]++;        }
+                               calcCentroidsDriver(0, numOTUs);
+                               writeQualities(otuCounts);
+                               writeSequences(otuCounts);
+                               writeNames(otuCounts);
+                               writeClusters(otuCounts);
+                               writeGroups();
+                               
+                               remove(distFileName.c_str());
+                               remove(namesFileName.c_str());
+                               remove(listFileName.c_str());
+                               
+                               cout << "Total time to process " << flowFileName << ":\t" << time(NULL) - begTime << '\t' << setprecision(6) << (clock() - begClock)/(double)CLOCKS_PER_SEC << endl;                    
+                       }
+
+               }
+               else{
+                       int abort = 1;
+                       bool live = 1;
+
+                       MPI_Recv(&abort, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
+                       if(abort){      return 0;       }
+
+                       int numFiles;
+                       MPI_Recv(&numFiles, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
+
+                       for(int i=0;i<numFiles;i++){
+                               //Now into the pyrodist part
+                               char fileName[1024];
+                               MPI_Recv(&fileName, 1024, MPI_CHAR, 0, tag, MPI_COMM_WORLD, &status);
+                               MPI_Recv(&numSeqs, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
+                               MPI_Recv(&numUniques, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
+                               MPI_Recv(&numFlowCells, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
+                               
+                               flowDataIntI.resize(numSeqs * numFlowCells);
+                               flowDataPrI.resize(numSeqs * numFlowCells);
+                               mapUniqueToSeq.resize(numSeqs);
+                               mapSeqToUnique.resize(numSeqs);
+                               lengths.resize(numSeqs);
+                               jointLookUp.resize(NUMBINS * NUMBINS);
+
+                               MPI_Recv(&flowDataIntI[0], numSeqs * numFlowCells, MPI_SHORT, 0, tag, MPI_COMM_WORLD, &status);
+                               MPI_Recv(&flowDataPrI[0], numSeqs * numFlowCells, MPI_DOUBLE, 0, tag, MPI_COMM_WORLD, &status);
+                               MPI_Recv(&mapUniqueToSeq[0], numSeqs, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
+                               MPI_Recv(&mapSeqToUnique[0], numSeqs, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
+                               MPI_Recv(&lengths[0], numSeqs, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
+                               MPI_Recv(&jointLookUp[0], NUMBINS * NUMBINS, MPI_DOUBLE, 0, tag, MPI_COMM_WORLD, &status);
+                               MPI_Recv(&cutoff, 1, MPI_DOUBLE, 0, tag, MPI_COMM_WORLD, &status);
+
+                               flowFileName = string(fileName);
+                               int flowDistStart = int(sqrt(float(pid)/float(ncpus)) * numUniques);
+                               int flowDistEnd = int(sqrt(float(pid+1)/float(ncpus)) * numUniques);
+                               
+                               string distanceStringChild = flowDistMPI(flowDistStart, flowDistEnd);
+
+                               int done = 1;
+                               MPI_Send(&done, 1, MPI_INT, 0, tag, MPI_COMM_WORLD);
+
+                               //Now into the pyronoise part
+                               MPI_Recv(&numOTUs, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
+                               
+                               singleLookUp.resize(HOMOPS * NUMBINS);
+                               uniqueFlowgrams.resize(numUniques * numFlowCells);
+                               weight.resize(numOTUs);
+                               centroids.resize(numOTUs);
+                               change.resize(numOTUs);
+                               dist.assign(numOTUs * numSeqs, 0);
+                               nSeqsPerOTU.resize(numOTUs);
+                               cumNumSeqs.resize(numOTUs);
+
+                               MPI_Recv(&singleLookUp[0], singleLookUp.size(), MPI_DOUBLE, 0, tag, MPI_COMM_WORLD, &status);
+                               MPI_Recv(&uniqueFlowgrams[0], uniqueFlowgrams.size(), MPI_SHORT, 0, tag, MPI_COMM_WORLD, &status);
+                               MPI_Recv(&sigma, 1, MPI_DOUBLE, 0, tag, MPI_COMM_WORLD, &status);
+                               
+                               int startOTU = pid * numOTUs / ncpus;
+                               int endOTU = (pid + 1) * numOTUs / ncpus;
+
+                               int startSeq = pid * numSeqs / ncpus;
+                               int endSeq = (pid + 1) * numSeqs /ncpus;
+                               
+                               int total;
+
+                               while(live){
+
+                                       MPI_Recv(&total, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
+                                       singleTau.assign(total, 0.0000);
+                                       seqNumber.assign(total, 0);
+                                       seqIndex.assign(total, 0);
+                                       
+                                       MPI_Recv(&change[0], numOTUs, MPI_SHORT, 0, tag, MPI_COMM_WORLD, &status);
+                                       MPI_Recv(&centroids[0], numOTUs, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
+                                       MPI_Recv(&singleTau[0], total, MPI_DOUBLE, 0, tag, MPI_COMM_WORLD, &status);
+                                       MPI_Recv(&seqNumber[0], total, MPI_DOUBLE, 0, tag, MPI_COMM_WORLD, &status);
+                                       MPI_Recv(&seqIndex[0], total, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
+                                       MPI_Recv(&nSeqsPerOTU[0], total, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
+                                       MPI_Recv(&cumNumSeqs[0], numOTUs, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
+                                       
+                                       calcCentroidsDriver(startOTU, endOTU);
+
+                                       MPI_Send(&centroids[0], numOTUs, MPI_INT, 0, tag, MPI_COMM_WORLD);
+                                       MPI_Send(&change[0], numOTUs, MPI_SHORT, 0, tag, MPI_COMM_WORLD);
+
+                                       
+                                       MPI_Recv(&centroids[0], numOTUs, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
+                                       MPI_Recv(&weight[0], numOTUs, MPI_DOUBLE, 0, tag, MPI_COMM_WORLD, &status);
+                                       MPI_Recv(&change[0], numOTUs, MPI_SHORT, 0, tag, MPI_COMM_WORLD, &status);
+
+                                       vector<int> otuIndex(total, 0);
+                                       calcNewDistancesChildMPI(startSeq, endSeq, otuIndex);
+                                       total = otuIndex.size();
+                                       
+                                       MPI_Send(&total, 1, MPI_INT, 0, tag, MPI_COMM_WORLD);
+                                       MPI_Send(&seqIndex[0], total, MPI_INT, 0, tag, MPI_COMM_WORLD);
+                                       MPI_Send(&singleTau[0], total, MPI_DOUBLE, 0, tag, MPI_COMM_WORLD);
+                                       MPI_Send(&dist[0], numOTUs * numSeqs, MPI_DOUBLE, 0, tag, MPI_COMM_WORLD);
+                                       MPI_Send(&otuIndex[0], total, MPI_INT, 0, tag, MPI_COMM_WORLD);
+                               
+                                       MPI_Recv(&live, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
+                               }
+                       }
+               }               
+
+               MPI_Barrier(MPI_COMM_WORLD);
+               return 0;
+
+       }
+       catch(exception& e) {
+               m->errorOut(e, "ShhherCommand", "execute");
+               exit(1);
+       }
+}
+
+/**************************************************************************************************/
+
+string ShhherCommand::flowDistMPI(int startSeq, int stopSeq){
+       try{            
+               ostringstream outStream;
+               outStream.setf(ios::fixed, ios::floatfield);
+               outStream.setf(ios::dec, ios::basefield);
+               outStream.setf(ios::showpoint);
+               outStream.precision(6);
+               
+               int begTime = time(NULL);
+               double begClock = clock();
+               
+               for(int i=startSeq;i<stopSeq;i++){
+                       for(int j=0;j<i;j++){
+                               float flowDistance = calcPairwiseDist(mapUniqueToSeq[i], mapUniqueToSeq[j]);
+                               
+                               if(flowDistance < 1e-6){
+                                       outStream << mapUniqueToSeq[i] << '\t' << mapUniqueToSeq[j] << '\t' << 0.000000 << endl;
+                               }
+                               else if(flowDistance <= cutoff){
+                                       outStream << mapUniqueToSeq[i] << '\t' << mapUniqueToSeq[j] << '\t' << flowDistance << endl;
+                               }
+                       }
+                       if(i % 100 == 0){
+                               cout << i << "\t" << (time(NULL) - begTime) << "\t" << (clock()-begClock)/CLOCKS_PER_SEC << endl;
+                       }
+               }
+               cout << stopSeq << "\t" << (time(NULL) - begTime) << "\t" << (clock()-begClock)/CLOCKS_PER_SEC << endl;
+               
+               string fDistFileName = flowFileName.substr(0,flowFileName.find_last_of('.')) + ".pn.dist";
+               if(pid != 0){   fDistFileName += ".temp." + toString(pid);      }
+
+               ofstream distFile(fDistFileName.c_str());
+               distFile << outStream.str();            
+               distFile.close();
+               
+               return fDistFileName;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "ShhherCommand", "flowDistParentFork");
+               exit(1);
+       }
+}
+
+#else
+//**********************************************************************************************************************
+
+int ShhherCommand::execute(){
+       try {
+               if (abort == true) { return 0; }
+               
+               cout.setf(ios::fixed, ios::floatfield);
+               cout.setf(ios::showpoint);
+               
+               getSingleLookUp();
+               getJointLookUp();
+               
+               
+               vector<string> flowFileVector;
+               if(flowFilesFileName != "not found"){
+                       string fName;
+                       
+                       ifstream flowFilesFile;
+                       m->openInputFile(flowFilesFileName, flowFilesFile);
+                       while(flowFilesFile){
+                               flowFilesFile >> fName;
+                               flowFileVector.push_back(fName);
+                               m->gobble(flowFilesFile);
+                       }
+               }
+               else{
+                       flowFileVector.push_back(flowFileName);
+               }
+               int numFiles = flowFileVector.size();
+               
+               
+               for(int i=0;i<numFiles;i++){
+                       flowFileName = flowFileVector[i];
+
+                       cout << "\n>>>>>\tProcessing " << flowFileName << " (file " << i+1 << " of " << numFiles << ")\t<<<<<" << endl;
+                       cout << "Reading flowgrams..." << endl;
+                       getFlowData();
+                       cout << "Identifying unique flowgrams..." << endl;
+                       getUniques();
+                       
+                       
+                       cout << "Calculating distances between flowgrams..." << endl;                   
+                       string distFileName = createDistFile(processors);
+                       string namesFileName = createNamesFile();
+                       
+                       cout << "\nClustering flowgrams..." << endl;
+                       string listFileName = cluster(distFileName, namesFileName);
+                       getOTUData(listFileName);
+                       
+                       initPyroCluster();
+                       
+                       double maxDelta = 0;
+                       int iter = 0;
+                       
+                       double begClock = clock();
+                       unsigned long int begTime = time(NULL);
+
+                       cout << "\nDenoising flowgrams..." << endl;
+                       cout << "iter\tmaxDelta\tnLL\t\tcycletime" << endl;
+                       
+                       while((maxIters == 0 && maxDelta > minDelta) || iter < MIN_ITER || (maxDelta > minDelta && iter < maxIters)){
+                               
+                               double cycClock = clock();
+                               unsigned long int cycTime = time(NULL);
+                               fill();
+                               
+                               calcCentroids();
+                               
+                               maxDelta = getNewWeights();
+                               double nLL = getLikelihood();
+                               checkCentroids();
+                               
+                               calcNewDistances();
+
+                               iter++;
+                               
+                               cout << iter << '\t' << maxDelta << '\t' << setprecision(2) << nLL << '\t' << time(NULL) - cycTime << '\t' << setprecision(6) << (clock() - cycClock)/(double)CLOCKS_PER_SEC << endl;                   
+                       }       
+                       
+                       cout << "\nFinalizing..." << endl;
+                       fill();
+                       setOTUs();
+                       
+                       vector<int> otuCounts(numOTUs, 0);
+                       for(int i=0;i<numSeqs;i++)      {       otuCounts[otuData[i]]++;        }
+                       
+                       calcCentroidsDriver(0, numOTUs);
+                       writeQualities(otuCounts);
+                       writeSequences(otuCounts);
+                       writeNames(otuCounts);
+                       writeClusters(otuCounts);
+                       writeGroups();
+                       
+                       remove(distFileName.c_str());
+                       remove(namesFileName.c_str());
+                       remove(listFileName.c_str());
+                       
+                       cout << "Total time to process " << flowFileName << ":\t" << time(NULL) - begTime << '\t' << setprecision(6) << (clock() - begClock)/(double)CLOCKS_PER_SEC << endl;                    
+               }
+               return 0;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "ShhherCommand", "execute");
+               exit(1);
+       }
+}
+#endif
+/**************************************************************************************************/
+
+void ShhherCommand::getFlowData(){
+       try{
+               ifstream flowFile;
+               m->openInputFile(flowFileName, flowFile);
+               
+               string seqName;
+               
+               int currentNumFlowCells;
+               
+               float intensity;
+               
+               flowFile >> numFlowCells;
+               int index = 0;//pcluster
+               while(!flowFile.eof()){
+                       flowFile >> seqName >> currentNumFlowCells;
+                       lengths.push_back(currentNumFlowCells);
+
+                       seqNameVector.push_back(seqName);
+                       nameMap[seqName] = index++;//pcluster
+
+                       for(int i=0;i<numFlowCells;i++){
+                               flowFile >> intensity;
+                               if(intensity > 9.99)    {       intensity = 9.99;       }
+                               int intI = int(100 * intensity + 0.0001);
+                               flowDataIntI.push_back(intI);
+                       }
+                       m->gobble(flowFile);
+               }
+               flowFile.close();
+               
+               numSeqs = seqNameVector.size();         
+               
+               for(int i=0;i<numSeqs;i++){
+                       int iNumFlowCells = i * numFlowCells;
+                       for(int j=lengths[i];j<numFlowCells;j++){
+                               flowDataIntI[iNumFlowCells + j] = 0;
+                       }
+               }
+               
+       }
+       catch(exception& e) {
+               m->errorOut(e, "ShhherCommand", "getFlowData");
+               exit(1);
+       }
+}
+
+/**************************************************************************************************/
+
+void ShhherCommand::getSingleLookUp(){
+       try{
+               //      these are the -log probabilities that a signal corresponds to a particular homopolymer length
+               singleLookUp.assign(HOMOPS * NUMBINS, 0);
+               
+               int index = 0;
+               ifstream lookUpFile;
+               m->openInputFile(lookupFileName, lookUpFile);
+               
+               for(int i=0;i<HOMOPS;i++){
+                       float logFracFreq;
+                       lookUpFile >> logFracFreq;
+                       
+                       for(int j=0;j<NUMBINS;j++)      {
+                               lookUpFile >> singleLookUp[index];
+                               index++;
+                       }
+               }       
+               lookUpFile.close();
+       }
+       catch(exception& e) {
+               m->errorOut(e, "ShhherCommand", "getSingleLookUp");
+               exit(1);
+       }
+}
+
+/**************************************************************************************************/
+
+void ShhherCommand::getJointLookUp(){
+       try{
+               
+               //      the most likely joint probability (-log) that two intenities have the same polymer length
+               jointLookUp.resize(NUMBINS * NUMBINS, 0);
+               
+               for(int i=0;i<NUMBINS;i++){
+                       for(int j=0;j<NUMBINS;j++){             
+                               
+                               float minSum = 10000000000;
+                               
+                               for(int k=0;k<HOMOPS;k++){
+                                       float sum = singleLookUp[k * NUMBINS + i] + singleLookUp[k * NUMBINS + j];
+                                       
+                                       if(sum < minSum)        {       minSum = sum;           }
+                               }       
+                               jointLookUp[i * NUMBINS + j] = minSum;
+                       }
+               }
+       }
+       catch(exception& e) {
+               m->errorOut(e, "ShhherCommand", "getJointLookUp");
+               exit(1);
+       }
+}
+
+/**************************************************************************************************/
+
+float ShhherCommand::getProbIntensity(int intIntensity){                          
+       try{
+               float minNegLogProb = 10000000000; 
+               
+               for(int i=0;i<HOMOPS;i++){//loop signal strength
+                       float negLogProb = singleLookUp[i * NUMBINS + intIntensity];
+                       if(negLogProb < minNegLogProb)  {       minNegLogProb = negLogProb; }
+               }
+               
+               return minNegLogProb;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "ShhherCommand", "getProbIntensity");
+               exit(1);
+       }
+}
+
+/**************************************************************************************************/
+
+void ShhherCommand::getUniques(){
+       try{
+               
+               
+               numUniques = 0;
+               uniqueFlowgrams.assign(numFlowCells * numSeqs, -1);
+               uniqueCount.assign(numSeqs, 0);                                                 //      anWeights
+               uniqueLengths.assign(numSeqs, 0);
+               mapSeqToUnique.assign(numSeqs, -1);
+               mapUniqueToSeq.assign(numSeqs, -1);
+               
+               vector<short> uniqueFlowDataIntI(numFlowCells * numSeqs, -1);
+               
+               for(int i=0;i<numSeqs;i++){
+                       int index = 0;
+                       
+                       vector<short> current(numFlowCells);
+                       for(int j=0;j<numFlowCells;j++){        current[j] = short(((flowDataIntI[i * numFlowCells + j] + 50.0)/100.0));        }
+                                               
+                       for(int j=0;j<numUniques;j++){
+                               int offset = j * numFlowCells;
+                               bool toEnd = 1;
+                               
+                               for(int k=0;k<numFlowCells;k++){
+                                       if(current[k] != uniqueFlowgrams[offset + k]){
+                                               toEnd = 0;
+                                               break;
+                                       }
+                               }
+                               
+                               if(toEnd){
+                                       mapSeqToUnique[i] = j;
+                                       uniqueCount[j]++;
+                                       index = j;
+                                       break;
+                               }
+                               index++;
+                       }
+                       
+                       if(index == numUniques){
+                               uniqueLengths[numUniques] = lengths[i];
+                               uniqueCount[numUniques] = 1;
+                               mapSeqToUnique[i] = numUniques;//anMap
+                               mapUniqueToSeq[numUniques] = i;//anF
+                               
+                               for(int k=0;k<numFlowCells;k++){
+                                       uniqueFlowgrams[numUniques * numFlowCells + k] = current[k];
+                                       uniqueFlowDataIntI[numUniques * numFlowCells + k] = flowDataIntI[i * numFlowCells + k];
+                               }
+                               
+                               numUniques++;
+                       }
+               }
+               uniqueFlowDataIntI.resize(numFlowCells * numUniques);
+               uniqueLengths.resize(numUniques);       
+               
+               flowDataPrI.assign(numSeqs * numFlowCells, 0);
+               for(int i=0;i<flowDataPrI.size();i++)   {       flowDataPrI[i] = getProbIntensity(flowDataIntI[i]);             }
+       }
+       catch(exception& e) {
+               m->errorOut(e, "ShhherCommand", "getUniques");
+               exit(1);
+       }
+}
+
+/**************************************************************************************************/
+
+float ShhherCommand::calcPairwiseDist(int seqA, int seqB){
+       try{
+               int minLength = lengths[mapSeqToUnique[seqA]];
+               if(lengths[seqB] < minLength){  minLength = lengths[mapSeqToUnique[seqB]];      }
+               
+               int ANumFlowCells = seqA * numFlowCells;
+               int BNumFlowCells = seqB * numFlowCells;
+               
+               float dist = 0;
+               
+               for(int i=0;i<minLength;i++){
+                       int flowAIntI = flowDataIntI[ANumFlowCells + i];
+                       float flowAPrI = flowDataPrI[ANumFlowCells + i];
+                       
+                       int flowBIntI = flowDataIntI[BNumFlowCells + i];
+                       float flowBPrI = flowDataPrI[BNumFlowCells + i];
+                       dist += jointLookUp[flowAIntI * NUMBINS + flowBIntI] - flowAPrI - flowBPrI;
+               }
+               
+               dist /= (float) minLength;
+               return dist;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "ShhherCommand", "calcPairwiseDist");
+               exit(1);
+       }
+}
+
+/**************************************************************************************************/
+
+void ShhherCommand::flowDistParentFork(string distFileName, int startSeq, int stopSeq){
+       try{            
+
+               ostringstream outStream;
+               outStream.setf(ios::fixed, ios::floatfield);
+               outStream.setf(ios::dec, ios::basefield);
+               outStream.setf(ios::showpoint);
+               outStream.precision(6);
+               
+               int begTime = time(NULL);
+               double begClock = clock();
+
+               for(int i=startSeq;i<stopSeq;i++){
+                       for(int j=0;j<i;j++){
+                               float flowDistance = calcPairwiseDist(mapUniqueToSeq[i], mapUniqueToSeq[j]);
+
+                               if(flowDistance < 1e-6){
+                                       outStream << seqNameVector[mapUniqueToSeq[i]] << '\t' << seqNameVector[mapUniqueToSeq[j]] << '\t' << 0.000000 << endl;
+                               }
+                               else if(flowDistance <= cutoff){
+                                       outStream << seqNameVector[mapUniqueToSeq[i]] << '\t' << seqNameVector[mapUniqueToSeq[j]] << '\t' << flowDistance << endl;
+                               }
+                       }
+                       if(i % 100 == 0){
+                               m->mothurOut(toString(i) + "\t" + toString(time(NULL) - begTime));
+                               m->mothurOut("\t" + toString((clock()-begClock)/CLOCKS_PER_SEC));
+                               m->mothurOutEndLine();
+                       }
+               }
+               m->mothurOut(toString(stopSeq-1) + "\t" + toString(time(NULL) - begTime));
+               m->mothurOut("\t" + toString((clock()-begClock)/CLOCKS_PER_SEC));
+               m->mothurOutEndLine();
+               
+               ofstream distFile((distFileName + "temp." + toString(pid)).c_str());
+               distFile << outStream.str();            
+               distFile.close();
+       }
+       catch(exception& e) {
+               m->errorOut(e, "ShhherCommand", "flowDistParentFork");
+               exit(1);
+       }
+}
+
+/**************************************************************************************************/
+
+string ShhherCommand::createDistFile(int processors){
+       try{
+               string fDistFileName = flowFileName.substr(0,flowFileName.find_last_of('.')) + ".pn.dist";
+                               
+               unsigned long int begTime = time(NULL);
+               double begClock = clock();
+
+               vector<int> start;
+               vector<int> end;
+               
+#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
+               if(processors == 1)     {       flowDistParentFork(fDistFileName, 0, numUniques);               }
+               else{ //you have multiple processors
+                       
+                       if (numSeqs < processors){      processors = 1; }
+                       
+                       vector<int> start(processors, 0);
+                       vector<int> end(processors, 0);
+                       
+                       for (int i = 0; i < processors; i++) {
+                               start[i] = int(sqrt(float(i)/float(processors)) * numUniques);
+                               end[i] = int(sqrt(float(i+1)/float(processors)) * numUniques);
+                       }
+                       
+                       int process = 1;
+                       vector<int> processIDs;
+                       
+                       //loop through and create all the processes you want
+                       while (process != processors) {
+                               int pid = fork();
+                               
+                               if (pid > 0) {
+                                       processIDs.push_back(pid);  //create map from line number to pid so you can append files in correct order later
+                                       process++;
+                               }else if (pid == 0){
+                                       flowDistParentFork(fDistFileName + toString(getpid()) + ".temp", start[process], end[process]);
+                                       exit(0);
+                               }else { 
+                                       m->mothurOut("[ERROR]: unable to spawn the necessary processes. Error code: " + toString(pid)); m->mothurOutEndLine(); 
+                                       perror(" : ");
+                                       for (int i=0;i<processIDs.size();i++) {  int temp = processIDs[i]; kill (temp, SIGINT); }
+                                       exit(0);
+                               }
+                       }
+                       
+                       //parent does its part
+                       flowDistParentFork(fDistFileName, start[0], end[0]);
+                       
+                       //force parent to wait until all the processes are done
+                       for (int i=0;i<processIDs.size();i++) { 
+                               int temp = processIDs[i];
+                               wait(&temp);
+                       }
+                       
+                       //append and remove temp files
+                       for (int i=0;i<processIDs.size();i++) { 
+                               m->appendFiles((fDistFileName + toString(processIDs[i]) + ".temp"), fDistFileName);
+                               remove((fDistFileName + toString(processIDs[i]) + ".temp").c_str());
+                       }
+                       
+               }
+               
+#else
+               flowDistParentFork(fDistFileName, 0, numUniques);
+#endif
+
+               m->mothurOutEndLine();
+               
+               cout << "Total time: " << (time(NULL) - begTime) << "\t"  << (clock() - begClock)/CLOCKS_PER_SEC << endl;;
+
+               return fDistFileName;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "ShhherCommand", "createDistFile");
+               exit(1);
+       }
+       
+}
+
+/**************************************************************************************************/
+
+string ShhherCommand::createNamesFile(){
+       try{
+               
+               vector<string> duplicateNames(numUniques, "");
+               for(int i=0;i<numSeqs;i++){
+                       duplicateNames[mapSeqToUnique[i]] += seqNameVector[i] + ',';
+               }
+               
+               string nameFileName = flowFileName.substr(0,flowFileName.find_last_of('.')) + ".pn.names";
+               
+               ofstream nameFile;
+               m->openOutputFile(nameFileName, nameFile);
+               
+               for(int i=0;i<numUniques;i++){
+//                     nameFile << seqNameVector[mapUniqueToSeq[i]] << '\t' << duplicateNames[i].substr(0, duplicateNames[i].find_last_of(',')) << endl;
+                       nameFile << mapUniqueToSeq[i] << '\t' << duplicateNames[i].substr(0, duplicateNames[i].find_last_of(',')) << endl;
+               }
+               
+               nameFile.close();
+               return  nameFileName;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "ShhherCommand", "createNamesFile");
+               exit(1);
+       }
+}
+
+//**********************************************************************************************************************
+
+string ShhherCommand::cluster(string distFileName, string namesFileName){
+       try {
+               
+               SparseMatrix* matrix;
+               ListVector* list;
+               RAbundVector* rabund;
+               
+               globaldata->setNameFile(namesFileName);
+               globaldata->setColumnFile(distFileName);
+               globaldata->setFormat("column");
+               
+               ReadMatrix* read = new ReadColumnMatrix(distFileName);  
+               read->setCutoff(cutoff);
+               
+               NameAssignment* clusterNameMap = new NameAssignment(namesFileName);
+               clusterNameMap->readMap();
+               read->read(clusterNameMap);
+               
+               list = read->getListVector();
+               matrix = read->getMatrix();
+               
+               delete read; 
+               delete clusterNameMap; 
+                               
+               rabund = new RAbundVector(list->getRAbundVector());
+               
+               Cluster* cluster = new CompleteLinkage(rabund, list, matrix, cutoff, "furthest"); 
+               string tag = cluster->getTag();
+               
+               double clusterCutoff = cutoff;
+               while (matrix->getSmallDist() <= clusterCutoff && matrix->getNNodes() > 0){
+                       cluster->update(clusterCutoff);
+                       float dist = matrix->getSmallDist();
+               }
+               
+               list->setLabel(toString(cutoff));
+               
+               string listFileName = flowFileName.substr(0,flowFileName.find_last_of('.')) + ".pn.list";
+               ofstream listFile;
+               m->openOutputFile(listFileName, listFile);
+               list->print(listFile);
+               listFile.close();
+               
+               delete matrix;  delete cluster; delete rabund; delete list;
+       
+               return listFileName;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "ShhherCommand", "cluster");
+               exit(1);        
+       }               
+}
+
+/**************************************************************************************************/
+
+void ShhherCommand::getOTUData(string listFileName){
+       try {
+
+               ifstream listFile;
+               m->openInputFile(listFileName, listFile);
+               string label;
+               
+               listFile >> label >> numOTUs;
+
+               otuData.assign(numSeqs, 0);
+               cumNumSeqs.assign(numOTUs, 0);
+               nSeqsPerOTU.assign(numOTUs, 0);
+               aaP.resize(numOTUs);
+               
+               string singleOTU = "";
+               
+               for(int i=0;i<numOTUs;i++){
+
+                       listFile >> singleOTU;
+                       
+                       istringstream otuString(singleOTU);
+
+                       while(otuString){
+                               
+                               string seqName = "";
+                               
+                               for(int j=0;j<singleOTU.length();j++){
+                                       char letter = otuString.get();
+                                       
+                                       if(letter != ','){
+                                               seqName += letter;
+                                       }
+                                       else{
+                                               map<string,int>::iterator nmIt = nameMap.find(seqName);
+                                               int index = nmIt->second;
+                                               
+                                               nameMap.erase(nmIt);
+                                               
+                                               otuData[index] = i;
+                                               nSeqsPerOTU[i]++;
+                                               aaP[i].push_back(index);
+                                               seqName = "";
+                                       }
+                               }
+                               
+                               map<string,int>::iterator nmIt = nameMap.find(seqName);
+
+                               int index = nmIt->second;
+                               nameMap.erase(nmIt);
+
+                               otuData[index] = i;
+                               nSeqsPerOTU[i]++;
+                               aaP[i].push_back(index);        
+                               
+                               otuString.get();
+                       }
+                       
+                       sort(aaP[i].begin(), aaP[i].end());
+                       for(int j=0;j<nSeqsPerOTU[i];j++){
+                               seqNumber.push_back(aaP[i][j]);
+                       }
+                       for(int j=nSeqsPerOTU[i];j<numSeqs;j++){
+                               aaP[i].push_back(0);
+                       }
+               }
+               
+               for(int i=1;i<numOTUs;i++){
+                       cumNumSeqs[i] = cumNumSeqs[i-1] + nSeqsPerOTU[i-1];
+               }
+               aaI = aaP;
+               seqIndex = seqNumber;
+               
+               listFile.close();       
+       }
+       catch(exception& e) {
+               m->errorOut(e, "ShhherCommand", "getOTUData");
+               exit(1);        
+       }               
+}
+
+/**************************************************************************************************/
+
+void ShhherCommand::initPyroCluster(){                          
+       try{
+               dist.assign(numSeqs * numOTUs, 0);
+               change.assign(numOTUs, 1);
+               centroids.assign(numOTUs, -1);
+               weight.assign(numOTUs, 0);
+               singleTau.assign(numSeqs, 1.0);
+               
+               nSeqsBreaks.assign(processors+1, 0);
+               nOTUsBreaks.assign(processors+1, 0);
+
+               nSeqsBreaks[0] = 0;
+               for(int i=0;i<processors;i++){
+                       nSeqsBreaks[i+1] = nSeqsBreaks[i] + (int)((double) numSeqs / (double) processors);
+                       nOTUsBreaks[i+1] = nOTUsBreaks[i] + (int)((double) numOTUs / (double) processors);
+               }
+               nSeqsBreaks[processors] = numSeqs;
+               nOTUsBreaks[processors] = numOTUs;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "ShhherCommand", "initPyroCluster");
+               exit(1);        
+       }
+}
+
+/**************************************************************************************************/
+
+void ShhherCommand::fill(){
+       try {
+               int index = 0;
+               for(int i=0;i<numOTUs;i++){
+                       cumNumSeqs[i] = index;
+                       for(int j=0;j<nSeqsPerOTU[i];j++){
+                               seqNumber[index] = aaP[i][j];
+                               seqIndex[index] = aaI[i][j];
+                               
+                               index++;
+                       }
+               }
+       }
+       catch(exception& e) {
+               m->errorOut(e, "ShhherCommand", "fill");
+               exit(1);        
+       }               
+}
+
+/**************************************************************************************************/
+
+void ShhherCommand::calcCentroids(){                          
+       try{
+               
+#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
+
+               if(processors == 1)     {
+                       calcCentroidsDriver(0, numOTUs);                
+               }
+               else{ //you have multiple processors
+                       if (numOTUs < processors){      processors = 1; }
+                       
+                       int process = 1;
+                       vector<int> processIDs;
+                       
+                       //loop through and create all the processes you want
+                       while (process != processors) {
+                               int pid = vfork();
+                               
+                               if (pid > 0) {
+                                       processIDs.push_back(pid);  //create map from line number to pid so you can append files in correct order later
+                                       process++;
+                               }else if (pid == 0){
+                                       calcCentroidsDriver(nOTUsBreaks[process], nOTUsBreaks[process+1]);
+                                       exit(0);
+                               }else { 
+                                       m->mothurOut("[ERROR]: unable to spawn the necessary processes. Error code: " + toString(pid)); m->mothurOutEndLine(); 
+                                       perror(" : ");
+                                       for (int i=0;i<processIDs.size();i++) {  int temp = processIDs[i]; kill (temp, SIGINT); }
+                                       exit(0);
+                               }
+                       }
+                       
+                       //parent does its part
+                       calcCentroidsDriver(nOTUsBreaks[0], nOTUsBreaks[1]);
+
+                       //force parent to wait until all the processes are done
+                       for (int i=0;i<processIDs.size();i++) { 
+                               int temp = processIDs[i];
+                               wait(&temp);
+                       }
+               }
+               
+#else
+               calcCentroidsDriver(0, numOTUs);
+#endif         
+       }
+       catch(exception& e) {
+               m->errorOut(e, "ShhherCommand", "calcCentroidsDriver");
+               exit(1);        
+       }               
+}
+
+/**************************************************************************************************/
+
+void ShhherCommand::calcCentroidsDriver(int start, int finish){                          
+       
+       //this function gets the most likely homopolymer length at a flow position for a group of sequences
+       //within an otu
+       
+       try{
+               
+               for(int i=start;i<finish;i++){
+                       
+                       double count = 0;
+                       int position = 0;
+                       int minFlowGram = 100000000;
+                       double minFlowValue = 1e8;
+                       change[i] = 0; //FALSE
+                       
+                       for(int j=0;j<nSeqsPerOTU[i];j++){
+                               count += singleTau[seqNumber[cumNumSeqs[i] + j]];
+                       }
+                       
+                       if(nSeqsPerOTU[i] > 0 && count > MIN_COUNT){
+                               vector<double> adF(nSeqsPerOTU[i]);
+                               vector<int> anL(nSeqsPerOTU[i]);
+                               
+                               for(int j=0;j<nSeqsPerOTU[i];j++){
+                                       int index = cumNumSeqs[i] + j;
+                                       int nI = seqIndex[index];
+                                       int nIU = mapSeqToUnique[nI];
+                                       
+                                       int k;
+                                       for(k=0;k<position;k++){
+                                               if(nIU == anL[k]){
+                                                       break;
+                                               }
+                                       }
+                                       if(k == position){
+                                               anL[position] = nIU;
+                                               adF[position] = 0.0000;
+                                               position++;
+                                       }                                               
+                               }
+                               
+                               for(int j=0;j<nSeqsPerOTU[i];j++){
+                                       int index = cumNumSeqs[i] + j;
+                                       int nI = seqIndex[index];
+                                       int nIU = mapSeqToUnique[nI];
+                                       
+                                       double tauValue = singleTau[seqNumber[index]];
+                                       
+                                       for(int k=0;k<position;k++){
+                                               double dist = getDistToCentroid(anL[k], nI, lengths[nI]);
+                                               adF[k] += dist * tauValue;
+                                       }
+                               }
+                               
+                               for(int j=0;j<position;j++){
+                                       if(adF[j] < minFlowValue){
+                                               minFlowGram = j;
+                                               minFlowValue = adF[j];
+                                       }
+                               }
+                               
+                               if(centroids[i] != anL[minFlowGram]){
+                                       change[i] = 1;
+                                       centroids[i] = anL[minFlowGram];
+                               }
+                       }
+                       else if(centroids[i] != -1){
+                               change[i] = 1;
+                               centroids[i] = -1;                      
+                       }
+               }
+       }
+       catch(exception& e) {
+               m->errorOut(e, "ShhherCommand", "calcCentroidsDriver");
+               exit(1);        
+       }               
+}
+
+/**************************************************************************************************/
+
+double ShhherCommand::getDistToCentroid(int cent, int flow, int length){
+       try{
+               
+               int flowAValue = cent * numFlowCells;
+               int flowBValue = flow * numFlowCells;
+               
+               double dist = 0;
+               
+               for(int i=0;i<length;i++){
+                       dist += singleLookUp[uniqueFlowgrams[flowAValue] * NUMBINS + flowDataIntI[flowBValue]];
+                       flowAValue++;
+                       flowBValue++;
+               }
+               
+               return dist / (double)length;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "ShhherCommand", "getDistToCentroid");
+               exit(1);        
+       }               
+}
+
+/**************************************************************************************************/
+
+double ShhherCommand::getNewWeights(){
+       try{
+               
+               double maxChange = 0;
+               
+               for(int i=0;i<numOTUs;i++){
+                       
+                       double difference = weight[i];
+                       weight[i] = 0;
+                       
+                       for(int j=0;j<nSeqsPerOTU[i];j++){
+                               int index = cumNumSeqs[i] + j;
+                               int nI = seqIndex[index];
+                               double tauValue = singleTau[seqNumber[index]];
+                               weight[i] += tauValue;
+                       }
+                       
+                       difference = fabs(weight[i] - difference);
+                       if(difference > maxChange){     maxChange = difference; }
+               }
+               return maxChange;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "ShhherCommand", "getNewWeights");
+               exit(1);        
+       }               
+}
+
+/**************************************************************************************************/
+
+double ShhherCommand::getLikelihood(){
+       
+       try{
+               
+               vector<double> P(numSeqs, 0);
+               int effNumOTUs = 0;
+               
+               for(int i=0;i<numOTUs;i++){
+                       if(weight[i] > MIN_WEIGHT){
+                               effNumOTUs++;
+                       }
+               }
+               
+               for(int i=0;i<numOTUs;i++){
+                       for(int j=0;j<nSeqsPerOTU[i];j++){
+                               int index = cumNumSeqs[i] + j;
+                               int nI = seqIndex[index];
+                               double singleDist = dist[seqNumber[index]];
+                               
+                               P[nI] += weight[i] * exp(-singleDist * sigma);
+                       }
+               }
+               
+               double nLL = 0.00;
+               for(int i=0;i<numSeqs;i++){
+                       nLL += -log(P[i]);
+               }
+               
+               nLL = nLL -(double)numSeqs * log(sigma);
+               
+               return nLL; 
+       }
+       catch(exception& e) {
+               m->errorOut(e, "ShhherCommand", "getNewWeights");
+               exit(1);        
+       }               
+}
+
+/**************************************************************************************************/
+
+void ShhherCommand::checkCentroids(){
+       try{
+               vector<int> unique(numOTUs, 1);
+               
+               for(int i=0;i<numOTUs;i++){
+                       if(centroids[i] == -1 || weight[i] < MIN_WEIGHT){
+                               unique[i] = -1;
+                       }
+               }
+               
+               for(int i=0;i<numOTUs;i++){
+                       if(unique[i] == 1){
+                               for(int j=i+1;j<numOTUs;j++){
+                                       if(unique[j] == 1){
+                                               
+                                               if(centroids[j] == centroids[i]){
+                                                       unique[j] = 0;
+                                                       centroids[j] = -1;
+                                                       
+                                                       weight[i] += weight[j];
+                                                       weight[j] = 0.0;
+                                               }
+                                       }
+                               }
+                       }
+               }
+       }
+       catch(exception& e) {
+               m->errorOut(e, "ShhherCommand", "checkCentroids");
+               exit(1);        
+       }               
+}
+
+/**************************************************************************************************/
+
+void ShhherCommand::calcNewDistances(){                          
+       try{
+               
+#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
+               
+               if(processors == 1)     {
+                       calcNewDistancesParent(0, numSeqs);             
+               }
+               else{ //you have multiple processors
+                       if (numSeqs < processors){      processors = 1; }
+                       
+                       vector<vector<int> > child_otuIndex(processors);
+                       vector<vector<int> > child_seqIndex(processors);
+                       vector<vector<double> > child_singleTau(processors);                    
+                       vector<int> totals(processors);
+                       
+                       int process = 1;
+                       vector<int> processIDs;
+
+                       //loop through and create all the processes you want
+                       while (process != processors) {
+                               int pid = vfork();
+                               
+                               if (pid > 0) {
+                                       processIDs.push_back(pid);  //create map from line number to pid so you can append files in correct order later
+                                       process++;
+                               }else if (pid == 0){
+                                       calcNewDistancesChild(nSeqsBreaks[process], nSeqsBreaks[process+1], child_otuIndex[process], child_seqIndex[process], child_singleTau[process]);
+                                       totals[process] = child_otuIndex[process].size();
+
+                                       exit(0);
+                               }else { 
+                                       m->mothurOut("[ERROR]: unable to spawn the necessary processes. Error code: " + toString(pid)); m->mothurOutEndLine(); 
+                                       perror(" : ");
+                                       for (int i=0;i<processIDs.size();i++) {  int temp = processIDs[i]; kill (temp, SIGINT); }
+                                       exit(0);
+                               }
+                       }
+                       
+                       //parent does its part
+                       calcNewDistancesParent(nSeqsBreaks[0], nSeqsBreaks[1]);
+                       int total = seqIndex.size();
+
+                       //force parent to wait until all the processes are done
+                       for (int i=0;i<processIDs.size();i++) { 
+                               int temp = processIDs[i];
+                               wait(&temp);
+                       }
+
+                       for(int i=1;i<processors;i++){
+                               int oldTotal = total;
+                               total += totals[i];
+
+                               singleTau.resize(total, 0);
+                               seqIndex.resize(total, 0);
+                               seqNumber.resize(total, 0);
+                               
+                               int childIndex = 0;
+                               
+                               for(int j=oldTotal;j<total;j++){
+                                       int otuI = child_otuIndex[i][childIndex];
+                                       int seqI = child_seqIndex[i][childIndex];
+
+                                       singleTau[j] = child_singleTau[i][childIndex];
+                                       aaP[otuI][nSeqsPerOTU[otuI]] = j;
+                                       aaI[otuI][nSeqsPerOTU[otuI]] = seqI;
+                                       nSeqsPerOTU[otuI]++;
+
+                                       childIndex++;
+                               }
+                       }
+               }
+#else
+               calcNewDistancesParent(0, numSeqs);             
+#endif         
+       }
+       catch(exception& e) {
+               m->errorOut(e, "ShhherCommand", "calcNewDistances");
+               exit(1);        
+       }               
+}
+
+/**************************************************************************************************/
+#ifdef USE_MPI
+void ShhherCommand::calcNewDistancesChildMPI(int startSeq, int stopSeq, vector<int>& otuIndex){
+       
+       try{
+               vector<double> newTau(numOTUs,0);
+               vector<double> norms(numSeqs, 0);
+               otuIndex.resize(0);
+               seqIndex.resize(0);
+               singleTau.resize(0);
+               
+               
+               
+               for(int i=startSeq;i<stopSeq;i++){
+                       double offset = 1e8;
+                       int indexOffset = i * numOTUs;
+                       
+                       for(int j=0;j<numOTUs;j++){
+                               
+                               if(weight[j] > MIN_WEIGHT && change[j] == 1){
+                                       dist[indexOffset + j] = getDistToCentroid(centroids[j], i, lengths[i]);
+                               }
+                               if(weight[j] > MIN_WEIGHT && dist[indexOffset + j] < offset){
+                                       offset = dist[indexOffset + j];
+                               }
+                       }
+                       
+                       for(int j=0;j<numOTUs;j++){
+                               if(weight[j] > MIN_WEIGHT){
+                                       newTau[j] = exp(sigma * (-dist[indexOffset + j] + offset)) * weight[j];
+                                       norms[i] += newTau[j];
+                               }
+                               else{
+                                       newTau[j] = 0.0;
+                               }
+                       }
+                       
+                       for(int j=0;j<numOTUs;j++){
+
+                               newTau[j] /= norms[i];
+                               
+                               if(newTau[j] > MIN_TAU){
+                                       otuIndex.push_back(j);
+                                       seqIndex.push_back(i);
+                                       singleTau.push_back(newTau[j]);
+                               }
+                       }
+                       
+               }
+       }
+       catch(exception& e) {
+               m->errorOut(e, "ShhherCommand", "calcNewDistancesChildMPI");
+               exit(1);        
+       }               
+}
+#endif
+/**************************************************************************************************/
+
+void ShhherCommand::calcNewDistancesChild(int startSeq, int stopSeq, vector<int>& child_otuIndex, vector<int>& child_seqIndex, vector<double>& child_singleTau){
+       
+       try{
+               vector<double> newTau(numOTUs,0);
+               vector<double> norms(numSeqs, 0);
+               child_otuIndex.resize(0);
+               child_seqIndex.resize(0);
+               child_singleTau.resize(0);
+               
+               for(int i=startSeq;i<stopSeq;i++){
+                       double offset = 1e8;
+                       int indexOffset = i * numOTUs;
+                       
+                       
+                       for(int j=0;j<numOTUs;j++){
+                               if(weight[j] > MIN_WEIGHT && change[j] == 1){
+                                       dist[indexOffset + j] = getDistToCentroid(centroids[j], i, lengths[i]);
+                               }
+                               
+                               if(weight[j] > MIN_WEIGHT && dist[indexOffset + j] < offset){
+                                       offset = dist[indexOffset + j];
+                               }
+                       }
+                       
+                       for(int j=0;j<numOTUs;j++){
+                               if(weight[j] > MIN_WEIGHT){
+                                       newTau[j] = exp(sigma * (-dist[indexOffset + j] + offset)) * weight[j];
+                                       norms[i] += newTau[j];
+                               }
+                               else{
+                                       newTau[j] = 0.0;
+                               }
+                       }
+                       
+                       for(int j=0;j<numOTUs;j++){
+                               newTau[j] /= norms[i];
+                               
+                               if(newTau[j] > MIN_TAU){
+                                       child_otuIndex.push_back(j);
+                                       child_seqIndex.push_back(i);
+                                       child_singleTau.push_back(newTau[j]);
+                               }
+                       }
+               }
+       }
+       catch(exception& e) {
+               m->errorOut(e, "ShhherCommand", "calcNewDistancesChild");
+               exit(1);        
+       }               
+}
+
+/**************************************************************************************************/
+
+void ShhherCommand::calcNewDistancesParent(int startSeq, int stopSeq){
+       
+       try{
+               
+               int total = 0;
+               vector<double> newTau(numOTUs,0);
+               vector<double> norms(numSeqs, 0);
+               nSeqsPerOTU.assign(numOTUs, 0);
+               
+               for(int i=startSeq;i<stopSeq;i++){
+                       int indexOffset = i * numOTUs;
+                       
+                       double offset = 1e8;
+                       
+                       for(int j=0;j<numOTUs;j++){
+                               if(weight[j] > MIN_WEIGHT && change[j] == 1){
+                                       dist[indexOffset + j] = getDistToCentroid(centroids[j], i, lengths[i]);
+                               }
+                               
+                               if(weight[j] > MIN_WEIGHT && dist[indexOffset + j] < offset){
+                                       offset = dist[indexOffset + j];
+                               }
+                       }
+                       
+                       for(int j=0;j<numOTUs;j++){
+                               if(weight[j] > MIN_WEIGHT){
+                                       newTau[j] = exp(sigma * (-dist[indexOffset + j] + offset)) * weight[j];
+                                       norms[i] += newTau[j];
+                               }
+                               else{
+                                       newTau[j] = 0.0;
+                               }
+                       }
+                       
+                       for(int j=0;j<numOTUs;j++){
+                               newTau[j] /= norms[i];
+                       }
+                       
+                       for(int j=0;j<numOTUs;j++){
+                               if(newTau[j] > MIN_TAU){
+                                       
+                                       int oldTotal = total;
+                                       
+                                       total++;
+                                       
+                                       singleTau.resize(total, 0);
+                                       seqNumber.resize(total, 0);
+                                       seqIndex.resize(total, 0);
+                                       
+                                       singleTau[oldTotal] = newTau[j];
+                                       
+                                       aaP[j][nSeqsPerOTU[j]] = oldTotal;
+                                       aaI[j][nSeqsPerOTU[j]] = i;
+                                       nSeqsPerOTU[j]++;
+                               }
+                       }
+               }
+       }
+       catch(exception& e) {
+               m->errorOut(e, "ShhherCommand", "calcNewDistancesParent");
+               exit(1);        
+       }               
+}
+
+/**************************************************************************************************/
+
+void ShhherCommand::setOTUs(){
+       
+       try {
+               vector<double> bigTauMatrix(numOTUs * numSeqs, 0.0000);
+               
+               for(int i=0;i<numOTUs;i++){
+                       for(int j=0;j<nSeqsPerOTU[i];j++){
+                               int index = cumNumSeqs[i] + j;
+                               double tauValue = singleTau[seqNumber[index]];
+                               int sIndex = seqIndex[index];
+                               bigTauMatrix[sIndex * numOTUs + i] = tauValue;                          
+                       }
+               }
+               
+               for(int i=0;i<numSeqs;i++){
+                       double maxTau = -1.0000;
+                       int maxOTU = -1;
+                       for(int j=0;j<numOTUs;j++){
+                               if(bigTauMatrix[i * numOTUs + j] > maxTau){
+                                       maxTau = bigTauMatrix[i * numOTUs + j];
+                                       maxOTU = j;
+                               }
+                       }
+                       
+                       otuData[i] = maxOTU;
+               }
+               
+               nSeqsPerOTU.assign(numOTUs, 0);         
+               
+               for(int i=0;i<numSeqs;i++){
+                       int index = otuData[i];
+                       
+                       singleTau[i] = 1.0000;
+                       dist[i] = 0.0000;
+                       
+                       aaP[index][nSeqsPerOTU[index]] = i;
+                       aaI[index][nSeqsPerOTU[index]] = i;
+                       
+                       nSeqsPerOTU[index]++;
+               }
+               fill(); 
+       }
+       catch(exception& e) {
+               m->errorOut(e, "ShhherCommand", "calcNewDistances");
+               exit(1);        
+       }               
+}
+
+/**************************************************************************************************/
+
+void ShhherCommand::writeQualities(vector<int> otuCounts){
+       
+       try {
+               string qualityFileName = flowFileName.substr(0,flowFileName.find_last_of('.')) + ".pn.qual";
+
+               ofstream qualityFile;
+               m->openOutputFile(qualityFileName, qualityFile);
+
+               qualityFile.setf(ios::fixed, ios::floatfield);
+               qualityFile.setf(ios::showpoint);
+               qualityFile << setprecision(6);
+               
+               vector<vector<int> > qualities(numOTUs);
+               vector<double> pr(HOMOPS, 0);
+               
+               int index = 0;
+               
+               for(int i=0;i<numOTUs;i++){
+                       int index = 0;
+                       int base = 0;
+                       
+                       if(nSeqsPerOTU[i] > 0){
+                               qualities[i].assign(1024, -1);
+                               
+                               while(index < numFlowCells){
+                                       double maxPrValue = 1e8;
+                                       short maxPrIndex = -1;
+                                       double count = 0.0000;
+                                       
+                                       pr.assign(HOMOPS, 0);
+                                       
+                                       for(int j=0;j<nSeqsPerOTU[i];j++){
+                                               int lIndex = cumNumSeqs[i] + j;
+                                               double tauValue = singleTau[seqNumber[lIndex]];
+                                               int sequenceIndex = aaI[i][j];
+                                               short intensity = flowDataIntI[sequenceIndex * numFlowCells + index];
+                                               
+                                               count += tauValue;
+                                               
+                                               for(int s=0;s<HOMOPS;s++){
+                                                       pr[s] += tauValue * singleLookUp[s * NUMBINS + intensity];
+                                               }
+                                       }
+                                       
+                                       maxPrIndex = uniqueFlowgrams[centroids[i] * numFlowCells + index];
+                                       maxPrValue = pr[maxPrIndex];
+                                       
+                                       if(count > MIN_COUNT){
+                                               double U = 0.0000;
+                                               double norm = 0.0000;
+                                               
+                                               for(int s=0;s<HOMOPS;s++){
+                                                       norm += exp(-(pr[s] - maxPrValue));
+                                               }
+                                               
+                                               for(int s=1;s<=maxPrIndex;s++){
+                                                       int value = 0;
+                                                       double temp = 0.0000;
+                                                       
+                                                       U += exp(-(pr[s-1]-maxPrValue))/norm;
+                                                       
+                                                       if(U>0.00){
+                                                               temp = log10(U);
+                                                       }
+                                                       else{
+                                                               temp = -10.1;
+                                                       }
+                                                       temp = floor(-10 * temp);
+                                                       value = (int)floor(temp);
+                                                       if(value > 100){        value = 100;    }
+                                                       
+                                                       qualities[i][base] = (int)value;
+                                                       base++;
+                                               }
+                                       }
+                                       
+                                       index++;
+                               }
+                       }
+                       
+                       
+                       if(otuCounts[i] > 0){
+                               qualityFile << '>' << seqNameVector[mapUniqueToSeq[i]] << endl;
+                               
+                               int j=4;        //need to get past the first four bases
+                               while(qualities[i][j] != -1){
+                                       qualityFile << qualities[i][j] << ' ';
+                                       j++;
+                               }
+                               qualityFile << endl;
+                       }
+               }
+               qualityFile.close();
+               
+       }
+       catch(exception& e) {
+               m->errorOut(e, "ShhherCommand", "writeQualities");
+               exit(1);        
+       }               
+}
+
+/**************************************************************************************************/
+
+void ShhherCommand::writeSequences(vector<int> otuCounts){
+       try {
+               
+               string bases = "TACG";
+               
+               string fastaFileName = flowFileName.substr(0,flowFileName.find_last_of('.')) + ".pn.fasta";
+               ofstream fastaFile;
+               m->openOutputFile(fastaFileName, fastaFile);
+               
+               vector<string> names(numOTUs, "");
+               
+               for(int i=0;i<numOTUs;i++){
+                       int index = centroids[i];
+                       
+                       if(otuCounts[i] > 0){
+                               fastaFile << '>' << seqNameVector[aaI[i][0]] << endl;
+                               
+                               for(int j=8;j<numFlowCells;j++){
+                                       
+                                       char base = bases[j % 4];
+                                       for(int k=0;k<uniqueFlowgrams[index * numFlowCells + j];k++){
+                                               fastaFile << base;
+                                       }
+                               }
+                               fastaFile << endl;
+                       }
+               }
+               fastaFile.close();
+       }
+       catch(exception& e) {
+               m->errorOut(e, "ShhherCommand", "writeSequences");
+               exit(1);        
+       }               
+}
+
+/**************************************************************************************************/
+
+void ShhherCommand::writeNames(vector<int> otuCounts){
+       try {
+               string nameFileName = flowFileName.substr(0,flowFileName.find_last_of('.')) + ".pn.final.names";
+               ofstream nameFile;
+               m->openOutputFile(nameFileName, nameFile);
+               
+               for(int i=0;i<numOTUs;i++){
+                       if(otuCounts[i] > 0){
+                               nameFile << seqNameVector[aaI[i][0]] << '\t' << seqNameVector[aaI[i][0]];
+                               
+                               for(int j=1;j<nSeqsPerOTU[i];j++){
+                                       nameFile << ',' << seqNameVector[aaI[i][j]];
+                               }
+                               
+                               nameFile << endl;
+                       }
+               }
+               nameFile.close();
+       }
+       catch(exception& e) {
+               m->errorOut(e, "ShhherCommand", "writeNames");
+               exit(1);        
+       }               
+}
+
+/**************************************************************************************************/
+
+void ShhherCommand::writeGroups(){
+       try {
+               string fileRoot = flowFileName.substr(0,flowFileName.find_last_of('.'));
+               string groupFileName = fileRoot + ".pn.groups";
+               ofstream groupFile;
+               m->openOutputFile(groupFileName, groupFile);
+               
+               for(int i=0;i<numSeqs;i++){
+                       groupFile << seqNameVector[i] << '\t' << fileRoot << endl;
+               }
+               groupFile.close();
+       }
+       catch(exception& e) {
+               m->errorOut(e, "ShhherCommand", "writeGroups");
+               exit(1);        
+       }               
+}
+
+/**************************************************************************************************/
+
+void ShhherCommand::writeClusters(vector<int> otuCounts){
+       try {
+               string otuCountsFileName = flowFileName.substr(0,flowFileName.find_last_of('.')) + ".pn.counts";
+               ofstream otuCountsFile;
+               m->openOutputFile(otuCountsFileName, otuCountsFile);
+               
+               string bases = "TACG";
+               
+               for(int i=0;i<numOTUs;i++){
+                       //output the translated version of the centroid sequence for the otu
+                       if(otuCounts[i] > 0){
+                               int index = centroids[i];
+                               
+                               otuCountsFile << "ideal\t";
+                               for(int j=8;j<numFlowCells;j++){
+                                       char base = bases[j % 4];
+                                       for(int s=0;s<uniqueFlowgrams[index * numFlowCells + j];s++){
+                                               otuCountsFile << base;
+                                       }
+                               }
+                               otuCountsFile << endl;
+                               
+                               for(int j=0;j<nSeqsPerOTU[i];j++){
+                                       int sequence = aaI[i][j];
+                                       otuCountsFile << seqNameVector[sequence] << '\t';
+                                       
+                                       for(int k=8;k<lengths[sequence];k++){
+                                               char base = bases[k % 4];
+                                               int freq = int(0.01 * (double)flowDataIntI[sequence * numFlowCells + k] + 0.5);
+                                               
+                                               for(int s=0;s<freq;s++){
+                                                       otuCountsFile << base;
+                                               }
+                                       }
+                                       otuCountsFile << endl;
+                               }
+                               otuCountsFile << endl;
+                       }
+               }
+               otuCountsFile.close();
+       }
+       catch(exception& e) {
+               m->errorOut(e, "ShhherCommand", "writeClusters");
+               exit(1);        
+       }               
+}
+
+//**********************************************************************************************************************
diff --git a/shhhercommand.h b/shhhercommand.h
new file mode 100644 (file)
index 0000000..b26341e
--- /dev/null
@@ -0,0 +1,113 @@
+#ifndef SHHHER_H
+#define SHHHER_H
+
+/*
+ *  shhher.h
+ *  Mothur
+ *
+ *  Created by Pat Schloss on 12/27/10.
+ *  Copyright 2010 Schloss Lab. All rights reserved.
+ *
+ */
+
+#include "mothur.h"
+#include "command.hpp"
+#include "globaldata.hpp"
+
+class ShhherCommand : public Command {
+       
+public:
+       ShhherCommand(string);
+       ShhherCommand();
+       ~ShhherCommand();
+       vector<string> getRequiredParameters();
+       vector<string> getValidParameters();
+       vector<string> getRequiredFiles();
+       map<string, vector<string> > getOutputFiles() { return outputTypes; }
+       int execute();
+       void help();
+       
+private:
+       GlobalData* globaldata;
+       
+       int abort;
+       map<string, vector<string> > outputTypes;
+       
+       string outputDir, flowFileName, flowFilesFileName, lookupFileName;
+       int processors, maxIters;
+       float cutoff, sigma, minDelta;
+       
+       vector<int> nSeqsBreaks;
+       vector<int> nOTUsBreaks;
+       vector<double> flowDataPrI;
+       vector<short> flowDataIntI;
+       vector<int> lengths;
+       vector<string> seqNameVector;
+       vector<double> singleLookUp;
+       vector<double> jointLookUp;
+       map<string, int> nameMap;
+       vector<int> otuData;
+       vector<int> cumNumSeqs;
+       vector<int> nSeqsPerOTU;
+       vector<vector<int> > aaP;       //tMaster->aanP:        each row is a different otu / each col contains the sequence indices
+       vector<int> seqNumber;          //tMaster->anP:         the sequence id number sorted by OTU
+       vector<vector<int> > aaI;       //tMaster->aanI:        that are in each otu - can't differentiate between aaP and aaI 
+       vector<int> seqIndex;           //tMaster->anI;         the index that corresponds to seqNumber
+       vector<double> dist;            //adDist - distance of sequences to centroids
+       vector<short> change;           //did the centroid sequence change? 0 = no; 1 = yes
+       vector<int> centroids;          //the representative flowgram for each cluster m
+       vector<double> weight;
+       vector<double> singleTau;       //tMaster->adTau:       1-D Tau vector (1xnumSeqs)
+       vector<short> uniqueFlowgrams;
+       vector<int> uniqueCount;
+       vector<int> uniqueLengths;
+       vector<int> mapSeqToUnique;
+       vector<int> mapUniqueToSeq;
+       
+       int numSeqs, numUniques, numOTUs, numFlowCells;
+       
+       void getSingleLookUp();
+       void getJointLookUp();
+       void getFlowData();
+       void getUniques();
+       float getProbIntensity(int);
+       float calcPairwiseDist(int, int);
+       void flowDistParentFork(string, int, int);
+       
+       string createDistFile(int);
+       string createNamesFile();
+       string cluster(string, string);
+       
+       void getOTUData(string);
+       void initPyroCluster();
+       void fill();
+       void calcCentroids();
+       void calcCentroidsDriver(int, int);
+       double getDistToCentroid(int, int, int);
+       double getNewWeights();
+       double getLikelihood();
+       void checkCentroids();
+       void calcNewDistances();
+       void calcNewDistancesParent(int, int);
+       void calcNewDistancesChild(int, int, vector<int>&, vector<int>&, vector<double>&);
+
+
+       void setOTUs();
+       void writeQualities(vector<int>);
+       void writeSequences(vector<int>);
+       void writeNames(vector<int>);
+       void writeGroups();
+       void writeClusters(vector<int>);
+
+       
+#ifdef USE_MPI
+       string flowDistMPI(int, int);
+       void calcNewDistancesChildMPI(int, int, vector<int>&);
+
+       int pid, ncpus; 
+#endif
+       
+};
+
+
+#endif
\ No newline at end of file
index a2c3a88a82f7a0a22a0cc20583290fcf011090d5..3b21d317b2c8f82729418516ad87d23c31392ed0 100644 (file)
@@ -17,11 +17,11 @@ vector<string> TrimFlowsCommand::getValidParameters(){
                string Array[] =  {"flow", "totalflows", "minflows",
                        "fasta", "minlength", "maxlength", "maxhomop", "signal", "noise"
                        "oligos", "pdiffs", "bdiffs", "tdiffs", 
-                       "allfiles",
+                       "allfiles", "processors",
+
 
                        
 //                     "group",
-//                     "processors",
                        "outputdir","inputdir"
                
                };
@@ -112,10 +112,9 @@ TrimFlowsCommand::TrimFlowsCommand(string option)  {
                        string AlignArray[] =  {"flow", "totalflows", "minflows",
                                "fasta", "minlength", "maxlength", "maxhomop", "signal", "noise",
                                "oligos", "pdiffs", "bdiffs", "tdiffs", 
-                               "allfiles",
+                               "allfiles", "processors",
                
                                //                      "group",
-                               //                      "processors",
                                "outputdir","inputdir"
                                
                        };
@@ -136,8 +135,7 @@ TrimFlowsCommand::TrimFlowsCommand(string option)  {
                        //initialize outputTypes
                        vector<string> tempOutNames;
                        outputTypes["flow"] = tempOutNames;
-                       //              outputTypes["fasta"] = tempOutNames;
-                       //              outputTypes["group"] = tempOutNames;
+                       outputTypes["fasta"] = tempOutNames;
                        
                        //if the user changes the input directory command factory will send this info to us in the output parameter 
                        string inputDir = validParameter.validFile(parameters, "inputdir", false);              
@@ -152,13 +150,13 @@ TrimFlowsCommand::TrimFlowsCommand(string option)  {
                                        if (path == "") {       parameters["flow"] = inputDir + it->second;             }
                                }
                                
-//                             it = parameters.find("oligos");
-//                             //user has given a template file
-//                             if(it != parameters.end()){ 
-//                                     path = m->hasPath(it->second);
-//                                     //if the user has not given a path then, add inputdir. else leave path alone.
-//                                     if (path == "") {       parameters["oligos"] = inputDir + it->second;           }
-//                             }
+                               it = parameters.find("oligos");
+                               //user has given a template file
+                               if(it != parameters.end()){ 
+                                       path = m->hasPath(it->second);
+                                       //if the user has not given a path then, add inputdir. else leave path alone.
+                                       if (path == "") {       parameters["oligos"] = inputDir + it->second;           }
+                               }
                                
                                
 //                             it = parameters.find("group");
@@ -223,21 +221,21 @@ TrimFlowsCommand::TrimFlowsCommand(string option)  {
                        temp = validParameter.validFile(parameters, "pdiffs", false);           if (temp == "not found"){       temp = "0";             }
                        convert(temp, pdiffs);
                        
-                       temp = validParameter.validFile(parameters, "tdiffs", false);           if (temp == "not found"){ int tempTotal = pdiffs + bdiffs;  temp = toString(tempTotal); }
+                       temp = validParameter.validFile(parameters, "tdiffs", false);
+                       if (temp == "not found"){ int tempTotal = pdiffs + bdiffs;  temp = toString(tempTotal); }
                        convert(temp, tdiffs);
                        if(tdiffs == 0){        tdiffs = bdiffs + pdiffs;       }
                        
                        temp = validParameter.validFile(parameters, "allfiles", false);         if (temp == "not found") { temp = "T";          }
                        allFiles = m->isTrue(temp);
                        
-//                     temp = validParameter.validFile(parameters, "processors", false);       if (temp == "not found") { temp = "1"; }
-//                     convert(temp, processors); 
+                       temp = validParameter.validFile(parameters, "processors", false);       if (temp == "not found") { temp = "1"; }
+                       convert(temp, processors); 
                        
-                       if(allFiles && oligoFileName == ""){
-                               m->mothurOut("You selected allfiles, but didn't enter an oligos file.  Ignoring the allfiles request."); m->mothurOutEndLine();
-                               allFiles = 0;
-                       }
+                       if(oligoFileName == ""){        allFiles = 0;           }
 
+                       numFPrimers = 0;
+                       numRPrimers = 0;
                }
                
        }
@@ -265,9 +263,68 @@ int TrimFlowsCommand::execute(){
                        outputNames.push_back(fastaFileName); outputTypes["fasta"].push_back(fastaFileName);
                }
                
-               driverCreateTrim(flowFileName, trimFlowFileName, scrapFlowFileName, fastaFileName);
+               vector<unsigned long int> flowFilePos = getFlowFileBreaks();
+               for (int i = 0; i < (flowFilePos.size()-1); i++) {
+                       lines.push_back(new linePair(flowFilePos[i], flowFilePos[(i+1)]));
+               }       
+
+               vector<vector<string> > barcodePrimerComboFileNames;
+               if(oligoFileName != ""){
+                       getOligos(barcodePrimerComboFileNames); 
+               }
+               
+#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
+               if(processors == 1){
+                       driverCreateTrim(flowFileName, trimFlowFileName, scrapFlowFileName, fastaFileName, barcodePrimerComboFileNames, lines[0]);
+               }else{
+                       createProcessesCreateTrim(flowFileName, trimFlowFileName, scrapFlowFileName, fastaFileName, barcodePrimerComboFileNames); 
+               }       
+#else
+               driverCreateTrim(flowFileName, trimFlowFileName, scrapFlowFileName, fastaFileName, barcodePrimerComboFileNames, lines[0]);
+#endif
+               
+               if (m->control_pressed) {  return 0; }                  
+               
+               if(allFiles){
+                       
+                       ofstream output;
+                       string flowFilesFileName = outputDir + m->getRootName(m->getSimpleName(flowFileName)) + "flow.files";
+                       m->openOutputFile(flowFilesFileName, output);
+
+                       for(int i=0;i<barcodePrimerComboFileNames.size();i++){
+                               for(int j=0;j<barcodePrimerComboFileNames[0].size();j++){
+                                       
+                                       FILE * pFile;
+                                       unsigned long int size;
+                                       
+                                       //get num bytes in file
+                                       pFile = fopen (barcodePrimerComboFileNames[i][j].c_str(),"rb");
+                                       if (pFile==NULL) perror ("Error opening file");
+                                       else{
+                                               fseek (pFile, 0, SEEK_END);
+                                               size=ftell (pFile);
+                                               fclose (pFile);
+                                       }
+
+                                       if(size < 10){
+//                                             m->mothurOut("deleting: " + barcodePrimerComboFileNames[i][j] + '\n');
+                                               remove(barcodePrimerComboFileNames[i][j].c_str());
+                                       }
+                                       else{
+//                                             m->mothurOut("saving: " + barcodePrimerComboFileNames[i][j] + '\n');
+                                               output << barcodePrimerComboFileNames[i][j] << endl;
+                                       }
+                               }
+                       }
+                       output.close();
+               }
+               
+               m->mothurOutEndLine();
+               m->mothurOut("Output File Names: "); m->mothurOutEndLine();
+               for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
+               m->mothurOutEndLine();
                
-               return 0;                       
+               return 0;       
        }
        catch(exception& e) {
                m->errorOut(e, "TrimSeqsCommand", "execute");
@@ -277,7 +334,7 @@ int TrimFlowsCommand::execute(){
 
 //***************************************************************************************************************
 
-int TrimFlowsCommand::driverCreateTrim(string flowFileName, string trimFlowFileName, string scrapFlowFileName, string fastaFileName){
+int TrimFlowsCommand::driverCreateTrim(string flowFileName, string trimFlowFileName, string scrapFlowFileName, string fastaFileName, vector<vector<string> > barcodePrimerComboFileNames, linePair* line){
        
        try {
                
@@ -288,41 +345,51 @@ int TrimFlowsCommand::driverCreateTrim(string flowFileName, string trimFlowFileN
                ofstream scrapFlowFile;
                m->openOutputFile(scrapFlowFileName, scrapFlowFile);
                scrapFlowFile.setf(ios::fixed, ios::floatfield); scrapFlowFile.setf(ios::showpoint);
-       
-               ifstream flowFile;
-               m->openInputFile(flowFileName, flowFile);
+
+               if(line->start == 4){
+                       scrapFlowFile << totalFlows << endl;
+                       trimFlowFile << totalFlows << endl;
+                       for(int i=0;i<barcodePrimerComboFileNames.size();i++){
+                               for(int j=0;j<barcodePrimerComboFileNames[0].size();j++){
+                                       //                              barcodePrimerComboFileNames[i][j] += toString(getpid()) + ".temp";
+                                       ofstream temp;
+                                       m->openOutputFile(barcodePrimerComboFileNames[i][j], temp);
+                                       temp << totalFlows << endl;
+                                       temp.close();
+                               }
+                       }                       
+               }
+               
+               FlowData flowData(numFlows, signal, noise, maxHomoP);
                
                ofstream fastaFile;
                if(fasta){      m->openOutputFile(fastaFileName, fastaFile);    }
                
-               vector<vector<string> > barcodePrimerCombos;    
-               if(oligoFileName != ""){        getOligos(barcodePrimerCombos); }
-               
-//             inFASTA.seekg(line->start);
+               ifstream flowFile;
+               m->openInputFile(flowFileName, flowFile);
                
-//             bool done = false;
+               flowFile.seekg(line->start);
+
                int count = 0;
+               bool moreSeqs = 1;
                
-               while (flowFile) {
+               while(moreSeqs) {
                        
                        int success = 1;
                        int currentSeqDiffs = 0;
-                       
                        string trashCode = "";
-
-                       FlowData currFlow(flowFile, signal, noise, maxHomoP);
-                       m->gobble(flowFile);
-                       currFlow.capFlows(totalFlows);  
-                       Sequence currSeq = currFlow.getSequence();
                        
+                       flowData.getNext(flowFile);
+                       flowData.capFlows(totalFlows);  
                        
-                       if(!currFlow.hasMinFlows(minFlows)){            //screen to see if sequence is of a minimum number of flows
+                       Sequence currSeq = flowData.getSequence();
+                       if(!flowData.hasMinFlows(minFlows)){    //screen to see if sequence is of a minimum number of flows
                                success = 0;
                                trashCode += 'l';
                        }
                        
-                       if(minLength > 0 || maxLength > 0){                     //screen to see if sequence is above and below a specific number of bases
-                               int seqLength = currFlow.getSeqLength();
+                       if(minLength > 0 || maxLength > 0){     //screen to see if sequence is above and below a specific number of bases
+                               int seqLength = currSeq.getNumBases();
                                if(seqLength < minLength || seqLength > maxLength){
                                        success = 0;
                                        trashCode += 'l';
@@ -330,6 +397,7 @@ int TrimFlowsCommand::driverCreateTrim(string flowFileName, string trimFlowFileN
                        }
                        
                        int primerIndex, barcodeIndex;
+                       
                        if(barcodes.size() != 0){
                                success = stripBarcode(currSeq, barcodeIndex);
                                if(success > bdiffs)            {       trashCode += 'b';       }
@@ -351,73 +419,48 @@ int TrimFlowsCommand::driverCreateTrim(string flowFileName, string trimFlowFileN
                                                
                                
                        if(trashCode.length() == 0){
-                               currFlow.printFlows(trimFlowFile);
+                               flowData.printFlows(trimFlowFile);
                                
                                if(fasta){
-                                       currFlow.printFASTA(fastaFile);
+                                       currSeq.printSequence(fastaFile);
                                }
                                
                                if(barcodes.size() != 0 || primers.size() != 0){
-                                       string fileName = outputDir + m->getRootName(m->getSimpleName(flowFileName)) + barcodePrimerCombos[barcodeIndex][primerIndex] + ".flow";
+//                                     string fileName = outputDir + m->getRootName(m->getSimpleName(flowFileName)) + barcodePrimerCombos[barcodeIndex][primerIndex] + ".flow";
                                        ofstream output;
-                                       m->openOutputFileAppend(fileName, output);
-                                       currFlow.printFlows(output);
+                                       m->openOutputFileAppend(barcodePrimerComboFileNames[barcodeIndex][primerIndex], output);
+                                       output.setf(ios::fixed, ios::floatfield); trimFlowFile.setf(ios::showpoint);
+                                       
+                                       flowData.printFlows(output);
                                        output.close();
                                }
                                
                        }
                        else{
-                               currFlow.printFlows(scrapFlowFile, trashCode);
+                               flowData.printFlows(scrapFlowFile, trashCode);
                        }
                                
-//                             if(barcodes.size() != 0){
-//                                     string thisGroup = groupVector[groupBar];
-//                                     int indexToFastaFile = groupBar;
-//                                     if (primers.size() != 0){
-//                                             //does this primer have a group
-//                                             if (groupVector[groupPrime] != "") {  
-//                                                     thisGroup += "." + groupVector[groupPrime]; 
-//                                                     indexToFastaFile = combos[thisGroup];
-//                                             }
-//                                     }
-//                                     outGroups << currSeq.getName() << '\t' << thisGroup << endl;
-//                                     if(allFiles){
-//                                             ofstream outTemp;
-//                                             m->openOutputFileAppend(fastaNames[indexToFastaFile], outTemp);
-//                                             //currSeq.printSequence(*fastaFileNames[indexToFastaFile]);
-//                                             currSeq.printSequence(outTemp);
-//                                             outTemp.close();
-//                                             
-//                                             if(qFileName != ""){
-//                                                     //currQual.printQScores(*qualFileNames[indexToFastaFile]);
-//                                                     ofstream outTemp2;
-//                                                     m->openOutputFileAppend(qualNames[indexToFastaFile], outTemp2);
-//                                                     currQual.printQScores(outTemp2);
-//                                                     outTemp2.close();                                                       
-//                                             }
-//                                     }
-//                             }
-//                     }
-//                     else{
-//                             currSeq.setName(currSeq.getName() + '|' + trashCode);
-//                             currSeq.setUnaligned(origSeq);
-//                             currSeq.setAligned(origSeq);
-//                             currSeq.printSequence(scrapFASTA);
-//                     }
-
                        count++;
                                                
                        //report progress
-                       if((count) % 1000 == 0){        m->mothurOut(toString(count)); m->mothurOutEndLine();           }
+                       if((count) % 10000 == 0){       m->mothurOut(toString(count)); m->mothurOutEndLine();           }
+
+#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
+                       unsigned long int pos = flowFile.tellg();
+
+                       if ((pos == -1) || (pos >= line->end)) { break; }
+#else
+                       if (flowFile.eof()) { break; }
+#endif
                        
                }
                //report progress
-               if((count) % 1000 != 0){        m->mothurOut(toString(count)); m->mothurOutEndLine();           }
-               
+               if((count) % 10000 != 0){       m->mothurOut(toString(count)); m->mothurOutEndLine();           }
                
                trimFlowFile.close();
                scrapFlowFile.close();
                flowFile.close();
+               if(fasta){      fastaFile.close();      }
                
                return count;
        }
@@ -526,7 +569,7 @@ void TrimFlowsCommand::getOligos(vector<vector<string> >& outFlowFileNames){
                                        
                                        outputNames.push_back(fileName);
                                        outputTypes["flow"].push_back(fileName);
-                                       outFlowFileNames[itBar->second][itPrimer->second] = comboGroupName;
+                                       outFlowFileNames[itBar->second][itPrimer->second] = fileName;
                                        
                                        ofstream temp;
                                        m->openOutputFile(fileName, temp);
@@ -571,11 +614,9 @@ int TrimFlowsCommand::stripBarcode(Sequence& seq, int& group){
                }
                
                //if you found the barcode or if you don't want to allow for diffs
-               //              cout << success;
                if ((bdiffs == 0) || (success == 0)) { return success;  }
                
                else { //try aligning and see if you can find it
-                       //                      cout << endl;
                        
                        int maxLength = 0;
                        
@@ -687,11 +728,9 @@ int TrimFlowsCommand::stripForward(Sequence& seq, int& group){
                }
                
                //if you found the barcode or if you don't want to allow for diffs
-               //              cout << success;
                if ((pdiffs == 0) || (success == 0)) { return success;  }
                
                else { //try aligning and see if you can find it
-                       //                      cout << endl;
                        
                        int maxLength = 0;
                        
@@ -739,8 +778,6 @@ int TrimFlowsCommand::stripForward(Sequence& seq, int& group){
                                int newStart=0;
                                int numDiff = countDiffs(oligo, temp);
                                
-                               //                              cout << oligo << '\t' << temp << '\t' << numDiff << endl;                               
-                               
                                if(numDiff < minDiff){
                                        minDiff = numDiff;
                                        minCount = 1;
@@ -887,17 +924,184 @@ int TrimFlowsCommand::countDiffs(string oligo, string seq){
        
 }
 
-//***************************************************************************************************************
-
-
-
+/**************************************************************************************************/
 
+vector<unsigned long int> TrimFlowsCommand::getFlowFileBreaks() {
 
+       try{
+                       
+               vector<unsigned long int> filePos;
+               filePos.push_back(0);
+                                       
+               FILE * pFile;
+               unsigned long int size;
+               
+               //get num bytes in file
+               pFile = fopen (flowFileName.c_str(),"rb");
+               if (pFile==NULL) perror ("Error opening file");
+               else{
+                       fseek (pFile, 0, SEEK_END);
+                       size=ftell (pFile);
+                       fclose (pFile);
+               }
+                               
+               //estimate file breaks
+               unsigned long int chunkSize = 0;
+               chunkSize = size / processors;
 
+               //file too small to divide by processors
+               if (chunkSize == 0)  {  processors = 1; filePos.push_back(size); return filePos;        }
+               
+               //for each process seekg to closest file break and search for next '>' char. make that the filebreak
+               for (int i = 0; i < processors; i++) {
+                       unsigned long int spot = (i+1) * chunkSize;
+                       
+                       ifstream in;
+                       m->openInputFile(flowFileName, in);
+                       in.seekg(spot);
+                       
+                       unsigned long int newSpot = spot;
+                       string dummy = m->getline(in);
+                       
+                       
+                       //there was not another sequence before the end of the file
+                       unsigned long int sanityPos = in.tellg();
+                       
+//                     if (sanityPos == -1) {  break;  }
+//                     else {  filePos.push_back(newSpot);  }
+                       if (sanityPos == -1) {  break;  }
+                       else {  filePos.push_back(sanityPos);  }
+                       
+                       in.close();
+               }
+               
+               //save end pos
+               filePos.push_back(size);
+               
+               //sanity check filePos
+               for (int i = 0; i < (filePos.size()-1); i++) {
+                       if (filePos[(i+1)] <= filePos[i]) {  filePos.erase(filePos.begin()+(i+1)); i--; }
+               }
 
+               ifstream in;
+               m->openInputFile(flowFileName, in);
+               in >> numFlows;
+               m->gobble(in);
+               unsigned long int spot = in.tellg();
+               filePos[0] = spot;
+               in.close();
+               
+               processors = (filePos.size() - 1);
+               
+               return filePos; 
+       }
+       catch(exception& e) {
+               m->errorOut(e, "TrimSeqsCommand", "getFlowFileBreaks");
+               exit(1);
+       }
+}
 
+/**************************************************************************************************/
 
+int TrimFlowsCommand::createProcessesCreateTrim(string flowFileName, string trimFlowFileName, string scrapFlowFileName, string fastaFileName, vector<vector<string> > barcodePrimerComboFileNames){
 
+       try {
+#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
+               int process = 1;
+               int exitCommand = 1;
+               processIDS.clear();
+               
+               //loop through and create all the processes you want
+               while (process != processors) {
+                       int pid = fork();
+                       
+                       if (pid > 0) {
+                               processIDS.push_back(pid);  //create map from line number to pid so you can append files in correct order later
+                               process++;
+                       }else if (pid == 0){
+                               
+                               vector<vector<string> > tempBarcodePrimerComboFileNames = barcodePrimerComboFileNames;
+                               for(int i=0;i<tempBarcodePrimerComboFileNames.size();i++){
+                                       for(int j=0;j<tempBarcodePrimerComboFileNames[0].size();j++){
+                                               tempBarcodePrimerComboFileNames[i][j] += toString(getpid()) + ".temp";
+                                               ofstream temp;
+                                               m->openOutputFile(tempBarcodePrimerComboFileNames[i][j], temp);
+                                               temp.close();
+                                               
+                                       }
+                               }
+                                               
+                               driverCreateTrim(flowFileName,
+                                                                (trimFlowFileName + toString(getpid()) + ".temp"),
+                                                                (scrapFlowFileName + toString(getpid()) + ".temp"),
+                                                                (fastaFileName + toString(getpid()) + ".temp"),
+                                                                tempBarcodePrimerComboFileNames, lines[process]);
+
+                               exit(0);
+                       }else { 
+                               m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine(); 
+                               for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
+                               exit(0);
+                       }
+               }
+               
+               //parent do my part
+               ofstream temp;
+               m->openOutputFile(trimFlowFileName, temp);
+               temp.close();
 
+               m->openOutputFile(scrapFlowFileName, temp);
+               temp.close();
+               
+               if(fasta){
+                       m->openOutputFile(fastaFileName, temp);
+                       temp.close();
+               }
+               
+               driverCreateTrim(flowFileName, trimFlowFileName, scrapFlowFileName, fastaFileName, barcodePrimerComboFileNames, lines[0]);
 
+               //force parent to wait until all the processes are done
+               for (int i=0;i<processIDS.size();i++) { 
+                       int temp = processIDS[i];
+                       wait(&temp);
+               }
+               
+               //append files
+               m->mothurOutEndLine();
+               for(int i=0;i<processIDS.size();i++){
+                       
+                       m->mothurOut("Appending files from process " + toString(processIDS[i])); m->mothurOutEndLine();
+                       
+                       m->appendFiles((trimFlowFileName + toString(processIDS[i]) + ".temp"), trimFlowFileName);
+                       remove((trimFlowFileName + toString(processIDS[i]) + ".temp").c_str());
+//                     m->mothurOut("\tDone with trim.flow file"); m->mothurOutEndLine();
+
+                       m->appendFiles((scrapFlowFileName + toString(processIDS[i]) + ".temp"), scrapFlowFileName);
+                       remove((scrapFlowFileName + toString(processIDS[i]) + ".temp").c_str());
+//                     m->mothurOut("\tDone with scrap.flow file"); m->mothurOutEndLine();
+
+                       if(fasta){
+                               m->appendFiles((fastaFileName + toString(processIDS[i]) + ".temp"), fastaFileName);
+                               remove((fastaFileName + toString(processIDS[i]) + ".temp").c_str());
+//                             m->mothurOut("\tDone with flow.fasta file"); m->mothurOutEndLine();
+                       }
+                       if(allFiles){                                           
+                               for (int j = 0; j < barcodePrimerComboFileNames.size(); j++) {
+                                       for (int k = 0; k < barcodePrimerComboFileNames[0].size(); k++) {
+                                               m->appendFiles((barcodePrimerComboFileNames[j][k] + toString(processIDS[i]) + ".temp"), barcodePrimerComboFileNames[j][k]);
+                                               remove((barcodePrimerComboFileNames[j][k] + toString(processIDS[i]) + ".temp").c_str());
+                                       }
+                               }
+                       }
+               }
+               
+               return exitCommand;
+#endif         
+       }
+       catch(exception& e) {
+               m->errorOut(e, "TrimFlowsCommand", "createProcessesCreateTrim");
+               exit(1);
+       }
+}
 
+//***************************************************************************************************************
index 215b1a2f69bf717e11e2a8f25ce1761e7c0951f9..15c698c2ffe6075101ab1105ff60f579c349c425 100644 (file)
@@ -31,8 +31,6 @@ public:
 private:
        bool abort;
 
-//     GroupMap* groupMap;
-       
        struct linePair {
                unsigned long int start;
                unsigned long int end;
@@ -41,11 +39,16 @@ private:
        int comboStarts;
        vector<int> processIDS;   //processid
        vector<linePair*> lines;
-       vector<linePair*> qLines;
+
+       vector<unsigned long int> getFlowFileBreaks();
+       int createProcessesCreateTrim(string, string, string, string, vector<vector<string> >); 
+       int driverCreateTrim(string, string, string, string, vector<vector<string> >, linePair*);
+
+       
        map<string, vector<string> > outputTypes;
        vector<string> outputNames;
        set<string> filesToRemove;
-
+       
        
        
        void getOligos(vector<vector<string> >&);               //a rewrite of what is in trimseqscommand.h
@@ -54,12 +57,12 @@ private:
        bool stripReverse(Sequence&);                                   //largely redundant with trimseqscommand.h
        bool compareDNASeq(string, string);                             //largely redundant with trimseqscommand.h
        int countDiffs(string, string);                                 //largely redundant with trimseqscommand.h
-
        
        bool allFiles;
-//     int processors;
+       int processors;
        int numFPrimers, numRPrimers;
        int totalFlows, minFlows, minLength, maxLength, maxHomoP, tdiffs, bdiffs, pdiffs;
+       int numFlows;
        float signal, noise;
        bool fasta;
        
@@ -77,12 +80,6 @@ private:
        map<string, int> combos;                        //needed here?
        map<string, int> groupToIndex;          //needed here?
        
-       
-       int driverCreateTrim(string, string, string, string);
-       
-//     int createProcessesCreateTrim(string, string, string, string, string, string, string, vector<string>, vector<string>){};
-       int setLines(string, string, vector<unsigned long int>&, vector<unsigned long int>&){};
-       
 };
 
 
index b40b7e5b0123d848a26df5afd803f3625df9a4a8..20002049ce986b01f5395ed2b807471cf954133f 100644 (file)
@@ -883,6 +883,7 @@ int TrimSeqsCommand::setLines(string filename, string qfilename, vector<unsigned
                exit(1);
        }
 }
+
 //***************************************************************************************************************
 
 void TrimSeqsCommand::getOligos(vector<string>& outFASTAVec, vector<string>& outQualVec){