]> git.donarmstrong.com Git - mothur.git/blobdiff - shhhercommand.cpp
removed read.dist, read.otu, read.tree and globaldata. added current to defaults...
[mothur.git] / shhhercommand.cpp
index 24301a2c18b3decebd1a9c5a3954ffc96d57b90b..74669ed0d9dba0425c81e74852482a62e4f29b46 100644 (file)
@@ -16,6 +16,7 @@
 #include "listvector.hpp"
 #include "cluster.hpp"
 #include "sparsematrix.hpp"
+#include <cfloat>
 
 //**********************************************************************************************************************
 
 #define MIN_WEIGHT 0.1
 #define MIN_TAU 0.0001
 #define MIN_ITER 10
-
 //**********************************************************************************************************************
-
-vector<string> ShhherCommand::getValidParameters(){    
+vector<string> ShhherCommand::setParameters(){ 
        try {
-               string Array[] =  {     
-                       "file", "flow", "lookup", "cutoff", "sigma", "outputdir","inputdir", "processors"       
-               };
+               CommandParameter pflow("flow", "InputTypes", "", "", "none", "fileflow", "none",false,false); parameters.push_back(pflow);
+               CommandParameter pfile("file", "InputTypes", "", "", "none", "fileflow", "none",false,false); parameters.push_back(pfile);
+               CommandParameter plookup("lookup", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(plookup);
+               CommandParameter pcutoff("cutoff", "Number", "", "0.01", "", "", "",false,false); parameters.push_back(pcutoff);
+               CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors);
+               CommandParameter pmaxiter("maxiter", "Number", "", "1000", "", "", "",false,false); parameters.push_back(pmaxiter);
+               CommandParameter psigma("sigma", "Number", "", "60", "", "", "",false,false); parameters.push_back(psigma);
+               CommandParameter pmindelta("mindelta", "Number", "", "0.000001", "", "", "",false,false); parameters.push_back(pmindelta);
+               CommandParameter porder("order", "String", "", "", "", "", "",false,false); parameters.push_back(porder);
+               CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
+               CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
                
-               vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
+               vector<string> myArray;
+               for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
                return myArray;
        }
        catch(exception& e) {
-               m->errorOut(e, "ShhherCommand", "getValidParameters");
+               m->errorOut(e, "ShhherCommand", "setParameters");
                exit(1);
        }
 }
-
 //**********************************************************************************************************************
-
-ShhherCommand::ShhherCommand(){        
+string ShhherCommand::getHelpString(){ 
        try {
-               abort = true;
-               
-               //initialize outputTypes
-               vector<string> tempOutNames;
-               outputTypes["pn.dist"] = tempOutNames;
-
+               string helpString = "";
+               helpString += "The shhh.seqs command reads a file containing flowgrams and creates a file of corrected sequences.\n";
+               return helpString;
        }
        catch(exception& e) {
-               m->errorOut(e, "ShhherCommand", "ShhherCommand");
+               m->errorOut(e, "ShhherCommand", "getHelpString");
                exit(1);
        }
 }
-
 //**********************************************************************************************************************
 
-vector<string> ShhherCommand::getRequiredParameters(){ 
+ShhherCommand::ShhherCommand(){        
        try {
-               string Array[] =  {"flow"};
-               vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
-               return myArray;
-       }
-       catch(exception& e) {
-               m->errorOut(e, "ShhherCommand", "getRequiredParameters");
-               exit(1);
-       }
-}
-
-//**********************************************************************************************************************
+               abort = true; calledHelp = true;
+               setParameters();
+               
+               //initialize outputTypes
+               vector<string> tempOutNames;
+               outputTypes["pn.dist"] = tempOutNames;
 
-vector<string> ShhherCommand::getRequiredFiles(){      
-       try {
-               vector<string> myArray;
-               return myArray;
        }
        catch(exception& e) {
-               m->errorOut(e, "ShhherCommand", "getRequiredFiles");
+               m->errorOut(e, "ShhherCommand", "ShhherCommand");
                exit(1);
        }
 }
@@ -100,20 +93,14 @@ ShhherCommand::ShhherCommand(string option) {
 #endif
                
                
-               abort = false;
+               abort = false; calledHelp = false;   
                
                
                //allow user to run help
-               if(option == "help") { help(); abort = true; }
+               if(option == "help") { help(); abort = true; calledHelp = true; }
                
                else {
-                       
-                       //valid paramters for this command
-                       string AlignArray[] =  {
-                               "file", "flow", "lookup", "cutoff", "sigma", "outputdir","inputdir", "processors"       
-                       };
-                       
-                       vector<string> myArray (AlignArray, AlignArray+(sizeof(AlignArray)/sizeof(string)));
+                       vector<string> myArray = setParameters();
                        
                        OptionParser parser(option);
                        map<string,string> parameters = parser.getParameters();
@@ -170,7 +157,15 @@ ShhherCommand::ShhherCommand(string option) {
                                m->mothurOutEndLine();
                                abort = true; 
                        }
-                       else if (flowFileName == "not open" || flowFilesFileName == "not open") { abort = true; }       
+                       else if (flowFileName == "not open" || flowFilesFileName == "not open") { abort = true; }
+                       
+                       if(flowFileName != "not found"){        compositeFASTAFileName = "";    }
+                       else{
+                               compositeFASTAFileName = flowFilesFileName.substr(0, flowFilesFileName.length()-10) + "pn.fasta";
+                               ofstream temp;
+                               m->openOutputFile(compositeFASTAFileName, temp);
+                               temp.close();
+                       }
                        
                        //if the user changes the output directory command factory will send this info to us in the output parameter 
                        outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  
@@ -183,12 +178,13 @@ ShhherCommand::ShhherCommand(string option) {
                        // ...at some point should added some additional type checking...
                        string temp;
                        temp = validParameter.validFile(parameters, "lookup", true);
-                       if (temp == "not found")        {       lookupFileName = "LookUp_E123.pat";     }
+                       if (temp == "not found")        {       lookupFileName = "LookUp_Titanium.pat"; }
                        else if(temp == "not open")     {       abort = true;                   } 
                        else                                            {       lookupFileName = temp;  }
                        
-                       temp = validParameter.validFile(parameters, "processors", false);if (temp == "not found"){      temp = "1";                     }
-                       convert(temp, processors); 
+                       temp = validParameter.validFile(parameters, "processors", false);       if (temp == "not found"){       temp = m->getProcessors();      }
+                       m->setProcessors(temp);
+                       convert(temp, processors);
 
                        temp = validParameter.validFile(parameters, "cutoff", false);   if (temp == "not found"){       temp = "0.01";          }
                        convert(temp, cutoff); 
@@ -202,7 +198,12 @@ ShhherCommand::ShhherCommand(string option) {
                        temp = validParameter.validFile(parameters, "sigma", false);if (temp == "not found")    {       temp = "60";            }
                        convert(temp, sigma); 
                        
-                       globaldata = GlobalData::getInstance();
+                       flowOrder = validParameter.validFile(parameters, "order", false);
+                       if (flowOrder == "not found"){ flowOrder = "TACG";              }
+                       else if(flowOrder.length() != 4){
+                               m->mothurOut("The value of the order option must be four bases long\n");
+                       }
+                       
                }
                        
 #ifdef USE_MPI
@@ -215,38 +216,15 @@ ShhherCommand::ShhherCommand(string option) {
                exit(1);
        }
 }
-
-//**********************************************************************************************************************
-
-ShhherCommand::~ShhherCommand(){}
-
-//**********************************************************************************************************************
-
-void ShhherCommand::help(){
-       try {
-               m->mothurOut("The shhher command reads a file containing flowgrams and creates a file of corrected sequences.\n");
-       }
-       catch(exception& e) {
-               m->errorOut(e, "ShhherCommand", "help");
-               exit(1);
-       }
-}
-
 //**********************************************************************************************************************
 #ifdef USE_MPI
 int ShhherCommand::execute(){
        try {
+               if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
+               
                int tag = 1976;
                MPI_Status status; 
 
-               double begClock = clock();
-               unsigned long int begTime = time(NULL);
-
-               cout.setf(ios::fixed, ios::floatfield);
-               cout.setf(ios::showpoint);
-               cout << setprecision(2);
-               
-               
                if(pid == 0){
 
                        for(int i=1;i<ncpus;i++){
@@ -256,10 +234,10 @@ int ShhherCommand::execute(){
 
                        processors = ncpus;
                        
-                       cout << "\nGetting preliminary data..." << endl;
+                       m->mothurOut("\nGetting preliminary data...\n");
                        getSingleLookUp();
                        getJointLookUp();
-                                               
+                       
                        vector<string> flowFileVector;
                        if(flowFilesFileName != "not found"){
                                string fName;
@@ -282,15 +260,19 @@ int ShhherCommand::execute(){
                        }
                        
                        for(int i=0;i<numFiles;i++){
+                               double begClock = clock();
+                               unsigned long int begTime = time(NULL);
+
                                flowFileName = flowFileVector[i];
-                       
-                               cout << "\n>>>>>\tProcessing " << flowFileName << " (file " << i+1 << " of " << numFiles << ")\t<<<<<" << endl;
-                               cout << "Reading flowgrams..." << endl;
+                               
+                               m->mothurOut("\n>>>>>\tProcessing " + flowFileName + " (file " + toString(i+1) + " of " + toString(numFiles) + ")\t<<<<<\n");
+                               m->mothurOut("Reading flowgrams...\n");
                                getFlowData();
-                               cout << "Identifying unique flowgrams..." << endl;
+
+                               m->mothurOut("Identifying unique flowgrams...\n");
                                getUniques();
 
-                               cout << "Calculating distances between flowgrams..." << endl;
+                               m->mothurOut("Calculating distances between flowgrams...\n");
                                char fileName[1024];
                                strcpy(fileName, flowFileName.c_str());
 
@@ -321,11 +303,9 @@ int ShhherCommand::execute(){
 
                                string namesFileName = createNamesFile();
                                
-                               cout << "\nClustering flowgrams..." << endl;
+                               m->mothurOut("\nClustering flowgrams...\n");
                                string listFileName = cluster(distFileName, namesFileName);
-       //                      string listFileName = "PriestPot_C7.pn.list";
-       //                      string listFileName = "test.mock_rep3.v69.pn.list";
-                       
+
                                getOTUData(listFileName);
                                initPyroCluster();
 
@@ -342,9 +322,8 @@ int ShhherCommand::execute(){
                                
                                int numOTUsOnCPU = numOTUs / ncpus;
                                int numSeqsOnCPU = numSeqs / ncpus;
-                               
-                               cout << "\nDenoising flowgrams..." << endl;
-                               cout << "iter\tmaxDelta\tnLL\t\tcycletime" << endl;
+                               m->mothurOut("\nDenoising flowgrams...\n");
+                               m->mothurOut("iter\tmaxDelta\tnLL\t\tcycletime\n");
                                
                                while((maxIters == 0 && maxDelta > minDelta) || iter < MIN_ITER || (maxDelta > minDelta && iter < maxIters)){
 
@@ -364,7 +343,7 @@ int ShhherCommand::execute(){
                                                MPI_Send(&nSeqsPerOTU[0], numOTUs, MPI_INT, i, tag, MPI_COMM_WORLD);
                                                MPI_Send(&cumNumSeqs[0], numOTUs, MPI_INT, i, tag, MPI_COMM_WORLD);
                                        }
-                               
+                                       
                                        calcCentroidsDriver(0, numOTUsOnCPU);
                                        
                                        for(int i=1;i<ncpus;i++){
@@ -445,7 +424,7 @@ int ShhherCommand::execute(){
                                        
                                        iter++;
                                        
-                                       cout << iter << '\t' << maxDelta << '\t' << setprecision(2) << nLL << '\t' << time(NULL) - cycTime << '\t' << setprecision(6) << (clock() - cycClock)/(double)CLOCKS_PER_SEC << endl;                   
+                                       m->mothurOut(toString(iter) + '\t' + toString(maxDelta) + '\t' + toString(nLL) + '\t' + toString(time(NULL) - cycTime) + '\t' + toString((clock() - cycClock)/(double)CLOCKS_PER_SEC) + '\n');                  
 
                                        if((maxIters == 0 && maxDelta > minDelta) || iter < MIN_ITER || (maxDelta > minDelta && iter < maxIters)){
                                                int live = 1;
@@ -462,7 +441,7 @@ int ShhherCommand::execute(){
                                        
                                }       
                                
-                               cout << "\nFinalizing..." << endl;
+                               m->mothurOut("\nFinalizing...\n");
                                fill();
                                setOTUs();
                                vector<int> otuCounts(numOTUs, 0);
@@ -477,14 +456,12 @@ int ShhherCommand::execute(){
                                remove(distFileName.c_str());
                                remove(namesFileName.c_str());
                                remove(listFileName.c_str());
-                               
-                               cout << "Total time to process " << flowFileName << ":\t" << time(NULL) - begTime << '\t' << setprecision(6) << (clock() - begClock)/(double)CLOCKS_PER_SEC << endl;                    
+                                                                
+                               m->mothurOut("Total time to process " + toString(flowFileName) + ":\t" + toString(time(NULL) - begTime) + '\t' + toString((clock() - begClock)/(double)CLOCKS_PER_SEC) + '\n');                 
                        }
-
                }
                else{
                        int abort = 1;
-                       bool live = 1;
 
                        MPI_Recv(&abort, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
                        if(abort){      return 0;       }
@@ -494,6 +471,8 @@ int ShhherCommand::execute(){
 
                        for(int i=0;i<numFiles;i++){
                                //Now into the pyrodist part
+                               bool live = 1;
+
                                char fileName[1024];
                                MPI_Recv(&fileName, 1024, MPI_CHAR, 0, tag, MPI_COMM_WORLD, &status);
                                MPI_Recv(&numSeqs, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
@@ -549,12 +528,12 @@ int ShhherCommand::execute(){
                                int total;
 
                                while(live){
-
+                                       
                                        MPI_Recv(&total, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
                                        singleTau.assign(total, 0.0000);
                                        seqNumber.assign(total, 0);
                                        seqIndex.assign(total, 0);
-                                       
+
                                        MPI_Recv(&change[0], numOTUs, MPI_SHORT, 0, tag, MPI_COMM_WORLD, &status);
                                        MPI_Recv(&centroids[0], numOTUs, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
                                        MPI_Recv(&singleTau[0], total, MPI_DOUBLE, 0, tag, MPI_COMM_WORLD, &status);
@@ -562,13 +541,12 @@ int ShhherCommand::execute(){
                                        MPI_Recv(&seqIndex[0], total, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
                                        MPI_Recv(&nSeqsPerOTU[0], total, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
                                        MPI_Recv(&cumNumSeqs[0], numOTUs, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
-                                       
+
                                        calcCentroidsDriver(startOTU, endOTU);
 
                                        MPI_Send(&centroids[0], numOTUs, MPI_INT, 0, tag, MPI_COMM_WORLD);
                                        MPI_Send(&change[0], numOTUs, MPI_SHORT, 0, tag, MPI_COMM_WORLD);
 
-                                       
                                        MPI_Recv(&centroids[0], numOTUs, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
                                        MPI_Recv(&weight[0], numOTUs, MPI_DOUBLE, 0, tag, MPI_COMM_WORLD, &status);
                                        MPI_Recv(&change[0], numOTUs, MPI_SHORT, 0, tag, MPI_COMM_WORLD, &status);
@@ -587,8 +565,8 @@ int ShhherCommand::execute(){
                                }
                        }
                }               
-
                MPI_Barrier(MPI_COMM_WORLD);
+
                return 0;
 
        }
@@ -623,10 +601,11 @@ string ShhherCommand::flowDistMPI(int startSeq, int stopSeq){
                                }
                        }
                        if(i % 100 == 0){
-                               cout << i << "\t" << (time(NULL) - begTime) << "\t" << (clock()-begClock)/CLOCKS_PER_SEC << endl;
+                               m->mothurOut(toString(i) + '\t' + toString(time(NULL) - begTime) + '\t' + toString((clock()-begClock)/CLOCKS_PER_SEC) + '\n');
                        }
                }
-               cout << stopSeq << "\t" << (time(NULL) - begTime) << "\t" << (clock()-begClock)/CLOCKS_PER_SEC << endl;
+               
+               m->mothurOut(toString(stopSeq) + '\t' + toString(time(NULL) - begTime) + '\t' + toString((clock()-begClock)/CLOCKS_PER_SEC) + '\n');
                
                string fDistFileName = flowFileName.substr(0,flowFileName.find_last_of('.')) + ".pn.dist";
                if(pid != 0){   fDistFileName += ".temp." + toString(pid);      }
@@ -650,13 +629,9 @@ int ShhherCommand::execute(){
        try {
                if (abort == true) { return 0; }
                
-               cout.setf(ios::fixed, ios::floatfield);
-               cout.setf(ios::showpoint);
-               
                getSingleLookUp();
                getJointLookUp();
-               
-               
+                               
                vector<string> flowFileVector;
                if(flowFilesFileName != "not found"){
                        string fName;
@@ -678,18 +653,19 @@ int ShhherCommand::execute(){
                for(int i=0;i<numFiles;i++){
                        flowFileName = flowFileVector[i];
 
-                       cout << "\n>>>>>\tProcessing " << flowFileName << " (file " << i+1 << " of " << numFiles << ")\t<<<<<" << endl;
-                       cout << "Reading flowgrams..." << endl;
+                       m->mothurOut("\n>>>>>\tProcessing " + flowFileName + " (file " + toString(i+1) + " of " + toString(numFiles) + ")\t<<<<<\n");
+                       m->mothurOut("Reading flowgrams...\n");
                        getFlowData();
-                       cout << "Identifying unique flowgrams..." << endl;
+                       
+                       m->mothurOut("Identifying unique flowgrams...\n");
                        getUniques();
                        
                        
-                       cout << "Calculating distances between flowgrams..." << endl;                   
+                       m->mothurOut("Calculating distances between flowgrams...\n");
                        string distFileName = createDistFile(processors);
                        string namesFileName = createNamesFile();
-                       
-                       cout << "\nClustering flowgrams..." << endl;
+                               
+                       m->mothurOut("\nClustering flowgrams...\n");
                        string listFileName = cluster(distFileName, namesFileName);
                        getOTUData(listFileName);
                        
@@ -701,8 +677,8 @@ int ShhherCommand::execute(){
                        double begClock = clock();
                        unsigned long int begTime = time(NULL);
 
-                       cout << "\nDenoising flowgrams..." << endl;
-                       cout << "iter\tmaxDelta\tnLL\t\tcycletime" << endl;
+                       m->mothurOut("\nDenoising flowgrams...\n");
+                       m->mothurOut("iter\tmaxDelta\tnLL\t\tcycletime\n");
                        
                        while((maxIters == 0 && maxDelta > minDelta) || iter < MIN_ITER || (maxDelta > minDelta && iter < maxIters)){
                                
@@ -720,10 +696,11 @@ int ShhherCommand::execute(){
 
                                iter++;
                                
-                               cout << iter << '\t' << maxDelta << '\t' << setprecision(2) << nLL << '\t' << time(NULL) - cycTime << '\t' << setprecision(6) << (clock() - cycClock)/(double)CLOCKS_PER_SEC << endl;                   
+                               m->mothurOut(toString(iter) + '\t' + toString(maxDelta) + '\t' + toString(nLL) + '\t' + toString(time(NULL) - cycTime) + '\t' + toString((clock() - cycClock)/(double)CLOCKS_PER_SEC) + '\n');
+
                        }       
                        
-                       cout << "\nFinalizing..." << endl;
+                       m->mothurOut("\nFinalizing...\n");
                        fill();
                        setOTUs();
                        
@@ -741,7 +718,7 @@ int ShhherCommand::execute(){
                        remove(namesFileName.c_str());
                        remove(listFileName.c_str());
                        
-                       cout << "Total time to process " << flowFileName << ":\t" << time(NULL) - begTime << '\t' << setprecision(6) << (clock() - begClock)/(double)CLOCKS_PER_SEC << endl;                    
+                       m->mothurOut("Total time to process " + flowFileName + ":\t" + toString(time(NULL) - begTime) + '\t' + toString((clock() - begClock)/(double)CLOCKS_PER_SEC) + '\n');
                }
                return 0;
        }
@@ -759,6 +736,11 @@ void ShhherCommand::getFlowData(){
                m->openInputFile(flowFileName, flowFile);
                
                string seqName;
+               seqNameVector.clear();
+               lengths.clear();
+               flowDataIntI.clear();
+               nameMap.clear();
+               
                
                int currentNumFlowCells;
                
@@ -838,10 +820,10 @@ void ShhherCommand::getJointLookUp(){
                for(int i=0;i<NUMBINS;i++){
                        for(int j=0;j<NUMBINS;j++){             
                                
-                               float minSum = 10000000000;
+                               double minSum = 100000000;
                                
                                for(int k=0;k<HOMOPS;k++){
-                                       float sum = singleLookUp[k * NUMBINS + i] + singleLookUp[k * NUMBINS + j];
+                                       double sum = singleLookUp[k * NUMBINS + i] + singleLookUp[k * NUMBINS + j];
                                        
                                        if(sum < minSum)        {       minSum = sum;           }
                                }       
@@ -857,9 +839,11 @@ void ShhherCommand::getJointLookUp(){
 
 /**************************************************************************************************/
 
-float ShhherCommand::getProbIntensity(int intIntensity){                          
+double ShhherCommand::getProbIntensity(int intIntensity){                          
        try{
-               float minNegLogProb = 10000000000; 
+
+               double minNegLogProb = 100000000; 
+
                
                for(int i=0;i<HOMOPS;i++){//loop signal strength
                        float negLogProb = singleLookUp[i * NUMBINS + intIntensity];
@@ -893,13 +877,19 @@ void ShhherCommand::getUniques(){
                        int index = 0;
                        
                        vector<short> current(numFlowCells);
-                       for(int j=0;j<numFlowCells;j++){        current[j] = short(((flowDataIntI[i * numFlowCells + j] + 50.0)/100.0));        }
+                       for(int j=0;j<numFlowCells;j++){
+                               current[j] = short(((flowDataIntI[i * numFlowCells + j] + 50.0)/100.0));
+                       }
                                                
                        for(int j=0;j<numUniques;j++){
                                int offset = j * numFlowCells;
                                bool toEnd = 1;
                                
-                               for(int k=0;k<numFlowCells;k++){
+                               int shorterLength;
+                               if(lengths[i] < uniqueLengths[j])       {       shorterLength = lengths[i];                     }
+                               else                                                            {       shorterLength = uniqueLengths[j];       }
+
+                               for(int k=0;k<shorterLength;k++){
                                        if(current[k] != uniqueFlowgrams[offset + k]){
                                                toEnd = 0;
                                                break;
@@ -910,6 +900,7 @@ void ShhherCommand::getUniques(){
                                        mapSeqToUnique[i] = j;
                                        uniqueCount[j]++;
                                        index = j;
+                                       if(lengths[i] > uniqueLengths[j])       {       uniqueLengths[j] = lengths[i];  }
                                        break;
                                }
                                index++;
@@ -932,7 +923,7 @@ void ShhherCommand::getUniques(){
                uniqueFlowDataIntI.resize(numFlowCells * numUniques);
                uniqueLengths.resize(numUniques);       
                
-               flowDataPrI.assign(numSeqs * numFlowCells, 0);
+               flowDataPrI.resize(numSeqs * numFlowCells, 0);
                for(int i=0;i<flowDataPrI.size();i++)   {       flowDataPrI[i] = getProbIntensity(flowDataIntI[i]);             }
        }
        catch(exception& e) {
@@ -990,10 +981,10 @@ void ShhherCommand::flowDistParentFork(string distFileName, int startSeq, int st
                                float flowDistance = calcPairwiseDist(mapUniqueToSeq[i], mapUniqueToSeq[j]);
 
                                if(flowDistance < 1e-6){
-                                       outStream << seqNameVector[mapUniqueToSeq[i]] << '\t' << seqNameVector[mapUniqueToSeq[j]] << '\t' << 0.000000 << endl;
+                                       outStream << mapUniqueToSeq[i] << '\t' << mapUniqueToSeq[j] << '\t' << 0.000000 << endl;
                                }
                                else if(flowDistance <= cutoff){
-                                       outStream << seqNameVector[mapUniqueToSeq[i]] << '\t' << seqNameVector[mapUniqueToSeq[j]] << '\t' << flowDistance << endl;
+                                       outStream << mapUniqueToSeq[i] << '\t' << mapUniqueToSeq[j] << '\t' << flowDistance << endl;
                                }
                        }
                        if(i % 100 == 0){
@@ -1006,7 +997,7 @@ void ShhherCommand::flowDistParentFork(string distFileName, int startSeq, int st
                m->mothurOut("\t" + toString((clock()-begClock)/CLOCKS_PER_SEC));
                m->mothurOutEndLine();
                
-               ofstream distFile((distFileName + "temp." + toString(pid)).c_str());
+               ofstream distFile(distFileName.c_str());
                distFile << outStream.str();            
                distFile.close();
        }
@@ -1086,7 +1077,8 @@ string ShhherCommand::createDistFile(int processors){
 
                m->mothurOutEndLine();
                
-               cout << "Total time: " << (time(NULL) - begTime) << "\t"  << (clock() - begClock)/CLOCKS_PER_SEC << endl;;
+               m->mothurOut("Total time: " + toString(time(NULL) - begTime) + '\t' + toString((clock() - begClock)/CLOCKS_PER_SEC) + '\n');
+               
 
                return fDistFileName;
        }
@@ -1131,14 +1123,6 @@ string ShhherCommand::createNamesFile(){
 string ShhherCommand::cluster(string distFileName, string namesFileName){
        try {
                
-               SparseMatrix* matrix;
-               ListVector* list;
-               RAbundVector* rabund;
-               
-               globaldata->setNameFile(namesFileName);
-               globaldata->setColumnFile(distFileName);
-               globaldata->setFormat("column");
-               
                ReadMatrix* read = new ReadColumnMatrix(distFileName);  
                read->setCutoff(cutoff);
                
@@ -1146,13 +1130,13 @@ string ShhherCommand::cluster(string distFileName, string namesFileName){
                clusterNameMap->readMap();
                read->read(clusterNameMap);
                
-               list = read->getListVector();
-               matrix = read->getMatrix();
+               ListVector* list = read->getListVector();
+               SparseMatrix* matrix = read->getMatrix();
                
                delete read; 
                delete clusterNameMap; 
                                
-               rabund = new RAbundVector(list->getRAbundVector());
+               RAbundVector* rabund = new RAbundVector(list->getRAbundVector());
                
                Cluster* cluster = new CompleteLinkage(rabund, list, matrix, cutoff, "furthest"); 
                string tag = cluster->getTag();
@@ -1160,7 +1144,6 @@ string ShhherCommand::cluster(string distFileName, string namesFileName){
                double clusterCutoff = cutoff;
                while (matrix->getSmallDist() <= clusterCutoff && matrix->getNNodes() > 0){
                        cluster->update(clusterCutoff);
-                       float dist = matrix->getSmallDist();
                }
                
                list->setLabel(toString(cutoff));
@@ -1195,7 +1178,11 @@ void ShhherCommand::getOTUData(string listFileName){
                otuData.assign(numSeqs, 0);
                cumNumSeqs.assign(numOTUs, 0);
                nSeqsPerOTU.assign(numOTUs, 0);
-               aaP.resize(numOTUs);
+               aaP.clear();aaP.resize(numOTUs);
+               
+               seqNumber.clear();
+               aaI.clear();
+               seqIndex.clear();
                
                string singleOTU = "";
                
@@ -1247,6 +1234,8 @@ void ShhherCommand::getOTUData(string listFileName){
                        for(int j=nSeqsPerOTU[i];j<numSeqs;j++){
                                aaP[i].push_back(0);
                        }
+                       
+                       
                }
                
                for(int i=1;i<numOTUs;i++){
@@ -1256,6 +1245,7 @@ void ShhherCommand::getOTUData(string listFileName){
                seqIndex = seqNumber;
                
                listFile.close();       
+               
        }
        catch(exception& e) {
                m->errorOut(e, "ShhherCommand", "getOTUData");
@@ -1374,6 +1364,7 @@ void ShhherCommand::calcCentroidsDriver(int start, int finish){
        
        try{
                
+       
                for(int i=start;i<finish;i++){
                        
                        double count = 0;
@@ -1385,7 +1376,7 @@ void ShhherCommand::calcCentroidsDriver(int start, int finish){
                        for(int j=0;j<nSeqsPerOTU[i];j++){
                                count += singleTau[seqNumber[cumNumSeqs[i] + j]];
                        }
-                       
+
                        if(nSeqsPerOTU[i] > 0 && count > MIN_COUNT){
                                vector<double> adF(nSeqsPerOTU[i]);
                                vector<int> anL(nSeqsPerOTU[i]);
@@ -1411,7 +1402,6 @@ void ShhherCommand::calcCentroidsDriver(int start, int finish){
                                for(int j=0;j<nSeqsPerOTU[i];j++){
                                        int index = cumNumSeqs[i] + j;
                                        int nI = seqIndex[index];
-                                       int nIU = mapSeqToUnique[nI];
                                        
                                        double tauValue = singleTau[seqNumber[index]];
                                        
@@ -1483,7 +1473,6 @@ double ShhherCommand::getNewWeights(){
                        
                        for(int j=0;j<nSeqsPerOTU[i];j++){
                                int index = cumNumSeqs[i] + j;
-                               int nI = seqIndex[index];
                                double tauValue = singleTau[seqNumber[index]];
                                weight[i] += tauValue;
                        }
@@ -1505,7 +1494,7 @@ double ShhherCommand::getLikelihood(){
        
        try{
                
-               vector<double> P(numSeqs, 0);
+               vector<long double> P(numSeqs, 0);
                int effNumOTUs = 0;
                
                for(int i=0;i<numOTUs;i++){
@@ -1514,6 +1503,7 @@ double ShhherCommand::getLikelihood(){
                        }
                }
                
+               string hold;
                for(int i=0;i<numOTUs;i++){
                        for(int j=0;j<nSeqsPerOTU[i];j++){
                                int index = cumNumSeqs[i] + j;
@@ -1523,14 +1513,15 @@ double ShhherCommand::getLikelihood(){
                                P[nI] += weight[i] * exp(-singleDist * sigma);
                        }
                }
-               
                double nLL = 0.00;
                for(int i=0;i<numSeqs;i++){
+                       if(P[i] == 0){  P[i] = DBL_EPSILON;     }
+
                        nLL += -log(P[i]);
                }
                
                nLL = nLL -(double)numSeqs * log(sigma);
-               
+
                return nLL; 
        }
        catch(exception& e) {
@@ -1665,9 +1656,9 @@ void ShhherCommand::calcNewDistancesChildMPI(int startSeq, int stopSeq, vector<i
        try{
                vector<double> newTau(numOTUs,0);
                vector<double> norms(numSeqs, 0);
-               otuIndex.resize(0);
-               seqIndex.resize(0);
-               singleTau.resize(0);
+               otuIndex.clear();
+               seqIndex.clear();
+               singleTau.clear();
                
                
                
@@ -1900,7 +1891,6 @@ void ShhherCommand::writeQualities(vector<int> otuCounts){
                vector<vector<int> > qualities(numOTUs);
                vector<double> pr(HOMOPS, 0);
                
-               int index = 0;
                
                for(int i=0;i<numOTUs;i++){
                        int index = 0;
@@ -1991,8 +1981,6 @@ void ShhherCommand::writeQualities(vector<int> otuCounts){
 void ShhherCommand::writeSequences(vector<int> otuCounts){
        try {
                
-               string bases = "TACG";
-               
                string fastaFileName = flowFileName.substr(0,flowFileName.find_last_of('.')) + ".pn.fasta";
                ofstream fastaFile;
                m->openOutputFile(fastaFileName, fastaFile);
@@ -2005,17 +1993,24 @@ void ShhherCommand::writeSequences(vector<int> otuCounts){
                        if(otuCounts[i] > 0){
                                fastaFile << '>' << seqNameVector[aaI[i][0]] << endl;
                                
-                               for(int j=8;j<numFlowCells;j++){
+                               string newSeq = "";
+                               
+                               for(int j=0;j<numFlowCells;j++){
                                        
-                                       char base = bases[j % 4];
+                                       char base = flowOrder[j % 4];
                                        for(int k=0;k<uniqueFlowgrams[index * numFlowCells + j];k++){
-                                               fastaFile << base;
+                                               newSeq += base;
                                        }
                                }
-                               fastaFile << endl;
+                               
+                               fastaFile << newSeq.substr(4) << endl;
                        }
                }
                fastaFile.close();
+               
+               if(compositeFASTAFileName != ""){
+                       m->appendFiles(fastaFileName, compositeFASTAFileName);
+               }
        }
        catch(exception& e) {
                m->errorOut(e, "ShhherCommand", "writeSequences");
@@ -2078,7 +2073,7 @@ void ShhherCommand::writeClusters(vector<int> otuCounts){
                ofstream otuCountsFile;
                m->openOutputFile(otuCountsFileName, otuCountsFile);
                
-               string bases = "TACG";
+               string bases = flowOrder;
                
                for(int i=0;i<numOTUs;i++){
                        //output the translated version of the centroid sequence for the otu
@@ -2098,15 +2093,18 @@ void ShhherCommand::writeClusters(vector<int> otuCounts){
                                        int sequence = aaI[i][j];
                                        otuCountsFile << seqNameVector[sequence] << '\t';
                                        
-                                       for(int k=8;k<lengths[sequence];k++){
+                                       string newSeq = "";
+                                       
+                                       for(int k=0;k<lengths[sequence];k++){
                                                char base = bases[k % 4];
                                                int freq = int(0.01 * (double)flowDataIntI[sequence * numFlowCells + k] + 0.5);
-                                               
+                                                       
                                                for(int s=0;s<freq;s++){
-                                                       otuCountsFile << base;
+                                                       newSeq += base;
+                                                       //otuCountsFile << base;
                                                }
                                        }
-                                       otuCountsFile << endl;
+                                       otuCountsFile << newSeq.substr(4) << endl;
                                }
                                otuCountsFile << endl;
                        }