]> git.donarmstrong.com Git - mothur.git/blob - shhhercommand.cpp
Pat's testing
[mothur.git] / shhhercommand.cpp
1 /*
2  *  shhher.cpp
3  *  Mothur
4  *
5  *  Created by Pat Schloss on 12/27/10.
6  *  Copyright 2010 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "shhhercommand.h"
11
12 #include "readcolumn.h"
13 #include "readmatrix.hpp"
14 #include "rabundvector.hpp"
15 #include "sabundvector.hpp"
16 #include "listvector.hpp"
17 #include "cluster.hpp"
18 #include "sparsematrix.hpp"
19 #include <cfloat>
20
21 //**********************************************************************************************************************
22 vector<string> ShhherCommand::setParameters(){  
23         try {
24                 CommandParameter pflow("flow", "InputTypes", "", "", "none", "fileflow", "none",false,false); parameters.push_back(pflow);
25                 CommandParameter pfile("file", "InputTypes", "", "", "none", "fileflow", "none",false,false); parameters.push_back(pfile);
26                 CommandParameter plookup("lookup", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(plookup);
27                 CommandParameter pcutoff("cutoff", "Number", "", "0.01", "", "", "",false,false); parameters.push_back(pcutoff);
28                 CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors);
29                 CommandParameter pmaxiter("maxiter", "Number", "", "1000", "", "", "",false,false); parameters.push_back(pmaxiter);
30                 CommandParameter psigma("sigma", "Number", "", "60", "", "", "",false,false); parameters.push_back(psigma);
31                 CommandParameter pmindelta("mindelta", "Number", "", "0.000001", "", "", "",false,false); parameters.push_back(pmindelta);
32                 CommandParameter porder("order", "String", "", "", "", "", "",false,false); parameters.push_back(porder);
33                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
34                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
35                 
36                 vector<string> myArray;
37                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
38                 return myArray;
39         }
40         catch(exception& e) {
41                 m->errorOut(e, "ShhherCommand", "setParameters");
42                 exit(1);
43         }
44 }
45 //**********************************************************************************************************************
46 string ShhherCommand::getHelpString(){  
47         try {
48                 string helpString = "";
49                 helpString += "The shhh.flows command reads a file containing flowgrams and creates a file of corrected sequences.\n";
50                 return helpString;
51         }
52         catch(exception& e) {
53                 m->errorOut(e, "ShhherCommand", "getHelpString");
54                 exit(1);
55         }
56 }
57 //**********************************************************************************************************************
58
59 ShhherCommand::ShhherCommand(){ 
60         try {
61                 abort = true; calledHelp = true;
62                 setParameters();
63                 
64                 //initialize outputTypes
65 //              vector<string> tempOutNames;
66 //              outputTypes["pn.dist"] = tempOutNames;
67
68         }
69         catch(exception& e) {
70                 m->errorOut(e, "ShhherCommand", "ShhherCommand");
71                 exit(1);
72         }
73 }
74
75 //**********************************************************************************************************************
76
77 ShhherCommand::ShhherCommand(string option) {
78         try {
79
80 #ifdef USE_MPI
81                 MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
82                 MPI_Comm_size(MPI_COMM_WORLD, &ncpus);
83
84                 if(pid == 0){
85 #endif
86                 
87                 
88                 abort = false; calledHelp = false;   
89                 
90                 
91                 //allow user to run help
92                 if(option == "help") { help(); abort = true; calledHelp = true; }
93                 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
94                 
95                 else {
96                         vector<string> myArray = setParameters();
97                         
98                         OptionParser parser(option);
99                         map<string,string> parameters = parser.getParameters();
100                         
101                         ValidParameters validParameter;
102                         map<string,string>::iterator it;
103                         
104                         //check to make sure all parameters are valid for command
105                         for (it = parameters.begin(); it != parameters.end(); it++) { 
106                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
107                         }
108                         
109                         //initialize outputTypes
110                         vector<string> tempOutNames;
111 //                      outputTypes["pn.dist"] = tempOutNames;
112                         //                      outputTypes["fasta"] = tempOutNames;
113                         
114                         //if the user changes the input directory command factory will send this info to us in the output parameter 
115                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
116                         if (inputDir == "not found"){   inputDir = "";          }
117                         else {
118                                 string path;
119                                 it = parameters.find("flow");
120                                 //user has given a template file
121                                 if(it != parameters.end()){ 
122                                         path = m->hasPath(it->second);
123                                         //if the user has not given a path then, add inputdir. else leave path alone.
124                                         if (path == "") {       parameters["flow"] = inputDir + it->second;             }
125                                 }
126                                 
127                                 it = parameters.find("lookup");
128                                 //user has given a template file
129                                 if(it != parameters.end()){ 
130                                         path = m->hasPath(it->second);
131                                         //if the user has not given a path then, add inputdir. else leave path alone.
132                                         if (path == "") {       parameters["lookup"] = inputDir + it->second;           }
133                                 }
134
135                                 it = parameters.find("file");
136                                 //user has given a template file
137                                 if(it != parameters.end()){ 
138                                         path = m->hasPath(it->second);
139                                         //if the user has not given a path then, add inputdir. else leave path alone.
140                                         if (path == "") {       parameters["file"] = inputDir + it->second;             }
141                                 }
142                         }
143                         
144                         
145                         //check for required parameters
146                         flowFileName = validParameter.validFile(parameters, "flow", true);
147                         flowFilesFileName = validParameter.validFile(parameters, "file", true);
148                         if (flowFileName == "not found" && flowFilesFileName == "not found") {
149                                 m->mothurOut("values for either flow or file must be provided for the shhh.seqs command.");
150                                 m->mothurOutEndLine();
151                                 abort = true; 
152                         }
153                         else if (flowFileName == "not open" || flowFilesFileName == "not open") { abort = true; }
154                         
155                         if(flowFileName != "not found"){
156                                 compositeFASTAFileName = "";    
157                                 compositeNamesFileName = "";    
158                         }
159                         else{
160                                 ofstream temp;
161
162                                 //flow.files = 9 character offset
163                                 compositeFASTAFileName = flowFilesFileName.substr(0, flowFilesFileName.length()-10) + "shhh.fasta";
164                                 m->openOutputFile(compositeFASTAFileName, temp);
165                                 temp.close();
166                                 
167                                 compositeNamesFileName = flowFilesFileName.substr(0, flowFilesFileName.length()-10) + "shhh.names";
168                                 m->openOutputFile(compositeNamesFileName, temp);
169                                 temp.close();
170                         }
171                         
172                         //if the user changes the output directory command factory will send this info to us in the output parameter 
173                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  
174                                 outputDir = ""; 
175                                 outputDir += m->hasPath(flowFileName); //if user entered a file with a path then preserve it    
176                         }
177                         
178                         
179                         //check for optional parameter and set defaults
180                         // ...at some point should added some additional type checking...
181                         string temp;
182                         temp = validParameter.validFile(parameters, "lookup", true);
183                         if (temp == "not found")        {       
184                                 lookupFileName = "LookUp_Titanium.pat"; 
185                                 
186                                 int ableToOpen;
187                                 ifstream in;
188                                 ableToOpen = m->openInputFile(lookupFileName, in, "noerror");
189                                 in.close();     
190                                 
191                                 //if you can't open it, try input location
192                                 if (ableToOpen == 1) {
193                                         if (inputDir != "") { //default path is set
194                                                 string tryPath = inputDir + lookupFileName;
195                                                 m->mothurOut("Unable to open " + lookupFileName + ". Trying input directory " + tryPath); m->mothurOutEndLine();
196                                                 ifstream in2;
197                                                 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
198                                                 in2.close();
199                                                 lookupFileName = tryPath;
200                                         }
201                                 }
202                                 
203                                 //if you can't open it, try default location
204                                 if (ableToOpen == 1) {
205                                         if (m->getDefaultPath() != "") { //default path is set
206                                                 string tryPath = m->getDefaultPath() + m->getSimpleName(lookupFileName);
207                                                 m->mothurOut("Unable to open " + lookupFileName + ". Trying default " + tryPath); m->mothurOutEndLine();
208                                                 ifstream in2;
209                                                 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
210                                                 in2.close();
211                                                 lookupFileName = tryPath;
212                                         }
213                                 }
214                                 
215                                 //if you can't open it its not in current working directory or inputDir, try mothur excutable location
216                                 if (ableToOpen == 1) {
217                                         string exepath = m->argv;
218                                         string tempPath = exepath;
219                                         for (int i = 0; i < exepath.length(); i++) { tempPath[i] = tolower(exepath[i]); }
220                                         exepath = exepath.substr(0, (tempPath.find_last_of('m')));
221                                         
222                                         string tryPath = m->getFullPathName(exepath) + m->getSimpleName(lookupFileName);
223                                         m->mothurOut("Unable to open " + lookupFileName + ". Trying mothur's executable location " + tryPath); m->mothurOutEndLine();
224                                         ifstream in2;
225                                         ableToOpen = m->openInputFile(tryPath, in2, "noerror");
226                                         in2.close();
227                                         lookupFileName = tryPath;
228                                 }
229                                 
230                                 if (ableToOpen == 1) {  m->mothurOut("Unable to open " + lookupFileName + "."); m->mothurOutEndLine(); abort=true;  }
231                         }
232                         else if(temp == "not open")     {       
233                                 
234                                 lookupFileName = validParameter.validFile(parameters, "lookup", false);
235                                 
236                                 //if you can't open it its not inputDir, try mothur excutable location
237                                 string exepath = m->argv;
238                                 string tempPath = exepath;
239                                 for (int i = 0; i < exepath.length(); i++) { tempPath[i] = tolower(exepath[i]); }
240                                 exepath = exepath.substr(0, (tempPath.find_last_of('m')));
241                                         
242                                 string tryPath = m->getFullPathName(exepath) + lookupFileName;
243                                 m->mothurOut("Unable to open " + lookupFileName + ". Trying mothur's executable location " + tryPath); m->mothurOutEndLine();
244                                 ifstream in2;
245                                 int ableToOpen = m->openInputFile(tryPath, in2, "noerror");
246                                 in2.close();
247                                 lookupFileName = tryPath;
248                                 
249                                 if (ableToOpen == 1) {  m->mothurOut("Unable to open " + lookupFileName + "."); m->mothurOutEndLine(); abort=true;  }
250                         }else                                           {       lookupFileName = temp;  }
251                         
252                         temp = validParameter.validFile(parameters, "processors", false);       if (temp == "not found"){       temp = m->getProcessors();      }
253                         m->setProcessors(temp);
254                         m->mothurConvert(temp, processors);
255
256                         temp = validParameter.validFile(parameters, "cutoff", false);   if (temp == "not found"){       temp = "0.01";          }
257                         m->mothurConvert(temp, cutoff); 
258                         
259                         temp = validParameter.validFile(parameters, "mindelta", false); if (temp == "not found"){       temp = "0.000001";      }
260                         m->mothurConvert(temp, minDelta); 
261
262                         temp = validParameter.validFile(parameters, "maxiter", false);  if (temp == "not found"){       temp = "1000";          }
263                         m->mothurConvert(temp, maxIters); 
264
265                         temp = validParameter.validFile(parameters, "sigma", false);if (temp == "not found")    {       temp = "60";            }
266                         m->mothurConvert(temp, sigma); 
267                         
268                         flowOrder = validParameter.validFile(parameters, "order", false);
269                         if (flowOrder == "not found"){ flowOrder = "TACG";              }
270                         else if(flowOrder.length() != 4){
271                                 m->mothurOut("The value of the order option must be four bases long\n");
272                         }
273                         
274                 }
275                         
276 #ifdef USE_MPI
277                 }                               
278 #endif
279                                 
280         }
281         catch(exception& e) {
282                 m->errorOut(e, "ShhherCommand", "ShhherCommand");
283                 exit(1);
284         }
285 }
286 //**********************************************************************************************************************
287 #ifdef USE_MPI
288 int ShhherCommand::execute(){
289         try {
290                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
291                 
292                 int tag = 1976;
293                 MPI_Status status; 
294
295                 if(pid == 0){
296
297                         for(int i=1;i<ncpus;i++){
298                                 MPI_Send(&abort, 1, MPI_INT, i, tag, MPI_COMM_WORLD);
299                         }
300                         if(abort == 1){ return 0;       }
301
302                         processors = ncpus;
303                         
304                         m->mothurOut("\nGetting preliminary data...\n");
305                         getSingleLookUp();      if (m->control_pressed) { return 0; }
306                         getJointLookUp();       if (m->control_pressed) { return 0; }
307                         
308                         vector<string> flowFileVector;
309                         if(flowFilesFileName != "not found"){
310                                 string fName;
311
312                                 ifstream flowFilesFile;
313                                 m->openInputFile(flowFilesFileName, flowFilesFile);
314                                 while(flowFilesFile){
315                                         fName = m->getline(flowFilesFile);
316                                         flowFileVector.push_back(fName);
317                                         m->gobble(flowFilesFile);
318                                 }
319                         }
320                         else{
321                                 flowFileVector.push_back(flowFileName);
322                         }
323                         int numFiles = flowFileVector.size();
324
325                         for(int i=1;i<ncpus;i++){
326                                 MPI_Send(&numFiles, 1, MPI_INT, i, tag, MPI_COMM_WORLD);
327                         }
328                         
329                         for(int i=0;i<numFiles;i++){
330                                 
331                                 if (m->control_pressed) { break; }
332                                 
333                                 double begClock = clock();
334                                 unsigned long long begTime = time(NULL);
335
336                                 flowFileName = flowFileVector[i];
337                                 
338                                 m->mothurOut("\n>>>>>\tProcessing " + flowFileName + " (file " + toString(i+1) + " of " + toString(numFiles) + ")\t<<<<<\n");
339                                 m->mothurOut("Reading flowgrams...\n");
340                                 getFlowData();
341                                 
342                                 if (m->control_pressed) { break; }
343
344                                 m->mothurOut("Identifying unique flowgrams...\n");
345                                 getUniques();
346                                 
347                                 if (m->control_pressed) { break; }
348
349                                 m->mothurOut("Calculating distances between flowgrams...\n");
350                                 char fileName[1024];
351                                 strcpy(fileName, flowFileName.c_str());
352
353                                 for(int i=1;i<ncpus;i++){
354                                         MPI_Send(&fileName[0], 1024, MPI_CHAR, i, tag, MPI_COMM_WORLD);
355
356                                         MPI_Send(&numSeqs, 1, MPI_INT, i, tag, MPI_COMM_WORLD);
357                                         MPI_Send(&numUniques, 1, MPI_INT, i, tag, MPI_COMM_WORLD);
358                                         MPI_Send(&numFlowCells, 1, MPI_INT, i, tag, MPI_COMM_WORLD);
359                                         MPI_Send(&flowDataIntI[0], numSeqs * numFlowCells, MPI_SHORT, i, tag, MPI_COMM_WORLD);
360                                         MPI_Send(&flowDataPrI[0], numSeqs * numFlowCells, MPI_DOUBLE, i, tag, MPI_COMM_WORLD);
361                                         MPI_Send(&mapUniqueToSeq[0], numSeqs, MPI_INT, i, tag, MPI_COMM_WORLD);
362                                         MPI_Send(&mapSeqToUnique[0], numSeqs, MPI_INT, i, tag, MPI_COMM_WORLD);
363                                         MPI_Send(&lengths[0], numSeqs, MPI_INT, i, tag, MPI_COMM_WORLD);
364                                         MPI_Send(&jointLookUp[0], NUMBINS * NUMBINS, MPI_DOUBLE, i, tag, MPI_COMM_WORLD);
365                                         MPI_Send(&cutoff, 1, MPI_DOUBLE, i, tag, MPI_COMM_WORLD);
366                                 }                               
367                                                         
368                                 string distFileName = flowDistMPI(0, int(sqrt(1.0/float(ncpus)) * numUniques));
369                                 
370                                 if (m->control_pressed) { break; }
371                                 
372                                 int done;
373                                 for(int i=1;i<ncpus;i++){
374                                         MPI_Recv(&done, 1, MPI_INT, i, tag, MPI_COMM_WORLD, &status);
375                                         
376                                         m->appendFiles((distFileName + ".temp." + toString(i)), distFileName);
377                                         m->mothurRemove((distFileName + ".temp." + toString(i)));
378                                 }
379
380                                 string namesFileName = createNamesFile();
381                                 
382                                 if (m->control_pressed) { break; }
383                                 
384                                 m->mothurOut("\nClustering flowgrams...\n");
385                                 string listFileName = cluster(distFileName, namesFileName);
386                                 
387                                 if (m->control_pressed) { break; }
388                                 
389                 
390                 
391                                 getOTUData(listFileName);
392
393                                 m->mothurRemove(distFileName);
394                                 m->mothurRemove(namesFileName);
395                                 m->mothurRemove(listFileName);
396                                 
397                                 if (m->control_pressed) { break; }
398                                 
399                                 initPyroCluster();
400
401                                 if (m->control_pressed) { break; }
402                                 
403             
404                                 for(int i=1;i<ncpus;i++){
405                                         MPI_Send(&numOTUs, 1, MPI_INT, i, tag, MPI_COMM_WORLD);
406                                         MPI_Send(&singleLookUp[0], singleLookUp.size(), MPI_DOUBLE, i, tag, MPI_COMM_WORLD);
407                                         MPI_Send(&uniqueFlowgrams[0], numFlowCells * numUniques, MPI_SHORT, i, tag, MPI_COMM_WORLD);
408                                         MPI_Send(&sigma, 1, MPI_DOUBLE, i, tag, MPI_COMM_WORLD);
409                                 }
410                                 
411                                 if (m->control_pressed) { break; }
412                                 
413                                 double maxDelta = 0;
414                                 int iter = 0;
415                                 
416                                 int numOTUsOnCPU = numOTUs / ncpus;
417                                 int numSeqsOnCPU = numSeqs / ncpus;
418                                 m->mothurOut("\nDenoising flowgrams...\n");
419                                 m->mothurOut("iter\tmaxDelta\tnLL\t\tcycletime\n");
420                                 
421                                 while((maxIters == 0 && maxDelta > minDelta) || iter < MIN_ITER || (maxDelta > minDelta && iter < maxIters)){
422                                         
423                                         double cycClock = clock();
424                                         unsigned long long cycTime = time(NULL);
425                                         fill();
426                                         
427                                         if (m->control_pressed) { break; }
428
429                                         int total = singleTau.size();
430                                         for(int i=1;i<ncpus;i++){
431                                                 MPI_Send(&total, 1, MPI_INT, i, tag, MPI_COMM_WORLD);
432                                                 MPI_Send(&change[0], numOTUs, MPI_SHORT, i, tag, MPI_COMM_WORLD);
433                                                 MPI_Send(&centroids[0], numOTUs, MPI_INT, i, tag, MPI_COMM_WORLD);
434                                                 
435                                                 MPI_Send(&singleTau[0], total, MPI_DOUBLE, i, tag, MPI_COMM_WORLD);
436                                                 MPI_Send(&seqNumber[0], total, MPI_INT, i, tag, MPI_COMM_WORLD);
437                                                 MPI_Send(&seqIndex[0], total, MPI_INT, i, tag, MPI_COMM_WORLD);
438                                                 MPI_Send(&nSeqsPerOTU[0], numOTUs, MPI_INT, i, tag, MPI_COMM_WORLD);
439                                                 MPI_Send(&cumNumSeqs[0], numOTUs, MPI_INT, i, tag, MPI_COMM_WORLD);
440                                         }
441                                         
442                                         calcCentroidsDriver(0, numOTUsOnCPU);
443                                         
444                                         for(int i=1;i<ncpus;i++){
445                                                 int otuStart = i * numOTUs / ncpus;
446                                                 int otuStop = (i + 1) * numOTUs / ncpus;
447                                                 
448                                                 vector<int> tempCentroids(numOTUs, 0);
449                                                 vector<short> tempChange(numOTUs, 0);
450                                                 
451                                                 MPI_Recv(&tempCentroids[0], numOTUs, MPI_INT, i, tag, MPI_COMM_WORLD, &status);
452                                                 MPI_Recv(&tempChange[0], numOTUs, MPI_SHORT, i, tag, MPI_COMM_WORLD, &status);
453                                                 
454                                                 for(int j=otuStart;j<otuStop;j++){
455                                                         centroids[j] = tempCentroids[j];
456                                                         change[j] = tempChange[j];
457                                                 }
458                                         }
459                                                                         
460                                         maxDelta = getNewWeights(); if (m->control_pressed) { break; }
461                                         double nLL = getLikelihood(); if (m->control_pressed) { break; }
462                                         checkCentroids(); if (m->control_pressed) { break; }
463                                         
464                                         for(int i=1;i<ncpus;i++){
465                                                 MPI_Send(&centroids[0], numOTUs, MPI_INT, i, tag, MPI_COMM_WORLD);
466                                                 MPI_Send(&weight[0], numOTUs, MPI_DOUBLE, i, tag, MPI_COMM_WORLD);
467                                                 MPI_Send(&change[0], numOTUs, MPI_SHORT, i, tag, MPI_COMM_WORLD);
468                                         }
469                                         
470                                         calcNewDistancesParent(0, numSeqsOnCPU);
471
472                                         total = singleTau.size();
473
474                                         for(int i=1;i<ncpus;i++){
475                                                 int childTotal;
476                                                 int seqStart = i * numSeqs / ncpus;
477                                                 int seqStop = (i + 1) * numSeqs / ncpus;
478                                                 
479                                                 MPI_Recv(&childTotal, 1, MPI_INT, i, tag, MPI_COMM_WORLD, &status);
480
481                                                 vector<int> childSeqIndex(childTotal, 0);
482                                                 vector<double> childSingleTau(childTotal, 0);
483                                                 vector<double> childDist(numSeqs * numOTUs, 0);
484                                                 vector<int> otuIndex(childTotal, 0);
485                                                 
486                                                 MPI_Recv(&childSeqIndex[0], childTotal, MPI_INT, i, tag, MPI_COMM_WORLD, &status);
487                                                 MPI_Recv(&childSingleTau[0], childTotal, MPI_DOUBLE, i, tag, MPI_COMM_WORLD, &status);
488                                                 MPI_Recv(&childDist[0], numOTUs * numSeqs, MPI_DOUBLE, i, tag, MPI_COMM_WORLD, &status);
489                                                 MPI_Recv(&otuIndex[0], childTotal, MPI_INT, i, tag, MPI_COMM_WORLD, &status);
490
491                                                 int oldTotal = total;
492                                                 total += childTotal;
493                                                 singleTau.resize(total, 0);
494                                                 seqIndex.resize(total, 0);
495                                                 seqNumber.resize(total, 0);
496                                                 
497                                                 int childIndex = 0;
498                                                 
499                                                 for(int j=oldTotal;j<total;j++){
500                                                         int otuI = otuIndex[childIndex];
501                                                         int seqI = childSeqIndex[childIndex];
502                                                         
503                                                         singleTau[j] = childSingleTau[childIndex];
504                                                         
505                                                         aaP[otuI][nSeqsPerOTU[otuI]] = j;
506                                                         aaI[otuI][nSeqsPerOTU[otuI]] = seqI;
507                                                         nSeqsPerOTU[otuI]++;
508                                                         childIndex++;
509                                                 }
510                                                 
511                                                 int index = seqStart * numOTUs;
512                                                 for(int j=seqStart;j<seqStop;j++){
513                                                         for(int k=0;k<numOTUs;k++){
514                                                                 dist[index] = childDist[index];
515                                                                 index++;
516                                                         }
517                                                 }                                       
518                                         }
519                                         
520                                         iter++;
521                                         
522                                         m->mothurOut(toString(iter) + '\t' + toString(maxDelta) + '\t' + toString(nLL) + '\t' + toString(time(NULL) - cycTime) + '\t' + toString((clock() - cycClock)/(double)CLOCKS_PER_SEC) + '\n');                  
523
524                                         if((maxIters == 0 && maxDelta > minDelta) || iter < MIN_ITER || (maxDelta > minDelta && iter < maxIters)){
525                                                 int live = 1;
526                                                 for(int i=1;i<ncpus;i++){
527                                                         MPI_Send(&live, 1, MPI_INT, i, tag, MPI_COMM_WORLD);
528                                                 }
529                                         }
530                                         else{
531                                                 int live = 0;
532                                                 for(int i=1;i<ncpus;i++){
533                                                         MPI_Send(&live, 1, MPI_INT, i, tag, MPI_COMM_WORLD); //send kill command
534                                                 }
535                                         }
536                                         
537                                 }       
538                                 
539                                 if (m->control_pressed) { break; }
540                                 
541                                 m->mothurOut("\nFinalizing...\n");
542                                 fill();
543                                 
544                                 if (m->control_pressed) { break; }
545                                 
546                                 setOTUs();
547                                 
548                                 vector<int> otuCounts(numOTUs, 0);
549                                 for(int i=0;i<numSeqs;i++)      {       otuCounts[otuData[i]]++;        }
550                                 calcCentroidsDriver(0, numOTUs);
551                                 
552                                 if (m->control_pressed) { break; }
553                                 
554                                 writeQualities(otuCounts);      if (m->control_pressed) { break; }
555                                 writeSequences(otuCounts);      if (m->control_pressed) { break; }
556                                 writeNames(otuCounts);          if (m->control_pressed) { break; }
557                                 writeClusters(otuCounts);       if (m->control_pressed) { break; }
558                                 writeGroups();                          if (m->control_pressed) { break; }
559                                 
560                                                                  
561                                 m->mothurOut("Total time to process " + toString(flowFileName) + ":\t" + toString(time(NULL) - begTime) + '\t' + toString((clock() - begClock)/(double)CLOCKS_PER_SEC) + '\n');                 
562                         }
563                 }
564                 else{
565                         int abort = 1;
566
567                         MPI_Recv(&abort, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
568                         if(abort){      return 0;       }
569
570                         int numFiles;
571                         MPI_Recv(&numFiles, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
572
573                         for(int i=0;i<numFiles;i++){
574                                 
575                                 if (m->control_pressed) { break; }
576                                 
577                                 //Now into the pyrodist part
578                                 bool live = 1;
579
580                                 char fileName[1024];
581                                 MPI_Recv(&fileName, 1024, MPI_CHAR, 0, tag, MPI_COMM_WORLD, &status);
582                                 MPI_Recv(&numSeqs, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
583                                 MPI_Recv(&numUniques, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
584                                 MPI_Recv(&numFlowCells, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
585                                 
586                                 flowDataIntI.resize(numSeqs * numFlowCells);
587                                 flowDataPrI.resize(numSeqs * numFlowCells);
588                                 mapUniqueToSeq.resize(numSeqs);
589                                 mapSeqToUnique.resize(numSeqs);
590                                 lengths.resize(numSeqs);
591                                 jointLookUp.resize(NUMBINS * NUMBINS);
592
593                                 MPI_Recv(&flowDataIntI[0], numSeqs * numFlowCells, MPI_SHORT, 0, tag, MPI_COMM_WORLD, &status);
594                                 MPI_Recv(&flowDataPrI[0], numSeqs * numFlowCells, MPI_DOUBLE, 0, tag, MPI_COMM_WORLD, &status);
595                                 MPI_Recv(&mapUniqueToSeq[0], numSeqs, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
596                                 MPI_Recv(&mapSeqToUnique[0], numSeqs, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
597                                 MPI_Recv(&lengths[0], numSeqs, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
598                                 MPI_Recv(&jointLookUp[0], NUMBINS * NUMBINS, MPI_DOUBLE, 0, tag, MPI_COMM_WORLD, &status);
599                                 MPI_Recv(&cutoff, 1, MPI_DOUBLE, 0, tag, MPI_COMM_WORLD, &status);
600
601                                 flowFileName = string(fileName);
602                                 int flowDistStart = int(sqrt(float(pid)/float(ncpus)) * numUniques);
603                                 int flowDistEnd = int(sqrt(float(pid+1)/float(ncpus)) * numUniques);
604                                 
605                                 string distanceStringChild = flowDistMPI(flowDistStart, flowDistEnd);
606                                 
607                                 if (m->control_pressed) { break; }
608                                 
609                                 int done = 1;
610                                 MPI_Send(&done, 1, MPI_INT, 0, tag, MPI_COMM_WORLD);
611
612                                 //Now into the pyronoise part
613                                 MPI_Recv(&numOTUs, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
614                                 
615                                 singleLookUp.resize(HOMOPS * NUMBINS);
616                                 uniqueFlowgrams.resize(numUniques * numFlowCells);
617                                 weight.resize(numOTUs);
618                                 centroids.resize(numOTUs);
619                                 change.resize(numOTUs);
620                                 dist.assign(numOTUs * numSeqs, 0);
621                                 nSeqsPerOTU.resize(numOTUs);
622                                 cumNumSeqs.resize(numOTUs);
623
624                                 MPI_Recv(&singleLookUp[0], singleLookUp.size(), MPI_DOUBLE, 0, tag, MPI_COMM_WORLD, &status);
625                                 MPI_Recv(&uniqueFlowgrams[0], uniqueFlowgrams.size(), MPI_SHORT, 0, tag, MPI_COMM_WORLD, &status);
626                                 MPI_Recv(&sigma, 1, MPI_DOUBLE, 0, tag, MPI_COMM_WORLD, &status);
627                                 
628                                 int startOTU = pid * numOTUs / ncpus;
629                                 int endOTU = (pid + 1) * numOTUs / ncpus;
630
631                                 int startSeq = pid * numSeqs / ncpus;
632                                 int endSeq = (pid + 1) * numSeqs /ncpus;
633                                 
634                                 int total;
635
636                                 while(live){
637                                         
638                                         if (m->control_pressed) { break; }
639                                         
640                                         MPI_Recv(&total, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
641                                         singleTau.assign(total, 0.0000);
642                                         seqNumber.assign(total, 0);
643                                         seqIndex.assign(total, 0);
644
645                                         MPI_Recv(&change[0], numOTUs, MPI_SHORT, 0, tag, MPI_COMM_WORLD, &status);
646                                         MPI_Recv(&centroids[0], numOTUs, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
647                                         MPI_Recv(&singleTau[0], total, MPI_DOUBLE, 0, tag, MPI_COMM_WORLD, &status);
648                                         MPI_Recv(&seqNumber[0], total, MPI_DOUBLE, 0, tag, MPI_COMM_WORLD, &status);
649                                         MPI_Recv(&seqIndex[0], total, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
650                                         MPI_Recv(&nSeqsPerOTU[0], total, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
651                                         MPI_Recv(&cumNumSeqs[0], numOTUs, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
652
653                                         calcCentroidsDriver(startOTU, endOTU);
654
655                                         MPI_Send(&centroids[0], numOTUs, MPI_INT, 0, tag, MPI_COMM_WORLD);
656                                         MPI_Send(&change[0], numOTUs, MPI_SHORT, 0, tag, MPI_COMM_WORLD);
657
658                                         MPI_Recv(&centroids[0], numOTUs, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
659                                         MPI_Recv(&weight[0], numOTUs, MPI_DOUBLE, 0, tag, MPI_COMM_WORLD, &status);
660                                         MPI_Recv(&change[0], numOTUs, MPI_SHORT, 0, tag, MPI_COMM_WORLD, &status);
661
662                                         vector<int> otuIndex(total, 0);
663                                         calcNewDistancesChildMPI(startSeq, endSeq, otuIndex);
664                                         total = otuIndex.size();
665                                         
666                                         MPI_Send(&total, 1, MPI_INT, 0, tag, MPI_COMM_WORLD);
667                                         MPI_Send(&seqIndex[0], total, MPI_INT, 0, tag, MPI_COMM_WORLD);
668                                         MPI_Send(&singleTau[0], total, MPI_DOUBLE, 0, tag, MPI_COMM_WORLD);
669                                         MPI_Send(&dist[0], numOTUs * numSeqs, MPI_DOUBLE, 0, tag, MPI_COMM_WORLD);
670                                         MPI_Send(&otuIndex[0], total, MPI_INT, 0, tag, MPI_COMM_WORLD);
671                                 
672                                         MPI_Recv(&live, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
673                                 }
674                         }
675                 }
676                 
677                 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
678                 
679                 MPI_Barrier(MPI_COMM_WORLD);
680
681                 
682                 if(compositeFASTAFileName != ""){
683                         outputNames.push_back(compositeFASTAFileName);
684                         outputNames.push_back(compositeNamesFileName);
685                 }
686
687                 m->mothurOutEndLine();
688                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
689                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
690                 m->mothurOutEndLine();
691                 
692                 return 0;
693
694         }
695         catch(exception& e) {
696                 m->errorOut(e, "ShhherCommand", "execute");
697                 exit(1);
698         }
699 }
700
701 /**************************************************************************************************/
702
703 string ShhherCommand::flowDistMPI(int startSeq, int stopSeq){
704         try{            
705                 ostringstream outStream;
706                 outStream.setf(ios::fixed, ios::floatfield);
707                 outStream.setf(ios::dec, ios::basefield);
708                 outStream.setf(ios::showpoint);
709                 outStream.precision(6);
710                 
711                 int begTime = time(NULL);
712                 double begClock = clock();
713                 
714                 for(int i=startSeq;i<stopSeq;i++){
715                         
716                         if (m->control_pressed) { break; }
717                         
718                         for(int j=0;j<i;j++){
719                                 float flowDistance = calcPairwiseDist(mapUniqueToSeq[i], mapUniqueToSeq[j]);
720                                 
721                                 if(flowDistance < 1e-6){
722                                         outStream << mapUniqueToSeq[i] << '\t' << mapUniqueToSeq[j] << '\t' << 0.000000 << endl;
723                                 }
724                                 else if(flowDistance <= cutoff){
725                                         outStream << mapUniqueToSeq[i] << '\t' << mapUniqueToSeq[j] << '\t' << flowDistance << endl;
726                                 }
727                         }
728                         if(i % 100 == 0){
729                                 m->mothurOut(toString(i) + '\t' + toString(time(NULL) - begTime) + '\t' + toString((clock()-begClock)/CLOCKS_PER_SEC) + '\n');
730                         }
731                 }
732                 
733                 string fDistFileName = flowFileName.substr(0,flowFileName.find_last_of('.')) + ".shhh.dist";
734                 if(pid != 0){   fDistFileName += ".temp." + toString(pid);      }
735                 
736                 if (m->control_pressed) { return fDistFileName; }
737                 
738                 m->mothurOut(toString(stopSeq) + '\t' + toString(time(NULL) - begTime) + '\t' + toString((clock()-begClock)/CLOCKS_PER_SEC) + '\n');
739
740                 ofstream distFile(fDistFileName.c_str());
741                 distFile << outStream.str();            
742                 distFile.close();
743                 
744                 return fDistFileName;
745         }
746         catch(exception& e) {
747                 m->errorOut(e, "ShhherCommand", "flowDistParentFork");
748                 exit(1);
749         }
750 }
751
752 #else
753 //**********************************************************************************************************************
754
755 int ShhherCommand::execute(){
756         try {
757                 if (abort == true) { return 0; }
758                 
759                 getSingleLookUp();      if (m->control_pressed) { return 0; }
760                 getJointLookUp();       if (m->control_pressed) { return 0; }
761                                 
762                 
763                 vector<string> flowFileVector;
764                 if(flowFilesFileName != "not found"){
765                         string fName;
766                         
767                         ifstream flowFilesFile;
768                         m->openInputFile(flowFilesFileName, flowFilesFile);
769                         while(flowFilesFile){
770                                 fName = m->getline(flowFilesFile);
771                                 flowFileVector.push_back(fName);
772                                 m->gobble(flowFilesFile);
773                         }
774                 }
775                 else{
776                         flowFileVector.push_back(flowFileName);
777                 }
778                 int numFiles = flowFileVector.size();
779                 
780                 
781                 for(int i=0;i<numFiles;i++){
782                         
783                         if (m->control_pressed) { break; }
784                         
785                         flowFileName = flowFileVector[i];
786
787                         m->mothurOut("\n>>>>>\tProcessing " + flowFileName + " (file " + toString(i+1) + " of " + toString(numFiles) + ")\t<<<<<\n");
788                         m->mothurOut("Reading flowgrams...\n");
789                         getFlowData();
790                         
791                         if (m->control_pressed) { break; }
792                         
793                         m->mothurOut("Identifying unique flowgrams...\n");
794                         getUniques();
795                         
796                         if (m->control_pressed) { break; }
797                         
798                         m->mothurOut("Calculating distances between flowgrams...\n");
799                         string distFileName = createDistFile(processors);
800                         string namesFileName = createNamesFile();
801                         
802                         if (m->control_pressed) { break; }
803                         
804                         m->mothurOut("\nClustering flowgrams...\n");
805                         string listFileName = cluster(distFileName, namesFileName);
806                         
807                         if (m->control_pressed) { break; }
808                         
809                         getOTUData(listFileName);
810                         
811                         if (m->control_pressed) { break; }
812                         
813                         m->mothurRemove(distFileName);
814                         m->mothurRemove(namesFileName);
815                         m->mothurRemove(listFileName);
816                         
817                         initPyroCluster();
818                         
819                         if (m->control_pressed) { break; }
820                         
821                         double maxDelta = 0;
822                         int iter = 0;
823                         
824                         double begClock = clock();
825                         unsigned long long begTime = time(NULL);
826
827                         m->mothurOut("\nDenoising flowgrams...\n");
828                         m->mothurOut("iter\tmaxDelta\tnLL\t\tcycletime\n");
829                         
830                         while((maxIters == 0 && maxDelta > minDelta) || iter < MIN_ITER || (maxDelta > minDelta && iter < maxIters)){
831                                 
832                                 if (m->control_pressed) { break; }
833                                 
834                                 double cycClock = clock();
835                                 unsigned long long cycTime = time(NULL);
836                                 fill();
837                                 
838                                 if (m->control_pressed) { break; }
839
840                                 calcCentroids();
841                                 
842                                 if (m->control_pressed) { break; }
843
844                                 maxDelta = getNewWeights();  if (m->control_pressed) { break; }
845                                 double nLL = getLikelihood(); if (m->control_pressed) { break; }
846                                 checkCentroids();
847                                 
848                                 if (m->control_pressed) { break; }
849                                 
850                                 calcNewDistances();
851                                 
852                                 if (m->control_pressed) { break; }
853                                 
854                                 iter++;
855                                 
856                                 m->mothurOut(toString(iter) + '\t' + toString(maxDelta) + '\t' + toString(nLL) + '\t' + toString(time(NULL) - cycTime) + '\t' + toString((clock() - cycClock)/(double)CLOCKS_PER_SEC) + '\n');
857
858                         }       
859                         
860                         if (m->control_pressed) { break; }
861                         
862                         m->mothurOut("\nFinalizing...\n");
863                         fill();
864                         
865                         if (m->control_pressed) { break; }
866                         
867                         setOTUs();
868                         
869                         if (m->control_pressed) { break; }
870                         
871                         vector<int> otuCounts(numOTUs, 0);
872                         for(int i=0;i<numSeqs;i++)      {       otuCounts[otuData[i]]++;        }
873                         
874                         calcCentroidsDriver(0, numOTUs);        if (m->control_pressed) { break; }
875                         writeQualities(otuCounts);                      if (m->control_pressed) { break; }
876                         writeSequences(otuCounts);                      if (m->control_pressed) { break; }
877                         writeNames(otuCounts);                          if (m->control_pressed) { break; }
878                         writeClusters(otuCounts);                       if (m->control_pressed) { break; }
879                         writeGroups();                                          if (m->control_pressed) { break; }
880                         
881                         m->mothurOut("Total time to process " + flowFileName + ":\t" + toString(time(NULL) - begTime) + '\t' + toString((clock() - begClock)/(double)CLOCKS_PER_SEC) + '\n');
882                 }
883                 
884                 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
885
886                 
887                 if(compositeFASTAFileName != ""){
888                         outputNames.push_back(compositeFASTAFileName);
889                         outputNames.push_back(compositeNamesFileName);
890                 }
891
892                 m->mothurOutEndLine();
893                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
894                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
895                 m->mothurOutEndLine();
896                 
897                 return 0;
898         }
899         catch(exception& e) {
900                 m->errorOut(e, "ShhherCommand", "execute");
901                 exit(1);
902         }
903 }
904 #endif
905 /**************************************************************************************************/
906
907 void ShhherCommand::getFlowData(){
908         try{
909                 ifstream flowFile;
910                 m->openInputFile(flowFileName, flowFile);
911                 
912                 string seqName;
913                 seqNameVector.clear();
914                 lengths.clear();
915                 flowDataIntI.clear();
916                 nameMap.clear();
917                 
918                 
919                 int currentNumFlowCells;
920                 
921                 float intensity;
922                 
923                 flowFile >> numFlowCells;
924                 int index = 0;//pcluster
925                 while(!flowFile.eof()){
926                         
927                         if (m->control_pressed) { break; }
928                         
929                         flowFile >> seqName >> currentNumFlowCells;
930                         lengths.push_back(currentNumFlowCells);
931
932                         seqNameVector.push_back(seqName);
933                         nameMap[seqName] = index++;//pcluster
934
935                         for(int i=0;i<numFlowCells;i++){
936                                 flowFile >> intensity;
937                                 if(intensity > 9.99)    {       intensity = 9.99;       }
938                                 int intI = int(100 * intensity + 0.0001);
939                                 flowDataIntI.push_back(intI);
940                         }
941                         m->gobble(flowFile);
942                 }
943                 flowFile.close();
944                 
945                 numSeqs = seqNameVector.size();         
946                 
947                 for(int i=0;i<numSeqs;i++){
948                         
949                         if (m->control_pressed) { break; }
950                         
951                         int iNumFlowCells = i * numFlowCells;
952                         for(int j=lengths[i];j<numFlowCells;j++){
953                                 flowDataIntI[iNumFlowCells + j] = 0;
954                         }
955                 }
956                 
957         }
958         catch(exception& e) {
959                 m->errorOut(e, "ShhherCommand", "getFlowData");
960                 exit(1);
961         }
962 }
963
964 /**************************************************************************************************/
965
966 void ShhherCommand::getSingleLookUp(){
967         try{
968                 //      these are the -log probabilities that a signal corresponds to a particular homopolymer length
969                 singleLookUp.assign(HOMOPS * NUMBINS, 0);
970                 
971                 int index = 0;
972                 ifstream lookUpFile;
973                 m->openInputFile(lookupFileName, lookUpFile);
974                 
975                 for(int i=0;i<HOMOPS;i++){
976                         
977                         if (m->control_pressed) { break; }
978                         
979                         float logFracFreq;
980                         lookUpFile >> logFracFreq;
981                         
982                         for(int j=0;j<NUMBINS;j++)      {
983                                 lookUpFile >> singleLookUp[index];
984                                 index++;
985                         }
986                 }       
987                 lookUpFile.close();
988         }
989         catch(exception& e) {
990                 m->errorOut(e, "ShhherCommand", "getSingleLookUp");
991                 exit(1);
992         }
993 }
994
995 /**************************************************************************************************/
996
997 void ShhherCommand::getJointLookUp(){
998         try{
999                 
1000                 //      the most likely joint probability (-log) that two intenities have the same polymer length
1001                 jointLookUp.resize(NUMBINS * NUMBINS, 0);
1002                 
1003                 for(int i=0;i<NUMBINS;i++){
1004                         
1005                         if (m->control_pressed) { break; }
1006                         
1007                         for(int j=0;j<NUMBINS;j++){             
1008                                 
1009                                 double minSum = 100000000;
1010                                 
1011                                 for(int k=0;k<HOMOPS;k++){
1012                                         double sum = singleLookUp[k * NUMBINS + i] + singleLookUp[k * NUMBINS + j];
1013                                         
1014                                         if(sum < minSum)        {       minSum = sum;           }
1015                                 }       
1016                                 jointLookUp[i * NUMBINS + j] = minSum;
1017                         }
1018                 }
1019         }
1020         catch(exception& e) {
1021                 m->errorOut(e, "ShhherCommand", "getJointLookUp");
1022                 exit(1);
1023         }
1024 }
1025
1026 /**************************************************************************************************/
1027
1028 double ShhherCommand::getProbIntensity(int intIntensity){                          
1029         try{
1030
1031                 double minNegLogProb = 100000000; 
1032
1033                 
1034                 for(int i=0;i<HOMOPS;i++){//loop signal strength
1035                         
1036                         if (m->control_pressed) { break; }
1037                         
1038                         float negLogProb = singleLookUp[i * NUMBINS + intIntensity];
1039                         if(negLogProb < minNegLogProb)  {       minNegLogProb = negLogProb; }
1040                 }
1041                 
1042                 return minNegLogProb;
1043         }
1044         catch(exception& e) {
1045                 m->errorOut(e, "ShhherCommand", "getProbIntensity");
1046                 exit(1);
1047         }
1048 }
1049
1050 /**************************************************************************************************/
1051
1052 void ShhherCommand::getUniques(){
1053         try{
1054                 
1055                 
1056                 numUniques = 0;
1057                 uniqueFlowgrams.assign(numFlowCells * numSeqs, -1);
1058                 uniqueCount.assign(numSeqs, 0);                                                 //      anWeights
1059                 uniqueLengths.assign(numSeqs, 0);
1060                 mapSeqToUnique.assign(numSeqs, -1);
1061                 mapUniqueToSeq.assign(numSeqs, -1);
1062                 
1063                 vector<short> uniqueFlowDataIntI(numFlowCells * numSeqs, -1);
1064                 
1065                 for(int i=0;i<numSeqs;i++){
1066                         
1067                         if (m->control_pressed) { break; }
1068                         
1069                         int index = 0;
1070                         
1071                         vector<short> current(numFlowCells);
1072                         for(int j=0;j<numFlowCells;j++){
1073                                 current[j] = short(((flowDataIntI[i * numFlowCells + j] + 50.0)/100.0));
1074                         }
1075                                                 
1076                         for(int j=0;j<numUniques;j++){
1077                                 int offset = j * numFlowCells;
1078                                 bool toEnd = 1;
1079                                 
1080                                 int shorterLength;
1081                                 if(lengths[i] < uniqueLengths[j])       {       shorterLength = lengths[i];                     }
1082                                 else                                                            {       shorterLength = uniqueLengths[j];       }
1083
1084                                 for(int k=0;k<shorterLength;k++){
1085                                         if(current[k] != uniqueFlowgrams[offset + k]){
1086                                                 toEnd = 0;
1087                                                 break;
1088                                         }
1089                                 }
1090                                 
1091                                 if(toEnd){
1092                                         mapSeqToUnique[i] = j;
1093                                         uniqueCount[j]++;
1094                                         index = j;
1095                                         if(lengths[i] > uniqueLengths[j])       {       uniqueLengths[j] = lengths[i];  }
1096                                         break;
1097                                 }
1098                                 index++;
1099                         }
1100                         
1101                         if(index == numUniques){
1102                                 uniqueLengths[numUniques] = lengths[i];
1103                                 uniqueCount[numUniques] = 1;
1104                                 mapSeqToUnique[i] = numUniques;//anMap
1105                                 mapUniqueToSeq[numUniques] = i;//anF
1106                                 
1107                                 for(int k=0;k<numFlowCells;k++){
1108                                         uniqueFlowgrams[numUniques * numFlowCells + k] = current[k];
1109                                         uniqueFlowDataIntI[numUniques * numFlowCells + k] = flowDataIntI[i * numFlowCells + k];
1110                                 }
1111                                 
1112                                 numUniques++;
1113                         }
1114                 }
1115                 uniqueFlowDataIntI.resize(numFlowCells * numUniques);
1116                 uniqueLengths.resize(numUniques);       
1117                 uniqueFlowgrams.resize(numFlowCells * numUniques);
1118         
1119                 flowDataPrI.resize(numSeqs * numFlowCells, 0);
1120                 for(int i=0;i<flowDataPrI.size();i++)   {       if (m->control_pressed) { break; } flowDataPrI[i] = getProbIntensity(flowDataIntI[i]);          }
1121         }
1122         catch(exception& e) {
1123                 m->errorOut(e, "ShhherCommand", "getUniques");
1124                 exit(1);
1125         }
1126 }
1127
1128 /**************************************************************************************************/
1129
1130 float ShhherCommand::calcPairwiseDist(int seqA, int seqB){
1131         try{
1132                 int minLength = lengths[mapSeqToUnique[seqA]];
1133                 if(lengths[seqB] < minLength){  minLength = lengths[mapSeqToUnique[seqB]];      }
1134                 
1135                 int ANumFlowCells = seqA * numFlowCells;
1136                 int BNumFlowCells = seqB * numFlowCells;
1137                 
1138                 float dist = 0;
1139                 
1140                 for(int i=0;i<minLength;i++){
1141                         
1142                         if (m->control_pressed) { break; }
1143                         
1144                         int flowAIntI = flowDataIntI[ANumFlowCells + i];
1145                         float flowAPrI = flowDataPrI[ANumFlowCells + i];
1146                         
1147                         int flowBIntI = flowDataIntI[BNumFlowCells + i];
1148                         float flowBPrI = flowDataPrI[BNumFlowCells + i];
1149                         dist += jointLookUp[flowAIntI * NUMBINS + flowBIntI] - flowAPrI - flowBPrI;
1150                 }
1151                 
1152                 dist /= (float) minLength;
1153                 return dist;
1154         }
1155         catch(exception& e) {
1156                 m->errorOut(e, "ShhherCommand", "calcPairwiseDist");
1157                 exit(1);
1158         }
1159 }
1160
1161 /**************************************************************************************************/
1162
1163 void ShhherCommand::flowDistParentFork(string distFileName, int startSeq, int stopSeq){
1164         try{            
1165
1166                 ostringstream outStream;
1167                 outStream.setf(ios::fixed, ios::floatfield);
1168                 outStream.setf(ios::dec, ios::basefield);
1169                 outStream.setf(ios::showpoint);
1170                 outStream.precision(6);
1171                 
1172                 int begTime = time(NULL);
1173                 double begClock = clock();
1174
1175                 for(int i=startSeq;i<stopSeq;i++){
1176                         
1177                         if (m->control_pressed) { break; }
1178                         
1179                         for(int j=0;j<i;j++){
1180                                 float flowDistance = calcPairwiseDist(mapUniqueToSeq[i], mapUniqueToSeq[j]);
1181
1182                                 if(flowDistance < 1e-6){
1183                                         outStream << mapUniqueToSeq[i] << '\t' << mapUniqueToSeq[j] << '\t' << 0.000000 << endl;
1184                                 }
1185                                 else if(flowDistance <= cutoff){
1186                                         outStream << mapUniqueToSeq[i] << '\t' << mapUniqueToSeq[j] << '\t' << flowDistance << endl;
1187                                 }
1188                         }
1189                         if(i % 100 == 0){
1190                                 m->mothurOut(toString(i) + "\t" + toString(time(NULL) - begTime));
1191                                 m->mothurOut("\t" + toString((clock()-begClock)/CLOCKS_PER_SEC));
1192                                 m->mothurOutEndLine();
1193                         }
1194                 }
1195                 
1196                 ofstream distFile(distFileName.c_str());
1197                 distFile << outStream.str();            
1198                 distFile.close();
1199                 
1200                 if (m->control_pressed) {}
1201                 else {
1202                         m->mothurOut(toString(stopSeq-1) + "\t" + toString(time(NULL) - begTime));
1203                         m->mothurOut("\t" + toString((clock()-begClock)/CLOCKS_PER_SEC));
1204                         m->mothurOutEndLine();
1205                 }
1206         }
1207         catch(exception& e) {
1208                 m->errorOut(e, "ShhherCommand", "flowDistParentFork");
1209                 exit(1);
1210         }
1211 }
1212
1213 /**************************************************************************************************/
1214
1215 string ShhherCommand::createDistFile(int processors){
1216         try{
1217 //////////////////////// until I figure out the shared memory issue //////////////////////              
1218 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
1219 #else
1220                 processors=1;
1221 #endif
1222 //////////////////////// until I figure out the shared memory issue //////////////////////              
1223                 
1224                 string fDistFileName = flowFileName.substr(0,flowFileName.find_last_of('.')) + ".shhh.dist";
1225                                 
1226                 unsigned long long begTime = time(NULL);
1227                 double begClock = clock();
1228                 
1229                 if (numSeqs < processors){      processors = 1; }
1230                 
1231                 if(processors == 1)     {       flowDistParentFork(fDistFileName, 0, numUniques);               }
1232                 
1233                 else{ //you have multiple processors
1234                         
1235                         vector<int> start(processors, 0);
1236                         vector<int> end(processors, 0);
1237                         
1238                         int process = 1;
1239                         vector<int> processIDs;
1240                         
1241                         for (int i = 0; i < processors; i++) {
1242                                 start[i] = int(sqrt(float(i)/float(processors)) * numUniques);
1243                                 end[i] = int(sqrt(float(i+1)/float(processors)) * numUniques);
1244                         }
1245                         
1246 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
1247                 
1248                         //loop through and create all the processes you want
1249                         while (process != processors) {
1250                                 int pid = fork();
1251                                 
1252                                 if (pid > 0) {
1253                                         processIDs.push_back(pid);  //create map from line number to pid so you can append files in correct order later
1254                                         process++;
1255                                 }else if (pid == 0){
1256                                         flowDistParentFork(fDistFileName + toString(getpid()) + ".temp", start[process], end[process]);
1257                                         exit(0);
1258                                 }else { 
1259                                         m->mothurOut("[ERROR]: unable to spawn the necessary processes. Error code: " + toString(pid)); m->mothurOutEndLine(); 
1260                                         perror(" : ");
1261                                         for (int i=0;i<processIDs.size();i++) {  int temp = processIDs[i]; kill (temp, SIGINT); }
1262                                         exit(0);
1263                                 }
1264                         }
1265                         
1266                         //parent does its part
1267                         flowDistParentFork(fDistFileName, start[0], end[0]);
1268                         
1269                         //force parent to wait until all the processes are done
1270                         for (int i=0;i<processIDs.size();i++) { 
1271                                 int temp = processIDs[i];
1272                                 wait(&temp);
1273                         }
1274 #else
1275                         //////////////////////////////////////////////////////////////////////////////////////////////////////
1276                         //Windows version shared memory, so be careful when passing variables through the flowDistParentForkData struct. 
1277                         //Above fork() will clone, so memory is separate, but that's not the case with windows, 
1278                         //////////////////////////////////////////////////////////////////////////////////////////////////////
1279                         
1280                         vector<flowDistParentForkData*> pDataArray; 
1281                         DWORD   dwThreadIdArray[processors-1];
1282                         HANDLE  hThreadArray[processors-1]; 
1283                         
1284                         //Create processor worker threads.
1285                         for(int i = 0; i < processors-1; i++){
1286                                 // Allocate memory for thread data.
1287                                 string extension = extension = toString(i) + ".temp"; 
1288                                 
1289                                 flowDistParentForkData* tempdist = new flowDistParentForkData((fDistFileName + extension), mapUniqueToSeq, mapSeqToUnique, lengths, flowDataIntI, flowDataPrI, jointLookUp, m, start[i+1], end[i+1], numFlowCells, cutoff, i);
1290                                 pDataArray.push_back(tempdist);
1291                                 processIDs.push_back(i);
1292                                 
1293                                 //MySeqSumThreadFunction is in header. It must be global or static to work with the threads.
1294                                 //default security attributes, thread function name, argument to thread function, use default creation flags, returns the thread identifier
1295                                 hThreadArray[i] = CreateThread(NULL, 0, MyflowDistParentForkThreadFunction, pDataArray[i], 0, &dwThreadIdArray[i]);   
1296                         }
1297                         
1298                         //parent does its part
1299                         flowDistParentFork(fDistFileName, start[0], end[0]);
1300                         
1301                         //Wait until all threads have terminated.
1302                         WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE);
1303                         
1304                         //Close all thread handles and free memory allocations.
1305                         for(int i=0; i < pDataArray.size(); i++){
1306                                 CloseHandle(hThreadArray[i]);
1307                                 delete pDataArray[i];
1308                         }
1309                         
1310 #endif
1311                         
1312                         //append and remove temp files
1313                         for (int i=0;i<processIDs.size();i++) { 
1314                                 m->appendFiles((fDistFileName + toString(processIDs[i]) + ".temp"), fDistFileName);
1315                                 m->mothurRemove((fDistFileName + toString(processIDs[i]) + ".temp"));
1316                         }
1317                         
1318                 }
1319                 
1320                 m->mothurOutEndLine();
1321                 m->mothurOut("Total time: " + toString(time(NULL) - begTime) + '\t' + toString((clock() - begClock)/CLOCKS_PER_SEC) + '\n');
1322                 
1323                 return fDistFileName;
1324         }
1325         catch(exception& e) {
1326                 m->errorOut(e, "ShhherCommand", "createDistFile");
1327                 exit(1);
1328         }
1329         
1330 }
1331
1332 /**************************************************************************************************/
1333
1334 string ShhherCommand::createNamesFile(){
1335         try{
1336                 
1337                 vector<string> duplicateNames(numUniques, "");
1338                 for(int i=0;i<numSeqs;i++){
1339                         duplicateNames[mapSeqToUnique[i]] += seqNameVector[i] + ',';
1340                 }
1341                 
1342                 string nameFileName = flowFileName.substr(0,flowFileName.find_last_of('.')) + ".shhh.names";
1343                 
1344                 ofstream nameFile;
1345                 m->openOutputFile(nameFileName, nameFile);
1346                 
1347                 for(int i=0;i<numUniques;i++){
1348                         
1349                         if (m->control_pressed) { break; }
1350                         
1351 //                      nameFile << seqNameVector[mapUniqueToSeq[i]] << '\t' << duplicateNames[i].substr(0, duplicateNames[i].find_last_of(',')) << endl;
1352                         nameFile << mapUniqueToSeq[i] << '\t' << duplicateNames[i].substr(0, duplicateNames[i].find_last_of(',')) << endl;
1353                 }
1354                 
1355                 nameFile.close();
1356                 return  nameFileName;
1357         }
1358         catch(exception& e) {
1359                 m->errorOut(e, "ShhherCommand", "createNamesFile");
1360                 exit(1);
1361         }
1362 }
1363
1364 //**********************************************************************************************************************
1365
1366 string ShhherCommand::cluster(string distFileName, string namesFileName){
1367         try {
1368                 
1369                 ReadMatrix* read = new ReadColumnMatrix(distFileName);  
1370                 read->setCutoff(cutoff);
1371                 
1372                 NameAssignment* clusterNameMap = new NameAssignment(namesFileName);
1373                 clusterNameMap->readMap();
1374                 read->read(clusterNameMap);
1375         
1376                 ListVector* list = read->getListVector();
1377                 SparseMatrix* matrix = read->getMatrix();
1378                 
1379                 delete read; 
1380                 delete clusterNameMap; 
1381                                 
1382                 RAbundVector* rabund = new RAbundVector(list->getRAbundVector());
1383                 
1384                 Cluster* cluster = new CompleteLinkage(rabund, list, matrix, cutoff, "furthest"); 
1385                 string tag = cluster->getTag();
1386                 
1387                 double clusterCutoff = cutoff;
1388                 while (matrix->getSmallDist() <= clusterCutoff && matrix->getNNodes() > 0){
1389                         
1390                         if (m->control_pressed) { break; }
1391                         
1392                         cluster->update(clusterCutoff);
1393                 }
1394                 
1395                 list->setLabel(toString(cutoff));
1396                 
1397                 string listFileName = flowFileName.substr(0,flowFileName.find_last_of('.')) + ".shhh.list";
1398                 ofstream listFile;
1399                 m->openOutputFile(listFileName, listFile);
1400                 list->print(listFile);
1401                 listFile.close();
1402                 
1403                 delete matrix;  delete cluster; delete rabund; delete list;
1404         
1405                 return listFileName;
1406         }
1407         catch(exception& e) {
1408                 m->errorOut(e, "ShhherCommand", "cluster");
1409                 exit(1);        
1410         }               
1411 }
1412
1413 /**************************************************************************************************/
1414
1415 void ShhherCommand::getOTUData(string listFileName){
1416         try {
1417
1418                 ifstream listFile;
1419                 m->openInputFile(listFileName, listFile);
1420                 string label;
1421                 
1422                 listFile >> label >> numOTUs;
1423
1424                 otuData.assign(numSeqs, 0);
1425                 cumNumSeqs.assign(numOTUs, 0);
1426                 nSeqsPerOTU.assign(numOTUs, 0);
1427                 aaP.clear();aaP.resize(numOTUs);
1428                 
1429                 seqNumber.clear();
1430                 aaI.clear();
1431                 seqIndex.clear();
1432                 
1433                 string singleOTU = "";
1434                 
1435                 for(int i=0;i<numOTUs;i++){
1436                         
1437                         if (m->control_pressed) { break; }
1438
1439                         listFile >> singleOTU;
1440                         
1441                         istringstream otuString(singleOTU);
1442
1443                         while(otuString){
1444                                 
1445                                 string seqName = "";
1446                                 
1447                                 for(int j=0;j<singleOTU.length();j++){
1448                                         char letter = otuString.get();
1449                                         
1450                                         if(letter != ','){
1451                                                 seqName += letter;
1452                                         }
1453                                         else{
1454                                                 map<string,int>::iterator nmIt = nameMap.find(seqName);
1455                                                 int index = nmIt->second;
1456                                                 
1457                                                 nameMap.erase(nmIt);
1458                                                 
1459                                                 otuData[index] = i;
1460                                                 nSeqsPerOTU[i]++;
1461                                                 aaP[i].push_back(index);
1462                                                 seqName = "";
1463                                         }
1464                                 }
1465                                 
1466                                 map<string,int>::iterator nmIt = nameMap.find(seqName);
1467
1468                                 int index = nmIt->second;
1469                                 nameMap.erase(nmIt);
1470
1471                                 otuData[index] = i;
1472                                 nSeqsPerOTU[i]++;
1473                                 aaP[i].push_back(index);        
1474                                 
1475                                 otuString.get();
1476                         }
1477                         
1478                         sort(aaP[i].begin(), aaP[i].end());
1479                         for(int j=0;j<nSeqsPerOTU[i];j++){
1480                                 seqNumber.push_back(aaP[i][j]);
1481                         }
1482                         for(int j=nSeqsPerOTU[i];j<numSeqs;j++){
1483                                 aaP[i].push_back(0);
1484                         }
1485                         
1486                         
1487                 }
1488                 
1489                 for(int i=1;i<numOTUs;i++){
1490                         cumNumSeqs[i] = cumNumSeqs[i-1] + nSeqsPerOTU[i-1];
1491                 }
1492                 aaI = aaP;
1493                 seqIndex = seqNumber;
1494                 
1495                 listFile.close();       
1496                 
1497         }
1498         catch(exception& e) {
1499                 m->errorOut(e, "ShhherCommand", "getOTUData");
1500                 exit(1);        
1501         }               
1502 }
1503
1504 /**************************************************************************************************/
1505
1506 void ShhherCommand::initPyroCluster(){                          
1507         try{
1508                 if (numOTUs < processors) { processors = 1; }
1509
1510                 dist.assign(numSeqs * numOTUs, 0);
1511                 change.assign(numOTUs, 1);
1512                 centroids.assign(numOTUs, -1);
1513                 weight.assign(numOTUs, 0);
1514                 singleTau.assign(numSeqs, 1.0);
1515                 
1516                 nSeqsBreaks.assign(processors+1, 0);
1517                 nOTUsBreaks.assign(processors+1, 0);
1518
1519                 nSeqsBreaks[0] = 0;
1520                 for(int i=0;i<processors;i++){
1521                         nSeqsBreaks[i+1] = nSeqsBreaks[i] + (int)((double) numSeqs / (double) processors);
1522                         nOTUsBreaks[i+1] = nOTUsBreaks[i] + (int)((double) numOTUs / (double) processors);
1523                 }
1524                 nSeqsBreaks[processors] = numSeqs;
1525                 nOTUsBreaks[processors] = numOTUs;
1526         }
1527         catch(exception& e) {
1528                 m->errorOut(e, "ShhherCommand", "initPyroCluster");
1529                 exit(1);        
1530         }
1531 }
1532
1533 /**************************************************************************************************/
1534
1535 void ShhherCommand::fill(){
1536         try {
1537                 int index = 0;
1538                 for(int i=0;i<numOTUs;i++){
1539                         
1540                         if (m->control_pressed) { break; }
1541                         
1542                         cumNumSeqs[i] = index;
1543                         for(int j=0;j<nSeqsPerOTU[i];j++){
1544                                 seqNumber[index] = aaP[i][j];
1545                                 seqIndex[index] = aaI[i][j];
1546                                 
1547                                 index++;
1548                         }
1549                 }
1550         }
1551         catch(exception& e) {
1552                 m->errorOut(e, "ShhherCommand", "fill");
1553                 exit(1);        
1554         }               
1555 }
1556
1557 /**************************************************************************************************/
1558
1559 void ShhherCommand::calcCentroids(){                          
1560         try{
1561                 
1562 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
1563                 
1564                 if(processors == 1)     {
1565                         calcCentroidsDriver(0, numOTUs);                
1566                 }
1567                 else{ //you have multiple processors
1568                         if (numOTUs < processors){      processors = 1; }
1569                         
1570                         int process = 1;
1571                         vector<int> processIDs;
1572                         
1573                         //loop through and create all the processes you want
1574                         while (process != processors) {
1575                                 int pid = vfork();
1576                                 
1577                                 if (pid > 0) {
1578                                         processIDs.push_back(pid);  //create map from line number to pid so you can append files in correct order later
1579                                         process++;
1580                                 }else if (pid == 0){
1581                                         calcCentroidsDriver(nOTUsBreaks[process], nOTUsBreaks[process+1]);
1582                                         exit(0);
1583                                 }else { 
1584                                         m->mothurOut("[ERROR]: unable to spawn the necessary processes. Error code: " + toString(pid)); m->mothurOutEndLine(); 
1585                                         perror(" : ");
1586                                         for (int i=0;i<processIDs.size();i++) {  int temp = processIDs[i]; kill (temp, SIGINT); }
1587                                         exit(0);
1588                                 }
1589                         }
1590                         
1591                         //parent does its part
1592                         calcCentroidsDriver(nOTUsBreaks[0], nOTUsBreaks[1]);
1593
1594                         //force parent to wait until all the processes are done
1595                         for (int i=0;i<processIDs.size();i++) { 
1596                                 int temp = processIDs[i];
1597                                 wait(&temp);
1598                         }
1599                 }
1600                 
1601 #else
1602                 calcCentroidsDriver(0, numOTUs);
1603 #endif          
1604         }
1605         catch(exception& e) {
1606                 m->errorOut(e, "ShhherCommand", "calcCentroidsDriver");
1607                 exit(1);        
1608         }               
1609 }
1610
1611 /**************************************************************************************************/
1612
1613 void ShhherCommand::calcCentroidsDriver(int start, int finish){                          
1614         
1615         //this function gets the most likely homopolymer length at a flow position for a group of sequences
1616         //within an otu
1617         
1618         try{
1619                 
1620                 for(int i=start;i<finish;i++){
1621                         
1622                         if (m->control_pressed) { break; }
1623                         
1624                         double count = 0;
1625                         int position = 0;
1626                         int minFlowGram = 100000000;
1627                         double minFlowValue = 1e8;
1628                         change[i] = 0; //FALSE
1629                         
1630                         for(int j=0;j<nSeqsPerOTU[i];j++){
1631                                 count += singleTau[seqNumber[cumNumSeqs[i] + j]];
1632                         }
1633
1634                         if(nSeqsPerOTU[i] > 0 && count > MIN_COUNT){
1635                                 vector<double> adF(nSeqsPerOTU[i]);
1636                                 vector<int> anL(nSeqsPerOTU[i]);
1637                                 
1638                                 for(int j=0;j<nSeqsPerOTU[i];j++){
1639                                         int index = cumNumSeqs[i] + j;
1640                                         int nI = seqIndex[index];
1641                                         int nIU = mapSeqToUnique[nI];
1642                                         
1643                                         int k;
1644                                         for(k=0;k<position;k++){
1645                                                 if(nIU == anL[k]){
1646                                                         break;
1647                                                 }
1648                                         }
1649                                         if(k == position){
1650                                                 anL[position] = nIU;
1651                                                 adF[position] = 0.0000;
1652                                                 position++;
1653                                         }                                               
1654                                 }
1655                                 
1656                                 for(int j=0;j<nSeqsPerOTU[i];j++){
1657                                         int index = cumNumSeqs[i] + j;
1658                                         int nI = seqIndex[index];
1659                                         
1660                                         double tauValue = singleTau[seqNumber[index]];
1661                                         
1662                                         for(int k=0;k<position;k++){
1663                                                 double dist = getDistToCentroid(anL[k], nI, lengths[nI]);
1664                                                 adF[k] += dist * tauValue;
1665                                         }
1666                                 }
1667                                 
1668                                 for(int j=0;j<position;j++){
1669                                         if(adF[j] < minFlowValue){
1670                                                 minFlowGram = j;
1671                                                 minFlowValue = adF[j];
1672                                         }
1673                                 }
1674                                 
1675                                 if(centroids[i] != anL[minFlowGram]){
1676                                         change[i] = 1;
1677                                         centroids[i] = anL[minFlowGram];
1678                                 }
1679                         }
1680                         else if(centroids[i] != -1){
1681                                 change[i] = 1;
1682                                 centroids[i] = -1;                      
1683                         }
1684                 }
1685         }
1686         catch(exception& e) {
1687                 m->errorOut(e, "ShhherCommand", "calcCentroidsDriver");
1688                 exit(1);        
1689         }               
1690 }
1691
1692 /**************************************************************************************************/
1693
1694 double ShhherCommand::getDistToCentroid(int cent, int flow, int length){
1695         try{
1696                 
1697                 int flowAValue = cent * numFlowCells;
1698                 int flowBValue = flow * numFlowCells;
1699                 
1700                 double dist = 0;
1701
1702                 for(int i=0;i<length;i++){
1703                         dist += singleLookUp[uniqueFlowgrams[flowAValue] * NUMBINS + flowDataIntI[flowBValue]];
1704                         flowAValue++;
1705                         flowBValue++;
1706                 }
1707                 
1708                 return dist / (double)length;
1709         }
1710         catch(exception& e) {
1711                 m->errorOut(e, "ShhherCommand", "getDistToCentroid");
1712                 exit(1);        
1713         }               
1714 }
1715
1716 /**************************************************************************************************/
1717
1718 double ShhherCommand::getNewWeights(){
1719         try{
1720                 
1721                 double maxChange = 0;
1722                 
1723                 for(int i=0;i<numOTUs;i++){
1724                         
1725                         if (m->control_pressed) { break; }
1726                         
1727                         double difference = weight[i];
1728                         weight[i] = 0;
1729                         
1730                         for(int j=0;j<nSeqsPerOTU[i];j++){
1731                                 int index = cumNumSeqs[i] + j;
1732                                 double tauValue = singleTau[seqNumber[index]];
1733                                 weight[i] += tauValue;
1734                         }
1735                         
1736                         difference = fabs(weight[i] - difference);
1737                         if(difference > maxChange){     maxChange = difference; }
1738                 }
1739                 return maxChange;
1740         }
1741         catch(exception& e) {
1742                 m->errorOut(e, "ShhherCommand", "getNewWeights");
1743                 exit(1);        
1744         }               
1745 }
1746
1747 /**************************************************************************************************/
1748
1749 double ShhherCommand::getLikelihood(){
1750         
1751         try{
1752                 
1753                 vector<long double> P(numSeqs, 0);
1754                 int effNumOTUs = 0;
1755                 
1756                 for(int i=0;i<numOTUs;i++){
1757                         if(weight[i] > MIN_WEIGHT){
1758                                 effNumOTUs++;
1759                         }
1760                 }
1761                 
1762                 string hold;
1763                 for(int i=0;i<numOTUs;i++){
1764                         
1765                         if (m->control_pressed) { break; }
1766                         
1767                         for(int j=0;j<nSeqsPerOTU[i];j++){
1768                                 int index = cumNumSeqs[i] + j;
1769                                 int nI = seqIndex[index];
1770                                 double singleDist = dist[seqNumber[index]];
1771                                 
1772                                 P[nI] += weight[i] * exp(-singleDist * sigma);
1773                         }
1774                 }
1775                 double nLL = 0.00;
1776                 for(int i=0;i<numSeqs;i++){
1777                         if(P[i] == 0){  P[i] = DBL_EPSILON;     }
1778
1779                         nLL += -log(P[i]);
1780                 }
1781                 
1782                 nLL = nLL -(double)numSeqs * log(sigma);
1783
1784                 return nLL; 
1785         }
1786         catch(exception& e) {
1787                 m->errorOut(e, "ShhherCommand", "getNewWeights");
1788                 exit(1);        
1789         }               
1790 }
1791
1792 /**************************************************************************************************/
1793
1794 void ShhherCommand::checkCentroids(){
1795         try{
1796                 vector<int> unique(numOTUs, 1);
1797                 
1798                 for(int i=0;i<numOTUs;i++){
1799                         if(centroids[i] == -1 || weight[i] < MIN_WEIGHT){
1800                                 unique[i] = -1;
1801                         }
1802                 }
1803                 
1804                 for(int i=0;i<numOTUs;i++){
1805                         
1806                         if (m->control_pressed) { break; }
1807                         
1808                         if(unique[i] == 1){
1809                                 for(int j=i+1;j<numOTUs;j++){
1810                                         if(unique[j] == 1){
1811                                                 
1812                                                 if(centroids[j] == centroids[i]){
1813                                                         unique[j] = 0;
1814                                                         centroids[j] = -1;
1815                                                         
1816                                                         weight[i] += weight[j];
1817                                                         weight[j] = 0.0;
1818                                                 }
1819                                         }
1820                                 }
1821                         }
1822                 }
1823         }
1824         catch(exception& e) {
1825                 m->errorOut(e, "ShhherCommand", "checkCentroids");
1826                 exit(1);        
1827         }               
1828 }
1829
1830 /**************************************************************************************************/
1831
1832 void ShhherCommand::calcNewDistances(){                          
1833         try{
1834                 
1835 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
1836
1837                 if(processors == 1)     {
1838                         calcNewDistancesParent(0, numSeqs);             
1839                 }
1840                 else{ //you have multiple processors
1841                         if (numSeqs < processors){      processors = 1; }
1842                         
1843                         vector<vector<int> > child_otuIndex(processors);
1844                         vector<vector<int> > child_seqIndex(processors);
1845                         vector<vector<double> > child_singleTau(processors);                    
1846                         vector<int> totals(processors);
1847                         
1848                         int process = 1;
1849                         vector<int> processIDs;
1850
1851                         //loop through and create all the processes you want
1852                         while (process != processors) {
1853                                 int pid = vfork();
1854                                 
1855                                 if (pid > 0) {
1856                                         processIDs.push_back(pid);  //create map from line number to pid so you can append files in correct order later
1857                                         process++;
1858                                 }else if (pid == 0){
1859                                         calcNewDistancesChild(nSeqsBreaks[process], nSeqsBreaks[process+1], child_otuIndex[process], child_seqIndex[process], child_singleTau[process]);
1860                                         totals[process] = child_otuIndex[process].size();
1861
1862                                         exit(0);
1863                                 }else { 
1864                                         m->mothurOut("[ERROR]: unable to spawn the necessary processes. Error code: " + toString(pid)); m->mothurOutEndLine(); 
1865                                         perror(" : ");
1866                                         for (int i=0;i<processIDs.size();i++) {  int temp = processIDs[i]; kill (temp, SIGINT); }
1867                                         exit(0);
1868                                 }
1869                         }
1870                                 
1871                         //parent does its part
1872                         calcNewDistancesParent(nSeqsBreaks[0], nSeqsBreaks[1]);
1873                         int total = seqIndex.size();
1874
1875                         //force parent to wait until all the processes are done
1876                         for (int i=0;i<processIDs.size();i++) { 
1877                                 int temp = processIDs[i];
1878                                 wait(&temp);
1879                         }
1880
1881                         for(int i=1;i<processors;i++){
1882                                 int oldTotal = total;
1883                                 total += totals[i];
1884
1885                                 singleTau.resize(total, 0);
1886                                 seqIndex.resize(total, 0);
1887                                 seqNumber.resize(total, 0);
1888                                 
1889                                 int childIndex = 0;
1890                                 
1891                                 for(int j=oldTotal;j<total;j++){
1892                                         int otuI = child_otuIndex[i][childIndex];
1893                                         int seqI = child_seqIndex[i][childIndex];
1894
1895                                         singleTau[j] = child_singleTau[i][childIndex];
1896                                         aaP[otuI][nSeqsPerOTU[otuI]] = j;
1897                                         aaI[otuI][nSeqsPerOTU[otuI]] = seqI;
1898                                         nSeqsPerOTU[otuI]++;
1899
1900                                         childIndex++;
1901                                 }
1902                         }
1903                 }
1904 #else
1905                 calcNewDistancesParent(0, numSeqs);             
1906 #endif          
1907         }
1908         catch(exception& e) {
1909                 m->errorOut(e, "ShhherCommand", "calcNewDistances");
1910                 exit(1);        
1911         }               
1912 }
1913
1914 /**************************************************************************************************/
1915 #ifdef USE_MPI
1916 void ShhherCommand::calcNewDistancesChildMPI(int startSeq, int stopSeq, vector<int>& otuIndex){
1917         
1918         try{
1919                 vector<double> newTau(numOTUs,0);
1920                 vector<double> norms(numSeqs, 0);
1921                 otuIndex.clear();
1922                 seqIndex.clear();
1923                 singleTau.clear();
1924                 
1925                 for(int i=startSeq;i<stopSeq;i++){
1926                         
1927                         if (m->control_pressed) { break; }
1928                         
1929                         double offset = 1e8;
1930                         int indexOffset = i * numOTUs;
1931                         
1932                         for(int j=0;j<numOTUs;j++){
1933                                 
1934                                 if(weight[j] > MIN_WEIGHT && change[j] == 1){
1935                                         dist[indexOffset + j] = getDistToCentroid(centroids[j], i, lengths[i]);
1936                                 }
1937                                 if(weight[j] > MIN_WEIGHT && dist[indexOffset + j] < offset){
1938                                         offset = dist[indexOffset + j];
1939                                 }
1940                         }
1941                         
1942                         for(int j=0;j<numOTUs;j++){
1943                                 if(weight[j] > MIN_WEIGHT){
1944                                         newTau[j] = exp(sigma * (-dist[indexOffset + j] + offset)) * weight[j];
1945                                         norms[i] += newTau[j];
1946                                 }
1947                                 else{
1948                                         newTau[j] = 0.0;
1949                                 }
1950                         }
1951                         
1952                         for(int j=0;j<numOTUs;j++){
1953
1954                                 newTau[j] /= norms[i];
1955                                 
1956                                 if(newTau[j] > MIN_TAU){
1957                                         otuIndex.push_back(j);
1958                                         seqIndex.push_back(i);
1959                                         singleTau.push_back(newTau[j]);
1960                                 }
1961                         }
1962                         
1963                 }
1964         }
1965         catch(exception& e) {
1966                 m->errorOut(e, "ShhherCommand", "calcNewDistancesChildMPI");
1967                 exit(1);        
1968         }               
1969 }
1970 #endif
1971 /**************************************************************************************************/
1972
1973 void ShhherCommand::calcNewDistancesChild(int startSeq, int stopSeq, vector<int>& child_otuIndex, vector<int>& child_seqIndex, vector<double>& child_singleTau){
1974         
1975         try{
1976                 vector<double> newTau(numOTUs,0);
1977                 vector<double> norms(numSeqs, 0);
1978                 child_otuIndex.resize(0);
1979                 child_seqIndex.resize(0);
1980                 child_singleTau.resize(0);
1981                 
1982                 for(int i=startSeq;i<stopSeq;i++){
1983                         
1984                         if (m->control_pressed) { break; }
1985                         
1986                         double offset = 1e8;
1987                         int indexOffset = i * numOTUs;
1988                         
1989                         
1990                         for(int j=0;j<numOTUs;j++){
1991                                 if(weight[j] > MIN_WEIGHT && change[j] == 1){
1992                                         dist[indexOffset + j] = getDistToCentroid(centroids[j], i, lengths[i]);
1993                                 }
1994                                 
1995                                 if(weight[j] > MIN_WEIGHT && dist[indexOffset + j] < offset){
1996                                         offset = dist[indexOffset + j];
1997                                 }
1998                         }
1999                         
2000                         for(int j=0;j<numOTUs;j++){
2001                                 if(weight[j] > MIN_WEIGHT){
2002                                         newTau[j] = exp(sigma * (-dist[indexOffset + j] + offset)) * weight[j];
2003                                         norms[i] += newTau[j];
2004                                 }
2005                                 else{
2006                                         newTau[j] = 0.0;
2007                                 }
2008                         }
2009                         
2010                         for(int j=0;j<numOTUs;j++){
2011                                 newTau[j] /= norms[i];
2012                                 
2013                                 if(newTau[j] > MIN_TAU){
2014                                         child_otuIndex.push_back(j);
2015                                         child_seqIndex.push_back(i);
2016                                         child_singleTau.push_back(newTau[j]);
2017                                 }
2018                         }
2019                 }
2020         }
2021         catch(exception& e) {
2022                 m->errorOut(e, "ShhherCommand", "calcNewDistancesChild");
2023                 exit(1);        
2024         }               
2025 }
2026
2027 /**************************************************************************************************/
2028
2029 void ShhherCommand::calcNewDistancesParent(int startSeq, int stopSeq){
2030         
2031         try{
2032                 
2033                 int total = 0;
2034                 vector<double> newTau(numOTUs,0);
2035                 vector<double> norms(numSeqs, 0);
2036                 nSeqsPerOTU.assign(numOTUs, 0);
2037
2038                 for(int i=startSeq;i<stopSeq;i++){
2039                         
2040                         if (m->control_pressed) { break; }
2041                         
2042                         int indexOffset = i * numOTUs;
2043
2044                         double offset = 1e8;
2045                         
2046                         for(int j=0;j<numOTUs;j++){
2047
2048                                 if(weight[j] > MIN_WEIGHT && change[j] == 1){
2049                                         dist[indexOffset + j] = getDistToCentroid(centroids[j], i, lengths[i]);
2050                                 }
2051         
2052                                 if(weight[j] > MIN_WEIGHT && dist[indexOffset + j] < offset){
2053                                         offset = dist[indexOffset + j];
2054                                 }
2055                         }
2056
2057                         for(int j=0;j<numOTUs;j++){
2058                                 if(weight[j] > MIN_WEIGHT){
2059                                         newTau[j] = exp(sigma * (-dist[indexOffset + j] + offset)) * weight[j];
2060                                         norms[i] += newTau[j];
2061                                 }
2062                                 else{
2063                                         newTau[j] = 0.0;
2064                                 }
2065                         }
2066
2067                         for(int j=0;j<numOTUs;j++){
2068                                 newTau[j] /= norms[i];
2069                         }
2070         
2071                         for(int j=0;j<numOTUs;j++){
2072                                 if(newTau[j] > MIN_TAU){
2073                                         
2074                                         int oldTotal = total;
2075                                         
2076                                         total++;
2077                                         
2078                                         singleTau.resize(total, 0);
2079                                         seqNumber.resize(total, 0);
2080                                         seqIndex.resize(total, 0);
2081                                         
2082                                         singleTau[oldTotal] = newTau[j];
2083                                         
2084                                         aaP[j][nSeqsPerOTU[j]] = oldTotal;
2085                                         aaI[j][nSeqsPerOTU[j]] = i;
2086                                         nSeqsPerOTU[j]++;
2087                                 }
2088                         }
2089
2090                 }
2091
2092         }
2093         catch(exception& e) {
2094                 m->errorOut(e, "ShhherCommand", "calcNewDistancesParent");
2095                 exit(1);        
2096         }               
2097 }
2098
2099 /**************************************************************************************************/
2100
2101 void ShhherCommand::setOTUs(){
2102         
2103         try {
2104                 vector<double> bigTauMatrix(numOTUs * numSeqs, 0.0000);
2105                 
2106                 for(int i=0;i<numOTUs;i++){
2107                         
2108                         if (m->control_pressed) { break; }
2109                         
2110                         for(int j=0;j<nSeqsPerOTU[i];j++){
2111                                 int index = cumNumSeqs[i] + j;
2112                                 double tauValue = singleTau[seqNumber[index]];
2113                                 int sIndex = seqIndex[index];
2114                                 bigTauMatrix[sIndex * numOTUs + i] = tauValue;                          
2115                         }
2116                 }
2117                 
2118                 for(int i=0;i<numSeqs;i++){
2119                         double maxTau = -1.0000;
2120                         int maxOTU = -1;
2121                         for(int j=0;j<numOTUs;j++){
2122                                 if(bigTauMatrix[i * numOTUs + j] > maxTau){
2123                                         maxTau = bigTauMatrix[i * numOTUs + j];
2124                                         maxOTU = j;
2125                                 }
2126                         }
2127                         
2128                         otuData[i] = maxOTU;
2129                 }
2130                 
2131                 nSeqsPerOTU.assign(numOTUs, 0);         
2132                 
2133                 for(int i=0;i<numSeqs;i++){
2134                         int index = otuData[i];
2135                         
2136                         singleTau[i] = 1.0000;
2137                         dist[i] = 0.0000;
2138                         
2139                         aaP[index][nSeqsPerOTU[index]] = i;
2140                         aaI[index][nSeqsPerOTU[index]] = i;
2141                         
2142                         nSeqsPerOTU[index]++;
2143                 }
2144                 fill(); 
2145         }
2146         catch(exception& e) {
2147                 m->errorOut(e, "ShhherCommand", "calcNewDistances");
2148                 exit(1);        
2149         }               
2150 }
2151
2152 /**************************************************************************************************/
2153
2154 void ShhherCommand::writeQualities(vector<int> otuCounts){
2155         
2156         try {
2157                 string thisOutputDir = outputDir;
2158                 if (outputDir == "") {  thisOutputDir += m->hasPath(flowFileName);  }
2159                 string qualityFileName = thisOutputDir + m->getRootName(m->getSimpleName(flowFileName)) + ".shhh.qual";
2160
2161                 ofstream qualityFile;
2162                 m->openOutputFile(qualityFileName, qualityFile);
2163
2164                 qualityFile.setf(ios::fixed, ios::floatfield);
2165                 qualityFile.setf(ios::showpoint);
2166                 qualityFile << setprecision(6);
2167                 
2168                 vector<vector<int> > qualities(numOTUs);
2169                 vector<double> pr(HOMOPS, 0);
2170                 
2171                 
2172                 for(int i=0;i<numOTUs;i++){
2173                         
2174                         if (m->control_pressed) { break; }
2175                         
2176                         int index = 0;
2177                         int base = 0;
2178                         
2179                         if(nSeqsPerOTU[i] > 0){
2180                                 qualities[i].assign(1024, -1);
2181                                 
2182                                 while(index < numFlowCells){
2183                                         double maxPrValue = 1e8;
2184                                         short maxPrIndex = -1;
2185                                         double count = 0.0000;
2186                                         
2187                                         pr.assign(HOMOPS, 0);
2188                                         
2189                                         for(int j=0;j<nSeqsPerOTU[i];j++){
2190                                                 int lIndex = cumNumSeqs[i] + j;
2191                                                 double tauValue = singleTau[seqNumber[lIndex]];
2192                                                 int sequenceIndex = aaI[i][j];
2193                                                 short intensity = flowDataIntI[sequenceIndex * numFlowCells + index];
2194                                                 
2195                                                 count += tauValue;
2196                                                 
2197                                                 for(int s=0;s<HOMOPS;s++){
2198                                                         pr[s] += tauValue * singleLookUp[s * NUMBINS + intensity];
2199                                                 }
2200                                         }
2201                                         
2202                                         maxPrIndex = uniqueFlowgrams[centroids[i] * numFlowCells + index];
2203                                         maxPrValue = pr[maxPrIndex];
2204                                         
2205                                         if(count > MIN_COUNT){
2206                                                 double U = 0.0000;
2207                                                 double norm = 0.0000;
2208                                                 
2209                                                 for(int s=0;s<HOMOPS;s++){
2210                                                         norm += exp(-(pr[s] - maxPrValue));
2211                                                 }
2212                                                 
2213                                                 for(int s=1;s<=maxPrIndex;s++){
2214                                                         int value = 0;
2215                                                         double temp = 0.0000;
2216                                                         
2217                                                         U += exp(-(pr[s-1]-maxPrValue))/norm;
2218                                                         
2219                                                         if(U>0.00){
2220                                                                 temp = log10(U);
2221                                                         }
2222                                                         else{
2223                                                                 temp = -10.1;
2224                                                         }
2225                                                         temp = floor(-10 * temp);
2226                                                         value = (int)floor(temp);
2227                                                         if(value > 100){        value = 100;    }
2228                                                         
2229                                                         qualities[i][base] = (int)value;
2230                                                         base++;
2231                                                 }
2232                                         }
2233                                         
2234                                         index++;
2235                                 }
2236                         }
2237                         
2238                         
2239                         if(otuCounts[i] > 0){
2240                                 qualityFile << '>' << seqNameVector[mapUniqueToSeq[i]] << endl;
2241                                 
2242                                 int j=4;        //need to get past the first four bases
2243                                 while(qualities[i][j] != -1){
2244                                         qualityFile << qualities[i][j] << ' ';
2245                                         j++;
2246                                 }
2247                                 qualityFile << endl;
2248                         }
2249                 }
2250                 qualityFile.close();
2251                 outputNames.push_back(qualityFileName);
2252
2253         }
2254         catch(exception& e) {
2255                 m->errorOut(e, "ShhherCommand", "writeQualities");
2256                 exit(1);        
2257         }               
2258 }
2259
2260 /**************************************************************************************************/
2261
2262 void ShhherCommand::writeSequences(vector<int> otuCounts){
2263         try {
2264                 string thisOutputDir = outputDir;
2265                 if (outputDir == "") {  thisOutputDir += m->hasPath(flowFileName);  }
2266                 string fastaFileName = thisOutputDir + m->getRootName(m->getSimpleName(flowFileName)) + ".shhh.fasta";
2267                 ofstream fastaFile;
2268                 m->openOutputFile(fastaFileName, fastaFile);
2269                 
2270                 vector<string> names(numOTUs, "");
2271                 
2272                 for(int i=0;i<numOTUs;i++){
2273                         
2274                         if (m->control_pressed) { break; }
2275                         
2276                         int index = centroids[i];
2277                         
2278                         if(otuCounts[i] > 0){
2279                                 fastaFile << '>' << seqNameVector[aaI[i][0]] << endl;
2280                                 
2281                                 string newSeq = "";
2282                                 
2283                                 for(int j=0;j<numFlowCells;j++){
2284                                         
2285                                         char base = flowOrder[j % 4];
2286                                         for(int k=0;k<uniqueFlowgrams[index * numFlowCells + j];k++){
2287                                                 newSeq += base;
2288                                         }
2289                                 }
2290                                 
2291                                 fastaFile << newSeq.substr(4) << endl;
2292                         }
2293                 }
2294                 fastaFile.close();
2295
2296                 outputNames.push_back(fastaFileName);
2297
2298                 if(compositeFASTAFileName != ""){
2299                         m->appendFiles(fastaFileName, compositeFASTAFileName);
2300                 }
2301         }
2302         catch(exception& e) {
2303                 m->errorOut(e, "ShhherCommand", "writeSequences");
2304                 exit(1);        
2305         }               
2306 }
2307
2308 /**************************************************************************************************/
2309
2310 void ShhherCommand::writeNames(vector<int> otuCounts){
2311         try {
2312                 string thisOutputDir = outputDir;
2313                 if (outputDir == "") {  thisOutputDir += m->hasPath(flowFileName);  }
2314                 string nameFileName = thisOutputDir + m->getRootName(m->getSimpleName(flowFileName)) + ".shhh.names";
2315                 ofstream nameFile;
2316                 m->openOutputFile(nameFileName, nameFile);
2317                 
2318                 for(int i=0;i<numOTUs;i++){
2319                         
2320                         if (m->control_pressed) { break; }
2321                         
2322                         if(otuCounts[i] > 0){
2323                                 nameFile << seqNameVector[aaI[i][0]] << '\t' << seqNameVector[aaI[i][0]];
2324                                 
2325                                 for(int j=1;j<nSeqsPerOTU[i];j++){
2326                                         nameFile << ',' << seqNameVector[aaI[i][j]];
2327                                 }
2328                                 
2329                                 nameFile << endl;
2330                         }
2331                 }
2332                 nameFile.close();
2333                 outputNames.push_back(nameFileName);
2334                 
2335                 
2336                 if(compositeNamesFileName != ""){
2337                         m->appendFiles(nameFileName, compositeNamesFileName);
2338                 }               
2339         }
2340         catch(exception& e) {
2341                 m->errorOut(e, "ShhherCommand", "writeNames");
2342                 exit(1);        
2343         }               
2344 }
2345
2346 /**************************************************************************************************/
2347
2348 void ShhherCommand::writeGroups(){
2349         try {
2350                 string thisOutputDir = outputDir;
2351                 if (outputDir == "") {  thisOutputDir += m->hasPath(flowFileName);  }
2352                 string fileRoot = thisOutputDir + m->getRootName(m->getSimpleName(flowFileName));
2353                 string groupFileName = fileRoot + ".shhh.groups";
2354                 ofstream groupFile;
2355                 m->openOutputFile(groupFileName, groupFile);
2356                 
2357                 for(int i=0;i<numSeqs;i++){
2358                         if (m->control_pressed) { break; }
2359                         groupFile << seqNameVector[i] << '\t' << fileRoot << endl;
2360                 }
2361                 groupFile.close();
2362                 outputNames.push_back(groupFileName);
2363
2364         }
2365         catch(exception& e) {
2366                 m->errorOut(e, "ShhherCommand", "writeGroups");
2367                 exit(1);        
2368         }               
2369 }
2370
2371 /**************************************************************************************************/
2372
2373 void ShhherCommand::writeClusters(vector<int> otuCounts){
2374         try {
2375                 string thisOutputDir = outputDir;
2376                 if (outputDir == "") {  thisOutputDir += m->hasPath(flowFileName);  }
2377                 string otuCountsFileName = thisOutputDir + m->getRootName(m->getSimpleName(flowFileName)) + ".shhh.counts";
2378                 ofstream otuCountsFile;
2379                 m->openOutputFile(otuCountsFileName, otuCountsFile);
2380                 
2381                 string bases = flowOrder;
2382                 
2383                 for(int i=0;i<numOTUs;i++){
2384                         
2385                         if (m->control_pressed) {
2386                                 break;
2387                         }
2388                         //output the translated version of the centroid sequence for the otu
2389                         if(otuCounts[i] > 0){
2390                                 int index = centroids[i];
2391                                 
2392                                 otuCountsFile << "ideal\t";
2393                                 for(int j=8;j<numFlowCells;j++){
2394                                         char base = bases[j % 4];
2395                                         for(int s=0;s<uniqueFlowgrams[index * numFlowCells + j];s++){
2396                                                 otuCountsFile << base;
2397                                         }
2398                                 }
2399                                 otuCountsFile << endl;
2400                                 
2401                                 for(int j=0;j<nSeqsPerOTU[i];j++){
2402                                         int sequence = aaI[i][j];
2403                                         otuCountsFile << seqNameVector[sequence] << '\t';
2404                                         
2405                                         string newSeq = "";
2406                                         
2407                                         for(int k=0;k<lengths[sequence];k++){
2408                                                 char base = bases[k % 4];
2409                                                 int freq = int(0.01 * (double)flowDataIntI[sequence * numFlowCells + k] + 0.5);
2410                                                         
2411                                                 for(int s=0;s<freq;s++){
2412                                                         newSeq += base;
2413                                                         //otuCountsFile << base;
2414                                                 }
2415                                         }
2416                                         otuCountsFile << newSeq.substr(4) << endl;
2417                                 }
2418                                 otuCountsFile << endl;
2419                         }
2420                 }
2421                 otuCountsFile.close();
2422                 outputNames.push_back(otuCountsFileName);
2423
2424         }
2425         catch(exception& e) {
2426                 m->errorOut(e, "ShhherCommand", "writeClusters");
2427                 exit(1);        
2428         }               
2429 }
2430
2431 //**********************************************************************************************************************