]> git.donarmstrong.com Git - mothur.git/blob - shhhercommand.cpp
removed various build warnings
[mothur.git] / shhhercommand.cpp
1 /*
2  *  shhher.cpp
3  *  Mothur
4  *
5  *  Created by Pat Schloss on 12/27/10.
6  *  Copyright 2010 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "shhhercommand.h"
11
12 #include "readcolumn.h"
13 #include "readmatrix.hpp"
14 #include "rabundvector.hpp"
15 #include "sabundvector.hpp"
16 #include "listvector.hpp"
17 #include "cluster.hpp"
18 #include "sparsematrix.hpp"
19 #include <cfloat>
20
21 //**********************************************************************************************************************
22
23 #define NUMBINS 1000
24 #define HOMOPS 10
25 #define MIN_COUNT 0.1
26 #define MIN_WEIGHT 0.1
27 #define MIN_TAU 0.0001
28 #define MIN_ITER 10
29
30 //**********************************************************************************************************************
31
32 vector<string> ShhherCommand::getValidParameters(){     
33         try {
34                 string Array[] =  {     
35                         "file", "flow", "lookup", "cutoff", "sigma", "outputdir","inputdir", "processors"       
36                 };
37                 
38                 vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
39                 return myArray;
40         }
41         catch(exception& e) {
42                 m->errorOut(e, "ShhherCommand", "getValidParameters");
43                 exit(1);
44         }
45 }
46
47 //**********************************************************************************************************************
48
49 ShhherCommand::ShhherCommand(){ 
50         try {
51                 abort = true;
52                 
53                 //initialize outputTypes
54                 vector<string> tempOutNames;
55                 outputTypes["pn.dist"] = tempOutNames;
56
57         }
58         catch(exception& e) {
59                 m->errorOut(e, "ShhherCommand", "ShhherCommand");
60                 exit(1);
61         }
62 }
63
64 //**********************************************************************************************************************
65
66 vector<string> ShhherCommand::getRequiredParameters(){  
67         try {
68                 string Array[] =  {"flow"};
69                 vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
70                 return myArray;
71         }
72         catch(exception& e) {
73                 m->errorOut(e, "ShhherCommand", "getRequiredParameters");
74                 exit(1);
75         }
76 }
77
78 //**********************************************************************************************************************
79
80 vector<string> ShhherCommand::getRequiredFiles(){       
81         try {
82                 vector<string> myArray;
83                 return myArray;
84         }
85         catch(exception& e) {
86                 m->errorOut(e, "ShhherCommand", "getRequiredFiles");
87                 exit(1);
88         }
89 }
90
91 //**********************************************************************************************************************
92
93 ShhherCommand::ShhherCommand(string option) {
94         try {
95
96 #ifdef USE_MPI
97                 MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
98                 MPI_Comm_size(MPI_COMM_WORLD, &ncpus);
99
100                 if(pid == 0){
101 #endif
102                 
103                 
104                 abort = false;
105                 
106                 
107                 //allow user to run help
108                 if(option == "help") { help(); abort = true; }
109                 
110                 else {
111                         
112                         //valid paramters for this command
113                         string AlignArray[] =  {
114                                 "file", "flow", "lookup", "cutoff", "sigma", "outputdir","inputdir", "processors"       
115                         };
116                         
117                         vector<string> myArray (AlignArray, AlignArray+(sizeof(AlignArray)/sizeof(string)));
118                         
119                         OptionParser parser(option);
120                         map<string,string> parameters = parser.getParameters();
121                         
122                         ValidParameters validParameter;
123                         map<string,string>::iterator it;
124                         
125                         //check to make sure all parameters are valid for command
126                         for (it = parameters.begin(); it != parameters.end(); it++) { 
127                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
128                         }
129                         
130                         //initialize outputTypes
131                         vector<string> tempOutNames;
132                         outputTypes["pn.dist"] = tempOutNames;
133                         //                      outputTypes["fasta"] = tempOutNames;
134                         
135                         //if the user changes the input directory command factory will send this info to us in the output parameter 
136                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
137                         if (inputDir == "not found"){   inputDir = "";          }
138                         else {
139                                 string path;
140                                 it = parameters.find("flow");
141                                 //user has given a template file
142                                 if(it != parameters.end()){ 
143                                         path = m->hasPath(it->second);
144                                         //if the user has not given a path then, add inputdir. else leave path alone.
145                                         if (path == "") {       parameters["flow"] = inputDir + it->second;             }
146                                 }
147                                 
148                                 it = parameters.find("lookup");
149                                 //user has given a template file
150                                 if(it != parameters.end()){ 
151                                         path = m->hasPath(it->second);
152                                         //if the user has not given a path then, add inputdir. else leave path alone.
153                                         if (path == "") {       parameters["lookup"] = inputDir + it->second;           }
154                                 }
155
156                                 it = parameters.find("file");
157                                 //user has given a template file
158                                 if(it != parameters.end()){ 
159                                         path = m->hasPath(it->second);
160                                         //if the user has not given a path then, add inputdir. else leave path alone.
161                                         if (path == "") {       parameters["file"] = inputDir + it->second;             }
162                                 }
163                         }
164                         
165                         
166                         //check for required parameters
167                         flowFileName = validParameter.validFile(parameters, "flow", true);
168                         flowFilesFileName = validParameter.validFile(parameters, "file", true);
169                         if (flowFileName == "not found" && flowFilesFileName == "not found") {
170                                 m->mothurOut("values for either flow or file must be provided for the shhh.seqs command.");
171                                 m->mothurOutEndLine();
172                                 abort = true; 
173                         }
174                         else if (flowFileName == "not open" || flowFilesFileName == "not open") { abort = true; }       
175                         
176                         //if the user changes the output directory command factory will send this info to us in the output parameter 
177                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  
178                                 outputDir = ""; 
179                                 outputDir += m->hasPath(flowFileName); //if user entered a file with a path then preserve it    
180                         }
181                         
182                         
183                         //check for optional parameter and set defaults
184                         // ...at some point should added some additional type checking...
185                         string temp;
186                         temp = validParameter.validFile(parameters, "lookup", true);
187                         if (temp == "not found")        {       lookupFileName = "LookUp_Titanium.pat"; }
188                         else if(temp == "not open")     {       abort = true;                   } 
189                         else                                            {       lookupFileName = temp;  }
190                         
191                         temp = validParameter.validFile(parameters, "processors", false);if (temp == "not found"){      temp = "1";                     }
192                         convert(temp, processors); 
193
194                         temp = validParameter.validFile(parameters, "cutoff", false);   if (temp == "not found"){       temp = "0.01";          }
195                         convert(temp, cutoff); 
196                         
197                         temp = validParameter.validFile(parameters, "mindelta", false); if (temp == "not found"){       temp = "0.000001";      }
198                         convert(temp, minDelta); 
199
200                         temp = validParameter.validFile(parameters, "maxiter", false);  if (temp == "not found"){       temp = "1000";          }
201                         convert(temp, maxIters); 
202
203                         temp = validParameter.validFile(parameters, "sigma", false);if (temp == "not found")    {       temp = "60";            }
204                         convert(temp, sigma); 
205                         
206                         globaldata = GlobalData::getInstance();
207                 }
208                         
209 #ifdef USE_MPI
210                 }                               
211 #endif
212                                 
213         }
214         catch(exception& e) {
215                 m->errorOut(e, "ShhherCommand", "ShhherCommand");
216                 exit(1);
217         }
218 }
219
220 //**********************************************************************************************************************
221
222 ShhherCommand::~ShhherCommand(){}
223
224 //**********************************************************************************************************************
225
226 void ShhherCommand::help(){
227         try {
228                 m->mothurOut("The shhher command reads a file containing flowgrams and creates a file of corrected sequences.\n");
229         }
230         catch(exception& e) {
231                 m->errorOut(e, "ShhherCommand", "help");
232                 exit(1);
233         }
234 }
235
236 //**********************************************************************************************************************
237 #ifdef USE_MPI
238 int ShhherCommand::execute(){
239         try {
240                 int tag = 1976;
241                 MPI_Status status; 
242
243                 double begClock = clock();
244                 unsigned long int begTime = time(NULL);
245
246                 cout.setf(ios::fixed, ios::floatfield);
247                 cout.setf(ios::showpoint);
248                 cout << setprecision(2);
249                 
250                 
251                 if(pid == 0){
252
253                         for(int i=1;i<ncpus;i++){
254                                 MPI_Send(&abort, 1, MPI_INT, i, tag, MPI_COMM_WORLD);
255                         }
256                         if(abort == 1){ return 0;       }
257
258                         processors = ncpus;
259                         
260                         cout << "\nGetting preliminary data..." << endl;
261                         getSingleLookUp();
262                         getJointLookUp();
263                         
264                         vector<string> flowFileVector;
265                         if(flowFilesFileName != "not found"){
266                                 string fName;
267
268                                 ifstream flowFilesFile;
269                                 m->openInputFile(flowFilesFileName, flowFilesFile);
270                                 while(flowFilesFile){
271                                         flowFilesFile >> fName;
272                                         flowFileVector.push_back(fName);
273                                         m->gobble(flowFilesFile);
274                                 }
275                         }
276                         else{
277                                 flowFileVector.push_back(flowFileName);
278                         }
279                         int numFiles = flowFileVector.size();
280
281                         for(int i=1;i<ncpus;i++){
282                                 MPI_Send(&numFiles, 1, MPI_INT, i, tag, MPI_COMM_WORLD);
283                         }
284                         
285                         for(int i=0;i<numFiles;i++){
286                                 flowFileName = flowFileVector[i];
287                         
288                                 cout << "\n>>>>>\tProcessing " << flowFileName << " (file " << i+1 << " of " << numFiles << ")\t<<<<<" << endl;
289                                 cout << "Reading flowgrams..." << endl;
290                                 getFlowData();
291                                 cout << "Identifying unique flowgrams..." << endl;
292                                 getUniques();
293
294                                 cout << "Calculating distances between flowgrams..." << endl;
295                                 char fileName[1024];
296                                 strcpy(fileName, flowFileName.c_str());
297
298                                 for(int i=1;i<ncpus;i++){
299                                         MPI_Send(&fileName[0], 1024, MPI_CHAR, i, tag, MPI_COMM_WORLD);
300
301                                         MPI_Send(&numSeqs, 1, MPI_INT, i, tag, MPI_COMM_WORLD);
302                                         MPI_Send(&numUniques, 1, MPI_INT, i, tag, MPI_COMM_WORLD);
303                                         MPI_Send(&numFlowCells, 1, MPI_INT, i, tag, MPI_COMM_WORLD);
304                                         MPI_Send(&flowDataIntI[0], numSeqs * numFlowCells, MPI_SHORT, i, tag, MPI_COMM_WORLD);
305                                         MPI_Send(&flowDataPrI[0], numSeqs * numFlowCells, MPI_DOUBLE, i, tag, MPI_COMM_WORLD);
306                                         MPI_Send(&mapUniqueToSeq[0], numSeqs, MPI_INT, i, tag, MPI_COMM_WORLD);
307                                         MPI_Send(&mapSeqToUnique[0], numSeqs, MPI_INT, i, tag, MPI_COMM_WORLD);
308                                         MPI_Send(&lengths[0], numSeqs, MPI_INT, i, tag, MPI_COMM_WORLD);
309                                         MPI_Send(&jointLookUp[0], NUMBINS * NUMBINS, MPI_DOUBLE, i, tag, MPI_COMM_WORLD);
310                                         MPI_Send(&cutoff, 1, MPI_DOUBLE, i, tag, MPI_COMM_WORLD);
311                                 }                               
312                                                         
313                                 string distFileName = flowDistMPI(0, int(sqrt(1.0/float(ncpus)) * numUniques));
314                                 
315                                 int done;
316                                 for(int i=1;i<ncpus;i++){
317                                         MPI_Recv(&done, 1, MPI_INT, i, tag, MPI_COMM_WORLD, &status);
318                                         
319                                         m->appendFiles((distFileName + ".temp." + toString(i)), distFileName);
320                                         remove((distFileName + ".temp." + toString(i)).c_str());
321                                 }
322
323                                 string namesFileName = createNamesFile();
324                                 
325                                 cout << "\nClustering flowgrams..." << endl;
326                                 string listFileName = cluster(distFileName, namesFileName);
327         //                      string listFileName = "PriestPot_C7.pn.list";
328         //                      string listFileName = "test.mock_rep3.v69.pn.list";
329                         
330                                 getOTUData(listFileName);
331                                 initPyroCluster();
332
333                                 for(int i=1;i<ncpus;i++){
334                                         MPI_Send(&numOTUs, 1, MPI_INT, i, tag, MPI_COMM_WORLD);
335                                         MPI_Send(&singleLookUp[0], singleLookUp.size(), MPI_DOUBLE, i, tag, MPI_COMM_WORLD);
336                                         MPI_Send(&uniqueFlowgrams[0], numFlowCells * numUniques, MPI_SHORT, i, tag, MPI_COMM_WORLD);
337                                         MPI_Send(&sigma, 1, MPI_DOUBLE, i, tag, MPI_COMM_WORLD);
338                                 }
339                                 
340                                 
341                                 double maxDelta = 0;
342                                 int iter = 0;
343                                 
344                                 int numOTUsOnCPU = numOTUs / ncpus;
345                                 int numSeqsOnCPU = numSeqs / ncpus;
346                                 
347                                 cout << "\nDenoising flowgrams..." << endl;
348                                 cout << "iter\tmaxDelta\tnLL\t\tcycletime" << endl;
349                                 
350                                 while((maxIters == 0 && maxDelta > minDelta) || iter < MIN_ITER || (maxDelta > minDelta && iter < maxIters)){
351
352                                         double cycClock = clock();
353                                         unsigned long int cycTime = time(NULL);
354                                         fill();
355
356                                         int total = singleTau.size();
357                                         for(int i=1;i<ncpus;i++){
358                                                 MPI_Send(&total, 1, MPI_INT, i, tag, MPI_COMM_WORLD);
359                                                 MPI_Send(&change[0], numOTUs, MPI_SHORT, i, tag, MPI_COMM_WORLD);
360                                                 MPI_Send(&centroids[0], numOTUs, MPI_INT, i, tag, MPI_COMM_WORLD);
361                                                 
362                                                 MPI_Send(&singleTau[0], total, MPI_DOUBLE, i, tag, MPI_COMM_WORLD);
363                                                 MPI_Send(&seqNumber[0], total, MPI_INT, i, tag, MPI_COMM_WORLD);
364                                                 MPI_Send(&seqIndex[0], total, MPI_INT, i, tag, MPI_COMM_WORLD);
365                                                 MPI_Send(&nSeqsPerOTU[0], numOTUs, MPI_INT, i, tag, MPI_COMM_WORLD);
366                                                 MPI_Send(&cumNumSeqs[0], numOTUs, MPI_INT, i, tag, MPI_COMM_WORLD);
367                                         }
368                                 
369                                         calcCentroidsDriver(0, numOTUsOnCPU);
370                                         
371                                         for(int i=1;i<ncpus;i++){
372                                                 int otuStart = i * numOTUs / ncpus;
373                                                 int otuStop = (i + 1) * numOTUs / ncpus;
374                                                 
375                                                 vector<int> tempCentroids(numOTUs, 0);
376                                                 vector<short> tempChange(numOTUs, 0);
377                                                 
378                                                 MPI_Recv(&tempCentroids[0], numOTUs, MPI_INT, i, tag, MPI_COMM_WORLD, &status);
379                                                 MPI_Recv(&tempChange[0], numOTUs, MPI_SHORT, i, tag, MPI_COMM_WORLD, &status);
380                                                 
381                                                 for(int j=otuStart;j<otuStop;j++){
382                                                         centroids[j] = tempCentroids[j];
383                                                         change[j] = tempChange[j];
384                                                 }
385                                         }
386                                                                         
387                                         maxDelta = getNewWeights();
388                                         double nLL = getLikelihood();
389                                         checkCentroids();
390                                         
391                                         for(int i=1;i<ncpus;i++){
392                                                 MPI_Send(&centroids[0], numOTUs, MPI_INT, i, tag, MPI_COMM_WORLD);
393                                                 MPI_Send(&weight[0], numOTUs, MPI_DOUBLE, i, tag, MPI_COMM_WORLD);
394                                                 MPI_Send(&change[0], numOTUs, MPI_SHORT, i, tag, MPI_COMM_WORLD);
395                                         }
396                                         
397                                         calcNewDistancesParent(0, numSeqsOnCPU);
398
399                                         total = singleTau.size();
400
401                                         for(int i=1;i<ncpus;i++){
402                                                 int childTotal;
403                                                 int seqStart = i * numSeqs / ncpus;
404                                                 int seqStop = (i + 1) * numSeqs / ncpus;
405                                                 
406                                                 MPI_Recv(&childTotal, 1, MPI_INT, i, tag, MPI_COMM_WORLD, &status);
407
408                                                 vector<int> childSeqIndex(childTotal, 0);
409                                                 vector<double> childSingleTau(childTotal, 0);
410                                                 vector<double> childDist(numSeqs * numOTUs, 0);
411                                                 vector<int> otuIndex(childTotal, 0);
412                                                 
413                                                 MPI_Recv(&childSeqIndex[0], childTotal, MPI_INT, i, tag, MPI_COMM_WORLD, &status);
414                                                 MPI_Recv(&childSingleTau[0], childTotal, MPI_DOUBLE, i, tag, MPI_COMM_WORLD, &status);
415                                                 MPI_Recv(&childDist[0], numOTUs * numSeqs, MPI_DOUBLE, i, tag, MPI_COMM_WORLD, &status);
416                                                 MPI_Recv(&otuIndex[0], childTotal, MPI_INT, i, tag, MPI_COMM_WORLD, &status);
417
418                                                 int oldTotal = total;
419                                                 total += childTotal;
420                                                 singleTau.resize(total, 0);
421                                                 seqIndex.resize(total, 0);
422                                                 seqNumber.resize(total, 0);
423                                                 
424                                                 int childIndex = 0;
425                                                 
426                                                 for(int j=oldTotal;j<total;j++){
427                                                         int otuI = otuIndex[childIndex];
428                                                         int seqI = childSeqIndex[childIndex];
429                                                         
430                                                         singleTau[j] = childSingleTau[childIndex];
431                                                         
432                                                         aaP[otuI][nSeqsPerOTU[otuI]] = j;
433                                                         aaI[otuI][nSeqsPerOTU[otuI]] = seqI;
434                                                         nSeqsPerOTU[otuI]++;
435                                                         childIndex++;
436                                                 }
437                                                 
438                                                 int index = seqStart * numOTUs;
439                                                 for(int j=seqStart;j<seqStop;j++){
440                                                         for(int k=0;k<numOTUs;k++){
441                                                                 dist[index] = childDist[index];
442                                                                 index++;
443                                                         }
444                                                 }                                       
445                                         }
446                                         
447                                         iter++;
448                                         
449                                         cout << iter << '\t' << maxDelta << '\t' << setprecision(2) << nLL << '\t' << time(NULL) - cycTime << '\t' << setprecision(6) << (clock() - cycClock)/(double)CLOCKS_PER_SEC << endl;                   
450
451                                         if((maxIters == 0 && maxDelta > minDelta) || iter < MIN_ITER || (maxDelta > minDelta && iter < maxIters)){
452                                                 int live = 1;
453                                                 for(int i=1;i<ncpus;i++){
454                                                         MPI_Send(&live, 1, MPI_INT, i, tag, MPI_COMM_WORLD);
455                                                 }
456                                         }
457                                         else{
458                                                 int live = 0;
459                                                 for(int i=1;i<ncpus;i++){
460                                                         MPI_Send(&live, 1, MPI_INT, i, tag, MPI_COMM_WORLD); //send kill command
461                                                 }
462                                         }
463                                         
464                                 }       
465                                 
466                                 cout << "\nFinalizing..." << endl;
467                                 fill();
468                                 setOTUs();
469                                 vector<int> otuCounts(numOTUs, 0);
470                                 for(int i=0;i<numSeqs;i++)      {       otuCounts[otuData[i]]++;        }
471                                 calcCentroidsDriver(0, numOTUs);
472                                 writeQualities(otuCounts);
473                                 writeSequences(otuCounts);
474                                 writeNames(otuCounts);
475                                 writeClusters(otuCounts);
476                                 writeGroups();
477                                 
478                                 remove(distFileName.c_str());
479                                 remove(namesFileName.c_str());
480                                 remove(listFileName.c_str());
481                                 
482                                 cout << "Total time to process " << flowFileName << ":\t" << time(NULL) - begTime << '\t' << setprecision(6) << (clock() - begClock)/(double)CLOCKS_PER_SEC << endl;                    
483                         }
484
485                 }
486                 else{
487                         int abort = 1;
488                         bool live = 1;
489
490                         MPI_Recv(&abort, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
491                         if(abort){      return 0;       }
492
493                         int numFiles;
494                         MPI_Recv(&numFiles, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
495
496                         for(int i=0;i<numFiles;i++){
497                                 //Now into the pyrodist part
498                                 char fileName[1024];
499                                 MPI_Recv(&fileName, 1024, MPI_CHAR, 0, tag, MPI_COMM_WORLD, &status);
500                                 MPI_Recv(&numSeqs, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
501                                 MPI_Recv(&numUniques, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
502                                 MPI_Recv(&numFlowCells, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
503                                 
504                                 flowDataIntI.resize(numSeqs * numFlowCells);
505                                 flowDataPrI.resize(numSeqs * numFlowCells);
506                                 mapUniqueToSeq.resize(numSeqs);
507                                 mapSeqToUnique.resize(numSeqs);
508                                 lengths.resize(numSeqs);
509                                 jointLookUp.resize(NUMBINS * NUMBINS);
510
511                                 MPI_Recv(&flowDataIntI[0], numSeqs * numFlowCells, MPI_SHORT, 0, tag, MPI_COMM_WORLD, &status);
512                                 MPI_Recv(&flowDataPrI[0], numSeqs * numFlowCells, MPI_DOUBLE, 0, tag, MPI_COMM_WORLD, &status);
513                                 MPI_Recv(&mapUniqueToSeq[0], numSeqs, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
514                                 MPI_Recv(&mapSeqToUnique[0], numSeqs, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
515                                 MPI_Recv(&lengths[0], numSeqs, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
516                                 MPI_Recv(&jointLookUp[0], NUMBINS * NUMBINS, MPI_DOUBLE, 0, tag, MPI_COMM_WORLD, &status);
517                                 MPI_Recv(&cutoff, 1, MPI_DOUBLE, 0, tag, MPI_COMM_WORLD, &status);
518
519                                 flowFileName = string(fileName);
520                                 int flowDistStart = int(sqrt(float(pid)/float(ncpus)) * numUniques);
521                                 int flowDistEnd = int(sqrt(float(pid+1)/float(ncpus)) * numUniques);
522                                 
523                                 string distanceStringChild = flowDistMPI(flowDistStart, flowDistEnd);
524
525                                 int done = 1;
526                                 MPI_Send(&done, 1, MPI_INT, 0, tag, MPI_COMM_WORLD);
527
528                                 //Now into the pyronoise part
529                                 MPI_Recv(&numOTUs, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
530                                 
531                                 singleLookUp.resize(HOMOPS * NUMBINS);
532                                 uniqueFlowgrams.resize(numUniques * numFlowCells);
533                                 weight.resize(numOTUs);
534                                 centroids.resize(numOTUs);
535                                 change.resize(numOTUs);
536                                 dist.assign(numOTUs * numSeqs, 0);
537                                 nSeqsPerOTU.resize(numOTUs);
538                                 cumNumSeqs.resize(numOTUs);
539
540                                 MPI_Recv(&singleLookUp[0], singleLookUp.size(), MPI_DOUBLE, 0, tag, MPI_COMM_WORLD, &status);
541                                 MPI_Recv(&uniqueFlowgrams[0], uniqueFlowgrams.size(), MPI_SHORT, 0, tag, MPI_COMM_WORLD, &status);
542                                 MPI_Recv(&sigma, 1, MPI_DOUBLE, 0, tag, MPI_COMM_WORLD, &status);
543                                 
544                                 int startOTU = pid * numOTUs / ncpus;
545                                 int endOTU = (pid + 1) * numOTUs / ncpus;
546
547                                 int startSeq = pid * numSeqs / ncpus;
548                                 int endSeq = (pid + 1) * numSeqs /ncpus;
549                                 
550                                 int total;
551
552                                 while(live){
553
554                                         MPI_Recv(&total, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
555                                         singleTau.assign(total, 0.0000);
556                                         seqNumber.assign(total, 0);
557                                         seqIndex.assign(total, 0);
558                                         
559                                         MPI_Recv(&change[0], numOTUs, MPI_SHORT, 0, tag, MPI_COMM_WORLD, &status);
560                                         MPI_Recv(&centroids[0], numOTUs, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
561                                         MPI_Recv(&singleTau[0], total, MPI_DOUBLE, 0, tag, MPI_COMM_WORLD, &status);
562                                         MPI_Recv(&seqNumber[0], total, MPI_DOUBLE, 0, tag, MPI_COMM_WORLD, &status);
563                                         MPI_Recv(&seqIndex[0], total, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
564                                         MPI_Recv(&nSeqsPerOTU[0], total, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
565                                         MPI_Recv(&cumNumSeqs[0], numOTUs, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
566                                         
567                                         calcCentroidsDriver(startOTU, endOTU);
568
569                                         MPI_Send(&centroids[0], numOTUs, MPI_INT, 0, tag, MPI_COMM_WORLD);
570                                         MPI_Send(&change[0], numOTUs, MPI_SHORT, 0, tag, MPI_COMM_WORLD);
571
572                                         
573                                         MPI_Recv(&centroids[0], numOTUs, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
574                                         MPI_Recv(&weight[0], numOTUs, MPI_DOUBLE, 0, tag, MPI_COMM_WORLD, &status);
575                                         MPI_Recv(&change[0], numOTUs, MPI_SHORT, 0, tag, MPI_COMM_WORLD, &status);
576
577                                         vector<int> otuIndex(total, 0);
578                                         calcNewDistancesChildMPI(startSeq, endSeq, otuIndex);
579                                         total = otuIndex.size();
580                                         
581                                         MPI_Send(&total, 1, MPI_INT, 0, tag, MPI_COMM_WORLD);
582                                         MPI_Send(&seqIndex[0], total, MPI_INT, 0, tag, MPI_COMM_WORLD);
583                                         MPI_Send(&singleTau[0], total, MPI_DOUBLE, 0, tag, MPI_COMM_WORLD);
584                                         MPI_Send(&dist[0], numOTUs * numSeqs, MPI_DOUBLE, 0, tag, MPI_COMM_WORLD);
585                                         MPI_Send(&otuIndex[0], total, MPI_INT, 0, tag, MPI_COMM_WORLD);
586                                 
587                                         MPI_Recv(&live, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
588                                 }
589                         }
590                 }               
591
592                 MPI_Barrier(MPI_COMM_WORLD);
593                 return 0;
594
595         }
596         catch(exception& e) {
597                 m->errorOut(e, "ShhherCommand", "execute");
598                 exit(1);
599         }
600 }
601
602 /**************************************************************************************************/
603
604 string ShhherCommand::flowDistMPI(int startSeq, int stopSeq){
605         try{            
606                 ostringstream outStream;
607                 outStream.setf(ios::fixed, ios::floatfield);
608                 outStream.setf(ios::dec, ios::basefield);
609                 outStream.setf(ios::showpoint);
610                 outStream.precision(6);
611                 
612                 int begTime = time(NULL);
613                 double begClock = clock();
614                 
615                 for(int i=startSeq;i<stopSeq;i++){
616                         for(int j=0;j<i;j++){
617                                 float flowDistance = calcPairwiseDist(mapUniqueToSeq[i], mapUniqueToSeq[j]);
618                                 
619                                 if(flowDistance < 1e-6){
620                                         outStream << mapUniqueToSeq[i] << '\t' << mapUniqueToSeq[j] << '\t' << 0.000000 << endl;
621                                 }
622                                 else if(flowDistance <= cutoff){
623                                         outStream << mapUniqueToSeq[i] << '\t' << mapUniqueToSeq[j] << '\t' << flowDistance << endl;
624                                 }
625                         }
626                         if(i % 100 == 0){
627                                 cout << i << "\t" << (time(NULL) - begTime) << "\t" << (clock()-begClock)/CLOCKS_PER_SEC << endl;
628                         }
629                 }
630                 cout << stopSeq << "\t" << (time(NULL) - begTime) << "\t" << (clock()-begClock)/CLOCKS_PER_SEC << endl;
631                 
632                 string fDistFileName = flowFileName.substr(0,flowFileName.find_last_of('.')) + ".pn.dist";
633                 if(pid != 0){   fDistFileName += ".temp." + toString(pid);      }
634
635                 ofstream distFile(fDistFileName.c_str());
636                 distFile << outStream.str();            
637                 distFile.close();
638                 
639                 return fDistFileName;
640         }
641         catch(exception& e) {
642                 m->errorOut(e, "ShhherCommand", "flowDistParentFork");
643                 exit(1);
644         }
645 }
646
647 #else
648 //**********************************************************************************************************************
649
650 int ShhherCommand::execute(){
651         try {
652                 if (abort == true) { return 0; }
653                 
654                 cout.setf(ios::fixed, ios::floatfield);
655                 cout.setf(ios::showpoint);
656                 
657                 getSingleLookUp();
658                 getJointLookUp();
659                                 
660                 vector<string> flowFileVector;
661                 if(flowFilesFileName != "not found"){
662                         string fName;
663                         
664                         ifstream flowFilesFile;
665                         m->openInputFile(flowFilesFileName, flowFilesFile);
666                         while(flowFilesFile){
667                                 flowFilesFile >> fName;
668                                 flowFileVector.push_back(fName);
669                                 m->gobble(flowFilesFile);
670                         }
671                 }
672                 else{
673                         flowFileVector.push_back(flowFileName);
674                 }
675                 int numFiles = flowFileVector.size();
676                 
677                 
678                 for(int i=0;i<numFiles;i++){
679                         flowFileName = flowFileVector[i];
680
681                         cout << "\n>>>>>\tProcessing " << flowFileName << " (file " << i+1 << " of " << numFiles << ")\t<<<<<" << endl;
682                         cout << "Reading flowgrams..." << endl;
683                         getFlowData();
684                         cout << "Identifying unique flowgrams..." << endl;
685                         getUniques();
686                         
687                         
688                         cout << "Calculating distances between flowgrams..." << endl;                   
689                         string distFileName = createDistFile(processors);
690                         string namesFileName = createNamesFile();
691                         
692                         cout << "\nClustering flowgrams..." << endl;
693                         string listFileName = cluster(distFileName, namesFileName);
694                         getOTUData(listFileName);
695                         
696                         initPyroCluster();
697                         
698                         double maxDelta = 0;
699                         int iter = 0;
700                         
701                         double begClock = clock();
702                         unsigned long int begTime = time(NULL);
703
704                         cout << "\nDenoising flowgrams..." << endl;
705                         cout << "iter\tmaxDelta\tnLL\t\tcycletime" << endl;
706                         
707                         while((maxIters == 0 && maxDelta > minDelta) || iter < MIN_ITER || (maxDelta > minDelta && iter < maxIters)){
708                                 
709                                 double cycClock = clock();
710                                 unsigned long int cycTime = time(NULL);
711                                 fill();
712                                 
713                                 calcCentroids();
714                                 
715                                 maxDelta = getNewWeights();
716                                 double nLL = getLikelihood();
717                                 checkCentroids();
718                                 
719                                 calcNewDistances();
720
721                                 iter++;
722                                 
723                                 cout << iter << '\t' << maxDelta << '\t' << setprecision(2) << nLL << '\t' << time(NULL) - cycTime << '\t' << setprecision(6) << (clock() - cycClock)/(double)CLOCKS_PER_SEC << endl;                   
724                         }       
725                         
726                         cout << "\nFinalizing..." << endl;
727                         fill();
728                         setOTUs();
729                         
730                         vector<int> otuCounts(numOTUs, 0);
731                         for(int i=0;i<numSeqs;i++)      {       otuCounts[otuData[i]]++;        }
732                         
733                         calcCentroidsDriver(0, numOTUs);
734                         writeQualities(otuCounts);
735                         writeSequences(otuCounts);
736                         writeNames(otuCounts);
737                         writeClusters(otuCounts);
738                         writeGroups();
739                         
740                         remove(distFileName.c_str());
741                         remove(namesFileName.c_str());
742                         remove(listFileName.c_str());
743                         
744                         cout << "Total time to process " << flowFileName << ":\t" << time(NULL) - begTime << '\t' << setprecision(6) << (clock() - begClock)/(double)CLOCKS_PER_SEC << endl;                    
745                 }
746                 return 0;
747         }
748         catch(exception& e) {
749                 m->errorOut(e, "ShhherCommand", "execute");
750                 exit(1);
751         }
752 }
753 #endif
754 /**************************************************************************************************/
755
756 void ShhherCommand::getFlowData(){
757         try{
758                 ifstream flowFile;
759                 m->openInputFile(flowFileName, flowFile);
760                 
761                 string seqName;
762                 
763                 int currentNumFlowCells;
764                 
765                 float intensity;
766                 
767                 flowFile >> numFlowCells;
768                 int index = 0;//pcluster
769                 while(!flowFile.eof()){
770                         flowFile >> seqName >> currentNumFlowCells;
771                         lengths.push_back(currentNumFlowCells);
772
773                         seqNameVector.push_back(seqName);
774                         nameMap[seqName] = index++;//pcluster
775
776                         for(int i=0;i<numFlowCells;i++){
777                                 flowFile >> intensity;
778                                 if(intensity > 9.99)    {       intensity = 9.99;       }
779                                 int intI = int(100 * intensity + 0.0001);
780                                 flowDataIntI.push_back(intI);
781                         }
782                         m->gobble(flowFile);
783                 }
784                 flowFile.close();
785                 
786                 numSeqs = seqNameVector.size();         
787                 
788                 for(int i=0;i<numSeqs;i++){
789                         int iNumFlowCells = i * numFlowCells;
790                         for(int j=lengths[i];j<numFlowCells;j++){
791                                 flowDataIntI[iNumFlowCells + j] = 0;
792                         }
793                 }
794                 
795         }
796         catch(exception& e) {
797                 m->errorOut(e, "ShhherCommand", "getFlowData");
798                 exit(1);
799         }
800 }
801
802 /**************************************************************************************************/
803
804 void ShhherCommand::getSingleLookUp(){
805         try{
806                 //      these are the -log probabilities that a signal corresponds to a particular homopolymer length
807                 singleLookUp.assign(HOMOPS * NUMBINS, 0);
808                 
809                 int index = 0;
810                 ifstream lookUpFile;
811                 m->openInputFile(lookupFileName, lookUpFile);
812                 
813                 for(int i=0;i<HOMOPS;i++){
814                         float logFracFreq;
815                         lookUpFile >> logFracFreq;
816                         
817                         for(int j=0;j<NUMBINS;j++)      {
818                                 lookUpFile >> singleLookUp[index];
819                                 index++;
820                         }
821                 }       
822                 lookUpFile.close();
823         }
824         catch(exception& e) {
825                 m->errorOut(e, "ShhherCommand", "getSingleLookUp");
826                 exit(1);
827         }
828 }
829
830 /**************************************************************************************************/
831
832 void ShhherCommand::getJointLookUp(){
833         try{
834                 
835                 //      the most likely joint probability (-log) that two intenities have the same polymer length
836                 jointLookUp.resize(NUMBINS * NUMBINS, 0);
837                 
838                 for(int i=0;i<NUMBINS;i++){
839                         for(int j=0;j<NUMBINS;j++){             
840                                 
841                                 float minSum = 10000000000;
842                                 
843                                 for(int k=0;k<HOMOPS;k++){
844                                         float sum = singleLookUp[k * NUMBINS + i] + singleLookUp[k * NUMBINS + j];
845                                         
846                                         if(sum < minSum)        {       minSum = sum;           }
847                                 }       
848                                 jointLookUp[i * NUMBINS + j] = minSum;
849                         }
850                 }
851         }
852         catch(exception& e) {
853                 m->errorOut(e, "ShhherCommand", "getJointLookUp");
854                 exit(1);
855         }
856 }
857
858 /**************************************************************************************************/
859
860 float ShhherCommand::getProbIntensity(int intIntensity){                          
861         try{
862                 float minNegLogProb = 10000000000; 
863                 
864                 for(int i=0;i<HOMOPS;i++){//loop signal strength
865                         float negLogProb = singleLookUp[i * NUMBINS + intIntensity];
866                         if(negLogProb < minNegLogProb)  {       minNegLogProb = negLogProb; }
867                 }
868                 
869                 return minNegLogProb;
870         }
871         catch(exception& e) {
872                 m->errorOut(e, "ShhherCommand", "getProbIntensity");
873                 exit(1);
874         }
875 }
876
877 /**************************************************************************************************/
878
879 void ShhherCommand::getUniques(){
880         try{
881                 
882                 
883                 numUniques = 0;
884                 uniqueFlowgrams.assign(numFlowCells * numSeqs, -1);
885                 uniqueCount.assign(numSeqs, 0);                                                 //      anWeights
886                 uniqueLengths.assign(numSeqs, 0);
887                 mapSeqToUnique.assign(numSeqs, -1);
888                 mapUniqueToSeq.assign(numSeqs, -1);
889                 
890                 vector<short> uniqueFlowDataIntI(numFlowCells * numSeqs, -1);
891                 
892                 for(int i=0;i<numSeqs;i++){
893                         int index = 0;
894                         
895                         vector<short> current(numFlowCells);
896                         for(int j=0;j<numFlowCells;j++){        current[j] = short(((flowDataIntI[i * numFlowCells + j] + 50.0)/100.0));        }
897                                                 
898                         for(int j=0;j<numUniques;j++){
899                                 int offset = j * numFlowCells;
900                                 bool toEnd = 1;
901                                 
902                                 for(int k=0;k<numFlowCells;k++){
903                                         if(current[k] != uniqueFlowgrams[offset + k]){
904                                                 toEnd = 0;
905                                                 break;
906                                         }
907                                 }
908                                 
909                                 if(toEnd){
910                                         mapSeqToUnique[i] = j;
911                                         uniqueCount[j]++;
912                                         index = j;
913                                         break;
914                                 }
915                                 index++;
916                         }
917                         
918                         if(index == numUniques){
919                                 uniqueLengths[numUniques] = lengths[i];
920                                 uniqueCount[numUniques] = 1;
921                                 mapSeqToUnique[i] = numUniques;//anMap
922                                 mapUniqueToSeq[numUniques] = i;//anF
923                                 
924                                 for(int k=0;k<numFlowCells;k++){
925                                         uniqueFlowgrams[numUniques * numFlowCells + k] = current[k];
926                                         uniqueFlowDataIntI[numUniques * numFlowCells + k] = flowDataIntI[i * numFlowCells + k];
927                                 }
928                                 
929                                 numUniques++;
930                         }
931                 }
932                 uniqueFlowDataIntI.resize(numFlowCells * numUniques);
933                 uniqueLengths.resize(numUniques);       
934                 
935                 flowDataPrI.assign(numSeqs * numFlowCells, 0);
936                 for(int i=0;i<flowDataPrI.size();i++)   {       flowDataPrI[i] = getProbIntensity(flowDataIntI[i]);             }
937         }
938         catch(exception& e) {
939                 m->errorOut(e, "ShhherCommand", "getUniques");
940                 exit(1);
941         }
942 }
943
944 /**************************************************************************************************/
945
946 float ShhherCommand::calcPairwiseDist(int seqA, int seqB){
947         try{
948                 int minLength = lengths[mapSeqToUnique[seqA]];
949                 if(lengths[seqB] < minLength){  minLength = lengths[mapSeqToUnique[seqB]];      }
950                 
951                 int ANumFlowCells = seqA * numFlowCells;
952                 int BNumFlowCells = seqB * numFlowCells;
953                 
954                 float dist = 0;
955                 
956                 for(int i=0;i<minLength;i++){
957                         int flowAIntI = flowDataIntI[ANumFlowCells + i];
958                         float flowAPrI = flowDataPrI[ANumFlowCells + i];
959                         
960                         int flowBIntI = flowDataIntI[BNumFlowCells + i];
961                         float flowBPrI = flowDataPrI[BNumFlowCells + i];
962                         dist += jointLookUp[flowAIntI * NUMBINS + flowBIntI] - flowAPrI - flowBPrI;
963                 }
964                 
965                 dist /= (float) minLength;
966                 return dist;
967         }
968         catch(exception& e) {
969                 m->errorOut(e, "ShhherCommand", "calcPairwiseDist");
970                 exit(1);
971         }
972 }
973
974 /**************************************************************************************************/
975
976 void ShhherCommand::flowDistParentFork(string distFileName, int startSeq, int stopSeq){
977         try{            
978
979                 ostringstream outStream;
980                 outStream.setf(ios::fixed, ios::floatfield);
981                 outStream.setf(ios::dec, ios::basefield);
982                 outStream.setf(ios::showpoint);
983                 outStream.precision(6);
984                 
985                 int begTime = time(NULL);
986                 double begClock = clock();
987
988                 for(int i=startSeq;i<stopSeq;i++){
989                         for(int j=0;j<i;j++){
990                                 float flowDistance = calcPairwiseDist(mapUniqueToSeq[i], mapUniqueToSeq[j]);
991
992                                 if(flowDistance < 1e-6){
993                                         outStream << seqNameVector[mapUniqueToSeq[i]] << '\t' << seqNameVector[mapUniqueToSeq[j]] << '\t' << 0.000000 << endl;
994                                 }
995                                 else if(flowDistance <= cutoff){
996                                         outStream << seqNameVector[mapUniqueToSeq[i]] << '\t' << seqNameVector[mapUniqueToSeq[j]] << '\t' << flowDistance << endl;
997                                 }
998                         }
999                         if(i % 100 == 0){
1000                                 m->mothurOut(toString(i) + "\t" + toString(time(NULL) - begTime));
1001                                 m->mothurOut("\t" + toString((clock()-begClock)/CLOCKS_PER_SEC));
1002                                 m->mothurOutEndLine();
1003                         }
1004                 }
1005                 m->mothurOut(toString(stopSeq-1) + "\t" + toString(time(NULL) - begTime));
1006                 m->mothurOut("\t" + toString((clock()-begClock)/CLOCKS_PER_SEC));
1007                 m->mothurOutEndLine();
1008                 
1009                 ofstream distFile(distFileName.c_str());
1010                 distFile << outStream.str();            
1011                 distFile.close();
1012         }
1013         catch(exception& e) {
1014                 m->errorOut(e, "ShhherCommand", "flowDistParentFork");
1015                 exit(1);
1016         }
1017 }
1018
1019 /**************************************************************************************************/
1020
1021 string ShhherCommand::createDistFile(int processors){
1022         try{
1023                 string fDistFileName = flowFileName.substr(0,flowFileName.find_last_of('.')) + ".pn.dist";
1024                                 
1025                 unsigned long int begTime = time(NULL);
1026                 double begClock = clock();
1027
1028                 vector<int> start;
1029                 vector<int> end;
1030                 
1031 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
1032                 if(processors == 1)     {       flowDistParentFork(fDistFileName, 0, numUniques);               }
1033                 else{ //you have multiple processors
1034                         
1035                         if (numSeqs < processors){      processors = 1; }
1036                         
1037                         vector<int> start(processors, 0);
1038                         vector<int> end(processors, 0);
1039                         
1040                         for (int i = 0; i < processors; i++) {
1041                                 start[i] = int(sqrt(float(i)/float(processors)) * numUniques);
1042                                 end[i] = int(sqrt(float(i+1)/float(processors)) * numUniques);
1043                         }
1044                         
1045                         int process = 1;
1046                         vector<int> processIDs;
1047                         
1048                         //loop through and create all the processes you want
1049                         while (process != processors) {
1050                                 int pid = fork();
1051                                 
1052                                 if (pid > 0) {
1053                                         processIDs.push_back(pid);  //create map from line number to pid so you can append files in correct order later
1054                                         process++;
1055                                 }else if (pid == 0){
1056                                         flowDistParentFork(fDistFileName + toString(getpid()) + ".temp", start[process], end[process]);
1057                                         exit(0);
1058                                 }else { 
1059                                         m->mothurOut("[ERROR]: unable to spawn the necessary processes. Error code: " + toString(pid)); m->mothurOutEndLine(); 
1060                                         perror(" : ");
1061                                         for (int i=0;i<processIDs.size();i++) {  int temp = processIDs[i]; kill (temp, SIGINT); }
1062                                         exit(0);
1063                                 }
1064                         }
1065                         
1066                         //parent does its part
1067                         flowDistParentFork(fDistFileName, start[0], end[0]);
1068                         
1069                         //force parent to wait until all the processes are done
1070                         for (int i=0;i<processIDs.size();i++) { 
1071                                 int temp = processIDs[i];
1072                                 wait(&temp);
1073                         }
1074                         
1075                         //append and remove temp files
1076                         for (int i=0;i<processIDs.size();i++) { 
1077                                 m->appendFiles((fDistFileName + toString(processIDs[i]) + ".temp"), fDistFileName);
1078                                 remove((fDistFileName + toString(processIDs[i]) + ".temp").c_str());
1079                         }
1080                         
1081                 }
1082                 
1083 #else
1084                 flowDistParentFork(fDistFileName, 0, numUniques);
1085 #endif
1086
1087                 m->mothurOutEndLine();
1088                 
1089                 cout << "Total time: " << (time(NULL) - begTime) << "\t"  << (clock() - begClock)/CLOCKS_PER_SEC << endl;;
1090
1091                 return fDistFileName;
1092         }
1093         catch(exception& e) {
1094                 m->errorOut(e, "ShhherCommand", "createDistFile");
1095                 exit(1);
1096         }
1097         
1098 }
1099
1100 /**************************************************************************************************/
1101
1102 string ShhherCommand::createNamesFile(){
1103         try{
1104                 
1105                 vector<string> duplicateNames(numUniques, "");
1106                 for(int i=0;i<numSeqs;i++){
1107                         duplicateNames[mapSeqToUnique[i]] += seqNameVector[i] + ',';
1108                 }
1109                 
1110                 string nameFileName = flowFileName.substr(0,flowFileName.find_last_of('.')) + ".pn.names";
1111                 
1112                 ofstream nameFile;
1113                 m->openOutputFile(nameFileName, nameFile);
1114                 
1115                 for(int i=0;i<numUniques;i++){
1116 //                      nameFile << seqNameVector[mapUniqueToSeq[i]] << '\t' << duplicateNames[i].substr(0, duplicateNames[i].find_last_of(',')) << endl;
1117                         nameFile << mapUniqueToSeq[i] << '\t' << duplicateNames[i].substr(0, duplicateNames[i].find_last_of(',')) << endl;
1118                 }
1119                 
1120                 nameFile.close();
1121                 return  nameFileName;
1122         }
1123         catch(exception& e) {
1124                 m->errorOut(e, "ShhherCommand", "createNamesFile");
1125                 exit(1);
1126         }
1127 }
1128
1129 //**********************************************************************************************************************
1130
1131 string ShhherCommand::cluster(string distFileName, string namesFileName){
1132         try {
1133                 
1134                 SparseMatrix* matrix;
1135                 ListVector* list;
1136                 RAbundVector* rabund;
1137                 
1138                 globaldata->setNameFile(namesFileName);
1139                 globaldata->setColumnFile(distFileName);
1140                 globaldata->setFormat("column");
1141                 
1142                 ReadMatrix* read = new ReadColumnMatrix(distFileName);  
1143                 read->setCutoff(cutoff);
1144                 
1145                 NameAssignment* clusterNameMap = new NameAssignment(namesFileName);
1146                 clusterNameMap->readMap();
1147                 read->read(clusterNameMap);
1148                 
1149                 list = read->getListVector();
1150                 matrix = read->getMatrix();
1151                 
1152                 delete read; 
1153                 delete clusterNameMap; 
1154                                 
1155                 rabund = new RAbundVector(list->getRAbundVector());
1156                 
1157                 Cluster* cluster = new CompleteLinkage(rabund, list, matrix, cutoff, "furthest"); 
1158                 string tag = cluster->getTag();
1159                 
1160                 double clusterCutoff = cutoff;
1161                 while (matrix->getSmallDist() <= clusterCutoff && matrix->getNNodes() > 0){
1162                         cluster->update(clusterCutoff);
1163                 }
1164                 
1165                 list->setLabel(toString(cutoff));
1166                 
1167                 string listFileName = flowFileName.substr(0,flowFileName.find_last_of('.')) + ".pn.list";
1168                 ofstream listFile;
1169                 m->openOutputFile(listFileName, listFile);
1170                 list->print(listFile);
1171                 listFile.close();
1172                 
1173                 delete matrix;  delete cluster; delete rabund; delete list;
1174         
1175                 return listFileName;
1176         }
1177         catch(exception& e) {
1178                 m->errorOut(e, "ShhherCommand", "cluster");
1179                 exit(1);        
1180         }               
1181 }
1182
1183 /**************************************************************************************************/
1184
1185 void ShhherCommand::getOTUData(string listFileName){
1186         try {
1187
1188                 ifstream listFile;
1189                 m->openInputFile(listFileName, listFile);
1190                 string label;
1191                 
1192                 listFile >> label >> numOTUs;
1193
1194                 otuData.assign(numSeqs, 0);
1195                 cumNumSeqs.assign(numOTUs, 0);
1196                 nSeqsPerOTU.assign(numOTUs, 0);
1197                 aaP.resize(numOTUs);
1198                 
1199                 string singleOTU = "";
1200                 
1201                 for(int i=0;i<numOTUs;i++){
1202
1203                         listFile >> singleOTU;
1204                         
1205                         istringstream otuString(singleOTU);
1206
1207                         while(otuString){
1208                                 
1209                                 string seqName = "";
1210                                 
1211                                 for(int j=0;j<singleOTU.length();j++){
1212                                         char letter = otuString.get();
1213                                         
1214                                         if(letter != ','){
1215                                                 seqName += letter;
1216                                         }
1217                                         else{
1218                                                 map<string,int>::iterator nmIt = nameMap.find(seqName);
1219                                                 int index = nmIt->second;
1220                                                 
1221                                                 nameMap.erase(nmIt);
1222                                                 
1223                                                 otuData[index] = i;
1224                                                 nSeqsPerOTU[i]++;
1225                                                 aaP[i].push_back(index);
1226                                                 seqName = "";
1227                                         }
1228                                 }
1229                                 
1230                                 map<string,int>::iterator nmIt = nameMap.find(seqName);
1231
1232                                 int index = nmIt->second;
1233                                 nameMap.erase(nmIt);
1234
1235                                 otuData[index] = i;
1236                                 nSeqsPerOTU[i]++;
1237                                 aaP[i].push_back(index);        
1238                                 
1239                                 otuString.get();
1240                         }
1241                         
1242                         sort(aaP[i].begin(), aaP[i].end());
1243                         for(int j=0;j<nSeqsPerOTU[i];j++){
1244                                 seqNumber.push_back(aaP[i][j]);
1245                         }
1246                         for(int j=nSeqsPerOTU[i];j<numSeqs;j++){
1247                                 aaP[i].push_back(0);
1248                         }
1249                 }
1250                 
1251                 for(int i=1;i<numOTUs;i++){
1252                         cumNumSeqs[i] = cumNumSeqs[i-1] + nSeqsPerOTU[i-1];
1253                 }
1254                 aaI = aaP;
1255                 seqIndex = seqNumber;
1256                 
1257                 listFile.close();       
1258         }
1259         catch(exception& e) {
1260                 m->errorOut(e, "ShhherCommand", "getOTUData");
1261                 exit(1);        
1262         }               
1263 }
1264
1265 /**************************************************************************************************/
1266
1267 void ShhherCommand::initPyroCluster(){                          
1268         try{
1269                 dist.assign(numSeqs * numOTUs, 0);
1270                 change.assign(numOTUs, 1);
1271                 centroids.assign(numOTUs, -1);
1272                 weight.assign(numOTUs, 0);
1273                 singleTau.assign(numSeqs, 1.0);
1274                 
1275                 nSeqsBreaks.assign(processors+1, 0);
1276                 nOTUsBreaks.assign(processors+1, 0);
1277
1278                 nSeqsBreaks[0] = 0;
1279                 for(int i=0;i<processors;i++){
1280                         nSeqsBreaks[i+1] = nSeqsBreaks[i] + (int)((double) numSeqs / (double) processors);
1281                         nOTUsBreaks[i+1] = nOTUsBreaks[i] + (int)((double) numOTUs / (double) processors);
1282                 }
1283                 nSeqsBreaks[processors] = numSeqs;
1284                 nOTUsBreaks[processors] = numOTUs;
1285         }
1286         catch(exception& e) {
1287                 m->errorOut(e, "ShhherCommand", "initPyroCluster");
1288                 exit(1);        
1289         }
1290 }
1291
1292 /**************************************************************************************************/
1293
1294 void ShhherCommand::fill(){
1295         try {
1296                 int index = 0;
1297                 for(int i=0;i<numOTUs;i++){
1298                         cumNumSeqs[i] = index;
1299                         for(int j=0;j<nSeqsPerOTU[i];j++){
1300                                 seqNumber[index] = aaP[i][j];
1301                                 seqIndex[index] = aaI[i][j];
1302                                 
1303                                 index++;
1304                         }
1305                 }
1306         }
1307         catch(exception& e) {
1308                 m->errorOut(e, "ShhherCommand", "fill");
1309                 exit(1);        
1310         }               
1311 }
1312
1313 /**************************************************************************************************/
1314
1315 void ShhherCommand::calcCentroids(){                          
1316         try{
1317                 
1318 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
1319
1320                 if(processors == 1)     {
1321                         calcCentroidsDriver(0, numOTUs);                
1322                 }
1323                 else{ //you have multiple processors
1324                         if (numOTUs < processors){      processors = 1; }
1325                         
1326                         int process = 1;
1327                         vector<int> processIDs;
1328                         
1329                         //loop through and create all the processes you want
1330                         while (process != processors) {
1331                                 int pid = vfork();
1332                                 
1333                                 if (pid > 0) {
1334                                         processIDs.push_back(pid);  //create map from line number to pid so you can append files in correct order later
1335                                         process++;
1336                                 }else if (pid == 0){
1337                                         calcCentroidsDriver(nOTUsBreaks[process], nOTUsBreaks[process+1]);
1338                                         exit(0);
1339                                 }else { 
1340                                         m->mothurOut("[ERROR]: unable to spawn the necessary processes. Error code: " + toString(pid)); m->mothurOutEndLine(); 
1341                                         perror(" : ");
1342                                         for (int i=0;i<processIDs.size();i++) {  int temp = processIDs[i]; kill (temp, SIGINT); }
1343                                         exit(0);
1344                                 }
1345                         }
1346                         
1347                         //parent does its part
1348                         calcCentroidsDriver(nOTUsBreaks[0], nOTUsBreaks[1]);
1349
1350                         //force parent to wait until all the processes are done
1351                         for (int i=0;i<processIDs.size();i++) { 
1352                                 int temp = processIDs[i];
1353                                 wait(&temp);
1354                         }
1355                 }
1356                 
1357 #else
1358                 calcCentroidsDriver(0, numOTUs);
1359 #endif          
1360         }
1361         catch(exception& e) {
1362                 m->errorOut(e, "ShhherCommand", "calcCentroidsDriver");
1363                 exit(1);        
1364         }               
1365 }
1366
1367 /**************************************************************************************************/
1368
1369 void ShhherCommand::calcCentroidsDriver(int start, int finish){                          
1370         
1371         //this function gets the most likely homopolymer length at a flow position for a group of sequences
1372         //within an otu
1373         
1374         try{
1375                 
1376                 for(int i=start;i<finish;i++){
1377                         
1378                         double count = 0;
1379                         int position = 0;
1380                         int minFlowGram = 100000000;
1381                         double minFlowValue = 1e8;
1382                         change[i] = 0; //FALSE
1383                         
1384                         for(int j=0;j<nSeqsPerOTU[i];j++){
1385                                 count += singleTau[seqNumber[cumNumSeqs[i] + j]];
1386                         }
1387                         
1388                         if(nSeqsPerOTU[i] > 0 && count > MIN_COUNT){
1389                                 vector<double> adF(nSeqsPerOTU[i]);
1390                                 vector<int> anL(nSeqsPerOTU[i]);
1391                                 
1392                                 for(int j=0;j<nSeqsPerOTU[i];j++){
1393                                         int index = cumNumSeqs[i] + j;
1394                                         int nI = seqIndex[index];
1395                                         int nIU = mapSeqToUnique[nI];
1396                                         
1397                                         int k;
1398                                         for(k=0;k<position;k++){
1399                                                 if(nIU == anL[k]){
1400                                                         break;
1401                                                 }
1402                                         }
1403                                         if(k == position){
1404                                                 anL[position] = nIU;
1405                                                 adF[position] = 0.0000;
1406                                                 position++;
1407                                         }                                               
1408                                 }
1409                                 
1410                                 for(int j=0;j<nSeqsPerOTU[i];j++){
1411                                         int index = cumNumSeqs[i] + j;
1412                                         int nI = seqIndex[index];
1413                                         
1414                                         double tauValue = singleTau[seqNumber[index]];
1415                                         
1416                                         for(int k=0;k<position;k++){
1417                                                 double dist = getDistToCentroid(anL[k], nI, lengths[nI]);
1418                                                 adF[k] += dist * tauValue;
1419                                         }
1420                                 }
1421                                 
1422                                 for(int j=0;j<position;j++){
1423                                         if(adF[j] < minFlowValue){
1424                                                 minFlowGram = j;
1425                                                 minFlowValue = adF[j];
1426                                         }
1427                                 }
1428                                 
1429                                 if(centroids[i] != anL[minFlowGram]){
1430                                         change[i] = 1;
1431                                         centroids[i] = anL[minFlowGram];
1432                                 }
1433                         }
1434                         else if(centroids[i] != -1){
1435                                 change[i] = 1;
1436                                 centroids[i] = -1;                      
1437                         }
1438                 }
1439         }
1440         catch(exception& e) {
1441                 m->errorOut(e, "ShhherCommand", "calcCentroidsDriver");
1442                 exit(1);        
1443         }               
1444 }
1445
1446 /**************************************************************************************************/
1447
1448 double ShhherCommand::getDistToCentroid(int cent, int flow, int length){
1449         try{
1450                 
1451                 int flowAValue = cent * numFlowCells;
1452                 int flowBValue = flow * numFlowCells;
1453                 
1454                 double dist = 0;
1455                 
1456                 for(int i=0;i<length;i++){
1457                         dist += singleLookUp[uniqueFlowgrams[flowAValue] * NUMBINS + flowDataIntI[flowBValue]];
1458                         flowAValue++;
1459                         flowBValue++;
1460                 }
1461                 
1462                 return dist / (double)length;
1463         }
1464         catch(exception& e) {
1465                 m->errorOut(e, "ShhherCommand", "getDistToCentroid");
1466                 exit(1);        
1467         }               
1468 }
1469
1470 /**************************************************************************************************/
1471
1472 double ShhherCommand::getNewWeights(){
1473         try{
1474                 
1475                 double maxChange = 0;
1476                 
1477                 for(int i=0;i<numOTUs;i++){
1478                         
1479                         double difference = weight[i];
1480                         weight[i] = 0;
1481                         
1482                         for(int j=0;j<nSeqsPerOTU[i];j++){
1483                                 int index = cumNumSeqs[i] + j;
1484                                 double tauValue = singleTau[seqNumber[index]];
1485                                 weight[i] += tauValue;
1486                         }
1487                         
1488                         difference = fabs(weight[i] - difference);
1489                         if(difference > maxChange){     maxChange = difference; }
1490                 }
1491                 return maxChange;
1492         }
1493         catch(exception& e) {
1494                 m->errorOut(e, "ShhherCommand", "getNewWeights");
1495                 exit(1);        
1496         }               
1497 }
1498
1499 /**************************************************************************************************/
1500
1501 double ShhherCommand::getLikelihood(){
1502         
1503         try{
1504                 
1505                 vector<long double> P(numSeqs, 0);
1506                 int effNumOTUs = 0;
1507                 
1508                 for(int i=0;i<numOTUs;i++){
1509                         if(weight[i] > MIN_WEIGHT){
1510                                 effNumOTUs++;
1511                         }
1512                 }
1513                 
1514                 string hold;
1515                 for(int i=0;i<numOTUs;i++){
1516                         for(int j=0;j<nSeqsPerOTU[i];j++){
1517                                 int index = cumNumSeqs[i] + j;
1518                                 int nI = seqIndex[index];
1519                                 double singleDist = dist[seqNumber[index]];
1520                                 
1521                                 P[nI] += weight[i] * exp(-singleDist * sigma);
1522                         }
1523                 }
1524                 double nLL = 0.00;
1525                 for(int i=0;i<numSeqs;i++){
1526                         if(P[i] == 0){  P[i] = DBL_EPSILON;     }
1527
1528                         nLL += -log(P[i]);
1529                 }
1530                 
1531                 nLL = nLL -(double)numSeqs * log(sigma);
1532
1533                 return nLL; 
1534         }
1535         catch(exception& e) {
1536                 m->errorOut(e, "ShhherCommand", "getNewWeights");
1537                 exit(1);        
1538         }               
1539 }
1540
1541 /**************************************************************************************************/
1542
1543 void ShhherCommand::checkCentroids(){
1544         try{
1545                 vector<int> unique(numOTUs, 1);
1546                 
1547                 for(int i=0;i<numOTUs;i++){
1548                         if(centroids[i] == -1 || weight[i] < MIN_WEIGHT){
1549                                 unique[i] = -1;
1550                         }
1551                 }
1552                 
1553                 for(int i=0;i<numOTUs;i++){
1554                         if(unique[i] == 1){
1555                                 for(int j=i+1;j<numOTUs;j++){
1556                                         if(unique[j] == 1){
1557                                                 
1558                                                 if(centroids[j] == centroids[i]){
1559                                                         unique[j] = 0;
1560                                                         centroids[j] = -1;
1561                                                         
1562                                                         weight[i] += weight[j];
1563                                                         weight[j] = 0.0;
1564                                                 }
1565                                         }
1566                                 }
1567                         }
1568                 }
1569         }
1570         catch(exception& e) {
1571                 m->errorOut(e, "ShhherCommand", "checkCentroids");
1572                 exit(1);        
1573         }               
1574 }
1575
1576 /**************************************************************************************************/
1577
1578 void ShhherCommand::calcNewDistances(){                          
1579         try{
1580                 
1581 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
1582                 
1583                 if(processors == 1)     {
1584                         calcNewDistancesParent(0, numSeqs);             
1585                 }
1586                 else{ //you have multiple processors
1587                         if (numSeqs < processors){      processors = 1; }
1588                         
1589                         vector<vector<int> > child_otuIndex(processors);
1590                         vector<vector<int> > child_seqIndex(processors);
1591                         vector<vector<double> > child_singleTau(processors);                    
1592                         vector<int> totals(processors);
1593                         
1594                         int process = 1;
1595                         vector<int> processIDs;
1596
1597                         //loop through and create all the processes you want
1598                         while (process != processors) {
1599                                 int pid = vfork();
1600                                 
1601                                 if (pid > 0) {
1602                                         processIDs.push_back(pid);  //create map from line number to pid so you can append files in correct order later
1603                                         process++;
1604                                 }else if (pid == 0){
1605                                         calcNewDistancesChild(nSeqsBreaks[process], nSeqsBreaks[process+1], child_otuIndex[process], child_seqIndex[process], child_singleTau[process]);
1606                                         totals[process] = child_otuIndex[process].size();
1607
1608                                         exit(0);
1609                                 }else { 
1610                                         m->mothurOut("[ERROR]: unable to spawn the necessary processes. Error code: " + toString(pid)); m->mothurOutEndLine(); 
1611                                         perror(" : ");
1612                                         for (int i=0;i<processIDs.size();i++) {  int temp = processIDs[i]; kill (temp, SIGINT); }
1613                                         exit(0);
1614                                 }
1615                         }
1616                         
1617                         //parent does its part
1618                         calcNewDistancesParent(nSeqsBreaks[0], nSeqsBreaks[1]);
1619                         int total = seqIndex.size();
1620
1621                         //force parent to wait until all the processes are done
1622                         for (int i=0;i<processIDs.size();i++) { 
1623                                 int temp = processIDs[i];
1624                                 wait(&temp);
1625                         }
1626
1627                         for(int i=1;i<processors;i++){
1628                                 int oldTotal = total;
1629                                 total += totals[i];
1630
1631                                 singleTau.resize(total, 0);
1632                                 seqIndex.resize(total, 0);
1633                                 seqNumber.resize(total, 0);
1634                                 
1635                                 int childIndex = 0;
1636                                 
1637                                 for(int j=oldTotal;j<total;j++){
1638                                         int otuI = child_otuIndex[i][childIndex];
1639                                         int seqI = child_seqIndex[i][childIndex];
1640
1641                                         singleTau[j] = child_singleTau[i][childIndex];
1642                                         aaP[otuI][nSeqsPerOTU[otuI]] = j;
1643                                         aaI[otuI][nSeqsPerOTU[otuI]] = seqI;
1644                                         nSeqsPerOTU[otuI]++;
1645
1646                                         childIndex++;
1647                                 }
1648                         }
1649                 }
1650 #else
1651                 calcNewDistancesParent(0, numSeqs);             
1652 #endif          
1653         }
1654         catch(exception& e) {
1655                 m->errorOut(e, "ShhherCommand", "calcNewDistances");
1656                 exit(1);        
1657         }               
1658 }
1659
1660 /**************************************************************************************************/
1661 #ifdef USE_MPI
1662 void ShhherCommand::calcNewDistancesChildMPI(int startSeq, int stopSeq, vector<int>& otuIndex){
1663         
1664         try{
1665                 vector<double> newTau(numOTUs,0);
1666                 vector<double> norms(numSeqs, 0);
1667                 otuIndex.resize(0);
1668                 seqIndex.resize(0);
1669                 singleTau.resize(0);
1670                 
1671                 
1672                 
1673                 for(int i=startSeq;i<stopSeq;i++){
1674                         double offset = 1e8;
1675                         int indexOffset = i * numOTUs;
1676                         
1677                         for(int j=0;j<numOTUs;j++){
1678                                 
1679                                 if(weight[j] > MIN_WEIGHT && change[j] == 1){
1680                                         dist[indexOffset + j] = getDistToCentroid(centroids[j], i, lengths[i]);
1681                                 }
1682                                 if(weight[j] > MIN_WEIGHT && dist[indexOffset + j] < offset){
1683                                         offset = dist[indexOffset + j];
1684                                 }
1685                         }
1686                         
1687                         for(int j=0;j<numOTUs;j++){
1688                                 if(weight[j] > MIN_WEIGHT){
1689                                         newTau[j] = exp(sigma * (-dist[indexOffset + j] + offset)) * weight[j];
1690                                         norms[i] += newTau[j];
1691                                 }
1692                                 else{
1693                                         newTau[j] = 0.0;
1694                                 }
1695                         }
1696                         
1697                         for(int j=0;j<numOTUs;j++){
1698
1699                                 newTau[j] /= norms[i];
1700                                 
1701                                 if(newTau[j] > MIN_TAU){
1702                                         otuIndex.push_back(j);
1703                                         seqIndex.push_back(i);
1704                                         singleTau.push_back(newTau[j]);
1705                                 }
1706                         }
1707                         
1708                 }
1709         }
1710         catch(exception& e) {
1711                 m->errorOut(e, "ShhherCommand", "calcNewDistancesChildMPI");
1712                 exit(1);        
1713         }               
1714 }
1715 #endif
1716 /**************************************************************************************************/
1717
1718 void ShhherCommand::calcNewDistancesChild(int startSeq, int stopSeq, vector<int>& child_otuIndex, vector<int>& child_seqIndex, vector<double>& child_singleTau){
1719         
1720         try{
1721                 vector<double> newTau(numOTUs,0);
1722                 vector<double> norms(numSeqs, 0);
1723                 child_otuIndex.resize(0);
1724                 child_seqIndex.resize(0);
1725                 child_singleTau.resize(0);
1726                 
1727                 for(int i=startSeq;i<stopSeq;i++){
1728                         double offset = 1e8;
1729                         int indexOffset = i * numOTUs;
1730                         
1731                         
1732                         for(int j=0;j<numOTUs;j++){
1733                                 if(weight[j] > MIN_WEIGHT && change[j] == 1){
1734                                         dist[indexOffset + j] = getDistToCentroid(centroids[j], i, lengths[i]);
1735                                 }
1736                                 
1737                                 if(weight[j] > MIN_WEIGHT && dist[indexOffset + j] < offset){
1738                                         offset = dist[indexOffset + j];
1739                                 }
1740                         }
1741                         
1742                         for(int j=0;j<numOTUs;j++){
1743                                 if(weight[j] > MIN_WEIGHT){
1744                                         newTau[j] = exp(sigma * (-dist[indexOffset + j] + offset)) * weight[j];
1745                                         norms[i] += newTau[j];
1746                                 }
1747                                 else{
1748                                         newTau[j] = 0.0;
1749                                 }
1750                         }
1751                         
1752                         for(int j=0;j<numOTUs;j++){
1753                                 newTau[j] /= norms[i];
1754                                 
1755                                 if(newTau[j] > MIN_TAU){
1756                                         child_otuIndex.push_back(j);
1757                                         child_seqIndex.push_back(i);
1758                                         child_singleTau.push_back(newTau[j]);
1759                                 }
1760                         }
1761                 }
1762         }
1763         catch(exception& e) {
1764                 m->errorOut(e, "ShhherCommand", "calcNewDistancesChild");
1765                 exit(1);        
1766         }               
1767 }
1768
1769 /**************************************************************************************************/
1770
1771 void ShhherCommand::calcNewDistancesParent(int startSeq, int stopSeq){
1772         
1773         try{
1774                 
1775                 int total = 0;
1776                 vector<double> newTau(numOTUs,0);
1777                 vector<double> norms(numSeqs, 0);
1778                 nSeqsPerOTU.assign(numOTUs, 0);
1779                 
1780                 for(int i=startSeq;i<stopSeq;i++){
1781                         int indexOffset = i * numOTUs;
1782                         
1783                         double offset = 1e8;
1784                         
1785                         for(int j=0;j<numOTUs;j++){
1786                                 if(weight[j] > MIN_WEIGHT && change[j] == 1){
1787                                         dist[indexOffset + j] = getDistToCentroid(centroids[j], i, lengths[i]);
1788                                 }
1789                                 
1790                                 if(weight[j] > MIN_WEIGHT && dist[indexOffset + j] < offset){
1791                                         offset = dist[indexOffset + j];
1792                                 }
1793                         }
1794                         
1795                         for(int j=0;j<numOTUs;j++){
1796                                 if(weight[j] > MIN_WEIGHT){
1797                                         newTau[j] = exp(sigma * (-dist[indexOffset + j] + offset)) * weight[j];
1798                                         norms[i] += newTau[j];
1799                                 }
1800                                 else{
1801                                         newTau[j] = 0.0;
1802                                 }
1803                         }
1804                         
1805                         for(int j=0;j<numOTUs;j++){
1806                                 newTau[j] /= norms[i];
1807                         }
1808                         
1809                         for(int j=0;j<numOTUs;j++){
1810                                 if(newTau[j] > MIN_TAU){
1811                                         
1812                                         int oldTotal = total;
1813                                         
1814                                         total++;
1815                                         
1816                                         singleTau.resize(total, 0);
1817                                         seqNumber.resize(total, 0);
1818                                         seqIndex.resize(total, 0);
1819                                         
1820                                         singleTau[oldTotal] = newTau[j];
1821                                         
1822                                         aaP[j][nSeqsPerOTU[j]] = oldTotal;
1823                                         aaI[j][nSeqsPerOTU[j]] = i;
1824                                         nSeqsPerOTU[j]++;
1825                                 }
1826                         }
1827                 }
1828         }
1829         catch(exception& e) {
1830                 m->errorOut(e, "ShhherCommand", "calcNewDistancesParent");
1831                 exit(1);        
1832         }               
1833 }
1834
1835 /**************************************************************************************************/
1836
1837 void ShhherCommand::setOTUs(){
1838         
1839         try {
1840                 vector<double> bigTauMatrix(numOTUs * numSeqs, 0.0000);
1841                 
1842                 for(int i=0;i<numOTUs;i++){
1843                         for(int j=0;j<nSeqsPerOTU[i];j++){
1844                                 int index = cumNumSeqs[i] + j;
1845                                 double tauValue = singleTau[seqNumber[index]];
1846                                 int sIndex = seqIndex[index];
1847                                 bigTauMatrix[sIndex * numOTUs + i] = tauValue;                          
1848                         }
1849                 }
1850                 
1851                 for(int i=0;i<numSeqs;i++){
1852                         double maxTau = -1.0000;
1853                         int maxOTU = -1;
1854                         for(int j=0;j<numOTUs;j++){
1855                                 if(bigTauMatrix[i * numOTUs + j] > maxTau){
1856                                         maxTau = bigTauMatrix[i * numOTUs + j];
1857                                         maxOTU = j;
1858                                 }
1859                         }
1860                         
1861                         otuData[i] = maxOTU;
1862                 }
1863                 
1864                 nSeqsPerOTU.assign(numOTUs, 0);         
1865                 
1866                 for(int i=0;i<numSeqs;i++){
1867                         int index = otuData[i];
1868                         
1869                         singleTau[i] = 1.0000;
1870                         dist[i] = 0.0000;
1871                         
1872                         aaP[index][nSeqsPerOTU[index]] = i;
1873                         aaI[index][nSeqsPerOTU[index]] = i;
1874                         
1875                         nSeqsPerOTU[index]++;
1876                 }
1877                 fill(); 
1878         }
1879         catch(exception& e) {
1880                 m->errorOut(e, "ShhherCommand", "calcNewDistances");
1881                 exit(1);        
1882         }               
1883 }
1884
1885 /**************************************************************************************************/
1886
1887 void ShhherCommand::writeQualities(vector<int> otuCounts){
1888         
1889         try {
1890                 string qualityFileName = flowFileName.substr(0,flowFileName.find_last_of('.')) + ".pn.qual";
1891
1892                 ofstream qualityFile;
1893                 m->openOutputFile(qualityFileName, qualityFile);
1894
1895                 qualityFile.setf(ios::fixed, ios::floatfield);
1896                 qualityFile.setf(ios::showpoint);
1897                 qualityFile << setprecision(6);
1898                 
1899                 vector<vector<int> > qualities(numOTUs);
1900                 vector<double> pr(HOMOPS, 0);
1901                 
1902                 
1903                 for(int i=0;i<numOTUs;i++){
1904                         int index = 0;
1905                         int base = 0;
1906                         
1907                         if(nSeqsPerOTU[i] > 0){
1908                                 qualities[i].assign(1024, -1);
1909                                 
1910                                 while(index < numFlowCells){
1911                                         double maxPrValue = 1e8;
1912                                         short maxPrIndex = -1;
1913                                         double count = 0.0000;
1914                                         
1915                                         pr.assign(HOMOPS, 0);
1916                                         
1917                                         for(int j=0;j<nSeqsPerOTU[i];j++){
1918                                                 int lIndex = cumNumSeqs[i] + j;
1919                                                 double tauValue = singleTau[seqNumber[lIndex]];
1920                                                 int sequenceIndex = aaI[i][j];
1921                                                 short intensity = flowDataIntI[sequenceIndex * numFlowCells + index];
1922                                                 
1923                                                 count += tauValue;
1924                                                 
1925                                                 for(int s=0;s<HOMOPS;s++){
1926                                                         pr[s] += tauValue * singleLookUp[s * NUMBINS + intensity];
1927                                                 }
1928                                         }
1929                                         
1930                                         maxPrIndex = uniqueFlowgrams[centroids[i] * numFlowCells + index];
1931                                         maxPrValue = pr[maxPrIndex];
1932                                         
1933                                         if(count > MIN_COUNT){
1934                                                 double U = 0.0000;
1935                                                 double norm = 0.0000;
1936                                                 
1937                                                 for(int s=0;s<HOMOPS;s++){
1938                                                         norm += exp(-(pr[s] - maxPrValue));
1939                                                 }
1940                                                 
1941                                                 for(int s=1;s<=maxPrIndex;s++){
1942                                                         int value = 0;
1943                                                         double temp = 0.0000;
1944                                                         
1945                                                         U += exp(-(pr[s-1]-maxPrValue))/norm;
1946                                                         
1947                                                         if(U>0.00){
1948                                                                 temp = log10(U);
1949                                                         }
1950                                                         else{
1951                                                                 temp = -10.1;
1952                                                         }
1953                                                         temp = floor(-10 * temp);
1954                                                         value = (int)floor(temp);
1955                                                         if(value > 100){        value = 100;    }
1956                                                         
1957                                                         qualities[i][base] = (int)value;
1958                                                         base++;
1959                                                 }
1960                                         }
1961                                         
1962                                         index++;
1963                                 }
1964                         }
1965                         
1966                         
1967                         if(otuCounts[i] > 0){
1968                                 qualityFile << '>' << seqNameVector[mapUniqueToSeq[i]] << endl;
1969                                 
1970                                 int j=4;        //need to get past the first four bases
1971                                 while(qualities[i][j] != -1){
1972                                         qualityFile << qualities[i][j] << ' ';
1973                                         j++;
1974                                 }
1975                                 qualityFile << endl;
1976                         }
1977                 }
1978                 qualityFile.close();
1979                 
1980         }
1981         catch(exception& e) {
1982                 m->errorOut(e, "ShhherCommand", "writeQualities");
1983                 exit(1);        
1984         }               
1985 }
1986
1987 /**************************************************************************************************/
1988
1989 void ShhherCommand::writeSequences(vector<int> otuCounts){
1990         try {
1991                 
1992                 string bases = "TACG";
1993                 
1994                 string fastaFileName = flowFileName.substr(0,flowFileName.find_last_of('.')) + ".pn.fasta";
1995                 ofstream fastaFile;
1996                 m->openOutputFile(fastaFileName, fastaFile);
1997                 
1998                 vector<string> names(numOTUs, "");
1999                 
2000                 for(int i=0;i<numOTUs;i++){
2001                         int index = centroids[i];
2002                         
2003                         if(otuCounts[i] > 0){
2004                                 fastaFile << '>' << seqNameVector[aaI[i][0]] << endl;
2005                                 
2006                                 for(int j=8;j<numFlowCells;j++){
2007                                         
2008                                         char base = bases[j % 4];
2009                                         for(int k=0;k<uniqueFlowgrams[index * numFlowCells + j];k++){
2010                                                 fastaFile << base;
2011                                         }
2012                                 }
2013                                 fastaFile << endl;
2014                         }
2015                 }
2016                 fastaFile.close();
2017         }
2018         catch(exception& e) {
2019                 m->errorOut(e, "ShhherCommand", "writeSequences");
2020                 exit(1);        
2021         }               
2022 }
2023
2024 /**************************************************************************************************/
2025
2026 void ShhherCommand::writeNames(vector<int> otuCounts){
2027         try {
2028                 string nameFileName = flowFileName.substr(0,flowFileName.find_last_of('.')) + ".pn.final.names";
2029                 ofstream nameFile;
2030                 m->openOutputFile(nameFileName, nameFile);
2031                 
2032                 for(int i=0;i<numOTUs;i++){
2033                         if(otuCounts[i] > 0){
2034                                 nameFile << seqNameVector[aaI[i][0]] << '\t' << seqNameVector[aaI[i][0]];
2035                                 
2036                                 for(int j=1;j<nSeqsPerOTU[i];j++){
2037                                         nameFile << ',' << seqNameVector[aaI[i][j]];
2038                                 }
2039                                 
2040                                 nameFile << endl;
2041                         }
2042                 }
2043                 nameFile.close();
2044         }
2045         catch(exception& e) {
2046                 m->errorOut(e, "ShhherCommand", "writeNames");
2047                 exit(1);        
2048         }               
2049 }
2050
2051 /**************************************************************************************************/
2052
2053 void ShhherCommand::writeGroups(){
2054         try {
2055                 string fileRoot = flowFileName.substr(0,flowFileName.find_last_of('.'));
2056                 string groupFileName = fileRoot + ".pn.groups";
2057                 ofstream groupFile;
2058                 m->openOutputFile(groupFileName, groupFile);
2059                 
2060                 for(int i=0;i<numSeqs;i++){
2061                         groupFile << seqNameVector[i] << '\t' << fileRoot << endl;
2062                 }
2063                 groupFile.close();
2064         }
2065         catch(exception& e) {
2066                 m->errorOut(e, "ShhherCommand", "writeGroups");
2067                 exit(1);        
2068         }               
2069 }
2070
2071 /**************************************************************************************************/
2072
2073 void ShhherCommand::writeClusters(vector<int> otuCounts){
2074         try {
2075                 string otuCountsFileName = flowFileName.substr(0,flowFileName.find_last_of('.')) + ".pn.counts";
2076                 ofstream otuCountsFile;
2077                 m->openOutputFile(otuCountsFileName, otuCountsFile);
2078                 
2079                 string bases = "TACG";
2080                 
2081                 for(int i=0;i<numOTUs;i++){
2082                         //output the translated version of the centroid sequence for the otu
2083                         if(otuCounts[i] > 0){
2084                                 int index = centroids[i];
2085                                 
2086                                 otuCountsFile << "ideal\t";
2087                                 for(int j=8;j<numFlowCells;j++){
2088                                         char base = bases[j % 4];
2089                                         for(int s=0;s<uniqueFlowgrams[index * numFlowCells + j];s++){
2090                                                 otuCountsFile << base;
2091                                         }
2092                                 }
2093                                 otuCountsFile << endl;
2094                                 
2095                                 for(int j=0;j<nSeqsPerOTU[i];j++){
2096                                         int sequence = aaI[i][j];
2097                                         otuCountsFile << seqNameVector[sequence] << '\t';
2098                                         
2099                                         for(int k=8;k<lengths[sequence];k++){
2100                                                 char base = bases[k % 4];
2101                                                 int freq = int(0.01 * (double)flowDataIntI[sequence * numFlowCells + k] + 0.5);
2102                                                 
2103                                                 for(int s=0;s<freq;s++){
2104                                                         otuCountsFile << base;
2105                                                 }
2106                                         }
2107                                         otuCountsFile << endl;
2108                                 }
2109                                 otuCountsFile << endl;
2110                         }
2111                 }
2112                 otuCountsFile.close();
2113         }
2114         catch(exception& e) {
2115                 m->errorOut(e, "ShhherCommand", "writeClusters");
2116                 exit(1);        
2117         }               
2118 }
2119
2120 //**********************************************************************************************************************