]> git.donarmstrong.com Git - mothur.git/blob - shhhercommand.cpp
fixed makefile
[mothur.git] / shhhercommand.cpp
1 /*
2  *  shhher.cpp
3  *  Mothur
4  *
5  *  Created by Pat Schloss on 12/27/10.
6  *  Copyright 2010 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "shhhercommand.h"
11
12 //**********************************************************************************************************************
13 vector<string> ShhherCommand::setParameters(){  
14         try {
15                 CommandParameter pflow("flow", "InputTypes", "", "", "none", "fileflow", "none",false,false); parameters.push_back(pflow);
16                 CommandParameter pfile("file", "InputTypes", "", "", "none", "fileflow", "none",false,false); parameters.push_back(pfile);
17                 CommandParameter plookup("lookup", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(plookup);
18                 CommandParameter pcutoff("cutoff", "Number", "", "0.01", "", "", "",false,false); parameters.push_back(pcutoff);
19                 CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors);
20                 CommandParameter pmaxiter("maxiter", "Number", "", "1000", "", "", "",false,false); parameters.push_back(pmaxiter);
21                 CommandParameter psigma("sigma", "Number", "", "60", "", "", "",false,false); parameters.push_back(psigma);
22                 CommandParameter pmindelta("mindelta", "Number", "", "0.000001", "", "", "",false,false); parameters.push_back(pmindelta);
23                 CommandParameter porder("order", "String", "", "", "", "", "",false,false); parameters.push_back(porder);
24                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
25                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
26                 
27                 vector<string> myArray;
28                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
29                 return myArray;
30         }
31         catch(exception& e) {
32                 m->errorOut(e, "ShhherCommand", "setParameters");
33                 exit(1);
34         }
35 }
36 //**********************************************************************************************************************
37 string ShhherCommand::getHelpString(){  
38         try {
39                 string helpString = "";
40                 helpString += "The shhh.flows command reads a file containing flowgrams and creates a file of corrected sequences.\n";
41                 return helpString;
42         }
43         catch(exception& e) {
44                 m->errorOut(e, "ShhherCommand", "getHelpString");
45                 exit(1);
46         }
47 }
48 //**********************************************************************************************************************
49
50 ShhherCommand::ShhherCommand(){ 
51         try {
52                 abort = true; calledHelp = true;
53                 setParameters();
54                 
55                 //initialize outputTypes
56 //              vector<string> tempOutNames;
57 //              outputTypes["pn.dist"] = tempOutNames;
58
59         }
60         catch(exception& e) {
61                 m->errorOut(e, "ShhherCommand", "ShhherCommand");
62                 exit(1);
63         }
64 }
65
66 //**********************************************************************************************************************
67
68 ShhherCommand::ShhherCommand(string option) {
69         try {
70                 abort = false; calledHelp = false;   
71                 
72                 //allow user to run help
73                 if(option == "help") { help(); abort = true; calledHelp = true; }
74                 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
75                 
76                 else {
77                         vector<string> myArray = setParameters();
78                         
79                         OptionParser parser(option);
80                         map<string,string> parameters = parser.getParameters();
81                         
82                         ValidParameters validParameter;
83                         map<string,string>::iterator it;
84                         
85                         //check to make sure all parameters are valid for command
86                         for (it = parameters.begin(); it != parameters.end(); it++) { 
87                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
88                         }
89                         
90                         //initialize outputTypes
91                         vector<string> tempOutNames;
92 //                      outputTypes["pn.dist"] = tempOutNames;
93                         //                      outputTypes["fasta"] = tempOutNames;
94                         
95                         //if the user changes the input directory command factory will send this info to us in the output parameter 
96                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
97                         if (inputDir == "not found"){   inputDir = "";          }
98                         else {
99                                 string path;
100                                 it = parameters.find("flow");
101                                 //user has given a template file
102                                 if(it != parameters.end()){ 
103                                         path = m->hasPath(it->second);
104                                         //if the user has not given a path then, add inputdir. else leave path alone.
105                                         if (path == "") {       parameters["flow"] = inputDir + it->second;             }
106                                 }
107                                 
108                                 it = parameters.find("lookup");
109                                 //user has given a template file
110                                 if(it != parameters.end()){ 
111                                         path = m->hasPath(it->second);
112                                         //if the user has not given a path then, add inputdir. else leave path alone.
113                                         if (path == "") {       parameters["lookup"] = inputDir + it->second;           }
114                                 }
115
116                                 it = parameters.find("file");
117                                 //user has given a template file
118                                 if(it != parameters.end()){ 
119                                         path = m->hasPath(it->second);
120                                         //if the user has not given a path then, add inputdir. else leave path alone.
121                                         if (path == "") {       parameters["file"] = inputDir + it->second;             }
122                                 }
123                         }
124                         
125                         
126                         //check for required parameters
127                         flowFileName = validParameter.validFile(parameters, "flow", true);
128                         flowFilesFileName = validParameter.validFile(parameters, "file", true);
129                         if (flowFileName == "not found" && flowFilesFileName == "not found") {
130                                 m->mothurOut("values for either flow or file must be provided for the shhh.seqs command.");
131                                 m->mothurOutEndLine();
132                                 abort = true; 
133                         }
134                         else if (flowFileName == "not open" || flowFilesFileName == "not open") { abort = true; }
135                         
136                         if(flowFileName != "not found"){
137                                 compositeFASTAFileName = "";    
138                                 compositeNamesFileName = "";    
139                         }
140                         else{
141                                 ofstream temp;
142
143                                 //flow.files = 9 character offset
144                                 compositeFASTAFileName = flowFilesFileName.substr(0, flowFilesFileName.length()-10) + "shhh.fasta";
145                                 m->openOutputFile(compositeFASTAFileName, temp);
146                                 temp.close();
147                                 
148                                 compositeNamesFileName = flowFilesFileName.substr(0, flowFilesFileName.length()-10) + "shhh.names";
149                                 m->openOutputFile(compositeNamesFileName, temp);
150                                 temp.close();
151                         }
152             
153             if(flowFilesFileName != "not found"){
154                 string fName;
155                 
156                 ifstream flowFilesFile;
157                 m->openInputFile(flowFilesFileName, flowFilesFile);
158                 while(flowFilesFile){
159                     fName = m->getline(flowFilesFile);
160                     
161                     //test if file is valid
162                     ifstream in;
163                     int ableToOpen = m->openInputFile(fName, in, "noerror");
164                     in.close(); 
165                     if (ableToOpen == 1) {
166                         if (inputDir != "") { //default path is set
167                             string tryPath = inputDir + fName;
168                             m->mothurOut("Unable to open " + fName + ". Trying input directory " + tryPath); m->mothurOutEndLine();
169                             ifstream in2;
170                             ableToOpen = m->openInputFile(tryPath, in2, "noerror");
171                             in2.close();
172                             fName = tryPath;
173                         }
174                     }
175                     
176                     if (ableToOpen == 1) {
177                         if (m->getDefaultPath() != "") { //default path is set
178                             string tryPath = m->getDefaultPath() + m->getSimpleName(fName);
179                             m->mothurOut("Unable to open " + fName + ". Trying default " + tryPath); m->mothurOutEndLine();
180                             ifstream in2;
181                             ableToOpen = m->openInputFile(tryPath, in2, "noerror");
182                             in2.close();
183                             fName = tryPath;
184                         }
185                     }
186                     
187                     //if you can't open it its not in current working directory or inputDir, try mothur excutable location
188                     if (ableToOpen == 1) {
189                         string exepath = m->argv;
190                         string tempPath = exepath;
191                         for (int i = 0; i < exepath.length(); i++) { tempPath[i] = tolower(exepath[i]); }
192                         exepath = exepath.substr(0, (tempPath.find_last_of('m')));
193                         
194                         string tryPath = m->getFullPathName(exepath) + m->getSimpleName(fName);
195                         m->mothurOut("Unable to open " + fName + ". Trying mothur's executable location " + tryPath); m->mothurOutEndLine();
196                         ifstream in2;
197                         ableToOpen = m->openInputFile(tryPath, in2, "noerror");
198                         in2.close();
199                         fName = tryPath;
200                     }
201                     
202                     if (ableToOpen == 1) {  m->mothurOut("Unable to open " + fName + ". Disregarding. "); m->mothurOutEndLine();  }
203                     else { flowFileVector.push_back(fName); }
204                     m->gobble(flowFilesFile);
205                 }
206                 flowFilesFile.close();
207                 if (flowFileVector.size() == 0) {  m->mothurOut("[ERROR]: no valid files."); m->mothurOutEndLine(); abort = true; }
208             }
209             else{
210                 flowFileVector.push_back(flowFileName);
211             }
212
213                         
214                         //if the user changes the output directory command factory will send this info to us in the output parameter 
215                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  
216                                 outputDir = ""; 
217                                 outputDir += m->hasPath(flowFileName); //if user entered a file with a path then preserve it    
218                         }
219                         
220                         
221                         //check for optional parameter and set defaults
222                         // ...at some point should added some additional type checking...
223                         string temp;
224                         temp = validParameter.validFile(parameters, "lookup", true);
225                         if (temp == "not found")        {       
226                                 lookupFileName = "LookUp_Titanium.pat"; 
227                                 
228                                 int ableToOpen;
229                                 ifstream in;
230                                 ableToOpen = m->openInputFile(lookupFileName, in, "noerror");
231                                 in.close();     
232                                 
233                                 //if you can't open it, try input location
234                                 if (ableToOpen == 1) {
235                                         if (inputDir != "") { //default path is set
236                                                 string tryPath = inputDir + lookupFileName;
237                                                 m->mothurOut("Unable to open " + lookupFileName + ". Trying input directory " + tryPath); m->mothurOutEndLine();
238                                                 ifstream in2;
239                                                 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
240                                                 in2.close();
241                                                 lookupFileName = tryPath;
242                                         }
243                                 }
244                                 
245                                 //if you can't open it, try default location
246                                 if (ableToOpen == 1) {
247                                         if (m->getDefaultPath() != "") { //default path is set
248                                                 string tryPath = m->getDefaultPath() + m->getSimpleName(lookupFileName);
249                                                 m->mothurOut("Unable to open " + lookupFileName + ". Trying default " + tryPath); m->mothurOutEndLine();
250                                                 ifstream in2;
251                                                 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
252                                                 in2.close();
253                                                 lookupFileName = tryPath;
254                                         }
255                                 }
256                                 
257                                 //if you can't open it its not in current working directory or inputDir, try mothur excutable location
258                                 if (ableToOpen == 1) {
259                                         string exepath = m->argv;
260                                         string tempPath = exepath;
261                                         for (int i = 0; i < exepath.length(); i++) { tempPath[i] = tolower(exepath[i]); }
262                                         exepath = exepath.substr(0, (tempPath.find_last_of('m')));
263                                         
264                                         string tryPath = m->getFullPathName(exepath) + m->getSimpleName(lookupFileName);
265                                         m->mothurOut("Unable to open " + lookupFileName + ". Trying mothur's executable location " + tryPath); m->mothurOutEndLine();
266                                         ifstream in2;
267                                         ableToOpen = m->openInputFile(tryPath, in2, "noerror");
268                                         in2.close();
269                                         lookupFileName = tryPath;
270                                 }
271                                 
272                                 if (ableToOpen == 1) {  m->mothurOut("Unable to open " + lookupFileName + "."); m->mothurOutEndLine(); abort=true;  }
273                         }
274                         else if(temp == "not open")     {       
275                                 
276                                 lookupFileName = validParameter.validFile(parameters, "lookup", false);
277                                 
278                                 //if you can't open it its not inputDir, try mothur excutable location
279                                 string exepath = m->argv;
280                                 string tempPath = exepath;
281                                 for (int i = 0; i < exepath.length(); i++) { tempPath[i] = tolower(exepath[i]); }
282                                 exepath = exepath.substr(0, (tempPath.find_last_of('m')));
283                                         
284                                 string tryPath = m->getFullPathName(exepath) + lookupFileName;
285                                 m->mothurOut("Unable to open " + lookupFileName + ". Trying mothur's executable location " + tryPath); m->mothurOutEndLine();
286                                 ifstream in2;
287                                 int ableToOpen = m->openInputFile(tryPath, in2, "noerror");
288                                 in2.close();
289                                 lookupFileName = tryPath;
290                                 
291                                 if (ableToOpen == 1) {  m->mothurOut("Unable to open " + lookupFileName + "."); m->mothurOutEndLine(); abort=true;  }
292                         }else                                           {       lookupFileName = temp;  }
293                         
294                         temp = validParameter.validFile(parameters, "processors", false);       if (temp == "not found"){       temp = m->getProcessors();      }
295                         m->setProcessors(temp);
296                         m->mothurConvert(temp, processors);
297
298                         temp = validParameter.validFile(parameters, "cutoff", false);   if (temp == "not found"){       temp = "0.01";          }
299                         m->mothurConvert(temp, cutoff); 
300                         
301                         temp = validParameter.validFile(parameters, "mindelta", false); if (temp == "not found"){       temp = "0.000001";      }
302                         m->mothurConvert(temp, minDelta); 
303
304                         temp = validParameter.validFile(parameters, "maxiter", false);  if (temp == "not found"){       temp = "1000";          }
305                         m->mothurConvert(temp, maxIters); 
306
307                         temp = validParameter.validFile(parameters, "sigma", false);if (temp == "not found")    {       temp = "60";            }
308                         m->mothurConvert(temp, sigma); 
309                         
310                         flowOrder = validParameter.validFile(parameters, "order", false);
311                         if (flowOrder == "not found"){ flowOrder = "TACG";              }
312                         else if(flowOrder.length() != 4){
313                                 m->mothurOut("The value of the order option must be four bases long\n");
314                         }
315                         
316                 }
317         }
318         catch(exception& e) {
319                 m->errorOut(e, "ShhherCommand", "ShhherCommand");
320                 exit(1);
321         }
322 }
323 //**********************************************************************************************************************
324 #ifdef USE_MPI
325 int ShhherCommand::execute(){
326         try {
327                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
328                 
329                 int tag = 1976;
330                 MPI_Status status; 
331                 MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
332                 MPI_Comm_size(MPI_COMM_WORLD, &ncpus);
333         
334                 if(pid == 0){
335
336                         for(int i=1;i<ncpus;i++){
337                                 MPI_Send(&abort, 1, MPI_INT, i, tag, MPI_COMM_WORLD);
338                         }
339                         if(abort == 1){ return 0;       }
340
341                         processors = ncpus;
342                         
343                         m->mothurOut("\nGetting preliminary data...\n");
344                         getSingleLookUp();      if (m->control_pressed) { return 0; }
345                         getJointLookUp();       if (m->control_pressed) { return 0; }
346                         
347                         int numFiles = flowFileVector.size();
348
349                         for(int i=1;i<ncpus;i++){
350                                 MPI_Send(&numFiles, 1, MPI_INT, i, tag, MPI_COMM_WORLD);
351                         }
352                         
353                         for(int i=0;i<numFiles;i++){
354                                 
355                                 if (m->control_pressed) { break; }
356                                 
357                                 double begClock = clock();
358                                 unsigned long long begTime = time(NULL);
359
360                                 flowFileName = flowFileVector[i];
361                                 
362                                 m->mothurOut("\n>>>>>\tProcessing " + flowFileName + " (file " + toString(i+1) + " of " + toString(numFiles) + ")\t<<<<<\n");
363                                 m->mothurOut("Reading flowgrams...\n");
364                                 getFlowData();
365                                 
366                                 if (m->control_pressed) { break; }
367
368                                 m->mothurOut("Identifying unique flowgrams...\n");
369                                 getUniques();
370                                 
371                                 if (m->control_pressed) { break; }
372
373                                 m->mothurOut("Calculating distances between flowgrams...\n");
374                                 char fileName[1024];
375                                 strcpy(fileName, flowFileName.c_str());
376
377                                 for(int i=1;i<ncpus;i++){
378                                         MPI_Send(&fileName[0], 1024, MPI_CHAR, i, tag, MPI_COMM_WORLD);
379
380                                         MPI_Send(&numSeqs, 1, MPI_INT, i, tag, MPI_COMM_WORLD);
381                                         MPI_Send(&numUniques, 1, MPI_INT, i, tag, MPI_COMM_WORLD);
382                                         MPI_Send(&numFlowCells, 1, MPI_INT, i, tag, MPI_COMM_WORLD);
383                                         MPI_Send(&flowDataIntI[0], numSeqs * numFlowCells, MPI_SHORT, i, tag, MPI_COMM_WORLD);
384                                         MPI_Send(&flowDataPrI[0], numSeqs * numFlowCells, MPI_DOUBLE, i, tag, MPI_COMM_WORLD);
385                                         MPI_Send(&mapUniqueToSeq[0], numSeqs, MPI_INT, i, tag, MPI_COMM_WORLD);
386                                         MPI_Send(&mapSeqToUnique[0], numSeqs, MPI_INT, i, tag, MPI_COMM_WORLD);
387                                         MPI_Send(&lengths[0], numSeqs, MPI_INT, i, tag, MPI_COMM_WORLD);
388                                         MPI_Send(&jointLookUp[0], NUMBINS * NUMBINS, MPI_DOUBLE, i, tag, MPI_COMM_WORLD);
389                                         MPI_Send(&cutoff, 1, MPI_DOUBLE, i, tag, MPI_COMM_WORLD);
390                                 }                               
391                                                         
392                                 string distFileName = flowDistMPI(0, int(sqrt(1.0/float(ncpus)) * numUniques));
393                                 
394                                 if (m->control_pressed) { break; }
395                                 
396                                 int done;
397                                 for(int i=1;i<ncpus;i++){
398                                         MPI_Recv(&done, 1, MPI_INT, i, tag, MPI_COMM_WORLD, &status);
399                                         
400                                         m->appendFiles((distFileName + ".temp." + toString(i)), distFileName);
401                                         m->mothurRemove((distFileName + ".temp." + toString(i)));
402                                 }
403
404                                 string namesFileName = createNamesFile();
405                                 
406                                 if (m->control_pressed) { break; }
407                                 
408                                 m->mothurOut("\nClustering flowgrams...\n");
409                                 string listFileName = cluster(distFileName, namesFileName);
410                                 
411                                 if (m->control_pressed) { break; }
412                                 
413                                 getOTUData(listFileName);
414
415                                 m->mothurRemove(distFileName);
416                                 m->mothurRemove(namesFileName);
417                                 m->mothurRemove(listFileName);
418                                 
419                                 if (m->control_pressed) { break; }
420                                 
421                                 initPyroCluster();
422
423                                 if (m->control_pressed) { break; }
424                                 
425                                 for(int i=1;i<ncpus;i++){
426                                         MPI_Send(&numOTUs, 1, MPI_INT, i, tag, MPI_COMM_WORLD);
427                                         MPI_Send(&singleLookUp[0], singleLookUp.size(), MPI_DOUBLE, i, tag, MPI_COMM_WORLD);
428                                         MPI_Send(&uniqueFlowgrams[0], numFlowCells * numUniques, MPI_SHORT, i, tag, MPI_COMM_WORLD);
429                                         MPI_Send(&sigma, 1, MPI_DOUBLE, i, tag, MPI_COMM_WORLD);
430                                 }
431                                 
432                                 if (m->control_pressed) { break; }
433                                 
434                                 double maxDelta = 0;
435                                 int iter = 0;
436                                 
437                                 int numOTUsOnCPU = numOTUs / ncpus;
438                                 int numSeqsOnCPU = numSeqs / ncpus;
439                                 m->mothurOut("\nDenoising flowgrams...\n");
440                                 m->mothurOut("iter\tmaxDelta\tnLL\t\tcycletime\n");
441                                 
442                                 while((maxIters == 0 && maxDelta > minDelta) || iter < MIN_ITER || (maxDelta > minDelta && iter < maxIters)){
443                                         
444                                         double cycClock = clock();
445                                         unsigned long long cycTime = time(NULL);
446                                         fill();
447                                         
448                                         if (m->control_pressed) { break; }
449
450                                         int total = singleTau.size();
451                                         for(int i=1;i<ncpus;i++){
452                                                 MPI_Send(&total, 1, MPI_INT, i, tag, MPI_COMM_WORLD);
453                                                 MPI_Send(&change[0], numOTUs, MPI_SHORT, i, tag, MPI_COMM_WORLD);
454                                                 MPI_Send(&centroids[0], numOTUs, MPI_INT, i, tag, MPI_COMM_WORLD);
455                                                 
456                                                 MPI_Send(&singleTau[0], total, MPI_DOUBLE, i, tag, MPI_COMM_WORLD);
457                                                 MPI_Send(&seqNumber[0], total, MPI_INT, i, tag, MPI_COMM_WORLD);
458                                                 MPI_Send(&seqIndex[0], total, MPI_INT, i, tag, MPI_COMM_WORLD);
459                                                 MPI_Send(&nSeqsPerOTU[0], numOTUs, MPI_INT, i, tag, MPI_COMM_WORLD);
460                                                 MPI_Send(&cumNumSeqs[0], numOTUs, MPI_INT, i, tag, MPI_COMM_WORLD);
461                                         }
462                                         
463                                         calcCentroidsDriver(0, numOTUsOnCPU);
464                                         
465                                         for(int i=1;i<ncpus;i++){
466                                                 int otuStart = i * numOTUs / ncpus;
467                                                 int otuStop = (i + 1) * numOTUs / ncpus;
468                                                 
469                                                 vector<int> tempCentroids(numOTUs, 0);
470                                                 vector<short> tempChange(numOTUs, 0);
471                                                 
472                                                 MPI_Recv(&tempCentroids[0], numOTUs, MPI_INT, i, tag, MPI_COMM_WORLD, &status);
473                                                 MPI_Recv(&tempChange[0], numOTUs, MPI_SHORT, i, tag, MPI_COMM_WORLD, &status);
474                                                 
475                                                 for(int j=otuStart;j<otuStop;j++){
476                                                         centroids[j] = tempCentroids[j];
477                                                         change[j] = tempChange[j];
478                                                 }
479                                         }
480                                                                         
481                                         maxDelta = getNewWeights(); if (m->control_pressed) { break; }
482                                         double nLL = getLikelihood(); if (m->control_pressed) { break; }
483                                         checkCentroids(); if (m->control_pressed) { break; }
484                                         
485                                         for(int i=1;i<ncpus;i++){
486                                                 MPI_Send(&centroids[0], numOTUs, MPI_INT, i, tag, MPI_COMM_WORLD);
487                                                 MPI_Send(&weight[0], numOTUs, MPI_DOUBLE, i, tag, MPI_COMM_WORLD);
488                                                 MPI_Send(&change[0], numOTUs, MPI_SHORT, i, tag, MPI_COMM_WORLD);
489                                         }
490                                         
491                                         calcNewDistancesParent(0, numSeqsOnCPU);
492
493                                         total = singleTau.size();
494
495                                         for(int i=1;i<ncpus;i++){
496                                                 int childTotal;
497                                                 int seqStart = i * numSeqs / ncpus;
498                                                 int seqStop = (i + 1) * numSeqs / ncpus;
499                                                 
500                                                 MPI_Recv(&childTotal, 1, MPI_INT, i, tag, MPI_COMM_WORLD, &status);
501
502                                                 vector<int> childSeqIndex(childTotal, 0);
503                                                 vector<double> childSingleTau(childTotal, 0);
504                                                 vector<double> childDist(numSeqs * numOTUs, 0);
505                                                 vector<int> otuIndex(childTotal, 0);
506                                                 
507                                                 MPI_Recv(&childSeqIndex[0], childTotal, MPI_INT, i, tag, MPI_COMM_WORLD, &status);
508                                                 MPI_Recv(&childSingleTau[0], childTotal, MPI_DOUBLE, i, tag, MPI_COMM_WORLD, &status);
509                                                 MPI_Recv(&childDist[0], numOTUs * numSeqs, MPI_DOUBLE, i, tag, MPI_COMM_WORLD, &status);
510                                                 MPI_Recv(&otuIndex[0], childTotal, MPI_INT, i, tag, MPI_COMM_WORLD, &status);
511
512                                                 int oldTotal = total;
513                                                 total += childTotal;
514                                                 singleTau.resize(total, 0);
515                                                 seqIndex.resize(total, 0);
516                                                 seqNumber.resize(total, 0);
517                                                 
518                                                 int childIndex = 0;
519                                                 
520                                                 for(int j=oldTotal;j<total;j++){
521                                                         int otuI = otuIndex[childIndex];
522                                                         int seqI = childSeqIndex[childIndex];
523                                                         
524                                                         singleTau[j] = childSingleTau[childIndex];
525                                                         
526                                                         aaP[otuI][nSeqsPerOTU[otuI]] = j;
527                                                         aaI[otuI][nSeqsPerOTU[otuI]] = seqI;
528                                                         nSeqsPerOTU[otuI]++;
529                                                         childIndex++;
530                                                 }
531                                                 
532                                                 int index = seqStart * numOTUs;
533                                                 for(int j=seqStart;j<seqStop;j++){
534                                                         for(int k=0;k<numOTUs;k++){
535                                                                 dist[index] = childDist[index];
536                                                                 index++;
537                                                         }
538                                                 }                                       
539                                         }
540                                         
541                                         iter++;
542                                         
543                                         m->mothurOut(toString(iter) + '\t' + toString(maxDelta) + '\t' + toString(nLL) + '\t' + toString(time(NULL) - cycTime) + '\t' + toString((clock() - cycClock)/(double)CLOCKS_PER_SEC) + '\n');                  
544
545                                         if((maxIters == 0 && maxDelta > minDelta) || iter < MIN_ITER || (maxDelta > minDelta && iter < maxIters)){
546                                                 int live = 1;
547                                                 for(int i=1;i<ncpus;i++){
548                                                         MPI_Send(&live, 1, MPI_INT, i, tag, MPI_COMM_WORLD);
549                                                 }
550                                         }
551                                         else{
552                                                 int live = 0;
553                                                 for(int i=1;i<ncpus;i++){
554                                                         MPI_Send(&live, 1, MPI_INT, i, tag, MPI_COMM_WORLD); //send kill command
555                                                 }
556                                         }
557                                         
558                                 }       
559                                 
560                                 if (m->control_pressed) { break; }
561                                 
562                                 m->mothurOut("\nFinalizing...\n");
563                                 fill();
564                                 
565                                 if (m->control_pressed) { break; }
566                                 
567                                 setOTUs();
568                                 
569                                 vector<int> otuCounts(numOTUs, 0);
570                                 for(int i=0;i<numSeqs;i++)      {       otuCounts[otuData[i]]++;        }
571                                 calcCentroidsDriver(0, numOTUs);
572                                 
573                                 if (m->control_pressed) { break; }
574                                 
575                                 writeQualities(otuCounts);      if (m->control_pressed) { break; }
576                                 writeSequences(otuCounts);      if (m->control_pressed) { break; }
577                                 writeNames(otuCounts);          if (m->control_pressed) { break; }
578                                 writeClusters(otuCounts);       if (m->control_pressed) { break; }
579                                 writeGroups();                          if (m->control_pressed) { break; }
580                                 
581                                                                  
582                                 m->mothurOut("Total time to process " + toString(flowFileName) + ":\t" + toString(time(NULL) - begTime) + '\t' + toString((clock() - begClock)/(double)CLOCKS_PER_SEC) + '\n');                 
583                         }
584                 }
585                 else{
586                         int abort = 1;
587
588                         MPI_Recv(&abort, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
589                         if(abort){      return 0;       }
590
591                         int numFiles;
592                         MPI_Recv(&numFiles, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
593
594                         for(int i=0;i<numFiles;i++){
595                                 
596                                 if (m->control_pressed) { break; }
597                                 
598                                 //Now into the pyrodist part
599                                 bool live = 1;
600
601                                 char fileName[1024];
602                                 MPI_Recv(&fileName, 1024, MPI_CHAR, 0, tag, MPI_COMM_WORLD, &status);
603                                 MPI_Recv(&numSeqs, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
604                                 MPI_Recv(&numUniques, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
605                                 MPI_Recv(&numFlowCells, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
606                                 
607                                 flowDataIntI.resize(numSeqs * numFlowCells);
608                                 flowDataPrI.resize(numSeqs * numFlowCells);
609                                 mapUniqueToSeq.resize(numSeqs);
610                                 mapSeqToUnique.resize(numSeqs);
611                                 lengths.resize(numSeqs);
612                                 jointLookUp.resize(NUMBINS * NUMBINS);
613
614                                 MPI_Recv(&flowDataIntI[0], numSeqs * numFlowCells, MPI_SHORT, 0, tag, MPI_COMM_WORLD, &status);
615                                 MPI_Recv(&flowDataPrI[0], numSeqs * numFlowCells, MPI_DOUBLE, 0, tag, MPI_COMM_WORLD, &status);
616                                 MPI_Recv(&mapUniqueToSeq[0], numSeqs, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
617                                 MPI_Recv(&mapSeqToUnique[0], numSeqs, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
618                                 MPI_Recv(&lengths[0], numSeqs, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
619                                 MPI_Recv(&jointLookUp[0], NUMBINS * NUMBINS, MPI_DOUBLE, 0, tag, MPI_COMM_WORLD, &status);
620                                 MPI_Recv(&cutoff, 1, MPI_DOUBLE, 0, tag, MPI_COMM_WORLD, &status);
621
622                                 flowFileName = string(fileName);
623                                 int flowDistStart = int(sqrt(float(pid)/float(ncpus)) * numUniques);
624                                 int flowDistEnd = int(sqrt(float(pid+1)/float(ncpus)) * numUniques);
625                                 
626                                 string distanceStringChild = flowDistMPI(flowDistStart, flowDistEnd);
627                                 
628                                 if (m->control_pressed) { break; }
629                                 
630                                 int done = 1;
631                                 MPI_Send(&done, 1, MPI_INT, 0, tag, MPI_COMM_WORLD);
632
633                                 //Now into the pyronoise part
634                                 MPI_Recv(&numOTUs, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
635                                 
636                                 singleLookUp.resize(HOMOPS * NUMBINS);
637                                 uniqueFlowgrams.resize(numUniques * numFlowCells);
638                                 weight.resize(numOTUs);
639                                 centroids.resize(numOTUs);
640                                 change.resize(numOTUs);
641                                 dist.assign(numOTUs * numSeqs, 0);
642                                 nSeqsPerOTU.resize(numOTUs);
643                                 cumNumSeqs.resize(numOTUs);
644
645                                 MPI_Recv(&singleLookUp[0], singleLookUp.size(), MPI_DOUBLE, 0, tag, MPI_COMM_WORLD, &status);
646                                 MPI_Recv(&uniqueFlowgrams[0], uniqueFlowgrams.size(), MPI_SHORT, 0, tag, MPI_COMM_WORLD, &status);
647                                 MPI_Recv(&sigma, 1, MPI_DOUBLE, 0, tag, MPI_COMM_WORLD, &status);
648                                 
649                                 int startOTU = pid * numOTUs / ncpus;
650                                 int endOTU = (pid + 1) * numOTUs / ncpus;
651
652                                 int startSeq = pid * numSeqs / ncpus;
653                                 int endSeq = (pid + 1) * numSeqs /ncpus;
654                                 
655                                 int total;
656
657                                 while(live){
658                                         
659                                         if (m->control_pressed) { break; }
660                                         
661                                         MPI_Recv(&total, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
662                                         singleTau.assign(total, 0.0000);
663                                         seqNumber.assign(total, 0);
664                                         seqIndex.assign(total, 0);
665
666                                         MPI_Recv(&change[0], numOTUs, MPI_SHORT, 0, tag, MPI_COMM_WORLD, &status);
667                                         MPI_Recv(&centroids[0], numOTUs, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
668                                         MPI_Recv(&singleTau[0], total, MPI_DOUBLE, 0, tag, MPI_COMM_WORLD, &status);
669                                         MPI_Recv(&seqNumber[0], total, MPI_DOUBLE, 0, tag, MPI_COMM_WORLD, &status);
670                                         MPI_Recv(&seqIndex[0], total, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
671                                         MPI_Recv(&nSeqsPerOTU[0], total, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
672                                         MPI_Recv(&cumNumSeqs[0], numOTUs, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
673
674                                         calcCentroidsDriver(startOTU, endOTU);
675
676                                         MPI_Send(&centroids[0], numOTUs, MPI_INT, 0, tag, MPI_COMM_WORLD);
677                                         MPI_Send(&change[0], numOTUs, MPI_SHORT, 0, tag, MPI_COMM_WORLD);
678
679                                         MPI_Recv(&centroids[0], numOTUs, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
680                                         MPI_Recv(&weight[0], numOTUs, MPI_DOUBLE, 0, tag, MPI_COMM_WORLD, &status);
681                                         MPI_Recv(&change[0], numOTUs, MPI_SHORT, 0, tag, MPI_COMM_WORLD, &status);
682
683                                         vector<int> otuIndex(total, 0);
684                                         calcNewDistancesChildMPI(startSeq, endSeq, otuIndex);
685                                         total = otuIndex.size();
686                                         
687                                         MPI_Send(&total, 1, MPI_INT, 0, tag, MPI_COMM_WORLD);
688                                         MPI_Send(&seqIndex[0], total, MPI_INT, 0, tag, MPI_COMM_WORLD);
689                                         MPI_Send(&singleTau[0], total, MPI_DOUBLE, 0, tag, MPI_COMM_WORLD);
690                                         MPI_Send(&dist[0], numOTUs * numSeqs, MPI_DOUBLE, 0, tag, MPI_COMM_WORLD);
691                                         MPI_Send(&otuIndex[0], total, MPI_INT, 0, tag, MPI_COMM_WORLD);
692                                 
693                                         MPI_Recv(&live, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
694                                 }
695                         }
696                 }
697                 
698                 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
699                 
700                 MPI_Barrier(MPI_COMM_WORLD);
701
702                 
703                 if(compositeFASTAFileName != ""){
704                         outputNames.push_back(compositeFASTAFileName);
705                         outputNames.push_back(compositeNamesFileName);
706                 }
707
708                 m->mothurOutEndLine();
709                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
710                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
711                 m->mothurOutEndLine();
712                 
713                 return 0;
714
715         }
716         catch(exception& e) {
717                 m->errorOut(e, "ShhherCommand", "execute");
718                 exit(1);
719         }
720 }
721 /**************************************************************************************************/
722 string ShhherCommand::createNamesFile(){
723         try{
724                 
725                 vector<string> duplicateNames(numUniques, "");
726                 for(int i=0;i<numSeqs;i++){
727                         duplicateNames[mapSeqToUnique[i]] += seqNameVector[i] + ',';
728                 }
729                 
730                 string nameFileName = flowFileName.substr(0,flowFileName.find_last_of('.')) + ".shhh.names";
731                 
732                 ofstream nameFile;
733                 m->openOutputFile(nameFileName, nameFile);
734                 
735                 for(int i=0;i<numUniques;i++){
736                         
737                         if (m->control_pressed) { break; }
738                         
739             //                  nameFile << seqNameVector[mapUniqueToSeq[i]] << '\t' << duplicateNames[i].substr(0, duplicateNames[i].find_last_of(',')) << endl;
740                         nameFile << mapUniqueToSeq[i] << '\t' << duplicateNames[i].substr(0, duplicateNames[i].find_last_of(',')) << endl;
741                 }
742                 
743                 nameFile.close();
744                 return  nameFileName;
745         }
746         catch(exception& e) {
747                 m->errorOut(e, "ShhherCommand", "createNamesFile");
748                 exit(1);
749         }
750 }
751 /**************************************************************************************************/
752
753 string ShhherCommand::flowDistMPI(int startSeq, int stopSeq){
754         try{            
755                 ostringstream outStream;
756                 outStream.setf(ios::fixed, ios::floatfield);
757                 outStream.setf(ios::dec, ios::basefield);
758                 outStream.setf(ios::showpoint);
759                 outStream.precision(6);
760                 
761                 int begTime = time(NULL);
762                 double begClock = clock();
763                 
764                 for(int i=startSeq;i<stopSeq;i++){
765                         
766                         if (m->control_pressed) { break; }
767                         
768                         for(int j=0;j<i;j++){
769                                 float flowDistance = calcPairwiseDist(mapUniqueToSeq[i], mapUniqueToSeq[j]);
770                                 
771                                 if(flowDistance < 1e-6){
772                                         outStream << mapUniqueToSeq[i] << '\t' << mapUniqueToSeq[j] << '\t' << 0.000000 << endl;
773                                 }
774                                 else if(flowDistance <= cutoff){
775                                         outStream << mapUniqueToSeq[i] << '\t' << mapUniqueToSeq[j] << '\t' << flowDistance << endl;
776                                 }
777                         }
778                         if(i % 100 == 0){
779                                 m->mothurOut(toString(i) + '\t' + toString(time(NULL) - begTime) + '\t' + toString((clock()-begClock)/CLOCKS_PER_SEC) + '\n');
780                         }
781                 }
782                 
783                 string fDistFileName = flowFileName.substr(0,flowFileName.find_last_of('.')) + ".shhh.dist";
784                 if(pid != 0){   fDistFileName += ".temp." + toString(pid);      }
785                 
786                 if (m->control_pressed) { return fDistFileName; }
787                 
788                 m->mothurOut(toString(stopSeq) + '\t' + toString(time(NULL) - begTime) + '\t' + toString((clock()-begClock)/CLOCKS_PER_SEC) + '\n');
789
790                 ofstream distFile(fDistFileName.c_str());
791                 distFile << outStream.str();            
792                 distFile.close();
793                 
794                 return fDistFileName;
795         }
796         catch(exception& e) {
797                 m->errorOut(e, "ShhherCommand", "flowDistMPI");
798                 exit(1);
799         }
800 }
801 /**************************************************************************************************/
802  
803 void ShhherCommand::getOTUData(string listFileName){
804     try {
805         
806         ifstream listFile;
807         m->openInputFile(listFileName, listFile);
808         string label;
809         
810         listFile >> label >> numOTUs;
811         
812         otuData.assign(numSeqs, 0);
813         cumNumSeqs.assign(numOTUs, 0);
814         nSeqsPerOTU.assign(numOTUs, 0);
815         aaP.clear();aaP.resize(numOTUs);
816         
817         seqNumber.clear();
818         aaI.clear();
819         seqIndex.clear();
820         
821         string singleOTU = "";
822         
823         for(int i=0;i<numOTUs;i++){
824             
825             if (m->control_pressed) { break; }
826             
827             listFile >> singleOTU;
828             
829             istringstream otuString(singleOTU);
830             
831             while(otuString){
832                 
833                 string seqName = "";
834                 
835                 for(int j=0;j<singleOTU.length();j++){
836                     char letter = otuString.get();
837                     
838                     if(letter != ','){
839                         seqName += letter;
840                     }
841                     else{
842                         map<string,int>::iterator nmIt = nameMap.find(seqName);
843                         int index = nmIt->second;
844                         
845                         nameMap.erase(nmIt);
846                         
847                         otuData[index] = i;
848                         nSeqsPerOTU[i]++;
849                         aaP[i].push_back(index);
850                         seqName = "";
851                     }
852                 }
853                 
854                 map<string,int>::iterator nmIt = nameMap.find(seqName);
855                 
856                 int index = nmIt->second;
857                 nameMap.erase(nmIt);
858                 
859                 otuData[index] = i;
860                 nSeqsPerOTU[i]++;
861                 aaP[i].push_back(index);        
862                 
863                 otuString.get();
864             }
865             
866             sort(aaP[i].begin(), aaP[i].end());
867             for(int j=0;j<nSeqsPerOTU[i];j++){
868                 seqNumber.push_back(aaP[i][j]);
869             }
870             for(int j=nSeqsPerOTU[i];j<numSeqs;j++){
871                 aaP[i].push_back(0);
872             }
873             
874             
875         }
876         
877         for(int i=1;i<numOTUs;i++){
878             cumNumSeqs[i] = cumNumSeqs[i-1] + nSeqsPerOTU[i-1];
879         }
880         aaI = aaP;
881         seqIndex = seqNumber;
882         
883         listFile.close();       
884         
885     }
886     catch(exception& e) {
887         m->errorOut(e, "ShhherCommand", "getOTUData");
888         exit(1);        
889     }           
890 }
891
892 /**************************************************************************************************/
893
894 void ShhherCommand::initPyroCluster(){                          
895     try{
896         if (numOTUs < processors) { processors = 1; }
897         
898         dist.assign(numSeqs * numOTUs, 0);
899         change.assign(numOTUs, 1);
900         centroids.assign(numOTUs, -1);
901         weight.assign(numOTUs, 0);
902         singleTau.assign(numSeqs, 1.0);
903         
904         nSeqsBreaks.assign(processors+1, 0);
905         nOTUsBreaks.assign(processors+1, 0);
906         
907         nSeqsBreaks[0] = 0;
908         for(int i=0;i<processors;i++){
909             nSeqsBreaks[i+1] = nSeqsBreaks[i] + (int)((double) numSeqs / (double) processors);
910             nOTUsBreaks[i+1] = nOTUsBreaks[i] + (int)((double) numOTUs / (double) processors);
911         }
912         nSeqsBreaks[processors] = numSeqs;
913         nOTUsBreaks[processors] = numOTUs;
914     }
915     catch(exception& e) {
916         m->errorOut(e, "ShhherCommand", "initPyroCluster");
917         exit(1);        
918     }
919 }
920
921 /**************************************************************************************************/
922
923 void ShhherCommand::fill(){
924     try {
925         int index = 0;
926         for(int i=0;i<numOTUs;i++){
927             
928             if (m->control_pressed) { break; }
929             
930             cumNumSeqs[i] = index;
931             for(int j=0;j<nSeqsPerOTU[i];j++){
932                 seqNumber[index] = aaP[i][j];
933                 seqIndex[index] = aaI[i][j];
934                 
935                 index++;
936             }
937         }
938     }
939     catch(exception& e) {
940         m->errorOut(e, "ShhherCommand", "fill");
941         exit(1);        
942     }           
943 }
944
945 /**************************************************************************************************/
946  
947 void ShhherCommand::getFlowData(){
948     try{
949         ifstream flowFile;
950         m->openInputFile(flowFileName, flowFile);
951         
952         string seqName;
953         seqNameVector.clear();
954         lengths.clear();
955         flowDataIntI.clear();
956         nameMap.clear();
957         
958         
959         int currentNumFlowCells;
960         
961         float intensity;
962         
963         flowFile >> numFlowCells;
964         int index = 0;//pcluster
965         while(!flowFile.eof()){
966             
967             if (m->control_pressed) { break; }
968             
969             flowFile >> seqName >> currentNumFlowCells;
970             lengths.push_back(currentNumFlowCells);
971             
972             seqNameVector.push_back(seqName);
973             nameMap[seqName] = index++;//pcluster
974             
975             for(int i=0;i<numFlowCells;i++){
976                 flowFile >> intensity;
977                 if(intensity > 9.99)    {       intensity = 9.99;       }
978                 int intI = int(100 * intensity + 0.0001);
979                 flowDataIntI.push_back(intI);
980             }
981             m->gobble(flowFile);
982         }
983         flowFile.close();
984         
985         numSeqs = seqNameVector.size();         
986         
987         for(int i=0;i<numSeqs;i++){
988             
989             if (m->control_pressed) { break; }
990             
991             int iNumFlowCells = i * numFlowCells;
992             for(int j=lengths[i];j<numFlowCells;j++){
993                 flowDataIntI[iNumFlowCells + j] = 0;
994             }
995         }
996         
997     }
998     catch(exception& e) {
999         m->errorOut(e, "ShhherCommand", "getFlowData");
1000         exit(1);
1001     }
1002 }
1003 /**************************************************************************************************/
1004 void ShhherCommand::calcNewDistancesChildMPI(int startSeq, int stopSeq, vector<int>& otuIndex){
1005         
1006         try{
1007                 vector<double> newTau(numOTUs,0);
1008                 vector<double> norms(numSeqs, 0);
1009                 otuIndex.clear();
1010                 seqIndex.clear();
1011                 singleTau.clear();
1012                 
1013                 for(int i=startSeq;i<stopSeq;i++){
1014                         
1015                         if (m->control_pressed) { break; }
1016                         
1017                         double offset = 1e8;
1018                         int indexOffset = i * numOTUs;
1019                         
1020                         for(int j=0;j<numOTUs;j++){
1021                                 
1022                                 if(weight[j] > MIN_WEIGHT && change[j] == 1){
1023                                         dist[indexOffset + j] = getDistToCentroid(centroids[j], i, lengths[i]);
1024                                 }
1025                                 if(weight[j] > MIN_WEIGHT && dist[indexOffset + j] < offset){
1026                                         offset = dist[indexOffset + j];
1027                                 }
1028                         }
1029                         
1030                         for(int j=0;j<numOTUs;j++){
1031                                 if(weight[j] > MIN_WEIGHT){
1032                                         newTau[j] = exp(sigma * (-dist[indexOffset + j] + offset)) * weight[j];
1033                                         norms[i] += newTau[j];
1034                                 }
1035                                 else{
1036                                         newTau[j] = 0.0;
1037                                 }
1038                         }
1039                         
1040                         for(int j=0;j<numOTUs;j++){
1041                 
1042                                 newTau[j] /= norms[i];
1043                                 
1044                                 if(newTau[j] > MIN_TAU){
1045                                         otuIndex.push_back(j);
1046                                         seqIndex.push_back(i);
1047                                         singleTau.push_back(newTau[j]);
1048                                 }
1049                         }
1050                         
1051                 }
1052         }
1053         catch(exception& e) {
1054                 m->errorOut(e, "ShhherCommand", "calcNewDistancesChildMPI");
1055                 exit(1);        
1056         }               
1057 }
1058
1059 /**************************************************************************************************/
1060
1061 void ShhherCommand::calcNewDistancesParent(int startSeq, int stopSeq){
1062     
1063     try{
1064         
1065         int total = 0;
1066         vector<double> newTau(numOTUs,0);
1067         vector<double> norms(numSeqs, 0);
1068         nSeqsPerOTU.assign(numOTUs, 0);
1069         
1070         for(int i=startSeq;i<stopSeq;i++){
1071             
1072             if (m->control_pressed) { break; }
1073             
1074             int indexOffset = i * numOTUs;
1075             
1076             double offset = 1e8;
1077             
1078             for(int j=0;j<numOTUs;j++){
1079                 
1080                 if(weight[j] > MIN_WEIGHT && change[j] == 1){
1081                     dist[indexOffset + j] = getDistToCentroid(centroids[j], i, lengths[i]);
1082                 }
1083                 
1084                 if(weight[j] > MIN_WEIGHT && dist[indexOffset + j] < offset){
1085                     offset = dist[indexOffset + j];
1086                 }
1087             }
1088             
1089             for(int j=0;j<numOTUs;j++){
1090                 if(weight[j] > MIN_WEIGHT){
1091                     newTau[j] = exp(sigma * (-dist[indexOffset + j] + offset)) * weight[j];
1092                     norms[i] += newTau[j];
1093                 }
1094                 else{
1095                     newTau[j] = 0.0;
1096                 }
1097             }
1098             
1099             for(int j=0;j<numOTUs;j++){
1100                 newTau[j] /= norms[i];
1101             }
1102             
1103             for(int j=0;j<numOTUs;j++){
1104                 if(newTau[j] > MIN_TAU){
1105                     
1106                     int oldTotal = total;
1107                     
1108                     total++;
1109                     
1110                     singleTau.resize(total, 0);
1111                     seqNumber.resize(total, 0);
1112                     seqIndex.resize(total, 0);
1113                     
1114                     singleTau[oldTotal] = newTau[j];
1115                     
1116                     aaP[j][nSeqsPerOTU[j]] = oldTotal;
1117                     aaI[j][nSeqsPerOTU[j]] = i;
1118                     nSeqsPerOTU[j]++;
1119                 }
1120             }
1121             
1122         }
1123         
1124     }
1125     catch(exception& e) {
1126         m->errorOut(e, "ShhherCommand", "calcNewDistancesParent");
1127         exit(1);        
1128     }           
1129 }
1130
1131 /**************************************************************************************************/
1132
1133 void ShhherCommand::setOTUs(){
1134     
1135     try {
1136         vector<double> bigTauMatrix(numOTUs * numSeqs, 0.0000);
1137         
1138         for(int i=0;i<numOTUs;i++){
1139             
1140             if (m->control_pressed) { break; }
1141             
1142             for(int j=0;j<nSeqsPerOTU[i];j++){
1143                 int index = cumNumSeqs[i] + j;
1144                 double tauValue = singleTau[seqNumber[index]];
1145                 int sIndex = seqIndex[index];
1146                 bigTauMatrix[sIndex * numOTUs + i] = tauValue;                          
1147             }
1148         }
1149         
1150         for(int i=0;i<numSeqs;i++){
1151             double maxTau = -1.0000;
1152             int maxOTU = -1;
1153             for(int j=0;j<numOTUs;j++){
1154                 if(bigTauMatrix[i * numOTUs + j] > maxTau){
1155                     maxTau = bigTauMatrix[i * numOTUs + j];
1156                     maxOTU = j;
1157                 }
1158             }
1159             
1160             otuData[i] = maxOTU;
1161         }
1162         
1163         nSeqsPerOTU.assign(numOTUs, 0);         
1164         
1165         for(int i=0;i<numSeqs;i++){
1166             int index = otuData[i];
1167             
1168             singleTau[i] = 1.0000;
1169             dist[i] = 0.0000;
1170             
1171             aaP[index][nSeqsPerOTU[index]] = i;
1172             aaI[index][nSeqsPerOTU[index]] = i;
1173             
1174             nSeqsPerOTU[index]++;
1175         }
1176         fill(); 
1177     }
1178     catch(exception& e) {
1179         m->errorOut(e, "ShhherCommand", "setOTUs");
1180         exit(1);        
1181     }           
1182 }
1183
1184 /**************************************************************************************************/
1185  
1186 void ShhherCommand::getUniques(){
1187     try{
1188         
1189         
1190         numUniques = 0;
1191         uniqueFlowgrams.assign(numFlowCells * numSeqs, -1);
1192         uniqueCount.assign(numSeqs, 0);                                                 //      anWeights
1193         uniqueLengths.assign(numSeqs, 0);
1194         mapSeqToUnique.assign(numSeqs, -1);
1195         mapUniqueToSeq.assign(numSeqs, -1);
1196         
1197         vector<short> uniqueFlowDataIntI(numFlowCells * numSeqs, -1);
1198         
1199         for(int i=0;i<numSeqs;i++){
1200             
1201             if (m->control_pressed) { break; }
1202             
1203             int index = 0;
1204             
1205             vector<short> current(numFlowCells);
1206             for(int j=0;j<numFlowCells;j++){
1207                 current[j] = short(((flowDataIntI[i * numFlowCells + j] + 50.0)/100.0));
1208             }
1209             
1210             for(int j=0;j<numUniques;j++){
1211                 int offset = j * numFlowCells;
1212                 bool toEnd = 1;
1213                 
1214                 int shorterLength;
1215                 if(lengths[i] < uniqueLengths[j])       {       shorterLength = lengths[i];                     }
1216                 else                                                            {       shorterLength = uniqueLengths[j];       }
1217                 
1218                 for(int k=0;k<shorterLength;k++){
1219                     if(current[k] != uniqueFlowgrams[offset + k]){
1220                         toEnd = 0;
1221                         break;
1222                     }
1223                 }
1224                 
1225                 if(toEnd){
1226                     mapSeqToUnique[i] = j;
1227                     uniqueCount[j]++;
1228                     index = j;
1229                     if(lengths[i] > uniqueLengths[j])   {       uniqueLengths[j] = lengths[i];  }
1230                     break;
1231                 }
1232                 index++;
1233             }
1234             
1235             if(index == numUniques){
1236                 uniqueLengths[numUniques] = lengths[i];
1237                 uniqueCount[numUniques] = 1;
1238                 mapSeqToUnique[i] = numUniques;//anMap
1239                 mapUniqueToSeq[numUniques] = i;//anF
1240                 
1241                 for(int k=0;k<numFlowCells;k++){
1242                     uniqueFlowgrams[numUniques * numFlowCells + k] = current[k];
1243                     uniqueFlowDataIntI[numUniques * numFlowCells + k] = flowDataIntI[i * numFlowCells + k];
1244                 }
1245                 
1246                 numUniques++;
1247             }
1248         }
1249         uniqueFlowDataIntI.resize(numFlowCells * numUniques);
1250         uniqueLengths.resize(numUniques);       
1251         
1252         flowDataPrI.resize(numSeqs * numFlowCells, 0);
1253         for(int i=0;i<flowDataPrI.size();i++)   {       if (m->control_pressed) { break; } flowDataPrI[i] = getProbIntensity(flowDataIntI[i]);          }
1254     }
1255     catch(exception& e) {
1256         m->errorOut(e, "ShhherCommand", "getUniques");
1257         exit(1);
1258     }
1259 }
1260
1261 /**************************************************************************************************/
1262
1263 float ShhherCommand::calcPairwiseDist(int seqA, int seqB){
1264     try{
1265         int minLength = lengths[mapSeqToUnique[seqA]];
1266         if(lengths[seqB] < minLength){  minLength = lengths[mapSeqToUnique[seqB]];      }
1267         
1268         int ANumFlowCells = seqA * numFlowCells;
1269         int BNumFlowCells = seqB * numFlowCells;
1270         
1271         float dist = 0;
1272         
1273         for(int i=0;i<minLength;i++){
1274             
1275             if (m->control_pressed) { break; }
1276             
1277             int flowAIntI = flowDataIntI[ANumFlowCells + i];
1278             float flowAPrI = flowDataPrI[ANumFlowCells + i];
1279             
1280             int flowBIntI = flowDataIntI[BNumFlowCells + i];
1281             float flowBPrI = flowDataPrI[BNumFlowCells + i];
1282             dist += jointLookUp[flowAIntI * NUMBINS + flowBIntI] - flowAPrI - flowBPrI;
1283         }
1284         
1285         dist /= (float) minLength;
1286         return dist;
1287     }
1288     catch(exception& e) {
1289         m->errorOut(e, "ShhherCommand", "calcPairwiseDist");
1290         exit(1);
1291     }
1292 }
1293
1294 //**********************************************************************************************************************/
1295
1296 string ShhherCommand::cluster(string distFileName, string namesFileName){
1297     try {
1298         
1299         ReadMatrix* read = new ReadColumnMatrix(distFileName);  
1300         read->setCutoff(cutoff);
1301         
1302         NameAssignment* clusterNameMap = new NameAssignment(namesFileName);
1303         clusterNameMap->readMap();
1304         read->read(clusterNameMap);
1305         
1306         ListVector* list = read->getListVector();
1307         SparseMatrix* matrix = read->getMatrix();
1308         
1309         delete read; 
1310         delete clusterNameMap; 
1311         
1312         RAbundVector* rabund = new RAbundVector(list->getRAbundVector());
1313         
1314         Cluster* cluster = new CompleteLinkage(rabund, list, matrix, cutoff, "furthest"); 
1315         string tag = cluster->getTag();
1316         
1317         double clusterCutoff = cutoff;
1318         while (matrix->getSmallDist() <= clusterCutoff && matrix->getNNodes() > 0){
1319             
1320             if (m->control_pressed) { break; }
1321             
1322             cluster->update(clusterCutoff);
1323         }
1324         
1325         list->setLabel(toString(cutoff));
1326         
1327         string listFileName = flowFileName.substr(0,flowFileName.find_last_of('.')) + ".shhh.list";
1328         ofstream listFile;
1329         m->openOutputFile(listFileName, listFile);
1330         list->print(listFile);
1331         listFile.close();
1332         
1333         delete matrix;  delete cluster; delete rabund; delete list;
1334         
1335         return listFileName;
1336     }
1337     catch(exception& e) {
1338         m->errorOut(e, "ShhherCommand", "cluster");
1339         exit(1);        
1340     }           
1341 }
1342
1343 /**************************************************************************************************/
1344
1345 void ShhherCommand::calcCentroidsDriver(int start, int finish){                          
1346     
1347     //this function gets the most likely homopolymer length at a flow position for a group of sequences
1348     //within an otu
1349     
1350     try{
1351         
1352         for(int i=start;i<finish;i++){
1353             
1354             if (m->control_pressed) { break; }
1355             
1356             double count = 0;
1357             int position = 0;
1358             int minFlowGram = 100000000;
1359             double minFlowValue = 1e8;
1360             change[i] = 0; //FALSE
1361             
1362             for(int j=0;j<nSeqsPerOTU[i];j++){
1363                 count += singleTau[seqNumber[cumNumSeqs[i] + j]];
1364             }
1365             
1366             if(nSeqsPerOTU[i] > 0 && count > MIN_COUNT){
1367                 vector<double> adF(nSeqsPerOTU[i]);
1368                 vector<int> anL(nSeqsPerOTU[i]);
1369                 
1370                 for(int j=0;j<nSeqsPerOTU[i];j++){
1371                     int index = cumNumSeqs[i] + j;
1372                     int nI = seqIndex[index];
1373                     int nIU = mapSeqToUnique[nI];
1374                     
1375                     int k;
1376                     for(k=0;k<position;k++){
1377                         if(nIU == anL[k]){
1378                             break;
1379                         }
1380                     }
1381                     if(k == position){
1382                         anL[position] = nIU;
1383                         adF[position] = 0.0000;
1384                         position++;
1385                     }                                           
1386                 }
1387                 
1388                 for(int j=0;j<nSeqsPerOTU[i];j++){
1389                     int index = cumNumSeqs[i] + j;
1390                     int nI = seqIndex[index];
1391                     
1392                     double tauValue = singleTau[seqNumber[index]];
1393                     
1394                     for(int k=0;k<position;k++){
1395                         double dist = getDistToCentroid(anL[k], nI, lengths[nI]);
1396                         adF[k] += dist * tauValue;
1397                     }
1398                 }
1399                 
1400                 for(int j=0;j<position;j++){
1401                     if(adF[j] < minFlowValue){
1402                         minFlowGram = j;
1403                         minFlowValue = adF[j];
1404                     }
1405                 }
1406                 
1407                 if(centroids[i] != anL[minFlowGram]){
1408                     change[i] = 1;
1409                     centroids[i] = anL[minFlowGram];
1410                 }
1411             }
1412             else if(centroids[i] != -1){
1413                 change[i] = 1;
1414                 centroids[i] = -1;                      
1415             }
1416         }
1417     }
1418     catch(exception& e) {
1419         m->errorOut(e, "ShhherCommand", "calcCentroidsDriver");
1420         exit(1);        
1421     }           
1422 }
1423
1424 /**************************************************************************************************/
1425
1426 double ShhherCommand::getDistToCentroid(int cent, int flow, int length){
1427     try{
1428         
1429         int flowAValue = cent * numFlowCells;
1430         int flowBValue = flow * numFlowCells;
1431         
1432         double dist = 0;
1433         
1434         for(int i=0;i<length;i++){
1435             dist += singleLookUp[uniqueFlowgrams[flowAValue] * NUMBINS + flowDataIntI[flowBValue]];
1436             flowAValue++;
1437             flowBValue++;
1438         }
1439         
1440         return dist / (double)length;
1441     }
1442     catch(exception& e) {
1443         m->errorOut(e, "ShhherCommand", "getDistToCentroid");
1444         exit(1);        
1445     }           
1446 }
1447
1448 /**************************************************************************************************/
1449
1450 double ShhherCommand::getNewWeights(){
1451     try{
1452         
1453         double maxChange = 0;
1454         
1455         for(int i=0;i<numOTUs;i++){
1456             
1457             if (m->control_pressed) { break; }
1458             
1459             double difference = weight[i];
1460             weight[i] = 0;
1461             
1462             for(int j=0;j<nSeqsPerOTU[i];j++){
1463                 int index = cumNumSeqs[i] + j;
1464                 double tauValue = singleTau[seqNumber[index]];
1465                 weight[i] += tauValue;
1466             }
1467             
1468             difference = fabs(weight[i] - difference);
1469             if(difference > maxChange){ maxChange = difference; }
1470         }
1471         return maxChange;
1472     }
1473     catch(exception& e) {
1474         m->errorOut(e, "ShhherCommand", "getNewWeights");
1475         exit(1);        
1476     }           
1477 }
1478  
1479  /**************************************************************************************************/
1480  
1481 double ShhherCommand::getLikelihood(){
1482     
1483     try{
1484         
1485         vector<long double> P(numSeqs, 0);
1486         int effNumOTUs = 0;
1487         
1488         for(int i=0;i<numOTUs;i++){
1489             if(weight[i] > MIN_WEIGHT){
1490                 effNumOTUs++;
1491             }
1492         }
1493         
1494         string hold;
1495         for(int i=0;i<numOTUs;i++){
1496             
1497             if (m->control_pressed) { break; }
1498             
1499             for(int j=0;j<nSeqsPerOTU[i];j++){
1500                 int index = cumNumSeqs[i] + j;
1501                 int nI = seqIndex[index];
1502                 double singleDist = dist[seqNumber[index]];
1503                 
1504                 P[nI] += weight[i] * exp(-singleDist * sigma);
1505             }
1506         }
1507         double nLL = 0.00;
1508         for(int i=0;i<numSeqs;i++){
1509             if(P[i] == 0){      P[i] = DBL_EPSILON;     }
1510             
1511             nLL += -log(P[i]);
1512         }
1513         
1514         nLL = nLL -(double)numSeqs * log(sigma);
1515         
1516         return nLL; 
1517     }
1518     catch(exception& e) {
1519         m->errorOut(e, "ShhherCommand", "getNewWeights");
1520         exit(1);        
1521     }           
1522 }
1523
1524 /**************************************************************************************************/
1525
1526 void ShhherCommand::checkCentroids(){
1527     try{
1528         vector<int> unique(numOTUs, 1);
1529         
1530         for(int i=0;i<numOTUs;i++){
1531             if(centroids[i] == -1 || weight[i] < MIN_WEIGHT){
1532                 unique[i] = -1;
1533             }
1534         }
1535         
1536         for(int i=0;i<numOTUs;i++){
1537             
1538             if (m->control_pressed) { break; }
1539             
1540             if(unique[i] == 1){
1541                 for(int j=i+1;j<numOTUs;j++){
1542                     if(unique[j] == 1){
1543                         
1544                         if(centroids[j] == centroids[i]){
1545                             unique[j] = 0;
1546                             centroids[j] = -1;
1547                             
1548                             weight[i] += weight[j];
1549                             weight[j] = 0.0;
1550                         }
1551                     }
1552                 }
1553             }
1554         }
1555     }
1556     catch(exception& e) {
1557         m->errorOut(e, "ShhherCommand", "checkCentroids");
1558         exit(1);        
1559     }           
1560 }
1561  /**************************************************************************************************/
1562
1563
1564  
1565 void ShhherCommand::writeQualities(vector<int> otuCounts){
1566     
1567     try {
1568         string thisOutputDir = outputDir;
1569         if (outputDir == "") {  thisOutputDir += m->hasPath(flowFileName);  }
1570         string qualityFileName = thisOutputDir + m->getRootName(m->getSimpleName(flowFileName)) + "shhh.qual";
1571         
1572         ofstream qualityFile;
1573         m->openOutputFile(qualityFileName, qualityFile);
1574         
1575         qualityFile.setf(ios::fixed, ios::floatfield);
1576         qualityFile.setf(ios::showpoint);
1577         qualityFile << setprecision(6);
1578         
1579         vector<vector<int> > qualities(numOTUs);
1580         vector<double> pr(HOMOPS, 0);
1581         
1582         
1583         for(int i=0;i<numOTUs;i++){
1584             
1585             if (m->control_pressed) { break; }
1586             
1587             int index = 0;
1588             int base = 0;
1589             
1590             if(nSeqsPerOTU[i] > 0){
1591                 qualities[i].assign(1024, -1);
1592                 
1593                 while(index < numFlowCells){
1594                     double maxPrValue = 1e8;
1595                     short maxPrIndex = -1;
1596                     double count = 0.0000;
1597                     
1598                     pr.assign(HOMOPS, 0);
1599                     
1600                     for(int j=0;j<nSeqsPerOTU[i];j++){
1601                         int lIndex = cumNumSeqs[i] + j;
1602                         double tauValue = singleTau[seqNumber[lIndex]];
1603                         int sequenceIndex = aaI[i][j];
1604                         short intensity = flowDataIntI[sequenceIndex * numFlowCells + index];
1605                         
1606                         count += tauValue;
1607                         
1608                         for(int s=0;s<HOMOPS;s++){
1609                             pr[s] += tauValue * singleLookUp[s * NUMBINS + intensity];
1610                         }
1611                     }
1612                     
1613                     maxPrIndex = uniqueFlowgrams[centroids[i] * numFlowCells + index];
1614                     maxPrValue = pr[maxPrIndex];
1615                     
1616                     if(count > MIN_COUNT){
1617                         double U = 0.0000;
1618                         double norm = 0.0000;
1619                         
1620                         for(int s=0;s<HOMOPS;s++){
1621                             norm += exp(-(pr[s] - maxPrValue));
1622                         }
1623                         
1624                         for(int s=1;s<=maxPrIndex;s++){
1625                             int value = 0;
1626                             double temp = 0.0000;
1627                             
1628                             U += exp(-(pr[s-1]-maxPrValue))/norm;
1629                             
1630                             if(U>0.00){
1631                                 temp = log10(U);
1632                             }
1633                             else{
1634                                 temp = -10.1;
1635                             }
1636                             temp = floor(-10 * temp);
1637                             value = (int)floor(temp);
1638                             if(value > 100){    value = 100;    }
1639                             
1640                             qualities[i][base] = (int)value;
1641                             base++;
1642                         }
1643                     }
1644                     
1645                     index++;
1646                 }
1647             }
1648             
1649             
1650             if(otuCounts[i] > 0){
1651                 qualityFile << '>' << seqNameVector[mapUniqueToSeq[i]] << endl;
1652                 
1653                 int j=4;        //need to get past the first four bases
1654                 while(qualities[i][j] != -1){
1655                     qualityFile << qualities[i][j] << ' ';
1656                     j++;
1657                 }
1658                 qualityFile << endl;
1659             }
1660         }
1661         qualityFile.close();
1662         outputNames.push_back(qualityFileName);
1663         
1664     }
1665     catch(exception& e) {
1666         m->errorOut(e, "ShhherCommand", "writeQualities");
1667         exit(1);        
1668     }           
1669 }
1670
1671 /**************************************************************************************************/
1672
1673 void ShhherCommand::writeSequences(vector<int> otuCounts){
1674     try {
1675         string thisOutputDir = outputDir;
1676         if (outputDir == "") {  thisOutputDir += m->hasPath(flowFileName);  }
1677         string fastaFileName = thisOutputDir + m->getRootName(m->getSimpleName(flowFileName)) + "shhh.fasta";
1678         ofstream fastaFile;
1679         m->openOutputFile(fastaFileName, fastaFile);
1680         
1681         vector<string> names(numOTUs, "");
1682         
1683         for(int i=0;i<numOTUs;i++){
1684             
1685             if (m->control_pressed) { break; }
1686             
1687             int index = centroids[i];
1688             
1689             if(otuCounts[i] > 0){
1690                 fastaFile << '>' << seqNameVector[aaI[i][0]] << endl;
1691                 
1692                 string newSeq = "";
1693                 
1694                 for(int j=0;j<numFlowCells;j++){
1695                     
1696                     char base = flowOrder[j % 4];
1697                     for(int k=0;k<uniqueFlowgrams[index * numFlowCells + j];k++){
1698                         newSeq += base;
1699                     }
1700                 }
1701                 
1702                 fastaFile << newSeq.substr(4) << endl;
1703             }
1704         }
1705         fastaFile.close();
1706         
1707         outputNames.push_back(fastaFileName);
1708         
1709         if(compositeFASTAFileName != ""){
1710             m->appendFiles(fastaFileName, compositeFASTAFileName);
1711         }
1712     }
1713     catch(exception& e) {
1714         m->errorOut(e, "ShhherCommand", "writeSequences");
1715         exit(1);        
1716     }           
1717 }
1718
1719 /**************************************************************************************************/
1720
1721 void ShhherCommand::writeNames(vector<int> otuCounts){
1722     try {
1723         string thisOutputDir = outputDir;
1724         if (outputDir == "") {  thisOutputDir += m->hasPath(flowFileName);  }
1725         string nameFileName = thisOutputDir + m->getRootName(m->getSimpleName(flowFileName)) + "shhh.names";
1726         ofstream nameFile;
1727         m->openOutputFile(nameFileName, nameFile);
1728         
1729         for(int i=0;i<numOTUs;i++){
1730             
1731             if (m->control_pressed) { break; }
1732             
1733             if(otuCounts[i] > 0){
1734                 nameFile << seqNameVector[aaI[i][0]] << '\t' << seqNameVector[aaI[i][0]];
1735                 
1736                 for(int j=1;j<nSeqsPerOTU[i];j++){
1737                     nameFile << ',' << seqNameVector[aaI[i][j]];
1738                 }
1739                 
1740                 nameFile << endl;
1741             }
1742         }
1743         nameFile.close();
1744         outputNames.push_back(nameFileName);
1745         
1746         
1747         if(compositeNamesFileName != ""){
1748             m->appendFiles(nameFileName, compositeNamesFileName);
1749         }               
1750     }
1751     catch(exception& e) {
1752         m->errorOut(e, "ShhherCommand", "writeNames");
1753         exit(1);        
1754     }           
1755 }
1756
1757 /**************************************************************************************************/
1758
1759 void ShhherCommand::writeGroups(){
1760     try {
1761         string thisOutputDir = outputDir;
1762         if (outputDir == "") {  thisOutputDir += m->hasPath(flowFileName);  }
1763         string fileRoot = thisOutputDir + m->getRootName(m->getSimpleName(flowFileName));
1764         string groupFileName = fileRoot + "shhh.groups";
1765         ofstream groupFile;
1766         m->openOutputFile(groupFileName, groupFile);
1767         
1768         for(int i=0;i<numSeqs;i++){
1769             if (m->control_pressed) { break; }
1770             groupFile << seqNameVector[i] << '\t' << fileRoot << endl;
1771         }
1772         groupFile.close();
1773         outputNames.push_back(groupFileName);
1774         
1775     }
1776     catch(exception& e) {
1777         m->errorOut(e, "ShhherCommand", "writeGroups");
1778         exit(1);        
1779     }           
1780 }
1781
1782 /**************************************************************************************************/
1783
1784 void ShhherCommand::writeClusters(vector<int> otuCounts){
1785     try {
1786         string thisOutputDir = outputDir;
1787         if (outputDir == "") {  thisOutputDir += m->hasPath(flowFileName);  }
1788         string otuCountsFileName = thisOutputDir + m->getRootName(m->getSimpleName(flowFileName)) + "shhh.counts";
1789         ofstream otuCountsFile;
1790         m->openOutputFile(otuCountsFileName, otuCountsFile);
1791         
1792         string bases = flowOrder;
1793         
1794         for(int i=0;i<numOTUs;i++){
1795             
1796             if (m->control_pressed) {
1797                 break;
1798             }
1799             //output the translated version of the centroid sequence for the otu
1800             if(otuCounts[i] > 0){
1801                 int index = centroids[i];
1802                 
1803                 otuCountsFile << "ideal\t";
1804                 for(int j=8;j<numFlowCells;j++){
1805                     char base = bases[j % 4];
1806                     for(int s=0;s<uniqueFlowgrams[index * numFlowCells + j];s++){
1807                         otuCountsFile << base;
1808                     }
1809                 }
1810                 otuCountsFile << endl;
1811                 
1812                 for(int j=0;j<nSeqsPerOTU[i];j++){
1813                     int sequence = aaI[i][j];
1814                     otuCountsFile << seqNameVector[sequence] << '\t';
1815                     
1816                     string newSeq = "";
1817                     
1818                     for(int k=0;k<lengths[sequence];k++){
1819                         char base = bases[k % 4];
1820                         int freq = int(0.01 * (double)flowDataIntI[sequence * numFlowCells + k] + 0.5);
1821                         
1822                         for(int s=0;s<freq;s++){
1823                             newSeq += base;
1824                             //otuCountsFile << base;
1825                         }
1826                     }
1827                     otuCountsFile << newSeq.substr(4) << endl;
1828                 }
1829                 otuCountsFile << endl;
1830             }
1831         }
1832         otuCountsFile.close();
1833         outputNames.push_back(otuCountsFileName);
1834         
1835     }
1836     catch(exception& e) {
1837         m->errorOut(e, "ShhherCommand", "writeClusters");
1838         exit(1);        
1839     }           
1840 }
1841
1842 #else
1843 //**********************************************************************************************************************
1844
1845 int ShhherCommand::execute(){
1846         try {
1847                 if (abort == true) { return 0; }
1848                 
1849                 getSingleLookUp();      if (m->control_pressed) { return 0; }
1850                 getJointLookUp();       if (m->control_pressed) { return 0; }
1851                 
1852         int numFiles = flowFileVector.size();
1853                 
1854         if (numFiles < processors) { processors = numFiles; }
1855         
1856 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
1857         if (processors == 1) { driver(flowFileVector, compositeFASTAFileName, compositeNamesFileName, 0, flowFileVector.size()); }
1858         else { createProcesses(flowFileVector); } //each processor processes one file
1859 #else
1860         driver(flowFileVector, compositeFASTAFileName, compositeNamesFileName, 0, flowFileVector.size());
1861 #endif
1862         
1863                 if(compositeFASTAFileName != ""){
1864                         outputNames.push_back(compositeFASTAFileName);
1865                         outputNames.push_back(compositeNamesFileName);
1866                 }
1867
1868                 m->mothurOutEndLine();
1869                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
1870                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
1871                 m->mothurOutEndLine();
1872                 
1873                 return 0;
1874         }
1875         catch(exception& e) {
1876                 m->errorOut(e, "ShhherCommand", "execute");
1877                 exit(1);
1878         }
1879 }
1880 #endif
1881 /**************************************************************************************************/
1882
1883 int ShhherCommand::createProcesses(vector<string> filenames){
1884     try {
1885         vector<int> processIDS;
1886                 int process = 1;
1887                 int num = 0;
1888                 
1889                 //sanity check
1890                 if (filenames.size() < processors) { processors = filenames.size(); }
1891                 
1892                 //divide the groups between the processors
1893                 vector<linePair> lines;
1894                 int numFilesPerProcessor = filenames.size() / processors;
1895                 for (int i = 0; i < processors; i++) {
1896                         int startIndex =  i * numFilesPerProcessor;
1897                         int endIndex = (i+1) * numFilesPerProcessor;
1898                         if(i == (processors - 1)){      endIndex = filenames.size();    }
1899                         lines.push_back(linePair(startIndex, endIndex));
1900                 }
1901                 
1902         #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)          
1903                 
1904                 //loop through and create all the processes you want
1905                 while (process != processors) {
1906                         int pid = fork();
1907                         
1908                         if (pid > 0) {
1909                                 processIDS.push_back(pid);  //create map from line number to pid so you can append files in correct order later
1910                                 process++;
1911                         }else if (pid == 0){
1912                                 num = driver(filenames, compositeFASTAFileName + toString(getpid()) + ".temp", compositeNamesFileName  + toString(getpid()) + ".temp", lines[process].start, lines[process].end);
1913                                 exit(0);
1914                         }else { 
1915                                 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine(); 
1916                                 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
1917                                 exit(0);
1918                         }
1919                 }
1920                 
1921                 //do my part
1922                 driver(filenames, compositeFASTAFileName, compositeNamesFileName, lines[0].start, lines[0].end);
1923                 
1924                 //force parent to wait until all the processes are done
1925                 for (int i=0;i<processIDS.size();i++) { 
1926                         int temp = processIDS[i];
1927                         wait(&temp);
1928                 }
1929         
1930         #else
1931         
1932         //////////////////////////////////////////////////////////////////////////////////////////////////////
1933         
1934         /////////////////////// NOT WORKING, ACCESS VIOLATION ON READ OF FLOWGRAMS IN THREAD /////////////////
1935         
1936         //////////////////////////////////////////////////////////////////////////////////////////////////////
1937                 //Windows version shared memory, so be careful when passing variables through the shhhFlowsData struct. 
1938                 //Above fork() will clone, so memory is separate, but that's not the case with windows, 
1939                 //////////////////////////////////////////////////////////////////////////////////////////////////////
1940                 
1941                 vector<shhhFlowsData*> pDataArray; 
1942                 DWORD   dwThreadIdArray[processors-1];
1943                 HANDLE  hThreadArray[processors-1]; 
1944                 
1945                 //Create processor worker threads.
1946                 for( int i=0; i<processors-1; i++ ){
1947                         // Allocate memory for thread data.
1948                         string extension = "";
1949                         if (i != 0) { extension = toString(i) + ".temp"; }
1950                         
1951             shhhFlowsData* tempFlow = new shhhFlowsData(filenames, (compositeFASTAFileName + extension), (compositeNamesFileName + extension), outputDir, flowOrder, jointLookUp, singleLookUp, m, lines[i].start, lines[i].end, cutoff, sigma, minDelta, maxIters, i);
1952                         pDataArray.push_back(tempFlow);
1953                         processIDS.push_back(i);
1954             
1955                         hThreadArray[i] = CreateThread(NULL, 0, ShhhFlowsThreadFunction, pDataArray[i], 0, &dwThreadIdArray[i]);   
1956                 }
1957                 
1958                 //using the main process as a worker saves time and memory
1959                 //do my part
1960                 driver(filenames, compositeFASTAFileName, compositeNamesFileName, lines[processors-1].start, lines[processors-1].end);
1961                 
1962                 //Wait until all threads have terminated.
1963                 WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE);
1964                 
1965                 //Close all thread handles and free memory allocations.
1966                 for(int i=0; i < pDataArray.size(); i++){
1967                         for(int j=0; j < pDataArray[i]->outputNames.size(); j++){ outputNames.push_back(pDataArray[i]->outputNames[j]); }
1968                         CloseHandle(hThreadArray[i]);
1969                         delete pDataArray[i];
1970                 }
1971                 
1972         #endif
1973         
1974         for (int i=0;i<processIDS.size();i++) { 
1975             if (compositeFASTAFileName != "") {
1976                 m->appendFiles((compositeFASTAFileName + toString(processIDS[i]) + ".temp"), compositeFASTAFileName);
1977                 m->appendFiles((compositeNamesFileName + toString(processIDS[i]) + ".temp"), compositeNamesFileName);
1978                 m->mothurRemove((compositeFASTAFileName + toString(processIDS[i]) + ".temp"));
1979                 m->mothurRemove((compositeNamesFileName + toString(processIDS[i]) + ".temp"));
1980             }
1981         }
1982         
1983         return 0;
1984         
1985     }
1986         catch(exception& e) {
1987                 m->errorOut(e, "ShhherCommand", "createProcesses");
1988                 exit(1);
1989         }
1990 }
1991 /**************************************************************************************************/
1992
1993 int ShhherCommand::driver(vector<string> filenames, string thisCompositeFASTAFileName, string thisCompositeNamesFileName, int start, int end){
1994     try {
1995         
1996         for(int i=start;i<end;i++){
1997                         
1998                         if (m->control_pressed) { break; }
1999                         
2000                         string flowFileName = filenames[i];
2001             
2002                         m->mothurOut("\n>>>>>\tProcessing " + flowFileName + " (file " + toString(i+1) + " of " + toString(filenames.size()) + ")\t<<<<<\n");
2003                         m->mothurOut("Reading flowgrams...\n");
2004                         
2005             vector<string> seqNameVector;
2006             vector<int> lengths;
2007             vector<short> flowDataIntI;
2008             vector<double> flowDataPrI;
2009             map<string, int> nameMap;
2010             vector<short> uniqueFlowgrams;
2011             vector<int> uniqueCount;
2012             vector<int> mapSeqToUnique;
2013             vector<int> mapUniqueToSeq;
2014             vector<int> uniqueLengths;
2015             int numFlowCells;
2016             
2017             int numSeqs = getFlowData(flowFileName, seqNameVector, lengths, flowDataIntI, nameMap, numFlowCells);
2018                         
2019                         if (m->control_pressed) { break; }
2020                         
2021                         m->mothurOut("Identifying unique flowgrams...\n");
2022                         int numUniques = getUniques(numSeqs, numFlowCells, uniqueFlowgrams, uniqueCount, uniqueLengths, mapSeqToUnique, mapUniqueToSeq, lengths, flowDataPrI, flowDataIntI);
2023                         
2024                         if (m->control_pressed) { break; }
2025                         
2026                         m->mothurOut("Calculating distances between flowgrams...\n");
2027             string distFileName = flowFileName.substr(0,flowFileName.find_last_of('.')) + ".shhh.dist";
2028             unsigned long long begTime = time(NULL);
2029             double begClock = clock();
2030         
2031             flowDistParentFork(numFlowCells, distFileName, numUniques, mapUniqueToSeq, mapSeqToUnique, lengths, flowDataPrI, flowDataIntI);     
2032             
2033             m->mothurOutEndLine();
2034             m->mothurOut("Total time: " + toString(time(NULL) - begTime) + '\t' + toString((clock() - begClock)/CLOCKS_PER_SEC) + '\n');
2035
2036             
2037                         string namesFileName = flowFileName.substr(0,flowFileName.find_last_of('.')) + ".shhh.names";
2038                         createNamesFile(numSeqs, numUniques, namesFileName, seqNameVector, mapSeqToUnique, mapUniqueToSeq);
2039                         
2040                         if (m->control_pressed) { break; }
2041                         
2042                         m->mothurOut("\nClustering flowgrams...\n");
2043             string listFileName = flowFileName.substr(0,flowFileName.find_last_of('.')) + ".shhh.list";
2044                         cluster(listFileName, distFileName, namesFileName);
2045                         
2046                         if (m->control_pressed) { break; }
2047             
2048             vector<int> otuData;
2049             vector<int> cumNumSeqs;
2050             vector<int> nSeqsPerOTU;
2051             vector<vector<int> > aaP;   //tMaster->aanP:        each row is a different otu / each col contains the sequence indices
2052             vector<vector<int> > aaI;   //tMaster->aanI:        that are in each otu - can't differentiate between aaP and aaI 
2053             vector<int> seqNumber;              //tMaster->anP:         the sequence id number sorted by OTU
2054             vector<int> seqIndex;               //tMaster->anI;         the index that corresponds to seqNumber
2055
2056                         
2057                         int numOTUs = getOTUData(numSeqs, listFileName, otuData, cumNumSeqs, nSeqsPerOTU, aaP, aaI, seqNumber, seqIndex, nameMap);
2058                         
2059                         if (m->control_pressed) { break; }
2060                         
2061                         m->mothurRemove(distFileName);
2062                         m->mothurRemove(namesFileName);
2063                         m->mothurRemove(listFileName);
2064                         
2065             vector<double> dist;                //adDist - distance of sequences to centroids
2066             vector<short> change;               //did the centroid sequence change? 0 = no; 1 = yes
2067             vector<int> centroids;              //the representative flowgram for each cluster m
2068             vector<double> weight;
2069             vector<double> singleTau;   //tMaster->adTau:       1-D Tau vector (1xnumSeqs)
2070             vector<int> nSeqsBreaks;
2071             vector<int> nOTUsBreaks;
2072             
2073                         dist.assign(numSeqs * numOTUs, 0);
2074             change.assign(numOTUs, 1);
2075             centroids.assign(numOTUs, -1);
2076             weight.assign(numOTUs, 0);
2077             singleTau.assign(numSeqs, 1.0);
2078             
2079             nSeqsBreaks.assign(2, 0);
2080             nOTUsBreaks.assign(2, 0);
2081             
2082             nSeqsBreaks[0] = 0;
2083             nSeqsBreaks[1] = numSeqs;
2084             nOTUsBreaks[1] = numOTUs;
2085                         
2086                         if (m->control_pressed) { break; }
2087                         
2088                         double maxDelta = 0;
2089                         int iter = 0;
2090                         
2091                         begClock = clock();
2092                         begTime = time(NULL);
2093             
2094                         m->mothurOut("\nDenoising flowgrams...\n");
2095                         m->mothurOut("iter\tmaxDelta\tnLL\t\tcycletime\n");
2096                         
2097                         while((maxIters == 0 && maxDelta > minDelta) || iter < MIN_ITER || (maxDelta > minDelta && iter < maxIters)){
2098                                 
2099                                 if (m->control_pressed) { break; }
2100                                 
2101                                 double cycClock = clock();
2102                                 unsigned long long cycTime = time(NULL);
2103                                 fill(numOTUs, seqNumber, seqIndex, cumNumSeqs, nSeqsPerOTU, aaP, aaI);
2104                                 
2105                                 if (m->control_pressed) { break; }
2106                 
2107                                 calcCentroidsDriver(numOTUs, cumNumSeqs, nSeqsPerOTU, seqIndex, change, centroids, singleTau, mapSeqToUnique, uniqueFlowgrams, flowDataIntI, lengths, numFlowCells, seqNumber);
2108                                 
2109                                 if (m->control_pressed) { break; }
2110                 
2111                                 maxDelta = getNewWeights(numOTUs, cumNumSeqs, nSeqsPerOTU, singleTau, seqNumber, weight);  
2112                 
2113                 if (m->control_pressed) { break; }
2114                 
2115                                 double nLL = getLikelihood(numSeqs, numOTUs, nSeqsPerOTU, seqNumber, cumNumSeqs, seqIndex, dist, weight); 
2116                 
2117                 if (m->control_pressed) { break; }
2118                 
2119                                 checkCentroids(numOTUs, centroids, weight);
2120                                 
2121                                 if (m->control_pressed) { break; }
2122                                 
2123                                 calcNewDistances(numSeqs, numOTUs, nSeqsPerOTU,  dist, weight, change, centroids, aaP, singleTau, aaI, seqNumber, seqIndex, uniqueFlowgrams, flowDataIntI, numFlowCells, lengths);
2124                                 
2125                                 if (m->control_pressed) { break; }
2126                                 
2127                                 iter++;
2128                                 
2129                                 m->mothurOut(toString(iter) + '\t' + toString(maxDelta) + '\t' + toString(nLL) + '\t' + toString(time(NULL) - cycTime) + '\t' + toString((clock() - cycClock)/(double)CLOCKS_PER_SEC) + '\n');
2130                 
2131                         }       
2132                         
2133                         if (m->control_pressed) { break; }
2134                         
2135                         m->mothurOut("\nFinalizing...\n");
2136                         fill(numOTUs, seqNumber, seqIndex, cumNumSeqs, nSeqsPerOTU, aaP, aaI);
2137                         
2138                         if (m->control_pressed) { break; }
2139                         
2140                         setOTUs(numOTUs, numSeqs, seqNumber, seqIndex, cumNumSeqs, nSeqsPerOTU, otuData, singleTau, dist, aaP, aaI);
2141                         
2142                         if (m->control_pressed) { break; }
2143                         
2144                         vector<int> otuCounts(numOTUs, 0);
2145                         for(int i=0;i<numSeqs;i++)      {       otuCounts[otuData[i]]++;        }
2146                         
2147                         calcCentroidsDriver(numOTUs, cumNumSeqs, nSeqsPerOTU, seqIndex, change, centroids, singleTau, mapSeqToUnique, uniqueFlowgrams, flowDataIntI, lengths, numFlowCells, seqNumber); 
2148             
2149             if (m->control_pressed) { break; }
2150             
2151                         writeQualities(numOTUs, numFlowCells, flowFileName, otuCounts, nSeqsPerOTU, seqNumber, singleTau, flowDataIntI, uniqueFlowgrams, cumNumSeqs, mapUniqueToSeq, seqNameVector, centroids, aaI); if (m->control_pressed) { break; }
2152                         writeSequences(thisCompositeFASTAFileName, numOTUs, numFlowCells, flowFileName, otuCounts, uniqueFlowgrams, seqNameVector, aaI, centroids);if (m->control_pressed) { break; }
2153                         writeNames(thisCompositeNamesFileName, numOTUs, flowFileName, otuCounts, seqNameVector, aaI, nSeqsPerOTU);                              if (m->control_pressed) { break; }
2154                         writeClusters(flowFileName, numOTUs, numFlowCells,otuCounts, centroids, uniqueFlowgrams, seqNameVector, aaI, nSeqsPerOTU, lengths, flowDataIntI);                       if (m->control_pressed) { break; }
2155                         writeGroups(flowFileName, numSeqs, seqNameVector);                                              if (m->control_pressed) { break; }
2156                         
2157                         m->mothurOut("Total time to process " + flowFileName + ":\t" + toString(time(NULL) - begTime) + '\t' + toString((clock() - begClock)/(double)CLOCKS_PER_SEC) + '\n');
2158                 }
2159                 
2160         if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
2161         
2162         return 0;
2163         
2164     }catch(exception& e) {
2165             m->errorOut(e, "ShhherCommand", "driver");
2166             exit(1);
2167     }
2168 }
2169
2170 /**************************************************************************************************/
2171 int ShhherCommand::getFlowData(string filename, vector<string>& thisSeqNameVector, vector<int>& thisLengths, vector<short>& thisFlowDataIntI, map<string, int>& thisNameMap, int& numFlowCells){
2172         try{
2173        
2174                 ifstream flowFile;
2175        
2176                 m->openInputFile(filename, flowFile);
2177                 
2178                 string seqName;
2179                 int currentNumFlowCells;
2180                 float intensity;
2181         thisSeqNameVector.clear();
2182                 thisLengths.clear();
2183                 thisFlowDataIntI.clear();
2184                 thisNameMap.clear();
2185                 
2186                 flowFile >> numFlowCells;
2187                 int index = 0;//pcluster
2188                 while(!flowFile.eof()){
2189                         
2190                         if (m->control_pressed) { break; }
2191                         
2192                         flowFile >> seqName >> currentNumFlowCells;
2193                         thisLengths.push_back(currentNumFlowCells);
2194            
2195                         thisSeqNameVector.push_back(seqName);
2196                         thisNameMap[seqName] = index++;//pcluster
2197
2198                         for(int i=0;i<numFlowCells;i++){
2199                                 flowFile >> intensity;
2200                                 if(intensity > 9.99)    {       intensity = 9.99;       }
2201                                 int intI = int(100 * intensity + 0.0001);
2202                                 thisFlowDataIntI.push_back(intI);
2203                         }
2204                         m->gobble(flowFile);
2205                 }
2206                 flowFile.close();
2207                 
2208                 int numSeqs = thisSeqNameVector.size();         
2209                 
2210                 for(int i=0;i<numSeqs;i++){
2211                         
2212                         if (m->control_pressed) { break; }
2213                         
2214                         int iNumFlowCells = i * numFlowCells;
2215                         for(int j=thisLengths[i];j<numFlowCells;j++){
2216                                 thisFlowDataIntI[iNumFlowCells + j] = 0;
2217                         }
2218                 }
2219         
2220         return numSeqs;
2221                 
2222         }
2223         catch(exception& e) {
2224                 m->errorOut(e, "ShhherCommand", "getFlowData");
2225                 exit(1);
2226         }
2227 }
2228 /**************************************************************************************************/
2229
2230 int ShhherCommand::flowDistParentFork(int numFlowCells, string distFileName, int stopSeq, vector<int>& mapUniqueToSeq, vector<int>& mapSeqToUnique, vector<int>& lengths, vector<double>& flowDataPrI, vector<short>& flowDataIntI){
2231         try{            
2232         
2233                 ostringstream outStream;
2234                 outStream.setf(ios::fixed, ios::floatfield);
2235                 outStream.setf(ios::dec, ios::basefield);
2236                 outStream.setf(ios::showpoint);
2237                 outStream.precision(6);
2238                 
2239                 int begTime = time(NULL);
2240                 double begClock = clock();
2241         
2242                 for(int i=0;i<stopSeq;i++){
2243                         
2244                         if (m->control_pressed) { break; }
2245                         
2246                         for(int j=0;j<i;j++){
2247                                 float flowDistance = calcPairwiseDist(numFlowCells, mapUniqueToSeq[i], mapUniqueToSeq[j], mapSeqToUnique, lengths, flowDataPrI, flowDataIntI);
2248                 
2249                                 if(flowDistance < 1e-6){
2250                                         outStream << mapUniqueToSeq[i] << '\t' << mapUniqueToSeq[j] << '\t' << 0.000000 << endl;
2251                                 }
2252                                 else if(flowDistance <= cutoff){
2253                                         outStream << mapUniqueToSeq[i] << '\t' << mapUniqueToSeq[j] << '\t' << flowDistance << endl;
2254                                 }
2255                         }
2256                         if(i % 100 == 0){
2257                                 m->mothurOut(toString(i) + "\t" + toString(time(NULL) - begTime));
2258                                 m->mothurOut("\t" + toString((clock()-begClock)/CLOCKS_PER_SEC));
2259                                 m->mothurOutEndLine();
2260                         }
2261                 }
2262                 
2263                 ofstream distFile(distFileName.c_str());
2264                 distFile << outStream.str();            
2265                 distFile.close();
2266                 
2267                 if (m->control_pressed) {}
2268                 else {
2269                         m->mothurOut(toString(stopSeq-1) + "\t" + toString(time(NULL) - begTime));
2270                         m->mothurOut("\t" + toString((clock()-begClock)/CLOCKS_PER_SEC));
2271                         m->mothurOutEndLine();
2272                 }
2273         
2274         return 0;
2275         }
2276         catch(exception& e) {
2277                 m->errorOut(e, "ShhherCommand", "flowDistParentFork");
2278                 exit(1);
2279         }
2280 }
2281 /**************************************************************************************************/
2282
2283 float ShhherCommand::calcPairwiseDist(int numFlowCells, int seqA, int seqB, vector<int>& mapSeqToUnique, vector<int>& lengths, vector<double>& flowDataPrI, vector<short>& flowDataIntI){
2284         try{
2285                 int minLength = lengths[mapSeqToUnique[seqA]];
2286                 if(lengths[seqB] < minLength){  minLength = lengths[mapSeqToUnique[seqB]];      }
2287                 
2288                 int ANumFlowCells = seqA * numFlowCells;
2289                 int BNumFlowCells = seqB * numFlowCells;
2290                 
2291                 float dist = 0;
2292                 
2293                 for(int i=0;i<minLength;i++){
2294                         
2295                         if (m->control_pressed) { break; }
2296                         
2297                         int flowAIntI = flowDataIntI[ANumFlowCells + i];
2298                         float flowAPrI = flowDataPrI[ANumFlowCells + i];
2299                         
2300                         int flowBIntI = flowDataIntI[BNumFlowCells + i];
2301                         float flowBPrI = flowDataPrI[BNumFlowCells + i];
2302                         dist += jointLookUp[flowAIntI * NUMBINS + flowBIntI] - flowAPrI - flowBPrI;
2303                 }
2304                 
2305                 dist /= (float) minLength;
2306                 return dist;
2307         }
2308         catch(exception& e) {
2309                 m->errorOut(e, "ShhherCommand", "calcPairwiseDist");
2310                 exit(1);
2311         }
2312 }
2313
2314 /**************************************************************************************************/
2315
2316 int ShhherCommand::getUniques(int numSeqs, int numFlowCells, vector<short>& uniqueFlowgrams, vector<int>& uniqueCount, vector<int>& uniqueLengths, vector<int>& mapSeqToUnique, vector<int>& mapUniqueToSeq, vector<int>& lengths, vector<double>& flowDataPrI, vector<short>& flowDataIntI){
2317         try{
2318                 int numUniques = 0;
2319                 uniqueFlowgrams.assign(numFlowCells * numSeqs, -1);
2320                 uniqueCount.assign(numSeqs, 0);                                                 //      anWeights
2321                 uniqueLengths.assign(numSeqs, 0);
2322                 mapSeqToUnique.assign(numSeqs, -1);
2323                 mapUniqueToSeq.assign(numSeqs, -1);
2324                 
2325                 vector<short> uniqueFlowDataIntI(numFlowCells * numSeqs, -1);
2326                 
2327                 for(int i=0;i<numSeqs;i++){
2328                         
2329                         if (m->control_pressed) { break; }
2330                         
2331                         int index = 0;
2332                         
2333                         vector<short> current(numFlowCells);
2334                         for(int j=0;j<numFlowCells;j++){
2335                                 current[j] = short(((flowDataIntI[i * numFlowCells + j] + 50.0)/100.0));
2336                         }
2337             
2338                         for(int j=0;j<numUniques;j++){
2339                                 int offset = j * numFlowCells;
2340                                 bool toEnd = 1;
2341                                 
2342                                 int shorterLength;
2343                                 if(lengths[i] < uniqueLengths[j])       {       shorterLength = lengths[i];                     }
2344                                 else                                                            {       shorterLength = uniqueLengths[j];       }
2345                 
2346                                 for(int k=0;k<shorterLength;k++){
2347                                         if(current[k] != uniqueFlowgrams[offset + k]){
2348                                                 toEnd = 0;
2349                                                 break;
2350                                         }
2351                                 }
2352                                 
2353                                 if(toEnd){
2354                                         mapSeqToUnique[i] = j;
2355                                         uniqueCount[j]++;
2356                                         index = j;
2357                                         if(lengths[i] > uniqueLengths[j])       {       uniqueLengths[j] = lengths[i];  }
2358                                         break;
2359                                 }
2360                                 index++;
2361                         }
2362                         
2363                         if(index == numUniques){
2364                                 uniqueLengths[numUniques] = lengths[i];
2365                                 uniqueCount[numUniques] = 1;
2366                                 mapSeqToUnique[i] = numUniques;//anMap
2367                                 mapUniqueToSeq[numUniques] = i;//anF
2368                                 
2369                                 for(int k=0;k<numFlowCells;k++){
2370                                         uniqueFlowgrams[numUniques * numFlowCells + k] = current[k];
2371                                         uniqueFlowDataIntI[numUniques * numFlowCells + k] = flowDataIntI[i * numFlowCells + k];
2372                                 }
2373                                 
2374                                 numUniques++;
2375                         }
2376                 }
2377                 uniqueFlowDataIntI.resize(numFlowCells * numUniques);
2378                 uniqueLengths.resize(numUniques);       
2379                 
2380                 flowDataPrI.resize(numSeqs * numFlowCells, 0);
2381                 for(int i=0;i<flowDataPrI.size();i++)   {       if (m->control_pressed) { break; } flowDataPrI[i] = getProbIntensity(flowDataIntI[i]);          }
2382         
2383         return numUniques;
2384         }
2385         catch(exception& e) {
2386                 m->errorOut(e, "ShhherCommand", "getUniques");
2387                 exit(1);
2388         }
2389 }
2390 /**************************************************************************************************/
2391 int ShhherCommand::createNamesFile(int numSeqs, int numUniques, string filename, vector<string>& seqNameVector, vector<int>& mapSeqToUnique, vector<int>& mapUniqueToSeq){
2392         try{
2393                 
2394                 vector<string> duplicateNames(numUniques, "");
2395                 for(int i=0;i<numSeqs;i++){
2396                         duplicateNames[mapSeqToUnique[i]] += seqNameVector[i] + ',';
2397                 }
2398                 
2399                 ofstream nameFile;
2400                 m->openOutputFile(filename, nameFile);
2401                 
2402                 for(int i=0;i<numUniques;i++){
2403                         
2404                         if (m->control_pressed) { break; }
2405                         
2406             //                  nameFile << seqNameVector[mapUniqueToSeq[i]] << '\t' << duplicateNames[i].substr(0, duplicateNames[i].find_last_of(',')) << endl;
2407                         nameFile << mapUniqueToSeq[i] << '\t' << duplicateNames[i].substr(0, duplicateNames[i].find_last_of(',')) << endl;
2408                 }
2409                 
2410                 nameFile.close();
2411         
2412                 return 0;
2413         }
2414         catch(exception& e) {
2415                 m->errorOut(e, "ShhherCommand", "createNamesFile");
2416                 exit(1);
2417         }
2418 }
2419 //**********************************************************************************************************************
2420
2421 int ShhherCommand::cluster(string filename, string distFileName, string namesFileName){
2422         try {
2423                 
2424                 ReadMatrix* read = new ReadColumnMatrix(distFileName);  
2425                 read->setCutoff(cutoff);
2426                 
2427                 NameAssignment* clusterNameMap = new NameAssignment(namesFileName);
2428                 clusterNameMap->readMap();
2429                 read->read(clusterNameMap);
2430                 
2431                 ListVector* list = read->getListVector();
2432                 SparseMatrix* matrix = read->getMatrix();
2433                 
2434                 delete read; 
2435                 delete clusterNameMap; 
2436         
2437                 RAbundVector* rabund = new RAbundVector(list->getRAbundVector());
2438                 
2439                 Cluster* cluster = new CompleteLinkage(rabund, list, matrix, cutoff, "furthest"); 
2440                 string tag = cluster->getTag();
2441                 
2442                 double clusterCutoff = cutoff;
2443                 while (matrix->getSmallDist() <= clusterCutoff && matrix->getNNodes() > 0){
2444                         
2445                         if (m->control_pressed) { break; }
2446                         
2447                         cluster->update(clusterCutoff);
2448                 }
2449                 
2450                 list->setLabel(toString(cutoff));
2451                 
2452                 ofstream listFile;
2453                 m->openOutputFile(filename, listFile);
2454                 list->print(listFile);
2455                 listFile.close();
2456                 
2457                 delete matrix;  delete cluster; delete rabund; delete list;
2458         
2459                 return 0;
2460         }
2461         catch(exception& e) {
2462                 m->errorOut(e, "ShhherCommand", "cluster");
2463                 exit(1);        
2464         }               
2465 }
2466 /**************************************************************************************************/
2467
2468 int ShhherCommand::getOTUData(int numSeqs, string fileName,  vector<int>& otuData,
2469                                vector<int>& cumNumSeqs,
2470                                vector<int>& nSeqsPerOTU,
2471                                vector<vector<int> >& aaP,       //tMaster->aanP:        each row is a different otu / each col contains the sequence indices
2472                                vector<vector<int> >& aaI,       //tMaster->aanI:        that are in each otu - can't differentiate between aaP and aaI 
2473                                vector<int>& seqNumber,          //tMaster->anP:         the sequence id number sorted by OTU
2474                                vector<int>& seqIndex,
2475                                map<string, int>& nameMap){
2476         try {
2477         
2478                 ifstream listFile;
2479                 m->openInputFile(fileName, listFile);
2480                 string label;
2481         int numOTUs;
2482                 
2483                 listFile >> label >> numOTUs;
2484         
2485                 otuData.assign(numSeqs, 0);
2486                 cumNumSeqs.assign(numOTUs, 0);
2487                 nSeqsPerOTU.assign(numOTUs, 0);
2488                 aaP.clear();aaP.resize(numOTUs);
2489                 
2490                 seqNumber.clear();
2491                 aaI.clear();
2492                 seqIndex.clear();
2493                 
2494                 string singleOTU = "";
2495                 
2496                 for(int i=0;i<numOTUs;i++){
2497                         
2498                         if (m->control_pressed) { break; }
2499             
2500                         listFile >> singleOTU;
2501                         
2502                         istringstream otuString(singleOTU);
2503             
2504                         while(otuString){
2505                                 
2506                                 string seqName = "";
2507                                 
2508                                 for(int j=0;j<singleOTU.length();j++){
2509                                         char letter = otuString.get();
2510                                         
2511                                         if(letter != ','){
2512                                                 seqName += letter;
2513                                         }
2514                                         else{
2515                                                 map<string,int>::iterator nmIt = nameMap.find(seqName);
2516                                                 int index = nmIt->second;
2517                                                 
2518                                                 nameMap.erase(nmIt);
2519                                                 
2520                                                 otuData[index] = i;
2521                                                 nSeqsPerOTU[i]++;
2522                                                 aaP[i].push_back(index);
2523                                                 seqName = "";
2524                                         }
2525                                 }
2526                                 
2527                                 map<string,int>::iterator nmIt = nameMap.find(seqName);
2528                 
2529                                 int index = nmIt->second;
2530                                 nameMap.erase(nmIt);
2531                 
2532                                 otuData[index] = i;
2533                                 nSeqsPerOTU[i]++;
2534                                 aaP[i].push_back(index);        
2535                                 
2536                                 otuString.get();
2537                         }
2538                         
2539                         sort(aaP[i].begin(), aaP[i].end());
2540                         for(int j=0;j<nSeqsPerOTU[i];j++){
2541                                 seqNumber.push_back(aaP[i][j]);
2542                         }
2543                         for(int j=nSeqsPerOTU[i];j<numSeqs;j++){
2544                                 aaP[i].push_back(0);
2545                         }
2546                         
2547                         
2548                 }
2549                 
2550                 for(int i=1;i<numOTUs;i++){
2551                         cumNumSeqs[i] = cumNumSeqs[i-1] + nSeqsPerOTU[i-1];
2552                 }
2553                 aaI = aaP;
2554                 seqIndex = seqNumber;
2555                 
2556                 listFile.close();       
2557         
2558         return numOTUs;
2559                 
2560         }
2561         catch(exception& e) {
2562                 m->errorOut(e, "ShhherCommand", "getOTUData");
2563                 exit(1);        
2564         }               
2565 }
2566 /**************************************************************************************************/
2567
2568 int ShhherCommand::calcCentroidsDriver(int numOTUs, 
2569                                           vector<int>& cumNumSeqs,
2570                                           vector<int>& nSeqsPerOTU,
2571                                           vector<int>& seqIndex,
2572                                           vector<short>& change,                //did the centroid sequence change? 0 = no; 1 = yes
2573                                           vector<int>& centroids,               //the representative flowgram for each cluster m
2574                                           vector<double>& singleTau,    //tMaster->adTau:       1-D Tau vector (1xnumSeqs)
2575                                           vector<int>& mapSeqToUnique,
2576                                           vector<short>& uniqueFlowgrams,
2577                                           vector<short>& flowDataIntI,
2578                                           vector<int>& lengths,
2579                                           int numFlowCells,
2580                                           vector<int>& seqNumber){                          
2581         
2582         //this function gets the most likely homopolymer length at a flow position for a group of sequences
2583         //within an otu
2584         
2585         try{
2586                 
2587                 for(int i=0;i<numOTUs;i++){
2588                         
2589                         if (m->control_pressed) { break; }
2590                         
2591                         double count = 0;
2592                         int position = 0;
2593                         int minFlowGram = 100000000;
2594                         double minFlowValue = 1e8;
2595                         change[i] = 0; //FALSE
2596                         
2597                         for(int j=0;j<nSeqsPerOTU[i];j++){
2598                                 count += singleTau[seqNumber[cumNumSeqs[i] + j]];
2599                         }
2600             
2601                         if(nSeqsPerOTU[i] > 0 && count > MIN_COUNT){
2602                                 vector<double> adF(nSeqsPerOTU[i]);
2603                                 vector<int> anL(nSeqsPerOTU[i]);
2604                                 
2605                                 for(int j=0;j<nSeqsPerOTU[i];j++){
2606                                         int index = cumNumSeqs[i] + j;
2607                                         int nI = seqIndex[index];
2608                                         int nIU = mapSeqToUnique[nI];
2609                                         
2610                                         int k;
2611                                         for(k=0;k<position;k++){
2612                                                 if(nIU == anL[k]){
2613                                                         break;
2614                                                 }
2615                                         }
2616                                         if(k == position){
2617                                                 anL[position] = nIU;
2618                                                 adF[position] = 0.0000;
2619                                                 position++;
2620                                         }                                               
2621                                 }
2622                                 
2623                                 for(int j=0;j<nSeqsPerOTU[i];j++){
2624                                         int index = cumNumSeqs[i] + j;
2625                                         int nI = seqIndex[index];
2626                                         
2627                                         double tauValue = singleTau[seqNumber[index]];
2628                                         
2629                                         for(int k=0;k<position;k++){
2630                                                 double dist = getDistToCentroid(anL[k], nI, lengths[nI], uniqueFlowgrams, flowDataIntI, numFlowCells);
2631                                                 adF[k] += dist * tauValue;
2632                                         }
2633                                 }
2634                                 
2635                                 for(int j=0;j<position;j++){
2636                                         if(adF[j] < minFlowValue){
2637                                                 minFlowGram = j;
2638                                                 minFlowValue = adF[j];
2639                                         }
2640                                 }
2641                                 
2642                                 if(centroids[i] != anL[minFlowGram]){
2643                                         change[i] = 1;
2644                                         centroids[i] = anL[minFlowGram];
2645                                 }
2646                         }
2647                         else if(centroids[i] != -1){
2648                                 change[i] = 1;
2649                                 centroids[i] = -1;                      
2650                         }
2651                 }
2652         
2653         return 0;
2654         }
2655         catch(exception& e) {
2656                 m->errorOut(e, "ShhherCommand", "calcCentroidsDriver");
2657                 exit(1);        
2658         }               
2659 }
2660 /**************************************************************************************************/
2661
2662 double ShhherCommand::getDistToCentroid(int cent, int flow, int length, vector<short>& uniqueFlowgrams,
2663                                         vector<short>& flowDataIntI, int numFlowCells){
2664         try{
2665                 
2666                 int flowAValue = cent * numFlowCells;
2667                 int flowBValue = flow * numFlowCells;
2668                 
2669                 double dist = 0;
2670         
2671                 for(int i=0;i<length;i++){
2672                         dist += singleLookUp[uniqueFlowgrams[flowAValue] * NUMBINS + flowDataIntI[flowBValue]];
2673                         flowAValue++;
2674                         flowBValue++;
2675                 }
2676                 
2677                 return dist / (double)length;
2678         }
2679         catch(exception& e) {
2680                 m->errorOut(e, "ShhherCommand", "getDistToCentroid");
2681                 exit(1);        
2682         }               
2683 }
2684 /**************************************************************************************************/
2685
2686 double ShhherCommand::getNewWeights(int numOTUs, vector<int>& cumNumSeqs, vector<int>& nSeqsPerOTU, vector<double>& singleTau, vector<int>& seqNumber, vector<double>& weight){
2687         try{
2688                 
2689                 double maxChange = 0;
2690                 
2691                 for(int i=0;i<numOTUs;i++){
2692                         
2693                         if (m->control_pressed) { break; }
2694                         
2695                         double difference = weight[i];
2696                         weight[i] = 0;
2697                         
2698                         for(int j=0;j<nSeqsPerOTU[i];j++){
2699                                 int index = cumNumSeqs[i] + j;
2700                                 double tauValue = singleTau[seqNumber[index]];
2701                                 weight[i] += tauValue;
2702                         }
2703                         
2704                         difference = fabs(weight[i] - difference);
2705                         if(difference > maxChange){     maxChange = difference; }
2706                 }
2707                 return maxChange;
2708         }
2709         catch(exception& e) {
2710                 m->errorOut(e, "ShhherCommand", "getNewWeights");
2711                 exit(1);        
2712         }               
2713 }
2714
2715 /**************************************************************************************************/
2716
2717 double ShhherCommand::getLikelihood(int numSeqs, int numOTUs, vector<int>& nSeqsPerOTU, vector<int>& seqNumber, vector<int>& cumNumSeqs, vector<int>& seqIndex, vector<double>& dist, vector<double>& weight){
2718         
2719         try{
2720                 
2721                 vector<long double> P(numSeqs, 0);
2722                 int effNumOTUs = 0;
2723                 
2724                 for(int i=0;i<numOTUs;i++){
2725                         if(weight[i] > MIN_WEIGHT){
2726                                 effNumOTUs++;
2727                         }
2728                 }
2729                 
2730                 string hold;
2731                 for(int i=0;i<numOTUs;i++){
2732                         
2733                         if (m->control_pressed) { break; }
2734                         
2735                         for(int j=0;j<nSeqsPerOTU[i];j++){
2736                                 int index = cumNumSeqs[i] + j;
2737                                 int nI = seqIndex[index];
2738                                 double singleDist = dist[seqNumber[index]];
2739                                 
2740                                 P[nI] += weight[i] * exp(-singleDist * sigma);
2741                         }
2742                 }
2743                 double nLL = 0.00;
2744                 for(int i=0;i<numSeqs;i++){
2745                         if(P[i] == 0){  P[i] = DBL_EPSILON;     }
2746             
2747                         nLL += -log(P[i]);
2748                 }
2749                 
2750                 nLL = nLL -(double)numSeqs * log(sigma);
2751         
2752                 return nLL; 
2753         }
2754         catch(exception& e) {
2755                 m->errorOut(e, "ShhherCommand", "getNewWeights");
2756                 exit(1);        
2757         }               
2758 }
2759
2760 /**************************************************************************************************/
2761
2762 int ShhherCommand::checkCentroids(int numOTUs, vector<int>& centroids, vector<double>& weight){
2763         try{
2764                 vector<int> unique(numOTUs, 1);
2765                 
2766                 for(int i=0;i<numOTUs;i++){
2767                         if(centroids[i] == -1 || weight[i] < MIN_WEIGHT){
2768                                 unique[i] = -1;
2769                         }
2770                 }
2771                 
2772                 for(int i=0;i<numOTUs;i++){
2773                         
2774                         if (m->control_pressed) { break; }
2775                         
2776                         if(unique[i] == 1){
2777                                 for(int j=i+1;j<numOTUs;j++){
2778                                         if(unique[j] == 1){
2779                                                 
2780                                                 if(centroids[j] == centroids[i]){
2781                                                         unique[j] = 0;
2782                                                         centroids[j] = -1;
2783                                                         
2784                                                         weight[i] += weight[j];
2785                                                         weight[j] = 0.0;
2786                                                 }
2787                                         }
2788                                 }
2789                         }
2790                 }
2791         
2792         return 0;
2793         }
2794         catch(exception& e) {
2795                 m->errorOut(e, "ShhherCommand", "checkCentroids");
2796                 exit(1);        
2797         }               
2798 }
2799 /**************************************************************************************************/
2800
2801 void ShhherCommand::calcNewDistances(int numSeqs, int numOTUs, vector<int>& nSeqsPerOTU, vector<double>& dist, 
2802                                      vector<double>& weight, vector<short>& change, vector<int>& centroids,
2803                                      vector<vector<int> >& aaP, vector<double>& singleTau, vector<vector<int> >& aaI,   
2804                                      vector<int>& seqNumber, vector<int>& seqIndex,
2805                                      vector<short>& uniqueFlowgrams,
2806                                      vector<short>& flowDataIntI, int numFlowCells, vector<int>& lengths){
2807         
2808         try{
2809                 
2810                 int total = 0;
2811                 vector<double> newTau(numOTUs,0);
2812                 vector<double> norms(numSeqs, 0);
2813                 nSeqsPerOTU.assign(numOTUs, 0);
2814         
2815                 for(int i=0;i<numSeqs;i++){
2816                         
2817                         if (m->control_pressed) { break; }
2818                         
2819                         int indexOffset = i * numOTUs;
2820             
2821                         double offset = 1e8;
2822                         
2823                         for(int j=0;j<numOTUs;j++){
2824                 
2825                                 if(weight[j] > MIN_WEIGHT && change[j] == 1){
2826                                         dist[indexOffset + j] = getDistToCentroid(centroids[j], i, lengths[i], uniqueFlowgrams, flowDataIntI, numFlowCells);
2827                                 }
2828                 
2829                                 if(weight[j] > MIN_WEIGHT && dist[indexOffset + j] < offset){
2830                                         offset = dist[indexOffset + j];
2831                                 }
2832                         }
2833             
2834                         for(int j=0;j<numOTUs;j++){
2835                                 if(weight[j] > MIN_WEIGHT){
2836                                         newTau[j] = exp(sigma * (-dist[indexOffset + j] + offset)) * weight[j];
2837                                         norms[i] += newTau[j];
2838                                 }
2839                                 else{
2840                                         newTau[j] = 0.0;
2841                                 }
2842                         }
2843             
2844                         for(int j=0;j<numOTUs;j++){
2845                                 newTau[j] /= norms[i];
2846                         }
2847             
2848                         for(int j=0;j<numOTUs;j++){
2849                                 if(newTau[j] > MIN_TAU){
2850                                         
2851                                         int oldTotal = total;
2852                                         
2853                                         total++;
2854                                         
2855                                         singleTau.resize(total, 0);
2856                                         seqNumber.resize(total, 0);
2857                                         seqIndex.resize(total, 0);
2858                                         
2859                                         singleTau[oldTotal] = newTau[j];
2860                                         
2861                                         aaP[j][nSeqsPerOTU[j]] = oldTotal;
2862                                         aaI[j][nSeqsPerOTU[j]] = i;
2863                                         nSeqsPerOTU[j]++;
2864                                 }
2865                         }
2866             
2867                 }
2868         
2869         }
2870         catch(exception& e) {
2871                 m->errorOut(e, "ShhherCommand", "calcNewDistances");
2872                 exit(1);        
2873         }               
2874 }
2875 /**************************************************************************************************/
2876
2877 int ShhherCommand::fill(int numOTUs, vector<int>& seqNumber, vector<int>& seqIndex, vector<int>& cumNumSeqs, vector<int>& nSeqsPerOTU, vector<vector<int> >& aaP, vector<vector<int> >& aaI){
2878         try {
2879                 int index = 0;
2880                 for(int i=0;i<numOTUs;i++){
2881                         
2882                         if (m->control_pressed) { return 0; }
2883                         
2884                         cumNumSeqs[i] = index;
2885                         for(int j=0;j<nSeqsPerOTU[i];j++){
2886                                 seqNumber[index] = aaP[i][j];
2887                                 seqIndex[index] = aaI[i][j];
2888                                 
2889                                 index++;
2890                         }
2891                 }
2892         
2893         return 0; 
2894         }
2895         catch(exception& e) {
2896                 m->errorOut(e, "ShhherCommand", "fill");
2897                 exit(1);        
2898         }               
2899 }
2900 /**************************************************************************************************/
2901
2902 void ShhherCommand::setOTUs(int numOTUs, int numSeqs, vector<int>& seqNumber, vector<int>& seqIndex, vector<int>& cumNumSeqs, vector<int>& nSeqsPerOTU,
2903                             vector<int>& otuData, vector<double>& singleTau, vector<double>& dist, vector<vector<int> >& aaP, vector<vector<int> >& aaI){
2904         
2905         try {
2906                 vector<double> bigTauMatrix(numOTUs * numSeqs, 0.0000);
2907                 
2908                 for(int i=0;i<numOTUs;i++){
2909                         
2910                         if (m->control_pressed) { break; }
2911                         
2912                         for(int j=0;j<nSeqsPerOTU[i];j++){
2913                                 int index = cumNumSeqs[i] + j;
2914                                 double tauValue = singleTau[seqNumber[index]];
2915                                 int sIndex = seqIndex[index];
2916                                 bigTauMatrix[sIndex * numOTUs + i] = tauValue;                          
2917                         }
2918                 }
2919                 
2920                 for(int i=0;i<numSeqs;i++){
2921                         double maxTau = -1.0000;
2922                         int maxOTU = -1;
2923                         for(int j=0;j<numOTUs;j++){
2924                                 if(bigTauMatrix[i * numOTUs + j] > maxTau){
2925                                         maxTau = bigTauMatrix[i * numOTUs + j];
2926                                         maxOTU = j;
2927                                 }
2928                         }
2929                         
2930                         otuData[i] = maxOTU;
2931                 }
2932                 
2933                 nSeqsPerOTU.assign(numOTUs, 0);         
2934                 
2935                 for(int i=0;i<numSeqs;i++){
2936                         int index = otuData[i];
2937                         
2938                         singleTau[i] = 1.0000;
2939                         dist[i] = 0.0000;
2940                         
2941                         aaP[index][nSeqsPerOTU[index]] = i;
2942                         aaI[index][nSeqsPerOTU[index]] = i;
2943                         
2944                         nSeqsPerOTU[index]++;
2945                 }
2946         
2947                 fill(numOTUs, seqNumber, seqIndex, cumNumSeqs, nSeqsPerOTU, aaP, aaI);  
2948         }
2949         catch(exception& e) {
2950                 m->errorOut(e, "ShhherCommand", "setOTUs");
2951                 exit(1);        
2952         }               
2953 }
2954 /**************************************************************************************************/
2955
2956 void ShhherCommand::writeQualities(int numOTUs, int numFlowCells, string filename, vector<int> otuCounts, vector<int>& nSeqsPerOTU, vector<int>& seqNumber,
2957                                    vector<double>& singleTau, vector<short>& flowDataIntI, vector<short>& uniqueFlowgrams, vector<int>& cumNumSeqs,
2958                                    vector<int>& mapUniqueToSeq, vector<string>& seqNameVector, vector<int>& centroids, vector<vector<int> >& aaI){
2959         
2960         try {
2961                 string thisOutputDir = outputDir;
2962                 if (outputDir == "") {  thisOutputDir += m->hasPath(filename);  }
2963                 string qualityFileName = thisOutputDir + m->getRootName(m->getSimpleName(filename)) + "shhh.qual";
2964         
2965                 ofstream qualityFile;
2966                 m->openOutputFile(qualityFileName, qualityFile);
2967         
2968                 qualityFile.setf(ios::fixed, ios::floatfield);
2969                 qualityFile.setf(ios::showpoint);
2970                 qualityFile << setprecision(6);
2971                 
2972                 vector<vector<int> > qualities(numOTUs);
2973                 vector<double> pr(HOMOPS, 0);
2974                 
2975                 
2976                 for(int i=0;i<numOTUs;i++){
2977                         
2978                         if (m->control_pressed) { break; }
2979                         
2980                         int index = 0;
2981                         int base = 0;
2982                         
2983                         if(nSeqsPerOTU[i] > 0){
2984                                 qualities[i].assign(1024, -1);
2985                                 
2986                                 while(index < numFlowCells){
2987                                         double maxPrValue = 1e8;
2988                                         short maxPrIndex = -1;
2989                                         double count = 0.0000;
2990                                         
2991                                         pr.assign(HOMOPS, 0);
2992                                         
2993                                         for(int j=0;j<nSeqsPerOTU[i];j++){
2994                                                 int lIndex = cumNumSeqs[i] + j;
2995                                                 double tauValue = singleTau[seqNumber[lIndex]];
2996                                                 int sequenceIndex = aaI[i][j];
2997                                                 short intensity = flowDataIntI[sequenceIndex * numFlowCells + index];
2998                                                 
2999                                                 count += tauValue;
3000                                                 
3001                                                 for(int s=0;s<HOMOPS;s++){
3002                                                         pr[s] += tauValue * singleLookUp[s * NUMBINS + intensity];
3003                                                 }
3004                                         }
3005                                         
3006                                         maxPrIndex = uniqueFlowgrams[centroids[i] * numFlowCells + index];
3007                                         maxPrValue = pr[maxPrIndex];
3008                                         
3009                                         if(count > MIN_COUNT){
3010                                                 double U = 0.0000;
3011                                                 double norm = 0.0000;
3012                                                 
3013                                                 for(int s=0;s<HOMOPS;s++){
3014                                                         norm += exp(-(pr[s] - maxPrValue));
3015                                                 }
3016                                                 
3017                                                 for(int s=1;s<=maxPrIndex;s++){
3018                                                         int value = 0;
3019                                                         double temp = 0.0000;
3020                                                         
3021                                                         U += exp(-(pr[s-1]-maxPrValue))/norm;
3022                                                         
3023                                                         if(U>0.00){
3024                                                                 temp = log10(U);
3025                                                         }
3026                                                         else{
3027                                                                 temp = -10.1;
3028                                                         }
3029                                                         temp = floor(-10 * temp);
3030                                                         value = (int)floor(temp);
3031                                                         if(value > 100){        value = 100;    }
3032                                                         
3033                                                         qualities[i][base] = (int)value;
3034                                                         base++;
3035                                                 }
3036                                         }
3037                                         
3038                                         index++;
3039                                 }
3040                         }
3041                         
3042                         
3043                         if(otuCounts[i] > 0){
3044                                 qualityFile << '>' << seqNameVector[mapUniqueToSeq[i]] << endl;
3045                                 
3046                                 int j=4;        //need to get past the first four bases
3047                                 while(qualities[i][j] != -1){
3048                                         qualityFile << qualities[i][j] << ' ';
3049                                         j++;
3050                                 }
3051                                 qualityFile << endl;
3052                         }
3053                 }
3054                 qualityFile.close();
3055                 outputNames.push_back(qualityFileName);
3056         
3057         }
3058         catch(exception& e) {
3059                 m->errorOut(e, "ShhherCommand", "writeQualities");
3060                 exit(1);        
3061         }               
3062 }
3063
3064 /**************************************************************************************************/
3065
3066 void ShhherCommand::writeSequences(string thisCompositeFASTAFileName, int numOTUs, int numFlowCells, string filename, vector<int> otuCounts, vector<short>& uniqueFlowgrams, vector<string>& seqNameVector, vector<vector<int> >& aaI, vector<int>& centroids){
3067         try {
3068                 string thisOutputDir = outputDir;
3069                 if (outputDir == "") {  thisOutputDir += m->hasPath(filename);  }
3070                 string fastaFileName = thisOutputDir + m->getRootName(m->getSimpleName(filename)) + "shhh.fasta";
3071                 ofstream fastaFile;
3072                 m->openOutputFile(fastaFileName, fastaFile);
3073                 
3074                 vector<string> names(numOTUs, "");
3075                 
3076                 for(int i=0;i<numOTUs;i++){
3077                         
3078                         if (m->control_pressed) { break; }
3079                         
3080                         int index = centroids[i];
3081                         
3082                         if(otuCounts[i] > 0){
3083                                 fastaFile << '>' << seqNameVector[aaI[i][0]] << endl;
3084                                 
3085                                 string newSeq = "";
3086                                 
3087                                 for(int j=0;j<numFlowCells;j++){
3088                                         
3089                                         char base = flowOrder[j % 4];
3090                                         for(int k=0;k<uniqueFlowgrams[index * numFlowCells + j];k++){
3091                                                 newSeq += base;
3092                                         }
3093                                 }
3094                                 
3095                                 fastaFile << newSeq.substr(4) << endl;
3096                         }
3097                 }
3098                 fastaFile.close();
3099         
3100                 outputNames.push_back(fastaFileName);
3101         
3102                 if(thisCompositeFASTAFileName != ""){
3103                         m->appendFiles(fastaFileName, thisCompositeFASTAFileName);
3104                 }
3105         }
3106         catch(exception& e) {
3107                 m->errorOut(e, "ShhherCommand", "writeSequences");
3108                 exit(1);        
3109         }               
3110 }
3111
3112 /**************************************************************************************************/
3113
3114 void ShhherCommand::writeNames(string thisCompositeNamesFileName, int numOTUs, string filename, vector<int> otuCounts, vector<string>& seqNameVector, vector<vector<int> >& aaI, vector<int>& nSeqsPerOTU){
3115         try {
3116                 string thisOutputDir = outputDir;
3117                 if (outputDir == "") {  thisOutputDir += m->hasPath(filename);  }
3118                 string nameFileName = thisOutputDir + m->getRootName(m->getSimpleName(filename)) + "shhh.names";
3119                 ofstream nameFile;
3120                 m->openOutputFile(nameFileName, nameFile);
3121                 
3122                 for(int i=0;i<numOTUs;i++){
3123                         
3124                         if (m->control_pressed) { break; }
3125                         
3126                         if(otuCounts[i] > 0){
3127                                 nameFile << seqNameVector[aaI[i][0]] << '\t' << seqNameVector[aaI[i][0]];
3128                                 
3129                                 for(int j=1;j<nSeqsPerOTU[i];j++){
3130                                         nameFile << ',' << seqNameVector[aaI[i][j]];
3131                                 }
3132                                 
3133                                 nameFile << endl;
3134                         }
3135                 }
3136                 nameFile.close();
3137                 outputNames.push_back(nameFileName);
3138                 
3139                 
3140                 if(thisCompositeNamesFileName != ""){
3141                         m->appendFiles(nameFileName, thisCompositeNamesFileName);
3142                 }               
3143         }
3144         catch(exception& e) {
3145                 m->errorOut(e, "ShhherCommand", "writeNames");
3146                 exit(1);        
3147         }               
3148 }
3149
3150 /**************************************************************************************************/
3151
3152 void ShhherCommand::writeGroups(string filename, int numSeqs, vector<string>& seqNameVector){
3153         try {
3154                 string thisOutputDir = outputDir;
3155                 if (outputDir == "") {  thisOutputDir += m->hasPath(filename);  }
3156                 string fileRoot = thisOutputDir + m->getRootName(m->getSimpleName(filename));
3157                 string groupFileName = fileRoot + "shhh.groups";
3158                 ofstream groupFile;
3159                 m->openOutputFile(groupFileName, groupFile);
3160                 
3161                 for(int i=0;i<numSeqs;i++){
3162                         if (m->control_pressed) { break; }
3163                         groupFile << seqNameVector[i] << '\t' << fileRoot << endl;
3164                 }
3165                 groupFile.close();
3166                 outputNames.push_back(groupFileName);
3167         
3168         }
3169         catch(exception& e) {
3170                 m->errorOut(e, "ShhherCommand", "writeGroups");
3171                 exit(1);        
3172         }               
3173 }
3174
3175 /**************************************************************************************************/
3176
3177 void ShhherCommand::writeClusters(string filename, int numOTUs, int numFlowCells, vector<int> otuCounts, vector<int>& centroids, vector<short>& uniqueFlowgrams, vector<string>& seqNameVector, vector<vector<int> >& aaI, vector<int>& nSeqsPerOTU, vector<int>& lengths, vector<short>& flowDataIntI){
3178         try {
3179                 string thisOutputDir = outputDir;
3180                 if (outputDir == "") {  thisOutputDir += m->hasPath(filename);  }
3181                 string otuCountsFileName = thisOutputDir + m->getRootName(m->getSimpleName(filename)) + "shhh.counts";
3182                 ofstream otuCountsFile;
3183                 m->openOutputFile(otuCountsFileName, otuCountsFile);
3184                 
3185                 string bases = flowOrder;
3186                 
3187                 for(int i=0;i<numOTUs;i++){
3188                         
3189                         if (m->control_pressed) {
3190                                 break;
3191                         }
3192                         //output the translated version of the centroid sequence for the otu
3193                         if(otuCounts[i] > 0){
3194                                 int index = centroids[i];
3195                                 
3196                                 otuCountsFile << "ideal\t";
3197                                 for(int j=8;j<numFlowCells;j++){
3198                                         char base = bases[j % 4];
3199                                         for(int s=0;s<uniqueFlowgrams[index * numFlowCells + j];s++){
3200                                                 otuCountsFile << base;
3201                                         }
3202                                 }
3203                                 otuCountsFile << endl;
3204                                 
3205                                 for(int j=0;j<nSeqsPerOTU[i];j++){
3206                                         int sequence = aaI[i][j];
3207                                         otuCountsFile << seqNameVector[sequence] << '\t';
3208                                         
3209                                         string newSeq = "";
3210                                         
3211                                         for(int k=0;k<lengths[sequence];k++){
3212                                                 char base = bases[k % 4];
3213                                                 int freq = int(0.01 * (double)flowDataIntI[sequence * numFlowCells + k] + 0.5);
3214                         
3215                                                 for(int s=0;s<freq;s++){
3216                                                         newSeq += base;
3217                                                         //otuCountsFile << base;
3218                                                 }
3219                                         }
3220                                         otuCountsFile << newSeq.substr(4) << endl;
3221                                 }
3222                                 otuCountsFile << endl;
3223                         }
3224                 }
3225                 otuCountsFile.close();
3226                 outputNames.push_back(otuCountsFileName);
3227         
3228         }
3229         catch(exception& e) {
3230                 m->errorOut(e, "ShhherCommand", "writeClusters");
3231                 exit(1);        
3232         }               
3233 }
3234
3235 /**************************************************************************************************/
3236
3237 void ShhherCommand::getSingleLookUp(){
3238         try{
3239                 //      these are the -log probabilities that a signal corresponds to a particular homopolymer length
3240                 singleLookUp.assign(HOMOPS * NUMBINS, 0);
3241                 
3242                 int index = 0;
3243                 ifstream lookUpFile;
3244                 m->openInputFile(lookupFileName, lookUpFile);
3245                 
3246                 for(int i=0;i<HOMOPS;i++){
3247                         
3248                         if (m->control_pressed) { break; }
3249                         
3250                         float logFracFreq;
3251                         lookUpFile >> logFracFreq;
3252                         
3253                         for(int j=0;j<NUMBINS;j++)      {
3254                                 lookUpFile >> singleLookUp[index];
3255                                 index++;
3256                         }
3257                 }       
3258                 lookUpFile.close();
3259         }
3260         catch(exception& e) {
3261                 m->errorOut(e, "ShhherCommand", "getSingleLookUp");
3262                 exit(1);
3263         }
3264 }
3265
3266 /**************************************************************************************************/
3267
3268 void ShhherCommand::getJointLookUp(){
3269         try{
3270                 
3271                 //      the most likely joint probability (-log) that two intenities have the same polymer length
3272                 jointLookUp.resize(NUMBINS * NUMBINS, 0);
3273                 
3274                 for(int i=0;i<NUMBINS;i++){
3275                         
3276                         if (m->control_pressed) { break; }
3277                         
3278                         for(int j=0;j<NUMBINS;j++){             
3279                                 
3280                                 double minSum = 100000000;
3281                                 
3282                                 for(int k=0;k<HOMOPS;k++){
3283                                         double sum = singleLookUp[k * NUMBINS + i] + singleLookUp[k * NUMBINS + j];
3284                                         
3285                                         if(sum < minSum)        {       minSum = sum;           }
3286                                 }       
3287                                 jointLookUp[i * NUMBINS + j] = minSum;
3288                         }
3289                 }
3290         }
3291         catch(exception& e) {
3292                 m->errorOut(e, "ShhherCommand", "getJointLookUp");
3293                 exit(1);
3294         }
3295 }
3296
3297 /**************************************************************************************************/
3298
3299 double ShhherCommand::getProbIntensity(int intIntensity){                          
3300         try{
3301
3302                 double minNegLogProb = 100000000; 
3303
3304                 
3305                 for(int i=0;i<HOMOPS;i++){//loop signal strength
3306                         
3307                         if (m->control_pressed) { break; }
3308                         
3309                         float negLogProb = singleLookUp[i * NUMBINS + intIntensity];
3310                         if(negLogProb < minNegLogProb)  {       minNegLogProb = negLogProb; }
3311                 }
3312                 
3313                 return minNegLogProb;
3314         }
3315         catch(exception& e) {
3316                 m->errorOut(e, "ShhherCommand", "getProbIntensity");
3317                 exit(1);
3318         }
3319 }
3320
3321
3322
3323