]> git.donarmstrong.com Git - mothur.git/blob - shhhercommand.cpp
added unix to ifdefs. minor changes while testing 1.24.0.
[mothur.git] / shhhercommand.cpp
1 /*
2  *  shhher.cpp
3  *  Mothur
4  *
5  *  Created by Pat Schloss on 12/27/10.
6  *  Copyright 2010 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "shhhercommand.h"
11
12 //**********************************************************************************************************************
13 vector<string> ShhherCommand::setParameters(){  
14         try {
15                 CommandParameter pflow("flow", "InputTypes", "", "", "none", "fileflow", "none",false,false); parameters.push_back(pflow);
16                 CommandParameter pfile("file", "InputTypes", "", "", "none", "fileflow", "none",false,false); parameters.push_back(pfile);
17                 CommandParameter plookup("lookup", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(plookup);
18                 CommandParameter pcutoff("cutoff", "Number", "", "0.01", "", "", "",false,false); parameters.push_back(pcutoff);
19                 CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors);
20                 CommandParameter pmaxiter("maxiter", "Number", "", "1000", "", "", "",false,false); parameters.push_back(pmaxiter);
21                 CommandParameter psigma("sigma", "Number", "", "60", "", "", "",false,false); parameters.push_back(psigma);
22                 CommandParameter pmindelta("mindelta", "Number", "", "0.000001", "", "", "",false,false); parameters.push_back(pmindelta);
23                 CommandParameter porder("order", "String", "", "", "", "", "",false,false); parameters.push_back(porder);
24                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
25                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
26                 
27                 vector<string> myArray;
28                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
29                 return myArray;
30         }
31         catch(exception& e) {
32                 m->errorOut(e, "ShhherCommand", "setParameters");
33                 exit(1);
34         }
35 }
36 //**********************************************************************************************************************
37 string ShhherCommand::getHelpString(){  
38         try {
39                 string helpString = "";
40                 helpString += "The shhh.flows command reads a file containing flowgrams and creates a file of corrected sequences.\n";
41                 return helpString;
42         }
43         catch(exception& e) {
44                 m->errorOut(e, "ShhherCommand", "getHelpString");
45                 exit(1);
46         }
47 }
48 //**********************************************************************************************************************
49
50 ShhherCommand::ShhherCommand(){ 
51         try {
52                 abort = true; calledHelp = true;
53                 setParameters();
54                 
55                 //initialize outputTypes
56 //              vector<string> tempOutNames;
57 //              outputTypes["pn.dist"] = tempOutNames;
58
59         }
60         catch(exception& e) {
61                 m->errorOut(e, "ShhherCommand", "ShhherCommand");
62                 exit(1);
63         }
64 }
65
66 //**********************************************************************************************************************
67
68 ShhherCommand::ShhherCommand(string option) {
69         try {
70                 abort = false; calledHelp = false;   
71                 
72                 //allow user to run help
73                 if(option == "help") { help(); abort = true; calledHelp = true; }
74                 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
75                 
76                 else {
77                         vector<string> myArray = setParameters();
78                         
79                         OptionParser parser(option);
80                         map<string,string> parameters = parser.getParameters();
81                         
82                         ValidParameters validParameter;
83                         map<string,string>::iterator it;
84                         
85                         //check to make sure all parameters are valid for command
86                         for (it = parameters.begin(); it != parameters.end(); it++) { 
87                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
88                         }
89                         
90                         //initialize outputTypes
91                         vector<string> tempOutNames;
92 //                      outputTypes["pn.dist"] = tempOutNames;
93                         //                      outputTypes["fasta"] = tempOutNames;
94                         
95                         //if the user changes the input directory command factory will send this info to us in the output parameter 
96                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
97                         if (inputDir == "not found"){   inputDir = "";          }
98                         else {
99                                 string path;
100                                 it = parameters.find("flow");
101                                 //user has given a template file
102                                 if(it != parameters.end()){ 
103                                         path = m->hasPath(it->second);
104                                         //if the user has not given a path then, add inputdir. else leave path alone.
105                                         if (path == "") {       parameters["flow"] = inputDir + it->second;             }
106                                 }
107                                 
108                                 it = parameters.find("lookup");
109                                 //user has given a template file
110                                 if(it != parameters.end()){ 
111                                         path = m->hasPath(it->second);
112                                         //if the user has not given a path then, add inputdir. else leave path alone.
113                                         if (path == "") {       parameters["lookup"] = inputDir + it->second;           }
114                                 }
115
116                                 it = parameters.find("file");
117                                 //user has given a template file
118                                 if(it != parameters.end()){ 
119                                         path = m->hasPath(it->second);
120                                         //if the user has not given a path then, add inputdir. else leave path alone.
121                                         if (path == "") {       parameters["file"] = inputDir + it->second;             }
122                                 }
123                         }
124                         
125                         
126                         //check for required parameters
127                         flowFileName = validParameter.validFile(parameters, "flow", true);
128                         flowFilesFileName = validParameter.validFile(parameters, "file", true);
129                         if (flowFileName == "not found" && flowFilesFileName == "not found") {
130                                 m->mothurOut("values for either flow or file must be provided for the shhh.seqs command.");
131                                 m->mothurOutEndLine();
132                                 abort = true; 
133                         }
134                         else if (flowFileName == "not open" || flowFilesFileName == "not open") { abort = true; }
135                         
136                         if(flowFileName != "not found"){
137                                 compositeFASTAFileName = "";    
138                                 compositeNamesFileName = "";    
139                         }
140                         else{
141                                 ofstream temp;
142
143                                 //flow.files = 9 character offset
144                                 compositeFASTAFileName = flowFilesFileName.substr(0, flowFilesFileName.length()-10) + "shhh.fasta";
145                                 m->openOutputFile(compositeFASTAFileName, temp);
146                                 temp.close();
147                                 
148                                 compositeNamesFileName = flowFilesFileName.substr(0, flowFilesFileName.length()-10) + "shhh.names";
149                                 m->openOutputFile(compositeNamesFileName, temp);
150                                 temp.close();
151                         }
152             
153             if(flowFilesFileName != "not found"){
154                 string fName;
155                 
156                 ifstream flowFilesFile;
157                 m->openInputFile(flowFilesFileName, flowFilesFile);
158                 while(flowFilesFile){
159                     fName = m->getline(flowFilesFile);
160                     
161                     //test if file is valid
162                     ifstream in;
163                     int ableToOpen = m->openInputFile(fName, in, "noerror");
164                     in.close(); 
165                     if (ableToOpen == 1) {
166                         if (inputDir != "") { //default path is set
167                             string tryPath = inputDir + fName;
168                             m->mothurOut("Unable to open " + fName + ". Trying input directory " + tryPath); m->mothurOutEndLine();
169                             ifstream in2;
170                             ableToOpen = m->openInputFile(tryPath, in2, "noerror");
171                             in2.close();
172                             fName = tryPath;
173                         }
174                     }
175                     
176                     if (ableToOpen == 1) {
177                         if (m->getDefaultPath() != "") { //default path is set
178                             string tryPath = m->getDefaultPath() + m->getSimpleName(fName);
179                             m->mothurOut("Unable to open " + fName + ". Trying default " + tryPath); m->mothurOutEndLine();
180                             ifstream in2;
181                             ableToOpen = m->openInputFile(tryPath, in2, "noerror");
182                             in2.close();
183                             fName = tryPath;
184                         }
185                     }
186                     
187                     //if you can't open it its not in current working directory or inputDir, try mothur excutable location
188                     if (ableToOpen == 1) {
189                         string exepath = m->argv;
190                         string tempPath = exepath;
191                         for (int i = 0; i < exepath.length(); i++) { tempPath[i] = tolower(exepath[i]); }
192                         exepath = exepath.substr(0, (tempPath.find_last_of('m')));
193                         
194                         string tryPath = m->getFullPathName(exepath) + m->getSimpleName(fName);
195                         m->mothurOut("Unable to open " + fName + ". Trying mothur's executable location " + tryPath); m->mothurOutEndLine();
196                         ifstream in2;
197                         ableToOpen = m->openInputFile(tryPath, in2, "noerror");
198                         in2.close();
199                         fName = tryPath;
200                     }
201                     
202                     if (ableToOpen == 1) {  m->mothurOut("Unable to open " + fName + ". Disregarding. "); m->mothurOutEndLine();  }
203                     else { flowFileVector.push_back(fName); }
204                     m->gobble(flowFilesFile);
205                 }
206                 flowFilesFile.close();
207                 if (flowFileVector.size() == 0) {  m->mothurOut("[ERROR]: no valid files."); m->mothurOutEndLine(); abort = true; }
208             }
209             else{
210                 flowFileVector.push_back(flowFileName);
211             }
212
213                         
214                         //if the user changes the output directory command factory will send this info to us in the output parameter 
215                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  
216                                 outputDir = ""; 
217                                 outputDir += m->hasPath(flowFileName); //if user entered a file with a path then preserve it    
218                         }
219                         
220                         
221                         //check for optional parameter and set defaults
222                         // ...at some point should added some additional type checking...
223                         string temp;
224                         temp = validParameter.validFile(parameters, "lookup", true);
225                         if (temp == "not found")        {       
226                                 lookupFileName = "LookUp_Titanium.pat"; 
227                                 
228                                 int ableToOpen;
229                                 ifstream in;
230                                 ableToOpen = m->openInputFile(lookupFileName, in, "noerror");
231                                 in.close();     
232                                 
233                                 //if you can't open it, try input location
234                                 if (ableToOpen == 1) {
235                                         if (inputDir != "") { //default path is set
236                                                 string tryPath = inputDir + lookupFileName;
237                                                 m->mothurOut("Unable to open " + lookupFileName + ". Trying input directory " + tryPath); m->mothurOutEndLine();
238                                                 ifstream in2;
239                                                 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
240                                                 in2.close();
241                                                 lookupFileName = tryPath;
242                                         }
243                                 }
244                                 
245                                 //if you can't open it, try default location
246                                 if (ableToOpen == 1) {
247                                         if (m->getDefaultPath() != "") { //default path is set
248                                                 string tryPath = m->getDefaultPath() + m->getSimpleName(lookupFileName);
249                                                 m->mothurOut("Unable to open " + lookupFileName + ". Trying default " + tryPath); m->mothurOutEndLine();
250                                                 ifstream in2;
251                                                 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
252                                                 in2.close();
253                                                 lookupFileName = tryPath;
254                                         }
255                                 }
256                                 
257                                 //if you can't open it its not in current working directory or inputDir, try mothur excutable location
258                                 if (ableToOpen == 1) {
259                                         string exepath = m->argv;
260                                         string tempPath = exepath;
261                                         for (int i = 0; i < exepath.length(); i++) { tempPath[i] = tolower(exepath[i]); }
262                                         exepath = exepath.substr(0, (tempPath.find_last_of('m')));
263                                         
264                                         string tryPath = m->getFullPathName(exepath) + m->getSimpleName(lookupFileName);
265                                         m->mothurOut("Unable to open " + lookupFileName + ". Trying mothur's executable location " + tryPath); m->mothurOutEndLine();
266                                         ifstream in2;
267                                         ableToOpen = m->openInputFile(tryPath, in2, "noerror");
268                                         in2.close();
269                                         lookupFileName = tryPath;
270                                 }
271                                 
272                                 if (ableToOpen == 1) {  m->mothurOut("Unable to open " + lookupFileName + "."); m->mothurOutEndLine(); abort=true;  }
273                         }
274                         else if(temp == "not open")     {       
275                                 
276                                 lookupFileName = validParameter.validFile(parameters, "lookup", false);
277                                 
278                                 //if you can't open it its not inputDir, try mothur excutable location
279                                 string exepath = m->argv;
280                                 string tempPath = exepath;
281                                 for (int i = 0; i < exepath.length(); i++) { tempPath[i] = tolower(exepath[i]); }
282                                 exepath = exepath.substr(0, (tempPath.find_last_of('m')));
283                                         
284                                 string tryPath = m->getFullPathName(exepath) + lookupFileName;
285                                 m->mothurOut("Unable to open " + lookupFileName + ". Trying mothur's executable location " + tryPath); m->mothurOutEndLine();
286                                 ifstream in2;
287                                 int ableToOpen = m->openInputFile(tryPath, in2, "noerror");
288                                 in2.close();
289                                 lookupFileName = tryPath;
290                                 
291                                 if (ableToOpen == 1) {  m->mothurOut("Unable to open " + lookupFileName + "."); m->mothurOutEndLine(); abort=true;  }
292                         }else                                           {       lookupFileName = temp;  }
293                         
294                         temp = validParameter.validFile(parameters, "processors", false);       if (temp == "not found"){       temp = m->getProcessors();      }
295                         m->setProcessors(temp);
296                         m->mothurConvert(temp, processors);
297
298                         temp = validParameter.validFile(parameters, "cutoff", false);   if (temp == "not found"){       temp = "0.01";          }
299                         m->mothurConvert(temp, cutoff); 
300                         
301                         temp = validParameter.validFile(parameters, "mindelta", false); if (temp == "not found"){       temp = "0.000001";      }
302                         m->mothurConvert(temp, minDelta); 
303
304                         temp = validParameter.validFile(parameters, "maxiter", false);  if (temp == "not found"){       temp = "1000";          }
305                         m->mothurConvert(temp, maxIters); 
306
307                         temp = validParameter.validFile(parameters, "sigma", false);if (temp == "not found")    {       temp = "60";            }
308                         m->mothurConvert(temp, sigma); 
309                         
310                         flowOrder = validParameter.validFile(parameters, "order", false);
311                         if (flowOrder == "not found"){ flowOrder = "TACG";              }
312                         else if(flowOrder.length() != 4){
313                                 m->mothurOut("The value of the order option must be four bases long\n");
314                         }
315                         
316                 }
317         }
318         catch(exception& e) {
319                 m->errorOut(e, "ShhherCommand", "ShhherCommand");
320                 exit(1);
321         }
322 }
323 //**********************************************************************************************************************
324 #ifdef USE_MPI
325 int ShhherCommand::execute(){
326         try {
327                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
328                 
329                 int tag = 1976;
330                 MPI_Status status; 
331                 MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
332                 MPI_Comm_size(MPI_COMM_WORLD, &ncpus);
333         
334                 if(pid == 0){
335
336                         for(int i=1;i<ncpus;i++){
337                                 MPI_Send(&abort, 1, MPI_INT, i, tag, MPI_COMM_WORLD);
338                         }
339                         if(abort == 1){ return 0;       }
340
341                         processors = ncpus;
342                         
343                         m->mothurOut("\nGetting preliminary data...\n");
344                         getSingleLookUp();      if (m->control_pressed) { return 0; }
345                         getJointLookUp();       if (m->control_pressed) { return 0; }
346                         
347                         int numFiles = flowFileVector.size();
348
349                         for(int i=1;i<ncpus;i++){
350                                 MPI_Send(&numFiles, 1, MPI_INT, i, tag, MPI_COMM_WORLD);
351                         }
352                         
353                         for(int i=0;i<numFiles;i++){
354                                 
355                                 if (m->control_pressed) { break; }
356                                 
357                                 double begClock = clock();
358                                 unsigned long long begTime = time(NULL);
359
360                                 flowFileName = flowFileVector[i];
361                                 
362                                 m->mothurOut("\n>>>>>\tProcessing " + flowFileName + " (file " + toString(i+1) + " of " + toString(numFiles) + ")\t<<<<<\n");
363                                 m->mothurOut("Reading flowgrams...\n");
364                                 getFlowData();
365                                 
366                                 if (m->control_pressed) { break; }
367
368                                 m->mothurOut("Identifying unique flowgrams...\n");
369                                 getUniques();
370                                 
371                                 if (m->control_pressed) { break; }
372
373                                 m->mothurOut("Calculating distances between flowgrams...\n");
374                                 char fileName[1024];
375                                 strcpy(fileName, flowFileName.c_str());
376
377                                 for(int i=1;i<ncpus;i++){
378                                         MPI_Send(&fileName[0], 1024, MPI_CHAR, i, tag, MPI_COMM_WORLD);
379
380                                         MPI_Send(&numSeqs, 1, MPI_INT, i, tag, MPI_COMM_WORLD);
381                                         MPI_Send(&numUniques, 1, MPI_INT, i, tag, MPI_COMM_WORLD);
382                                         MPI_Send(&numFlowCells, 1, MPI_INT, i, tag, MPI_COMM_WORLD);
383                                         MPI_Send(&flowDataIntI[0], numSeqs * numFlowCells, MPI_SHORT, i, tag, MPI_COMM_WORLD);
384                                         MPI_Send(&flowDataPrI[0], numSeqs * numFlowCells, MPI_DOUBLE, i, tag, MPI_COMM_WORLD);
385                                         MPI_Send(&mapUniqueToSeq[0], numSeqs, MPI_INT, i, tag, MPI_COMM_WORLD);
386                                         MPI_Send(&mapSeqToUnique[0], numSeqs, MPI_INT, i, tag, MPI_COMM_WORLD);
387                                         MPI_Send(&lengths[0], numSeqs, MPI_INT, i, tag, MPI_COMM_WORLD);
388                                         MPI_Send(&jointLookUp[0], NUMBINS * NUMBINS, MPI_DOUBLE, i, tag, MPI_COMM_WORLD);
389                                         MPI_Send(&cutoff, 1, MPI_DOUBLE, i, tag, MPI_COMM_WORLD);
390                                 }                               
391                                                         
392                                 string distFileName = flowDistMPI(0, int(sqrt(1.0/float(ncpus)) * numUniques));
393                                 
394                                 if (m->control_pressed) { break; }
395                                 
396                                 int done;
397                                 for(int i=1;i<ncpus;i++){
398                                         MPI_Recv(&done, 1, MPI_INT, i, tag, MPI_COMM_WORLD, &status);
399                                         
400                                         m->appendFiles((distFileName + ".temp." + toString(i)), distFileName);
401                                         m->mothurRemove((distFileName + ".temp." + toString(i)));
402                                 }
403
404                                 string namesFileName = createNamesFile();
405                                 
406                                 if (m->control_pressed) { break; }
407                                 
408                                 m->mothurOut("\nClustering flowgrams...\n");
409                                 string listFileName = cluster(distFileName, namesFileName);
410                                 
411                                 if (m->control_pressed) { break; }
412                                 
413                                 getOTUData(listFileName);
414
415                                 m->mothurRemove(distFileName);
416                                 m->mothurRemove(namesFileName);
417                                 m->mothurRemove(listFileName);
418                                 
419                                 if (m->control_pressed) { break; }
420                                 
421                                 initPyroCluster();
422
423                                 if (m->control_pressed) { break; }
424                                 
425                                 for(int i=1;i<ncpus;i++){
426                                         MPI_Send(&numOTUs, 1, MPI_INT, i, tag, MPI_COMM_WORLD);
427                                         MPI_Send(&singleLookUp[0], singleLookUp.size(), MPI_DOUBLE, i, tag, MPI_COMM_WORLD);
428                                         MPI_Send(&uniqueFlowgrams[0], numFlowCells * numUniques, MPI_SHORT, i, tag, MPI_COMM_WORLD);
429                                         MPI_Send(&sigma, 1, MPI_DOUBLE, i, tag, MPI_COMM_WORLD);
430                                 }
431                                 
432                                 if (m->control_pressed) { break; }
433                                 
434                                 double maxDelta = 0;
435                                 int iter = 0;
436                                 
437                                 int numOTUsOnCPU = numOTUs / ncpus;
438                                 int numSeqsOnCPU = numSeqs / ncpus;
439                                 m->mothurOut("\nDenoising flowgrams...\n");
440                                 m->mothurOut("iter\tmaxDelta\tnLL\t\tcycletime\n");
441                                 
442                                 while((maxIters == 0 && maxDelta > minDelta) || iter < MIN_ITER || (maxDelta > minDelta && iter < maxIters)){
443                                         
444                                         double cycClock = clock();
445                                         unsigned long long cycTime = time(NULL);
446                                         fill();
447                                         
448                                         if (m->control_pressed) { break; }
449
450                                         int total = singleTau.size();
451                                         for(int i=1;i<ncpus;i++){
452                                                 MPI_Send(&total, 1, MPI_INT, i, tag, MPI_COMM_WORLD);
453                                                 MPI_Send(&change[0], numOTUs, MPI_SHORT, i, tag, MPI_COMM_WORLD);
454                                                 MPI_Send(&centroids[0], numOTUs, MPI_INT, i, tag, MPI_COMM_WORLD);
455                                                 
456                                                 MPI_Send(&singleTau[0], total, MPI_DOUBLE, i, tag, MPI_COMM_WORLD);
457                                                 MPI_Send(&seqNumber[0], total, MPI_INT, i, tag, MPI_COMM_WORLD);
458                                                 MPI_Send(&seqIndex[0], total, MPI_INT, i, tag, MPI_COMM_WORLD);
459                                                 MPI_Send(&nSeqsPerOTU[0], numOTUs, MPI_INT, i, tag, MPI_COMM_WORLD);
460                                                 MPI_Send(&cumNumSeqs[0], numOTUs, MPI_INT, i, tag, MPI_COMM_WORLD);
461                                         }
462                                         
463                                         calcCentroidsDriver(0, numOTUsOnCPU);
464                                         
465                                         for(int i=1;i<ncpus;i++){
466                                                 int otuStart = i * numOTUs / ncpus;
467                                                 int otuStop = (i + 1) * numOTUs / ncpus;
468                                                 
469                                                 vector<int> tempCentroids(numOTUs, 0);
470                                                 vector<short> tempChange(numOTUs, 0);
471                                                 
472                                                 MPI_Recv(&tempCentroids[0], numOTUs, MPI_INT, i, tag, MPI_COMM_WORLD, &status);
473                                                 MPI_Recv(&tempChange[0], numOTUs, MPI_SHORT, i, tag, MPI_COMM_WORLD, &status);
474                                                 
475                                                 for(int j=otuStart;j<otuStop;j++){
476                                                         centroids[j] = tempCentroids[j];
477                                                         change[j] = tempChange[j];
478                                                 }
479                                         }
480                                                                         
481                                         maxDelta = getNewWeights(); if (m->control_pressed) { break; }
482                                         double nLL = getLikelihood(); if (m->control_pressed) { break; }
483                                         checkCentroids(); if (m->control_pressed) { break; }
484                                         
485                                         for(int i=1;i<ncpus;i++){
486                                                 MPI_Send(&centroids[0], numOTUs, MPI_INT, i, tag, MPI_COMM_WORLD);
487                                                 MPI_Send(&weight[0], numOTUs, MPI_DOUBLE, i, tag, MPI_COMM_WORLD);
488                                                 MPI_Send(&change[0], numOTUs, MPI_SHORT, i, tag, MPI_COMM_WORLD);
489                                         }
490                                         
491                                         calcNewDistancesParent(0, numSeqsOnCPU);
492
493                                         total = singleTau.size();
494
495                                         for(int i=1;i<ncpus;i++){
496                                                 int childTotal;
497                                                 int seqStart = i * numSeqs / ncpus;
498                                                 int seqStop = (i + 1) * numSeqs / ncpus;
499                                                 
500                                                 MPI_Recv(&childTotal, 1, MPI_INT, i, tag, MPI_COMM_WORLD, &status);
501
502                                                 vector<int> childSeqIndex(childTotal, 0);
503                                                 vector<double> childSingleTau(childTotal, 0);
504                                                 vector<double> childDist(numSeqs * numOTUs, 0);
505                                                 vector<int> otuIndex(childTotal, 0);
506                                                 
507                                                 MPI_Recv(&childSeqIndex[0], childTotal, MPI_INT, i, tag, MPI_COMM_WORLD, &status);
508                                                 MPI_Recv(&childSingleTau[0], childTotal, MPI_DOUBLE, i, tag, MPI_COMM_WORLD, &status);
509                                                 MPI_Recv(&childDist[0], numOTUs * numSeqs, MPI_DOUBLE, i, tag, MPI_COMM_WORLD, &status);
510                                                 MPI_Recv(&otuIndex[0], childTotal, MPI_INT, i, tag, MPI_COMM_WORLD, &status);
511
512                                                 int oldTotal = total;
513                                                 total += childTotal;
514                                                 singleTau.resize(total, 0);
515                                                 seqIndex.resize(total, 0);
516                                                 seqNumber.resize(total, 0);
517                                                 
518                                                 int childIndex = 0;
519                                                 
520                                                 for(int j=oldTotal;j<total;j++){
521                                                         int otuI = otuIndex[childIndex];
522                                                         int seqI = childSeqIndex[childIndex];
523                                                         
524                                                         singleTau[j] = childSingleTau[childIndex];
525                                                         
526                                                         aaP[otuI][nSeqsPerOTU[otuI]] = j;
527                                                         aaI[otuI][nSeqsPerOTU[otuI]] = seqI;
528                                                         nSeqsPerOTU[otuI]++;
529                                                         childIndex++;
530                                                 }
531                                                 
532                                                 int index = seqStart * numOTUs;
533                                                 for(int j=seqStart;j<seqStop;j++){
534                                                         for(int k=0;k<numOTUs;k++){
535                                                                 dist[index] = childDist[index];
536                                                                 index++;
537                                                         }
538                                                 }                                       
539                                         }
540                                         
541                                         iter++;
542                                         
543                                         m->mothurOut(toString(iter) + '\t' + toString(maxDelta) + '\t' + toString(nLL) + '\t' + toString(time(NULL) - cycTime) + '\t' + toString((clock() - cycClock)/(double)CLOCKS_PER_SEC) + '\n');                  
544
545                                         if((maxIters == 0 && maxDelta > minDelta) || iter < MIN_ITER || (maxDelta > minDelta && iter < maxIters)){
546                                                 int live = 1;
547                                                 for(int i=1;i<ncpus;i++){
548                                                         MPI_Send(&live, 1, MPI_INT, i, tag, MPI_COMM_WORLD);
549                                                 }
550                                         }
551                                         else{
552                                                 int live = 0;
553                                                 for(int i=1;i<ncpus;i++){
554                                                         MPI_Send(&live, 1, MPI_INT, i, tag, MPI_COMM_WORLD); //send kill command
555                                                 }
556                                         }
557                                         
558                                 }       
559                                 
560                                 if (m->control_pressed) { break; }
561                                 
562                                 m->mothurOut("\nFinalizing...\n");
563                                 fill();
564                                 
565                                 if (m->control_pressed) { break; }
566                                 
567                                 setOTUs();
568                                 
569                                 vector<int> otuCounts(numOTUs, 0);
570                                 for(int i=0;i<numSeqs;i++)      {       otuCounts[otuData[i]]++;        }
571                                 calcCentroidsDriver(0, numOTUs);
572                                 
573                                 if (m->control_pressed) { break; }
574                                 
575                                 writeQualities(otuCounts);      if (m->control_pressed) { break; }
576                                 writeSequences(otuCounts);      if (m->control_pressed) { break; }
577                                 writeNames(otuCounts);          if (m->control_pressed) { break; }
578                                 writeClusters(otuCounts);       if (m->control_pressed) { break; }
579                                 writeGroups();                          if (m->control_pressed) { break; }
580                                 
581                                                                  
582                                 m->mothurOut("Total time to process " + toString(flowFileName) + ":\t" + toString(time(NULL) - begTime) + '\t' + toString((clock() - begClock)/(double)CLOCKS_PER_SEC) + '\n');                 
583                         }
584                 }
585                 else{
586                         int abort = 1;
587
588                         MPI_Recv(&abort, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
589                         if(abort){      return 0;       }
590
591                         int numFiles;
592                         MPI_Recv(&numFiles, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
593
594                         for(int i=0;i<numFiles;i++){
595                                 
596                                 if (m->control_pressed) { break; }
597                                 
598                                 //Now into the pyrodist part
599                                 bool live = 1;
600
601                                 char fileName[1024];
602                                 MPI_Recv(&fileName, 1024, MPI_CHAR, 0, tag, MPI_COMM_WORLD, &status);
603                                 MPI_Recv(&numSeqs, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
604                                 MPI_Recv(&numUniques, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
605                                 MPI_Recv(&numFlowCells, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
606                                 
607                                 flowDataIntI.resize(numSeqs * numFlowCells);
608                                 flowDataPrI.resize(numSeqs * numFlowCells);
609                                 mapUniqueToSeq.resize(numSeqs);
610                                 mapSeqToUnique.resize(numSeqs);
611                                 lengths.resize(numSeqs);
612                                 jointLookUp.resize(NUMBINS * NUMBINS);
613
614                                 MPI_Recv(&flowDataIntI[0], numSeqs * numFlowCells, MPI_SHORT, 0, tag, MPI_COMM_WORLD, &status);
615                                 MPI_Recv(&flowDataPrI[0], numSeqs * numFlowCells, MPI_DOUBLE, 0, tag, MPI_COMM_WORLD, &status);
616                                 MPI_Recv(&mapUniqueToSeq[0], numSeqs, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
617                                 MPI_Recv(&mapSeqToUnique[0], numSeqs, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
618                                 MPI_Recv(&lengths[0], numSeqs, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
619                                 MPI_Recv(&jointLookUp[0], NUMBINS * NUMBINS, MPI_DOUBLE, 0, tag, MPI_COMM_WORLD, &status);
620                                 MPI_Recv(&cutoff, 1, MPI_DOUBLE, 0, tag, MPI_COMM_WORLD, &status);
621
622                                 flowFileName = string(fileName);
623                                 int flowDistStart = int(sqrt(float(pid)/float(ncpus)) * numUniques);
624                                 int flowDistEnd = int(sqrt(float(pid+1)/float(ncpus)) * numUniques);
625                                 
626                                 string distanceStringChild = flowDistMPI(flowDistStart, flowDistEnd);
627                                 
628                                 if (m->control_pressed) { break; }
629                                 
630                                 int done = 1;
631                                 MPI_Send(&done, 1, MPI_INT, 0, tag, MPI_COMM_WORLD);
632
633                                 //Now into the pyronoise part
634                                 MPI_Recv(&numOTUs, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
635                                 
636                                 singleLookUp.resize(HOMOPS * NUMBINS);
637                                 uniqueFlowgrams.resize(numUniques * numFlowCells);
638                                 weight.resize(numOTUs);
639                                 centroids.resize(numOTUs);
640                                 change.resize(numOTUs);
641                                 dist.assign(numOTUs * numSeqs, 0);
642                                 nSeqsPerOTU.resize(numOTUs);
643                                 cumNumSeqs.resize(numOTUs);
644
645                                 MPI_Recv(&singleLookUp[0], singleLookUp.size(), MPI_DOUBLE, 0, tag, MPI_COMM_WORLD, &status);
646                                 MPI_Recv(&uniqueFlowgrams[0], uniqueFlowgrams.size(), MPI_SHORT, 0, tag, MPI_COMM_WORLD, &status);
647                                 MPI_Recv(&sigma, 1, MPI_DOUBLE, 0, tag, MPI_COMM_WORLD, &status);
648                                 
649                                 int startOTU = pid * numOTUs / ncpus;
650                                 int endOTU = (pid + 1) * numOTUs / ncpus;
651
652                                 int startSeq = pid * numSeqs / ncpus;
653                                 int endSeq = (pid + 1) * numSeqs /ncpus;
654                                 
655                                 int total;
656
657                                 while(live){
658                                         
659                                         if (m->control_pressed) { break; }
660                                         
661                                         MPI_Recv(&total, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
662                                         singleTau.assign(total, 0.0000);
663                                         seqNumber.assign(total, 0);
664                                         seqIndex.assign(total, 0);
665
666                                         MPI_Recv(&change[0], numOTUs, MPI_SHORT, 0, tag, MPI_COMM_WORLD, &status);
667                                         MPI_Recv(&centroids[0], numOTUs, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
668                                         MPI_Recv(&singleTau[0], total, MPI_DOUBLE, 0, tag, MPI_COMM_WORLD, &status);
669                                         MPI_Recv(&seqNumber[0], total, MPI_DOUBLE, 0, tag, MPI_COMM_WORLD, &status);
670                                         MPI_Recv(&seqIndex[0], total, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
671                                         MPI_Recv(&nSeqsPerOTU[0], total, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
672                                         MPI_Recv(&cumNumSeqs[0], numOTUs, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
673
674                                         calcCentroidsDriver(startOTU, endOTU);
675
676                                         MPI_Send(&centroids[0], numOTUs, MPI_INT, 0, tag, MPI_COMM_WORLD);
677                                         MPI_Send(&change[0], numOTUs, MPI_SHORT, 0, tag, MPI_COMM_WORLD);
678
679                                         MPI_Recv(&centroids[0], numOTUs, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
680                                         MPI_Recv(&weight[0], numOTUs, MPI_DOUBLE, 0, tag, MPI_COMM_WORLD, &status);
681                                         MPI_Recv(&change[0], numOTUs, MPI_SHORT, 0, tag, MPI_COMM_WORLD, &status);
682
683                                         vector<int> otuIndex(total, 0);
684                                         calcNewDistancesChildMPI(startSeq, endSeq, otuIndex);
685                                         total = otuIndex.size();
686                                         
687                                         MPI_Send(&total, 1, MPI_INT, 0, tag, MPI_COMM_WORLD);
688                                         MPI_Send(&seqIndex[0], total, MPI_INT, 0, tag, MPI_COMM_WORLD);
689                                         MPI_Send(&singleTau[0], total, MPI_DOUBLE, 0, tag, MPI_COMM_WORLD);
690                                         MPI_Send(&dist[0], numOTUs * numSeqs, MPI_DOUBLE, 0, tag, MPI_COMM_WORLD);
691                                         MPI_Send(&otuIndex[0], total, MPI_INT, 0, tag, MPI_COMM_WORLD);
692                                 
693                                         MPI_Recv(&live, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
694                                 }
695                         }
696                 }
697                 
698                 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
699                 
700                 MPI_Barrier(MPI_COMM_WORLD);
701
702                 
703                 if(compositeFASTAFileName != ""){
704                         outputNames.push_back(compositeFASTAFileName);
705                         outputNames.push_back(compositeNamesFileName);
706                 }
707
708                 m->mothurOutEndLine();
709                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
710                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
711                 m->mothurOutEndLine();
712                 
713                 return 0;
714
715         }
716         catch(exception& e) {
717                 m->errorOut(e, "ShhherCommand", "execute");
718                 exit(1);
719         }
720 }
721 /**************************************************************************************************/
722
723 string ShhherCommand::flowDistMPI(int startSeq, int stopSeq){
724         try{            
725                 ostringstream outStream;
726                 outStream.setf(ios::fixed, ios::floatfield);
727                 outStream.setf(ios::dec, ios::basefield);
728                 outStream.setf(ios::showpoint);
729                 outStream.precision(6);
730                 
731                 int begTime = time(NULL);
732                 double begClock = clock();
733                 
734                 for(int i=startSeq;i<stopSeq;i++){
735                         
736                         if (m->control_pressed) { break; }
737                         
738                         for(int j=0;j<i;j++){
739                                 float flowDistance = calcPairwiseDist(mapUniqueToSeq[i], mapUniqueToSeq[j]);
740                                 
741                                 if(flowDistance < 1e-6){
742                                         outStream << mapUniqueToSeq[i] << '\t' << mapUniqueToSeq[j] << '\t' << 0.000000 << endl;
743                                 }
744                                 else if(flowDistance <= cutoff){
745                                         outStream << mapUniqueToSeq[i] << '\t' << mapUniqueToSeq[j] << '\t' << flowDistance << endl;
746                                 }
747                         }
748                         if(i % 100 == 0){
749                                 m->mothurOut(toString(i) + '\t' + toString(time(NULL) - begTime) + '\t' + toString((clock()-begClock)/CLOCKS_PER_SEC) + '\n');
750                         }
751                 }
752                 
753                 string fDistFileName = flowFileName.substr(0,flowFileName.find_last_of('.')) + ".shhh.dist";
754                 if(pid != 0){   fDistFileName += ".temp." + toString(pid);      }
755                 
756                 if (m->control_pressed) { return fDistFileName; }
757                 
758                 m->mothurOut(toString(stopSeq) + '\t' + toString(time(NULL) - begTime) + '\t' + toString((clock()-begClock)/CLOCKS_PER_SEC) + '\n');
759
760                 ofstream distFile(fDistFileName.c_str());
761                 distFile << outStream.str();            
762                 distFile.close();
763                 
764                 return fDistFileName;
765         }
766         catch(exception& e) {
767                 m->errorOut(e, "ShhherCommand", "flowDistMPI");
768                 exit(1);
769         }
770 }
771 /**************************************************************************************************/
772  
773 void ShhherCommand::getOTUData(string listFileName){
774     try {
775         
776         ifstream listFile;
777         m->openInputFile(listFileName, listFile);
778         string label;
779         
780         listFile >> label >> numOTUs;
781         
782         otuData.assign(numSeqs, 0);
783         cumNumSeqs.assign(numOTUs, 0);
784         nSeqsPerOTU.assign(numOTUs, 0);
785         aaP.clear();aaP.resize(numOTUs);
786         
787         seqNumber.clear();
788         aaI.clear();
789         seqIndex.clear();
790         
791         string singleOTU = "";
792         
793         for(int i=0;i<numOTUs;i++){
794             
795             if (m->control_pressed) { break; }
796             
797             listFile >> singleOTU;
798             
799             istringstream otuString(singleOTU);
800             
801             while(otuString){
802                 
803                 string seqName = "";
804                 
805                 for(int j=0;j<singleOTU.length();j++){
806                     char letter = otuString.get();
807                     
808                     if(letter != ','){
809                         seqName += letter;
810                     }
811                     else{
812                         map<string,int>::iterator nmIt = nameMap.find(seqName);
813                         int index = nmIt->second;
814                         
815                         nameMap.erase(nmIt);
816                         
817                         otuData[index] = i;
818                         nSeqsPerOTU[i]++;
819                         aaP[i].push_back(index);
820                         seqName = "";
821                     }
822                 }
823                 
824                 map<string,int>::iterator nmIt = nameMap.find(seqName);
825                 
826                 int index = nmIt->second;
827                 nameMap.erase(nmIt);
828                 
829                 otuData[index] = i;
830                 nSeqsPerOTU[i]++;
831                 aaP[i].push_back(index);        
832                 
833                 otuString.get();
834             }
835             
836             sort(aaP[i].begin(), aaP[i].end());
837             for(int j=0;j<nSeqsPerOTU[i];j++){
838                 seqNumber.push_back(aaP[i][j]);
839             }
840             for(int j=nSeqsPerOTU[i];j<numSeqs;j++){
841                 aaP[i].push_back(0);
842             }
843             
844             
845         }
846         
847         for(int i=1;i<numOTUs;i++){
848             cumNumSeqs[i] = cumNumSeqs[i-1] + nSeqsPerOTU[i-1];
849         }
850         aaI = aaP;
851         seqIndex = seqNumber;
852         
853         listFile.close();       
854         
855     }
856     catch(exception& e) {
857         m->errorOut(e, "ShhherCommand", "getOTUData");
858         exit(1);        
859     }           
860 }
861
862 /**************************************************************************************************/
863
864 void ShhherCommand::initPyroCluster(){                          
865     try{
866         if (numOTUs < processors) { processors = 1; }
867         
868         dist.assign(numSeqs * numOTUs, 0);
869         change.assign(numOTUs, 1);
870         centroids.assign(numOTUs, -1);
871         weight.assign(numOTUs, 0);
872         singleTau.assign(numSeqs, 1.0);
873         
874         nSeqsBreaks.assign(processors+1, 0);
875         nOTUsBreaks.assign(processors+1, 0);
876         
877         nSeqsBreaks[0] = 0;
878         for(int i=0;i<processors;i++){
879             nSeqsBreaks[i+1] = nSeqsBreaks[i] + (int)((double) numSeqs / (double) processors);
880             nOTUsBreaks[i+1] = nOTUsBreaks[i] + (int)((double) numOTUs / (double) processors);
881         }
882         nSeqsBreaks[processors] = numSeqs;
883         nOTUsBreaks[processors] = numOTUs;
884     }
885     catch(exception& e) {
886         m->errorOut(e, "ShhherCommand", "initPyroCluster");
887         exit(1);        
888     }
889 }
890
891 /**************************************************************************************************/
892
893 void ShhherCommand::fill(){
894     try {
895         int index = 0;
896         for(int i=0;i<numOTUs;i++){
897             
898             if (m->control_pressed) { break; }
899             
900             cumNumSeqs[i] = index;
901             for(int j=0;j<nSeqsPerOTU[i];j++){
902                 seqNumber[index] = aaP[i][j];
903                 seqIndex[index] = aaI[i][j];
904                 
905                 index++;
906             }
907         }
908     }
909     catch(exception& e) {
910         m->errorOut(e, "ShhherCommand", "fill");
911         exit(1);        
912     }           
913 }
914
915 /**************************************************************************************************/
916  
917 void ShhherCommand::getFlowData(){
918     try{
919         ifstream flowFile;
920         m->openInputFile(flowFileName, flowFile);
921         
922         string seqName;
923         seqNameVector.clear();
924         lengths.clear();
925         flowDataIntI.clear();
926         nameMap.clear();
927         
928         
929         int currentNumFlowCells;
930         
931         float intensity;
932         
933         flowFile >> numFlowCells;
934         int index = 0;//pcluster
935         while(!flowFile.eof()){
936             
937             if (m->control_pressed) { break; }
938             
939             flowFile >> seqName >> currentNumFlowCells;
940             lengths.push_back(currentNumFlowCells);
941             
942             seqNameVector.push_back(seqName);
943             nameMap[seqName] = index++;//pcluster
944             
945             for(int i=0;i<numFlowCells;i++){
946                 flowFile >> intensity;
947                 if(intensity > 9.99)    {       intensity = 9.99;       }
948                 int intI = int(100 * intensity + 0.0001);
949                 flowDataIntI.push_back(intI);
950             }
951             m->gobble(flowFile);
952         }
953         flowFile.close();
954         
955         numSeqs = seqNameVector.size();         
956         
957         for(int i=0;i<numSeqs;i++){
958             
959             if (m->control_pressed) { break; }
960             
961             int iNumFlowCells = i * numFlowCells;
962             for(int j=lengths[i];j<numFlowCells;j++){
963                 flowDataIntI[iNumFlowCells + j] = 0;
964             }
965         }
966         
967     }
968     catch(exception& e) {
969         m->errorOut(e, "ShhherCommand", "getFlowData");
970         exit(1);
971     }
972 }
973 /**************************************************************************************************/
974 void ShhherCommand::calcNewDistancesChildMPI(int startSeq, int stopSeq, vector<int>& otuIndex){
975         
976         try{
977                 vector<double> newTau(numOTUs,0);
978                 vector<double> norms(numSeqs, 0);
979                 otuIndex.clear();
980                 seqIndex.clear();
981                 singleTau.clear();
982                 
983                 for(int i=startSeq;i<stopSeq;i++){
984                         
985                         if (m->control_pressed) { break; }
986                         
987                         double offset = 1e8;
988                         int indexOffset = i * numOTUs;
989                         
990                         for(int j=0;j<numOTUs;j++){
991                                 
992                                 if(weight[j] > MIN_WEIGHT && change[j] == 1){
993                                         dist[indexOffset + j] = getDistToCentroid(centroids[j], i, lengths[i]);
994                                 }
995                                 if(weight[j] > MIN_WEIGHT && dist[indexOffset + j] < offset){
996                                         offset = dist[indexOffset + j];
997                                 }
998                         }
999                         
1000                         for(int j=0;j<numOTUs;j++){
1001                                 if(weight[j] > MIN_WEIGHT){
1002                                         newTau[j] = exp(sigma * (-dist[indexOffset + j] + offset)) * weight[j];
1003                                         norms[i] += newTau[j];
1004                                 }
1005                                 else{
1006                                         newTau[j] = 0.0;
1007                                 }
1008                         }
1009                         
1010                         for(int j=0;j<numOTUs;j++){
1011                 
1012                                 newTau[j] /= norms[i];
1013                                 
1014                                 if(newTau[j] > MIN_TAU){
1015                                         otuIndex.push_back(j);
1016                                         seqIndex.push_back(i);
1017                                         singleTau.push_back(newTau[j]);
1018                                 }
1019                         }
1020                         
1021                 }
1022         }
1023         catch(exception& e) {
1024                 m->errorOut(e, "ShhherCommand", "calcNewDistancesChildMPI");
1025                 exit(1);        
1026         }               
1027 }
1028
1029 /**************************************************************************************************/
1030
1031 void ShhherCommand::calcNewDistancesParent(int startSeq, int stopSeq){
1032     
1033     try{
1034         
1035         int total = 0;
1036         vector<double> newTau(numOTUs,0);
1037         vector<double> norms(numSeqs, 0);
1038         nSeqsPerOTU.assign(numOTUs, 0);
1039         
1040         for(int i=startSeq;i<stopSeq;i++){
1041             
1042             if (m->control_pressed) { break; }
1043             
1044             int indexOffset = i * numOTUs;
1045             
1046             double offset = 1e8;
1047             
1048             for(int j=0;j<numOTUs;j++){
1049                 
1050                 if(weight[j] > MIN_WEIGHT && change[j] == 1){
1051                     dist[indexOffset + j] = getDistToCentroid(centroids[j], i, lengths[i]);
1052                 }
1053                 
1054                 if(weight[j] > MIN_WEIGHT && dist[indexOffset + j] < offset){
1055                     offset = dist[indexOffset + j];
1056                 }
1057             }
1058             
1059             for(int j=0;j<numOTUs;j++){
1060                 if(weight[j] > MIN_WEIGHT){
1061                     newTau[j] = exp(sigma * (-dist[indexOffset + j] + offset)) * weight[j];
1062                     norms[i] += newTau[j];
1063                 }
1064                 else{
1065                     newTau[j] = 0.0;
1066                 }
1067             }
1068             
1069             for(int j=0;j<numOTUs;j++){
1070                 newTau[j] /= norms[i];
1071             }
1072             
1073             for(int j=0;j<numOTUs;j++){
1074                 if(newTau[j] > MIN_TAU){
1075                     
1076                     int oldTotal = total;
1077                     
1078                     total++;
1079                     
1080                     singleTau.resize(total, 0);
1081                     seqNumber.resize(total, 0);
1082                     seqIndex.resize(total, 0);
1083                     
1084                     singleTau[oldTotal] = newTau[j];
1085                     
1086                     aaP[j][nSeqsPerOTU[j]] = oldTotal;
1087                     aaI[j][nSeqsPerOTU[j]] = i;
1088                     nSeqsPerOTU[j]++;
1089                 }
1090             }
1091             
1092         }
1093         
1094     }
1095     catch(exception& e) {
1096         m->errorOut(e, "ShhherCommand", "calcNewDistancesParent");
1097         exit(1);        
1098     }           
1099 }
1100
1101 /**************************************************************************************************/
1102
1103 void ShhherCommand::setOTUs(){
1104     
1105     try {
1106         vector<double> bigTauMatrix(numOTUs * numSeqs, 0.0000);
1107         
1108         for(int i=0;i<numOTUs;i++){
1109             
1110             if (m->control_pressed) { break; }
1111             
1112             for(int j=0;j<nSeqsPerOTU[i];j++){
1113                 int index = cumNumSeqs[i] + j;
1114                 double tauValue = singleTau[seqNumber[index]];
1115                 int sIndex = seqIndex[index];
1116                 bigTauMatrix[sIndex * numOTUs + i] = tauValue;                          
1117             }
1118         }
1119         
1120         for(int i=0;i<numSeqs;i++){
1121             double maxTau = -1.0000;
1122             int maxOTU = -1;
1123             for(int j=0;j<numOTUs;j++){
1124                 if(bigTauMatrix[i * numOTUs + j] > maxTau){
1125                     maxTau = bigTauMatrix[i * numOTUs + j];
1126                     maxOTU = j;
1127                 }
1128             }
1129             
1130             otuData[i] = maxOTU;
1131         }
1132         
1133         nSeqsPerOTU.assign(numOTUs, 0);         
1134         
1135         for(int i=0;i<numSeqs;i++){
1136             int index = otuData[i];
1137             
1138             singleTau[i] = 1.0000;
1139             dist[i] = 0.0000;
1140             
1141             aaP[index][nSeqsPerOTU[index]] = i;
1142             aaI[index][nSeqsPerOTU[index]] = i;
1143             
1144             nSeqsPerOTU[index]++;
1145         }
1146         fill(); 
1147     }
1148     catch(exception& e) {
1149         m->errorOut(e, "ShhherCommand", "setOTUs");
1150         exit(1);        
1151     }           
1152 }
1153
1154 /**************************************************************************************************/
1155  
1156 void ShhherCommand::getUniques(){
1157     try{
1158         
1159         
1160         numUniques = 0;
1161         uniqueFlowgrams.assign(numFlowCells * numSeqs, -1);
1162         uniqueCount.assign(numSeqs, 0);                                                 //      anWeights
1163         uniqueLengths.assign(numSeqs, 0);
1164         mapSeqToUnique.assign(numSeqs, -1);
1165         mapUniqueToSeq.assign(numSeqs, -1);
1166         
1167         vector<short> uniqueFlowDataIntI(numFlowCells * numSeqs, -1);
1168         
1169         for(int i=0;i<numSeqs;i++){
1170             
1171             if (m->control_pressed) { break; }
1172             
1173             int index = 0;
1174             
1175             vector<short> current(numFlowCells);
1176             for(int j=0;j<numFlowCells;j++){
1177                 current[j] = short(((flowDataIntI[i * numFlowCells + j] + 50.0)/100.0));
1178             }
1179             
1180             for(int j=0;j<numUniques;j++){
1181                 int offset = j * numFlowCells;
1182                 bool toEnd = 1;
1183                 
1184                 int shorterLength;
1185                 if(lengths[i] < uniqueLengths[j])       {       shorterLength = lengths[i];                     }
1186                 else                                                            {       shorterLength = uniqueLengths[j];       }
1187                 
1188                 for(int k=0;k<shorterLength;k++){
1189                     if(current[k] != uniqueFlowgrams[offset + k]){
1190                         toEnd = 0;
1191                         break;
1192                     }
1193                 }
1194                 
1195                 if(toEnd){
1196                     mapSeqToUnique[i] = j;
1197                     uniqueCount[j]++;
1198                     index = j;
1199                     if(lengths[i] > uniqueLengths[j])   {       uniqueLengths[j] = lengths[i];  }
1200                     break;
1201                 }
1202                 index++;
1203             }
1204             
1205             if(index == numUniques){
1206                 uniqueLengths[numUniques] = lengths[i];
1207                 uniqueCount[numUniques] = 1;
1208                 mapSeqToUnique[i] = numUniques;//anMap
1209                 mapUniqueToSeq[numUniques] = i;//anF
1210                 
1211                 for(int k=0;k<numFlowCells;k++){
1212                     uniqueFlowgrams[numUniques * numFlowCells + k] = current[k];
1213                     uniqueFlowDataIntI[numUniques * numFlowCells + k] = flowDataIntI[i * numFlowCells + k];
1214                 }
1215                 
1216                 numUniques++;
1217             }
1218         }
1219         uniqueFlowDataIntI.resize(numFlowCells * numUniques);
1220         uniqueLengths.resize(numUniques);       
1221         
1222         flowDataPrI.resize(numSeqs * numFlowCells, 0);
1223         for(int i=0;i<flowDataPrI.size();i++)   {       if (m->control_pressed) { break; } flowDataPrI[i] = getProbIntensity(flowDataIntI[i]);          }
1224     }
1225     catch(exception& e) {
1226         m->errorOut(e, "ShhherCommand", "getUniques");
1227         exit(1);
1228     }
1229 }
1230
1231 /**************************************************************************************************/
1232
1233 float ShhherCommand::calcPairwiseDist(int seqA, int seqB){
1234     try{
1235         int minLength = lengths[mapSeqToUnique[seqA]];
1236         if(lengths[seqB] < minLength){  minLength = lengths[mapSeqToUnique[seqB]];      }
1237         
1238         int ANumFlowCells = seqA * numFlowCells;
1239         int BNumFlowCells = seqB * numFlowCells;
1240         
1241         float dist = 0;
1242         
1243         for(int i=0;i<minLength;i++){
1244             
1245             if (m->control_pressed) { break; }
1246             
1247             int flowAIntI = flowDataIntI[ANumFlowCells + i];
1248             float flowAPrI = flowDataPrI[ANumFlowCells + i];
1249             
1250             int flowBIntI = flowDataIntI[BNumFlowCells + i];
1251             float flowBPrI = flowDataPrI[BNumFlowCells + i];
1252             dist += jointLookUp[flowAIntI * NUMBINS + flowBIntI] - flowAPrI - flowBPrI;
1253         }
1254         
1255         dist /= (float) minLength;
1256         return dist;
1257     }
1258     catch(exception& e) {
1259         m->errorOut(e, "ShhherCommand", "calcPairwiseDist");
1260         exit(1);
1261     }
1262 }
1263
1264 //**********************************************************************************************************************/
1265
1266 string ShhherCommand::cluster(string distFileName, string namesFileName){
1267     try {
1268         
1269         ReadMatrix* read = new ReadColumnMatrix(distFileName);  
1270         read->setCutoff(cutoff);
1271         
1272         NameAssignment* clusterNameMap = new NameAssignment(namesFileName);
1273         clusterNameMap->readMap();
1274         read->read(clusterNameMap);
1275         
1276         ListVector* list = read->getListVector();
1277         SparseMatrix* matrix = read->getMatrix();
1278         
1279         delete read; 
1280         delete clusterNameMap; 
1281         
1282         RAbundVector* rabund = new RAbundVector(list->getRAbundVector());
1283         
1284         Cluster* cluster = new CompleteLinkage(rabund, list, matrix, cutoff, "furthest"); 
1285         string tag = cluster->getTag();
1286         
1287         double clusterCutoff = cutoff;
1288         while (matrix->getSmallDist() <= clusterCutoff && matrix->getNNodes() > 0){
1289             
1290             if (m->control_pressed) { break; }
1291             
1292             cluster->update(clusterCutoff);
1293         }
1294         
1295         list->setLabel(toString(cutoff));
1296         
1297         string listFileName = flowFileName.substr(0,flowFileName.find_last_of('.')) + ".shhh.list";
1298         ofstream listFile;
1299         m->openOutputFile(listFileName, listFile);
1300         list->print(listFile);
1301         listFile.close();
1302         
1303         delete matrix;  delete cluster; delete rabund; delete list;
1304         
1305         return listFileName;
1306     }
1307     catch(exception& e) {
1308         m->errorOut(e, "ShhherCommand", "cluster");
1309         exit(1);        
1310     }           
1311 }
1312
1313 /**************************************************************************************************/
1314
1315 void ShhherCommand::calcCentroidsDriver(int start, int finish){                          
1316     
1317     //this function gets the most likely homopolymer length at a flow position for a group of sequences
1318     //within an otu
1319     
1320     try{
1321         
1322         for(int i=start;i<finish;i++){
1323             
1324             if (m->control_pressed) { break; }
1325             
1326             double count = 0;
1327             int position = 0;
1328             int minFlowGram = 100000000;
1329             double minFlowValue = 1e8;
1330             change[i] = 0; //FALSE
1331             
1332             for(int j=0;j<nSeqsPerOTU[i];j++){
1333                 count += singleTau[seqNumber[cumNumSeqs[i] + j]];
1334             }
1335             
1336             if(nSeqsPerOTU[i] > 0 && count > MIN_COUNT){
1337                 vector<double> adF(nSeqsPerOTU[i]);
1338                 vector<int> anL(nSeqsPerOTU[i]);
1339                 
1340                 for(int j=0;j<nSeqsPerOTU[i];j++){
1341                     int index = cumNumSeqs[i] + j;
1342                     int nI = seqIndex[index];
1343                     int nIU = mapSeqToUnique[nI];
1344                     
1345                     int k;
1346                     for(k=0;k<position;k++){
1347                         if(nIU == anL[k]){
1348                             break;
1349                         }
1350                     }
1351                     if(k == position){
1352                         anL[position] = nIU;
1353                         adF[position] = 0.0000;
1354                         position++;
1355                     }                                           
1356                 }
1357                 
1358                 for(int j=0;j<nSeqsPerOTU[i];j++){
1359                     int index = cumNumSeqs[i] + j;
1360                     int nI = seqIndex[index];
1361                     
1362                     double tauValue = singleTau[seqNumber[index]];
1363                     
1364                     for(int k=0;k<position;k++){
1365                         double dist = getDistToCentroid(anL[k], nI, lengths[nI]);
1366                         adF[k] += dist * tauValue;
1367                     }
1368                 }
1369                 
1370                 for(int j=0;j<position;j++){
1371                     if(adF[j] < minFlowValue){
1372                         minFlowGram = j;
1373                         minFlowValue = adF[j];
1374                     }
1375                 }
1376                 
1377                 if(centroids[i] != anL[minFlowGram]){
1378                     change[i] = 1;
1379                     centroids[i] = anL[minFlowGram];
1380                 }
1381             }
1382             else if(centroids[i] != -1){
1383                 change[i] = 1;
1384                 centroids[i] = -1;                      
1385             }
1386         }
1387     }
1388     catch(exception& e) {
1389         m->errorOut(e, "ShhherCommand", "calcCentroidsDriver");
1390         exit(1);        
1391     }           
1392 }
1393
1394 /**************************************************************************************************/
1395
1396 double ShhherCommand::getDistToCentroid(int cent, int flow, int length){
1397     try{
1398         
1399         int flowAValue = cent * numFlowCells;
1400         int flowBValue = flow * numFlowCells;
1401         
1402         double dist = 0;
1403         
1404         for(int i=0;i<length;i++){
1405             dist += singleLookUp[uniqueFlowgrams[flowAValue] * NUMBINS + flowDataIntI[flowBValue]];
1406             flowAValue++;
1407             flowBValue++;
1408         }
1409         
1410         return dist / (double)length;
1411     }
1412     catch(exception& e) {
1413         m->errorOut(e, "ShhherCommand", "getDistToCentroid");
1414         exit(1);        
1415     }           
1416 }
1417
1418 /**************************************************************************************************/
1419
1420 double ShhherCommand::getNewWeights(){
1421     try{
1422         
1423         double maxChange = 0;
1424         
1425         for(int i=0;i<numOTUs;i++){
1426             
1427             if (m->control_pressed) { break; }
1428             
1429             double difference = weight[i];
1430             weight[i] = 0;
1431             
1432             for(int j=0;j<nSeqsPerOTU[i];j++){
1433                 int index = cumNumSeqs[i] + j;
1434                 double tauValue = singleTau[seqNumber[index]];
1435                 weight[i] += tauValue;
1436             }
1437             
1438             difference = fabs(weight[i] - difference);
1439             if(difference > maxChange){ maxChange = difference; }
1440         }
1441         return maxChange;
1442     }
1443     catch(exception& e) {
1444         m->errorOut(e, "ShhherCommand", "getNewWeights");
1445         exit(1);        
1446     }           
1447 }
1448  
1449  /**************************************************************************************************/
1450  
1451 double ShhherCommand::getLikelihood(){
1452     
1453     try{
1454         
1455         vector<long double> P(numSeqs, 0);
1456         int effNumOTUs = 0;
1457         
1458         for(int i=0;i<numOTUs;i++){
1459             if(weight[i] > MIN_WEIGHT){
1460                 effNumOTUs++;
1461             }
1462         }
1463         
1464         string hold;
1465         for(int i=0;i<numOTUs;i++){
1466             
1467             if (m->control_pressed) { break; }
1468             
1469             for(int j=0;j<nSeqsPerOTU[i];j++){
1470                 int index = cumNumSeqs[i] + j;
1471                 int nI = seqIndex[index];
1472                 double singleDist = dist[seqNumber[index]];
1473                 
1474                 P[nI] += weight[i] * exp(-singleDist * sigma);
1475             }
1476         }
1477         double nLL = 0.00;
1478         for(int i=0;i<numSeqs;i++){
1479             if(P[i] == 0){      P[i] = DBL_EPSILON;     }
1480             
1481             nLL += -log(P[i]);
1482         }
1483         
1484         nLL = nLL -(double)numSeqs * log(sigma);
1485         
1486         return nLL; 
1487     }
1488     catch(exception& e) {
1489         m->errorOut(e, "ShhherCommand", "getNewWeights");
1490         exit(1);        
1491     }           
1492 }
1493
1494 /**************************************************************************************************/
1495
1496 void ShhherCommand::checkCentroids(){
1497     try{
1498         vector<int> unique(numOTUs, 1);
1499         
1500         for(int i=0;i<numOTUs;i++){
1501             if(centroids[i] == -1 || weight[i] < MIN_WEIGHT){
1502                 unique[i] = -1;
1503             }
1504         }
1505         
1506         for(int i=0;i<numOTUs;i++){
1507             
1508             if (m->control_pressed) { break; }
1509             
1510             if(unique[i] == 1){
1511                 for(int j=i+1;j<numOTUs;j++){
1512                     if(unique[j] == 1){
1513                         
1514                         if(centroids[j] == centroids[i]){
1515                             unique[j] = 0;
1516                             centroids[j] = -1;
1517                             
1518                             weight[i] += weight[j];
1519                             weight[j] = 0.0;
1520                         }
1521                     }
1522                 }
1523             }
1524         }
1525     }
1526     catch(exception& e) {
1527         m->errorOut(e, "ShhherCommand", "checkCentroids");
1528         exit(1);        
1529     }           
1530 }
1531  /**************************************************************************************************/
1532
1533
1534  
1535 void ShhherCommand::writeQualities(vector<int> otuCounts){
1536     
1537     try {
1538         string thisOutputDir = outputDir;
1539         if (outputDir == "") {  thisOutputDir += m->hasPath(flowFileName);  }
1540         string qualityFileName = thisOutputDir + m->getRootName(m->getSimpleName(flowFileName)) + "shhh.qual";
1541         
1542         ofstream qualityFile;
1543         m->openOutputFile(qualityFileName, qualityFile);
1544         
1545         qualityFile.setf(ios::fixed, ios::floatfield);
1546         qualityFile.setf(ios::showpoint);
1547         qualityFile << setprecision(6);
1548         
1549         vector<vector<int> > qualities(numOTUs);
1550         vector<double> pr(HOMOPS, 0);
1551         
1552         
1553         for(int i=0;i<numOTUs;i++){
1554             
1555             if (m->control_pressed) { break; }
1556             
1557             int index = 0;
1558             int base = 0;
1559             
1560             if(nSeqsPerOTU[i] > 0){
1561                 qualities[i].assign(1024, -1);
1562                 
1563                 while(index < numFlowCells){
1564                     double maxPrValue = 1e8;
1565                     short maxPrIndex = -1;
1566                     double count = 0.0000;
1567                     
1568                     pr.assign(HOMOPS, 0);
1569                     
1570                     for(int j=0;j<nSeqsPerOTU[i];j++){
1571                         int lIndex = cumNumSeqs[i] + j;
1572                         double tauValue = singleTau[seqNumber[lIndex]];
1573                         int sequenceIndex = aaI[i][j];
1574                         short intensity = flowDataIntI[sequenceIndex * numFlowCells + index];
1575                         
1576                         count += tauValue;
1577                         
1578                         for(int s=0;s<HOMOPS;s++){
1579                             pr[s] += tauValue * singleLookUp[s * NUMBINS + intensity];
1580                         }
1581                     }
1582                     
1583                     maxPrIndex = uniqueFlowgrams[centroids[i] * numFlowCells + index];
1584                     maxPrValue = pr[maxPrIndex];
1585                     
1586                     if(count > MIN_COUNT){
1587                         double U = 0.0000;
1588                         double norm = 0.0000;
1589                         
1590                         for(int s=0;s<HOMOPS;s++){
1591                             norm += exp(-(pr[s] - maxPrValue));
1592                         }
1593                         
1594                         for(int s=1;s<=maxPrIndex;s++){
1595                             int value = 0;
1596                             double temp = 0.0000;
1597                             
1598                             U += exp(-(pr[s-1]-maxPrValue))/norm;
1599                             
1600                             if(U>0.00){
1601                                 temp = log10(U);
1602                             }
1603                             else{
1604                                 temp = -10.1;
1605                             }
1606                             temp = floor(-10 * temp);
1607                             value = (int)floor(temp);
1608                             if(value > 100){    value = 100;    }
1609                             
1610                             qualities[i][base] = (int)value;
1611                             base++;
1612                         }
1613                     }
1614                     
1615                     index++;
1616                 }
1617             }
1618             
1619             
1620             if(otuCounts[i] > 0){
1621                 qualityFile << '>' << seqNameVector[mapUniqueToSeq[i]] << endl;
1622                 
1623                 int j=4;        //need to get past the first four bases
1624                 while(qualities[i][j] != -1){
1625                     qualityFile << qualities[i][j] << ' ';
1626                     j++;
1627                 }
1628                 qualityFile << endl;
1629             }
1630         }
1631         qualityFile.close();
1632         outputNames.push_back(qualityFileName);
1633         
1634     }
1635     catch(exception& e) {
1636         m->errorOut(e, "ShhherCommand", "writeQualities");
1637         exit(1);        
1638     }           
1639 }
1640
1641 /**************************************************************************************************/
1642
1643 void ShhherCommand::writeSequences(vector<int> otuCounts){
1644     try {
1645         string thisOutputDir = outputDir;
1646         if (outputDir == "") {  thisOutputDir += m->hasPath(flowFileName);  }
1647         string fastaFileName = thisOutputDir + m->getRootName(m->getSimpleName(flowFileName)) + "shhh.fasta";
1648         ofstream fastaFile;
1649         m->openOutputFile(fastaFileName, fastaFile);
1650         
1651         vector<string> names(numOTUs, "");
1652         
1653         for(int i=0;i<numOTUs;i++){
1654             
1655             if (m->control_pressed) { break; }
1656             
1657             int index = centroids[i];
1658             
1659             if(otuCounts[i] > 0){
1660                 fastaFile << '>' << seqNameVector[aaI[i][0]] << endl;
1661                 
1662                 string newSeq = "";
1663                 
1664                 for(int j=0;j<numFlowCells;j++){
1665                     
1666                     char base = flowOrder[j % 4];
1667                     for(int k=0;k<uniqueFlowgrams[index * numFlowCells + j];k++){
1668                         newSeq += base;
1669                     }
1670                 }
1671                 
1672                 fastaFile << newSeq.substr(4) << endl;
1673             }
1674         }
1675         fastaFile.close();
1676         
1677         outputNames.push_back(fastaFileName);
1678         
1679         if(compositeFASTAFileName != ""){
1680             m->appendFiles(fastaFileName, compositeFASTAFileName);
1681         }
1682     }
1683     catch(exception& e) {
1684         m->errorOut(e, "ShhherCommand", "writeSequences");
1685         exit(1);        
1686     }           
1687 }
1688
1689 /**************************************************************************************************/
1690
1691 void ShhherCommand::writeNames(vector<int> otuCounts){
1692     try {
1693         string thisOutputDir = outputDir;
1694         if (outputDir == "") {  thisOutputDir += m->hasPath(flowFileName);  }
1695         string nameFileName = thisOutputDir + m->getRootName(m->getSimpleName(flowFileName)) + "shhh.names";
1696         ofstream nameFile;
1697         m->openOutputFile(nameFileName, nameFile);
1698         
1699         for(int i=0;i<numOTUs;i++){
1700             
1701             if (m->control_pressed) { break; }
1702             
1703             if(otuCounts[i] > 0){
1704                 nameFile << seqNameVector[aaI[i][0]] << '\t' << seqNameVector[aaI[i][0]];
1705                 
1706                 for(int j=1;j<nSeqsPerOTU[i];j++){
1707                     nameFile << ',' << seqNameVector[aaI[i][j]];
1708                 }
1709                 
1710                 nameFile << endl;
1711             }
1712         }
1713         nameFile.close();
1714         outputNames.push_back(nameFileName);
1715         
1716         
1717         if(compositeNamesFileName != ""){
1718             m->appendFiles(nameFileName, compositeNamesFileName);
1719         }               
1720     }
1721     catch(exception& e) {
1722         m->errorOut(e, "ShhherCommand", "writeNames");
1723         exit(1);        
1724     }           
1725 }
1726
1727 /**************************************************************************************************/
1728
1729 void ShhherCommand::writeGroups(){
1730     try {
1731         string thisOutputDir = outputDir;
1732         if (outputDir == "") {  thisOutputDir += m->hasPath(flowFileName);  }
1733         string fileRoot = thisOutputDir + m->getRootName(m->getSimpleName(flowFileName));
1734         string groupFileName = fileRoot + "shhh.groups";
1735         ofstream groupFile;
1736         m->openOutputFile(groupFileName, groupFile);
1737         
1738         for(int i=0;i<numSeqs;i++){
1739             if (m->control_pressed) { break; }
1740             groupFile << seqNameVector[i] << '\t' << fileRoot << endl;
1741         }
1742         groupFile.close();
1743         outputNames.push_back(groupFileName);
1744         
1745     }
1746     catch(exception& e) {
1747         m->errorOut(e, "ShhherCommand", "writeGroups");
1748         exit(1);        
1749     }           
1750 }
1751
1752 /**************************************************************************************************/
1753
1754 void ShhherCommand::writeClusters(vector<int> otuCounts){
1755     try {
1756         string thisOutputDir = outputDir;
1757         if (outputDir == "") {  thisOutputDir += m->hasPath(flowFileName);  }
1758         string otuCountsFileName = thisOutputDir + m->getRootName(m->getSimpleName(flowFileName)) + "shhh.counts";
1759         ofstream otuCountsFile;
1760         m->openOutputFile(otuCountsFileName, otuCountsFile);
1761         
1762         string bases = flowOrder;
1763         
1764         for(int i=0;i<numOTUs;i++){
1765             
1766             if (m->control_pressed) {
1767                 break;
1768             }
1769             //output the translated version of the centroid sequence for the otu
1770             if(otuCounts[i] > 0){
1771                 int index = centroids[i];
1772                 
1773                 otuCountsFile << "ideal\t";
1774                 for(int j=8;j<numFlowCells;j++){
1775                     char base = bases[j % 4];
1776                     for(int s=0;s<uniqueFlowgrams[index * numFlowCells + j];s++){
1777                         otuCountsFile << base;
1778                     }
1779                 }
1780                 otuCountsFile << endl;
1781                 
1782                 for(int j=0;j<nSeqsPerOTU[i];j++){
1783                     int sequence = aaI[i][j];
1784                     otuCountsFile << seqNameVector[sequence] << '\t';
1785                     
1786                     string newSeq = "";
1787                     
1788                     for(int k=0;k<lengths[sequence];k++){
1789                         char base = bases[k % 4];
1790                         int freq = int(0.01 * (double)flowDataIntI[sequence * numFlowCells + k] + 0.5);
1791                         
1792                         for(int s=0;s<freq;s++){
1793                             newSeq += base;
1794                             //otuCountsFile << base;
1795                         }
1796                     }
1797                     otuCountsFile << newSeq.substr(4) << endl;
1798                 }
1799                 otuCountsFile << endl;
1800             }
1801         }
1802         otuCountsFile.close();
1803         outputNames.push_back(otuCountsFileName);
1804         
1805     }
1806     catch(exception& e) {
1807         m->errorOut(e, "ShhherCommand", "writeClusters");
1808         exit(1);        
1809     }           
1810 }
1811
1812 #else
1813 //**********************************************************************************************************************
1814
1815 int ShhherCommand::execute(){
1816         try {
1817                 if (abort == true) { return 0; }
1818                 
1819                 getSingleLookUp();      if (m->control_pressed) { return 0; }
1820                 getJointLookUp();       if (m->control_pressed) { return 0; }
1821                 
1822         int numFiles = flowFileVector.size();
1823                 
1824         if (numFiles < processors) { processors = numFiles; }
1825         
1826 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
1827         if (processors == 1) { driver(flowFileVector, compositeFASTAFileName, compositeNamesFileName, 0, flowFileVector.size()); }
1828         else { createProcesses(flowFileVector); } //each processor processes one file
1829 #else
1830         driver(flowFileVector, compositeFASTAFileName, compositeNamesFileName, 0, flowFileVector.size());
1831 #endif
1832         
1833                 if(compositeFASTAFileName != ""){
1834                         outputNames.push_back(compositeFASTAFileName);
1835                         outputNames.push_back(compositeNamesFileName);
1836                 }
1837
1838                 m->mothurOutEndLine();
1839                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
1840                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
1841                 m->mothurOutEndLine();
1842                 
1843                 return 0;
1844         }
1845         catch(exception& e) {
1846                 m->errorOut(e, "ShhherCommand", "execute");
1847                 exit(1);
1848         }
1849 }
1850 #endif
1851 /**************************************************************************************************/
1852
1853 int ShhherCommand::createProcesses(vector<string> filenames){
1854     try {
1855         vector<int> processIDS;
1856                 int process = 1;
1857                 int num = 0;
1858                 
1859                 //sanity check
1860                 if (filenames.size() < processors) { processors = filenames.size(); }
1861                 
1862                 //divide the groups between the processors
1863                 vector<linePair> lines;
1864                 int numFilesPerProcessor = filenames.size() / processors;
1865                 for (int i = 0; i < processors; i++) {
1866                         int startIndex =  i * numFilesPerProcessor;
1867                         int endIndex = (i+1) * numFilesPerProcessor;
1868                         if(i == (processors - 1)){      endIndex = filenames.size();    }
1869                         lines.push_back(linePair(startIndex, endIndex));
1870                 }
1871                 
1872         #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)          
1873                 
1874                 //loop through and create all the processes you want
1875                 while (process != processors) {
1876                         int pid = fork();
1877                         
1878                         if (pid > 0) {
1879                                 processIDS.push_back(pid);  //create map from line number to pid so you can append files in correct order later
1880                                 process++;
1881                         }else if (pid == 0){
1882                                 num = driver(filenames, compositeFASTAFileName + toString(getpid()) + ".temp", compositeNamesFileName  + toString(getpid()) + ".temp", lines[process].start, lines[process].end);
1883                                 exit(0);
1884                         }else { 
1885                                 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine(); 
1886                                 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
1887                                 exit(0);
1888                         }
1889                 }
1890                 
1891                 //do my part
1892                 driver(filenames, compositeFASTAFileName, compositeNamesFileName, lines[0].start, lines[0].end);
1893                 
1894                 //force parent to wait until all the processes are done
1895                 for (int i=0;i<processIDS.size();i++) { 
1896                         int temp = processIDS[i];
1897                         wait(&temp);
1898                 }
1899         
1900         #else
1901         
1902         //////////////////////////////////////////////////////////////////////////////////////////////////////
1903         
1904         /////////////////////// NOT WORKING, ACCESS VIOLATION ON READ OF FLOWGRAMS IN THREAD /////////////////
1905         
1906         //////////////////////////////////////////////////////////////////////////////////////////////////////
1907                 //Windows version shared memory, so be careful when passing variables through the shhhFlowsData struct. 
1908                 //Above fork() will clone, so memory is separate, but that's not the case with windows, 
1909                 //////////////////////////////////////////////////////////////////////////////////////////////////////
1910                 
1911                 vector<shhhFlowsData*> pDataArray; 
1912                 DWORD   dwThreadIdArray[processors-1];
1913                 HANDLE  hThreadArray[processors-1]; 
1914                 
1915                 //Create processor worker threads.
1916                 for( int i=0; i<processors-1; i++ ){
1917                         // Allocate memory for thread data.
1918                         string extension = "";
1919                         if (i != 0) { extension = toString(i) + ".temp"; }
1920                         
1921             shhhFlowsData* tempFlow = new shhhFlowsData(filenames, (compositeFASTAFileName + extension), (compositeNamesFileName + extension), outputDir, flowOrder, jointLookUp, singleLookUp, m, lines[i].start, lines[i].end, cutoff, sigma, minDelta, maxIters, i);
1922                         pDataArray.push_back(tempFlow);
1923                         processIDS.push_back(i);
1924             
1925                         hThreadArray[i] = CreateThread(NULL, 0, ShhhFlowsThreadFunction, pDataArray[i], 0, &dwThreadIdArray[i]);   
1926                 }
1927                 
1928                 //using the main process as a worker saves time and memory
1929                 //do my part
1930                 driver(filenames, compositeFASTAFileName, compositeNamesFileName, lines[processors-1].start, lines[processors-1].end);
1931                 
1932                 //Wait until all threads have terminated.
1933                 WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE);
1934                 
1935                 //Close all thread handles and free memory allocations.
1936                 for(int i=0; i < pDataArray.size(); i++){
1937                         for(int j=0; j < pDataArray[i]->outputNames.size(); j++){ outputNames.push_back(pDataArray[i]->outputNames[j]); }
1938                         CloseHandle(hThreadArray[i]);
1939                         delete pDataArray[i];
1940                 }
1941                 
1942         #endif
1943         
1944         for (int i=0;i<processIDS.size();i++) { 
1945             if (compositeFASTAFileName != "") {
1946                 m->appendFiles((compositeFASTAFileName + toString(processIDS[i]) + ".temp"), compositeFASTAFileName);
1947                 m->appendFiles((compositeNamesFileName + toString(processIDS[i]) + ".temp"), compositeNamesFileName);
1948                 m->mothurRemove((compositeFASTAFileName + toString(processIDS[i]) + ".temp"));
1949                 m->mothurRemove((compositeNamesFileName + toString(processIDS[i]) + ".temp"));
1950             }
1951         }
1952         
1953         return 0;
1954         
1955     }
1956         catch(exception& e) {
1957                 m->errorOut(e, "ShhherCommand", "createProcesses");
1958                 exit(1);
1959         }
1960 }
1961 /**************************************************************************************************/
1962
1963 int ShhherCommand::driver(vector<string> filenames, string thisCompositeFASTAFileName, string thisCompositeNamesFileName, int start, int end){
1964     try {
1965         
1966         for(int i=start;i<end;i++){
1967                         
1968                         if (m->control_pressed) { break; }
1969                         
1970                         string flowFileName = filenames[i];
1971             
1972                         m->mothurOut("\n>>>>>\tProcessing " + flowFileName + " (file " + toString(i+1) + " of " + toString(filenames.size()) + ")\t<<<<<\n");
1973                         m->mothurOut("Reading flowgrams...\n");
1974                         
1975             vector<string> seqNameVector;
1976             vector<int> lengths;
1977             vector<short> flowDataIntI;
1978             vector<double> flowDataPrI;
1979             map<string, int> nameMap;
1980             vector<short> uniqueFlowgrams;
1981             vector<int> uniqueCount;
1982             vector<int> mapSeqToUnique;
1983             vector<int> mapUniqueToSeq;
1984             vector<int> uniqueLengths;
1985             int numFlowCells;
1986             
1987             int numSeqs = getFlowData(flowFileName, seqNameVector, lengths, flowDataIntI, nameMap, numFlowCells);
1988                         
1989                         if (m->control_pressed) { break; }
1990                         
1991                         m->mothurOut("Identifying unique flowgrams...\n");
1992                         int numUniques = getUniques(numSeqs, numFlowCells, uniqueFlowgrams, uniqueCount, uniqueLengths, mapSeqToUnique, mapUniqueToSeq, lengths, flowDataPrI, flowDataIntI);
1993                         
1994                         if (m->control_pressed) { break; }
1995                         
1996                         m->mothurOut("Calculating distances between flowgrams...\n");
1997             string distFileName = flowFileName.substr(0,flowFileName.find_last_of('.')) + ".shhh.dist";
1998             unsigned long long begTime = time(NULL);
1999             double begClock = clock();
2000         
2001             flowDistParentFork(numFlowCells, distFileName, numUniques, mapUniqueToSeq, mapSeqToUnique, lengths, flowDataPrI, flowDataIntI);     
2002             
2003             m->mothurOutEndLine();
2004             m->mothurOut("Total time: " + toString(time(NULL) - begTime) + '\t' + toString((clock() - begClock)/CLOCKS_PER_SEC) + '\n');
2005
2006             
2007                         string namesFileName = flowFileName.substr(0,flowFileName.find_last_of('.')) + ".shhh.names";
2008                         createNamesFile(numSeqs, numUniques, namesFileName, seqNameVector, mapSeqToUnique, mapUniqueToSeq);
2009                         
2010                         if (m->control_pressed) { break; }
2011                         
2012                         m->mothurOut("\nClustering flowgrams...\n");
2013             string listFileName = flowFileName.substr(0,flowFileName.find_last_of('.')) + ".shhh.list";
2014                         cluster(listFileName, distFileName, namesFileName);
2015                         
2016                         if (m->control_pressed) { break; }
2017             
2018             vector<int> otuData;
2019             vector<int> cumNumSeqs;
2020             vector<int> nSeqsPerOTU;
2021             vector<vector<int> > aaP;   //tMaster->aanP:        each row is a different otu / each col contains the sequence indices
2022             vector<vector<int> > aaI;   //tMaster->aanI:        that are in each otu - can't differentiate between aaP and aaI 
2023             vector<int> seqNumber;              //tMaster->anP:         the sequence id number sorted by OTU
2024             vector<int> seqIndex;               //tMaster->anI;         the index that corresponds to seqNumber
2025
2026                         
2027                         int numOTUs = getOTUData(numSeqs, listFileName, otuData, cumNumSeqs, nSeqsPerOTU, aaP, aaI, seqNumber, seqIndex, nameMap);
2028                         
2029                         if (m->control_pressed) { break; }
2030                         
2031                         m->mothurRemove(distFileName);
2032                         m->mothurRemove(namesFileName);
2033                         m->mothurRemove(listFileName);
2034                         
2035             vector<double> dist;                //adDist - distance of sequences to centroids
2036             vector<short> change;               //did the centroid sequence change? 0 = no; 1 = yes
2037             vector<int> centroids;              //the representative flowgram for each cluster m
2038             vector<double> weight;
2039             vector<double> singleTau;   //tMaster->adTau:       1-D Tau vector (1xnumSeqs)
2040             vector<int> nSeqsBreaks;
2041             vector<int> nOTUsBreaks;
2042             
2043                         dist.assign(numSeqs * numOTUs, 0);
2044             change.assign(numOTUs, 1);
2045             centroids.assign(numOTUs, -1);
2046             weight.assign(numOTUs, 0);
2047             singleTau.assign(numSeqs, 1.0);
2048             
2049             nSeqsBreaks.assign(2, 0);
2050             nOTUsBreaks.assign(2, 0);
2051             
2052             nSeqsBreaks[0] = 0;
2053             nSeqsBreaks[1] = numSeqs;
2054             nOTUsBreaks[1] = numOTUs;
2055                         
2056                         if (m->control_pressed) { break; }
2057                         
2058                         double maxDelta = 0;
2059                         int iter = 0;
2060                         
2061                         begClock = clock();
2062                         begTime = time(NULL);
2063             
2064                         m->mothurOut("\nDenoising flowgrams...\n");
2065                         m->mothurOut("iter\tmaxDelta\tnLL\t\tcycletime\n");
2066                         
2067                         while((maxIters == 0 && maxDelta > minDelta) || iter < MIN_ITER || (maxDelta > minDelta && iter < maxIters)){
2068                                 
2069                                 if (m->control_pressed) { break; }
2070                                 
2071                                 double cycClock = clock();
2072                                 unsigned long long cycTime = time(NULL);
2073                                 fill(numOTUs, seqNumber, seqIndex, cumNumSeqs, nSeqsPerOTU, aaP, aaI);
2074                                 
2075                                 if (m->control_pressed) { break; }
2076                 
2077                                 calcCentroidsDriver(numOTUs, cumNumSeqs, nSeqsPerOTU, seqIndex, change, centroids, singleTau, mapSeqToUnique, uniqueFlowgrams, flowDataIntI, lengths, numFlowCells, seqNumber);
2078                                 
2079                                 if (m->control_pressed) { break; }
2080                 
2081                                 maxDelta = getNewWeights(numOTUs, cumNumSeqs, nSeqsPerOTU, singleTau, seqNumber, weight);  
2082                 
2083                 if (m->control_pressed) { break; }
2084                 
2085                                 double nLL = getLikelihood(numSeqs, numOTUs, nSeqsPerOTU, seqNumber, cumNumSeqs, seqIndex, dist, weight); 
2086                 
2087                 if (m->control_pressed) { break; }
2088                 
2089                                 checkCentroids(numOTUs, centroids, weight);
2090                                 
2091                                 if (m->control_pressed) { break; }
2092                                 
2093                                 calcNewDistances(numSeqs, numOTUs, nSeqsPerOTU,  dist, weight, change, centroids, aaP, singleTau, aaI, seqNumber, seqIndex, uniqueFlowgrams, flowDataIntI, numFlowCells, lengths);
2094                                 
2095                                 if (m->control_pressed) { break; }
2096                                 
2097                                 iter++;
2098                                 
2099                                 m->mothurOut(toString(iter) + '\t' + toString(maxDelta) + '\t' + toString(nLL) + '\t' + toString(time(NULL) - cycTime) + '\t' + toString((clock() - cycClock)/(double)CLOCKS_PER_SEC) + '\n');
2100                 
2101                         }       
2102                         
2103                         if (m->control_pressed) { break; }
2104                         
2105                         m->mothurOut("\nFinalizing...\n");
2106                         fill(numOTUs, seqNumber, seqIndex, cumNumSeqs, nSeqsPerOTU, aaP, aaI);
2107                         
2108                         if (m->control_pressed) { break; }
2109                         
2110                         setOTUs(numOTUs, numSeqs, seqNumber, seqIndex, cumNumSeqs, nSeqsPerOTU, otuData, singleTau, dist, aaP, aaI);
2111                         
2112                         if (m->control_pressed) { break; }
2113                         
2114                         vector<int> otuCounts(numOTUs, 0);
2115                         for(int i=0;i<numSeqs;i++)      {       otuCounts[otuData[i]]++;        }
2116                         
2117                         calcCentroidsDriver(numOTUs, cumNumSeqs, nSeqsPerOTU, seqIndex, change, centroids, singleTau, mapSeqToUnique, uniqueFlowgrams, flowDataIntI, lengths, numFlowCells, seqNumber); 
2118             
2119             if (m->control_pressed) { break; }
2120             
2121                         writeQualities(numOTUs, numFlowCells, flowFileName, otuCounts, nSeqsPerOTU, seqNumber, singleTau, flowDataIntI, uniqueFlowgrams, cumNumSeqs, mapUniqueToSeq, seqNameVector, centroids, aaI); if (m->control_pressed) { break; }
2122                         writeSequences(thisCompositeFASTAFileName, numOTUs, numFlowCells, flowFileName, otuCounts, uniqueFlowgrams, seqNameVector, aaI, centroids);if (m->control_pressed) { break; }
2123                         writeNames(thisCompositeNamesFileName, numOTUs, flowFileName, otuCounts, seqNameVector, aaI, nSeqsPerOTU);                              if (m->control_pressed) { break; }
2124                         writeClusters(flowFileName, numOTUs, numFlowCells,otuCounts, centroids, uniqueFlowgrams, seqNameVector, aaI, nSeqsPerOTU, lengths, flowDataIntI);                       if (m->control_pressed) { break; }
2125                         writeGroups(flowFileName, numSeqs, seqNameVector);                                              if (m->control_pressed) { break; }
2126                         
2127                         m->mothurOut("Total time to process " + flowFileName + ":\t" + toString(time(NULL) - begTime) + '\t' + toString((clock() - begClock)/(double)CLOCKS_PER_SEC) + '\n');
2128                 }
2129                 
2130         if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
2131         
2132         return 0;
2133         
2134     }catch(exception& e) {
2135             m->errorOut(e, "ShhherCommand", "driver");
2136             exit(1);
2137     }
2138 }
2139
2140 /**************************************************************************************************/
2141 int ShhherCommand::getFlowData(string filename, vector<string>& thisSeqNameVector, vector<int>& thisLengths, vector<short>& thisFlowDataIntI, map<string, int>& thisNameMap, int& numFlowCells){
2142         try{
2143        
2144                 ifstream flowFile;
2145        
2146                 m->openInputFile(filename, flowFile);
2147                 
2148                 string seqName;
2149                 int currentNumFlowCells;
2150                 float intensity;
2151         thisSeqNameVector.clear();
2152                 thisLengths.clear();
2153                 thisFlowDataIntI.clear();
2154                 thisNameMap.clear();
2155                 
2156                 flowFile >> numFlowCells;
2157                 int index = 0;//pcluster
2158                 while(!flowFile.eof()){
2159                         
2160                         if (m->control_pressed) { break; }
2161                         
2162                         flowFile >> seqName >> currentNumFlowCells;
2163                         thisLengths.push_back(currentNumFlowCells);
2164            
2165                         thisSeqNameVector.push_back(seqName);
2166                         thisNameMap[seqName] = index++;//pcluster
2167
2168                         for(int i=0;i<numFlowCells;i++){
2169                                 flowFile >> intensity;
2170                                 if(intensity > 9.99)    {       intensity = 9.99;       }
2171                                 int intI = int(100 * intensity + 0.0001);
2172                                 thisFlowDataIntI.push_back(intI);
2173                         }
2174                         m->gobble(flowFile);
2175                 }
2176                 flowFile.close();
2177                 
2178                 int numSeqs = thisSeqNameVector.size();         
2179                 
2180                 for(int i=0;i<numSeqs;i++){
2181                         
2182                         if (m->control_pressed) { break; }
2183                         
2184                         int iNumFlowCells = i * numFlowCells;
2185                         for(int j=thisLengths[i];j<numFlowCells;j++){
2186                                 thisFlowDataIntI[iNumFlowCells + j] = 0;
2187                         }
2188                 }
2189         
2190         return numSeqs;
2191                 
2192         }
2193         catch(exception& e) {
2194                 m->errorOut(e, "ShhherCommand", "getFlowData");
2195                 exit(1);
2196         }
2197 }
2198 /**************************************************************************************************/
2199
2200 int ShhherCommand::flowDistParentFork(int numFlowCells, string distFileName, int stopSeq, vector<int>& mapUniqueToSeq, vector<int>& mapSeqToUnique, vector<int>& lengths, vector<double>& flowDataPrI, vector<short>& flowDataIntI){
2201         try{            
2202         
2203                 ostringstream outStream;
2204                 outStream.setf(ios::fixed, ios::floatfield);
2205                 outStream.setf(ios::dec, ios::basefield);
2206                 outStream.setf(ios::showpoint);
2207                 outStream.precision(6);
2208                 
2209                 int begTime = time(NULL);
2210                 double begClock = clock();
2211         
2212                 for(int i=0;i<stopSeq;i++){
2213                         
2214                         if (m->control_pressed) { break; }
2215                         
2216                         for(int j=0;j<i;j++){
2217                                 float flowDistance = calcPairwiseDist(numFlowCells, mapUniqueToSeq[i], mapUniqueToSeq[j], mapSeqToUnique, lengths, flowDataPrI, flowDataIntI);
2218                 
2219                                 if(flowDistance < 1e-6){
2220                                         outStream << mapUniqueToSeq[i] << '\t' << mapUniqueToSeq[j] << '\t' << 0.000000 << endl;
2221                                 }
2222                                 else if(flowDistance <= cutoff){
2223                                         outStream << mapUniqueToSeq[i] << '\t' << mapUniqueToSeq[j] << '\t' << flowDistance << endl;
2224                                 }
2225                         }
2226                         if(i % 100 == 0){
2227                                 m->mothurOut(toString(i) + "\t" + toString(time(NULL) - begTime));
2228                                 m->mothurOut("\t" + toString((clock()-begClock)/CLOCKS_PER_SEC));
2229                                 m->mothurOutEndLine();
2230                         }
2231                 }
2232                 
2233                 ofstream distFile(distFileName.c_str());
2234                 distFile << outStream.str();            
2235                 distFile.close();
2236                 
2237                 if (m->control_pressed) {}
2238                 else {
2239                         m->mothurOut(toString(stopSeq-1) + "\t" + toString(time(NULL) - begTime));
2240                         m->mothurOut("\t" + toString((clock()-begClock)/CLOCKS_PER_SEC));
2241                         m->mothurOutEndLine();
2242                 }
2243         
2244         return 0;
2245         }
2246         catch(exception& e) {
2247                 m->errorOut(e, "ShhherCommand", "flowDistParentFork");
2248                 exit(1);
2249         }
2250 }
2251 /**************************************************************************************************/
2252
2253 float ShhherCommand::calcPairwiseDist(int numFlowCells, int seqA, int seqB, vector<int>& mapSeqToUnique, vector<int>& lengths, vector<double>& flowDataPrI, vector<short>& flowDataIntI){
2254         try{
2255                 int minLength = lengths[mapSeqToUnique[seqA]];
2256                 if(lengths[seqB] < minLength){  minLength = lengths[mapSeqToUnique[seqB]];      }
2257                 
2258                 int ANumFlowCells = seqA * numFlowCells;
2259                 int BNumFlowCells = seqB * numFlowCells;
2260                 
2261                 float dist = 0;
2262                 
2263                 for(int i=0;i<minLength;i++){
2264                         
2265                         if (m->control_pressed) { break; }
2266                         
2267                         int flowAIntI = flowDataIntI[ANumFlowCells + i];
2268                         float flowAPrI = flowDataPrI[ANumFlowCells + i];
2269                         
2270                         int flowBIntI = flowDataIntI[BNumFlowCells + i];
2271                         float flowBPrI = flowDataPrI[BNumFlowCells + i];
2272                         dist += jointLookUp[flowAIntI * NUMBINS + flowBIntI] - flowAPrI - flowBPrI;
2273                 }
2274                 
2275                 dist /= (float) minLength;
2276                 return dist;
2277         }
2278         catch(exception& e) {
2279                 m->errorOut(e, "ShhherCommand", "calcPairwiseDist");
2280                 exit(1);
2281         }
2282 }
2283
2284 /**************************************************************************************************/
2285
2286 int ShhherCommand::getUniques(int numSeqs, int numFlowCells, vector<short>& uniqueFlowgrams, vector<int>& uniqueCount, vector<int>& uniqueLengths, vector<int>& mapSeqToUnique, vector<int>& mapUniqueToSeq, vector<int>& lengths, vector<double>& flowDataPrI, vector<short>& flowDataIntI){
2287         try{
2288                 int numUniques = 0;
2289                 uniqueFlowgrams.assign(numFlowCells * numSeqs, -1);
2290                 uniqueCount.assign(numSeqs, 0);                                                 //      anWeights
2291                 uniqueLengths.assign(numSeqs, 0);
2292                 mapSeqToUnique.assign(numSeqs, -1);
2293                 mapUniqueToSeq.assign(numSeqs, -1);
2294                 
2295                 vector<short> uniqueFlowDataIntI(numFlowCells * numSeqs, -1);
2296                 
2297                 for(int i=0;i<numSeqs;i++){
2298                         
2299                         if (m->control_pressed) { break; }
2300                         
2301                         int index = 0;
2302                         
2303                         vector<short> current(numFlowCells);
2304                         for(int j=0;j<numFlowCells;j++){
2305                                 current[j] = short(((flowDataIntI[i * numFlowCells + j] + 50.0)/100.0));
2306                         }
2307             
2308                         for(int j=0;j<numUniques;j++){
2309                                 int offset = j * numFlowCells;
2310                                 bool toEnd = 1;
2311                                 
2312                                 int shorterLength;
2313                                 if(lengths[i] < uniqueLengths[j])       {       shorterLength = lengths[i];                     }
2314                                 else                                                            {       shorterLength = uniqueLengths[j];       }
2315                 
2316                                 for(int k=0;k<shorterLength;k++){
2317                                         if(current[k] != uniqueFlowgrams[offset + k]){
2318                                                 toEnd = 0;
2319                                                 break;
2320                                         }
2321                                 }
2322                                 
2323                                 if(toEnd){
2324                                         mapSeqToUnique[i] = j;
2325                                         uniqueCount[j]++;
2326                                         index = j;
2327                                         if(lengths[i] > uniqueLengths[j])       {       uniqueLengths[j] = lengths[i];  }
2328                                         break;
2329                                 }
2330                                 index++;
2331                         }
2332                         
2333                         if(index == numUniques){
2334                                 uniqueLengths[numUniques] = lengths[i];
2335                                 uniqueCount[numUniques] = 1;
2336                                 mapSeqToUnique[i] = numUniques;//anMap
2337                                 mapUniqueToSeq[numUniques] = i;//anF
2338                                 
2339                                 for(int k=0;k<numFlowCells;k++){
2340                                         uniqueFlowgrams[numUniques * numFlowCells + k] = current[k];
2341                                         uniqueFlowDataIntI[numUniques * numFlowCells + k] = flowDataIntI[i * numFlowCells + k];
2342                                 }
2343                                 
2344                                 numUniques++;
2345                         }
2346                 }
2347                 uniqueFlowDataIntI.resize(numFlowCells * numUniques);
2348                 uniqueLengths.resize(numUniques);       
2349                 
2350                 flowDataPrI.resize(numSeqs * numFlowCells, 0);
2351                 for(int i=0;i<flowDataPrI.size();i++)   {       if (m->control_pressed) { break; } flowDataPrI[i] = getProbIntensity(flowDataIntI[i]);          }
2352         
2353         return numUniques;
2354         }
2355         catch(exception& e) {
2356                 m->errorOut(e, "ShhherCommand", "getUniques");
2357                 exit(1);
2358         }
2359 }
2360 /**************************************************************************************************/
2361 int ShhherCommand::createNamesFile(int numSeqs, int numUniques, string filename, vector<string>& seqNameVector, vector<int>& mapSeqToUnique, vector<int>& mapUniqueToSeq){
2362         try{
2363                 
2364                 vector<string> duplicateNames(numUniques, "");
2365                 for(int i=0;i<numSeqs;i++){
2366                         duplicateNames[mapSeqToUnique[i]] += seqNameVector[i] + ',';
2367                 }
2368                 
2369                 ofstream nameFile;
2370                 m->openOutputFile(filename, nameFile);
2371                 
2372                 for(int i=0;i<numUniques;i++){
2373                         
2374                         if (m->control_pressed) { break; }
2375                         
2376             //                  nameFile << seqNameVector[mapUniqueToSeq[i]] << '\t' << duplicateNames[i].substr(0, duplicateNames[i].find_last_of(',')) << endl;
2377                         nameFile << mapUniqueToSeq[i] << '\t' << duplicateNames[i].substr(0, duplicateNames[i].find_last_of(',')) << endl;
2378                 }
2379                 
2380                 nameFile.close();
2381         
2382                 return 0;
2383         }
2384         catch(exception& e) {
2385                 m->errorOut(e, "ShhherCommand", "createNamesFile");
2386                 exit(1);
2387         }
2388 }
2389 //**********************************************************************************************************************
2390
2391 int ShhherCommand::cluster(string filename, string distFileName, string namesFileName){
2392         try {
2393                 
2394                 ReadMatrix* read = new ReadColumnMatrix(distFileName);  
2395                 read->setCutoff(cutoff);
2396                 
2397                 NameAssignment* clusterNameMap = new NameAssignment(namesFileName);
2398                 clusterNameMap->readMap();
2399                 read->read(clusterNameMap);
2400                 
2401                 ListVector* list = read->getListVector();
2402                 SparseMatrix* matrix = read->getMatrix();
2403                 
2404                 delete read; 
2405                 delete clusterNameMap; 
2406         
2407                 RAbundVector* rabund = new RAbundVector(list->getRAbundVector());
2408                 
2409                 Cluster* cluster = new CompleteLinkage(rabund, list, matrix, cutoff, "furthest"); 
2410                 string tag = cluster->getTag();
2411                 
2412                 double clusterCutoff = cutoff;
2413                 while (matrix->getSmallDist() <= clusterCutoff && matrix->getNNodes() > 0){
2414                         
2415                         if (m->control_pressed) { break; }
2416                         
2417                         cluster->update(clusterCutoff);
2418                 }
2419                 
2420                 list->setLabel(toString(cutoff));
2421                 
2422                 ofstream listFile;
2423                 m->openOutputFile(filename, listFile);
2424                 list->print(listFile);
2425                 listFile.close();
2426                 
2427                 delete matrix;  delete cluster; delete rabund; delete list;
2428         
2429                 return 0;
2430         }
2431         catch(exception& e) {
2432                 m->errorOut(e, "ShhherCommand", "cluster");
2433                 exit(1);        
2434         }               
2435 }
2436 /**************************************************************************************************/
2437
2438 int ShhherCommand::getOTUData(int numSeqs, string fileName,  vector<int>& otuData,
2439                                vector<int>& cumNumSeqs,
2440                                vector<int>& nSeqsPerOTU,
2441                                vector<vector<int> >& aaP,       //tMaster->aanP:        each row is a different otu / each col contains the sequence indices
2442                                vector<vector<int> >& aaI,       //tMaster->aanI:        that are in each otu - can't differentiate between aaP and aaI 
2443                                vector<int>& seqNumber,          //tMaster->anP:         the sequence id number sorted by OTU
2444                                vector<int>& seqIndex,
2445                                map<string, int>& nameMap){
2446         try {
2447         
2448                 ifstream listFile;
2449                 m->openInputFile(fileName, listFile);
2450                 string label;
2451         int numOTUs;
2452                 
2453                 listFile >> label >> numOTUs;
2454         
2455                 otuData.assign(numSeqs, 0);
2456                 cumNumSeqs.assign(numOTUs, 0);
2457                 nSeqsPerOTU.assign(numOTUs, 0);
2458                 aaP.clear();aaP.resize(numOTUs);
2459                 
2460                 seqNumber.clear();
2461                 aaI.clear();
2462                 seqIndex.clear();
2463                 
2464                 string singleOTU = "";
2465                 
2466                 for(int i=0;i<numOTUs;i++){
2467                         
2468                         if (m->control_pressed) { break; }
2469             
2470                         listFile >> singleOTU;
2471                         
2472                         istringstream otuString(singleOTU);
2473             
2474                         while(otuString){
2475                                 
2476                                 string seqName = "";
2477                                 
2478                                 for(int j=0;j<singleOTU.length();j++){
2479                                         char letter = otuString.get();
2480                                         
2481                                         if(letter != ','){
2482                                                 seqName += letter;
2483                                         }
2484                                         else{
2485                                                 map<string,int>::iterator nmIt = nameMap.find(seqName);
2486                                                 int index = nmIt->second;
2487                                                 
2488                                                 nameMap.erase(nmIt);
2489                                                 
2490                                                 otuData[index] = i;
2491                                                 nSeqsPerOTU[i]++;
2492                                                 aaP[i].push_back(index);
2493                                                 seqName = "";
2494                                         }
2495                                 }
2496                                 
2497                                 map<string,int>::iterator nmIt = nameMap.find(seqName);
2498                 
2499                                 int index = nmIt->second;
2500                                 nameMap.erase(nmIt);
2501                 
2502                                 otuData[index] = i;
2503                                 nSeqsPerOTU[i]++;
2504                                 aaP[i].push_back(index);        
2505                                 
2506                                 otuString.get();
2507                         }
2508                         
2509                         sort(aaP[i].begin(), aaP[i].end());
2510                         for(int j=0;j<nSeqsPerOTU[i];j++){
2511                                 seqNumber.push_back(aaP[i][j]);
2512                         }
2513                         for(int j=nSeqsPerOTU[i];j<numSeqs;j++){
2514                                 aaP[i].push_back(0);
2515                         }
2516                         
2517                         
2518                 }
2519                 
2520                 for(int i=1;i<numOTUs;i++){
2521                         cumNumSeqs[i] = cumNumSeqs[i-1] + nSeqsPerOTU[i-1];
2522                 }
2523                 aaI = aaP;
2524                 seqIndex = seqNumber;
2525                 
2526                 listFile.close();       
2527         
2528         return numOTUs;
2529                 
2530         }
2531         catch(exception& e) {
2532                 m->errorOut(e, "ShhherCommand", "getOTUData");
2533                 exit(1);        
2534         }               
2535 }
2536 /**************************************************************************************************/
2537
2538 int ShhherCommand::calcCentroidsDriver(int numOTUs, 
2539                                           vector<int>& cumNumSeqs,
2540                                           vector<int>& nSeqsPerOTU,
2541                                           vector<int>& seqIndex,
2542                                           vector<short>& change,                //did the centroid sequence change? 0 = no; 1 = yes
2543                                           vector<int>& centroids,               //the representative flowgram for each cluster m
2544                                           vector<double>& singleTau,    //tMaster->adTau:       1-D Tau vector (1xnumSeqs)
2545                                           vector<int>& mapSeqToUnique,
2546                                           vector<short>& uniqueFlowgrams,
2547                                           vector<short>& flowDataIntI,
2548                                           vector<int>& lengths,
2549                                           int numFlowCells,
2550                                           vector<int>& seqNumber){                          
2551         
2552         //this function gets the most likely homopolymer length at a flow position for a group of sequences
2553         //within an otu
2554         
2555         try{
2556                 
2557                 for(int i=0;i<numOTUs;i++){
2558                         
2559                         if (m->control_pressed) { break; }
2560                         
2561                         double count = 0;
2562                         int position = 0;
2563                         int minFlowGram = 100000000;
2564                         double minFlowValue = 1e8;
2565                         change[i] = 0; //FALSE
2566                         
2567                         for(int j=0;j<nSeqsPerOTU[i];j++){
2568                                 count += singleTau[seqNumber[cumNumSeqs[i] + j]];
2569                         }
2570             
2571                         if(nSeqsPerOTU[i] > 0 && count > MIN_COUNT){
2572                                 vector<double> adF(nSeqsPerOTU[i]);
2573                                 vector<int> anL(nSeqsPerOTU[i]);
2574                                 
2575                                 for(int j=0;j<nSeqsPerOTU[i];j++){
2576                                         int index = cumNumSeqs[i] + j;
2577                                         int nI = seqIndex[index];
2578                                         int nIU = mapSeqToUnique[nI];
2579                                         
2580                                         int k;
2581                                         for(k=0;k<position;k++){
2582                                                 if(nIU == anL[k]){
2583                                                         break;
2584                                                 }
2585                                         }
2586                                         if(k == position){
2587                                                 anL[position] = nIU;
2588                                                 adF[position] = 0.0000;
2589                                                 position++;
2590                                         }                                               
2591                                 }
2592                                 
2593                                 for(int j=0;j<nSeqsPerOTU[i];j++){
2594                                         int index = cumNumSeqs[i] + j;
2595                                         int nI = seqIndex[index];
2596                                         
2597                                         double tauValue = singleTau[seqNumber[index]];
2598                                         
2599                                         for(int k=0;k<position;k++){
2600                                                 double dist = getDistToCentroid(anL[k], nI, lengths[nI], uniqueFlowgrams, flowDataIntI, numFlowCells);
2601                                                 adF[k] += dist * tauValue;
2602                                         }
2603                                 }
2604                                 
2605                                 for(int j=0;j<position;j++){
2606                                         if(adF[j] < minFlowValue){
2607                                                 minFlowGram = j;
2608                                                 minFlowValue = adF[j];
2609                                         }
2610                                 }
2611                                 
2612                                 if(centroids[i] != anL[minFlowGram]){
2613                                         change[i] = 1;
2614                                         centroids[i] = anL[minFlowGram];
2615                                 }
2616                         }
2617                         else if(centroids[i] != -1){
2618                                 change[i] = 1;
2619                                 centroids[i] = -1;                      
2620                         }
2621                 }
2622         
2623         return 0;
2624         }
2625         catch(exception& e) {
2626                 m->errorOut(e, "ShhherCommand", "calcCentroidsDriver");
2627                 exit(1);        
2628         }               
2629 }
2630 /**************************************************************************************************/
2631
2632 double ShhherCommand::getDistToCentroid(int cent, int flow, int length, vector<short>& uniqueFlowgrams,
2633                                         vector<short>& flowDataIntI, int numFlowCells){
2634         try{
2635                 
2636                 int flowAValue = cent * numFlowCells;
2637                 int flowBValue = flow * numFlowCells;
2638                 
2639                 double dist = 0;
2640         
2641                 for(int i=0;i<length;i++){
2642                         dist += singleLookUp[uniqueFlowgrams[flowAValue] * NUMBINS + flowDataIntI[flowBValue]];
2643                         flowAValue++;
2644                         flowBValue++;
2645                 }
2646                 
2647                 return dist / (double)length;
2648         }
2649         catch(exception& e) {
2650                 m->errorOut(e, "ShhherCommand", "getDistToCentroid");
2651                 exit(1);        
2652         }               
2653 }
2654 /**************************************************************************************************/
2655
2656 double ShhherCommand::getNewWeights(int numOTUs, vector<int>& cumNumSeqs, vector<int>& nSeqsPerOTU, vector<double>& singleTau, vector<int>& seqNumber, vector<double>& weight){
2657         try{
2658                 
2659                 double maxChange = 0;
2660                 
2661                 for(int i=0;i<numOTUs;i++){
2662                         
2663                         if (m->control_pressed) { break; }
2664                         
2665                         double difference = weight[i];
2666                         weight[i] = 0;
2667                         
2668                         for(int j=0;j<nSeqsPerOTU[i];j++){
2669                                 int index = cumNumSeqs[i] + j;
2670                                 double tauValue = singleTau[seqNumber[index]];
2671                                 weight[i] += tauValue;
2672                         }
2673                         
2674                         difference = fabs(weight[i] - difference);
2675                         if(difference > maxChange){     maxChange = difference; }
2676                 }
2677                 return maxChange;
2678         }
2679         catch(exception& e) {
2680                 m->errorOut(e, "ShhherCommand", "getNewWeights");
2681                 exit(1);        
2682         }               
2683 }
2684
2685 /**************************************************************************************************/
2686
2687 double ShhherCommand::getLikelihood(int numSeqs, int numOTUs, vector<int>& nSeqsPerOTU, vector<int>& seqNumber, vector<int>& cumNumSeqs, vector<int>& seqIndex, vector<double>& dist, vector<double>& weight){
2688         
2689         try{
2690                 
2691                 vector<long double> P(numSeqs, 0);
2692                 int effNumOTUs = 0;
2693                 
2694                 for(int i=0;i<numOTUs;i++){
2695                         if(weight[i] > MIN_WEIGHT){
2696                                 effNumOTUs++;
2697                         }
2698                 }
2699                 
2700                 string hold;
2701                 for(int i=0;i<numOTUs;i++){
2702                         
2703                         if (m->control_pressed) { break; }
2704                         
2705                         for(int j=0;j<nSeqsPerOTU[i];j++){
2706                                 int index = cumNumSeqs[i] + j;
2707                                 int nI = seqIndex[index];
2708                                 double singleDist = dist[seqNumber[index]];
2709                                 
2710                                 P[nI] += weight[i] * exp(-singleDist * sigma);
2711                         }
2712                 }
2713                 double nLL = 0.00;
2714                 for(int i=0;i<numSeqs;i++){
2715                         if(P[i] == 0){  P[i] = DBL_EPSILON;     }
2716             
2717                         nLL += -log(P[i]);
2718                 }
2719                 
2720                 nLL = nLL -(double)numSeqs * log(sigma);
2721         
2722                 return nLL; 
2723         }
2724         catch(exception& e) {
2725                 m->errorOut(e, "ShhherCommand", "getNewWeights");
2726                 exit(1);        
2727         }               
2728 }
2729
2730 /**************************************************************************************************/
2731
2732 int ShhherCommand::checkCentroids(int numOTUs, vector<int>& centroids, vector<double>& weight){
2733         try{
2734                 vector<int> unique(numOTUs, 1);
2735                 
2736                 for(int i=0;i<numOTUs;i++){
2737                         if(centroids[i] == -1 || weight[i] < MIN_WEIGHT){
2738                                 unique[i] = -1;
2739                         }
2740                 }
2741                 
2742                 for(int i=0;i<numOTUs;i++){
2743                         
2744                         if (m->control_pressed) { break; }
2745                         
2746                         if(unique[i] == 1){
2747                                 for(int j=i+1;j<numOTUs;j++){
2748                                         if(unique[j] == 1){
2749                                                 
2750                                                 if(centroids[j] == centroids[i]){
2751                                                         unique[j] = 0;
2752                                                         centroids[j] = -1;
2753                                                         
2754                                                         weight[i] += weight[j];
2755                                                         weight[j] = 0.0;
2756                                                 }
2757                                         }
2758                                 }
2759                         }
2760                 }
2761         
2762         return 0;
2763         }
2764         catch(exception& e) {
2765                 m->errorOut(e, "ShhherCommand", "checkCentroids");
2766                 exit(1);        
2767         }               
2768 }
2769 /**************************************************************************************************/
2770
2771 void ShhherCommand::calcNewDistances(int numSeqs, int numOTUs, vector<int>& nSeqsPerOTU, vector<double>& dist, 
2772                                      vector<double>& weight, vector<short>& change, vector<int>& centroids,
2773                                      vector<vector<int> >& aaP, vector<double>& singleTau, vector<vector<int> >& aaI,   
2774                                      vector<int>& seqNumber, vector<int>& seqIndex,
2775                                      vector<short>& uniqueFlowgrams,
2776                                      vector<short>& flowDataIntI, int numFlowCells, vector<int>& lengths){
2777         
2778         try{
2779                 
2780                 int total = 0;
2781                 vector<double> newTau(numOTUs,0);
2782                 vector<double> norms(numSeqs, 0);
2783                 nSeqsPerOTU.assign(numOTUs, 0);
2784         
2785                 for(int i=0;i<numSeqs;i++){
2786                         
2787                         if (m->control_pressed) { break; }
2788                         
2789                         int indexOffset = i * numOTUs;
2790             
2791                         double offset = 1e8;
2792                         
2793                         for(int j=0;j<numOTUs;j++){
2794                 
2795                                 if(weight[j] > MIN_WEIGHT && change[j] == 1){
2796                                         dist[indexOffset + j] = getDistToCentroid(centroids[j], i, lengths[i], uniqueFlowgrams, flowDataIntI, numFlowCells);
2797                                 }
2798                 
2799                                 if(weight[j] > MIN_WEIGHT && dist[indexOffset + j] < offset){
2800                                         offset = dist[indexOffset + j];
2801                                 }
2802                         }
2803             
2804                         for(int j=0;j<numOTUs;j++){
2805                                 if(weight[j] > MIN_WEIGHT){
2806                                         newTau[j] = exp(sigma * (-dist[indexOffset + j] + offset)) * weight[j];
2807                                         norms[i] += newTau[j];
2808                                 }
2809                                 else{
2810                                         newTau[j] = 0.0;
2811                                 }
2812                         }
2813             
2814                         for(int j=0;j<numOTUs;j++){
2815                                 newTau[j] /= norms[i];
2816                         }
2817             
2818                         for(int j=0;j<numOTUs;j++){
2819                                 if(newTau[j] > MIN_TAU){
2820                                         
2821                                         int oldTotal = total;
2822                                         
2823                                         total++;
2824                                         
2825                                         singleTau.resize(total, 0);
2826                                         seqNumber.resize(total, 0);
2827                                         seqIndex.resize(total, 0);
2828                                         
2829                                         singleTau[oldTotal] = newTau[j];
2830                                         
2831                                         aaP[j][nSeqsPerOTU[j]] = oldTotal;
2832                                         aaI[j][nSeqsPerOTU[j]] = i;
2833                                         nSeqsPerOTU[j]++;
2834                                 }
2835                         }
2836             
2837                 }
2838         
2839         }
2840         catch(exception& e) {
2841                 m->errorOut(e, "ShhherCommand", "calcNewDistances");
2842                 exit(1);        
2843         }               
2844 }
2845 /**************************************************************************************************/
2846
2847 int ShhherCommand::fill(int numOTUs, vector<int>& seqNumber, vector<int>& seqIndex, vector<int>& cumNumSeqs, vector<int>& nSeqsPerOTU, vector<vector<int> >& aaP, vector<vector<int> >& aaI){
2848         try {
2849                 int index = 0;
2850                 for(int i=0;i<numOTUs;i++){
2851                         
2852                         if (m->control_pressed) { return 0; }
2853                         
2854                         cumNumSeqs[i] = index;
2855                         for(int j=0;j<nSeqsPerOTU[i];j++){
2856                                 seqNumber[index] = aaP[i][j];
2857                                 seqIndex[index] = aaI[i][j];
2858                                 
2859                                 index++;
2860                         }
2861                 }
2862         
2863         return 0; 
2864         }
2865         catch(exception& e) {
2866                 m->errorOut(e, "ShhherCommand", "fill");
2867                 exit(1);        
2868         }               
2869 }
2870 /**************************************************************************************************/
2871
2872 void ShhherCommand::setOTUs(int numOTUs, int numSeqs, vector<int>& seqNumber, vector<int>& seqIndex, vector<int>& cumNumSeqs, vector<int>& nSeqsPerOTU,
2873                             vector<int>& otuData, vector<double>& singleTau, vector<double>& dist, vector<vector<int> >& aaP, vector<vector<int> >& aaI){
2874         
2875         try {
2876                 vector<double> bigTauMatrix(numOTUs * numSeqs, 0.0000);
2877                 
2878                 for(int i=0;i<numOTUs;i++){
2879                         
2880                         if (m->control_pressed) { break; }
2881                         
2882                         for(int j=0;j<nSeqsPerOTU[i];j++){
2883                                 int index = cumNumSeqs[i] + j;
2884                                 double tauValue = singleTau[seqNumber[index]];
2885                                 int sIndex = seqIndex[index];
2886                                 bigTauMatrix[sIndex * numOTUs + i] = tauValue;                          
2887                         }
2888                 }
2889                 
2890                 for(int i=0;i<numSeqs;i++){
2891                         double maxTau = -1.0000;
2892                         int maxOTU = -1;
2893                         for(int j=0;j<numOTUs;j++){
2894                                 if(bigTauMatrix[i * numOTUs + j] > maxTau){
2895                                         maxTau = bigTauMatrix[i * numOTUs + j];
2896                                         maxOTU = j;
2897                                 }
2898                         }
2899                         
2900                         otuData[i] = maxOTU;
2901                 }
2902                 
2903                 nSeqsPerOTU.assign(numOTUs, 0);         
2904                 
2905                 for(int i=0;i<numSeqs;i++){
2906                         int index = otuData[i];
2907                         
2908                         singleTau[i] = 1.0000;
2909                         dist[i] = 0.0000;
2910                         
2911                         aaP[index][nSeqsPerOTU[index]] = i;
2912                         aaI[index][nSeqsPerOTU[index]] = i;
2913                         
2914                         nSeqsPerOTU[index]++;
2915                 }
2916         
2917                 fill(numOTUs, seqNumber, seqIndex, cumNumSeqs, nSeqsPerOTU, aaP, aaI);  
2918         }
2919         catch(exception& e) {
2920                 m->errorOut(e, "ShhherCommand", "setOTUs");
2921                 exit(1);        
2922         }               
2923 }
2924 /**************************************************************************************************/
2925
2926 void ShhherCommand::writeQualities(int numOTUs, int numFlowCells, string filename, vector<int> otuCounts, vector<int>& nSeqsPerOTU, vector<int>& seqNumber,
2927                                    vector<double>& singleTau, vector<short>& flowDataIntI, vector<short>& uniqueFlowgrams, vector<int>& cumNumSeqs,
2928                                    vector<int>& mapUniqueToSeq, vector<string>& seqNameVector, vector<int>& centroids, vector<vector<int> >& aaI){
2929         
2930         try {
2931                 string thisOutputDir = outputDir;
2932                 if (outputDir == "") {  thisOutputDir += m->hasPath(filename);  }
2933                 string qualityFileName = thisOutputDir + m->getRootName(m->getSimpleName(filename)) + "shhh.qual";
2934         
2935                 ofstream qualityFile;
2936                 m->openOutputFile(qualityFileName, qualityFile);
2937         
2938                 qualityFile.setf(ios::fixed, ios::floatfield);
2939                 qualityFile.setf(ios::showpoint);
2940                 qualityFile << setprecision(6);
2941                 
2942                 vector<vector<int> > qualities(numOTUs);
2943                 vector<double> pr(HOMOPS, 0);
2944                 
2945                 
2946                 for(int i=0;i<numOTUs;i++){
2947                         
2948                         if (m->control_pressed) { break; }
2949                         
2950                         int index = 0;
2951                         int base = 0;
2952                         
2953                         if(nSeqsPerOTU[i] > 0){
2954                                 qualities[i].assign(1024, -1);
2955                                 
2956                                 while(index < numFlowCells){
2957                                         double maxPrValue = 1e8;
2958                                         short maxPrIndex = -1;
2959                                         double count = 0.0000;
2960                                         
2961                                         pr.assign(HOMOPS, 0);
2962                                         
2963                                         for(int j=0;j<nSeqsPerOTU[i];j++){
2964                                                 int lIndex = cumNumSeqs[i] + j;
2965                                                 double tauValue = singleTau[seqNumber[lIndex]];
2966                                                 int sequenceIndex = aaI[i][j];
2967                                                 short intensity = flowDataIntI[sequenceIndex * numFlowCells + index];
2968                                                 
2969                                                 count += tauValue;
2970                                                 
2971                                                 for(int s=0;s<HOMOPS;s++){
2972                                                         pr[s] += tauValue * singleLookUp[s * NUMBINS + intensity];
2973                                                 }
2974                                         }
2975                                         
2976                                         maxPrIndex = uniqueFlowgrams[centroids[i] * numFlowCells + index];
2977                                         maxPrValue = pr[maxPrIndex];
2978                                         
2979                                         if(count > MIN_COUNT){
2980                                                 double U = 0.0000;
2981                                                 double norm = 0.0000;
2982                                                 
2983                                                 for(int s=0;s<HOMOPS;s++){
2984                                                         norm += exp(-(pr[s] - maxPrValue));
2985                                                 }
2986                                                 
2987                                                 for(int s=1;s<=maxPrIndex;s++){
2988                                                         int value = 0;
2989                                                         double temp = 0.0000;
2990                                                         
2991                                                         U += exp(-(pr[s-1]-maxPrValue))/norm;
2992                                                         
2993                                                         if(U>0.00){
2994                                                                 temp = log10(U);
2995                                                         }
2996                                                         else{
2997                                                                 temp = -10.1;
2998                                                         }
2999                                                         temp = floor(-10 * temp);
3000                                                         value = (int)floor(temp);
3001                                                         if(value > 100){        value = 100;    }
3002                                                         
3003                                                         qualities[i][base] = (int)value;
3004                                                         base++;
3005                                                 }
3006                                         }
3007                                         
3008                                         index++;
3009                                 }
3010                         }
3011                         
3012                         
3013                         if(otuCounts[i] > 0){
3014                                 qualityFile << '>' << seqNameVector[mapUniqueToSeq[i]] << endl;
3015                                 
3016                                 int j=4;        //need to get past the first four bases
3017                                 while(qualities[i][j] != -1){
3018                                         qualityFile << qualities[i][j] << ' ';
3019                                         j++;
3020                                 }
3021                                 qualityFile << endl;
3022                         }
3023                 }
3024                 qualityFile.close();
3025                 outputNames.push_back(qualityFileName);
3026         
3027         }
3028         catch(exception& e) {
3029                 m->errorOut(e, "ShhherCommand", "writeQualities");
3030                 exit(1);        
3031         }               
3032 }
3033
3034 /**************************************************************************************************/
3035
3036 void ShhherCommand::writeSequences(string thisCompositeFASTAFileName, int numOTUs, int numFlowCells, string filename, vector<int> otuCounts, vector<short>& uniqueFlowgrams, vector<string>& seqNameVector, vector<vector<int> >& aaI, vector<int>& centroids){
3037         try {
3038                 string thisOutputDir = outputDir;
3039                 if (outputDir == "") {  thisOutputDir += m->hasPath(filename);  }
3040                 string fastaFileName = thisOutputDir + m->getRootName(m->getSimpleName(filename)) + "shhh.fasta";
3041                 ofstream fastaFile;
3042                 m->openOutputFile(fastaFileName, fastaFile);
3043                 
3044                 vector<string> names(numOTUs, "");
3045                 
3046                 for(int i=0;i<numOTUs;i++){
3047                         
3048                         if (m->control_pressed) { break; }
3049                         
3050                         int index = centroids[i];
3051                         
3052                         if(otuCounts[i] > 0){
3053                                 fastaFile << '>' << seqNameVector[aaI[i][0]] << endl;
3054                                 
3055                                 string newSeq = "";
3056                                 
3057                                 for(int j=0;j<numFlowCells;j++){
3058                                         
3059                                         char base = flowOrder[j % 4];
3060                                         for(int k=0;k<uniqueFlowgrams[index * numFlowCells + j];k++){
3061                                                 newSeq += base;
3062                                         }
3063                                 }
3064                                 
3065                                 fastaFile << newSeq.substr(4) << endl;
3066                         }
3067                 }
3068                 fastaFile.close();
3069         
3070                 outputNames.push_back(fastaFileName);
3071         
3072                 if(thisCompositeFASTAFileName != ""){
3073                         m->appendFiles(fastaFileName, thisCompositeFASTAFileName);
3074                 }
3075         }
3076         catch(exception& e) {
3077                 m->errorOut(e, "ShhherCommand", "writeSequences");
3078                 exit(1);        
3079         }               
3080 }
3081
3082 /**************************************************************************************************/
3083
3084 void ShhherCommand::writeNames(string thisCompositeNamesFileName, int numOTUs, string filename, vector<int> otuCounts, vector<string>& seqNameVector, vector<vector<int> >& aaI, vector<int>& nSeqsPerOTU){
3085         try {
3086                 string thisOutputDir = outputDir;
3087                 if (outputDir == "") {  thisOutputDir += m->hasPath(filename);  }
3088                 string nameFileName = thisOutputDir + m->getRootName(m->getSimpleName(filename)) + "shhh.names";
3089                 ofstream nameFile;
3090                 m->openOutputFile(nameFileName, nameFile);
3091                 
3092                 for(int i=0;i<numOTUs;i++){
3093                         
3094                         if (m->control_pressed) { break; }
3095                         
3096                         if(otuCounts[i] > 0){
3097                                 nameFile << seqNameVector[aaI[i][0]] << '\t' << seqNameVector[aaI[i][0]];
3098                                 
3099                                 for(int j=1;j<nSeqsPerOTU[i];j++){
3100                                         nameFile << ',' << seqNameVector[aaI[i][j]];
3101                                 }
3102                                 
3103                                 nameFile << endl;
3104                         }
3105                 }
3106                 nameFile.close();
3107                 outputNames.push_back(nameFileName);
3108                 
3109                 
3110                 if(thisCompositeNamesFileName != ""){
3111                         m->appendFiles(nameFileName, thisCompositeNamesFileName);
3112                 }               
3113         }
3114         catch(exception& e) {
3115                 m->errorOut(e, "ShhherCommand", "writeNames");
3116                 exit(1);        
3117         }               
3118 }
3119
3120 /**************************************************************************************************/
3121
3122 void ShhherCommand::writeGroups(string filename, int numSeqs, vector<string>& seqNameVector){
3123         try {
3124                 string thisOutputDir = outputDir;
3125                 if (outputDir == "") {  thisOutputDir += m->hasPath(filename);  }
3126                 string fileRoot = thisOutputDir + m->getRootName(m->getSimpleName(filename));
3127                 string groupFileName = fileRoot + "shhh.groups";
3128                 ofstream groupFile;
3129                 m->openOutputFile(groupFileName, groupFile);
3130                 
3131                 for(int i=0;i<numSeqs;i++){
3132                         if (m->control_pressed) { break; }
3133                         groupFile << seqNameVector[i] << '\t' << fileRoot << endl;
3134                 }
3135                 groupFile.close();
3136                 outputNames.push_back(groupFileName);
3137         
3138         }
3139         catch(exception& e) {
3140                 m->errorOut(e, "ShhherCommand", "writeGroups");
3141                 exit(1);        
3142         }               
3143 }
3144
3145 /**************************************************************************************************/
3146
3147 void ShhherCommand::writeClusters(string filename, int numOTUs, int numFlowCells, vector<int> otuCounts, vector<int>& centroids, vector<short>& uniqueFlowgrams, vector<string>& seqNameVector, vector<vector<int> >& aaI, vector<int>& nSeqsPerOTU, vector<int>& lengths, vector<short>& flowDataIntI){
3148         try {
3149                 string thisOutputDir = outputDir;
3150                 if (outputDir == "") {  thisOutputDir += m->hasPath(filename);  }
3151                 string otuCountsFileName = thisOutputDir + m->getRootName(m->getSimpleName(filename)) + "shhh.counts";
3152                 ofstream otuCountsFile;
3153                 m->openOutputFile(otuCountsFileName, otuCountsFile);
3154                 
3155                 string bases = flowOrder;
3156                 
3157                 for(int i=0;i<numOTUs;i++){
3158                         
3159                         if (m->control_pressed) {
3160                                 break;
3161                         }
3162                         //output the translated version of the centroid sequence for the otu
3163                         if(otuCounts[i] > 0){
3164                                 int index = centroids[i];
3165                                 
3166                                 otuCountsFile << "ideal\t";
3167                                 for(int j=8;j<numFlowCells;j++){
3168                                         char base = bases[j % 4];
3169                                         for(int s=0;s<uniqueFlowgrams[index * numFlowCells + j];s++){
3170                                                 otuCountsFile << base;
3171                                         }
3172                                 }
3173                                 otuCountsFile << endl;
3174                                 
3175                                 for(int j=0;j<nSeqsPerOTU[i];j++){
3176                                         int sequence = aaI[i][j];
3177                                         otuCountsFile << seqNameVector[sequence] << '\t';
3178                                         
3179                                         string newSeq = "";
3180                                         
3181                                         for(int k=0;k<lengths[sequence];k++){
3182                                                 char base = bases[k % 4];
3183                                                 int freq = int(0.01 * (double)flowDataIntI[sequence * numFlowCells + k] + 0.5);
3184                         
3185                                                 for(int s=0;s<freq;s++){
3186                                                         newSeq += base;
3187                                                         //otuCountsFile << base;
3188                                                 }
3189                                         }
3190                                         otuCountsFile << newSeq.substr(4) << endl;
3191                                 }
3192                                 otuCountsFile << endl;
3193                         }
3194                 }
3195                 otuCountsFile.close();
3196                 outputNames.push_back(otuCountsFileName);
3197         
3198         }
3199         catch(exception& e) {
3200                 m->errorOut(e, "ShhherCommand", "writeClusters");
3201                 exit(1);        
3202         }               
3203 }
3204
3205 /**************************************************************************************************/
3206
3207 void ShhherCommand::getSingleLookUp(){
3208         try{
3209                 //      these are the -log probabilities that a signal corresponds to a particular homopolymer length
3210                 singleLookUp.assign(HOMOPS * NUMBINS, 0);
3211                 
3212                 int index = 0;
3213                 ifstream lookUpFile;
3214                 m->openInputFile(lookupFileName, lookUpFile);
3215                 
3216                 for(int i=0;i<HOMOPS;i++){
3217                         
3218                         if (m->control_pressed) { break; }
3219                         
3220                         float logFracFreq;
3221                         lookUpFile >> logFracFreq;
3222                         
3223                         for(int j=0;j<NUMBINS;j++)      {
3224                                 lookUpFile >> singleLookUp[index];
3225                                 index++;
3226                         }
3227                 }       
3228                 lookUpFile.close();
3229         }
3230         catch(exception& e) {
3231                 m->errorOut(e, "ShhherCommand", "getSingleLookUp");
3232                 exit(1);
3233         }
3234 }
3235
3236 /**************************************************************************************************/
3237
3238 void ShhherCommand::getJointLookUp(){
3239         try{
3240                 
3241                 //      the most likely joint probability (-log) that two intenities have the same polymer length
3242                 jointLookUp.resize(NUMBINS * NUMBINS, 0);
3243                 
3244                 for(int i=0;i<NUMBINS;i++){
3245                         
3246                         if (m->control_pressed) { break; }
3247                         
3248                         for(int j=0;j<NUMBINS;j++){             
3249                                 
3250                                 double minSum = 100000000;
3251                                 
3252                                 for(int k=0;k<HOMOPS;k++){
3253                                         double sum = singleLookUp[k * NUMBINS + i] + singleLookUp[k * NUMBINS + j];
3254                                         
3255                                         if(sum < minSum)        {       minSum = sum;           }
3256                                 }       
3257                                 jointLookUp[i * NUMBINS + j] = minSum;
3258                         }
3259                 }
3260         }
3261         catch(exception& e) {
3262                 m->errorOut(e, "ShhherCommand", "getJointLookUp");
3263                 exit(1);
3264         }
3265 }
3266
3267 /**************************************************************************************************/
3268
3269 double ShhherCommand::getProbIntensity(int intIntensity){                          
3270         try{
3271
3272                 double minNegLogProb = 100000000; 
3273
3274                 
3275                 for(int i=0;i<HOMOPS;i++){//loop signal strength
3276                         
3277                         if (m->control_pressed) { break; }
3278                         
3279                         float negLogProb = singleLookUp[i * NUMBINS + intIntensity];
3280                         if(negLogProb < minNegLogProb)  {       minNegLogProb = negLogProb; }
3281                 }
3282                 
3283                 return minNegLogProb;
3284         }
3285         catch(exception& e) {
3286                 m->errorOut(e, "ShhherCommand", "getProbIntensity");
3287                 exit(1);
3288         }
3289 }
3290
3291
3292
3293