]> git.donarmstrong.com Git - mothur.git/blob - shhhercommand.cpp
fixed bug in phylo.diversity rooting. added filename patterns and create filename...
[mothur.git] / shhhercommand.cpp
1 /*
2  *  shhher.cpp
3  *  Mothur
4  *
5  *  Created by Pat Schloss on 12/27/10.
6  *  Copyright 2010 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "shhhercommand.h"
11
12 //**********************************************************************************************************************
13 vector<string> ShhherCommand::setParameters(){  
14         try {
15                 CommandParameter pflow("flow", "InputTypes", "", "", "none", "fileflow", "none","fasta-name-group-counts-qfile",false,false,true); parameters.push_back(pflow);
16                 CommandParameter pfile("file", "InputTypes", "", "", "none", "fileflow", "none","fasta-name-group-counts-qfile",false,false,true); parameters.push_back(pfile);
17                 CommandParameter plookup("lookup", "InputTypes", "", "", "none", "none", "none","",false,false,true); parameters.push_back(plookup);
18                 CommandParameter pcutoff("cutoff", "Number", "", "0.01", "", "", "","",false,false); parameters.push_back(pcutoff);
19                 CommandParameter pprocessors("processors", "Number", "", "1", "", "", "","",false,false,true); parameters.push_back(pprocessors);
20                 CommandParameter pmaxiter("maxiter", "Number", "", "1000", "", "", "","",false,false); parameters.push_back(pmaxiter);
21         CommandParameter plarge("large", "Number", "", "-1", "", "", "","",false,false); parameters.push_back(plarge);
22                 CommandParameter psigma("sigma", "Number", "", "60", "", "", "","",false,false); parameters.push_back(psigma);
23                 CommandParameter pmindelta("mindelta", "Number", "", "0.000001", "", "", "","",false,false); parameters.push_back(pmindelta);
24                 CommandParameter porder("order", "String", "", "", "", "", "","",false,false); parameters.push_back(porder);
25                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
26                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
27                 
28                 vector<string> myArray;
29                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
30                 return myArray;
31         }
32         catch(exception& e) {
33                 m->errorOut(e, "ShhherCommand", "setParameters");
34                 exit(1);
35         }
36 }
37 //**********************************************************************************************************************
38 string ShhherCommand::getHelpString(){  
39         try {
40                 string helpString = "";
41                 helpString += "The shhh.flows command reads a file containing flowgrams and creates a file of corrected sequences.\n";
42                 return helpString;
43         }
44         catch(exception& e) {
45                 m->errorOut(e, "ShhherCommand", "getHelpString");
46                 exit(1);
47         }
48 }
49 //**********************************************************************************************************************
50 string ShhherCommand::getOutputPattern(string type) {
51     try {
52         string pattern = "";
53         
54         if (type == "fasta")            {   pattern = "[filename],shhh.fasta";   }
55         else if (type == "name")    {   pattern = "[filename],shhh.names";   }
56         else if (type == "group")        {   pattern = "[filename],shhh.groups";   }
57         else if (type == "counts")        {   pattern = "[filename],shhh.counts";   }
58         else if (type == "qfile")        {   pattern = "[filename],shhh.qual";   }
59         else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true;  }
60         
61         return pattern;
62     }
63     catch(exception& e) {
64         m->errorOut(e, "ShhherCommand", "getOutputPattern");
65         exit(1);
66     }
67 }
68 //**********************************************************************************************************************
69
70 ShhherCommand::ShhherCommand(){ 
71         try {
72                 abort = true; calledHelp = true;
73                 setParameters();
74                 
75                 //initialize outputTypes
76                 vector<string> tempOutNames;
77                 outputTypes["fasta"] = tempOutNames;
78         outputTypes["name"] = tempOutNames;
79         outputTypes["group"] = tempOutNames;
80         outputTypes["counts"] = tempOutNames;
81         outputTypes["qfile"] = tempOutNames;
82
83         }
84         catch(exception& e) {
85                 m->errorOut(e, "ShhherCommand", "ShhherCommand");
86                 exit(1);
87         }
88 }
89
90 //**********************************************************************************************************************
91
92 ShhherCommand::ShhherCommand(string option) {
93         try {
94         
95 #ifdef USE_MPI
96                 MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
97                 MPI_Comm_size(MPI_COMM_WORLD, &ncpus);
98         
99                 if(pid == 0){
100 #endif
101                 abort = false; calledHelp = false;   
102                 
103                 //allow user to run help
104                 if(option == "help") { help(); abort = true; calledHelp = true; }
105                 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
106                 
107                 else {
108                         vector<string> myArray = setParameters();
109                         
110                         OptionParser parser(option);
111                         map<string,string> parameters = parser.getParameters();
112                         
113                         ValidParameters validParameter;
114                         map<string,string>::iterator it;
115                         
116                         //check to make sure all parameters are valid for command
117                         for (it = parameters.begin(); it != parameters.end(); it++) { 
118                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
119                         }
120                         
121                         //initialize outputTypes
122             vector<string> tempOutNames;
123             outputTypes["fasta"] = tempOutNames;
124             outputTypes["name"] = tempOutNames;
125             outputTypes["group"] = tempOutNames;
126             outputTypes["counts"] = tempOutNames;
127             outputTypes["qfile"] = tempOutNames;
128
129                         
130                         //if the user changes the input directory command factory will send this info to us in the output parameter 
131                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
132                         if (inputDir == "not found"){   inputDir = "";          }
133                         else {
134                                 string path;
135                                 it = parameters.find("flow");
136                                 //user has given a template file
137                                 if(it != parameters.end()){ 
138                                         path = m->hasPath(it->second);
139                                         //if the user has not given a path then, add inputdir. else leave path alone.
140                                         if (path == "") {       parameters["flow"] = inputDir + it->second;             }
141                                 }
142                                 
143                                 it = parameters.find("lookup");
144                                 //user has given a template file
145                                 if(it != parameters.end()){ 
146                                         path = m->hasPath(it->second);
147                                         //if the user has not given a path then, add inputdir. else leave path alone.
148                                         if (path == "") {       parameters["lookup"] = inputDir + it->second;           }
149                                 }
150
151                                 it = parameters.find("file");
152                                 //user has given a template file
153                                 if(it != parameters.end()){ 
154                                         path = m->hasPath(it->second);
155                                         //if the user has not given a path then, add inputdir. else leave path alone.
156                                         if (path == "") {       parameters["file"] = inputDir + it->second;             }
157                                 }
158                         }
159             
160             //if the user changes the output directory command factory will send this info to us in the output parameter 
161                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = ""; }
162             
163                         //check for required parameters
164                         flowFileName = validParameter.validFile(parameters, "flow", true);
165                         flowFilesFileName = validParameter.validFile(parameters, "file", true);
166                         if (flowFileName == "not found" && flowFilesFileName == "not found") {
167                                 m->mothurOut("values for either flow or file must be provided for the shhh.flows command.");
168                                 m->mothurOutEndLine();
169                                 abort = true; 
170                         }
171                         else if (flowFileName == "not open" || flowFilesFileName == "not open") { abort = true; }
172                         
173                         if(flowFileName != "not found"){
174                                 compositeFASTAFileName = "";    
175                                 compositeNamesFileName = "";    
176                         }
177                         else{
178                                 ofstream temp;
179                 
180                 string thisoutputDir = outputDir;
181                 if (outputDir == "") {  thisoutputDir =  m->hasPath(flowFilesFileName); } //if user entered a file with a path then preserve it
182                 
183                                 //we want to rip off .files, and also .flow if its there
184                 string fileroot = m->getRootName(m->getSimpleName(flowFilesFileName));
185                 if (fileroot[fileroot.length()-1] == '.') {  fileroot = fileroot.substr(0, fileroot.length()-1); } //rip off dot
186                 string extension = m->getExtension(fileroot);
187                 if (extension == ".flow") { fileroot = m->getRootName(fileroot);  }
188                 else { fileroot += "."; } //add back if needed
189                 
190                                 compositeFASTAFileName = thisoutputDir + fileroot + "shhh.fasta";
191                                 m->openOutputFile(compositeFASTAFileName, temp);
192                                 temp.close();
193                                 
194                                 compositeNamesFileName = thisoutputDir + fileroot + "shhh.names";
195                                 m->openOutputFile(compositeNamesFileName, temp);
196                                 temp.close();
197                         }
198             
199             if(flowFilesFileName != "not found"){
200                 string fName;
201                 
202                 ifstream flowFilesFile;
203                 m->openInputFile(flowFilesFileName, flowFilesFile);
204                 while(flowFilesFile){
205                     fName = m->getline(flowFilesFile);
206                     
207                     //test if file is valid
208                     ifstream in;
209                     int ableToOpen = m->openInputFile(fName, in, "noerror");
210                     in.close(); 
211                     if (ableToOpen == 1) {
212                         if (inputDir != "") { //default path is set
213                             string tryPath = inputDir + fName;
214                             m->mothurOut("Unable to open " + fName + ". Trying input directory " + tryPath); m->mothurOutEndLine();
215                             ifstream in2;
216                             ableToOpen = m->openInputFile(tryPath, in2, "noerror");
217                             in2.close();
218                             fName = tryPath;
219                         }
220                     }
221                     
222                     if (ableToOpen == 1) {
223                         if (m->getDefaultPath() != "") { //default path is set
224                             string tryPath = m->getDefaultPath() + m->getSimpleName(fName);
225                             m->mothurOut("Unable to open " + fName + ". Trying default " + tryPath); m->mothurOutEndLine();
226                             ifstream in2;
227                             ableToOpen = m->openInputFile(tryPath, in2, "noerror");
228                             in2.close();
229                             fName = tryPath;
230                         }
231                     }
232                     
233                     //if you can't open it its not in current working directory or inputDir, try mothur excutable location
234                     if (ableToOpen == 1) {
235                         string exepath = m->argv;
236                         string tempPath = exepath;
237                         for (int i = 0; i < exepath.length(); i++) { tempPath[i] = tolower(exepath[i]); }
238                         exepath = exepath.substr(0, (tempPath.find_last_of('m')));
239                         
240                         string tryPath = m->getFullPathName(exepath) + m->getSimpleName(fName);
241                         m->mothurOut("Unable to open " + fName + ". Trying mothur's executable location " + tryPath); m->mothurOutEndLine();
242                         ifstream in2;
243                         ableToOpen = m->openInputFile(tryPath, in2, "noerror");
244                         in2.close();
245                         fName = tryPath;
246                     }
247                     
248                     if (ableToOpen == 1) {  m->mothurOut("Unable to open " + fName + ". Disregarding. "); m->mothurOutEndLine();  }
249                     else { flowFileVector.push_back(fName); }
250                     m->gobble(flowFilesFile);
251                 }
252                 flowFilesFile.close();
253                 if (flowFileVector.size() == 0) {  m->mothurOut("[ERROR]: no valid files."); m->mothurOutEndLine(); abort = true; }
254             }
255             else{
256                 if (outputDir == "") { outputDir = m->hasPath(flowFileName); }
257                 flowFileVector.push_back(flowFileName);
258             }
259                 
260                         //check for optional parameter and set defaults
261                         // ...at some point should added some additional type checking...
262                         string temp;
263                         temp = validParameter.validFile(parameters, "lookup", true);
264                         if (temp == "not found")        {       
265                                 lookupFileName = "LookUp_Titanium.pat"; 
266                                 
267                                 int ableToOpen;
268                                 ifstream in;
269                                 ableToOpen = m->openInputFile(lookupFileName, in, "noerror");
270                                 in.close();     
271                                 
272                                 //if you can't open it, try input location
273                                 if (ableToOpen == 1) {
274                                         if (inputDir != "") { //default path is set
275                                                 string tryPath = inputDir + lookupFileName;
276                                                 m->mothurOut("Unable to open " + lookupFileName + ". Trying input directory " + tryPath); m->mothurOutEndLine();
277                                                 ifstream in2;
278                                                 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
279                                                 in2.close();
280                                                 lookupFileName = tryPath;
281                                         }
282                                 }
283                                 
284                                 //if you can't open it, try default location
285                                 if (ableToOpen == 1) {
286                                         if (m->getDefaultPath() != "") { //default path is set
287                                                 string tryPath = m->getDefaultPath() + m->getSimpleName(lookupFileName);
288                                                 m->mothurOut("Unable to open " + lookupFileName + ". Trying default " + tryPath); m->mothurOutEndLine();
289                                                 ifstream in2;
290                                                 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
291                                                 in2.close();
292                                                 lookupFileName = tryPath;
293                                         }
294                                 }
295                                 
296                                 //if you can't open it its not in current working directory or inputDir, try mothur excutable location
297                                 if (ableToOpen == 1) {
298                                         string exepath = m->argv;
299                                         string tempPath = exepath;
300                                         for (int i = 0; i < exepath.length(); i++) { tempPath[i] = tolower(exepath[i]); }
301                                         exepath = exepath.substr(0, (tempPath.find_last_of('m')));
302                                         
303                                         string tryPath = m->getFullPathName(exepath) + m->getSimpleName(lookupFileName);
304                                         m->mothurOut("Unable to open " + lookupFileName + ". Trying mothur's executable location " + tryPath); m->mothurOutEndLine();
305                                         ifstream in2;
306                                         ableToOpen = m->openInputFile(tryPath, in2, "noerror");
307                                         in2.close();
308                                         lookupFileName = tryPath;
309                                 }
310                                 
311                                 if (ableToOpen == 1) {  m->mothurOut("Unable to open " + lookupFileName + "."); m->mothurOutEndLine(); abort=true;  }
312                         }
313                         else if(temp == "not open")     {       
314                                 
315                                 lookupFileName = validParameter.validFile(parameters, "lookup", false);
316                                 
317                                 //if you can't open it its not inputDir, try mothur excutable location
318                                 string exepath = m->argv;
319                                 string tempPath = exepath;
320                                 for (int i = 0; i < exepath.length(); i++) { tempPath[i] = tolower(exepath[i]); }
321                                 exepath = exepath.substr(0, (tempPath.find_last_of('m')));
322                                         
323                                 string tryPath = m->getFullPathName(exepath) + lookupFileName;
324                                 m->mothurOut("Unable to open " + lookupFileName + ". Trying mothur's executable location " + tryPath); m->mothurOutEndLine();
325                                 ifstream in2;
326                                 int ableToOpen = m->openInputFile(tryPath, in2, "noerror");
327                                 in2.close();
328                                 lookupFileName = tryPath;
329                                 
330                                 if (ableToOpen == 1) {  m->mothurOut("Unable to open " + lookupFileName + "."); m->mothurOutEndLine(); abort=true;  }
331                         }else                                           {       lookupFileName = temp;  }
332                         
333                         temp = validParameter.validFile(parameters, "processors", false);       if (temp == "not found"){       temp = m->getProcessors();      }
334                         m->setProcessors(temp);
335                         m->mothurConvert(temp, processors);
336
337                         temp = validParameter.validFile(parameters, "cutoff", false);   if (temp == "not found"){       temp = "0.01";          }
338                         m->mothurConvert(temp, cutoff); 
339                         
340                         temp = validParameter.validFile(parameters, "mindelta", false); if (temp == "not found"){       temp = "0.000001";      }
341                         m->mothurConvert(temp, minDelta); 
342
343                         temp = validParameter.validFile(parameters, "maxiter", false);  if (temp == "not found"){       temp = "1000";          }
344                         m->mothurConvert(temp, maxIters); 
345             
346             temp = validParameter.validFile(parameters, "large", false);        if (temp == "not found"){       temp = "0";             }
347                         m->mothurConvert(temp, largeSize); 
348             if (largeSize != 0) { large = true; }
349             else { large = false;  }
350             if (largeSize < 0) {  m->mothurOut("The value of the large cannot be negative.\n"); }
351             
352 #ifdef USE_MPI
353             if (large) { m->mothurOut("The large parameter is not available with the MPI-Enabled version.\n"); large=false; }                           
354 #endif
355             
356
357                         temp = validParameter.validFile(parameters, "sigma", false);if (temp == "not found")    {       temp = "60";            }
358                         m->mothurConvert(temp, sigma); 
359                         
360                         flowOrder = validParameter.validFile(parameters, "order", false);
361                         if (flowOrder == "not found"){ flowOrder = "TACG";              }
362                         else if(flowOrder.length() != 4){
363                                 m->mothurOut("The value of the order option must be four bases long\n");
364                         }
365                         
366                 }
367 #ifdef USE_MPI
368                 }                               
369 #endif
370         }
371         catch(exception& e) {
372                 m->errorOut(e, "ShhherCommand", "ShhherCommand");
373                 exit(1);
374         }
375 }
376 //**********************************************************************************************************************
377 #ifdef USE_MPI
378 int ShhherCommand::execute(){
379         try {
380                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
381                 
382                 int tag = 1976;
383                 MPI_Status status; 
384                         
385                 if(pid == 0){
386
387                         for(int i=1;i<ncpus;i++){
388                                 MPI_Send(&abort, 1, MPI_INT, i, tag, MPI_COMM_WORLD);
389                         }
390                         if(abort == 1){ return 0;       }
391
392                         processors = ncpus;
393                         
394                         m->mothurOut("\nGetting preliminary data...\n");
395                         getSingleLookUp();      if (m->control_pressed) { return 0; }
396                         getJointLookUp();       if (m->control_pressed) { return 0; }
397                         
398             vector<string> flowFileVector;
399                         if(flowFilesFileName != "not found"){
400                                 string fName;
401                 
402                                 ifstream flowFilesFile;
403                                 m->openInputFile(flowFilesFileName, flowFilesFile);
404                                 while(flowFilesFile){
405                                         fName = m->getline(flowFilesFile);
406                                         flowFileVector.push_back(fName);
407                                         m->gobble(flowFilesFile);
408                                 }
409                         }
410                         else{
411                                 flowFileVector.push_back(flowFileName);
412                         }
413             
414                         int numFiles = flowFileVector.size();
415
416                         for(int i=1;i<ncpus;i++){
417                                 MPI_Send(&numFiles, 1, MPI_INT, i, tag, MPI_COMM_WORLD);
418                         }
419                         
420                         for(int i=0;i<numFiles;i++){
421                                 
422                                 if (m->control_pressed) { break; }
423                                 
424                                 double begClock = clock();
425                                 unsigned long long begTime = time(NULL);
426
427                                 flowFileName = flowFileVector[i];
428                                 
429                                 m->mothurOut("\n>>>>>\tProcessing " + flowFileName + " (file " + toString(i+1) + " of " + toString(numFiles) + ")\t<<<<<\n");
430                                 m->mothurOut("Reading flowgrams...\n");
431                                 getFlowData();
432                                 
433                                 if (m->control_pressed) { break; }
434
435                                 m->mothurOut("Identifying unique flowgrams...\n");
436                                 getUniques();
437                                 
438                                 if (m->control_pressed) { break; }
439
440                                 m->mothurOut("Calculating distances between flowgrams...\n");
441                                 char fileName[1024];
442                                 strcpy(fileName, flowFileName.c_str());
443
444                                 for(int i=1;i<ncpus;i++){
445                                         MPI_Send(&fileName[0], 1024, MPI_CHAR, i, tag, MPI_COMM_WORLD);
446
447                                         MPI_Send(&numSeqs, 1, MPI_INT, i, tag, MPI_COMM_WORLD);
448                                         MPI_Send(&numUniques, 1, MPI_INT, i, tag, MPI_COMM_WORLD);
449                                         MPI_Send(&numFlowCells, 1, MPI_INT, i, tag, MPI_COMM_WORLD);
450                                         MPI_Send(&flowDataIntI[0], numSeqs * numFlowCells, MPI_SHORT, i, tag, MPI_COMM_WORLD);
451                                         MPI_Send(&flowDataPrI[0], numSeqs * numFlowCells, MPI_DOUBLE, i, tag, MPI_COMM_WORLD);
452                                         MPI_Send(&mapUniqueToSeq[0], numSeqs, MPI_INT, i, tag, MPI_COMM_WORLD);
453                                         MPI_Send(&mapSeqToUnique[0], numSeqs, MPI_INT, i, tag, MPI_COMM_WORLD);
454                                         MPI_Send(&lengths[0], numSeqs, MPI_INT, i, tag, MPI_COMM_WORLD);
455                                         MPI_Send(&jointLookUp[0], NUMBINS * NUMBINS, MPI_DOUBLE, i, tag, MPI_COMM_WORLD);
456                                         MPI_Send(&cutoff, 1, MPI_DOUBLE, i, tag, MPI_COMM_WORLD);
457                                 }                               
458                                                         
459                                 string distFileName = flowDistMPI(0, int(sqrt(1.0/float(ncpus)) * numUniques));
460                                 
461                                 if (m->control_pressed) { break; }
462                                 
463                                 int done;
464                                 for(int i=1;i<ncpus;i++){
465                                         MPI_Recv(&done, 1, MPI_INT, i, tag, MPI_COMM_WORLD, &status);
466                                         
467                                         m->appendFiles((distFileName + ".temp." + toString(i)), distFileName);
468                                         m->mothurRemove((distFileName + ".temp." + toString(i)));
469                                 }
470
471                                 string namesFileName = createNamesFile();
472                                 
473                                 if (m->control_pressed) { break; }
474                                 
475                                 m->mothurOut("\nClustering flowgrams...\n");
476                                 string listFileName = cluster(distFileName, namesFileName);
477                                 
478                                 if (m->control_pressed) { break; }
479                                 
480                 
481                 
482                                 getOTUData(listFileName);
483
484                                 m->mothurRemove(distFileName);
485                                 m->mothurRemove(namesFileName);
486                                 m->mothurRemove(listFileName);
487                                 
488                                 if (m->control_pressed) { break; }
489                                 
490                                 initPyroCluster();
491
492                                 if (m->control_pressed) { break; }
493                                 
494             
495                                 for(int i=1;i<ncpus;i++){
496                                         MPI_Send(&numOTUs, 1, MPI_INT, i, tag, MPI_COMM_WORLD);
497                                         MPI_Send(&singleLookUp[0], singleLookUp.size(), MPI_DOUBLE, i, tag, MPI_COMM_WORLD);
498                                         MPI_Send(&uniqueFlowgrams[0], numFlowCells * numUniques, MPI_SHORT, i, tag, MPI_COMM_WORLD);
499                                         MPI_Send(&sigma, 1, MPI_DOUBLE, i, tag, MPI_COMM_WORLD);
500                                 }
501                                 
502                                 if (m->control_pressed) { break; }
503                                 
504                                 double maxDelta = 0;
505                                 int iter = 0;
506                                 
507                                 int numOTUsOnCPU = numOTUs / ncpus;
508                                 int numSeqsOnCPU = numSeqs / ncpus;
509                                 m->mothurOut("\nDenoising flowgrams...\n");
510                                 m->mothurOut("iter\tmaxDelta\tnLL\t\tcycletime\n");
511                                 
512                                 while((maxIters == 0 && maxDelta > minDelta) || iter < MIN_ITER || (maxDelta > minDelta && iter < maxIters)){
513                                         
514                                         double cycClock = clock();
515                                         unsigned long long cycTime = time(NULL);
516                                         fill();
517                                         
518                                         if (m->control_pressed) { break; }
519
520                                         int total = singleTau.size();
521                                         for(int i=1;i<ncpus;i++){
522                                                 MPI_Send(&total, 1, MPI_INT, i, tag, MPI_COMM_WORLD);
523                                                 MPI_Send(&change[0], numOTUs, MPI_SHORT, i, tag, MPI_COMM_WORLD);
524                                                 MPI_Send(&centroids[0], numOTUs, MPI_INT, i, tag, MPI_COMM_WORLD);
525                                                 
526                                                 MPI_Send(&singleTau[0], total, MPI_DOUBLE, i, tag, MPI_COMM_WORLD);
527                                                 MPI_Send(&seqNumber[0], total, MPI_INT, i, tag, MPI_COMM_WORLD);
528                                                 MPI_Send(&seqIndex[0], total, MPI_INT, i, tag, MPI_COMM_WORLD);
529                                                 MPI_Send(&nSeqsPerOTU[0], numOTUs, MPI_INT, i, tag, MPI_COMM_WORLD);
530                                                 MPI_Send(&cumNumSeqs[0], numOTUs, MPI_INT, i, tag, MPI_COMM_WORLD);
531                                         }
532                                         
533                                         calcCentroidsDriver(0, numOTUsOnCPU);
534                                         
535                                         for(int i=1;i<ncpus;i++){
536                                                 int otuStart = i * numOTUs / ncpus;
537                                                 int otuStop = (i + 1) * numOTUs / ncpus;
538                                                 
539                                                 vector<int> tempCentroids(numOTUs, 0);
540                                                 vector<short> tempChange(numOTUs, 0);
541                                                 
542                                                 MPI_Recv(&tempCentroids[0], numOTUs, MPI_INT, i, tag, MPI_COMM_WORLD, &status);
543                                                 MPI_Recv(&tempChange[0], numOTUs, MPI_SHORT, i, tag, MPI_COMM_WORLD, &status);
544                                                 
545                                                 for(int j=otuStart;j<otuStop;j++){
546                                                         centroids[j] = tempCentroids[j];
547                                                         change[j] = tempChange[j];
548                                                 }
549                                         }
550                                                                         
551                                         maxDelta = getNewWeights(); if (m->control_pressed) { break; }
552                                         double nLL = getLikelihood(); if (m->control_pressed) { break; }
553                                         checkCentroids(); if (m->control_pressed) { break; }
554                                         
555                                         for(int i=1;i<ncpus;i++){
556                                                 MPI_Send(&centroids[0], numOTUs, MPI_INT, i, tag, MPI_COMM_WORLD);
557                                                 MPI_Send(&weight[0], numOTUs, MPI_DOUBLE, i, tag, MPI_COMM_WORLD);
558                                                 MPI_Send(&change[0], numOTUs, MPI_SHORT, i, tag, MPI_COMM_WORLD);
559                                         }
560                                         
561                                         calcNewDistancesParent(0, numSeqsOnCPU);
562
563                                         total = singleTau.size();
564
565                                         for(int i=1;i<ncpus;i++){
566                                                 int childTotal;
567                                                 int seqStart = i * numSeqs / ncpus;
568                                                 int seqStop = (i + 1) * numSeqs / ncpus;
569                                                 
570                                                 MPI_Recv(&childTotal, 1, MPI_INT, i, tag, MPI_COMM_WORLD, &status);
571
572                                                 vector<int> childSeqIndex(childTotal, 0);
573                                                 vector<double> childSingleTau(childTotal, 0);
574                                                 vector<double> childDist(numSeqs * numOTUs, 0);
575                                                 vector<int> otuIndex(childTotal, 0);
576                                                 
577                                                 MPI_Recv(&childSeqIndex[0], childTotal, MPI_INT, i, tag, MPI_COMM_WORLD, &status);
578                                                 MPI_Recv(&childSingleTau[0], childTotal, MPI_DOUBLE, i, tag, MPI_COMM_WORLD, &status);
579                                                 MPI_Recv(&childDist[0], numOTUs * numSeqs, MPI_DOUBLE, i, tag, MPI_COMM_WORLD, &status);
580                                                 MPI_Recv(&otuIndex[0], childTotal, MPI_INT, i, tag, MPI_COMM_WORLD, &status);
581
582                                                 int oldTotal = total;
583                                                 total += childTotal;
584                                                 singleTau.resize(total, 0);
585                                                 seqIndex.resize(total, 0);
586                                                 seqNumber.resize(total, 0);
587                                                 
588                                                 int childIndex = 0;
589                                                 
590                                                 for(int j=oldTotal;j<total;j++){
591                                                         int otuI = otuIndex[childIndex];
592                                                         int seqI = childSeqIndex[childIndex];
593                                                         
594                                                         singleTau[j] = childSingleTau[childIndex];
595                                                         
596                                                         aaP[otuI][nSeqsPerOTU[otuI]] = j;
597                                                         aaI[otuI][nSeqsPerOTU[otuI]] = seqI;
598                                                         nSeqsPerOTU[otuI]++;
599                                                         childIndex++;
600                                                 }
601                                                 
602                                                 int index = seqStart * numOTUs;
603                                                 for(int j=seqStart;j<seqStop;j++){
604                                                         for(int k=0;k<numOTUs;k++){
605                                                                 dist[index] = childDist[index];
606                                                                 index++;
607                                                         }
608                                                 }                                       
609                                         }
610                                         
611                                         iter++;
612                                         
613                                         m->mothurOut(toString(iter) + '\t' + toString(maxDelta) + '\t' + toString(nLL) + '\t' + toString(time(NULL) - cycTime) + '\t' + toString((clock() - cycClock)/(double)CLOCKS_PER_SEC) + '\n');                  
614
615                                         if((maxIters == 0 && maxDelta > minDelta) || iter < MIN_ITER || (maxDelta > minDelta && iter < maxIters)){
616                                                 int live = 1;
617                                                 for(int i=1;i<ncpus;i++){
618                                                         MPI_Send(&live, 1, MPI_INT, i, tag, MPI_COMM_WORLD);
619                                                 }
620                                         }
621                                         else{
622                                                 int live = 0;
623                                                 for(int i=1;i<ncpus;i++){
624                                                         MPI_Send(&live, 1, MPI_INT, i, tag, MPI_COMM_WORLD); //send kill command
625                                                 }
626                                         }
627                                         
628                                 }       
629                                 
630                                 if (m->control_pressed) { break; }
631                                 
632                                 m->mothurOut("\nFinalizing...\n");
633                                 fill();
634                                 
635                                 if (m->control_pressed) { break; }
636                                 
637                                 setOTUs();
638                                 
639                                 vector<int> otuCounts(numOTUs, 0);
640                                 for(int i=0;i<numSeqs;i++)      {       otuCounts[otuData[i]]++;        }
641                                 calcCentroidsDriver(0, numOTUs);
642                                 
643                                 if (m->control_pressed) { break; }
644                                 
645                                 writeQualities(otuCounts);      if (m->control_pressed) { break; }
646                                 writeSequences(otuCounts);      if (m->control_pressed) { break; }
647                                 writeNames(otuCounts);          if (m->control_pressed) { break; }
648                                 writeClusters(otuCounts);       if (m->control_pressed) { break; }
649                                 writeGroups();                          if (m->control_pressed) { break; }
650                                 
651                                                                  
652                                 m->mothurOut("Total time to process " + toString(flowFileName) + ":\t" + toString(time(NULL) - begTime) + '\t' + toString((clock() - begClock)/(double)CLOCKS_PER_SEC) + '\n');                 
653                         }
654                 }
655                 else{
656                         int abort = 1;
657
658                         MPI_Recv(&abort, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
659                         if(abort){      return 0;       }
660
661                         int numFiles;
662                         MPI_Recv(&numFiles, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
663
664                         for(int i=0;i<numFiles;i++){
665                                 
666                                 if (m->control_pressed) { break; }
667                                 
668                                 //Now into the pyrodist part
669                                 bool live = 1;
670
671                                 char fileName[1024];
672                                 MPI_Recv(&fileName, 1024, MPI_CHAR, 0, tag, MPI_COMM_WORLD, &status);
673                                 MPI_Recv(&numSeqs, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
674                                 MPI_Recv(&numUniques, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
675                                 MPI_Recv(&numFlowCells, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
676                                 
677                                 flowDataIntI.resize(numSeqs * numFlowCells);
678                                 flowDataPrI.resize(numSeqs * numFlowCells);
679                                 mapUniqueToSeq.resize(numSeqs);
680                                 mapSeqToUnique.resize(numSeqs);
681                                 lengths.resize(numSeqs);
682                                 jointLookUp.resize(NUMBINS * NUMBINS);
683
684                                 MPI_Recv(&flowDataIntI[0], numSeqs * numFlowCells, MPI_SHORT, 0, tag, MPI_COMM_WORLD, &status);
685                                 MPI_Recv(&flowDataPrI[0], numSeqs * numFlowCells, MPI_DOUBLE, 0, tag, MPI_COMM_WORLD, &status);
686                                 MPI_Recv(&mapUniqueToSeq[0], numSeqs, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
687                                 MPI_Recv(&mapSeqToUnique[0], numSeqs, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
688                                 MPI_Recv(&lengths[0], numSeqs, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
689                                 MPI_Recv(&jointLookUp[0], NUMBINS * NUMBINS, MPI_DOUBLE, 0, tag, MPI_COMM_WORLD, &status);
690                                 MPI_Recv(&cutoff, 1, MPI_DOUBLE, 0, tag, MPI_COMM_WORLD, &status);
691
692                                 flowFileName = string(fileName);
693                                 int flowDistStart = int(sqrt(float(pid)/float(ncpus)) * numUniques);
694                                 int flowDistEnd = int(sqrt(float(pid+1)/float(ncpus)) * numUniques);
695                                 
696                                 string distanceStringChild = flowDistMPI(flowDistStart, flowDistEnd);
697                                 
698                                 if (m->control_pressed) { break; }
699                                 
700                                 int done = 1;
701                                 MPI_Send(&done, 1, MPI_INT, 0, tag, MPI_COMM_WORLD);
702
703                                 //Now into the pyronoise part
704                                 MPI_Recv(&numOTUs, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
705                                 
706                                 singleLookUp.resize(HOMOPS * NUMBINS);
707                                 uniqueFlowgrams.resize(numUniques * numFlowCells);
708                                 weight.resize(numOTUs);
709                                 centroids.resize(numOTUs);
710                                 change.resize(numOTUs);
711                                 dist.assign(numOTUs * numSeqs, 0);
712                                 nSeqsPerOTU.resize(numOTUs);
713                                 cumNumSeqs.resize(numOTUs);
714
715                                 MPI_Recv(&singleLookUp[0], singleLookUp.size(), MPI_DOUBLE, 0, tag, MPI_COMM_WORLD, &status);
716                                 MPI_Recv(&uniqueFlowgrams[0], uniqueFlowgrams.size(), MPI_SHORT, 0, tag, MPI_COMM_WORLD, &status);
717                                 MPI_Recv(&sigma, 1, MPI_DOUBLE, 0, tag, MPI_COMM_WORLD, &status);
718                                 
719                                 int startOTU = pid * numOTUs / ncpus;
720                                 int endOTU = (pid + 1) * numOTUs / ncpus;
721
722                                 int startSeq = pid * numSeqs / ncpus;
723                                 int endSeq = (pid + 1) * numSeqs /ncpus;
724                                 
725                                 int total;
726
727                                 while(live){
728                                         
729                                         if (m->control_pressed) { break; }
730                                         
731                                         MPI_Recv(&total, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
732                                         singleTau.assign(total, 0.0000);
733                                         seqNumber.assign(total, 0);
734                                         seqIndex.assign(total, 0);
735
736                                         MPI_Recv(&change[0], numOTUs, MPI_SHORT, 0, tag, MPI_COMM_WORLD, &status);
737                                         MPI_Recv(&centroids[0], numOTUs, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
738                                         MPI_Recv(&singleTau[0], total, MPI_DOUBLE, 0, tag, MPI_COMM_WORLD, &status);
739                                         MPI_Recv(&seqNumber[0], total, MPI_DOUBLE, 0, tag, MPI_COMM_WORLD, &status);
740                                         MPI_Recv(&seqIndex[0], total, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
741                                         MPI_Recv(&nSeqsPerOTU[0], total, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
742                                         MPI_Recv(&cumNumSeqs[0], numOTUs, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
743
744                                         calcCentroidsDriver(startOTU, endOTU);
745
746                                         MPI_Send(&centroids[0], numOTUs, MPI_INT, 0, tag, MPI_COMM_WORLD);
747                                         MPI_Send(&change[0], numOTUs, MPI_SHORT, 0, tag, MPI_COMM_WORLD);
748
749                                         MPI_Recv(&centroids[0], numOTUs, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
750                                         MPI_Recv(&weight[0], numOTUs, MPI_DOUBLE, 0, tag, MPI_COMM_WORLD, &status);
751                                         MPI_Recv(&change[0], numOTUs, MPI_SHORT, 0, tag, MPI_COMM_WORLD, &status);
752
753                                         vector<int> otuIndex(total, 0);
754                                         calcNewDistancesChildMPI(startSeq, endSeq, otuIndex);
755                                         total = otuIndex.size();
756                                         
757                                         MPI_Send(&total, 1, MPI_INT, 0, tag, MPI_COMM_WORLD);
758                                         MPI_Send(&seqIndex[0], total, MPI_INT, 0, tag, MPI_COMM_WORLD);
759                                         MPI_Send(&singleTau[0], total, MPI_DOUBLE, 0, tag, MPI_COMM_WORLD);
760                                         MPI_Send(&dist[0], numOTUs * numSeqs, MPI_DOUBLE, 0, tag, MPI_COMM_WORLD);
761                                         MPI_Send(&otuIndex[0], total, MPI_INT, 0, tag, MPI_COMM_WORLD);
762                                 
763                                         MPI_Recv(&live, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
764                                 }
765                         }
766                 }
767                 
768                 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
769                 
770                 MPI_Barrier(MPI_COMM_WORLD);
771
772                 
773                 if(compositeFASTAFileName != ""){
774                         outputNames.push_back(compositeFASTAFileName); outputTypes["fasta"].push_back(compositeFASTAFileName);
775                         outputNames.push_back(compositeNamesFileName); outputTypes["name"].push_back(compositeNamesFileName); 
776                 }
777
778                 m->mothurOutEndLine();
779                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
780                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
781                 m->mothurOutEndLine();
782                 
783                 return 0;
784
785         }
786         catch(exception& e) {
787                 m->errorOut(e, "ShhherCommand", "execute");
788                 exit(1);
789         }
790 }
791 /**************************************************************************************************/
792 string ShhherCommand::createNamesFile(){
793         try{
794                 
795                 vector<string> duplicateNames(numUniques, "");
796                 for(int i=0;i<numSeqs;i++){
797                         duplicateNames[mapSeqToUnique[i]] += seqNameVector[i] + ',';
798                 }
799                 map<string, string> variables; 
800                 variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(flowFileName));
801                 string nameFileName = getOutputFileName("name",variables);
802                 
803                 ofstream nameFile;
804                 m->openOutputFile(nameFileName, nameFile);
805                 
806                 for(int i=0;i<numUniques;i++){
807                         
808                         if (m->control_pressed) { break; }
809                         
810             //                  nameFile << seqNameVector[mapUniqueToSeq[i]] << '\t' << duplicateNames[i].substr(0, duplicateNames[i].find_last_of(',')) << endl;
811                         nameFile << mapUniqueToSeq[i] << '\t' << duplicateNames[i].substr(0, duplicateNames[i].find_last_of(',')) << endl;
812                 }
813                 
814                 nameFile.close();
815                 return  nameFileName;
816         }
817         catch(exception& e) {
818                 m->errorOut(e, "ShhherCommand", "createNamesFile");
819                 exit(1);
820         }
821 }
822 /**************************************************************************************************/
823
824 string ShhherCommand::flowDistMPI(int startSeq, int stopSeq){
825         try{            
826                 ostringstream outStream;
827                 outStream.setf(ios::fixed, ios::floatfield);
828                 outStream.setf(ios::dec, ios::basefield);
829                 outStream.setf(ios::showpoint);
830                 outStream.precision(6);
831                 
832                 int begTime = time(NULL);
833                 double begClock = clock();
834                 
835                 for(int i=startSeq;i<stopSeq;i++){
836                         
837                         if (m->control_pressed) { break; }
838                         
839                         for(int j=0;j<i;j++){
840                                 float flowDistance = calcPairwiseDist(mapUniqueToSeq[i], mapUniqueToSeq[j]);
841                                 
842                                 if(flowDistance < 1e-6){
843                                         outStream << mapUniqueToSeq[i] << '\t' << mapUniqueToSeq[j] << '\t' << 0.000000 << endl;
844                                 }
845                                 else if(flowDistance <= cutoff){
846                                         outStream << mapUniqueToSeq[i] << '\t' << mapUniqueToSeq[j] << '\t' << flowDistance << endl;
847                                 }
848                         }
849                         if(i % 100 == 0){
850                                 m->mothurOut(toString(i) + '\t' + toString(time(NULL) - begTime) + '\t' + toString((clock()-begClock)/CLOCKS_PER_SEC) + '\n');
851                         }
852                 }
853                 
854                 string fDistFileName = flowFileName.substr(0,flowFileName.find_last_of('.')) + ".shhh.dist";
855                 if(pid != 0){   fDistFileName += ".temp." + toString(pid);      }
856                 
857                 if (m->control_pressed) { return fDistFileName; }
858                 
859                 m->mothurOut(toString(stopSeq) + '\t' + toString(time(NULL) - begTime) + '\t' + toString((clock()-begClock)/CLOCKS_PER_SEC) + '\n');
860
861                 ofstream distFile(fDistFileName.c_str());
862                 distFile << outStream.str();            
863                 distFile.close();
864                 
865                 return fDistFileName;
866         }
867         catch(exception& e) {
868                 m->errorOut(e, "ShhherCommand", "flowDistMPI");
869                 exit(1);
870         }
871 }
872 /**************************************************************************************************/
873  
874 void ShhherCommand::getOTUData(string listFileName){
875     try {
876         
877         ifstream listFile;
878         m->openInputFile(listFileName, listFile);
879         string label;
880         
881         listFile >> label >> numOTUs;
882         
883         otuData.assign(numSeqs, 0);
884         cumNumSeqs.assign(numOTUs, 0);
885         nSeqsPerOTU.assign(numOTUs, 0);
886         aaP.clear();aaP.resize(numOTUs);
887         
888         seqNumber.clear();
889         aaI.clear();
890         seqIndex.clear();
891         
892         string singleOTU = "";
893         
894         for(int i=0;i<numOTUs;i++){
895             
896             if (m->control_pressed) { break; }
897             
898             listFile >> singleOTU;
899             
900             istringstream otuString(singleOTU);
901             
902             while(otuString){
903                 
904                 string seqName = "";
905                 
906                 for(int j=0;j<singleOTU.length();j++){
907                     char letter = otuString.get();
908                     
909                     if(letter != ','){
910                         seqName += letter;
911                     }
912                     else{
913                         map<string,int>::iterator nmIt = nameMap.find(seqName);
914                         int index = nmIt->second;
915                         
916                         nameMap.erase(nmIt);
917                         
918                         otuData[index] = i;
919                         nSeqsPerOTU[i]++;
920                         aaP[i].push_back(index);
921                         seqName = "";
922                     }
923                 }
924                 
925                 map<string,int>::iterator nmIt = nameMap.find(seqName);
926                 
927                 int index = nmIt->second;
928                 nameMap.erase(nmIt);
929                 
930                 otuData[index] = i;
931                 nSeqsPerOTU[i]++;
932                 aaP[i].push_back(index);        
933                 
934                 otuString.get();
935             }
936             
937             sort(aaP[i].begin(), aaP[i].end());
938             for(int j=0;j<nSeqsPerOTU[i];j++){
939                 seqNumber.push_back(aaP[i][j]);
940             }
941             for(int j=nSeqsPerOTU[i];j<numSeqs;j++){
942                 aaP[i].push_back(0);
943             }
944             
945             
946         }
947         
948         for(int i=1;i<numOTUs;i++){
949             cumNumSeqs[i] = cumNumSeqs[i-1] + nSeqsPerOTU[i-1];
950         }
951         aaI = aaP;
952         seqIndex = seqNumber;
953         
954         listFile.close();       
955         
956     }
957     catch(exception& e) {
958         m->errorOut(e, "ShhherCommand", "getOTUData");
959         exit(1);        
960     }           
961 }
962
963 /**************************************************************************************************/
964
965 void ShhherCommand::initPyroCluster(){                          
966     try{
967         if (numOTUs < processors) { processors = 1; }
968         
969         if (m->debug) { m->mothurOut("[DEBUG]: numSeqs = " + toString(numSeqs) + " numOTUS = " + toString(numOTUs) + " about to alloc a dist vector with size = " + toString((numSeqs * numOTUs)) + ".\n"); }
970         
971         dist.assign(numSeqs * numOTUs, 0);
972         change.assign(numOTUs, 1);
973         centroids.assign(numOTUs, -1);
974         weight.assign(numOTUs, 0);
975         singleTau.assign(numSeqs, 1.0);
976         
977         nSeqsBreaks.assign(processors+1, 0);
978         nOTUsBreaks.assign(processors+1, 0);
979         
980         if (m->debug) { m->mothurOut("[DEBUG]: made it through the memory allocation.\n"); }
981         
982         nSeqsBreaks[0] = 0;
983         for(int i=0;i<processors;i++){
984             nSeqsBreaks[i+1] = nSeqsBreaks[i] + (int)((double) numSeqs / (double) processors);
985             nOTUsBreaks[i+1] = nOTUsBreaks[i] + (int)((double) numOTUs / (double) processors);
986         }
987         nSeqsBreaks[processors] = numSeqs;
988         nOTUsBreaks[processors] = numOTUs;
989     }
990     catch(exception& e) {
991         m->errorOut(e, "ShhherCommand", "initPyroCluster");
992         exit(1);        
993     }
994 }
995
996 /**************************************************************************************************/
997
998 void ShhherCommand::fill(){
999     try {
1000         int index = 0;
1001         for(int i=0;i<numOTUs;i++){
1002             
1003             if (m->control_pressed) { break; }
1004             
1005             cumNumSeqs[i] = index;
1006             for(int j=0;j<nSeqsPerOTU[i];j++){
1007                 seqNumber[index] = aaP[i][j];
1008                 seqIndex[index] = aaI[i][j];
1009                 
1010                 index++;
1011             }
1012         }
1013     }
1014     catch(exception& e) {
1015         m->errorOut(e, "ShhherCommand", "fill");
1016         exit(1);        
1017     }           
1018 }
1019
1020 /**************************************************************************************************/
1021  
1022 void ShhherCommand::getFlowData(){
1023     try{
1024         ifstream flowFile;
1025         m->openInputFile(flowFileName, flowFile);
1026         
1027         string seqName;
1028         seqNameVector.clear();
1029         lengths.clear();
1030         flowDataIntI.clear();
1031         nameMap.clear();
1032         
1033         
1034         int currentNumFlowCells;
1035         
1036         float intensity;
1037         
1038         string numFlowTest;
1039         flowFile >> numFlowTest;
1040         
1041         if (!m->isContainingOnlyDigits(numFlowTest)) { m->mothurOut("[ERROR]: expected a number and got " + numFlowTest + ", quitting. Did you use the flow parameter instead of the file parameter?"); m->mothurOutEndLine(); exit(1); }
1042         else { convert(numFlowTest, numFlowCells); }
1043         
1044         int index = 0;//pcluster
1045         while(!flowFile.eof()){
1046             
1047             if (m->control_pressed) { break; }
1048             
1049             flowFile >> seqName >> currentNumFlowCells;
1050             lengths.push_back(currentNumFlowCells);
1051             
1052             seqNameVector.push_back(seqName);
1053             nameMap[seqName] = index++;//pcluster
1054             
1055             for(int i=0;i<numFlowCells;i++){
1056                 flowFile >> intensity;
1057                 if(intensity > 9.99)    {       intensity = 9.99;       }
1058                 int intI = int(100 * intensity + 0.0001);
1059                 flowDataIntI.push_back(intI);
1060             }
1061             m->gobble(flowFile);
1062         }
1063         flowFile.close();
1064         
1065         numSeqs = seqNameVector.size();         
1066         
1067         for(int i=0;i<numSeqs;i++){
1068             
1069             if (m->control_pressed) { break; }
1070             
1071             int iNumFlowCells = i * numFlowCells;
1072             for(int j=lengths[i];j<numFlowCells;j++){
1073                 flowDataIntI[iNumFlowCells + j] = 0;
1074             }
1075         }
1076         
1077     }
1078     catch(exception& e) {
1079         m->errorOut(e, "ShhherCommand", "getFlowData");
1080         exit(1);
1081     }
1082 }
1083 /**************************************************************************************************/
1084 void ShhherCommand::calcNewDistancesChildMPI(int startSeq, int stopSeq, vector<int>& otuIndex){
1085         
1086         try{
1087                 vector<double> newTau(numOTUs,0);
1088                 vector<double> norms(numSeqs, 0);
1089                 otuIndex.clear();
1090                 seqIndex.clear();
1091                 singleTau.clear();
1092                 
1093                 for(int i=startSeq;i<stopSeq;i++){
1094                         
1095                         if (m->control_pressed) { break; }
1096                         
1097                         double offset = 1e8;
1098                         int indexOffset = i * numOTUs;
1099                         
1100                         for(int j=0;j<numOTUs;j++){
1101                                 
1102                                 if(weight[j] > MIN_WEIGHT && change[j] == 1){
1103                                         dist[indexOffset + j] = getDistToCentroid(centroids[j], i, lengths[i]);
1104                                 }
1105                                 if(weight[j] > MIN_WEIGHT && dist[indexOffset + j] < offset){
1106                                         offset = dist[indexOffset + j];
1107                                 }
1108                         }
1109                         
1110                         for(int j=0;j<numOTUs;j++){
1111                                 if(weight[j] > MIN_WEIGHT){
1112                                         newTau[j] = exp(sigma * (-dist[indexOffset + j] + offset)) * weight[j];
1113                                         norms[i] += newTau[j];
1114                                 }
1115                                 else{
1116                                         newTau[j] = 0.0;
1117                                 }
1118                         }
1119                         
1120                         for(int j=0;j<numOTUs;j++){
1121                 
1122                                 newTau[j] /= norms[i];
1123                                 
1124                                 if(newTau[j] > MIN_TAU){
1125                                         otuIndex.push_back(j);
1126                                         seqIndex.push_back(i);
1127                                         singleTau.push_back(newTau[j]);
1128                                 }
1129                         }
1130                         
1131                 }
1132         }
1133         catch(exception& e) {
1134                 m->errorOut(e, "ShhherCommand", "calcNewDistancesChildMPI");
1135                 exit(1);        
1136         }               
1137 }
1138
1139 /**************************************************************************************************/
1140
1141 void ShhherCommand::calcNewDistancesParent(int startSeq, int stopSeq){
1142     
1143     try{
1144         
1145         int total = 0;
1146         vector<double> newTau(numOTUs,0);
1147         vector<double> norms(numSeqs, 0);
1148         nSeqsPerOTU.assign(numOTUs, 0);
1149         
1150         for(int i=startSeq;i<stopSeq;i++){
1151             
1152             if (m->control_pressed) { break; }
1153             
1154             int indexOffset = i * numOTUs;
1155             
1156             double offset = 1e8;
1157             
1158             for(int j=0;j<numOTUs;j++){
1159                 
1160                 if(weight[j] > MIN_WEIGHT && change[j] == 1){
1161                     dist[indexOffset + j] = getDistToCentroid(centroids[j], i, lengths[i]);
1162                 }
1163                 
1164                 if(weight[j] > MIN_WEIGHT && dist[indexOffset + j] < offset){
1165                     offset = dist[indexOffset + j];
1166                 }
1167             }
1168             
1169             for(int j=0;j<numOTUs;j++){
1170                 if(weight[j] > MIN_WEIGHT){
1171                     newTau[j] = exp(sigma * (-dist[indexOffset + j] + offset)) * weight[j];
1172                     norms[i] += newTau[j];
1173                 }
1174                 else{
1175                     newTau[j] = 0.0;
1176                 }
1177             }
1178             
1179             for(int j=0;j<numOTUs;j++){
1180                 newTau[j] /= norms[i];
1181             }
1182             
1183             for(int j=0;j<numOTUs;j++){
1184                 if(newTau[j] > MIN_TAU){
1185                     
1186                     int oldTotal = total;
1187                     
1188                     total++;
1189                     
1190                     singleTau.resize(total, 0);
1191                     seqNumber.resize(total, 0);
1192                     seqIndex.resize(total, 0);
1193                     
1194                     singleTau[oldTotal] = newTau[j];
1195                     
1196                     aaP[j][nSeqsPerOTU[j]] = oldTotal;
1197                     aaI[j][nSeqsPerOTU[j]] = i;
1198                     nSeqsPerOTU[j]++;
1199                 }
1200             }
1201             
1202         }
1203         
1204     }
1205     catch(exception& e) {
1206         m->errorOut(e, "ShhherCommand", "calcNewDistancesParent");
1207         exit(1);        
1208     }           
1209 }
1210
1211 /**************************************************************************************************/
1212
1213 void ShhherCommand::setOTUs(){
1214     
1215     try {
1216         vector<double> bigTauMatrix(numOTUs * numSeqs, 0.0000);
1217         
1218         for(int i=0;i<numOTUs;i++){
1219             
1220             if (m->control_pressed) { break; }
1221             
1222             for(int j=0;j<nSeqsPerOTU[i];j++){
1223                 int index = cumNumSeqs[i] + j;
1224                 double tauValue = singleTau[seqNumber[index]];
1225                 int sIndex = seqIndex[index];
1226                 bigTauMatrix[sIndex * numOTUs + i] = tauValue;                          
1227             }
1228         }
1229         
1230         for(int i=0;i<numSeqs;i++){
1231             double maxTau = -1.0000;
1232             int maxOTU = -1;
1233             for(int j=0;j<numOTUs;j++){
1234                 if(bigTauMatrix[i * numOTUs + j] > maxTau){
1235                     maxTau = bigTauMatrix[i * numOTUs + j];
1236                     maxOTU = j;
1237                 }
1238             }
1239             
1240             otuData[i] = maxOTU;
1241         }
1242         
1243         nSeqsPerOTU.assign(numOTUs, 0);         
1244         
1245         for(int i=0;i<numSeqs;i++){
1246             int index = otuData[i];
1247             
1248             singleTau[i] = 1.0000;
1249             dist[i] = 0.0000;
1250             
1251             aaP[index][nSeqsPerOTU[index]] = i;
1252             aaI[index][nSeqsPerOTU[index]] = i;
1253             
1254             nSeqsPerOTU[index]++;
1255         }
1256         fill(); 
1257     }
1258     catch(exception& e) {
1259         m->errorOut(e, "ShhherCommand", "setOTUs");
1260         exit(1);        
1261     }           
1262 }
1263
1264 /**************************************************************************************************/
1265  
1266 void ShhherCommand::getUniques(){
1267     try{
1268         
1269         
1270         numUniques = 0;
1271         uniqueFlowgrams.assign(numFlowCells * numSeqs, -1);
1272         uniqueCount.assign(numSeqs, 0);                                                 //      anWeights
1273         uniqueLengths.assign(numSeqs, 0);
1274         mapSeqToUnique.assign(numSeqs, -1);
1275         mapUniqueToSeq.assign(numSeqs, -1);
1276         
1277         vector<short> uniqueFlowDataIntI(numFlowCells * numSeqs, -1);
1278         
1279         for(int i=0;i<numSeqs;i++){
1280             
1281             if (m->control_pressed) { break; }
1282             
1283             int index = 0;
1284             
1285             vector<short> current(numFlowCells);
1286             for(int j=0;j<numFlowCells;j++){
1287                 current[j] = short(((flowDataIntI[i * numFlowCells + j] + 50.0)/100.0));
1288             }
1289             
1290             for(int j=0;j<numUniques;j++){
1291                 int offset = j * numFlowCells;
1292                 bool toEnd = 1;
1293                 
1294                 int shorterLength;
1295                 if(lengths[i] < uniqueLengths[j])       {       shorterLength = lengths[i];                     }
1296                 else                                                            {       shorterLength = uniqueLengths[j];       }
1297                 
1298                 for(int k=0;k<shorterLength;k++){
1299                     if(current[k] != uniqueFlowgrams[offset + k]){
1300                         toEnd = 0;
1301                         break;
1302                     }
1303                 }
1304                 
1305                 if(toEnd){
1306                     mapSeqToUnique[i] = j;
1307                     uniqueCount[j]++;
1308                     index = j;
1309                     if(lengths[i] > uniqueLengths[j])   {       uniqueLengths[j] = lengths[i];  }
1310                     break;
1311                 }
1312                 index++;
1313             }
1314             
1315             if(index == numUniques){
1316                 uniqueLengths[numUniques] = lengths[i];
1317                 uniqueCount[numUniques] = 1;
1318                 mapSeqToUnique[i] = numUniques;//anMap
1319                 mapUniqueToSeq[numUniques] = i;//anF
1320                 
1321                 for(int k=0;k<numFlowCells;k++){
1322                     uniqueFlowgrams[numUniques * numFlowCells + k] = current[k];
1323                     uniqueFlowDataIntI[numUniques * numFlowCells + k] = flowDataIntI[i * numFlowCells + k];
1324                 }
1325                 
1326                 numUniques++;
1327             }
1328         }
1329         uniqueFlowDataIntI.resize(numFlowCells * numUniques);
1330         uniqueLengths.resize(numUniques);       
1331         
1332         flowDataPrI.resize(numSeqs * numFlowCells, 0);
1333         for(int i=0;i<flowDataPrI.size();i++)   {       if (m->control_pressed) { break; } flowDataPrI[i] = getProbIntensity(flowDataIntI[i]);          }
1334     }
1335     catch(exception& e) {
1336         m->errorOut(e, "ShhherCommand", "getUniques");
1337         exit(1);
1338     }
1339 }
1340
1341 /**************************************************************************************************/
1342
1343 float ShhherCommand::calcPairwiseDist(int seqA, int seqB){
1344     try{
1345         int minLength = lengths[mapSeqToUnique[seqA]];
1346         if(lengths[seqB] < minLength){  minLength = lengths[mapSeqToUnique[seqB]];      }
1347         
1348         int ANumFlowCells = seqA * numFlowCells;
1349         int BNumFlowCells = seqB * numFlowCells;
1350         
1351         float dist = 0;
1352         
1353         for(int i=0;i<minLength;i++){
1354             
1355             if (m->control_pressed) { break; }
1356             
1357             int flowAIntI = flowDataIntI[ANumFlowCells + i];
1358             float flowAPrI = flowDataPrI[ANumFlowCells + i];
1359             
1360             int flowBIntI = flowDataIntI[BNumFlowCells + i];
1361             float flowBPrI = flowDataPrI[BNumFlowCells + i];
1362             dist += jointLookUp[flowAIntI * NUMBINS + flowBIntI] - flowAPrI - flowBPrI;
1363         }
1364         
1365         dist /= (float) minLength;
1366         return dist;
1367     }
1368     catch(exception& e) {
1369         m->errorOut(e, "ShhherCommand", "calcPairwiseDist");
1370         exit(1);
1371     }
1372 }
1373
1374 //**********************************************************************************************************************/
1375
1376 string ShhherCommand::cluster(string distFileName, string namesFileName){
1377     try {
1378         
1379         ReadMatrix* read = new ReadColumnMatrix(distFileName);  
1380                 read->setCutoff(cutoff);
1381                 
1382                 NameAssignment* clusterNameMap = new NameAssignment(namesFileName);
1383                 clusterNameMap->readMap();
1384                 read->read(clusterNameMap);
1385         
1386                 ListVector* list = read->getListVector();
1387                 SparseDistanceMatrix* matrix = read->getDMatrix();
1388                 
1389                 delete read; 
1390                 delete clusterNameMap; 
1391         
1392         RAbundVector* rabund = new RAbundVector(list->getRAbundVector());
1393         
1394         Cluster* cluster = new CompleteLinkage(rabund, list, matrix, cutoff, "furthest"); 
1395         string tag = cluster->getTag();
1396         
1397         double clusterCutoff = cutoff;
1398         while (matrix->getSmallDist() <= clusterCutoff && matrix->getNNodes() > 0){
1399             
1400             if (m->control_pressed) { break; }
1401             
1402             cluster->update(clusterCutoff);
1403         }
1404         
1405         list->setLabel(toString(cutoff));
1406         
1407         string listFileName = flowFileName.substr(0,flowFileName.find_last_of('.')) + ".shhh.list";
1408         ofstream listFile;
1409         m->openOutputFile(listFileName, listFile);
1410         list->print(listFile);
1411         listFile.close();
1412         
1413         delete matrix;  delete cluster; delete rabund; delete list;
1414         
1415         return listFileName;
1416     }
1417     catch(exception& e) {
1418         m->errorOut(e, "ShhherCommand", "cluster");
1419         exit(1);        
1420     }           
1421 }
1422
1423 /**************************************************************************************************/
1424
1425 void ShhherCommand::calcCentroidsDriver(int start, int finish){                          
1426     
1427     //this function gets the most likely homopolymer length at a flow position for a group of sequences
1428     //within an otu
1429     
1430     try{
1431         
1432         for(int i=start;i<finish;i++){
1433             
1434             if (m->control_pressed) { break; }
1435             
1436             double count = 0;
1437             int position = 0;
1438             int minFlowGram = 100000000;
1439             double minFlowValue = 1e8;
1440             change[i] = 0; //FALSE
1441             
1442             for(int j=0;j<nSeqsPerOTU[i];j++){
1443                 count += singleTau[seqNumber[cumNumSeqs[i] + j]];
1444             }
1445             
1446             if(nSeqsPerOTU[i] > 0 && count > MIN_COUNT){
1447                 vector<double> adF(nSeqsPerOTU[i]);
1448                 vector<int> anL(nSeqsPerOTU[i]);
1449                 
1450                 for(int j=0;j<nSeqsPerOTU[i];j++){
1451                     int index = cumNumSeqs[i] + j;
1452                     int nI = seqIndex[index];
1453                     int nIU = mapSeqToUnique[nI];
1454                     
1455                     int k;
1456                     for(k=0;k<position;k++){
1457                         if(nIU == anL[k]){
1458                             break;
1459                         }
1460                     }
1461                     if(k == position){
1462                         anL[position] = nIU;
1463                         adF[position] = 0.0000;
1464                         position++;
1465                     }                                           
1466                 }
1467                 
1468                 for(int j=0;j<nSeqsPerOTU[i];j++){
1469                     int index = cumNumSeqs[i] + j;
1470                     int nI = seqIndex[index];
1471                     
1472                     double tauValue = singleTau[seqNumber[index]];
1473                     
1474                     for(int k=0;k<position;k++){
1475                         double dist = getDistToCentroid(anL[k], nI, lengths[nI]);
1476                         adF[k] += dist * tauValue;
1477                     }
1478                 }
1479                 
1480                 for(int j=0;j<position;j++){
1481                     if(adF[j] < minFlowValue){
1482                         minFlowGram = j;
1483                         minFlowValue = adF[j];
1484                     }
1485                 }
1486                 
1487                 if(centroids[i] != anL[minFlowGram]){
1488                     change[i] = 1;
1489                     centroids[i] = anL[minFlowGram];
1490                 }
1491             }
1492             else if(centroids[i] != -1){
1493                 change[i] = 1;
1494                 centroids[i] = -1;                      
1495             }
1496         }
1497     }
1498     catch(exception& e) {
1499         m->errorOut(e, "ShhherCommand", "calcCentroidsDriver");
1500         exit(1);        
1501     }           
1502 }
1503
1504 /**************************************************************************************************/
1505
1506 double ShhherCommand::getDistToCentroid(int cent, int flow, int length){
1507     try{
1508         
1509         int flowAValue = cent * numFlowCells;
1510         int flowBValue = flow * numFlowCells;
1511         
1512         double dist = 0;
1513         
1514         for(int i=0;i<length;i++){
1515             dist += singleLookUp[uniqueFlowgrams[flowAValue] * NUMBINS + flowDataIntI[flowBValue]];
1516             flowAValue++;
1517             flowBValue++;
1518         }
1519         
1520         return dist / (double)length;
1521     }
1522     catch(exception& e) {
1523         m->errorOut(e, "ShhherCommand", "getDistToCentroid");
1524         exit(1);        
1525     }           
1526 }
1527
1528 /**************************************************************************************************/
1529
1530 double ShhherCommand::getNewWeights(){
1531     try{
1532         
1533         double maxChange = 0;
1534         
1535         for(int i=0;i<numOTUs;i++){
1536             
1537             if (m->control_pressed) { break; }
1538             
1539             double difference = weight[i];
1540             weight[i] = 0;
1541             
1542             for(int j=0;j<nSeqsPerOTU[i];j++){
1543                 int index = cumNumSeqs[i] + j;
1544                 double tauValue = singleTau[seqNumber[index]];
1545                 weight[i] += tauValue;
1546             }
1547             
1548             difference = fabs(weight[i] - difference);
1549             if(difference > maxChange){ maxChange = difference; }
1550         }
1551         return maxChange;
1552     }
1553     catch(exception& e) {
1554         m->errorOut(e, "ShhherCommand", "getNewWeights");
1555         exit(1);        
1556     }           
1557 }
1558  
1559  /**************************************************************************************************/
1560  
1561 double ShhherCommand::getLikelihood(){
1562     
1563     try{
1564         
1565         vector<long double> P(numSeqs, 0);
1566         int effNumOTUs = 0;
1567         
1568         for(int i=0;i<numOTUs;i++){
1569             if(weight[i] > MIN_WEIGHT){
1570                 effNumOTUs++;
1571             }
1572         }
1573         
1574         string hold;
1575         for(int i=0;i<numOTUs;i++){
1576             
1577             if (m->control_pressed) { break; }
1578             
1579             for(int j=0;j<nSeqsPerOTU[i];j++){
1580                 int index = cumNumSeqs[i] + j;
1581                 int nI = seqIndex[index];
1582                 double singleDist = dist[seqNumber[index]];
1583                 
1584                 P[nI] += weight[i] * exp(-singleDist * sigma);
1585             }
1586         }
1587         double nLL = 0.00;
1588         for(int i=0;i<numSeqs;i++){
1589             if(P[i] == 0){      P[i] = DBL_EPSILON;     }
1590             
1591             nLL += -log(P[i]);
1592         }
1593         
1594         nLL = nLL -(double)numSeqs * log(sigma);
1595         
1596         return nLL; 
1597     }
1598     catch(exception& e) {
1599         m->errorOut(e, "ShhherCommand", "getNewWeights");
1600         exit(1);        
1601     }           
1602 }
1603
1604 /**************************************************************************************************/
1605
1606 void ShhherCommand::checkCentroids(){
1607     try{
1608         vector<int> unique(numOTUs, 1);
1609         
1610         for(int i=0;i<numOTUs;i++){
1611             if(centroids[i] == -1 || weight[i] < MIN_WEIGHT){
1612                 unique[i] = -1;
1613             }
1614         }
1615         
1616         for(int i=0;i<numOTUs;i++){
1617             
1618             if (m->control_pressed) { break; }
1619             
1620             if(unique[i] == 1){
1621                 for(int j=i+1;j<numOTUs;j++){
1622                     if(unique[j] == 1){
1623                         
1624                         if(centroids[j] == centroids[i]){
1625                             unique[j] = 0;
1626                             centroids[j] = -1;
1627                             
1628                             weight[i] += weight[j];
1629                             weight[j] = 0.0;
1630                         }
1631                     }
1632                 }
1633             }
1634         }
1635     }
1636     catch(exception& e) {
1637         m->errorOut(e, "ShhherCommand", "checkCentroids");
1638         exit(1);        
1639     }           
1640 }
1641  /**************************************************************************************************/
1642
1643
1644  
1645 void ShhherCommand::writeQualities(vector<int> otuCounts){
1646     
1647     try {
1648         string thisOutputDir = outputDir;
1649         if (outputDir == "") {  thisOutputDir += m->hasPath(flowFileName);  }
1650         map<string, string> variables; 
1651                 variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(flowFileName));
1652         string qualityFileName = getOutputFileName("qfile",variables);
1653         
1654         ofstream qualityFile;
1655         m->openOutputFile(qualityFileName, qualityFile);
1656         
1657         qualityFile.setf(ios::fixed, ios::floatfield);
1658         qualityFile.setf(ios::showpoint);
1659         qualityFile << setprecision(6);
1660         
1661         vector<vector<int> > qualities(numOTUs);
1662         vector<double> pr(HOMOPS, 0);
1663         
1664         
1665         for(int i=0;i<numOTUs;i++){
1666             
1667             if (m->control_pressed) { break; }
1668             
1669             int index = 0;
1670             int base = 0;
1671             
1672             if(nSeqsPerOTU[i] > 0){
1673                 qualities[i].assign(1024, -1);
1674                 
1675                 while(index < numFlowCells){
1676                     double maxPrValue = 1e8;
1677                     short maxPrIndex = -1;
1678                     double count = 0.0000;
1679                     
1680                     pr.assign(HOMOPS, 0);
1681                     
1682                     for(int j=0;j<nSeqsPerOTU[i];j++){
1683                         int lIndex = cumNumSeqs[i] + j;
1684                         double tauValue = singleTau[seqNumber[lIndex]];
1685                         int sequenceIndex = aaI[i][j];
1686                         short intensity = flowDataIntI[sequenceIndex * numFlowCells + index];
1687                         
1688                         count += tauValue;
1689                         
1690                         for(int s=0;s<HOMOPS;s++){
1691                             pr[s] += tauValue * singleLookUp[s * NUMBINS + intensity];
1692                         }
1693                     }
1694                     
1695                     maxPrIndex = uniqueFlowgrams[centroids[i] * numFlowCells + index];
1696                     maxPrValue = pr[maxPrIndex];
1697                     
1698                     if(count > MIN_COUNT){
1699                         double U = 0.0000;
1700                         double norm = 0.0000;
1701                         
1702                         for(int s=0;s<HOMOPS;s++){
1703                             norm += exp(-(pr[s] - maxPrValue));
1704                         }
1705                         
1706                         for(int s=1;s<=maxPrIndex;s++){
1707                             int value = 0;
1708                             double temp = 0.0000;
1709                             
1710                             U += exp(-(pr[s-1]-maxPrValue))/norm;
1711                             
1712                             if(U>0.00){
1713                                 temp = log10(U);
1714                             }
1715                             else{
1716                                 temp = -10.1;
1717                             }
1718                             temp = floor(-10 * temp);
1719                             value = (int)floor(temp);
1720                             if(value > 100){    value = 100;    }
1721                             
1722                             qualities[i][base] = (int)value;
1723                             base++;
1724                         }
1725                     }
1726                     
1727                     index++;
1728                 }
1729             }
1730             
1731             
1732             if(otuCounts[i] > 0){
1733                 qualityFile << '>' << seqNameVector[mapUniqueToSeq[i]] << endl;
1734                 
1735                 int j=4;        //need to get past the first four bases
1736                 while(qualities[i][j] != -1){
1737                     qualityFile << qualities[i][j] << ' ';
1738                     j++;
1739                 }
1740                 qualityFile << endl;
1741             }
1742         }
1743         qualityFile.close();
1744         outputNames.push_back(qualityFileName); outputTypes["qfile"].push_back(qualityFileName);
1745         
1746     }
1747     catch(exception& e) {
1748         m->errorOut(e, "ShhherCommand", "writeQualities");
1749         exit(1);        
1750     }           
1751 }
1752
1753 /**************************************************************************************************/
1754
1755 void ShhherCommand::writeSequences(vector<int> otuCounts){
1756     try {
1757         string thisOutputDir = outputDir;
1758         if (outputDir == "") {  thisOutputDir += m->hasPath(flowFileName);  }
1759         map<string, string> variables; 
1760                 variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(flowFileName));
1761         string fastaFileName = getOutputFileName("fasta",variables);
1762         ofstream fastaFile;
1763         m->openOutputFile(fastaFileName, fastaFile);
1764         
1765         vector<string> names(numOTUs, "");
1766         
1767         for(int i=0;i<numOTUs;i++){
1768             
1769             if (m->control_pressed) { break; }
1770             
1771             int index = centroids[i];
1772             
1773             if(otuCounts[i] > 0){
1774                 fastaFile << '>' << seqNameVector[aaI[i][0]] << endl;
1775                 
1776                 string newSeq = "";
1777                 
1778                 for(int j=0;j<numFlowCells;j++){
1779                     
1780                     char base = flowOrder[j % 4];
1781                     for(int k=0;k<uniqueFlowgrams[index * numFlowCells + j];k++){
1782                         newSeq += base;
1783                     }
1784                 }
1785                 
1786                 fastaFile << newSeq.substr(4) << endl;
1787             }
1788         }
1789         fastaFile.close();
1790         
1791         outputNames.push_back(fastaFileName); outputTypes["fasta"].push_back(fastaFileName);
1792         
1793         if(compositeFASTAFileName != ""){
1794             m->appendFiles(fastaFileName, compositeFASTAFileName);
1795         }
1796     }
1797     catch(exception& e) {
1798         m->errorOut(e, "ShhherCommand", "writeSequences");
1799         exit(1);        
1800     }           
1801 }
1802
1803 /**************************************************************************************************/
1804
1805 void ShhherCommand::writeNames(vector<int> otuCounts){
1806     try {
1807         string thisOutputDir = outputDir;
1808         if (outputDir == "") {  thisOutputDir += m->hasPath(flowFileName);  }
1809         map<string, string> variables; 
1810                 variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(flowFileName));
1811         string nameFileName = getOutputFileName("name",variables);
1812         ofstream nameFile;
1813         m->openOutputFile(nameFileName, nameFile);
1814         
1815         for(int i=0;i<numOTUs;i++){
1816             
1817             if (m->control_pressed) { break; }
1818             
1819             if(otuCounts[i] > 0){
1820                 nameFile << seqNameVector[aaI[i][0]] << '\t' << seqNameVector[aaI[i][0]];
1821                 
1822                 for(int j=1;j<nSeqsPerOTU[i];j++){
1823                     nameFile << ',' << seqNameVector[aaI[i][j]];
1824                 }
1825                 
1826                 nameFile << endl;
1827             }
1828         }
1829         nameFile.close();
1830         outputNames.push_back(nameFileName); outputTypes["name"].push_back(nameFileName);
1831         
1832         
1833         if(compositeNamesFileName != ""){
1834             m->appendFiles(nameFileName, compositeNamesFileName);
1835         }               
1836     }
1837     catch(exception& e) {
1838         m->errorOut(e, "ShhherCommand", "writeNames");
1839         exit(1);        
1840     }           
1841 }
1842
1843 /**************************************************************************************************/
1844
1845 void ShhherCommand::writeGroups(){
1846     try {
1847         string thisOutputDir = outputDir;
1848         if (outputDir == "") {  thisOutputDir += m->hasPath(flowFileName);  }
1849         string fileRoot = m->getRootName(m->getSimpleName(flowFileName));
1850         int pos = fileRoot.find_first_of('.');
1851         string fileGroup = fileRoot;
1852         if (pos != string::npos) {  fileGroup = fileRoot.substr(pos+1, (fileRoot.length()-1-(pos+1)));  }
1853         map<string, string> variables; 
1854                 variables["[filename]"] = thisOutputDir + fileRoot;
1855         string groupFileName = getOutputFileName("group",variables);
1856         ofstream groupFile;
1857         m->openOutputFile(groupFileName, groupFile);
1858         
1859         for(int i=0;i<numSeqs;i++){
1860             if (m->control_pressed) { break; }
1861             groupFile << seqNameVector[i] << '\t' << fileGroup << endl;
1862         }
1863         groupFile.close();
1864         outputNames.push_back(groupFileName); outputTypes["group"].push_back(groupFileName);
1865         
1866     }
1867     catch(exception& e) {
1868         m->errorOut(e, "ShhherCommand", "writeGroups");
1869         exit(1);        
1870     }           
1871 }
1872
1873 /**************************************************************************************************/
1874
1875 void ShhherCommand::writeClusters(vector<int> otuCounts){
1876     try {
1877         string thisOutputDir = outputDir;
1878         if (outputDir == "") {  thisOutputDir += m->hasPath(flowFileName);  }
1879         map<string, string> variables; 
1880                 variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(flowFileName));
1881         string otuCountsFileName = getOutputFileName("counts",variables);
1882         ofstream otuCountsFile;
1883         m->openOutputFile(otuCountsFileName, otuCountsFile);
1884         
1885         string bases = flowOrder;
1886         
1887         for(int i=0;i<numOTUs;i++){
1888             
1889             if (m->control_pressed) {
1890                 break;
1891             }
1892             //output the translated version of the centroid sequence for the otu
1893             if(otuCounts[i] > 0){
1894                 int index = centroids[i];
1895                 
1896                 otuCountsFile << "ideal\t";
1897                 for(int j=8;j<numFlowCells;j++){
1898                     char base = bases[j % 4];
1899                     for(int s=0;s<uniqueFlowgrams[index * numFlowCells + j];s++){
1900                         otuCountsFile << base;
1901                     }
1902                 }
1903                 otuCountsFile << endl;
1904                 
1905                 for(int j=0;j<nSeqsPerOTU[i];j++){
1906                     int sequence = aaI[i][j];
1907                     otuCountsFile << seqNameVector[sequence] << '\t';
1908                     
1909                     string newSeq = "";
1910                     
1911                     for(int k=0;k<lengths[sequence];k++){
1912                         char base = bases[k % 4];
1913                         int freq = int(0.01 * (double)flowDataIntI[sequence * numFlowCells + k] + 0.5);
1914                         
1915                         for(int s=0;s<freq;s++){
1916                             newSeq += base;
1917                             //otuCountsFile << base;
1918                         }
1919                     }
1920                     otuCountsFile << newSeq.substr(4) << endl;
1921                 }
1922                 otuCountsFile << endl;
1923             }
1924         }
1925         otuCountsFile.close();
1926         outputNames.push_back(otuCountsFileName); outputTypes["counts"].push_back(otuCountsFileName);
1927         
1928     }
1929     catch(exception& e) {
1930         m->errorOut(e, "ShhherCommand", "writeClusters");
1931         exit(1);        
1932     }           
1933 }
1934
1935 #else
1936 //**********************************************************************************************************************
1937
1938 int ShhherCommand::execute(){
1939         try {
1940                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
1941                 
1942                 getSingleLookUp();      if (m->control_pressed) { return 0; }
1943                 getJointLookUp();       if (m->control_pressed) { return 0; }
1944                 
1945         int numFiles = flowFileVector.size();
1946                 
1947         if (numFiles < processors) { processors = numFiles; }
1948         
1949 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
1950         if (processors == 1) { driver(flowFileVector, compositeFASTAFileName, compositeNamesFileName, 0, flowFileVector.size()); }
1951         else { createProcesses(flowFileVector); } //each processor processes one file
1952 #else
1953         driver(flowFileVector, compositeFASTAFileName, compositeNamesFileName, 0, flowFileVector.size());
1954 #endif
1955         
1956                 if(compositeFASTAFileName != ""){
1957                         outputNames.push_back(compositeFASTAFileName); outputTypes["fasta"].push_back(compositeFASTAFileName);
1958                         outputNames.push_back(compositeNamesFileName); outputTypes["name"].push_back(compositeNamesFileName);
1959                 }
1960
1961                 m->mothurOutEndLine();
1962                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
1963                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
1964                 m->mothurOutEndLine();
1965                 
1966                 return 0;
1967         }
1968         catch(exception& e) {
1969                 m->errorOut(e, "ShhherCommand", "execute");
1970                 exit(1);
1971         }
1972 }
1973 #endif
1974 /**************************************************************************************************/
1975
1976 int ShhherCommand::createProcesses(vector<string> filenames){
1977     try {
1978         vector<int> processIDS;
1979                 int process = 1;
1980                 int num = 0;
1981                 
1982                 //sanity check
1983                 if (filenames.size() < processors) { processors = filenames.size(); }
1984                 
1985                 //divide the groups between the processors
1986                 vector<linePair> lines;
1987         vector<int> numFilesToComplete;
1988                 int numFilesPerProcessor = filenames.size() / processors;
1989                 for (int i = 0; i < processors; i++) {
1990                         int startIndex =  i * numFilesPerProcessor;
1991                         int endIndex = (i+1) * numFilesPerProcessor;
1992                         if(i == (processors - 1)){      endIndex = filenames.size();    }
1993                         lines.push_back(linePair(startIndex, endIndex));
1994             numFilesToComplete.push_back((endIndex-startIndex));
1995                 }
1996                 
1997         #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)          
1998                 
1999                 //loop through and create all the processes you want
2000                 while (process != processors) {
2001                         int pid = fork();
2002                         
2003                         if (pid > 0) {
2004                                 processIDS.push_back(pid);  //create map from line number to pid so you can append files in correct order later
2005                                 process++;
2006                         }else if (pid == 0){
2007                                 num = driver(filenames, compositeFASTAFileName + toString(getpid()) + ".temp", compositeNamesFileName  + toString(getpid()) + ".temp", lines[process].start, lines[process].end);
2008                 
2009                 //pass numSeqs to parent
2010                                 ofstream out;
2011                                 string tempFile = compositeFASTAFileName + toString(getpid()) + ".num.temp";
2012                                 m->openOutputFile(tempFile, out);
2013                                 out << num << endl;
2014                                 out.close();
2015                 
2016                                 exit(0);
2017                         }else { 
2018                                 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine(); 
2019                                 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
2020                                 exit(0);
2021                         }
2022                 }
2023                 
2024                 //do my part
2025                 driver(filenames, compositeFASTAFileName, compositeNamesFileName, lines[0].start, lines[0].end);
2026                 
2027                 //force parent to wait until all the processes are done
2028                 for (int i=0;i<processIDS.size();i++) { 
2029                         int temp = processIDS[i];
2030                         wait(&temp);
2031                 }
2032         
2033         #else
2034         
2035         //////////////////////////////////////////////////////////////////////////////////////////////////////
2036         
2037         /////////////////////// NOT WORKING, ACCESS VIOLATION ON READ OF FLOWGRAMS IN THREAD /////////////////
2038         
2039         //////////////////////////////////////////////////////////////////////////////////////////////////////
2040                 //Windows version shared memory, so be careful when passing variables through the shhhFlowsData struct. 
2041                 //Above fork() will clone, so memory is separate, but that's not the case with windows, 
2042                 //////////////////////////////////////////////////////////////////////////////////////////////////////
2043                 /*
2044                 vector<shhhFlowsData*> pDataArray; 
2045                 DWORD   dwThreadIdArray[processors-1];
2046                 HANDLE  hThreadArray[processors-1]; 
2047                 
2048                 //Create processor worker threads.
2049                 for( int i=0; i<processors-1; i++ ){
2050                         // Allocate memory for thread data.
2051                         string extension = "";
2052                         if (i != 0) { extension = toString(i) + ".temp"; }
2053                         
2054             shhhFlowsData* tempFlow = new shhhFlowsData(filenames, (compositeFASTAFileName + extension), (compositeNamesFileName + extension), outputDir, flowOrder, jointLookUp, singleLookUp, m, lines[i].start, lines[i].end, cutoff, sigma, minDelta, maxIters, i);
2055                         pDataArray.push_back(tempFlow);
2056                         processIDS.push_back(i);
2057             
2058                         hThreadArray[i] = CreateThread(NULL, 0, ShhhFlowsThreadFunction, pDataArray[i], 0, &dwThreadIdArray[i]);   
2059                 }
2060                 
2061                 //using the main process as a worker saves time and memory
2062                 //do my part
2063                 driver(filenames, compositeFASTAFileName, compositeNamesFileName, lines[processors-1].start, lines[processors-1].end);
2064                 
2065                 //Wait until all threads have terminated.
2066                 WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE);
2067                 
2068                 //Close all thread handles and free memory allocations.
2069                 for(int i=0; i < pDataArray.size(); i++){
2070                         for(int j=0; j < pDataArray[i]->outputNames.size(); j++){ outputNames.push_back(pDataArray[i]->outputNames[j]); }
2071                         CloseHandle(hThreadArray[i]);
2072                         delete pDataArray[i];
2073                 }
2074                 */
2075         #endif
2076         
2077         for (int i=0;i<processIDS.size();i++) { 
2078             ifstream in;
2079                         string tempFile =  compositeFASTAFileName + toString(processIDS[i]) + ".num.temp";
2080                         m->openInputFile(tempFile, in);
2081                         if (!in.eof()) { 
2082                 int tempNum = 0; 
2083                 in >> tempNum; 
2084                 if (tempNum != numFilesToComplete[i+1]) {
2085                     m->mothurOut("[ERROR]: main process expected " + toString(processIDS[i]) + " to complete " + toString(numFilesToComplete[i+1]) + " files, and it only reported completing " + toString(tempNum) + ". This will cause file mismatches.  The flow files may be too large to process with multiple processors. \n");
2086                 }
2087             }
2088                         in.close(); m->mothurRemove(tempFile);
2089             
2090             if (compositeFASTAFileName != "") {
2091                 m->appendFiles((compositeFASTAFileName + toString(processIDS[i]) + ".temp"), compositeFASTAFileName);
2092                 m->appendFiles((compositeNamesFileName + toString(processIDS[i]) + ".temp"), compositeNamesFileName);
2093                 m->mothurRemove((compositeFASTAFileName + toString(processIDS[i]) + ".temp"));
2094                 m->mothurRemove((compositeNamesFileName + toString(processIDS[i]) + ".temp"));
2095             }
2096         }
2097         
2098         return 0;
2099         
2100     }
2101         catch(exception& e) {
2102                 m->errorOut(e, "ShhherCommand", "createProcesses");
2103                 exit(1);
2104         }
2105 }
2106 /**************************************************************************************************/
2107
2108 vector<string> ShhherCommand::parseFlowFiles(string filename){
2109     try {
2110         vector<string> files;
2111         int count = 0;
2112         
2113         ifstream in;
2114         m->openInputFile(filename, in);
2115         
2116         int thisNumFLows = 0;
2117         in >> thisNumFLows; m->gobble(in);
2118         
2119         while (!in.eof()) {
2120             if (m->control_pressed) { break; }
2121             
2122             ofstream out;
2123             string outputFileName = filename + toString(count) + ".temp";
2124             m->openOutputFile(outputFileName, out);
2125             out << thisNumFLows << endl;
2126             files.push_back(outputFileName);
2127             
2128             int numLinesWrote = 0;
2129             for (int i = 0; i < largeSize; i++) {
2130                 if (in.eof()) { break; }
2131                 string line = m->getline(in); m->gobble(in);
2132                 out << line << endl;
2133                 numLinesWrote++;
2134             }
2135             out.close();
2136             
2137             if (numLinesWrote == 0) {  m->mothurRemove(outputFileName); files.pop_back();  }
2138             count++;
2139         }
2140         in.close();
2141         
2142         if (m->control_pressed) { for (int i = 0; i < files.size(); i++) { m->mothurRemove(files[i]); }  files.clear(); }
2143         
2144         m->mothurOut("\nDivided " + filename + " into " + toString(files.size()) + " files.\n\n"); 
2145         
2146         return files;
2147     }
2148         catch(exception& e) {
2149                 m->errorOut(e, "ShhherCommand", "parseFlowFiles");
2150                 exit(1);
2151         }
2152 }
2153 /**************************************************************************************************/
2154
2155 int ShhherCommand::driver(vector<string> filenames, string thisCompositeFASTAFileName, string thisCompositeNamesFileName, int start, int end){
2156     try {
2157         
2158         int numCompleted = 0;
2159         
2160         for(int i=start;i<end;i++){
2161                         
2162                         if (m->control_pressed) { break; }
2163                         
2164             vector<string> theseFlowFileNames; theseFlowFileNames.push_back(filenames[i]);
2165             if (large) {  theseFlowFileNames = parseFlowFiles(filenames[i]);  }
2166             
2167             if (m->control_pressed) { break; }
2168             
2169             double begClock = clock();
2170             unsigned long long begTime;
2171             
2172             for (int g = 0; g < theseFlowFileNames.size(); g++) {
2173                 
2174                 string flowFileName = theseFlowFileNames[g];
2175                 m->mothurOut("\n>>>>>\tProcessing " + flowFileName + " (file " + toString(i+1) + " of " + toString(filenames.size()) + ")\t<<<<<\n");
2176                 m->mothurOut("Reading flowgrams...\n");
2177                 
2178                 vector<string> seqNameVector;
2179                 vector<int> lengths;
2180                 vector<short> flowDataIntI;
2181                 vector<double> flowDataPrI;
2182                 map<string, int> nameMap;
2183                 vector<short> uniqueFlowgrams;
2184                 vector<int> uniqueCount;
2185                 vector<int> mapSeqToUnique;
2186                 vector<int> mapUniqueToSeq;
2187                 vector<int> uniqueLengths;
2188                 int numFlowCells;
2189                 
2190                 if (m->debug) { m->mothurOut("[DEBUG]: About to read flowgrams.\n"); }
2191                 int numSeqs = getFlowData(flowFileName, seqNameVector, lengths, flowDataIntI, nameMap, numFlowCells);
2192                 
2193                 if (m->control_pressed) { break; }
2194                 
2195                 m->mothurOut("Identifying unique flowgrams...\n");
2196                 int numUniques = getUniques(numSeqs, numFlowCells, uniqueFlowgrams, uniqueCount, uniqueLengths, mapSeqToUnique, mapUniqueToSeq, lengths, flowDataPrI, flowDataIntI);
2197                 
2198                 if (m->control_pressed) { break; }
2199                 
2200                 m->mothurOut("Calculating distances between flowgrams...\n");
2201                 string distFileName = flowFileName.substr(0,flowFileName.find_last_of('.')) + ".shhh.dist";
2202                 begTime = time(NULL);
2203                
2204                 
2205                 flowDistParentFork(numFlowCells, distFileName, numUniques, mapUniqueToSeq, mapSeqToUnique, lengths, flowDataPrI, flowDataIntI); 
2206                 
2207                 m->mothurOutEndLine();
2208                 m->mothurOut("Total time: " + toString(time(NULL) - begTime) + '\t' + toString((clock() - begClock)/CLOCKS_PER_SEC) + '\n');
2209                 
2210                 
2211                 string namesFileName = flowFileName.substr(0,flowFileName.find_last_of('.')) + ".shhh.names";
2212                 createNamesFile(numSeqs, numUniques, namesFileName, seqNameVector, mapSeqToUnique, mapUniqueToSeq);
2213                 
2214                 if (m->control_pressed) { break; }
2215                 
2216                 m->mothurOut("\nClustering flowgrams...\n");
2217                 string listFileName = flowFileName.substr(0,flowFileName.find_last_of('.')) + ".shhh.list";
2218                 cluster(listFileName, distFileName, namesFileName);
2219                 
2220                 if (m->control_pressed) { break; }
2221                 
2222                 vector<int> otuData;
2223                 vector<int> cumNumSeqs;
2224                 vector<int> nSeqsPerOTU;
2225                 vector<vector<int> > aaP;       //tMaster->aanP:        each row is a different otu / each col contains the sequence indices
2226                 vector<vector<int> > aaI;       //tMaster->aanI:        that are in each otu - can't differentiate between aaP and aaI 
2227                 vector<int> seqNumber;          //tMaster->anP:         the sequence id number sorted by OTU
2228                 vector<int> seqIndex;           //tMaster->anI;         the index that corresponds to seqNumber
2229                 
2230                 
2231                 int numOTUs = getOTUData(numSeqs, listFileName, otuData, cumNumSeqs, nSeqsPerOTU, aaP, aaI, seqNumber, seqIndex, nameMap);
2232                 
2233                 if (m->control_pressed) { break; }
2234                 
2235                 m->mothurRemove(distFileName);
2236                 m->mothurRemove(namesFileName);
2237                 m->mothurRemove(listFileName);
2238                 
2239                 vector<double> dist;            //adDist - distance of sequences to centroids
2240                 vector<short> change;           //did the centroid sequence change? 0 = no; 1 = yes
2241                 vector<int> centroids;          //the representative flowgram for each cluster m
2242                 vector<double> weight;
2243                 vector<double> singleTau;       //tMaster->adTau:       1-D Tau vector (1xnumSeqs)
2244                 vector<int> nSeqsBreaks;
2245                 vector<int> nOTUsBreaks;
2246                 
2247                 if (m->debug) { m->mothurOut("[DEBUG]: numSeqs = " + toString(numSeqs) + " numOTUS = " + toString(numOTUs) + " about to alloc a dist vector with size = " + toString((numSeqs * numOTUs)) + ".\n"); }
2248                 
2249                 dist.assign(numSeqs * numOTUs, 0);
2250                 change.assign(numOTUs, 1);
2251                 centroids.assign(numOTUs, -1);
2252                 weight.assign(numOTUs, 0);
2253                 singleTau.assign(numSeqs, 1.0);
2254                 
2255                 nSeqsBreaks.assign(2, 0);
2256                 nOTUsBreaks.assign(2, 0);
2257                 
2258                 nSeqsBreaks[0] = 0;
2259                 nSeqsBreaks[1] = numSeqs;
2260                 nOTUsBreaks[1] = numOTUs;
2261                 
2262                 if (m->debug) { m->mothurOut("[DEBUG]: done allocating memory, about to denoise.\n"); }
2263                 
2264                 if (m->control_pressed) { break; }
2265                 
2266                 double maxDelta = 0;
2267                 int iter = 0;
2268                 
2269                 begClock = clock();
2270                 begTime = time(NULL);
2271                 
2272                 m->mothurOut("\nDenoising flowgrams...\n");
2273                 m->mothurOut("iter\tmaxDelta\tnLL\t\tcycletime\n");
2274                 
2275                 while((maxIters == 0 && maxDelta > minDelta) || iter < MIN_ITER || (maxDelta > minDelta && iter < maxIters)){
2276                     
2277                     if (m->control_pressed) { break; }
2278                     
2279                     double cycClock = clock();
2280                     unsigned long long cycTime = time(NULL);
2281                     fill(numOTUs, seqNumber, seqIndex, cumNumSeqs, nSeqsPerOTU, aaP, aaI);
2282                     
2283                     if (m->control_pressed) { break; }
2284                     
2285                     calcCentroidsDriver(numOTUs, cumNumSeqs, nSeqsPerOTU, seqIndex, change, centroids, singleTau, mapSeqToUnique, uniqueFlowgrams, flowDataIntI, lengths, numFlowCells, seqNumber);
2286                     
2287                     if (m->control_pressed) { break; }
2288                     
2289                     maxDelta = getNewWeights(numOTUs, cumNumSeqs, nSeqsPerOTU, singleTau, seqNumber, weight);  
2290                     
2291                     if (m->control_pressed) { break; }
2292                     
2293                     double nLL = getLikelihood(numSeqs, numOTUs, nSeqsPerOTU, seqNumber, cumNumSeqs, seqIndex, dist, weight); 
2294                     
2295                     if (m->control_pressed) { break; }
2296                     
2297                     checkCentroids(numOTUs, centroids, weight);
2298                     
2299                     if (m->control_pressed) { break; }
2300                     
2301                     calcNewDistances(numSeqs, numOTUs, nSeqsPerOTU,  dist, weight, change, centroids, aaP, singleTau, aaI, seqNumber, seqIndex, uniqueFlowgrams, flowDataIntI, numFlowCells, lengths);
2302                     
2303                     if (m->control_pressed) { break; }
2304                     
2305                     iter++;
2306                     
2307                     m->mothurOut(toString(iter) + '\t' + toString(maxDelta) + '\t' + toString(nLL) + '\t' + toString(time(NULL) - cycTime) + '\t' + toString((clock() - cycClock)/(double)CLOCKS_PER_SEC) + '\n');
2308                     
2309                 }       
2310                 
2311                 if (m->control_pressed) { break; }
2312                 
2313                 m->mothurOut("\nFinalizing...\n");
2314                 fill(numOTUs, seqNumber, seqIndex, cumNumSeqs, nSeqsPerOTU, aaP, aaI);
2315                 
2316                 if (m->control_pressed) { break; }
2317                 
2318                 setOTUs(numOTUs, numSeqs, seqNumber, seqIndex, cumNumSeqs, nSeqsPerOTU, otuData, singleTau, dist, aaP, aaI);
2319                 
2320                 if (m->control_pressed) { break; }
2321                 
2322                 vector<int> otuCounts(numOTUs, 0);
2323                 for(int j=0;j<numSeqs;j++)      {       otuCounts[otuData[j]]++;        }
2324                 
2325                 calcCentroidsDriver(numOTUs, cumNumSeqs, nSeqsPerOTU, seqIndex, change, centroids, singleTau, mapSeqToUnique, uniqueFlowgrams, flowDataIntI, lengths, numFlowCells, seqNumber); 
2326                 
2327                 if (m->control_pressed) { break; }
2328                 
2329                 if ((large) && (g == 0)) {  flowFileName = filenames[i]; theseFlowFileNames[0] = filenames[i]; }
2330                 string thisOutputDir = outputDir;
2331                 if (outputDir == "") {  thisOutputDir = m->hasPath(flowFileName);  }
2332                 map<string, string> variables; 
2333                 variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(flowFileName));
2334                 string qualityFileName = getOutputFileName("qfile",variables);
2335                 string fastaFileName = getOutputFileName("fasta",variables);
2336                 string nameFileName = getOutputFileName("name",variables);
2337                 string otuCountsFileName = getOutputFileName("counts",variables);
2338                 string fileRoot = m->getRootName(m->getSimpleName(flowFileName));
2339                 int pos = fileRoot.find_first_of('.');
2340                 string fileGroup = fileRoot;
2341                 if (pos != string::npos) {  fileGroup = fileRoot.substr(pos+1, (fileRoot.length()-1-(pos+1)));  }
2342                 string groupFileName = getOutputFileName("group",variables);
2343
2344                 
2345                 writeQualities(numOTUs, numFlowCells, qualityFileName, otuCounts, nSeqsPerOTU, seqNumber, singleTau, flowDataIntI, uniqueFlowgrams, cumNumSeqs, mapUniqueToSeq, seqNameVector, centroids, aaI); if (m->control_pressed) { break; }
2346                 writeSequences(thisCompositeFASTAFileName, numOTUs, numFlowCells, fastaFileName, otuCounts, uniqueFlowgrams, seqNameVector, aaI, centroids);if (m->control_pressed) { break; }
2347                 writeNames(thisCompositeNamesFileName, numOTUs, nameFileName, otuCounts, seqNameVector, aaI, nSeqsPerOTU);                              if (m->control_pressed) { break; }
2348                 writeClusters(otuCountsFileName, numOTUs, numFlowCells,otuCounts, centroids, uniqueFlowgrams, seqNameVector, aaI, nSeqsPerOTU, lengths, flowDataIntI);                  if (m->control_pressed) { break; }
2349                 writeGroups(groupFileName, fileGroup, numSeqs, seqNameVector);                                          if (m->control_pressed) { break; }
2350                 
2351                 if (large) {
2352                     if (g > 0) {
2353                         variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(theseFlowFileNames[0]));
2354                         m->appendFiles(qualityFileName, getOutputFileName("qfile",variables));
2355                         m->mothurRemove(qualityFileName);
2356                         m->appendFiles(fastaFileName, getOutputFileName("fasta",variables));
2357                         m->mothurRemove(fastaFileName);
2358                         m->appendFiles(nameFileName, getOutputFileName("name",variables));
2359                         m->mothurRemove(nameFileName);
2360                         m->appendFiles(otuCountsFileName, getOutputFileName("counts",variables));
2361                         m->mothurRemove(otuCountsFileName);
2362                         m->appendFiles(groupFileName, getOutputFileName("group",variables));
2363                         m->mothurRemove(groupFileName);
2364                     }
2365                     m->mothurRemove(theseFlowFileNames[g]);
2366                 }
2367                         }
2368             
2369             numCompleted++;
2370                         m->mothurOut("Total time to process " + flowFileName + ":\t" + toString(time(NULL) - begTime) + '\t' + toString((clock() - begClock)/(double)CLOCKS_PER_SEC) + '\n');
2371                 }
2372                 
2373         if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
2374         
2375         return numCompleted;
2376         
2377     }catch(exception& e) {
2378             m->errorOut(e, "ShhherCommand", "driver");
2379             exit(1);
2380     }
2381 }
2382
2383 /**************************************************************************************************/
2384 int ShhherCommand::getFlowData(string filename, vector<string>& thisSeqNameVector, vector<int>& thisLengths, vector<short>& thisFlowDataIntI, map<string, int>& thisNameMap, int& numFlowCells){
2385         try{
2386        
2387                 ifstream flowFile;
2388        
2389                 m->openInputFile(filename, flowFile);
2390                 
2391                 string seqName;
2392                 int currentNumFlowCells;
2393                 float intensity;
2394         thisSeqNameVector.clear();
2395                 thisLengths.clear();
2396                 thisFlowDataIntI.clear();
2397                 thisNameMap.clear();
2398                 
2399                 string numFlowTest;
2400         flowFile >> numFlowTest;
2401         
2402         if (!m->isContainingOnlyDigits(numFlowTest)) { m->mothurOut("[ERROR]: expected a number and got " + numFlowTest + ", quitting. Did you use the flow parameter instead of the file parameter?"); m->mothurOutEndLine(); exit(1); }
2403         else { convert(numFlowTest, numFlowCells); }
2404         
2405         if (m->debug) { m->mothurOut("[DEBUG]: numFlowCells = " + toString(numFlowCells) + ".\n"); }
2406                 int index = 0;//pcluster
2407                 while(!flowFile.eof()){
2408                         
2409                         if (m->control_pressed) { break; }
2410                         
2411                         flowFile >> seqName >> currentNumFlowCells;
2412             
2413                         thisLengths.push_back(currentNumFlowCells);
2414            
2415                         thisSeqNameVector.push_back(seqName);
2416                         thisNameMap[seqName] = index++;//pcluster
2417             
2418             if (m->debug) { m->mothurOut("[DEBUG]: seqName = " + seqName + " length = " + toString(currentNumFlowCells) + " index = " + toString(index) + "\n"); }
2419             
2420                         for(int i=0;i<numFlowCells;i++){
2421                                 flowFile >> intensity;
2422                                 if(intensity > 9.99)    {       intensity = 9.99;       }
2423                                 int intI = int(100 * intensity + 0.0001);
2424                                 thisFlowDataIntI.push_back(intI);
2425                         }
2426                         m->gobble(flowFile);
2427                 }
2428                 flowFile.close();
2429                 
2430                 int numSeqs = thisSeqNameVector.size();         
2431                 
2432                 for(int i=0;i<numSeqs;i++){
2433                         
2434                         if (m->control_pressed) { break; }
2435                         
2436                         int iNumFlowCells = i * numFlowCells;
2437                         for(int j=thisLengths[i];j<numFlowCells;j++){
2438                                 thisFlowDataIntI[iNumFlowCells + j] = 0;
2439                         }
2440                 }
2441         
2442         return numSeqs;
2443                 
2444         }
2445         catch(exception& e) {
2446                 m->errorOut(e, "ShhherCommand", "getFlowData");
2447                 exit(1);
2448         }
2449 }
2450 /**************************************************************************************************/
2451
2452 int ShhherCommand::flowDistParentFork(int numFlowCells, string distFileName, int stopSeq, vector<int>& mapUniqueToSeq, vector<int>& mapSeqToUnique, vector<int>& lengths, vector<double>& flowDataPrI, vector<short>& flowDataIntI){
2453         try{            
2454         
2455                 ostringstream outStream;
2456                 outStream.setf(ios::fixed, ios::floatfield);
2457                 outStream.setf(ios::dec, ios::basefield);
2458                 outStream.setf(ios::showpoint);
2459                 outStream.precision(6);
2460                 
2461                 int begTime = time(NULL);
2462                 double begClock = clock();
2463         
2464                 for(int i=0;i<stopSeq;i++){
2465                         
2466                         if (m->control_pressed) { break; }
2467                         
2468                         for(int j=0;j<i;j++){
2469                                 float flowDistance = calcPairwiseDist(numFlowCells, mapUniqueToSeq[i], mapUniqueToSeq[j], mapSeqToUnique, lengths, flowDataPrI, flowDataIntI);
2470                 
2471                                 if(flowDistance < 1e-6){
2472                                         outStream << mapUniqueToSeq[i] << '\t' << mapUniqueToSeq[j] << '\t' << 0.000000 << endl;
2473                                 }
2474                                 else if(flowDistance <= cutoff){
2475                                         outStream << mapUniqueToSeq[i] << '\t' << mapUniqueToSeq[j] << '\t' << flowDistance << endl;
2476                                 }
2477                         }
2478                         if(i % 100 == 0){
2479                                 m->mothurOut(toString(i) + "\t" + toString(time(NULL) - begTime));
2480                                 m->mothurOut("\t" + toString((clock()-begClock)/CLOCKS_PER_SEC));
2481                                 m->mothurOutEndLine();
2482                         }
2483                 }
2484                 
2485                 ofstream distFile(distFileName.c_str());
2486                 distFile << outStream.str();            
2487                 distFile.close();
2488                 
2489                 if (m->control_pressed) {}
2490                 else {
2491                         m->mothurOut(toString(stopSeq-1) + "\t" + toString(time(NULL) - begTime));
2492                         m->mothurOut("\t" + toString((clock()-begClock)/CLOCKS_PER_SEC));
2493                         m->mothurOutEndLine();
2494                 }
2495         
2496         return 0;
2497         }
2498         catch(exception& e) {
2499                 m->errorOut(e, "ShhherCommand", "flowDistParentFork");
2500                 exit(1);
2501         }
2502 }
2503 /**************************************************************************************************/
2504
2505 float ShhherCommand::calcPairwiseDist(int numFlowCells, int seqA, int seqB, vector<int>& mapSeqToUnique, vector<int>& lengths, vector<double>& flowDataPrI, vector<short>& flowDataIntI){
2506         try{
2507                 int minLength = lengths[mapSeqToUnique[seqA]];
2508                 if(lengths[seqB] < minLength){  minLength = lengths[mapSeqToUnique[seqB]];      }
2509                 
2510                 int ANumFlowCells = seqA * numFlowCells;
2511                 int BNumFlowCells = seqB * numFlowCells;
2512                 
2513                 float dist = 0;
2514                 
2515                 for(int i=0;i<minLength;i++){
2516                         
2517                         if (m->control_pressed) { break; }
2518                         
2519                         int flowAIntI = flowDataIntI[ANumFlowCells + i];
2520                         float flowAPrI = flowDataPrI[ANumFlowCells + i];
2521                         
2522                         int flowBIntI = flowDataIntI[BNumFlowCells + i];
2523                         float flowBPrI = flowDataPrI[BNumFlowCells + i];
2524                         dist += jointLookUp[flowAIntI * NUMBINS + flowBIntI] - flowAPrI - flowBPrI;
2525                 }
2526                 
2527                 dist /= (float) minLength;
2528                 return dist;
2529         }
2530         catch(exception& e) {
2531                 m->errorOut(e, "ShhherCommand", "calcPairwiseDist");
2532                 exit(1);
2533         }
2534 }
2535
2536 /**************************************************************************************************/
2537
2538 int ShhherCommand::getUniques(int numSeqs, int numFlowCells, vector<short>& uniqueFlowgrams, vector<int>& uniqueCount, vector<int>& uniqueLengths, vector<int>& mapSeqToUnique, vector<int>& mapUniqueToSeq, vector<int>& lengths, vector<double>& flowDataPrI, vector<short>& flowDataIntI){
2539         try{
2540                 int numUniques = 0;
2541                 uniqueFlowgrams.assign(numFlowCells * numSeqs, -1);
2542                 uniqueCount.assign(numSeqs, 0);                                                 //      anWeights
2543                 uniqueLengths.assign(numSeqs, 0);
2544                 mapSeqToUnique.assign(numSeqs, -1);
2545                 mapUniqueToSeq.assign(numSeqs, -1);
2546                 
2547                 vector<short> uniqueFlowDataIntI(numFlowCells * numSeqs, -1);
2548                 
2549                 for(int i=0;i<numSeqs;i++){
2550                         
2551                         if (m->control_pressed) { break; }
2552                         
2553                         int index = 0;
2554                         
2555                         vector<short> current(numFlowCells);
2556                         for(int j=0;j<numFlowCells;j++){
2557                                 current[j] = short(((flowDataIntI[i * numFlowCells + j] + 50.0)/100.0));
2558                         }
2559             
2560                         for(int j=0;j<numUniques;j++){
2561                                 int offset = j * numFlowCells;
2562                                 bool toEnd = 1;
2563                                 
2564                                 int shorterLength;
2565                                 if(lengths[i] < uniqueLengths[j])       {       shorterLength = lengths[i];                     }
2566                                 else                                                            {       shorterLength = uniqueLengths[j];       }
2567                 
2568                                 for(int k=0;k<shorterLength;k++){
2569                                         if(current[k] != uniqueFlowgrams[offset + k]){
2570                                                 toEnd = 0;
2571                                                 break;
2572                                         }
2573                                 }
2574                                 
2575                                 if(toEnd){
2576                                         mapSeqToUnique[i] = j;
2577                                         uniqueCount[j]++;
2578                                         index = j;
2579                                         if(lengths[i] > uniqueLengths[j])       {       uniqueLengths[j] = lengths[i];  }
2580                                         break;
2581                                 }
2582                                 index++;
2583                         }
2584                         
2585                         if(index == numUniques){
2586                                 uniqueLengths[numUniques] = lengths[i];
2587                                 uniqueCount[numUniques] = 1;
2588                                 mapSeqToUnique[i] = numUniques;//anMap
2589                                 mapUniqueToSeq[numUniques] = i;//anF
2590                                 
2591                                 for(int k=0;k<numFlowCells;k++){
2592                                         uniqueFlowgrams[numUniques * numFlowCells + k] = current[k];
2593                                         uniqueFlowDataIntI[numUniques * numFlowCells + k] = flowDataIntI[i * numFlowCells + k];
2594                                 }
2595                                 
2596                                 numUniques++;
2597                         }
2598                 }
2599                 uniqueFlowDataIntI.resize(numFlowCells * numUniques);
2600                 uniqueLengths.resize(numUniques);       
2601                 
2602                 flowDataPrI.resize(numSeqs * numFlowCells, 0);
2603                 for(int i=0;i<flowDataPrI.size();i++)   {       if (m->control_pressed) { break; } flowDataPrI[i] = getProbIntensity(flowDataIntI[i]);          }
2604         
2605         return numUniques;
2606         }
2607         catch(exception& e) {
2608                 m->errorOut(e, "ShhherCommand", "getUniques");
2609                 exit(1);
2610         }
2611 }
2612 /**************************************************************************************************/
2613 int ShhherCommand::createNamesFile(int numSeqs, int numUniques, string filename, vector<string>& seqNameVector, vector<int>& mapSeqToUnique, vector<int>& mapUniqueToSeq){
2614         try{
2615                 
2616                 vector<string> duplicateNames(numUniques, "");
2617                 for(int i=0;i<numSeqs;i++){
2618                         duplicateNames[mapSeqToUnique[i]] += seqNameVector[i] + ',';
2619                 }
2620                 
2621                 ofstream nameFile;
2622                 m->openOutputFile(filename, nameFile);
2623                 
2624                 for(int i=0;i<numUniques;i++){
2625                         
2626                         if (m->control_pressed) { break; }
2627                         
2628             //                  nameFile << seqNameVector[mapUniqueToSeq[i]] << '\t' << duplicateNames[i].substr(0, duplicateNames[i].find_last_of(',')) << endl;
2629                         nameFile << mapUniqueToSeq[i] << '\t' << duplicateNames[i].substr(0, duplicateNames[i].find_last_of(',')) << endl;
2630                 }
2631                 
2632                 nameFile.close();
2633         
2634                 return 0;
2635         }
2636         catch(exception& e) {
2637                 m->errorOut(e, "ShhherCommand", "createNamesFile");
2638                 exit(1);
2639         }
2640 }
2641 //**********************************************************************************************************************
2642
2643 int ShhherCommand::cluster(string filename, string distFileName, string namesFileName){
2644         try {
2645                 
2646                 ReadMatrix* read = new ReadColumnMatrix(distFileName);  
2647                 read->setCutoff(cutoff);
2648                 
2649                 NameAssignment* clusterNameMap = new NameAssignment(namesFileName);
2650                 clusterNameMap->readMap();
2651                 read->read(clusterNameMap);
2652         
2653                 ListVector* list = read->getListVector();
2654                 SparseDistanceMatrix* matrix = read->getDMatrix();
2655                 
2656                 delete read; 
2657                 delete clusterNameMap; 
2658         
2659                 RAbundVector* rabund = new RAbundVector(list->getRAbundVector());
2660                 
2661                 Cluster* cluster = new CompleteLinkage(rabund, list, matrix, cutoff, "furthest"); 
2662                 string tag = cluster->getTag();
2663                 
2664                 double clusterCutoff = cutoff;
2665                 while (matrix->getSmallDist() <= clusterCutoff && matrix->getNNodes() > 0){
2666                         
2667                         if (m->control_pressed) { break; }
2668                         
2669                         cluster->update(clusterCutoff);
2670                 }
2671                 
2672                 list->setLabel(toString(cutoff));
2673                 
2674                 ofstream listFile;
2675                 m->openOutputFile(filename, listFile);
2676                 list->print(listFile);
2677                 listFile.close();
2678                 
2679                 delete matrix;  delete cluster; delete rabund; delete list;
2680         
2681                 return 0;
2682         }
2683         catch(exception& e) {
2684                 m->errorOut(e, "ShhherCommand", "cluster");
2685                 exit(1);        
2686         }               
2687 }
2688 /**************************************************************************************************/
2689
2690 int ShhherCommand::getOTUData(int numSeqs, string fileName,  vector<int>& otuData,
2691                                vector<int>& cumNumSeqs,
2692                                vector<int>& nSeqsPerOTU,
2693                                vector<vector<int> >& aaP,       //tMaster->aanP:        each row is a different otu / each col contains the sequence indices
2694                                vector<vector<int> >& aaI,       //tMaster->aanI:        that are in each otu - can't differentiate between aaP and aaI 
2695                                vector<int>& seqNumber,          //tMaster->anP:         the sequence id number sorted by OTU
2696                                vector<int>& seqIndex,
2697                                map<string, int>& nameMap){
2698         try {
2699         
2700                 ifstream listFile;
2701                 m->openInputFile(fileName, listFile);
2702                 string label;
2703         int numOTUs;
2704                 
2705                 listFile >> label >> numOTUs;
2706         
2707         if (m->debug) { m->mothurOut("[DEBUG]: Getting OTU Data...\n"); }
2708         
2709                 otuData.assign(numSeqs, 0);
2710                 cumNumSeqs.assign(numOTUs, 0);
2711                 nSeqsPerOTU.assign(numOTUs, 0);
2712                 aaP.clear();aaP.resize(numOTUs);
2713                 
2714                 seqNumber.clear();
2715                 aaI.clear();
2716                 seqIndex.clear();
2717                 
2718                 string singleOTU = "";
2719                 
2720                 for(int i=0;i<numOTUs;i++){
2721                         
2722                         if (m->control_pressed) { break; }
2723             if (m->debug) { m->mothurOut("[DEBUG]: processing OTU " + toString(i) + ".\n"); }
2724             
2725                         listFile >> singleOTU;
2726                         
2727                         istringstream otuString(singleOTU);
2728             
2729                         while(otuString){
2730                                 
2731                                 string seqName = "";
2732                                 
2733                                 for(int j=0;j<singleOTU.length();j++){
2734                                         char letter = otuString.get();
2735                                         
2736                                         if(letter != ','){
2737                                                 seqName += letter;
2738                                         }
2739                                         else{
2740                                                 map<string,int>::iterator nmIt = nameMap.find(seqName);
2741                                                 int index = nmIt->second;
2742                                                 
2743                                                 nameMap.erase(nmIt);
2744                                                 
2745                                                 otuData[index] = i;
2746                                                 nSeqsPerOTU[i]++;
2747                                                 aaP[i].push_back(index);
2748                                                 seqName = "";
2749                                         }
2750                                 }
2751                                 
2752                                 map<string,int>::iterator nmIt = nameMap.find(seqName);
2753                 
2754                                 int index = nmIt->second;
2755                                 nameMap.erase(nmIt);
2756                 
2757                                 otuData[index] = i;
2758                                 nSeqsPerOTU[i]++;
2759                                 aaP[i].push_back(index);        
2760                                 
2761                                 otuString.get();
2762                         }
2763                         
2764                         sort(aaP[i].begin(), aaP[i].end());
2765                         for(int j=0;j<nSeqsPerOTU[i];j++){
2766                                 seqNumber.push_back(aaP[i][j]);
2767                         }
2768                         for(int j=nSeqsPerOTU[i];j<numSeqs;j++){
2769                                 aaP[i].push_back(0);
2770                         }
2771                         
2772                         
2773                 }
2774                 
2775                 for(int i=1;i<numOTUs;i++){
2776                         cumNumSeqs[i] = cumNumSeqs[i-1] + nSeqsPerOTU[i-1];
2777                 }
2778                 aaI = aaP;
2779                 seqIndex = seqNumber;
2780                 
2781                 listFile.close();       
2782         
2783         return numOTUs;
2784                 
2785         }
2786         catch(exception& e) {
2787                 m->errorOut(e, "ShhherCommand", "getOTUData");
2788                 exit(1);        
2789         }               
2790 }
2791 /**************************************************************************************************/
2792
2793 int ShhherCommand::calcCentroidsDriver(int numOTUs, 
2794                                           vector<int>& cumNumSeqs,
2795                                           vector<int>& nSeqsPerOTU,
2796                                           vector<int>& seqIndex,
2797                                           vector<short>& change,                //did the centroid sequence change? 0 = no; 1 = yes
2798                                           vector<int>& centroids,               //the representative flowgram for each cluster m
2799                                           vector<double>& singleTau,    //tMaster->adTau:       1-D Tau vector (1xnumSeqs)
2800                                           vector<int>& mapSeqToUnique,
2801                                           vector<short>& uniqueFlowgrams,
2802                                           vector<short>& flowDataIntI,
2803                                           vector<int>& lengths,
2804                                           int numFlowCells,
2805                                           vector<int>& seqNumber){                          
2806         
2807         //this function gets the most likely homopolymer length at a flow position for a group of sequences
2808         //within an otu
2809         
2810         try{
2811                 
2812                 for(int i=0;i<numOTUs;i++){
2813                         
2814                         if (m->control_pressed) { break; }
2815                         
2816                         double count = 0;
2817                         int position = 0;
2818                         int minFlowGram = 100000000;
2819                         double minFlowValue = 1e8;
2820                         change[i] = 0; //FALSE
2821                         
2822                         for(int j=0;j<nSeqsPerOTU[i];j++){
2823                                 count += singleTau[seqNumber[cumNumSeqs[i] + j]];
2824                         }
2825             
2826                         if(nSeqsPerOTU[i] > 0 && count > MIN_COUNT){
2827                                 vector<double> adF(nSeqsPerOTU[i]);
2828                                 vector<int> anL(nSeqsPerOTU[i]);
2829                                 
2830                                 for(int j=0;j<nSeqsPerOTU[i];j++){
2831                                         int index = cumNumSeqs[i] + j;
2832                                         int nI = seqIndex[index];
2833                                         int nIU = mapSeqToUnique[nI];
2834                                         
2835                                         int k;
2836                                         for(k=0;k<position;k++){
2837                                                 if(nIU == anL[k]){
2838                                                         break;
2839                                                 }
2840                                         }
2841                                         if(k == position){
2842                                                 anL[position] = nIU;
2843                                                 adF[position] = 0.0000;
2844                                                 position++;
2845                                         }                                               
2846                                 }
2847                                 
2848                                 for(int j=0;j<nSeqsPerOTU[i];j++){
2849                                         int index = cumNumSeqs[i] + j;
2850                                         int nI = seqIndex[index];
2851                                         
2852                                         double tauValue = singleTau[seqNumber[index]];
2853                                         
2854                                         for(int k=0;k<position;k++){
2855                                                 double dist = getDistToCentroid(anL[k], nI, lengths[nI], uniqueFlowgrams, flowDataIntI, numFlowCells);
2856                                                 adF[k] += dist * tauValue;
2857                                         }
2858                                 }
2859                                 
2860                                 for(int j=0;j<position;j++){
2861                                         if(adF[j] < minFlowValue){
2862                                                 minFlowGram = j;
2863                                                 minFlowValue = adF[j];
2864                                         }
2865                                 }
2866                                 
2867                                 if(centroids[i] != anL[minFlowGram]){
2868                                         change[i] = 1;
2869                                         centroids[i] = anL[minFlowGram];
2870                                 }
2871                         }
2872                         else if(centroids[i] != -1){
2873                                 change[i] = 1;
2874                                 centroids[i] = -1;                      
2875                         }
2876                 }
2877         
2878         return 0;
2879         }
2880         catch(exception& e) {
2881                 m->errorOut(e, "ShhherCommand", "calcCentroidsDriver");
2882                 exit(1);        
2883         }               
2884 }
2885 /**************************************************************************************************/
2886
2887 double ShhherCommand::getDistToCentroid(int cent, int flow, int length, vector<short>& uniqueFlowgrams,
2888                                         vector<short>& flowDataIntI, int numFlowCells){
2889         try{
2890                 
2891                 int flowAValue = cent * numFlowCells;
2892                 int flowBValue = flow * numFlowCells;
2893                 
2894                 double dist = 0;
2895         
2896                 for(int i=0;i<length;i++){
2897                         dist += singleLookUp[uniqueFlowgrams[flowAValue] * NUMBINS + flowDataIntI[flowBValue]];
2898                         flowAValue++;
2899                         flowBValue++;
2900                 }
2901                 
2902                 return dist / (double)length;
2903         }
2904         catch(exception& e) {
2905                 m->errorOut(e, "ShhherCommand", "getDistToCentroid");
2906                 exit(1);        
2907         }               
2908 }
2909 /**************************************************************************************************/
2910
2911 double ShhherCommand::getNewWeights(int numOTUs, vector<int>& cumNumSeqs, vector<int>& nSeqsPerOTU, vector<double>& singleTau, vector<int>& seqNumber, vector<double>& weight){
2912         try{
2913                 
2914                 double maxChange = 0;
2915                 
2916                 for(int i=0;i<numOTUs;i++){
2917                         
2918                         if (m->control_pressed) { break; }
2919                         
2920                         double difference = weight[i];
2921                         weight[i] = 0;
2922                         
2923                         for(int j=0;j<nSeqsPerOTU[i];j++){
2924                                 int index = cumNumSeqs[i] + j;
2925                                 double tauValue = singleTau[seqNumber[index]];
2926                                 weight[i] += tauValue;
2927                         }
2928                         
2929                         difference = fabs(weight[i] - difference);
2930                         if(difference > maxChange){     maxChange = difference; }
2931                 }
2932                 return maxChange;
2933         }
2934         catch(exception& e) {
2935                 m->errorOut(e, "ShhherCommand", "getNewWeights");
2936                 exit(1);        
2937         }               
2938 }
2939
2940 /**************************************************************************************************/
2941
2942 double ShhherCommand::getLikelihood(int numSeqs, int numOTUs, vector<int>& nSeqsPerOTU, vector<int>& seqNumber, vector<int>& cumNumSeqs, vector<int>& seqIndex, vector<double>& dist, vector<double>& weight){
2943         
2944         try{
2945                 
2946                 vector<long double> P(numSeqs, 0);
2947                 int effNumOTUs = 0;
2948                 
2949                 for(int i=0;i<numOTUs;i++){
2950                         if(weight[i] > MIN_WEIGHT){
2951                                 effNumOTUs++;
2952                         }
2953                 }
2954                 
2955                 string hold;
2956                 for(int i=0;i<numOTUs;i++){
2957                         
2958                         if (m->control_pressed) { break; }
2959                         
2960                         for(int j=0;j<nSeqsPerOTU[i];j++){
2961                                 int index = cumNumSeqs[i] + j;
2962                                 int nI = seqIndex[index];
2963                                 double singleDist = dist[seqNumber[index]];
2964                                 
2965                                 P[nI] += weight[i] * exp(-singleDist * sigma);
2966                         }
2967                 }
2968                 double nLL = 0.00;
2969                 for(int i=0;i<numSeqs;i++){
2970                         if(P[i] == 0){  P[i] = DBL_EPSILON;     }
2971             
2972                         nLL += -log(P[i]);
2973                 }
2974                 
2975                 nLL = nLL -(double)numSeqs * log(sigma);
2976         
2977                 return nLL; 
2978         }
2979         catch(exception& e) {
2980                 m->errorOut(e, "ShhherCommand", "getNewWeights");
2981                 exit(1);        
2982         }               
2983 }
2984
2985 /**************************************************************************************************/
2986
2987 int ShhherCommand::checkCentroids(int numOTUs, vector<int>& centroids, vector<double>& weight){
2988         try{
2989                 vector<int> unique(numOTUs, 1);
2990                 
2991                 for(int i=0;i<numOTUs;i++){
2992                         if(centroids[i] == -1 || weight[i] < MIN_WEIGHT){
2993                                 unique[i] = -1;
2994                         }
2995                 }
2996                 
2997                 for(int i=0;i<numOTUs;i++){
2998                         
2999                         if (m->control_pressed) { break; }
3000                         
3001                         if(unique[i] == 1){
3002                                 for(int j=i+1;j<numOTUs;j++){
3003                                         if(unique[j] == 1){
3004                                                 
3005                                                 if(centroids[j] == centroids[i]){
3006                                                         unique[j] = 0;
3007                                                         centroids[j] = -1;
3008                                                         
3009                                                         weight[i] += weight[j];
3010                                                         weight[j] = 0.0;
3011                                                 }
3012                                         }
3013                                 }
3014                         }
3015                 }
3016         
3017         return 0;
3018         }
3019         catch(exception& e) {
3020                 m->errorOut(e, "ShhherCommand", "checkCentroids");
3021                 exit(1);        
3022         }               
3023 }
3024 /**************************************************************************************************/
3025
3026 void ShhherCommand::calcNewDistances(int numSeqs, int numOTUs, vector<int>& nSeqsPerOTU, vector<double>& dist, 
3027                                      vector<double>& weight, vector<short>& change, vector<int>& centroids,
3028                                      vector<vector<int> >& aaP, vector<double>& singleTau, vector<vector<int> >& aaI,   
3029                                      vector<int>& seqNumber, vector<int>& seqIndex,
3030                                      vector<short>& uniqueFlowgrams,
3031                                      vector<short>& flowDataIntI, int numFlowCells, vector<int>& lengths){
3032         
3033         try{
3034                 
3035                 int total = 0;
3036                 vector<double> newTau(numOTUs,0);
3037                 vector<double> norms(numSeqs, 0);
3038                 nSeqsPerOTU.assign(numOTUs, 0);
3039         
3040                 for(int i=0;i<numSeqs;i++){
3041                         
3042                         if (m->control_pressed) { break; }
3043                         
3044                         int indexOffset = i * numOTUs;
3045             
3046                         double offset = 1e8;
3047                         
3048                         for(int j=0;j<numOTUs;j++){
3049                 
3050                                 if(weight[j] > MIN_WEIGHT && change[j] == 1){
3051                                         dist[indexOffset + j] = getDistToCentroid(centroids[j], i, lengths[i], uniqueFlowgrams, flowDataIntI, numFlowCells);
3052                                 }
3053                 
3054                                 if(weight[j] > MIN_WEIGHT && dist[indexOffset + j] < offset){
3055                                         offset = dist[indexOffset + j];
3056                                 }
3057                         }
3058             
3059                         for(int j=0;j<numOTUs;j++){
3060                                 if(weight[j] > MIN_WEIGHT){
3061                                         newTau[j] = exp(sigma * (-dist[indexOffset + j] + offset)) * weight[j];
3062                                         norms[i] += newTau[j];
3063                                 }
3064                                 else{
3065                                         newTau[j] = 0.0;
3066                                 }
3067                         }
3068             
3069                         for(int j=0;j<numOTUs;j++){
3070                                 newTau[j] /= norms[i];
3071                         }
3072             
3073                         for(int j=0;j<numOTUs;j++){
3074                                 if(newTau[j] > MIN_TAU){
3075                                         
3076                                         int oldTotal = total;
3077                                         
3078                                         total++;
3079                                         
3080                                         singleTau.resize(total, 0);
3081                                         seqNumber.resize(total, 0);
3082                                         seqIndex.resize(total, 0);
3083                                         
3084                                         singleTau[oldTotal] = newTau[j];
3085                                         
3086                                         aaP[j][nSeqsPerOTU[j]] = oldTotal;
3087                                         aaI[j][nSeqsPerOTU[j]] = i;
3088                                         nSeqsPerOTU[j]++;
3089                                 }
3090                         }
3091             
3092                 }
3093         
3094         }
3095         catch(exception& e) {
3096                 m->errorOut(e, "ShhherCommand", "calcNewDistances");
3097                 exit(1);        
3098         }               
3099 }
3100 /**************************************************************************************************/
3101
3102 int ShhherCommand::fill(int numOTUs, vector<int>& seqNumber, vector<int>& seqIndex, vector<int>& cumNumSeqs, vector<int>& nSeqsPerOTU, vector<vector<int> >& aaP, vector<vector<int> >& aaI){
3103         try {
3104                 int index = 0;
3105                 for(int i=0;i<numOTUs;i++){
3106                         
3107                         if (m->control_pressed) { return 0; }
3108                         
3109                         cumNumSeqs[i] = index;
3110                         for(int j=0;j<nSeqsPerOTU[i];j++){
3111                                 seqNumber[index] = aaP[i][j];
3112                                 seqIndex[index] = aaI[i][j];
3113                                 
3114                                 index++;
3115                         }
3116                 }
3117         
3118         return 0; 
3119         }
3120         catch(exception& e) {
3121                 m->errorOut(e, "ShhherCommand", "fill");
3122                 exit(1);        
3123         }               
3124 }
3125 /**************************************************************************************************/
3126
3127 void ShhherCommand::setOTUs(int numOTUs, int numSeqs, vector<int>& seqNumber, vector<int>& seqIndex, vector<int>& cumNumSeqs, vector<int>& nSeqsPerOTU,
3128                             vector<int>& otuData, vector<double>& singleTau, vector<double>& dist, vector<vector<int> >& aaP, vector<vector<int> >& aaI){
3129         
3130         try {
3131                 vector<double> bigTauMatrix(numOTUs * numSeqs, 0.0000);
3132                 
3133                 for(int i=0;i<numOTUs;i++){
3134                         
3135                         if (m->control_pressed) { break; }
3136                         
3137                         for(int j=0;j<nSeqsPerOTU[i];j++){
3138                                 int index = cumNumSeqs[i] + j;
3139                                 double tauValue = singleTau[seqNumber[index]];
3140                                 int sIndex = seqIndex[index];
3141                                 bigTauMatrix[sIndex * numOTUs + i] = tauValue;                          
3142                         }
3143                 }
3144                 
3145                 for(int i=0;i<numSeqs;i++){
3146                         double maxTau = -1.0000;
3147                         int maxOTU = -1;
3148                         for(int j=0;j<numOTUs;j++){
3149                                 if(bigTauMatrix[i * numOTUs + j] > maxTau){
3150                                         maxTau = bigTauMatrix[i * numOTUs + j];
3151                                         maxOTU = j;
3152                                 }
3153                         }
3154                         
3155                         otuData[i] = maxOTU;
3156                 }
3157                 
3158                 nSeqsPerOTU.assign(numOTUs, 0);         
3159                 
3160                 for(int i=0;i<numSeqs;i++){
3161                         int index = otuData[i];
3162                         
3163                         singleTau[i] = 1.0000;
3164                         dist[i] = 0.0000;
3165                         
3166                         aaP[index][nSeqsPerOTU[index]] = i;
3167                         aaI[index][nSeqsPerOTU[index]] = i;
3168                         
3169                         nSeqsPerOTU[index]++;
3170                 }
3171         
3172                 fill(numOTUs, seqNumber, seqIndex, cumNumSeqs, nSeqsPerOTU, aaP, aaI);  
3173         }
3174         catch(exception& e) {
3175                 m->errorOut(e, "ShhherCommand", "setOTUs");
3176                 exit(1);        
3177         }               
3178 }
3179 /**************************************************************************************************/
3180
3181 void ShhherCommand::writeQualities(int numOTUs, int numFlowCells, string qualityFileName, vector<int> otuCounts, vector<int>& nSeqsPerOTU, vector<int>& seqNumber,
3182                                    vector<double>& singleTau, vector<short>& flowDataIntI, vector<short>& uniqueFlowgrams, vector<int>& cumNumSeqs,
3183                                    vector<int>& mapUniqueToSeq, vector<string>& seqNameVector, vector<int>& centroids, vector<vector<int> >& aaI){
3184         
3185         try {
3186         
3187                 ofstream qualityFile;
3188                 m->openOutputFile(qualityFileName, qualityFile);
3189         
3190                 qualityFile.setf(ios::fixed, ios::floatfield);
3191                 qualityFile.setf(ios::showpoint);
3192                 qualityFile << setprecision(6);
3193                 
3194                 vector<vector<int> > qualities(numOTUs);
3195                 vector<double> pr(HOMOPS, 0);
3196                 
3197                 
3198                 for(int i=0;i<numOTUs;i++){
3199                         
3200                         if (m->control_pressed) { break; }
3201                         
3202                         int index = 0;
3203                         int base = 0;
3204                         
3205                         if(nSeqsPerOTU[i] > 0){
3206                                 qualities[i].assign(1024, -1);
3207                                 
3208                                 while(index < numFlowCells){
3209                                         double maxPrValue = 1e8;
3210                                         short maxPrIndex = -1;
3211                                         double count = 0.0000;
3212                                         
3213                                         pr.assign(HOMOPS, 0);
3214                                         
3215                                         for(int j=0;j<nSeqsPerOTU[i];j++){
3216                                                 int lIndex = cumNumSeqs[i] + j;
3217                                                 double tauValue = singleTau[seqNumber[lIndex]];
3218                                                 int sequenceIndex = aaI[i][j];
3219                                                 short intensity = flowDataIntI[sequenceIndex * numFlowCells + index];
3220                                                 
3221                                                 count += tauValue;
3222                                                 
3223                                                 for(int s=0;s<HOMOPS;s++){
3224                                                         pr[s] += tauValue * singleLookUp[s * NUMBINS + intensity];
3225                                                 }
3226                                         }
3227                                         
3228                                         maxPrIndex = uniqueFlowgrams[centroids[i] * numFlowCells + index];
3229                                         maxPrValue = pr[maxPrIndex];
3230                                         
3231                                         if(count > MIN_COUNT){
3232                                                 double U = 0.0000;
3233                                                 double norm = 0.0000;
3234                                                 
3235                                                 for(int s=0;s<HOMOPS;s++){
3236                                                         norm += exp(-(pr[s] - maxPrValue));
3237                                                 }
3238                                                 
3239                                                 for(int s=1;s<=maxPrIndex;s++){
3240                                                         int value = 0;
3241                                                         double temp = 0.0000;
3242                                                         
3243                                                         U += exp(-(pr[s-1]-maxPrValue))/norm;
3244                                                         
3245                                                         if(U>0.00){
3246                                                                 temp = log10(U);
3247                                                         }
3248                                                         else{
3249                                                                 temp = -10.1;
3250                                                         }
3251                                                         temp = floor(-10 * temp);
3252                                                         value = (int)floor(temp);
3253                                                         if(value > 100){        value = 100;    }
3254                                                         
3255                                                         qualities[i][base] = (int)value;
3256                                                         base++;
3257                                                 }
3258                                         }
3259                                         
3260                                         index++;
3261                                 }
3262                         }
3263                         
3264                         
3265                         if(otuCounts[i] > 0){
3266                                 qualityFile << '>' << seqNameVector[mapUniqueToSeq[i]] << endl;
3267                         
3268                                 int j=4;        //need to get past the first four bases
3269                                 while(qualities[i][j] != -1){
3270                     qualityFile << qualities[i][j] << ' ';
3271                     if (j > qualities[i].size()) { break; }
3272                     j++;
3273                                 }
3274                                 qualityFile << endl;
3275                         }
3276                 }
3277                 qualityFile.close();
3278                 outputNames.push_back(qualityFileName); outputTypes["qfile"].push_back(qualityFileName);
3279         
3280         }
3281         catch(exception& e) {
3282                 m->errorOut(e, "ShhherCommand", "writeQualities");
3283                 exit(1);        
3284         }               
3285 }
3286
3287 /**************************************************************************************************/
3288
3289 void ShhherCommand::writeSequences(string thisCompositeFASTAFileName, int numOTUs, int numFlowCells, string fastaFileName, vector<int> otuCounts, vector<short>& uniqueFlowgrams, vector<string>& seqNameVector, vector<vector<int> >& aaI, vector<int>& centroids){
3290         try {
3291                 
3292                 ofstream fastaFile;
3293                 m->openOutputFile(fastaFileName, fastaFile);
3294                 
3295                 vector<string> names(numOTUs, "");
3296                 
3297                 for(int i=0;i<numOTUs;i++){
3298                         
3299                         if (m->control_pressed) { break; }
3300                         
3301                         int index = centroids[i];
3302                         
3303                         if(otuCounts[i] > 0){
3304                                 fastaFile << '>' << seqNameVector[aaI[i][0]] << endl;
3305                                 
3306                                 string newSeq = "";
3307                                 
3308                                 for(int j=0;j<numFlowCells;j++){
3309                                         
3310                                         char base = flowOrder[j % 4];
3311                                         for(int k=0;k<uniqueFlowgrams[index * numFlowCells + j];k++){
3312                                                 newSeq += base;
3313                                         }
3314                                 }
3315                                 
3316                                 if (newSeq.length() >= 4) {  fastaFile << newSeq.substr(4) << endl;  }
3317                 else {  fastaFile << "NNNN" << endl;  }
3318                         }
3319                 }
3320                 fastaFile.close();
3321         
3322                 outputNames.push_back(fastaFileName); outputTypes["fasta"].push_back(fastaFileName);
3323         
3324                 if(thisCompositeFASTAFileName != ""){
3325                         m->appendFiles(fastaFileName, thisCompositeFASTAFileName);
3326                 }
3327         }
3328         catch(exception& e) {
3329                 m->errorOut(e, "ShhherCommand", "writeSequences");
3330                 exit(1);        
3331         }               
3332 }
3333
3334 /**************************************************************************************************/
3335
3336 void ShhherCommand::writeNames(string thisCompositeNamesFileName, int numOTUs, string nameFileName, vector<int> otuCounts, vector<string>& seqNameVector, vector<vector<int> >& aaI, vector<int>& nSeqsPerOTU){
3337         try {
3338                 
3339                 ofstream nameFile;
3340                 m->openOutputFile(nameFileName, nameFile);
3341                 
3342                 for(int i=0;i<numOTUs;i++){
3343                         
3344                         if (m->control_pressed) { break; }
3345                         
3346                         if(otuCounts[i] > 0){
3347                                 nameFile << seqNameVector[aaI[i][0]] << '\t' << seqNameVector[aaI[i][0]];
3348                                 
3349                                 for(int j=1;j<nSeqsPerOTU[i];j++){
3350                                         nameFile << ',' << seqNameVector[aaI[i][j]];
3351                                 }
3352                                 
3353                                 nameFile << endl;
3354                         }
3355                 }
3356                 nameFile.close();
3357                 outputNames.push_back(nameFileName); outputTypes["name"].push_back(nameFileName);
3358                 
3359                 
3360                 if(thisCompositeNamesFileName != ""){
3361                         m->appendFiles(nameFileName, thisCompositeNamesFileName);
3362                 }               
3363         }
3364         catch(exception& e) {
3365                 m->errorOut(e, "ShhherCommand", "writeNames");
3366                 exit(1);        
3367         }               
3368 }
3369
3370 /**************************************************************************************************/
3371
3372 void ShhherCommand::writeGroups(string groupFileName, string fileRoot, int numSeqs, vector<string>& seqNameVector){
3373         try {
3374         ofstream groupFile;
3375                 m->openOutputFile(groupFileName, groupFile);
3376                 
3377                 for(int i=0;i<numSeqs;i++){
3378                         if (m->control_pressed) { break; }
3379                         groupFile << seqNameVector[i] << '\t' << fileRoot << endl;
3380                 }
3381                 groupFile.close();
3382                 outputNames.push_back(groupFileName); outputTypes["group"].push_back(groupFileName);
3383         
3384         }
3385         catch(exception& e) {
3386                 m->errorOut(e, "ShhherCommand", "writeGroups");
3387                 exit(1);        
3388         }               
3389 }
3390
3391 /**************************************************************************************************/
3392
3393 void ShhherCommand::writeClusters(string otuCountsFileName, int numOTUs, int numFlowCells, vector<int> otuCounts, vector<int>& centroids, vector<short>& uniqueFlowgrams, vector<string>& seqNameVector, vector<vector<int> >& aaI, vector<int>& nSeqsPerOTU, vector<int>& lengths, vector<short>& flowDataIntI){
3394         try {
3395                 ofstream otuCountsFile;
3396                 m->openOutputFile(otuCountsFileName, otuCountsFile);
3397                 
3398                 string bases = flowOrder;
3399                 
3400                 for(int i=0;i<numOTUs;i++){
3401                         
3402                         if (m->control_pressed) {
3403                                 break;
3404                         }
3405                         //output the translated version of the centroid sequence for the otu
3406                         if(otuCounts[i] > 0){
3407                                 int index = centroids[i];
3408                                 
3409                                 otuCountsFile << "ideal\t";
3410                                 for(int j=8;j<numFlowCells;j++){
3411                                         char base = bases[j % 4];
3412                                         for(int s=0;s<uniqueFlowgrams[index * numFlowCells + j];s++){
3413                                                 otuCountsFile << base;
3414                                         }
3415                                 }
3416                                 otuCountsFile << endl;
3417                                 
3418                                 for(int j=0;j<nSeqsPerOTU[i];j++){
3419                                         int sequence = aaI[i][j];
3420                                         otuCountsFile << seqNameVector[sequence] << '\t';
3421                                         
3422                                         string newSeq = "";
3423                                         
3424                                         for(int k=0;k<lengths[sequence];k++){
3425                                                 char base = bases[k % 4];
3426                                                 int freq = int(0.01 * (double)flowDataIntI[sequence * numFlowCells + k] + 0.5);
3427                         
3428                                                 for(int s=0;s<freq;s++){
3429                                                         newSeq += base;
3430                                                         //otuCountsFile << base;
3431                                                 }
3432                                         }
3433                                         
3434                     if (newSeq.length() >= 4) {  otuCountsFile << newSeq.substr(4) << endl;  }
3435                     else {  otuCountsFile << "NNNN" << endl;  }
3436                                 }
3437                                 otuCountsFile << endl;
3438                         }
3439                 }
3440                 otuCountsFile.close();
3441                 outputNames.push_back(otuCountsFileName); outputTypes["counts"].push_back(otuCountsFileName);
3442         
3443         }
3444         catch(exception& e) {
3445                 m->errorOut(e, "ShhherCommand", "writeClusters");
3446                 exit(1);        
3447         }               
3448 }
3449
3450 /**************************************************************************************************/
3451
3452 void ShhherCommand::getSingleLookUp(){
3453         try{
3454                 //      these are the -log probabilities that a signal corresponds to a particular homopolymer length
3455                 singleLookUp.assign(HOMOPS * NUMBINS, 0);
3456                 
3457                 int index = 0;
3458                 ifstream lookUpFile;
3459                 m->openInputFile(lookupFileName, lookUpFile);
3460                 
3461                 for(int i=0;i<HOMOPS;i++){
3462                         
3463                         if (m->control_pressed) { break; }
3464                         
3465                         float logFracFreq;
3466                         lookUpFile >> logFracFreq;
3467                         
3468                         for(int j=0;j<NUMBINS;j++)      {
3469                                 lookUpFile >> singleLookUp[index];
3470                                 index++;
3471                         }
3472                 }       
3473                 lookUpFile.close();
3474         }
3475         catch(exception& e) {
3476                 m->errorOut(e, "ShhherCommand", "getSingleLookUp");
3477                 exit(1);
3478         }
3479 }
3480
3481 /**************************************************************************************************/
3482
3483 void ShhherCommand::getJointLookUp(){
3484         try{
3485                 
3486                 //      the most likely joint probability (-log) that two intenities have the same polymer length
3487                 jointLookUp.resize(NUMBINS * NUMBINS, 0);
3488                 
3489                 for(int i=0;i<NUMBINS;i++){
3490                         
3491                         if (m->control_pressed) { break; }
3492                         
3493                         for(int j=0;j<NUMBINS;j++){             
3494                                 
3495                                 double minSum = 100000000;
3496                                 
3497                                 for(int k=0;k<HOMOPS;k++){
3498                                         double sum = singleLookUp[k * NUMBINS + i] + singleLookUp[k * NUMBINS + j];
3499                                         
3500                                         if(sum < minSum)        {       minSum = sum;           }
3501                                 }       
3502                                 jointLookUp[i * NUMBINS + j] = minSum;
3503                         }
3504                 }
3505         }
3506         catch(exception& e) {
3507                 m->errorOut(e, "ShhherCommand", "getJointLookUp");
3508                 exit(1);
3509         }
3510 }
3511
3512 /**************************************************************************************************/
3513
3514 double ShhherCommand::getProbIntensity(int intIntensity){                          
3515         try{
3516
3517                 double minNegLogProb = 100000000; 
3518
3519                 
3520                 for(int i=0;i<HOMOPS;i++){//loop signal strength
3521                         
3522                         if (m->control_pressed) { break; }
3523                         
3524                         float negLogProb = singleLookUp[i * NUMBINS + intIntensity];
3525                         if(negLogProb < minNegLogProb)  {       minNegLogProb = negLogProb; }
3526                 }
3527                 
3528                 return minNegLogProb;
3529         }
3530         catch(exception& e) {
3531                 m->errorOut(e, "ShhherCommand", "getProbIntensity");
3532                 exit(1);
3533         }
3534 }
3535
3536
3537
3538