]> git.donarmstrong.com Git - mothur.git/blob - shhhercommand.cpp
major change to the tree class to use the count table class instead of tree map....
[mothur.git] / shhhercommand.cpp
1 /*
2  *  shhher.cpp
3  *  Mothur
4  *
5  *  Created by Pat Schloss on 12/27/10.
6  *  Copyright 2010 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "shhhercommand.h"
11
12 //**********************************************************************************************************************
13 vector<string> ShhherCommand::setParameters(){  
14         try {
15                 CommandParameter pflow("flow", "InputTypes", "", "", "none", "fileflow", "none",false,false); parameters.push_back(pflow);
16                 CommandParameter pfile("file", "InputTypes", "", "", "none", "fileflow", "none",false,false); parameters.push_back(pfile);
17                 CommandParameter plookup("lookup", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(plookup);
18                 CommandParameter pcutoff("cutoff", "Number", "", "0.01", "", "", "",false,false); parameters.push_back(pcutoff);
19                 CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors);
20                 CommandParameter pmaxiter("maxiter", "Number", "", "1000", "", "", "",false,false); parameters.push_back(pmaxiter);
21         CommandParameter plarge("large", "Number", "", "-1", "", "", "",false,false); parameters.push_back(plarge);
22                 CommandParameter psigma("sigma", "Number", "", "60", "", "", "",false,false); parameters.push_back(psigma);
23                 CommandParameter pmindelta("mindelta", "Number", "", "0.000001", "", "", "",false,false); parameters.push_back(pmindelta);
24                 CommandParameter porder("order", "String", "", "", "", "", "",false,false); parameters.push_back(porder);
25                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
26                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
27                 
28                 vector<string> myArray;
29                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
30                 return myArray;
31         }
32         catch(exception& e) {
33                 m->errorOut(e, "ShhherCommand", "setParameters");
34                 exit(1);
35         }
36 }
37 //**********************************************************************************************************************
38 string ShhherCommand::getHelpString(){  
39         try {
40                 string helpString = "";
41                 helpString += "The shhh.flows command reads a file containing flowgrams and creates a file of corrected sequences.\n";
42                 return helpString;
43         }
44         catch(exception& e) {
45                 m->errorOut(e, "ShhherCommand", "getHelpString");
46                 exit(1);
47         }
48 }
49 //**********************************************************************************************************************
50 string ShhherCommand::getOutputFileNameTag(string type, string inputName=""){   
51         try {
52         string outputFileName = "";
53                 map<string, vector<string> >::iterator it;
54         
55         //is this a type this command creates
56         it = outputTypes.find(type);
57         if (it == outputTypes.end()) {  m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n"); }
58         else {
59             if (type == "fasta")            {   outputFileName =  "shhh.fasta";   }
60             else if (type == "name")    {   outputFileName =  "shhh.names";   }
61             else if (type == "group")        {   outputFileName =  "shhh.groups";   }
62             else if (type == "counts")        {   outputFileName =  "shhh.counts";   }
63             else if (type == "qfile")        {   outputFileName =  "shhh.qual";   }
64             else { m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n"); m->control_pressed = true;  }
65         }
66         return outputFileName;
67         }
68         catch(exception& e) {
69                 m->errorOut(e, "ShhherCommand", "getOutputFileNameTag");
70                 exit(1);
71         }
72 }
73 //**********************************************************************************************************************
74
75 ShhherCommand::ShhherCommand(){ 
76         try {
77                 abort = true; calledHelp = true;
78                 setParameters();
79                 
80                 //initialize outputTypes
81                 vector<string> tempOutNames;
82                 outputTypes["fasta"] = tempOutNames;
83         outputTypes["name"] = tempOutNames;
84         outputTypes["group"] = tempOutNames;
85         outputTypes["counts"] = tempOutNames;
86         outputTypes["qfile"] = tempOutNames;
87
88         }
89         catch(exception& e) {
90                 m->errorOut(e, "ShhherCommand", "ShhherCommand");
91                 exit(1);
92         }
93 }
94
95 //**********************************************************************************************************************
96
97 ShhherCommand::ShhherCommand(string option) {
98         try {
99         
100 #ifdef USE_MPI
101                 MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
102                 MPI_Comm_size(MPI_COMM_WORLD, &ncpus);
103         
104                 if(pid == 0){
105 #endif
106                 abort = false; calledHelp = false;   
107                 
108                 //allow user to run help
109                 if(option == "help") { help(); abort = true; calledHelp = true; }
110                 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
111                 
112                 else {
113                         vector<string> myArray = setParameters();
114                         
115                         OptionParser parser(option);
116                         map<string,string> parameters = parser.getParameters();
117                         
118                         ValidParameters validParameter;
119                         map<string,string>::iterator it;
120                         
121                         //check to make sure all parameters are valid for command
122                         for (it = parameters.begin(); it != parameters.end(); it++) { 
123                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
124                         }
125                         
126                         //initialize outputTypes
127             vector<string> tempOutNames;
128             outputTypes["fasta"] = tempOutNames;
129             outputTypes["name"] = tempOutNames;
130             outputTypes["group"] = tempOutNames;
131             outputTypes["counts"] = tempOutNames;
132             outputTypes["qfile"] = tempOutNames;
133
134                         
135                         //if the user changes the input directory command factory will send this info to us in the output parameter 
136                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
137                         if (inputDir == "not found"){   inputDir = "";          }
138                         else {
139                                 string path;
140                                 it = parameters.find("flow");
141                                 //user has given a template file
142                                 if(it != parameters.end()){ 
143                                         path = m->hasPath(it->second);
144                                         //if the user has not given a path then, add inputdir. else leave path alone.
145                                         if (path == "") {       parameters["flow"] = inputDir + it->second;             }
146                                 }
147                                 
148                                 it = parameters.find("lookup");
149                                 //user has given a template file
150                                 if(it != parameters.end()){ 
151                                         path = m->hasPath(it->second);
152                                         //if the user has not given a path then, add inputdir. else leave path alone.
153                                         if (path == "") {       parameters["lookup"] = inputDir + it->second;           }
154                                 }
155
156                                 it = parameters.find("file");
157                                 //user has given a template file
158                                 if(it != parameters.end()){ 
159                                         path = m->hasPath(it->second);
160                                         //if the user has not given a path then, add inputdir. else leave path alone.
161                                         if (path == "") {       parameters["file"] = inputDir + it->second;             }
162                                 }
163                         }
164             
165             //if the user changes the output directory command factory will send this info to us in the output parameter 
166                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = ""; }
167             
168                         //check for required parameters
169                         flowFileName = validParameter.validFile(parameters, "flow", true);
170                         flowFilesFileName = validParameter.validFile(parameters, "file", true);
171                         if (flowFileName == "not found" && flowFilesFileName == "not found") {
172                                 m->mothurOut("values for either flow or file must be provided for the shhh.flows command.");
173                                 m->mothurOutEndLine();
174                                 abort = true; 
175                         }
176                         else if (flowFileName == "not open" || flowFilesFileName == "not open") { abort = true; }
177                         
178                         if(flowFileName != "not found"){
179                                 compositeFASTAFileName = "";    
180                                 compositeNamesFileName = "";    
181                         }
182                         else{
183                                 ofstream temp;
184                 
185                 string thisoutputDir = outputDir;
186                 if (outputDir == "") {  thisoutputDir =  m->hasPath(flowFilesFileName); } //if user entered a file with a path then preserve it
187                 
188                                 //we want to rip off .files, and also .flow if its there
189                 string fileroot = m->getRootName(m->getSimpleName(flowFilesFileName));
190                 if (fileroot[fileroot.length()-1] == '.') {  fileroot = fileroot.substr(0, fileroot.length()-1); } //rip off dot
191                 string extension = m->getExtension(fileroot);
192                 if (extension == ".flow") { fileroot = m->getRootName(fileroot);  }
193                 else { fileroot += "."; } //add back if needed
194                 
195                                 compositeFASTAFileName = thisoutputDir + fileroot + "shhh.fasta";
196                                 m->openOutputFile(compositeFASTAFileName, temp);
197                                 temp.close();
198                                 
199                                 compositeNamesFileName = thisoutputDir + fileroot + "shhh.names";
200                                 m->openOutputFile(compositeNamesFileName, temp);
201                                 temp.close();
202                         }
203             
204             if(flowFilesFileName != "not found"){
205                 string fName;
206                 
207                 ifstream flowFilesFile;
208                 m->openInputFile(flowFilesFileName, flowFilesFile);
209                 while(flowFilesFile){
210                     fName = m->getline(flowFilesFile);
211                     
212                     //test if file is valid
213                     ifstream in;
214                     int ableToOpen = m->openInputFile(fName, in, "noerror");
215                     in.close(); 
216                     if (ableToOpen == 1) {
217                         if (inputDir != "") { //default path is set
218                             string tryPath = inputDir + fName;
219                             m->mothurOut("Unable to open " + fName + ". Trying input directory " + tryPath); m->mothurOutEndLine();
220                             ifstream in2;
221                             ableToOpen = m->openInputFile(tryPath, in2, "noerror");
222                             in2.close();
223                             fName = tryPath;
224                         }
225                     }
226                     
227                     if (ableToOpen == 1) {
228                         if (m->getDefaultPath() != "") { //default path is set
229                             string tryPath = m->getDefaultPath() + m->getSimpleName(fName);
230                             m->mothurOut("Unable to open " + fName + ". Trying default " + tryPath); m->mothurOutEndLine();
231                             ifstream in2;
232                             ableToOpen = m->openInputFile(tryPath, in2, "noerror");
233                             in2.close();
234                             fName = tryPath;
235                         }
236                     }
237                     
238                     //if you can't open it its not in current working directory or inputDir, try mothur excutable location
239                     if (ableToOpen == 1) {
240                         string exepath = m->argv;
241                         string tempPath = exepath;
242                         for (int i = 0; i < exepath.length(); i++) { tempPath[i] = tolower(exepath[i]); }
243                         exepath = exepath.substr(0, (tempPath.find_last_of('m')));
244                         
245                         string tryPath = m->getFullPathName(exepath) + m->getSimpleName(fName);
246                         m->mothurOut("Unable to open " + fName + ". Trying mothur's executable location " + tryPath); m->mothurOutEndLine();
247                         ifstream in2;
248                         ableToOpen = m->openInputFile(tryPath, in2, "noerror");
249                         in2.close();
250                         fName = tryPath;
251                     }
252                     
253                     if (ableToOpen == 1) {  m->mothurOut("Unable to open " + fName + ". Disregarding. "); m->mothurOutEndLine();  }
254                     else { flowFileVector.push_back(fName); }
255                     m->gobble(flowFilesFile);
256                 }
257                 flowFilesFile.close();
258                 if (flowFileVector.size() == 0) {  m->mothurOut("[ERROR]: no valid files."); m->mothurOutEndLine(); abort = true; }
259             }
260             else{
261                 if (outputDir == "") { outputDir = m->hasPath(flowFileName); }
262                 flowFileVector.push_back(flowFileName);
263             }
264                 
265                         //check for optional parameter and set defaults
266                         // ...at some point should added some additional type checking...
267                         string temp;
268                         temp = validParameter.validFile(parameters, "lookup", true);
269                         if (temp == "not found")        {       
270                                 lookupFileName = "LookUp_Titanium.pat"; 
271                                 
272                                 int ableToOpen;
273                                 ifstream in;
274                                 ableToOpen = m->openInputFile(lookupFileName, in, "noerror");
275                                 in.close();     
276                                 
277                                 //if you can't open it, try input location
278                                 if (ableToOpen == 1) {
279                                         if (inputDir != "") { //default path is set
280                                                 string tryPath = inputDir + lookupFileName;
281                                                 m->mothurOut("Unable to open " + lookupFileName + ". Trying input directory " + tryPath); m->mothurOutEndLine();
282                                                 ifstream in2;
283                                                 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
284                                                 in2.close();
285                                                 lookupFileName = tryPath;
286                                         }
287                                 }
288                                 
289                                 //if you can't open it, try default location
290                                 if (ableToOpen == 1) {
291                                         if (m->getDefaultPath() != "") { //default path is set
292                                                 string tryPath = m->getDefaultPath() + m->getSimpleName(lookupFileName);
293                                                 m->mothurOut("Unable to open " + lookupFileName + ". Trying default " + tryPath); m->mothurOutEndLine();
294                                                 ifstream in2;
295                                                 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
296                                                 in2.close();
297                                                 lookupFileName = tryPath;
298                                         }
299                                 }
300                                 
301                                 //if you can't open it its not in current working directory or inputDir, try mothur excutable location
302                                 if (ableToOpen == 1) {
303                                         string exepath = m->argv;
304                                         string tempPath = exepath;
305                                         for (int i = 0; i < exepath.length(); i++) { tempPath[i] = tolower(exepath[i]); }
306                                         exepath = exepath.substr(0, (tempPath.find_last_of('m')));
307                                         
308                                         string tryPath = m->getFullPathName(exepath) + m->getSimpleName(lookupFileName);
309                                         m->mothurOut("Unable to open " + lookupFileName + ". Trying mothur's executable location " + tryPath); m->mothurOutEndLine();
310                                         ifstream in2;
311                                         ableToOpen = m->openInputFile(tryPath, in2, "noerror");
312                                         in2.close();
313                                         lookupFileName = tryPath;
314                                 }
315                                 
316                                 if (ableToOpen == 1) {  m->mothurOut("Unable to open " + lookupFileName + "."); m->mothurOutEndLine(); abort=true;  }
317                         }
318                         else if(temp == "not open")     {       
319                                 
320                                 lookupFileName = validParameter.validFile(parameters, "lookup", false);
321                                 
322                                 //if you can't open it its not inputDir, try mothur excutable location
323                                 string exepath = m->argv;
324                                 string tempPath = exepath;
325                                 for (int i = 0; i < exepath.length(); i++) { tempPath[i] = tolower(exepath[i]); }
326                                 exepath = exepath.substr(0, (tempPath.find_last_of('m')));
327                                         
328                                 string tryPath = m->getFullPathName(exepath) + lookupFileName;
329                                 m->mothurOut("Unable to open " + lookupFileName + ". Trying mothur's executable location " + tryPath); m->mothurOutEndLine();
330                                 ifstream in2;
331                                 int ableToOpen = m->openInputFile(tryPath, in2, "noerror");
332                                 in2.close();
333                                 lookupFileName = tryPath;
334                                 
335                                 if (ableToOpen == 1) {  m->mothurOut("Unable to open " + lookupFileName + "."); m->mothurOutEndLine(); abort=true;  }
336                         }else                                           {       lookupFileName = temp;  }
337                         
338                         temp = validParameter.validFile(parameters, "processors", false);       if (temp == "not found"){       temp = m->getProcessors();      }
339                         m->setProcessors(temp);
340                         m->mothurConvert(temp, processors);
341
342                         temp = validParameter.validFile(parameters, "cutoff", false);   if (temp == "not found"){       temp = "0.01";          }
343                         m->mothurConvert(temp, cutoff); 
344                         
345                         temp = validParameter.validFile(parameters, "mindelta", false); if (temp == "not found"){       temp = "0.000001";      }
346                         m->mothurConvert(temp, minDelta); 
347
348                         temp = validParameter.validFile(parameters, "maxiter", false);  if (temp == "not found"){       temp = "1000";          }
349                         m->mothurConvert(temp, maxIters); 
350             
351             temp = validParameter.validFile(parameters, "large", false);        if (temp == "not found"){       temp = "0";             }
352                         m->mothurConvert(temp, largeSize); 
353             if (largeSize != 0) { large = true; }
354             else { large = false;  }
355             if (largeSize < 0) {  m->mothurOut("The value of the large cannot be negative.\n"); }
356             
357 #ifdef USE_MPI
358             if (large) { m->mothurOut("The large parameter is not available with the MPI-Enabled version.\n"); large=false; }                           
359 #endif
360             
361
362                         temp = validParameter.validFile(parameters, "sigma", false);if (temp == "not found")    {       temp = "60";            }
363                         m->mothurConvert(temp, sigma); 
364                         
365                         flowOrder = validParameter.validFile(parameters, "order", false);
366                         if (flowOrder == "not found"){ flowOrder = "TACG";              }
367                         else if(flowOrder.length() != 4){
368                                 m->mothurOut("The value of the order option must be four bases long\n");
369                         }
370                         
371                 }
372 #ifdef USE_MPI
373                 }                               
374 #endif
375         }
376         catch(exception& e) {
377                 m->errorOut(e, "ShhherCommand", "ShhherCommand");
378                 exit(1);
379         }
380 }
381 //**********************************************************************************************************************
382 #ifdef USE_MPI
383 int ShhherCommand::execute(){
384         try {
385                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
386                 
387                 int tag = 1976;
388                 MPI_Status status; 
389                         
390                 if(pid == 0){
391
392                         for(int i=1;i<ncpus;i++){
393                                 MPI_Send(&abort, 1, MPI_INT, i, tag, MPI_COMM_WORLD);
394                         }
395                         if(abort == 1){ return 0;       }
396
397                         processors = ncpus;
398                         
399                         m->mothurOut("\nGetting preliminary data...\n");
400                         getSingleLookUp();      if (m->control_pressed) { return 0; }
401                         getJointLookUp();       if (m->control_pressed) { return 0; }
402                         
403             vector<string> flowFileVector;
404                         if(flowFilesFileName != "not found"){
405                                 string fName;
406                 
407                                 ifstream flowFilesFile;
408                                 m->openInputFile(flowFilesFileName, flowFilesFile);
409                                 while(flowFilesFile){
410                                         fName = m->getline(flowFilesFile);
411                                         flowFileVector.push_back(fName);
412                                         m->gobble(flowFilesFile);
413                                 }
414                         }
415                         else{
416                                 flowFileVector.push_back(flowFileName);
417                         }
418             
419                         int numFiles = flowFileVector.size();
420
421                         for(int i=1;i<ncpus;i++){
422                                 MPI_Send(&numFiles, 1, MPI_INT, i, tag, MPI_COMM_WORLD);
423                         }
424                         
425                         for(int i=0;i<numFiles;i++){
426                                 
427                                 if (m->control_pressed) { break; }
428                                 
429                                 double begClock = clock();
430                                 unsigned long long begTime = time(NULL);
431
432                                 flowFileName = flowFileVector[i];
433                                 
434                                 m->mothurOut("\n>>>>>\tProcessing " + flowFileName + " (file " + toString(i+1) + " of " + toString(numFiles) + ")\t<<<<<\n");
435                                 m->mothurOut("Reading flowgrams...\n");
436                                 getFlowData();
437                                 
438                                 if (m->control_pressed) { break; }
439
440                                 m->mothurOut("Identifying unique flowgrams...\n");
441                                 getUniques();
442                                 
443                                 if (m->control_pressed) { break; }
444
445                                 m->mothurOut("Calculating distances between flowgrams...\n");
446                                 char fileName[1024];
447                                 strcpy(fileName, flowFileName.c_str());
448
449                                 for(int i=1;i<ncpus;i++){
450                                         MPI_Send(&fileName[0], 1024, MPI_CHAR, i, tag, MPI_COMM_WORLD);
451
452                                         MPI_Send(&numSeqs, 1, MPI_INT, i, tag, MPI_COMM_WORLD);
453                                         MPI_Send(&numUniques, 1, MPI_INT, i, tag, MPI_COMM_WORLD);
454                                         MPI_Send(&numFlowCells, 1, MPI_INT, i, tag, MPI_COMM_WORLD);
455                                         MPI_Send(&flowDataIntI[0], numSeqs * numFlowCells, MPI_SHORT, i, tag, MPI_COMM_WORLD);
456                                         MPI_Send(&flowDataPrI[0], numSeqs * numFlowCells, MPI_DOUBLE, i, tag, MPI_COMM_WORLD);
457                                         MPI_Send(&mapUniqueToSeq[0], numSeqs, MPI_INT, i, tag, MPI_COMM_WORLD);
458                                         MPI_Send(&mapSeqToUnique[0], numSeqs, MPI_INT, i, tag, MPI_COMM_WORLD);
459                                         MPI_Send(&lengths[0], numSeqs, MPI_INT, i, tag, MPI_COMM_WORLD);
460                                         MPI_Send(&jointLookUp[0], NUMBINS * NUMBINS, MPI_DOUBLE, i, tag, MPI_COMM_WORLD);
461                                         MPI_Send(&cutoff, 1, MPI_DOUBLE, i, tag, MPI_COMM_WORLD);
462                                 }                               
463                                                         
464                                 string distFileName = flowDistMPI(0, int(sqrt(1.0/float(ncpus)) * numUniques));
465                                 
466                                 if (m->control_pressed) { break; }
467                                 
468                                 int done;
469                                 for(int i=1;i<ncpus;i++){
470                                         MPI_Recv(&done, 1, MPI_INT, i, tag, MPI_COMM_WORLD, &status);
471                                         
472                                         m->appendFiles((distFileName + ".temp." + toString(i)), distFileName);
473                                         m->mothurRemove((distFileName + ".temp." + toString(i)));
474                                 }
475
476                                 string namesFileName = createNamesFile();
477                                 
478                                 if (m->control_pressed) { break; }
479                                 
480                                 m->mothurOut("\nClustering flowgrams...\n");
481                                 string listFileName = cluster(distFileName, namesFileName);
482                                 
483                                 if (m->control_pressed) { break; }
484                                 
485                 
486                 
487                                 getOTUData(listFileName);
488
489                                 m->mothurRemove(distFileName);
490                                 m->mothurRemove(namesFileName);
491                                 m->mothurRemove(listFileName);
492                                 
493                                 if (m->control_pressed) { break; }
494                                 
495                                 initPyroCluster();
496
497                                 if (m->control_pressed) { break; }
498                                 
499             
500                                 for(int i=1;i<ncpus;i++){
501                                         MPI_Send(&numOTUs, 1, MPI_INT, i, tag, MPI_COMM_WORLD);
502                                         MPI_Send(&singleLookUp[0], singleLookUp.size(), MPI_DOUBLE, i, tag, MPI_COMM_WORLD);
503                                         MPI_Send(&uniqueFlowgrams[0], numFlowCells * numUniques, MPI_SHORT, i, tag, MPI_COMM_WORLD);
504                                         MPI_Send(&sigma, 1, MPI_DOUBLE, i, tag, MPI_COMM_WORLD);
505                                 }
506                                 
507                                 if (m->control_pressed) { break; }
508                                 
509                                 double maxDelta = 0;
510                                 int iter = 0;
511                                 
512                                 int numOTUsOnCPU = numOTUs / ncpus;
513                                 int numSeqsOnCPU = numSeqs / ncpus;
514                                 m->mothurOut("\nDenoising flowgrams...\n");
515                                 m->mothurOut("iter\tmaxDelta\tnLL\t\tcycletime\n");
516                                 
517                                 while((maxIters == 0 && maxDelta > minDelta) || iter < MIN_ITER || (maxDelta > minDelta && iter < maxIters)){
518                                         
519                                         double cycClock = clock();
520                                         unsigned long long cycTime = time(NULL);
521                                         fill();
522                                         
523                                         if (m->control_pressed) { break; }
524
525                                         int total = singleTau.size();
526                                         for(int i=1;i<ncpus;i++){
527                                                 MPI_Send(&total, 1, MPI_INT, i, tag, MPI_COMM_WORLD);
528                                                 MPI_Send(&change[0], numOTUs, MPI_SHORT, i, tag, MPI_COMM_WORLD);
529                                                 MPI_Send(&centroids[0], numOTUs, MPI_INT, i, tag, MPI_COMM_WORLD);
530                                                 
531                                                 MPI_Send(&singleTau[0], total, MPI_DOUBLE, i, tag, MPI_COMM_WORLD);
532                                                 MPI_Send(&seqNumber[0], total, MPI_INT, i, tag, MPI_COMM_WORLD);
533                                                 MPI_Send(&seqIndex[0], total, MPI_INT, i, tag, MPI_COMM_WORLD);
534                                                 MPI_Send(&nSeqsPerOTU[0], numOTUs, MPI_INT, i, tag, MPI_COMM_WORLD);
535                                                 MPI_Send(&cumNumSeqs[0], numOTUs, MPI_INT, i, tag, MPI_COMM_WORLD);
536                                         }
537                                         
538                                         calcCentroidsDriver(0, numOTUsOnCPU);
539                                         
540                                         for(int i=1;i<ncpus;i++){
541                                                 int otuStart = i * numOTUs / ncpus;
542                                                 int otuStop = (i + 1) * numOTUs / ncpus;
543                                                 
544                                                 vector<int> tempCentroids(numOTUs, 0);
545                                                 vector<short> tempChange(numOTUs, 0);
546                                                 
547                                                 MPI_Recv(&tempCentroids[0], numOTUs, MPI_INT, i, tag, MPI_COMM_WORLD, &status);
548                                                 MPI_Recv(&tempChange[0], numOTUs, MPI_SHORT, i, tag, MPI_COMM_WORLD, &status);
549                                                 
550                                                 for(int j=otuStart;j<otuStop;j++){
551                                                         centroids[j] = tempCentroids[j];
552                                                         change[j] = tempChange[j];
553                                                 }
554                                         }
555                                                                         
556                                         maxDelta = getNewWeights(); if (m->control_pressed) { break; }
557                                         double nLL = getLikelihood(); if (m->control_pressed) { break; }
558                                         checkCentroids(); if (m->control_pressed) { break; }
559                                         
560                                         for(int i=1;i<ncpus;i++){
561                                                 MPI_Send(&centroids[0], numOTUs, MPI_INT, i, tag, MPI_COMM_WORLD);
562                                                 MPI_Send(&weight[0], numOTUs, MPI_DOUBLE, i, tag, MPI_COMM_WORLD);
563                                                 MPI_Send(&change[0], numOTUs, MPI_SHORT, i, tag, MPI_COMM_WORLD);
564                                         }
565                                         
566                                         calcNewDistancesParent(0, numSeqsOnCPU);
567
568                                         total = singleTau.size();
569
570                                         for(int i=1;i<ncpus;i++){
571                                                 int childTotal;
572                                                 int seqStart = i * numSeqs / ncpus;
573                                                 int seqStop = (i + 1) * numSeqs / ncpus;
574                                                 
575                                                 MPI_Recv(&childTotal, 1, MPI_INT, i, tag, MPI_COMM_WORLD, &status);
576
577                                                 vector<int> childSeqIndex(childTotal, 0);
578                                                 vector<double> childSingleTau(childTotal, 0);
579                                                 vector<double> childDist(numSeqs * numOTUs, 0);
580                                                 vector<int> otuIndex(childTotal, 0);
581                                                 
582                                                 MPI_Recv(&childSeqIndex[0], childTotal, MPI_INT, i, tag, MPI_COMM_WORLD, &status);
583                                                 MPI_Recv(&childSingleTau[0], childTotal, MPI_DOUBLE, i, tag, MPI_COMM_WORLD, &status);
584                                                 MPI_Recv(&childDist[0], numOTUs * numSeqs, MPI_DOUBLE, i, tag, MPI_COMM_WORLD, &status);
585                                                 MPI_Recv(&otuIndex[0], childTotal, MPI_INT, i, tag, MPI_COMM_WORLD, &status);
586
587                                                 int oldTotal = total;
588                                                 total += childTotal;
589                                                 singleTau.resize(total, 0);
590                                                 seqIndex.resize(total, 0);
591                                                 seqNumber.resize(total, 0);
592                                                 
593                                                 int childIndex = 0;
594                                                 
595                                                 for(int j=oldTotal;j<total;j++){
596                                                         int otuI = otuIndex[childIndex];
597                                                         int seqI = childSeqIndex[childIndex];
598                                                         
599                                                         singleTau[j] = childSingleTau[childIndex];
600                                                         
601                                                         aaP[otuI][nSeqsPerOTU[otuI]] = j;
602                                                         aaI[otuI][nSeqsPerOTU[otuI]] = seqI;
603                                                         nSeqsPerOTU[otuI]++;
604                                                         childIndex++;
605                                                 }
606                                                 
607                                                 int index = seqStart * numOTUs;
608                                                 for(int j=seqStart;j<seqStop;j++){
609                                                         for(int k=0;k<numOTUs;k++){
610                                                                 dist[index] = childDist[index];
611                                                                 index++;
612                                                         }
613                                                 }                                       
614                                         }
615                                         
616                                         iter++;
617                                         
618                                         m->mothurOut(toString(iter) + '\t' + toString(maxDelta) + '\t' + toString(nLL) + '\t' + toString(time(NULL) - cycTime) + '\t' + toString((clock() - cycClock)/(double)CLOCKS_PER_SEC) + '\n');                  
619
620                                         if((maxIters == 0 && maxDelta > minDelta) || iter < MIN_ITER || (maxDelta > minDelta && iter < maxIters)){
621                                                 int live = 1;
622                                                 for(int i=1;i<ncpus;i++){
623                                                         MPI_Send(&live, 1, MPI_INT, i, tag, MPI_COMM_WORLD);
624                                                 }
625                                         }
626                                         else{
627                                                 int live = 0;
628                                                 for(int i=1;i<ncpus;i++){
629                                                         MPI_Send(&live, 1, MPI_INT, i, tag, MPI_COMM_WORLD); //send kill command
630                                                 }
631                                         }
632                                         
633                                 }       
634                                 
635                                 if (m->control_pressed) { break; }
636                                 
637                                 m->mothurOut("\nFinalizing...\n");
638                                 fill();
639                                 
640                                 if (m->control_pressed) { break; }
641                                 
642                                 setOTUs();
643                                 
644                                 vector<int> otuCounts(numOTUs, 0);
645                                 for(int i=0;i<numSeqs;i++)      {       otuCounts[otuData[i]]++;        }
646                                 calcCentroidsDriver(0, numOTUs);
647                                 
648                                 if (m->control_pressed) { break; }
649                                 
650                                 writeQualities(otuCounts);      if (m->control_pressed) { break; }
651                                 writeSequences(otuCounts);      if (m->control_pressed) { break; }
652                                 writeNames(otuCounts);          if (m->control_pressed) { break; }
653                                 writeClusters(otuCounts);       if (m->control_pressed) { break; }
654                                 writeGroups();                          if (m->control_pressed) { break; }
655                                 
656                                                                  
657                                 m->mothurOut("Total time to process " + toString(flowFileName) + ":\t" + toString(time(NULL) - begTime) + '\t' + toString((clock() - begClock)/(double)CLOCKS_PER_SEC) + '\n');                 
658                         }
659                 }
660                 else{
661                         int abort = 1;
662
663                         MPI_Recv(&abort, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
664                         if(abort){      return 0;       }
665
666                         int numFiles;
667                         MPI_Recv(&numFiles, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
668
669                         for(int i=0;i<numFiles;i++){
670                                 
671                                 if (m->control_pressed) { break; }
672                                 
673                                 //Now into the pyrodist part
674                                 bool live = 1;
675
676                                 char fileName[1024];
677                                 MPI_Recv(&fileName, 1024, MPI_CHAR, 0, tag, MPI_COMM_WORLD, &status);
678                                 MPI_Recv(&numSeqs, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
679                                 MPI_Recv(&numUniques, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
680                                 MPI_Recv(&numFlowCells, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
681                                 
682                                 flowDataIntI.resize(numSeqs * numFlowCells);
683                                 flowDataPrI.resize(numSeqs * numFlowCells);
684                                 mapUniqueToSeq.resize(numSeqs);
685                                 mapSeqToUnique.resize(numSeqs);
686                                 lengths.resize(numSeqs);
687                                 jointLookUp.resize(NUMBINS * NUMBINS);
688
689                                 MPI_Recv(&flowDataIntI[0], numSeqs * numFlowCells, MPI_SHORT, 0, tag, MPI_COMM_WORLD, &status);
690                                 MPI_Recv(&flowDataPrI[0], numSeqs * numFlowCells, MPI_DOUBLE, 0, tag, MPI_COMM_WORLD, &status);
691                                 MPI_Recv(&mapUniqueToSeq[0], numSeqs, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
692                                 MPI_Recv(&mapSeqToUnique[0], numSeqs, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
693                                 MPI_Recv(&lengths[0], numSeqs, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
694                                 MPI_Recv(&jointLookUp[0], NUMBINS * NUMBINS, MPI_DOUBLE, 0, tag, MPI_COMM_WORLD, &status);
695                                 MPI_Recv(&cutoff, 1, MPI_DOUBLE, 0, tag, MPI_COMM_WORLD, &status);
696
697                                 flowFileName = string(fileName);
698                                 int flowDistStart = int(sqrt(float(pid)/float(ncpus)) * numUniques);
699                                 int flowDistEnd = int(sqrt(float(pid+1)/float(ncpus)) * numUniques);
700                                 
701                                 string distanceStringChild = flowDistMPI(flowDistStart, flowDistEnd);
702                                 
703                                 if (m->control_pressed) { break; }
704                                 
705                                 int done = 1;
706                                 MPI_Send(&done, 1, MPI_INT, 0, tag, MPI_COMM_WORLD);
707
708                                 //Now into the pyronoise part
709                                 MPI_Recv(&numOTUs, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
710                                 
711                                 singleLookUp.resize(HOMOPS * NUMBINS);
712                                 uniqueFlowgrams.resize(numUniques * numFlowCells);
713                                 weight.resize(numOTUs);
714                                 centroids.resize(numOTUs);
715                                 change.resize(numOTUs);
716                                 dist.assign(numOTUs * numSeqs, 0);
717                                 nSeqsPerOTU.resize(numOTUs);
718                                 cumNumSeqs.resize(numOTUs);
719
720                                 MPI_Recv(&singleLookUp[0], singleLookUp.size(), MPI_DOUBLE, 0, tag, MPI_COMM_WORLD, &status);
721                                 MPI_Recv(&uniqueFlowgrams[0], uniqueFlowgrams.size(), MPI_SHORT, 0, tag, MPI_COMM_WORLD, &status);
722                                 MPI_Recv(&sigma, 1, MPI_DOUBLE, 0, tag, MPI_COMM_WORLD, &status);
723                                 
724                                 int startOTU = pid * numOTUs / ncpus;
725                                 int endOTU = (pid + 1) * numOTUs / ncpus;
726
727                                 int startSeq = pid * numSeqs / ncpus;
728                                 int endSeq = (pid + 1) * numSeqs /ncpus;
729                                 
730                                 int total;
731
732                                 while(live){
733                                         
734                                         if (m->control_pressed) { break; }
735                                         
736                                         MPI_Recv(&total, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
737                                         singleTau.assign(total, 0.0000);
738                                         seqNumber.assign(total, 0);
739                                         seqIndex.assign(total, 0);
740
741                                         MPI_Recv(&change[0], numOTUs, MPI_SHORT, 0, tag, MPI_COMM_WORLD, &status);
742                                         MPI_Recv(&centroids[0], numOTUs, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
743                                         MPI_Recv(&singleTau[0], total, MPI_DOUBLE, 0, tag, MPI_COMM_WORLD, &status);
744                                         MPI_Recv(&seqNumber[0], total, MPI_DOUBLE, 0, tag, MPI_COMM_WORLD, &status);
745                                         MPI_Recv(&seqIndex[0], total, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
746                                         MPI_Recv(&nSeqsPerOTU[0], total, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
747                                         MPI_Recv(&cumNumSeqs[0], numOTUs, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
748
749                                         calcCentroidsDriver(startOTU, endOTU);
750
751                                         MPI_Send(&centroids[0], numOTUs, MPI_INT, 0, tag, MPI_COMM_WORLD);
752                                         MPI_Send(&change[0], numOTUs, MPI_SHORT, 0, tag, MPI_COMM_WORLD);
753
754                                         MPI_Recv(&centroids[0], numOTUs, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
755                                         MPI_Recv(&weight[0], numOTUs, MPI_DOUBLE, 0, tag, MPI_COMM_WORLD, &status);
756                                         MPI_Recv(&change[0], numOTUs, MPI_SHORT, 0, tag, MPI_COMM_WORLD, &status);
757
758                                         vector<int> otuIndex(total, 0);
759                                         calcNewDistancesChildMPI(startSeq, endSeq, otuIndex);
760                                         total = otuIndex.size();
761                                         
762                                         MPI_Send(&total, 1, MPI_INT, 0, tag, MPI_COMM_WORLD);
763                                         MPI_Send(&seqIndex[0], total, MPI_INT, 0, tag, MPI_COMM_WORLD);
764                                         MPI_Send(&singleTau[0], total, MPI_DOUBLE, 0, tag, MPI_COMM_WORLD);
765                                         MPI_Send(&dist[0], numOTUs * numSeqs, MPI_DOUBLE, 0, tag, MPI_COMM_WORLD);
766                                         MPI_Send(&otuIndex[0], total, MPI_INT, 0, tag, MPI_COMM_WORLD);
767                                 
768                                         MPI_Recv(&live, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
769                                 }
770                         }
771                 }
772                 
773                 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
774                 
775                 MPI_Barrier(MPI_COMM_WORLD);
776
777                 
778                 if(compositeFASTAFileName != ""){
779                         outputNames.push_back(compositeFASTAFileName); outputTypes["fasta"].push_back(compositeFASTAFileName);
780                         outputNames.push_back(compositeNamesFileName); outputTypes["name"].push_back(compositeNamesFileName); 
781                 }
782
783                 m->mothurOutEndLine();
784                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
785                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
786                 m->mothurOutEndLine();
787                 
788                 return 0;
789
790         }
791         catch(exception& e) {
792                 m->errorOut(e, "ShhherCommand", "execute");
793                 exit(1);
794         }
795 }
796 /**************************************************************************************************/
797 string ShhherCommand::createNamesFile(){
798         try{
799                 
800                 vector<string> duplicateNames(numUniques, "");
801                 for(int i=0;i<numSeqs;i++){
802                         duplicateNames[mapSeqToUnique[i]] += seqNameVector[i] + ',';
803                 }
804                 
805                 string nameFileName = outputDir + m->getRootName(m->getSimpleName(flowFileName)) + getOutputFileNameTag("name");
806                 
807                 ofstream nameFile;
808                 m->openOutputFile(nameFileName, nameFile);
809                 
810                 for(int i=0;i<numUniques;i++){
811                         
812                         if (m->control_pressed) { break; }
813                         
814             //                  nameFile << seqNameVector[mapUniqueToSeq[i]] << '\t' << duplicateNames[i].substr(0, duplicateNames[i].find_last_of(',')) << endl;
815                         nameFile << mapUniqueToSeq[i] << '\t' << duplicateNames[i].substr(0, duplicateNames[i].find_last_of(',')) << endl;
816                 }
817                 
818                 nameFile.close();
819                 return  nameFileName;
820         }
821         catch(exception& e) {
822                 m->errorOut(e, "ShhherCommand", "createNamesFile");
823                 exit(1);
824         }
825 }
826 /**************************************************************************************************/
827
828 string ShhherCommand::flowDistMPI(int startSeq, int stopSeq){
829         try{            
830                 ostringstream outStream;
831                 outStream.setf(ios::fixed, ios::floatfield);
832                 outStream.setf(ios::dec, ios::basefield);
833                 outStream.setf(ios::showpoint);
834                 outStream.precision(6);
835                 
836                 int begTime = time(NULL);
837                 double begClock = clock();
838                 
839                 for(int i=startSeq;i<stopSeq;i++){
840                         
841                         if (m->control_pressed) { break; }
842                         
843                         for(int j=0;j<i;j++){
844                                 float flowDistance = calcPairwiseDist(mapUniqueToSeq[i], mapUniqueToSeq[j]);
845                                 
846                                 if(flowDistance < 1e-6){
847                                         outStream << mapUniqueToSeq[i] << '\t' << mapUniqueToSeq[j] << '\t' << 0.000000 << endl;
848                                 }
849                                 else if(flowDistance <= cutoff){
850                                         outStream << mapUniqueToSeq[i] << '\t' << mapUniqueToSeq[j] << '\t' << flowDistance << endl;
851                                 }
852                         }
853                         if(i % 100 == 0){
854                                 m->mothurOut(toString(i) + '\t' + toString(time(NULL) - begTime) + '\t' + toString((clock()-begClock)/CLOCKS_PER_SEC) + '\n');
855                         }
856                 }
857                 
858                 string fDistFileName = flowFileName.substr(0,flowFileName.find_last_of('.')) + ".shhh.dist";
859                 if(pid != 0){   fDistFileName += ".temp." + toString(pid);      }
860                 
861                 if (m->control_pressed) { return fDistFileName; }
862                 
863                 m->mothurOut(toString(stopSeq) + '\t' + toString(time(NULL) - begTime) + '\t' + toString((clock()-begClock)/CLOCKS_PER_SEC) + '\n');
864
865                 ofstream distFile(fDistFileName.c_str());
866                 distFile << outStream.str();            
867                 distFile.close();
868                 
869                 return fDistFileName;
870         }
871         catch(exception& e) {
872                 m->errorOut(e, "ShhherCommand", "flowDistMPI");
873                 exit(1);
874         }
875 }
876 /**************************************************************************************************/
877  
878 void ShhherCommand::getOTUData(string listFileName){
879     try {
880         
881         ifstream listFile;
882         m->openInputFile(listFileName, listFile);
883         string label;
884         
885         listFile >> label >> numOTUs;
886         
887         otuData.assign(numSeqs, 0);
888         cumNumSeqs.assign(numOTUs, 0);
889         nSeqsPerOTU.assign(numOTUs, 0);
890         aaP.clear();aaP.resize(numOTUs);
891         
892         seqNumber.clear();
893         aaI.clear();
894         seqIndex.clear();
895         
896         string singleOTU = "";
897         
898         for(int i=0;i<numOTUs;i++){
899             
900             if (m->control_pressed) { break; }
901             
902             listFile >> singleOTU;
903             
904             istringstream otuString(singleOTU);
905             
906             while(otuString){
907                 
908                 string seqName = "";
909                 
910                 for(int j=0;j<singleOTU.length();j++){
911                     char letter = otuString.get();
912                     
913                     if(letter != ','){
914                         seqName += letter;
915                     }
916                     else{
917                         map<string,int>::iterator nmIt = nameMap.find(seqName);
918                         int index = nmIt->second;
919                         
920                         nameMap.erase(nmIt);
921                         
922                         otuData[index] = i;
923                         nSeqsPerOTU[i]++;
924                         aaP[i].push_back(index);
925                         seqName = "";
926                     }
927                 }
928                 
929                 map<string,int>::iterator nmIt = nameMap.find(seqName);
930                 
931                 int index = nmIt->second;
932                 nameMap.erase(nmIt);
933                 
934                 otuData[index] = i;
935                 nSeqsPerOTU[i]++;
936                 aaP[i].push_back(index);        
937                 
938                 otuString.get();
939             }
940             
941             sort(aaP[i].begin(), aaP[i].end());
942             for(int j=0;j<nSeqsPerOTU[i];j++){
943                 seqNumber.push_back(aaP[i][j]);
944             }
945             for(int j=nSeqsPerOTU[i];j<numSeqs;j++){
946                 aaP[i].push_back(0);
947             }
948             
949             
950         }
951         
952         for(int i=1;i<numOTUs;i++){
953             cumNumSeqs[i] = cumNumSeqs[i-1] + nSeqsPerOTU[i-1];
954         }
955         aaI = aaP;
956         seqIndex = seqNumber;
957         
958         listFile.close();       
959         
960     }
961     catch(exception& e) {
962         m->errorOut(e, "ShhherCommand", "getOTUData");
963         exit(1);        
964     }           
965 }
966
967 /**************************************************************************************************/
968
969 void ShhherCommand::initPyroCluster(){                          
970     try{
971         if (numOTUs < processors) { processors = 1; }
972         
973         if (m->debug) { m->mothurOut("[DEBUG]: numSeqs = " + toString(numSeqs) + " numOTUS = " + toString(numOTUs) + " about to alloc a dist vector with size = " + toString((numSeqs * numOTUs)) + ".\n"); }
974         
975         dist.assign(numSeqs * numOTUs, 0);
976         change.assign(numOTUs, 1);
977         centroids.assign(numOTUs, -1);
978         weight.assign(numOTUs, 0);
979         singleTau.assign(numSeqs, 1.0);
980         
981         nSeqsBreaks.assign(processors+1, 0);
982         nOTUsBreaks.assign(processors+1, 0);
983         
984         if (m->debug) { m->mothurOut("[DEBUG]: made it through the memory allocation.\n"); }
985         
986         nSeqsBreaks[0] = 0;
987         for(int i=0;i<processors;i++){
988             nSeqsBreaks[i+1] = nSeqsBreaks[i] + (int)((double) numSeqs / (double) processors);
989             nOTUsBreaks[i+1] = nOTUsBreaks[i] + (int)((double) numOTUs / (double) processors);
990         }
991         nSeqsBreaks[processors] = numSeqs;
992         nOTUsBreaks[processors] = numOTUs;
993     }
994     catch(exception& e) {
995         m->errorOut(e, "ShhherCommand", "initPyroCluster");
996         exit(1);        
997     }
998 }
999
1000 /**************************************************************************************************/
1001
1002 void ShhherCommand::fill(){
1003     try {
1004         int index = 0;
1005         for(int i=0;i<numOTUs;i++){
1006             
1007             if (m->control_pressed) { break; }
1008             
1009             cumNumSeqs[i] = index;
1010             for(int j=0;j<nSeqsPerOTU[i];j++){
1011                 seqNumber[index] = aaP[i][j];
1012                 seqIndex[index] = aaI[i][j];
1013                 
1014                 index++;
1015             }
1016         }
1017     }
1018     catch(exception& e) {
1019         m->errorOut(e, "ShhherCommand", "fill");
1020         exit(1);        
1021     }           
1022 }
1023
1024 /**************************************************************************************************/
1025  
1026 void ShhherCommand::getFlowData(){
1027     try{
1028         ifstream flowFile;
1029         m->openInputFile(flowFileName, flowFile);
1030         
1031         string seqName;
1032         seqNameVector.clear();
1033         lengths.clear();
1034         flowDataIntI.clear();
1035         nameMap.clear();
1036         
1037         
1038         int currentNumFlowCells;
1039         
1040         float intensity;
1041         
1042         string numFlowTest;
1043         flowFile >> numFlowTest;
1044         
1045         if (!m->isContainingOnlyDigits(numFlowTest)) { m->mothurOut("[ERROR]: expected a number and got " + numFlowTest + ", quitting. Did you use the flow parameter instead of the file parameter?"); m->mothurOutEndLine(); exit(1); }
1046         else { convert(numFlowTest, numFlowCells); }
1047         
1048         int index = 0;//pcluster
1049         while(!flowFile.eof()){
1050             
1051             if (m->control_pressed) { break; }
1052             
1053             flowFile >> seqName >> currentNumFlowCells;
1054             lengths.push_back(currentNumFlowCells);
1055             
1056             seqNameVector.push_back(seqName);
1057             nameMap[seqName] = index++;//pcluster
1058             
1059             for(int i=0;i<numFlowCells;i++){
1060                 flowFile >> intensity;
1061                 if(intensity > 9.99)    {       intensity = 9.99;       }
1062                 int intI = int(100 * intensity + 0.0001);
1063                 flowDataIntI.push_back(intI);
1064             }
1065             m->gobble(flowFile);
1066         }
1067         flowFile.close();
1068         
1069         numSeqs = seqNameVector.size();         
1070         
1071         for(int i=0;i<numSeqs;i++){
1072             
1073             if (m->control_pressed) { break; }
1074             
1075             int iNumFlowCells = i * numFlowCells;
1076             for(int j=lengths[i];j<numFlowCells;j++){
1077                 flowDataIntI[iNumFlowCells + j] = 0;
1078             }
1079         }
1080         
1081     }
1082     catch(exception& e) {
1083         m->errorOut(e, "ShhherCommand", "getFlowData");
1084         exit(1);
1085     }
1086 }
1087 /**************************************************************************************************/
1088 void ShhherCommand::calcNewDistancesChildMPI(int startSeq, int stopSeq, vector<int>& otuIndex){
1089         
1090         try{
1091                 vector<double> newTau(numOTUs,0);
1092                 vector<double> norms(numSeqs, 0);
1093                 otuIndex.clear();
1094                 seqIndex.clear();
1095                 singleTau.clear();
1096                 
1097                 for(int i=startSeq;i<stopSeq;i++){
1098                         
1099                         if (m->control_pressed) { break; }
1100                         
1101                         double offset = 1e8;
1102                         int indexOffset = i * numOTUs;
1103                         
1104                         for(int j=0;j<numOTUs;j++){
1105                                 
1106                                 if(weight[j] > MIN_WEIGHT && change[j] == 1){
1107                                         dist[indexOffset + j] = getDistToCentroid(centroids[j], i, lengths[i]);
1108                                 }
1109                                 if(weight[j] > MIN_WEIGHT && dist[indexOffset + j] < offset){
1110                                         offset = dist[indexOffset + j];
1111                                 }
1112                         }
1113                         
1114                         for(int j=0;j<numOTUs;j++){
1115                                 if(weight[j] > MIN_WEIGHT){
1116                                         newTau[j] = exp(sigma * (-dist[indexOffset + j] + offset)) * weight[j];
1117                                         norms[i] += newTau[j];
1118                                 }
1119                                 else{
1120                                         newTau[j] = 0.0;
1121                                 }
1122                         }
1123                         
1124                         for(int j=0;j<numOTUs;j++){
1125                 
1126                                 newTau[j] /= norms[i];
1127                                 
1128                                 if(newTau[j] > MIN_TAU){
1129                                         otuIndex.push_back(j);
1130                                         seqIndex.push_back(i);
1131                                         singleTau.push_back(newTau[j]);
1132                                 }
1133                         }
1134                         
1135                 }
1136         }
1137         catch(exception& e) {
1138                 m->errorOut(e, "ShhherCommand", "calcNewDistancesChildMPI");
1139                 exit(1);        
1140         }               
1141 }
1142
1143 /**************************************************************************************************/
1144
1145 void ShhherCommand::calcNewDistancesParent(int startSeq, int stopSeq){
1146     
1147     try{
1148         
1149         int total = 0;
1150         vector<double> newTau(numOTUs,0);
1151         vector<double> norms(numSeqs, 0);
1152         nSeqsPerOTU.assign(numOTUs, 0);
1153         
1154         for(int i=startSeq;i<stopSeq;i++){
1155             
1156             if (m->control_pressed) { break; }
1157             
1158             int indexOffset = i * numOTUs;
1159             
1160             double offset = 1e8;
1161             
1162             for(int j=0;j<numOTUs;j++){
1163                 
1164                 if(weight[j] > MIN_WEIGHT && change[j] == 1){
1165                     dist[indexOffset + j] = getDistToCentroid(centroids[j], i, lengths[i]);
1166                 }
1167                 
1168                 if(weight[j] > MIN_WEIGHT && dist[indexOffset + j] < offset){
1169                     offset = dist[indexOffset + j];
1170                 }
1171             }
1172             
1173             for(int j=0;j<numOTUs;j++){
1174                 if(weight[j] > MIN_WEIGHT){
1175                     newTau[j] = exp(sigma * (-dist[indexOffset + j] + offset)) * weight[j];
1176                     norms[i] += newTau[j];
1177                 }
1178                 else{
1179                     newTau[j] = 0.0;
1180                 }
1181             }
1182             
1183             for(int j=0;j<numOTUs;j++){
1184                 newTau[j] /= norms[i];
1185             }
1186             
1187             for(int j=0;j<numOTUs;j++){
1188                 if(newTau[j] > MIN_TAU){
1189                     
1190                     int oldTotal = total;
1191                     
1192                     total++;
1193                     
1194                     singleTau.resize(total, 0);
1195                     seqNumber.resize(total, 0);
1196                     seqIndex.resize(total, 0);
1197                     
1198                     singleTau[oldTotal] = newTau[j];
1199                     
1200                     aaP[j][nSeqsPerOTU[j]] = oldTotal;
1201                     aaI[j][nSeqsPerOTU[j]] = i;
1202                     nSeqsPerOTU[j]++;
1203                 }
1204             }
1205             
1206         }
1207         
1208     }
1209     catch(exception& e) {
1210         m->errorOut(e, "ShhherCommand", "calcNewDistancesParent");
1211         exit(1);        
1212     }           
1213 }
1214
1215 /**************************************************************************************************/
1216
1217 void ShhherCommand::setOTUs(){
1218     
1219     try {
1220         vector<double> bigTauMatrix(numOTUs * numSeqs, 0.0000);
1221         
1222         for(int i=0;i<numOTUs;i++){
1223             
1224             if (m->control_pressed) { break; }
1225             
1226             for(int j=0;j<nSeqsPerOTU[i];j++){
1227                 int index = cumNumSeqs[i] + j;
1228                 double tauValue = singleTau[seqNumber[index]];
1229                 int sIndex = seqIndex[index];
1230                 bigTauMatrix[sIndex * numOTUs + i] = tauValue;                          
1231             }
1232         }
1233         
1234         for(int i=0;i<numSeqs;i++){
1235             double maxTau = -1.0000;
1236             int maxOTU = -1;
1237             for(int j=0;j<numOTUs;j++){
1238                 if(bigTauMatrix[i * numOTUs + j] > maxTau){
1239                     maxTau = bigTauMatrix[i * numOTUs + j];
1240                     maxOTU = j;
1241                 }
1242             }
1243             
1244             otuData[i] = maxOTU;
1245         }
1246         
1247         nSeqsPerOTU.assign(numOTUs, 0);         
1248         
1249         for(int i=0;i<numSeqs;i++){
1250             int index = otuData[i];
1251             
1252             singleTau[i] = 1.0000;
1253             dist[i] = 0.0000;
1254             
1255             aaP[index][nSeqsPerOTU[index]] = i;
1256             aaI[index][nSeqsPerOTU[index]] = i;
1257             
1258             nSeqsPerOTU[index]++;
1259         }
1260         fill(); 
1261     }
1262     catch(exception& e) {
1263         m->errorOut(e, "ShhherCommand", "setOTUs");
1264         exit(1);        
1265     }           
1266 }
1267
1268 /**************************************************************************************************/
1269  
1270 void ShhherCommand::getUniques(){
1271     try{
1272         
1273         
1274         numUniques = 0;
1275         uniqueFlowgrams.assign(numFlowCells * numSeqs, -1);
1276         uniqueCount.assign(numSeqs, 0);                                                 //      anWeights
1277         uniqueLengths.assign(numSeqs, 0);
1278         mapSeqToUnique.assign(numSeqs, -1);
1279         mapUniqueToSeq.assign(numSeqs, -1);
1280         
1281         vector<short> uniqueFlowDataIntI(numFlowCells * numSeqs, -1);
1282         
1283         for(int i=0;i<numSeqs;i++){
1284             
1285             if (m->control_pressed) { break; }
1286             
1287             int index = 0;
1288             
1289             vector<short> current(numFlowCells);
1290             for(int j=0;j<numFlowCells;j++){
1291                 current[j] = short(((flowDataIntI[i * numFlowCells + j] + 50.0)/100.0));
1292             }
1293             
1294             for(int j=0;j<numUniques;j++){
1295                 int offset = j * numFlowCells;
1296                 bool toEnd = 1;
1297                 
1298                 int shorterLength;
1299                 if(lengths[i] < uniqueLengths[j])       {       shorterLength = lengths[i];                     }
1300                 else                                                            {       shorterLength = uniqueLengths[j];       }
1301                 
1302                 for(int k=0;k<shorterLength;k++){
1303                     if(current[k] != uniqueFlowgrams[offset + k]){
1304                         toEnd = 0;
1305                         break;
1306                     }
1307                 }
1308                 
1309                 if(toEnd){
1310                     mapSeqToUnique[i] = j;
1311                     uniqueCount[j]++;
1312                     index = j;
1313                     if(lengths[i] > uniqueLengths[j])   {       uniqueLengths[j] = lengths[i];  }
1314                     break;
1315                 }
1316                 index++;
1317             }
1318             
1319             if(index == numUniques){
1320                 uniqueLengths[numUniques] = lengths[i];
1321                 uniqueCount[numUniques] = 1;
1322                 mapSeqToUnique[i] = numUniques;//anMap
1323                 mapUniqueToSeq[numUniques] = i;//anF
1324                 
1325                 for(int k=0;k<numFlowCells;k++){
1326                     uniqueFlowgrams[numUniques * numFlowCells + k] = current[k];
1327                     uniqueFlowDataIntI[numUniques * numFlowCells + k] = flowDataIntI[i * numFlowCells + k];
1328                 }
1329                 
1330                 numUniques++;
1331             }
1332         }
1333         uniqueFlowDataIntI.resize(numFlowCells * numUniques);
1334         uniqueLengths.resize(numUniques);       
1335         
1336         flowDataPrI.resize(numSeqs * numFlowCells, 0);
1337         for(int i=0;i<flowDataPrI.size();i++)   {       if (m->control_pressed) { break; } flowDataPrI[i] = getProbIntensity(flowDataIntI[i]);          }
1338     }
1339     catch(exception& e) {
1340         m->errorOut(e, "ShhherCommand", "getUniques");
1341         exit(1);
1342     }
1343 }
1344
1345 /**************************************************************************************************/
1346
1347 float ShhherCommand::calcPairwiseDist(int seqA, int seqB){
1348     try{
1349         int minLength = lengths[mapSeqToUnique[seqA]];
1350         if(lengths[seqB] < minLength){  minLength = lengths[mapSeqToUnique[seqB]];      }
1351         
1352         int ANumFlowCells = seqA * numFlowCells;
1353         int BNumFlowCells = seqB * numFlowCells;
1354         
1355         float dist = 0;
1356         
1357         for(int i=0;i<minLength;i++){
1358             
1359             if (m->control_pressed) { break; }
1360             
1361             int flowAIntI = flowDataIntI[ANumFlowCells + i];
1362             float flowAPrI = flowDataPrI[ANumFlowCells + i];
1363             
1364             int flowBIntI = flowDataIntI[BNumFlowCells + i];
1365             float flowBPrI = flowDataPrI[BNumFlowCells + i];
1366             dist += jointLookUp[flowAIntI * NUMBINS + flowBIntI] - flowAPrI - flowBPrI;
1367         }
1368         
1369         dist /= (float) minLength;
1370         return dist;
1371     }
1372     catch(exception& e) {
1373         m->errorOut(e, "ShhherCommand", "calcPairwiseDist");
1374         exit(1);
1375     }
1376 }
1377
1378 //**********************************************************************************************************************/
1379
1380 string ShhherCommand::cluster(string distFileName, string namesFileName){
1381     try {
1382         
1383         ReadMatrix* read = new ReadColumnMatrix(distFileName);  
1384                 read->setCutoff(cutoff);
1385                 
1386                 NameAssignment* clusterNameMap = new NameAssignment(namesFileName);
1387                 clusterNameMap->readMap();
1388                 read->read(clusterNameMap);
1389         
1390                 ListVector* list = read->getListVector();
1391                 SparseDistanceMatrix* matrix = read->getDMatrix();
1392                 
1393                 delete read; 
1394                 delete clusterNameMap; 
1395         
1396         RAbundVector* rabund = new RAbundVector(list->getRAbundVector());
1397         
1398         Cluster* cluster = new CompleteLinkage(rabund, list, matrix, cutoff, "furthest"); 
1399         string tag = cluster->getTag();
1400         
1401         double clusterCutoff = cutoff;
1402         while (matrix->getSmallDist() <= clusterCutoff && matrix->getNNodes() > 0){
1403             
1404             if (m->control_pressed) { break; }
1405             
1406             cluster->update(clusterCutoff);
1407         }
1408         
1409         list->setLabel(toString(cutoff));
1410         
1411         string listFileName = flowFileName.substr(0,flowFileName.find_last_of('.')) + ".shhh.list";
1412         ofstream listFile;
1413         m->openOutputFile(listFileName, listFile);
1414         list->print(listFile);
1415         listFile.close();
1416         
1417         delete matrix;  delete cluster; delete rabund; delete list;
1418         
1419         return listFileName;
1420     }
1421     catch(exception& e) {
1422         m->errorOut(e, "ShhherCommand", "cluster");
1423         exit(1);        
1424     }           
1425 }
1426
1427 /**************************************************************************************************/
1428
1429 void ShhherCommand::calcCentroidsDriver(int start, int finish){                          
1430     
1431     //this function gets the most likely homopolymer length at a flow position for a group of sequences
1432     //within an otu
1433     
1434     try{
1435         
1436         for(int i=start;i<finish;i++){
1437             
1438             if (m->control_pressed) { break; }
1439             
1440             double count = 0;
1441             int position = 0;
1442             int minFlowGram = 100000000;
1443             double minFlowValue = 1e8;
1444             change[i] = 0; //FALSE
1445             
1446             for(int j=0;j<nSeqsPerOTU[i];j++){
1447                 count += singleTau[seqNumber[cumNumSeqs[i] + j]];
1448             }
1449             
1450             if(nSeqsPerOTU[i] > 0 && count > MIN_COUNT){
1451                 vector<double> adF(nSeqsPerOTU[i]);
1452                 vector<int> anL(nSeqsPerOTU[i]);
1453                 
1454                 for(int j=0;j<nSeqsPerOTU[i];j++){
1455                     int index = cumNumSeqs[i] + j;
1456                     int nI = seqIndex[index];
1457                     int nIU = mapSeqToUnique[nI];
1458                     
1459                     int k;
1460                     for(k=0;k<position;k++){
1461                         if(nIU == anL[k]){
1462                             break;
1463                         }
1464                     }
1465                     if(k == position){
1466                         anL[position] = nIU;
1467                         adF[position] = 0.0000;
1468                         position++;
1469                     }                                           
1470                 }
1471                 
1472                 for(int j=0;j<nSeqsPerOTU[i];j++){
1473                     int index = cumNumSeqs[i] + j;
1474                     int nI = seqIndex[index];
1475                     
1476                     double tauValue = singleTau[seqNumber[index]];
1477                     
1478                     for(int k=0;k<position;k++){
1479                         double dist = getDistToCentroid(anL[k], nI, lengths[nI]);
1480                         adF[k] += dist * tauValue;
1481                     }
1482                 }
1483                 
1484                 for(int j=0;j<position;j++){
1485                     if(adF[j] < minFlowValue){
1486                         minFlowGram = j;
1487                         minFlowValue = adF[j];
1488                     }
1489                 }
1490                 
1491                 if(centroids[i] != anL[minFlowGram]){
1492                     change[i] = 1;
1493                     centroids[i] = anL[minFlowGram];
1494                 }
1495             }
1496             else if(centroids[i] != -1){
1497                 change[i] = 1;
1498                 centroids[i] = -1;                      
1499             }
1500         }
1501     }
1502     catch(exception& e) {
1503         m->errorOut(e, "ShhherCommand", "calcCentroidsDriver");
1504         exit(1);        
1505     }           
1506 }
1507
1508 /**************************************************************************************************/
1509
1510 double ShhherCommand::getDistToCentroid(int cent, int flow, int length){
1511     try{
1512         
1513         int flowAValue = cent * numFlowCells;
1514         int flowBValue = flow * numFlowCells;
1515         
1516         double dist = 0;
1517         
1518         for(int i=0;i<length;i++){
1519             dist += singleLookUp[uniqueFlowgrams[flowAValue] * NUMBINS + flowDataIntI[flowBValue]];
1520             flowAValue++;
1521             flowBValue++;
1522         }
1523         
1524         return dist / (double)length;
1525     }
1526     catch(exception& e) {
1527         m->errorOut(e, "ShhherCommand", "getDistToCentroid");
1528         exit(1);        
1529     }           
1530 }
1531
1532 /**************************************************************************************************/
1533
1534 double ShhherCommand::getNewWeights(){
1535     try{
1536         
1537         double maxChange = 0;
1538         
1539         for(int i=0;i<numOTUs;i++){
1540             
1541             if (m->control_pressed) { break; }
1542             
1543             double difference = weight[i];
1544             weight[i] = 0;
1545             
1546             for(int j=0;j<nSeqsPerOTU[i];j++){
1547                 int index = cumNumSeqs[i] + j;
1548                 double tauValue = singleTau[seqNumber[index]];
1549                 weight[i] += tauValue;
1550             }
1551             
1552             difference = fabs(weight[i] - difference);
1553             if(difference > maxChange){ maxChange = difference; }
1554         }
1555         return maxChange;
1556     }
1557     catch(exception& e) {
1558         m->errorOut(e, "ShhherCommand", "getNewWeights");
1559         exit(1);        
1560     }           
1561 }
1562  
1563  /**************************************************************************************************/
1564  
1565 double ShhherCommand::getLikelihood(){
1566     
1567     try{
1568         
1569         vector<long double> P(numSeqs, 0);
1570         int effNumOTUs = 0;
1571         
1572         for(int i=0;i<numOTUs;i++){
1573             if(weight[i] > MIN_WEIGHT){
1574                 effNumOTUs++;
1575             }
1576         }
1577         
1578         string hold;
1579         for(int i=0;i<numOTUs;i++){
1580             
1581             if (m->control_pressed) { break; }
1582             
1583             for(int j=0;j<nSeqsPerOTU[i];j++){
1584                 int index = cumNumSeqs[i] + j;
1585                 int nI = seqIndex[index];
1586                 double singleDist = dist[seqNumber[index]];
1587                 
1588                 P[nI] += weight[i] * exp(-singleDist * sigma);
1589             }
1590         }
1591         double nLL = 0.00;
1592         for(int i=0;i<numSeqs;i++){
1593             if(P[i] == 0){      P[i] = DBL_EPSILON;     }
1594             
1595             nLL += -log(P[i]);
1596         }
1597         
1598         nLL = nLL -(double)numSeqs * log(sigma);
1599         
1600         return nLL; 
1601     }
1602     catch(exception& e) {
1603         m->errorOut(e, "ShhherCommand", "getNewWeights");
1604         exit(1);        
1605     }           
1606 }
1607
1608 /**************************************************************************************************/
1609
1610 void ShhherCommand::checkCentroids(){
1611     try{
1612         vector<int> unique(numOTUs, 1);
1613         
1614         for(int i=0;i<numOTUs;i++){
1615             if(centroids[i] == -1 || weight[i] < MIN_WEIGHT){
1616                 unique[i] = -1;
1617             }
1618         }
1619         
1620         for(int i=0;i<numOTUs;i++){
1621             
1622             if (m->control_pressed) { break; }
1623             
1624             if(unique[i] == 1){
1625                 for(int j=i+1;j<numOTUs;j++){
1626                     if(unique[j] == 1){
1627                         
1628                         if(centroids[j] == centroids[i]){
1629                             unique[j] = 0;
1630                             centroids[j] = -1;
1631                             
1632                             weight[i] += weight[j];
1633                             weight[j] = 0.0;
1634                         }
1635                     }
1636                 }
1637             }
1638         }
1639     }
1640     catch(exception& e) {
1641         m->errorOut(e, "ShhherCommand", "checkCentroids");
1642         exit(1);        
1643     }           
1644 }
1645  /**************************************************************************************************/
1646
1647
1648  
1649 void ShhherCommand::writeQualities(vector<int> otuCounts){
1650     
1651     try {
1652         string thisOutputDir = outputDir;
1653         if (outputDir == "") {  thisOutputDir += m->hasPath(flowFileName);  }
1654         string qualityFileName = thisOutputDir + m->getRootName(m->getSimpleName(flowFileName)) + getOutputFileNameTag("qfile");
1655         
1656         ofstream qualityFile;
1657         m->openOutputFile(qualityFileName, qualityFile);
1658         
1659         qualityFile.setf(ios::fixed, ios::floatfield);
1660         qualityFile.setf(ios::showpoint);
1661         qualityFile << setprecision(6);
1662         
1663         vector<vector<int> > qualities(numOTUs);
1664         vector<double> pr(HOMOPS, 0);
1665         
1666         
1667         for(int i=0;i<numOTUs;i++){
1668             
1669             if (m->control_pressed) { break; }
1670             
1671             int index = 0;
1672             int base = 0;
1673             
1674             if(nSeqsPerOTU[i] > 0){
1675                 qualities[i].assign(1024, -1);
1676                 
1677                 while(index < numFlowCells){
1678                     double maxPrValue = 1e8;
1679                     short maxPrIndex = -1;
1680                     double count = 0.0000;
1681                     
1682                     pr.assign(HOMOPS, 0);
1683                     
1684                     for(int j=0;j<nSeqsPerOTU[i];j++){
1685                         int lIndex = cumNumSeqs[i] + j;
1686                         double tauValue = singleTau[seqNumber[lIndex]];
1687                         int sequenceIndex = aaI[i][j];
1688                         short intensity = flowDataIntI[sequenceIndex * numFlowCells + index];
1689                         
1690                         count += tauValue;
1691                         
1692                         for(int s=0;s<HOMOPS;s++){
1693                             pr[s] += tauValue * singleLookUp[s * NUMBINS + intensity];
1694                         }
1695                     }
1696                     
1697                     maxPrIndex = uniqueFlowgrams[centroids[i] * numFlowCells + index];
1698                     maxPrValue = pr[maxPrIndex];
1699                     
1700                     if(count > MIN_COUNT){
1701                         double U = 0.0000;
1702                         double norm = 0.0000;
1703                         
1704                         for(int s=0;s<HOMOPS;s++){
1705                             norm += exp(-(pr[s] - maxPrValue));
1706                         }
1707                         
1708                         for(int s=1;s<=maxPrIndex;s++){
1709                             int value = 0;
1710                             double temp = 0.0000;
1711                             
1712                             U += exp(-(pr[s-1]-maxPrValue))/norm;
1713                             
1714                             if(U>0.00){
1715                                 temp = log10(U);
1716                             }
1717                             else{
1718                                 temp = -10.1;
1719                             }
1720                             temp = floor(-10 * temp);
1721                             value = (int)floor(temp);
1722                             if(value > 100){    value = 100;    }
1723                             
1724                             qualities[i][base] = (int)value;
1725                             base++;
1726                         }
1727                     }
1728                     
1729                     index++;
1730                 }
1731             }
1732             
1733             
1734             if(otuCounts[i] > 0){
1735                 qualityFile << '>' << seqNameVector[mapUniqueToSeq[i]] << endl;
1736                 
1737                 int j=4;        //need to get past the first four bases
1738                 while(qualities[i][j] != -1){
1739                     qualityFile << qualities[i][j] << ' ';
1740                     j++;
1741                 }
1742                 qualityFile << endl;
1743             }
1744         }
1745         qualityFile.close();
1746         outputNames.push_back(qualityFileName); outputTypes["qfile"].push_back(qualityFileName);
1747         
1748     }
1749     catch(exception& e) {
1750         m->errorOut(e, "ShhherCommand", "writeQualities");
1751         exit(1);        
1752     }           
1753 }
1754
1755 /**************************************************************************************************/
1756
1757 void ShhherCommand::writeSequences(vector<int> otuCounts){
1758     try {
1759         string thisOutputDir = outputDir;
1760         if (outputDir == "") {  thisOutputDir += m->hasPath(flowFileName);  }
1761         string fastaFileName = thisOutputDir + m->getRootName(m->getSimpleName(flowFileName)) + getOutputFileNameTag("fasta");
1762         ofstream fastaFile;
1763         m->openOutputFile(fastaFileName, fastaFile);
1764         
1765         vector<string> names(numOTUs, "");
1766         
1767         for(int i=0;i<numOTUs;i++){
1768             
1769             if (m->control_pressed) { break; }
1770             
1771             int index = centroids[i];
1772             
1773             if(otuCounts[i] > 0){
1774                 fastaFile << '>' << seqNameVector[aaI[i][0]] << endl;
1775                 
1776                 string newSeq = "";
1777                 
1778                 for(int j=0;j<numFlowCells;j++){
1779                     
1780                     char base = flowOrder[j % 4];
1781                     for(int k=0;k<uniqueFlowgrams[index * numFlowCells + j];k++){
1782                         newSeq += base;
1783                     }
1784                 }
1785                 
1786                 fastaFile << newSeq.substr(4) << endl;
1787             }
1788         }
1789         fastaFile.close();
1790         
1791         outputNames.push_back(fastaFileName); outputTypes["fasta"].push_back(fastaFileName);
1792         
1793         if(compositeFASTAFileName != ""){
1794             m->appendFiles(fastaFileName, compositeFASTAFileName);
1795         }
1796     }
1797     catch(exception& e) {
1798         m->errorOut(e, "ShhherCommand", "writeSequences");
1799         exit(1);        
1800     }           
1801 }
1802
1803 /**************************************************************************************************/
1804
1805 void ShhherCommand::writeNames(vector<int> otuCounts){
1806     try {
1807         string thisOutputDir = outputDir;
1808         if (outputDir == "") {  thisOutputDir += m->hasPath(flowFileName);  }
1809         string nameFileName = thisOutputDir + m->getRootName(m->getSimpleName(flowFileName)) + getOutputFileNameTag("name");
1810         ofstream nameFile;
1811         m->openOutputFile(nameFileName, nameFile);
1812         
1813         for(int i=0;i<numOTUs;i++){
1814             
1815             if (m->control_pressed) { break; }
1816             
1817             if(otuCounts[i] > 0){
1818                 nameFile << seqNameVector[aaI[i][0]] << '\t' << seqNameVector[aaI[i][0]];
1819                 
1820                 for(int j=1;j<nSeqsPerOTU[i];j++){
1821                     nameFile << ',' << seqNameVector[aaI[i][j]];
1822                 }
1823                 
1824                 nameFile << endl;
1825             }
1826         }
1827         nameFile.close();
1828         outputNames.push_back(nameFileName); outputTypes["name"].push_back(nameFileName);
1829         
1830         
1831         if(compositeNamesFileName != ""){
1832             m->appendFiles(nameFileName, compositeNamesFileName);
1833         }               
1834     }
1835     catch(exception& e) {
1836         m->errorOut(e, "ShhherCommand", "writeNames");
1837         exit(1);        
1838     }           
1839 }
1840
1841 /**************************************************************************************************/
1842
1843 void ShhherCommand::writeGroups(){
1844     try {
1845         string thisOutputDir = outputDir;
1846         if (outputDir == "") {  thisOutputDir += m->hasPath(flowFileName);  }
1847         string fileRoot = m->getRootName(m->getSimpleName(flowFileName));
1848         int pos = fileRoot.find_first_of('.');
1849         string fileGroup = fileRoot;
1850         if (pos != string::npos) {  fileGroup = fileRoot.substr(pos+1, (fileRoot.length()-1-(pos+1)));  }
1851         string groupFileName = thisOutputDir + fileRoot + getOutputFileNameTag("group");
1852         ofstream groupFile;
1853         m->openOutputFile(groupFileName, groupFile);
1854         
1855         for(int i=0;i<numSeqs;i++){
1856             if (m->control_pressed) { break; }
1857             groupFile << seqNameVector[i] << '\t' << fileGroup << endl;
1858         }
1859         groupFile.close();
1860         outputNames.push_back(groupFileName); outputTypes["group"].push_back(groupFileName);
1861         
1862     }
1863     catch(exception& e) {
1864         m->errorOut(e, "ShhherCommand", "writeGroups");
1865         exit(1);        
1866     }           
1867 }
1868
1869 /**************************************************************************************************/
1870
1871 void ShhherCommand::writeClusters(vector<int> otuCounts){
1872     try {
1873         string thisOutputDir = outputDir;
1874         if (outputDir == "") {  thisOutputDir += m->hasPath(flowFileName);  }
1875         string otuCountsFileName = thisOutputDir + m->getRootName(m->getSimpleName(flowFileName)) +getOutputFileNameTag("counts");
1876         ofstream otuCountsFile;
1877         m->openOutputFile(otuCountsFileName, otuCountsFile);
1878         
1879         string bases = flowOrder;
1880         
1881         for(int i=0;i<numOTUs;i++){
1882             
1883             if (m->control_pressed) {
1884                 break;
1885             }
1886             //output the translated version of the centroid sequence for the otu
1887             if(otuCounts[i] > 0){
1888                 int index = centroids[i];
1889                 
1890                 otuCountsFile << "ideal\t";
1891                 for(int j=8;j<numFlowCells;j++){
1892                     char base = bases[j % 4];
1893                     for(int s=0;s<uniqueFlowgrams[index * numFlowCells + j];s++){
1894                         otuCountsFile << base;
1895                     }
1896                 }
1897                 otuCountsFile << endl;
1898                 
1899                 for(int j=0;j<nSeqsPerOTU[i];j++){
1900                     int sequence = aaI[i][j];
1901                     otuCountsFile << seqNameVector[sequence] << '\t';
1902                     
1903                     string newSeq = "";
1904                     
1905                     for(int k=0;k<lengths[sequence];k++){
1906                         char base = bases[k % 4];
1907                         int freq = int(0.01 * (double)flowDataIntI[sequence * numFlowCells + k] + 0.5);
1908                         
1909                         for(int s=0;s<freq;s++){
1910                             newSeq += base;
1911                             //otuCountsFile << base;
1912                         }
1913                     }
1914                     otuCountsFile << newSeq.substr(4) << endl;
1915                 }
1916                 otuCountsFile << endl;
1917             }
1918         }
1919         otuCountsFile.close();
1920         outputNames.push_back(otuCountsFileName); outputTypes["counts"].push_back(otuCountsFileName);
1921         
1922     }
1923     catch(exception& e) {
1924         m->errorOut(e, "ShhherCommand", "writeClusters");
1925         exit(1);        
1926     }           
1927 }
1928
1929 #else
1930 //**********************************************************************************************************************
1931
1932 int ShhherCommand::execute(){
1933         try {
1934                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
1935                 
1936                 getSingleLookUp();      if (m->control_pressed) { return 0; }
1937                 getJointLookUp();       if (m->control_pressed) { return 0; }
1938                 
1939         int numFiles = flowFileVector.size();
1940                 
1941         if (numFiles < processors) { processors = numFiles; }
1942         
1943 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
1944         if (processors == 1) { driver(flowFileVector, compositeFASTAFileName, compositeNamesFileName, 0, flowFileVector.size()); }
1945         else { createProcesses(flowFileVector); } //each processor processes one file
1946 #else
1947         driver(flowFileVector, compositeFASTAFileName, compositeNamesFileName, 0, flowFileVector.size());
1948 #endif
1949         
1950                 if(compositeFASTAFileName != ""){
1951                         outputNames.push_back(compositeFASTAFileName); outputTypes["fasta"].push_back(compositeFASTAFileName);
1952                         outputNames.push_back(compositeNamesFileName); outputTypes["name"].push_back(compositeNamesFileName);
1953                 }
1954
1955                 m->mothurOutEndLine();
1956                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
1957                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
1958                 m->mothurOutEndLine();
1959                 
1960                 return 0;
1961         }
1962         catch(exception& e) {
1963                 m->errorOut(e, "ShhherCommand", "execute");
1964                 exit(1);
1965         }
1966 }
1967 #endif
1968 /**************************************************************************************************/
1969
1970 int ShhherCommand::createProcesses(vector<string> filenames){
1971     try {
1972         vector<int> processIDS;
1973                 int process = 1;
1974                 int num = 0;
1975                 
1976                 //sanity check
1977                 if (filenames.size() < processors) { processors = filenames.size(); }
1978                 
1979                 //divide the groups between the processors
1980                 vector<linePair> lines;
1981         vector<int> numFilesToComplete;
1982                 int numFilesPerProcessor = filenames.size() / processors;
1983                 for (int i = 0; i < processors; i++) {
1984                         int startIndex =  i * numFilesPerProcessor;
1985                         int endIndex = (i+1) * numFilesPerProcessor;
1986                         if(i == (processors - 1)){      endIndex = filenames.size();    }
1987                         lines.push_back(linePair(startIndex, endIndex));
1988             numFilesToComplete.push_back((endIndex-startIndex));
1989                 }
1990                 
1991         #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)          
1992                 
1993                 //loop through and create all the processes you want
1994                 while (process != processors) {
1995                         int pid = fork();
1996                         
1997                         if (pid > 0) {
1998                                 processIDS.push_back(pid);  //create map from line number to pid so you can append files in correct order later
1999                                 process++;
2000                         }else if (pid == 0){
2001                                 num = driver(filenames, compositeFASTAFileName + toString(getpid()) + ".temp", compositeNamesFileName  + toString(getpid()) + ".temp", lines[process].start, lines[process].end);
2002                 
2003                 //pass numSeqs to parent
2004                                 ofstream out;
2005                                 string tempFile = compositeFASTAFileName + toString(getpid()) + ".num.temp";
2006                                 m->openOutputFile(tempFile, out);
2007                                 out << num << endl;
2008                                 out.close();
2009                 
2010                                 exit(0);
2011                         }else { 
2012                                 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine(); 
2013                                 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
2014                                 exit(0);
2015                         }
2016                 }
2017                 
2018                 //do my part
2019                 driver(filenames, compositeFASTAFileName, compositeNamesFileName, lines[0].start, lines[0].end);
2020                 
2021                 //force parent to wait until all the processes are done
2022                 for (int i=0;i<processIDS.size();i++) { 
2023                         int temp = processIDS[i];
2024                         wait(&temp);
2025                 }
2026         
2027         #else
2028         
2029         //////////////////////////////////////////////////////////////////////////////////////////////////////
2030         
2031         /////////////////////// NOT WORKING, ACCESS VIOLATION ON READ OF FLOWGRAMS IN THREAD /////////////////
2032         
2033         //////////////////////////////////////////////////////////////////////////////////////////////////////
2034                 //Windows version shared memory, so be careful when passing variables through the shhhFlowsData struct. 
2035                 //Above fork() will clone, so memory is separate, but that's not the case with windows, 
2036                 //////////////////////////////////////////////////////////////////////////////////////////////////////
2037                 /*
2038                 vector<shhhFlowsData*> pDataArray; 
2039                 DWORD   dwThreadIdArray[processors-1];
2040                 HANDLE  hThreadArray[processors-1]; 
2041                 
2042                 //Create processor worker threads.
2043                 for( int i=0; i<processors-1; i++ ){
2044                         // Allocate memory for thread data.
2045                         string extension = "";
2046                         if (i != 0) { extension = toString(i) + ".temp"; }
2047                         
2048             shhhFlowsData* tempFlow = new shhhFlowsData(filenames, (compositeFASTAFileName + extension), (compositeNamesFileName + extension), outputDir, flowOrder, jointLookUp, singleLookUp, m, lines[i].start, lines[i].end, cutoff, sigma, minDelta, maxIters, i);
2049                         pDataArray.push_back(tempFlow);
2050                         processIDS.push_back(i);
2051             
2052                         hThreadArray[i] = CreateThread(NULL, 0, ShhhFlowsThreadFunction, pDataArray[i], 0, &dwThreadIdArray[i]);   
2053                 }
2054                 
2055                 //using the main process as a worker saves time and memory
2056                 //do my part
2057                 driver(filenames, compositeFASTAFileName, compositeNamesFileName, lines[processors-1].start, lines[processors-1].end);
2058                 
2059                 //Wait until all threads have terminated.
2060                 WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE);
2061                 
2062                 //Close all thread handles and free memory allocations.
2063                 for(int i=0; i < pDataArray.size(); i++){
2064                         for(int j=0; j < pDataArray[i]->outputNames.size(); j++){ outputNames.push_back(pDataArray[i]->outputNames[j]); }
2065                         CloseHandle(hThreadArray[i]);
2066                         delete pDataArray[i];
2067                 }
2068                 */
2069         #endif
2070         
2071         for (int i=0;i<processIDS.size();i++) { 
2072             ifstream in;
2073                         string tempFile =  compositeFASTAFileName + toString(processIDS[i]) + ".num.temp";
2074                         m->openInputFile(tempFile, in);
2075                         if (!in.eof()) { 
2076                 int tempNum = 0; 
2077                 in >> tempNum; 
2078                 if (tempNum != numFilesToComplete[i+1]) {
2079                     m->mothurOut("[ERROR]: main process expected " + toString(processIDS[i]) + " to complete " + toString(numFilesToComplete[i+1]) + " files, and it only reported completing " + toString(tempNum) + ". This will cause file mismatches.  The flow files may be too large to process with multiple processors. \n");
2080                 }
2081             }
2082                         in.close(); m->mothurRemove(tempFile);
2083             
2084             if (compositeFASTAFileName != "") {
2085                 m->appendFiles((compositeFASTAFileName + toString(processIDS[i]) + ".temp"), compositeFASTAFileName);
2086                 m->appendFiles((compositeNamesFileName + toString(processIDS[i]) + ".temp"), compositeNamesFileName);
2087                 m->mothurRemove((compositeFASTAFileName + toString(processIDS[i]) + ".temp"));
2088                 m->mothurRemove((compositeNamesFileName + toString(processIDS[i]) + ".temp"));
2089             }
2090         }
2091         
2092         return 0;
2093         
2094     }
2095         catch(exception& e) {
2096                 m->errorOut(e, "ShhherCommand", "createProcesses");
2097                 exit(1);
2098         }
2099 }
2100 /**************************************************************************************************/
2101
2102 vector<string> ShhherCommand::parseFlowFiles(string filename){
2103     try {
2104         vector<string> files;
2105         int count = 0;
2106         
2107         ifstream in;
2108         m->openInputFile(filename, in);
2109         
2110         int thisNumFLows = 0;
2111         in >> thisNumFLows; m->gobble(in);
2112         
2113         while (!in.eof()) {
2114             if (m->control_pressed) { break; }
2115             
2116             ofstream out;
2117             string outputFileName = filename + toString(count) + ".temp";
2118             m->openOutputFile(outputFileName, out);
2119             out << thisNumFLows << endl;
2120             files.push_back(outputFileName);
2121             
2122             int numLinesWrote = 0;
2123             for (int i = 0; i < largeSize; i++) {
2124                 if (in.eof()) { break; }
2125                 string line = m->getline(in); m->gobble(in);
2126                 out << line << endl;
2127                 numLinesWrote++;
2128             }
2129             out.close();
2130             
2131             if (numLinesWrote == 0) {  m->mothurRemove(outputFileName); files.pop_back();  }
2132             count++;
2133         }
2134         in.close();
2135         
2136         if (m->control_pressed) { for (int i = 0; i < files.size(); i++) { m->mothurRemove(files[i]); }  files.clear(); }
2137         
2138         m->mothurOut("\nDivided " + filename + " into " + toString(files.size()) + " files.\n\n"); 
2139         
2140         return files;
2141     }
2142         catch(exception& e) {
2143                 m->errorOut(e, "ShhherCommand", "parseFlowFiles");
2144                 exit(1);
2145         }
2146 }
2147 /**************************************************************************************************/
2148
2149 int ShhherCommand::driver(vector<string> filenames, string thisCompositeFASTAFileName, string thisCompositeNamesFileName, int start, int end){
2150     try {
2151         
2152         int numCompleted = 0;
2153         
2154         for(int i=start;i<end;i++){
2155                         
2156                         if (m->control_pressed) { break; }
2157                         
2158             vector<string> theseFlowFileNames; theseFlowFileNames.push_back(filenames[i]);
2159             if (large) {  theseFlowFileNames = parseFlowFiles(filenames[i]);  }
2160             
2161             if (m->control_pressed) { break; }
2162             
2163             double begClock = clock();
2164             unsigned long long begTime;
2165             
2166             for (int g = 0; g < theseFlowFileNames.size(); g++) {
2167                 
2168                 string flowFileName = theseFlowFileNames[g];
2169                 m->mothurOut("\n>>>>>\tProcessing " + flowFileName + " (file " + toString(i+1) + " of " + toString(filenames.size()) + ")\t<<<<<\n");
2170                 m->mothurOut("Reading flowgrams...\n");
2171                 
2172                 vector<string> seqNameVector;
2173                 vector<int> lengths;
2174                 vector<short> flowDataIntI;
2175                 vector<double> flowDataPrI;
2176                 map<string, int> nameMap;
2177                 vector<short> uniqueFlowgrams;
2178                 vector<int> uniqueCount;
2179                 vector<int> mapSeqToUnique;
2180                 vector<int> mapUniqueToSeq;
2181                 vector<int> uniqueLengths;
2182                 int numFlowCells;
2183                 
2184                 if (m->debug) { m->mothurOut("[DEBUG]: About to read flowgrams.\n"); }
2185                 int numSeqs = getFlowData(flowFileName, seqNameVector, lengths, flowDataIntI, nameMap, numFlowCells);
2186                 
2187                 if (m->control_pressed) { break; }
2188                 
2189                 m->mothurOut("Identifying unique flowgrams...\n");
2190                 int numUniques = getUniques(numSeqs, numFlowCells, uniqueFlowgrams, uniqueCount, uniqueLengths, mapSeqToUnique, mapUniqueToSeq, lengths, flowDataPrI, flowDataIntI);
2191                 
2192                 if (m->control_pressed) { break; }
2193                 
2194                 m->mothurOut("Calculating distances between flowgrams...\n");
2195                 string distFileName = flowFileName.substr(0,flowFileName.find_last_of('.')) + ".shhh.dist";
2196                 begTime = time(NULL);
2197                
2198                 
2199                 flowDistParentFork(numFlowCells, distFileName, numUniques, mapUniqueToSeq, mapSeqToUnique, lengths, flowDataPrI, flowDataIntI); 
2200                 
2201                 m->mothurOutEndLine();
2202                 m->mothurOut("Total time: " + toString(time(NULL) - begTime) + '\t' + toString((clock() - begClock)/CLOCKS_PER_SEC) + '\n');
2203                 
2204                 
2205                 string namesFileName = flowFileName.substr(0,flowFileName.find_last_of('.')) + ".shhh.names";
2206                 createNamesFile(numSeqs, numUniques, namesFileName, seqNameVector, mapSeqToUnique, mapUniqueToSeq);
2207                 
2208                 if (m->control_pressed) { break; }
2209                 
2210                 m->mothurOut("\nClustering flowgrams...\n");
2211                 string listFileName = flowFileName.substr(0,flowFileName.find_last_of('.')) + ".shhh.list";
2212                 cluster(listFileName, distFileName, namesFileName);
2213                 
2214                 if (m->control_pressed) { break; }
2215                 
2216                 vector<int> otuData;
2217                 vector<int> cumNumSeqs;
2218                 vector<int> nSeqsPerOTU;
2219                 vector<vector<int> > aaP;       //tMaster->aanP:        each row is a different otu / each col contains the sequence indices
2220                 vector<vector<int> > aaI;       //tMaster->aanI:        that are in each otu - can't differentiate between aaP and aaI 
2221                 vector<int> seqNumber;          //tMaster->anP:         the sequence id number sorted by OTU
2222                 vector<int> seqIndex;           //tMaster->anI;         the index that corresponds to seqNumber
2223                 
2224                 
2225                 int numOTUs = getOTUData(numSeqs, listFileName, otuData, cumNumSeqs, nSeqsPerOTU, aaP, aaI, seqNumber, seqIndex, nameMap);
2226                 
2227                 if (m->control_pressed) { break; }
2228                 
2229                 m->mothurRemove(distFileName);
2230                 m->mothurRemove(namesFileName);
2231                 m->mothurRemove(listFileName);
2232                 
2233                 vector<double> dist;            //adDist - distance of sequences to centroids
2234                 vector<short> change;           //did the centroid sequence change? 0 = no; 1 = yes
2235                 vector<int> centroids;          //the representative flowgram for each cluster m
2236                 vector<double> weight;
2237                 vector<double> singleTau;       //tMaster->adTau:       1-D Tau vector (1xnumSeqs)
2238                 vector<int> nSeqsBreaks;
2239                 vector<int> nOTUsBreaks;
2240                 
2241                 if (m->debug) { m->mothurOut("[DEBUG]: numSeqs = " + toString(numSeqs) + " numOTUS = " + toString(numOTUs) + " about to alloc a dist vector with size = " + toString((numSeqs * numOTUs)) + ".\n"); }
2242                 
2243                 dist.assign(numSeqs * numOTUs, 0);
2244                 change.assign(numOTUs, 1);
2245                 centroids.assign(numOTUs, -1);
2246                 weight.assign(numOTUs, 0);
2247                 singleTau.assign(numSeqs, 1.0);
2248                 
2249                 nSeqsBreaks.assign(2, 0);
2250                 nOTUsBreaks.assign(2, 0);
2251                 
2252                 nSeqsBreaks[0] = 0;
2253                 nSeqsBreaks[1] = numSeqs;
2254                 nOTUsBreaks[1] = numOTUs;
2255                 
2256                 if (m->debug) { m->mothurOut("[DEBUG]: done allocating memory, about to denoise.\n"); }
2257                 
2258                 if (m->control_pressed) { break; }
2259                 
2260                 double maxDelta = 0;
2261                 int iter = 0;
2262                 
2263                 begClock = clock();
2264                 begTime = time(NULL);
2265                 
2266                 m->mothurOut("\nDenoising flowgrams...\n");
2267                 m->mothurOut("iter\tmaxDelta\tnLL\t\tcycletime\n");
2268                 
2269                 while((maxIters == 0 && maxDelta > minDelta) || iter < MIN_ITER || (maxDelta > minDelta && iter < maxIters)){
2270                     
2271                     if (m->control_pressed) { break; }
2272                     
2273                     double cycClock = clock();
2274                     unsigned long long cycTime = time(NULL);
2275                     fill(numOTUs, seqNumber, seqIndex, cumNumSeqs, nSeqsPerOTU, aaP, aaI);
2276                     
2277                     if (m->control_pressed) { break; }
2278                     
2279                     calcCentroidsDriver(numOTUs, cumNumSeqs, nSeqsPerOTU, seqIndex, change, centroids, singleTau, mapSeqToUnique, uniqueFlowgrams, flowDataIntI, lengths, numFlowCells, seqNumber);
2280                     
2281                     if (m->control_pressed) { break; }
2282                     
2283                     maxDelta = getNewWeights(numOTUs, cumNumSeqs, nSeqsPerOTU, singleTau, seqNumber, weight);  
2284                     
2285                     if (m->control_pressed) { break; }
2286                     
2287                     double nLL = getLikelihood(numSeqs, numOTUs, nSeqsPerOTU, seqNumber, cumNumSeqs, seqIndex, dist, weight); 
2288                     
2289                     if (m->control_pressed) { break; }
2290                     
2291                     checkCentroids(numOTUs, centroids, weight);
2292                     
2293                     if (m->control_pressed) { break; }
2294                     
2295                     calcNewDistances(numSeqs, numOTUs, nSeqsPerOTU,  dist, weight, change, centroids, aaP, singleTau, aaI, seqNumber, seqIndex, uniqueFlowgrams, flowDataIntI, numFlowCells, lengths);
2296                     
2297                     if (m->control_pressed) { break; }
2298                     
2299                     iter++;
2300                     
2301                     m->mothurOut(toString(iter) + '\t' + toString(maxDelta) + '\t' + toString(nLL) + '\t' + toString(time(NULL) - cycTime) + '\t' + toString((clock() - cycClock)/(double)CLOCKS_PER_SEC) + '\n');
2302                     
2303                 }       
2304                 
2305                 if (m->control_pressed) { break; }
2306                 
2307                 m->mothurOut("\nFinalizing...\n");
2308                 fill(numOTUs, seqNumber, seqIndex, cumNumSeqs, nSeqsPerOTU, aaP, aaI);
2309                 
2310                 if (m->control_pressed) { break; }
2311                 
2312                 setOTUs(numOTUs, numSeqs, seqNumber, seqIndex, cumNumSeqs, nSeqsPerOTU, otuData, singleTau, dist, aaP, aaI);
2313                 
2314                 if (m->control_pressed) { break; }
2315                 
2316                 vector<int> otuCounts(numOTUs, 0);
2317                 for(int j=0;j<numSeqs;j++)      {       otuCounts[otuData[j]]++;        }
2318                 
2319                 calcCentroidsDriver(numOTUs, cumNumSeqs, nSeqsPerOTU, seqIndex, change, centroids, singleTau, mapSeqToUnique, uniqueFlowgrams, flowDataIntI, lengths, numFlowCells, seqNumber); 
2320                 
2321                 if (m->control_pressed) { break; }
2322                 
2323                 if ((large) && (g == 0)) {  flowFileName = filenames[i]; theseFlowFileNames[0] = filenames[i]; }
2324                 string thisOutputDir = outputDir;
2325                 if (outputDir == "") {  thisOutputDir = m->hasPath(flowFileName);  }
2326                 string qualityFileName = thisOutputDir + m->getRootName(m->getSimpleName(flowFileName)) + getOutputFileNameTag("qfile");
2327                 string fastaFileName = thisOutputDir + m->getRootName(m->getSimpleName(flowFileName)) + getOutputFileNameTag("fasta");
2328                 string nameFileName = thisOutputDir + m->getRootName(m->getSimpleName(flowFileName)) + getOutputFileNameTag("name");
2329                 string otuCountsFileName = thisOutputDir + m->getRootName(m->getSimpleName(flowFileName)) + getOutputFileNameTag("counts");
2330                 string fileRoot = m->getRootName(m->getSimpleName(flowFileName));
2331                 int pos = fileRoot.find_first_of('.');
2332                 string fileGroup = fileRoot;
2333                 if (pos != string::npos) {  fileGroup = fileRoot.substr(pos+1, (fileRoot.length()-1-(pos+1)));  }
2334                 string groupFileName = thisOutputDir + fileRoot + getOutputFileNameTag("group");
2335
2336                 
2337                 writeQualities(numOTUs, numFlowCells, qualityFileName, otuCounts, nSeqsPerOTU, seqNumber, singleTau, flowDataIntI, uniqueFlowgrams, cumNumSeqs, mapUniqueToSeq, seqNameVector, centroids, aaI); if (m->control_pressed) { break; }
2338                 writeSequences(thisCompositeFASTAFileName, numOTUs, numFlowCells, fastaFileName, otuCounts, uniqueFlowgrams, seqNameVector, aaI, centroids);if (m->control_pressed) { break; }
2339                 writeNames(thisCompositeNamesFileName, numOTUs, nameFileName, otuCounts, seqNameVector, aaI, nSeqsPerOTU);                              if (m->control_pressed) { break; }
2340                 writeClusters(otuCountsFileName, numOTUs, numFlowCells,otuCounts, centroids, uniqueFlowgrams, seqNameVector, aaI, nSeqsPerOTU, lengths, flowDataIntI);                  if (m->control_pressed) { break; }
2341                 writeGroups(groupFileName, fileGroup, numSeqs, seqNameVector);                                          if (m->control_pressed) { break; }
2342                 
2343                 if (large) {
2344                     if (g > 0) {
2345                         m->appendFiles(qualityFileName, (thisOutputDir + m->getRootName(m->getSimpleName(theseFlowFileNames[0])) + getOutputFileNameTag("qfile")));
2346                         m->mothurRemove(qualityFileName);
2347                         m->appendFiles(fastaFileName, (thisOutputDir + m->getRootName(m->getSimpleName(theseFlowFileNames[0])) + getOutputFileNameTag("fasta")));
2348                         m->mothurRemove(fastaFileName);
2349                         m->appendFiles(nameFileName, (thisOutputDir + m->getRootName(m->getSimpleName(theseFlowFileNames[0])) + getOutputFileNameTag("name")));
2350                         m->mothurRemove(nameFileName);
2351                         m->appendFiles(otuCountsFileName, (thisOutputDir + m->getRootName(m->getSimpleName(theseFlowFileNames[0])) + getOutputFileNameTag("counts")));
2352                         m->mothurRemove(otuCountsFileName);
2353                         m->appendFiles(groupFileName, (thisOutputDir + m->getRootName(m->getSimpleName(theseFlowFileNames[0])) + getOutputFileNameTag("group")));
2354                         m->mothurRemove(groupFileName);
2355                     }
2356                     m->mothurRemove(theseFlowFileNames[g]);
2357                 }
2358                         }
2359             
2360             numCompleted++;
2361                         m->mothurOut("Total time to process " + flowFileName + ":\t" + toString(time(NULL) - begTime) + '\t' + toString((clock() - begClock)/(double)CLOCKS_PER_SEC) + '\n');
2362                 }
2363                 
2364         if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
2365         
2366         return numCompleted;
2367         
2368     }catch(exception& e) {
2369             m->errorOut(e, "ShhherCommand", "driver");
2370             exit(1);
2371     }
2372 }
2373
2374 /**************************************************************************************************/
2375 int ShhherCommand::getFlowData(string filename, vector<string>& thisSeqNameVector, vector<int>& thisLengths, vector<short>& thisFlowDataIntI, map<string, int>& thisNameMap, int& numFlowCells){
2376         try{
2377        
2378                 ifstream flowFile;
2379        
2380                 m->openInputFile(filename, flowFile);
2381                 
2382                 string seqName;
2383                 int currentNumFlowCells;
2384                 float intensity;
2385         thisSeqNameVector.clear();
2386                 thisLengths.clear();
2387                 thisFlowDataIntI.clear();
2388                 thisNameMap.clear();
2389                 
2390                 string numFlowTest;
2391         flowFile >> numFlowTest;
2392         
2393         if (!m->isContainingOnlyDigits(numFlowTest)) { m->mothurOut("[ERROR]: expected a number and got " + numFlowTest + ", quitting. Did you use the flow parameter instead of the file parameter?"); m->mothurOutEndLine(); exit(1); }
2394         else { convert(numFlowTest, numFlowCells); }
2395         
2396         if (m->debug) { m->mothurOut("[DEBUG]: numFlowCells = " + toString(numFlowCells) + ".\n"); }
2397                 int index = 0;//pcluster
2398                 while(!flowFile.eof()){
2399                         
2400                         if (m->control_pressed) { break; }
2401                         
2402                         flowFile >> seqName >> currentNumFlowCells;
2403             
2404                         thisLengths.push_back(currentNumFlowCells);
2405            
2406                         thisSeqNameVector.push_back(seqName);
2407                         thisNameMap[seqName] = index++;//pcluster
2408             
2409             if (m->debug) { m->mothurOut("[DEBUG]: seqName = " + seqName + " length = " + toString(currentNumFlowCells) + " index = " + toString(index) + "\n"); }
2410             
2411                         for(int i=0;i<numFlowCells;i++){
2412                                 flowFile >> intensity;
2413                                 if(intensity > 9.99)    {       intensity = 9.99;       }
2414                                 int intI = int(100 * intensity + 0.0001);
2415                                 thisFlowDataIntI.push_back(intI);
2416                         }
2417                         m->gobble(flowFile);
2418                 }
2419                 flowFile.close();
2420                 
2421                 int numSeqs = thisSeqNameVector.size();         
2422                 
2423                 for(int i=0;i<numSeqs;i++){
2424                         
2425                         if (m->control_pressed) { break; }
2426                         
2427                         int iNumFlowCells = i * numFlowCells;
2428                         for(int j=thisLengths[i];j<numFlowCells;j++){
2429                                 thisFlowDataIntI[iNumFlowCells + j] = 0;
2430                         }
2431                 }
2432         
2433         return numSeqs;
2434                 
2435         }
2436         catch(exception& e) {
2437                 m->errorOut(e, "ShhherCommand", "getFlowData");
2438                 exit(1);
2439         }
2440 }
2441 /**************************************************************************************************/
2442
2443 int ShhherCommand::flowDistParentFork(int numFlowCells, string distFileName, int stopSeq, vector<int>& mapUniqueToSeq, vector<int>& mapSeqToUnique, vector<int>& lengths, vector<double>& flowDataPrI, vector<short>& flowDataIntI){
2444         try{            
2445         
2446                 ostringstream outStream;
2447                 outStream.setf(ios::fixed, ios::floatfield);
2448                 outStream.setf(ios::dec, ios::basefield);
2449                 outStream.setf(ios::showpoint);
2450                 outStream.precision(6);
2451                 
2452                 int begTime = time(NULL);
2453                 double begClock = clock();
2454         
2455                 for(int i=0;i<stopSeq;i++){
2456                         
2457                         if (m->control_pressed) { break; }
2458                         
2459                         for(int j=0;j<i;j++){
2460                                 float flowDistance = calcPairwiseDist(numFlowCells, mapUniqueToSeq[i], mapUniqueToSeq[j], mapSeqToUnique, lengths, flowDataPrI, flowDataIntI);
2461                 
2462                                 if(flowDistance < 1e-6){
2463                                         outStream << mapUniqueToSeq[i] << '\t' << mapUniqueToSeq[j] << '\t' << 0.000000 << endl;
2464                                 }
2465                                 else if(flowDistance <= cutoff){
2466                                         outStream << mapUniqueToSeq[i] << '\t' << mapUniqueToSeq[j] << '\t' << flowDistance << endl;
2467                                 }
2468                         }
2469                         if(i % 100 == 0){
2470                                 m->mothurOut(toString(i) + "\t" + toString(time(NULL) - begTime));
2471                                 m->mothurOut("\t" + toString((clock()-begClock)/CLOCKS_PER_SEC));
2472                                 m->mothurOutEndLine();
2473                         }
2474                 }
2475                 
2476                 ofstream distFile(distFileName.c_str());
2477                 distFile << outStream.str();            
2478                 distFile.close();
2479                 
2480                 if (m->control_pressed) {}
2481                 else {
2482                         m->mothurOut(toString(stopSeq-1) + "\t" + toString(time(NULL) - begTime));
2483                         m->mothurOut("\t" + toString((clock()-begClock)/CLOCKS_PER_SEC));
2484                         m->mothurOutEndLine();
2485                 }
2486         
2487         return 0;
2488         }
2489         catch(exception& e) {
2490                 m->errorOut(e, "ShhherCommand", "flowDistParentFork");
2491                 exit(1);
2492         }
2493 }
2494 /**************************************************************************************************/
2495
2496 float ShhherCommand::calcPairwiseDist(int numFlowCells, int seqA, int seqB, vector<int>& mapSeqToUnique, vector<int>& lengths, vector<double>& flowDataPrI, vector<short>& flowDataIntI){
2497         try{
2498                 int minLength = lengths[mapSeqToUnique[seqA]];
2499                 if(lengths[seqB] < minLength){  minLength = lengths[mapSeqToUnique[seqB]];      }
2500                 
2501                 int ANumFlowCells = seqA * numFlowCells;
2502                 int BNumFlowCells = seqB * numFlowCells;
2503                 
2504                 float dist = 0;
2505                 
2506                 for(int i=0;i<minLength;i++){
2507                         
2508                         if (m->control_pressed) { break; }
2509                         
2510                         int flowAIntI = flowDataIntI[ANumFlowCells + i];
2511                         float flowAPrI = flowDataPrI[ANumFlowCells + i];
2512                         
2513                         int flowBIntI = flowDataIntI[BNumFlowCells + i];
2514                         float flowBPrI = flowDataPrI[BNumFlowCells + i];
2515                         dist += jointLookUp[flowAIntI * NUMBINS + flowBIntI] - flowAPrI - flowBPrI;
2516                 }
2517                 
2518                 dist /= (float) minLength;
2519                 return dist;
2520         }
2521         catch(exception& e) {
2522                 m->errorOut(e, "ShhherCommand", "calcPairwiseDist");
2523                 exit(1);
2524         }
2525 }
2526
2527 /**************************************************************************************************/
2528
2529 int ShhherCommand::getUniques(int numSeqs, int numFlowCells, vector<short>& uniqueFlowgrams, vector<int>& uniqueCount, vector<int>& uniqueLengths, vector<int>& mapSeqToUnique, vector<int>& mapUniqueToSeq, vector<int>& lengths, vector<double>& flowDataPrI, vector<short>& flowDataIntI){
2530         try{
2531                 int numUniques = 0;
2532                 uniqueFlowgrams.assign(numFlowCells * numSeqs, -1);
2533                 uniqueCount.assign(numSeqs, 0);                                                 //      anWeights
2534                 uniqueLengths.assign(numSeqs, 0);
2535                 mapSeqToUnique.assign(numSeqs, -1);
2536                 mapUniqueToSeq.assign(numSeqs, -1);
2537                 
2538                 vector<short> uniqueFlowDataIntI(numFlowCells * numSeqs, -1);
2539                 
2540                 for(int i=0;i<numSeqs;i++){
2541                         
2542                         if (m->control_pressed) { break; }
2543                         
2544                         int index = 0;
2545                         
2546                         vector<short> current(numFlowCells);
2547                         for(int j=0;j<numFlowCells;j++){
2548                                 current[j] = short(((flowDataIntI[i * numFlowCells + j] + 50.0)/100.0));
2549                         }
2550             
2551                         for(int j=0;j<numUniques;j++){
2552                                 int offset = j * numFlowCells;
2553                                 bool toEnd = 1;
2554                                 
2555                                 int shorterLength;
2556                                 if(lengths[i] < uniqueLengths[j])       {       shorterLength = lengths[i];                     }
2557                                 else                                                            {       shorterLength = uniqueLengths[j];       }
2558                 
2559                                 for(int k=0;k<shorterLength;k++){
2560                                         if(current[k] != uniqueFlowgrams[offset + k]){
2561                                                 toEnd = 0;
2562                                                 break;
2563                                         }
2564                                 }
2565                                 
2566                                 if(toEnd){
2567                                         mapSeqToUnique[i] = j;
2568                                         uniqueCount[j]++;
2569                                         index = j;
2570                                         if(lengths[i] > uniqueLengths[j])       {       uniqueLengths[j] = lengths[i];  }
2571                                         break;
2572                                 }
2573                                 index++;
2574                         }
2575                         
2576                         if(index == numUniques){
2577                                 uniqueLengths[numUniques] = lengths[i];
2578                                 uniqueCount[numUniques] = 1;
2579                                 mapSeqToUnique[i] = numUniques;//anMap
2580                                 mapUniqueToSeq[numUniques] = i;//anF
2581                                 
2582                                 for(int k=0;k<numFlowCells;k++){
2583                                         uniqueFlowgrams[numUniques * numFlowCells + k] = current[k];
2584                                         uniqueFlowDataIntI[numUniques * numFlowCells + k] = flowDataIntI[i * numFlowCells + k];
2585                                 }
2586                                 
2587                                 numUniques++;
2588                         }
2589                 }
2590                 uniqueFlowDataIntI.resize(numFlowCells * numUniques);
2591                 uniqueLengths.resize(numUniques);       
2592                 
2593                 flowDataPrI.resize(numSeqs * numFlowCells, 0);
2594                 for(int i=0;i<flowDataPrI.size();i++)   {       if (m->control_pressed) { break; } flowDataPrI[i] = getProbIntensity(flowDataIntI[i]);          }
2595         
2596         return numUniques;
2597         }
2598         catch(exception& e) {
2599                 m->errorOut(e, "ShhherCommand", "getUniques");
2600                 exit(1);
2601         }
2602 }
2603 /**************************************************************************************************/
2604 int ShhherCommand::createNamesFile(int numSeqs, int numUniques, string filename, vector<string>& seqNameVector, vector<int>& mapSeqToUnique, vector<int>& mapUniqueToSeq){
2605         try{
2606                 
2607                 vector<string> duplicateNames(numUniques, "");
2608                 for(int i=0;i<numSeqs;i++){
2609                         duplicateNames[mapSeqToUnique[i]] += seqNameVector[i] + ',';
2610                 }
2611                 
2612                 ofstream nameFile;
2613                 m->openOutputFile(filename, nameFile);
2614                 
2615                 for(int i=0;i<numUniques;i++){
2616                         
2617                         if (m->control_pressed) { break; }
2618                         
2619             //                  nameFile << seqNameVector[mapUniqueToSeq[i]] << '\t' << duplicateNames[i].substr(0, duplicateNames[i].find_last_of(',')) << endl;
2620                         nameFile << mapUniqueToSeq[i] << '\t' << duplicateNames[i].substr(0, duplicateNames[i].find_last_of(',')) << endl;
2621                 }
2622                 
2623                 nameFile.close();
2624         
2625                 return 0;
2626         }
2627         catch(exception& e) {
2628                 m->errorOut(e, "ShhherCommand", "createNamesFile");
2629                 exit(1);
2630         }
2631 }
2632 //**********************************************************************************************************************
2633
2634 int ShhherCommand::cluster(string filename, string distFileName, string namesFileName){
2635         try {
2636                 
2637                 ReadMatrix* read = new ReadColumnMatrix(distFileName);  
2638                 read->setCutoff(cutoff);
2639                 
2640                 NameAssignment* clusterNameMap = new NameAssignment(namesFileName);
2641                 clusterNameMap->readMap();
2642                 read->read(clusterNameMap);
2643         
2644                 ListVector* list = read->getListVector();
2645                 SparseDistanceMatrix* matrix = read->getDMatrix();
2646                 
2647                 delete read; 
2648                 delete clusterNameMap; 
2649         
2650                 RAbundVector* rabund = new RAbundVector(list->getRAbundVector());
2651                 
2652                 Cluster* cluster = new CompleteLinkage(rabund, list, matrix, cutoff, "furthest"); 
2653                 string tag = cluster->getTag();
2654                 
2655                 double clusterCutoff = cutoff;
2656                 while (matrix->getSmallDist() <= clusterCutoff && matrix->getNNodes() > 0){
2657                         
2658                         if (m->control_pressed) { break; }
2659                         
2660                         cluster->update(clusterCutoff);
2661                 }
2662                 
2663                 list->setLabel(toString(cutoff));
2664                 
2665                 ofstream listFile;
2666                 m->openOutputFile(filename, listFile);
2667                 list->print(listFile);
2668                 listFile.close();
2669                 
2670                 delete matrix;  delete cluster; delete rabund; delete list;
2671         
2672                 return 0;
2673         }
2674         catch(exception& e) {
2675                 m->errorOut(e, "ShhherCommand", "cluster");
2676                 exit(1);        
2677         }               
2678 }
2679 /**************************************************************************************************/
2680
2681 int ShhherCommand::getOTUData(int numSeqs, string fileName,  vector<int>& otuData,
2682                                vector<int>& cumNumSeqs,
2683                                vector<int>& nSeqsPerOTU,
2684                                vector<vector<int> >& aaP,       //tMaster->aanP:        each row is a different otu / each col contains the sequence indices
2685                                vector<vector<int> >& aaI,       //tMaster->aanI:        that are in each otu - can't differentiate between aaP and aaI 
2686                                vector<int>& seqNumber,          //tMaster->anP:         the sequence id number sorted by OTU
2687                                vector<int>& seqIndex,
2688                                map<string, int>& nameMap){
2689         try {
2690         
2691                 ifstream listFile;
2692                 m->openInputFile(fileName, listFile);
2693                 string label;
2694         int numOTUs;
2695                 
2696                 listFile >> label >> numOTUs;
2697         
2698         if (m->debug) { m->mothurOut("[DEBUG]: Getting OTU Data...\n"); }
2699         
2700                 otuData.assign(numSeqs, 0);
2701                 cumNumSeqs.assign(numOTUs, 0);
2702                 nSeqsPerOTU.assign(numOTUs, 0);
2703                 aaP.clear();aaP.resize(numOTUs);
2704                 
2705                 seqNumber.clear();
2706                 aaI.clear();
2707                 seqIndex.clear();
2708                 
2709                 string singleOTU = "";
2710                 
2711                 for(int i=0;i<numOTUs;i++){
2712                         
2713                         if (m->control_pressed) { break; }
2714             if (m->debug) { m->mothurOut("[DEBUG]: processing OTU " + toString(i) + ".\n"); }
2715             
2716                         listFile >> singleOTU;
2717                         
2718                         istringstream otuString(singleOTU);
2719             
2720                         while(otuString){
2721                                 
2722                                 string seqName = "";
2723                                 
2724                                 for(int j=0;j<singleOTU.length();j++){
2725                                         char letter = otuString.get();
2726                                         
2727                                         if(letter != ','){
2728                                                 seqName += letter;
2729                                         }
2730                                         else{
2731                                                 map<string,int>::iterator nmIt = nameMap.find(seqName);
2732                                                 int index = nmIt->second;
2733                                                 
2734                                                 nameMap.erase(nmIt);
2735                                                 
2736                                                 otuData[index] = i;
2737                                                 nSeqsPerOTU[i]++;
2738                                                 aaP[i].push_back(index);
2739                                                 seqName = "";
2740                                         }
2741                                 }
2742                                 
2743                                 map<string,int>::iterator nmIt = nameMap.find(seqName);
2744                 
2745                                 int index = nmIt->second;
2746                                 nameMap.erase(nmIt);
2747                 
2748                                 otuData[index] = i;
2749                                 nSeqsPerOTU[i]++;
2750                                 aaP[i].push_back(index);        
2751                                 
2752                                 otuString.get();
2753                         }
2754                         
2755                         sort(aaP[i].begin(), aaP[i].end());
2756                         for(int j=0;j<nSeqsPerOTU[i];j++){
2757                                 seqNumber.push_back(aaP[i][j]);
2758                         }
2759                         for(int j=nSeqsPerOTU[i];j<numSeqs;j++){
2760                                 aaP[i].push_back(0);
2761                         }
2762                         
2763                         
2764                 }
2765                 
2766                 for(int i=1;i<numOTUs;i++){
2767                         cumNumSeqs[i] = cumNumSeqs[i-1] + nSeqsPerOTU[i-1];
2768                 }
2769                 aaI = aaP;
2770                 seqIndex = seqNumber;
2771                 
2772                 listFile.close();       
2773         
2774         return numOTUs;
2775                 
2776         }
2777         catch(exception& e) {
2778                 m->errorOut(e, "ShhherCommand", "getOTUData");
2779                 exit(1);        
2780         }               
2781 }
2782 /**************************************************************************************************/
2783
2784 int ShhherCommand::calcCentroidsDriver(int numOTUs, 
2785                                           vector<int>& cumNumSeqs,
2786                                           vector<int>& nSeqsPerOTU,
2787                                           vector<int>& seqIndex,
2788                                           vector<short>& change,                //did the centroid sequence change? 0 = no; 1 = yes
2789                                           vector<int>& centroids,               //the representative flowgram for each cluster m
2790                                           vector<double>& singleTau,    //tMaster->adTau:       1-D Tau vector (1xnumSeqs)
2791                                           vector<int>& mapSeqToUnique,
2792                                           vector<short>& uniqueFlowgrams,
2793                                           vector<short>& flowDataIntI,
2794                                           vector<int>& lengths,
2795                                           int numFlowCells,
2796                                           vector<int>& seqNumber){                          
2797         
2798         //this function gets the most likely homopolymer length at a flow position for a group of sequences
2799         //within an otu
2800         
2801         try{
2802                 
2803                 for(int i=0;i<numOTUs;i++){
2804                         
2805                         if (m->control_pressed) { break; }
2806                         
2807                         double count = 0;
2808                         int position = 0;
2809                         int minFlowGram = 100000000;
2810                         double minFlowValue = 1e8;
2811                         change[i] = 0; //FALSE
2812                         
2813                         for(int j=0;j<nSeqsPerOTU[i];j++){
2814                                 count += singleTau[seqNumber[cumNumSeqs[i] + j]];
2815                         }
2816             
2817                         if(nSeqsPerOTU[i] > 0 && count > MIN_COUNT){
2818                                 vector<double> adF(nSeqsPerOTU[i]);
2819                                 vector<int> anL(nSeqsPerOTU[i]);
2820                                 
2821                                 for(int j=0;j<nSeqsPerOTU[i];j++){
2822                                         int index = cumNumSeqs[i] + j;
2823                                         int nI = seqIndex[index];
2824                                         int nIU = mapSeqToUnique[nI];
2825                                         
2826                                         int k;
2827                                         for(k=0;k<position;k++){
2828                                                 if(nIU == anL[k]){
2829                                                         break;
2830                                                 }
2831                                         }
2832                                         if(k == position){
2833                                                 anL[position] = nIU;
2834                                                 adF[position] = 0.0000;
2835                                                 position++;
2836                                         }                                               
2837                                 }
2838                                 
2839                                 for(int j=0;j<nSeqsPerOTU[i];j++){
2840                                         int index = cumNumSeqs[i] + j;
2841                                         int nI = seqIndex[index];
2842                                         
2843                                         double tauValue = singleTau[seqNumber[index]];
2844                                         
2845                                         for(int k=0;k<position;k++){
2846                                                 double dist = getDistToCentroid(anL[k], nI, lengths[nI], uniqueFlowgrams, flowDataIntI, numFlowCells);
2847                                                 adF[k] += dist * tauValue;
2848                                         }
2849                                 }
2850                                 
2851                                 for(int j=0;j<position;j++){
2852                                         if(adF[j] < minFlowValue){
2853                                                 minFlowGram = j;
2854                                                 minFlowValue = adF[j];
2855                                         }
2856                                 }
2857                                 
2858                                 if(centroids[i] != anL[minFlowGram]){
2859                                         change[i] = 1;
2860                                         centroids[i] = anL[minFlowGram];
2861                                 }
2862                         }
2863                         else if(centroids[i] != -1){
2864                                 change[i] = 1;
2865                                 centroids[i] = -1;                      
2866                         }
2867                 }
2868         
2869         return 0;
2870         }
2871         catch(exception& e) {
2872                 m->errorOut(e, "ShhherCommand", "calcCentroidsDriver");
2873                 exit(1);        
2874         }               
2875 }
2876 /**************************************************************************************************/
2877
2878 double ShhherCommand::getDistToCentroid(int cent, int flow, int length, vector<short>& uniqueFlowgrams,
2879                                         vector<short>& flowDataIntI, int numFlowCells){
2880         try{
2881                 
2882                 int flowAValue = cent * numFlowCells;
2883                 int flowBValue = flow * numFlowCells;
2884                 
2885                 double dist = 0;
2886         
2887                 for(int i=0;i<length;i++){
2888                         dist += singleLookUp[uniqueFlowgrams[flowAValue] * NUMBINS + flowDataIntI[flowBValue]];
2889                         flowAValue++;
2890                         flowBValue++;
2891                 }
2892                 
2893                 return dist / (double)length;
2894         }
2895         catch(exception& e) {
2896                 m->errorOut(e, "ShhherCommand", "getDistToCentroid");
2897                 exit(1);        
2898         }               
2899 }
2900 /**************************************************************************************************/
2901
2902 double ShhherCommand::getNewWeights(int numOTUs, vector<int>& cumNumSeqs, vector<int>& nSeqsPerOTU, vector<double>& singleTau, vector<int>& seqNumber, vector<double>& weight){
2903         try{
2904                 
2905                 double maxChange = 0;
2906                 
2907                 for(int i=0;i<numOTUs;i++){
2908                         
2909                         if (m->control_pressed) { break; }
2910                         
2911                         double difference = weight[i];
2912                         weight[i] = 0;
2913                         
2914                         for(int j=0;j<nSeqsPerOTU[i];j++){
2915                                 int index = cumNumSeqs[i] + j;
2916                                 double tauValue = singleTau[seqNumber[index]];
2917                                 weight[i] += tauValue;
2918                         }
2919                         
2920                         difference = fabs(weight[i] - difference);
2921                         if(difference > maxChange){     maxChange = difference; }
2922                 }
2923                 return maxChange;
2924         }
2925         catch(exception& e) {
2926                 m->errorOut(e, "ShhherCommand", "getNewWeights");
2927                 exit(1);        
2928         }               
2929 }
2930
2931 /**************************************************************************************************/
2932
2933 double ShhherCommand::getLikelihood(int numSeqs, int numOTUs, vector<int>& nSeqsPerOTU, vector<int>& seqNumber, vector<int>& cumNumSeqs, vector<int>& seqIndex, vector<double>& dist, vector<double>& weight){
2934         
2935         try{
2936                 
2937                 vector<long double> P(numSeqs, 0);
2938                 int effNumOTUs = 0;
2939                 
2940                 for(int i=0;i<numOTUs;i++){
2941                         if(weight[i] > MIN_WEIGHT){
2942                                 effNumOTUs++;
2943                         }
2944                 }
2945                 
2946                 string hold;
2947                 for(int i=0;i<numOTUs;i++){
2948                         
2949                         if (m->control_pressed) { break; }
2950                         
2951                         for(int j=0;j<nSeqsPerOTU[i];j++){
2952                                 int index = cumNumSeqs[i] + j;
2953                                 int nI = seqIndex[index];
2954                                 double singleDist = dist[seqNumber[index]];
2955                                 
2956                                 P[nI] += weight[i] * exp(-singleDist * sigma);
2957                         }
2958                 }
2959                 double nLL = 0.00;
2960                 for(int i=0;i<numSeqs;i++){
2961                         if(P[i] == 0){  P[i] = DBL_EPSILON;     }
2962             
2963                         nLL += -log(P[i]);
2964                 }
2965                 
2966                 nLL = nLL -(double)numSeqs * log(sigma);
2967         
2968                 return nLL; 
2969         }
2970         catch(exception& e) {
2971                 m->errorOut(e, "ShhherCommand", "getNewWeights");
2972                 exit(1);        
2973         }               
2974 }
2975
2976 /**************************************************************************************************/
2977
2978 int ShhherCommand::checkCentroids(int numOTUs, vector<int>& centroids, vector<double>& weight){
2979         try{
2980                 vector<int> unique(numOTUs, 1);
2981                 
2982                 for(int i=0;i<numOTUs;i++){
2983                         if(centroids[i] == -1 || weight[i] < MIN_WEIGHT){
2984                                 unique[i] = -1;
2985                         }
2986                 }
2987                 
2988                 for(int i=0;i<numOTUs;i++){
2989                         
2990                         if (m->control_pressed) { break; }
2991                         
2992                         if(unique[i] == 1){
2993                                 for(int j=i+1;j<numOTUs;j++){
2994                                         if(unique[j] == 1){
2995                                                 
2996                                                 if(centroids[j] == centroids[i]){
2997                                                         unique[j] = 0;
2998                                                         centroids[j] = -1;
2999                                                         
3000                                                         weight[i] += weight[j];
3001                                                         weight[j] = 0.0;
3002                                                 }
3003                                         }
3004                                 }
3005                         }
3006                 }
3007         
3008         return 0;
3009         }
3010         catch(exception& e) {
3011                 m->errorOut(e, "ShhherCommand", "checkCentroids");
3012                 exit(1);        
3013         }               
3014 }
3015 /**************************************************************************************************/
3016
3017 void ShhherCommand::calcNewDistances(int numSeqs, int numOTUs, vector<int>& nSeqsPerOTU, vector<double>& dist, 
3018                                      vector<double>& weight, vector<short>& change, vector<int>& centroids,
3019                                      vector<vector<int> >& aaP, vector<double>& singleTau, vector<vector<int> >& aaI,   
3020                                      vector<int>& seqNumber, vector<int>& seqIndex,
3021                                      vector<short>& uniqueFlowgrams,
3022                                      vector<short>& flowDataIntI, int numFlowCells, vector<int>& lengths){
3023         
3024         try{
3025                 
3026                 int total = 0;
3027                 vector<double> newTau(numOTUs,0);
3028                 vector<double> norms(numSeqs, 0);
3029                 nSeqsPerOTU.assign(numOTUs, 0);
3030         
3031                 for(int i=0;i<numSeqs;i++){
3032                         
3033                         if (m->control_pressed) { break; }
3034                         
3035                         int indexOffset = i * numOTUs;
3036             
3037                         double offset = 1e8;
3038                         
3039                         for(int j=0;j<numOTUs;j++){
3040                 
3041                                 if(weight[j] > MIN_WEIGHT && change[j] == 1){
3042                                         dist[indexOffset + j] = getDistToCentroid(centroids[j], i, lengths[i], uniqueFlowgrams, flowDataIntI, numFlowCells);
3043                                 }
3044                 
3045                                 if(weight[j] > MIN_WEIGHT && dist[indexOffset + j] < offset){
3046                                         offset = dist[indexOffset + j];
3047                                 }
3048                         }
3049             
3050                         for(int j=0;j<numOTUs;j++){
3051                                 if(weight[j] > MIN_WEIGHT){
3052                                         newTau[j] = exp(sigma * (-dist[indexOffset + j] + offset)) * weight[j];
3053                                         norms[i] += newTau[j];
3054                                 }
3055                                 else{
3056                                         newTau[j] = 0.0;
3057                                 }
3058                         }
3059             
3060                         for(int j=0;j<numOTUs;j++){
3061                                 newTau[j] /= norms[i];
3062                         }
3063             
3064                         for(int j=0;j<numOTUs;j++){
3065                                 if(newTau[j] > MIN_TAU){
3066                                         
3067                                         int oldTotal = total;
3068                                         
3069                                         total++;
3070                                         
3071                                         singleTau.resize(total, 0);
3072                                         seqNumber.resize(total, 0);
3073                                         seqIndex.resize(total, 0);
3074                                         
3075                                         singleTau[oldTotal] = newTau[j];
3076                                         
3077                                         aaP[j][nSeqsPerOTU[j]] = oldTotal;
3078                                         aaI[j][nSeqsPerOTU[j]] = i;
3079                                         nSeqsPerOTU[j]++;
3080                                 }
3081                         }
3082             
3083                 }
3084         
3085         }
3086         catch(exception& e) {
3087                 m->errorOut(e, "ShhherCommand", "calcNewDistances");
3088                 exit(1);        
3089         }               
3090 }
3091 /**************************************************************************************************/
3092
3093 int ShhherCommand::fill(int numOTUs, vector<int>& seqNumber, vector<int>& seqIndex, vector<int>& cumNumSeqs, vector<int>& nSeqsPerOTU, vector<vector<int> >& aaP, vector<vector<int> >& aaI){
3094         try {
3095                 int index = 0;
3096                 for(int i=0;i<numOTUs;i++){
3097                         
3098                         if (m->control_pressed) { return 0; }
3099                         
3100                         cumNumSeqs[i] = index;
3101                         for(int j=0;j<nSeqsPerOTU[i];j++){
3102                                 seqNumber[index] = aaP[i][j];
3103                                 seqIndex[index] = aaI[i][j];
3104                                 
3105                                 index++;
3106                         }
3107                 }
3108         
3109         return 0; 
3110         }
3111         catch(exception& e) {
3112                 m->errorOut(e, "ShhherCommand", "fill");
3113                 exit(1);        
3114         }               
3115 }
3116 /**************************************************************************************************/
3117
3118 void ShhherCommand::setOTUs(int numOTUs, int numSeqs, vector<int>& seqNumber, vector<int>& seqIndex, vector<int>& cumNumSeqs, vector<int>& nSeqsPerOTU,
3119                             vector<int>& otuData, vector<double>& singleTau, vector<double>& dist, vector<vector<int> >& aaP, vector<vector<int> >& aaI){
3120         
3121         try {
3122                 vector<double> bigTauMatrix(numOTUs * numSeqs, 0.0000);
3123                 
3124                 for(int i=0;i<numOTUs;i++){
3125                         
3126                         if (m->control_pressed) { break; }
3127                         
3128                         for(int j=0;j<nSeqsPerOTU[i];j++){
3129                                 int index = cumNumSeqs[i] + j;
3130                                 double tauValue = singleTau[seqNumber[index]];
3131                                 int sIndex = seqIndex[index];
3132                                 bigTauMatrix[sIndex * numOTUs + i] = tauValue;                          
3133                         }
3134                 }
3135                 
3136                 for(int i=0;i<numSeqs;i++){
3137                         double maxTau = -1.0000;
3138                         int maxOTU = -1;
3139                         for(int j=0;j<numOTUs;j++){
3140                                 if(bigTauMatrix[i * numOTUs + j] > maxTau){
3141                                         maxTau = bigTauMatrix[i * numOTUs + j];
3142                                         maxOTU = j;
3143                                 }
3144                         }
3145                         
3146                         otuData[i] = maxOTU;
3147                 }
3148                 
3149                 nSeqsPerOTU.assign(numOTUs, 0);         
3150                 
3151                 for(int i=0;i<numSeqs;i++){
3152                         int index = otuData[i];
3153                         
3154                         singleTau[i] = 1.0000;
3155                         dist[i] = 0.0000;
3156                         
3157                         aaP[index][nSeqsPerOTU[index]] = i;
3158                         aaI[index][nSeqsPerOTU[index]] = i;
3159                         
3160                         nSeqsPerOTU[index]++;
3161                 }
3162         
3163                 fill(numOTUs, seqNumber, seqIndex, cumNumSeqs, nSeqsPerOTU, aaP, aaI);  
3164         }
3165         catch(exception& e) {
3166                 m->errorOut(e, "ShhherCommand", "setOTUs");
3167                 exit(1);        
3168         }               
3169 }
3170 /**************************************************************************************************/
3171
3172 void ShhherCommand::writeQualities(int numOTUs, int numFlowCells, string qualityFileName, vector<int> otuCounts, vector<int>& nSeqsPerOTU, vector<int>& seqNumber,
3173                                    vector<double>& singleTau, vector<short>& flowDataIntI, vector<short>& uniqueFlowgrams, vector<int>& cumNumSeqs,
3174                                    vector<int>& mapUniqueToSeq, vector<string>& seqNameVector, vector<int>& centroids, vector<vector<int> >& aaI){
3175         
3176         try {
3177         
3178                 ofstream qualityFile;
3179                 m->openOutputFile(qualityFileName, qualityFile);
3180         
3181                 qualityFile.setf(ios::fixed, ios::floatfield);
3182                 qualityFile.setf(ios::showpoint);
3183                 qualityFile << setprecision(6);
3184                 
3185                 vector<vector<int> > qualities(numOTUs);
3186                 vector<double> pr(HOMOPS, 0);
3187                 
3188                 
3189                 for(int i=0;i<numOTUs;i++){
3190                         
3191                         if (m->control_pressed) { break; }
3192                         
3193                         int index = 0;
3194                         int base = 0;
3195                         
3196                         if(nSeqsPerOTU[i] > 0){
3197                                 qualities[i].assign(1024, -1);
3198                                 
3199                                 while(index < numFlowCells){
3200                                         double maxPrValue = 1e8;
3201                                         short maxPrIndex = -1;
3202                                         double count = 0.0000;
3203                                         
3204                                         pr.assign(HOMOPS, 0);
3205                                         
3206                                         for(int j=0;j<nSeqsPerOTU[i];j++){
3207                                                 int lIndex = cumNumSeqs[i] + j;
3208                                                 double tauValue = singleTau[seqNumber[lIndex]];
3209                                                 int sequenceIndex = aaI[i][j];
3210                                                 short intensity = flowDataIntI[sequenceIndex * numFlowCells + index];
3211                                                 
3212                                                 count += tauValue;
3213                                                 
3214                                                 for(int s=0;s<HOMOPS;s++){
3215                                                         pr[s] += tauValue * singleLookUp[s * NUMBINS + intensity];
3216                                                 }
3217                                         }
3218                                         
3219                                         maxPrIndex = uniqueFlowgrams[centroids[i] * numFlowCells + index];
3220                                         maxPrValue = pr[maxPrIndex];
3221                                         
3222                                         if(count > MIN_COUNT){
3223                                                 double U = 0.0000;
3224                                                 double norm = 0.0000;
3225                                                 
3226                                                 for(int s=0;s<HOMOPS;s++){
3227                                                         norm += exp(-(pr[s] - maxPrValue));
3228                                                 }
3229                                                 
3230                                                 for(int s=1;s<=maxPrIndex;s++){
3231                                                         int value = 0;
3232                                                         double temp = 0.0000;
3233                                                         
3234                                                         U += exp(-(pr[s-1]-maxPrValue))/norm;
3235                                                         
3236                                                         if(U>0.00){
3237                                                                 temp = log10(U);
3238                                                         }
3239                                                         else{
3240                                                                 temp = -10.1;
3241                                                         }
3242                                                         temp = floor(-10 * temp);
3243                                                         value = (int)floor(temp);
3244                                                         if(value > 100){        value = 100;    }
3245                                                         
3246                                                         qualities[i][base] = (int)value;
3247                                                         base++;
3248                                                 }
3249                                         }
3250                                         
3251                                         index++;
3252                                 }
3253                         }
3254                         
3255                         
3256                         if(otuCounts[i] > 0){
3257                                 qualityFile << '>' << seqNameVector[mapUniqueToSeq[i]] << endl;
3258                         
3259                                 int j=4;        //need to get past the first four bases
3260                                 while(qualities[i][j] != -1){
3261                     qualityFile << qualities[i][j] << ' ';
3262                     if (j > qualities[i].size()) { break; }
3263                     j++;
3264                                 }
3265                                 qualityFile << endl;
3266                         }
3267                 }
3268                 qualityFile.close();
3269                 outputNames.push_back(qualityFileName); outputTypes["qfile"].push_back(qualityFileName);
3270         
3271         }
3272         catch(exception& e) {
3273                 m->errorOut(e, "ShhherCommand", "writeQualities");
3274                 exit(1);        
3275         }               
3276 }
3277
3278 /**************************************************************************************************/
3279
3280 void ShhherCommand::writeSequences(string thisCompositeFASTAFileName, int numOTUs, int numFlowCells, string fastaFileName, vector<int> otuCounts, vector<short>& uniqueFlowgrams, vector<string>& seqNameVector, vector<vector<int> >& aaI, vector<int>& centroids){
3281         try {
3282                 
3283                 ofstream fastaFile;
3284                 m->openOutputFile(fastaFileName, fastaFile);
3285                 
3286                 vector<string> names(numOTUs, "");
3287                 
3288                 for(int i=0;i<numOTUs;i++){
3289                         
3290                         if (m->control_pressed) { break; }
3291                         
3292                         int index = centroids[i];
3293                         
3294                         if(otuCounts[i] > 0){
3295                                 fastaFile << '>' << seqNameVector[aaI[i][0]] << endl;
3296                                 
3297                                 string newSeq = "";
3298                                 
3299                                 for(int j=0;j<numFlowCells;j++){
3300                                         
3301                                         char base = flowOrder[j % 4];
3302                                         for(int k=0;k<uniqueFlowgrams[index * numFlowCells + j];k++){
3303                                                 newSeq += base;
3304                                         }
3305                                 }
3306                                 
3307                                 if (newSeq.length() >= 4) {  fastaFile << newSeq.substr(4) << endl;  }
3308                 else {  fastaFile << "NNNN" << endl;  }
3309                         }
3310                 }
3311                 fastaFile.close();
3312         
3313                 outputNames.push_back(fastaFileName); outputTypes["fasta"].push_back(fastaFileName);
3314         
3315                 if(thisCompositeFASTAFileName != ""){
3316                         m->appendFiles(fastaFileName, thisCompositeFASTAFileName);
3317                 }
3318         }
3319         catch(exception& e) {
3320                 m->errorOut(e, "ShhherCommand", "writeSequences");
3321                 exit(1);        
3322         }               
3323 }
3324
3325 /**************************************************************************************************/
3326
3327 void ShhherCommand::writeNames(string thisCompositeNamesFileName, int numOTUs, string nameFileName, vector<int> otuCounts, vector<string>& seqNameVector, vector<vector<int> >& aaI, vector<int>& nSeqsPerOTU){
3328         try {
3329                 
3330                 ofstream nameFile;
3331                 m->openOutputFile(nameFileName, nameFile);
3332                 
3333                 for(int i=0;i<numOTUs;i++){
3334                         
3335                         if (m->control_pressed) { break; }
3336                         
3337                         if(otuCounts[i] > 0){
3338                                 nameFile << seqNameVector[aaI[i][0]] << '\t' << seqNameVector[aaI[i][0]];
3339                                 
3340                                 for(int j=1;j<nSeqsPerOTU[i];j++){
3341                                         nameFile << ',' << seqNameVector[aaI[i][j]];
3342                                 }
3343                                 
3344                                 nameFile << endl;
3345                         }
3346                 }
3347                 nameFile.close();
3348                 outputNames.push_back(nameFileName); outputTypes["name"].push_back(nameFileName);
3349                 
3350                 
3351                 if(thisCompositeNamesFileName != ""){
3352                         m->appendFiles(nameFileName, thisCompositeNamesFileName);
3353                 }               
3354         }
3355         catch(exception& e) {
3356                 m->errorOut(e, "ShhherCommand", "writeNames");
3357                 exit(1);        
3358         }               
3359 }
3360
3361 /**************************************************************************************************/
3362
3363 void ShhherCommand::writeGroups(string groupFileName, string fileRoot, int numSeqs, vector<string>& seqNameVector){
3364         try {
3365         ofstream groupFile;
3366                 m->openOutputFile(groupFileName, groupFile);
3367                 
3368                 for(int i=0;i<numSeqs;i++){
3369                         if (m->control_pressed) { break; }
3370                         groupFile << seqNameVector[i] << '\t' << fileRoot << endl;
3371                 }
3372                 groupFile.close();
3373                 outputNames.push_back(groupFileName); outputTypes["group"].push_back(groupFileName);
3374         
3375         }
3376         catch(exception& e) {
3377                 m->errorOut(e, "ShhherCommand", "writeGroups");
3378                 exit(1);        
3379         }               
3380 }
3381
3382 /**************************************************************************************************/
3383
3384 void ShhherCommand::writeClusters(string otuCountsFileName, int numOTUs, int numFlowCells, vector<int> otuCounts, vector<int>& centroids, vector<short>& uniqueFlowgrams, vector<string>& seqNameVector, vector<vector<int> >& aaI, vector<int>& nSeqsPerOTU, vector<int>& lengths, vector<short>& flowDataIntI){
3385         try {
3386                 ofstream otuCountsFile;
3387                 m->openOutputFile(otuCountsFileName, otuCountsFile);
3388                 
3389                 string bases = flowOrder;
3390                 
3391                 for(int i=0;i<numOTUs;i++){
3392                         
3393                         if (m->control_pressed) {
3394                                 break;
3395                         }
3396                         //output the translated version of the centroid sequence for the otu
3397                         if(otuCounts[i] > 0){
3398                                 int index = centroids[i];
3399                                 
3400                                 otuCountsFile << "ideal\t";
3401                                 for(int j=8;j<numFlowCells;j++){
3402                                         char base = bases[j % 4];
3403                                         for(int s=0;s<uniqueFlowgrams[index * numFlowCells + j];s++){
3404                                                 otuCountsFile << base;
3405                                         }
3406                                 }
3407                                 otuCountsFile << endl;
3408                                 
3409                                 for(int j=0;j<nSeqsPerOTU[i];j++){
3410                                         int sequence = aaI[i][j];
3411                                         otuCountsFile << seqNameVector[sequence] << '\t';
3412                                         
3413                                         string newSeq = "";
3414                                         
3415                                         for(int k=0;k<lengths[sequence];k++){
3416                                                 char base = bases[k % 4];
3417                                                 int freq = int(0.01 * (double)flowDataIntI[sequence * numFlowCells + k] + 0.5);
3418                         
3419                                                 for(int s=0;s<freq;s++){
3420                                                         newSeq += base;
3421                                                         //otuCountsFile << base;
3422                                                 }
3423                                         }
3424                                         
3425                     if (newSeq.length() >= 4) {  otuCountsFile << newSeq.substr(4) << endl;  }
3426                     else {  otuCountsFile << "NNNN" << endl;  }
3427                                 }
3428                                 otuCountsFile << endl;
3429                         }
3430                 }
3431                 otuCountsFile.close();
3432                 outputNames.push_back(otuCountsFileName); outputTypes["counts"].push_back(otuCountsFileName);
3433         
3434         }
3435         catch(exception& e) {
3436                 m->errorOut(e, "ShhherCommand", "writeClusters");
3437                 exit(1);        
3438         }               
3439 }
3440
3441 /**************************************************************************************************/
3442
3443 void ShhherCommand::getSingleLookUp(){
3444         try{
3445                 //      these are the -log probabilities that a signal corresponds to a particular homopolymer length
3446                 singleLookUp.assign(HOMOPS * NUMBINS, 0);
3447                 
3448                 int index = 0;
3449                 ifstream lookUpFile;
3450                 m->openInputFile(lookupFileName, lookUpFile);
3451                 
3452                 for(int i=0;i<HOMOPS;i++){
3453                         
3454                         if (m->control_pressed) { break; }
3455                         
3456                         float logFracFreq;
3457                         lookUpFile >> logFracFreq;
3458                         
3459                         for(int j=0;j<NUMBINS;j++)      {
3460                                 lookUpFile >> singleLookUp[index];
3461                                 index++;
3462                         }
3463                 }       
3464                 lookUpFile.close();
3465         }
3466         catch(exception& e) {
3467                 m->errorOut(e, "ShhherCommand", "getSingleLookUp");
3468                 exit(1);
3469         }
3470 }
3471
3472 /**************************************************************************************************/
3473
3474 void ShhherCommand::getJointLookUp(){
3475         try{
3476                 
3477                 //      the most likely joint probability (-log) that two intenities have the same polymer length
3478                 jointLookUp.resize(NUMBINS * NUMBINS, 0);
3479                 
3480                 for(int i=0;i<NUMBINS;i++){
3481                         
3482                         if (m->control_pressed) { break; }
3483                         
3484                         for(int j=0;j<NUMBINS;j++){             
3485                                 
3486                                 double minSum = 100000000;
3487                                 
3488                                 for(int k=0;k<HOMOPS;k++){
3489                                         double sum = singleLookUp[k * NUMBINS + i] + singleLookUp[k * NUMBINS + j];
3490                                         
3491                                         if(sum < minSum)        {       minSum = sum;           }
3492                                 }       
3493                                 jointLookUp[i * NUMBINS + j] = minSum;
3494                         }
3495                 }
3496         }
3497         catch(exception& e) {
3498                 m->errorOut(e, "ShhherCommand", "getJointLookUp");
3499                 exit(1);
3500         }
3501 }
3502
3503 /**************************************************************************************************/
3504
3505 double ShhherCommand::getProbIntensity(int intIntensity){                          
3506         try{
3507
3508                 double minNegLogProb = 100000000; 
3509
3510                 
3511                 for(int i=0;i<HOMOPS;i++){//loop signal strength
3512                         
3513                         if (m->control_pressed) { break; }
3514                         
3515                         float negLogProb = singleLookUp[i * NUMBINS + intIntensity];
3516                         if(negLogProb < minNegLogProb)  {       minNegLogProb = negLogProb; }
3517                 }
3518                 
3519                 return minNegLogProb;
3520         }
3521         catch(exception& e) {
3522                 m->errorOut(e, "ShhherCommand", "getProbIntensity");
3523                 exit(1);
3524         }
3525 }
3526
3527
3528
3529