]> git.donarmstrong.com Git - mothur.git/blob - shhhseqscommand.cpp
added pcr.seqs command. fixed bug in rarefacftion.single that caused parsing error...
[mothur.git] / shhhseqscommand.cpp
1 /*
2  *  shhhseqscommand.cpp
3  *  Mothur
4  *
5  *  Created by westcott on 11/8/11.
6  *  Copyright 2011 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "shhhseqscommand.h"
11
12
13
14 //**********************************************************************************************************************
15 vector<string> ShhhSeqsCommand::setParameters(){        
16         try {
17                 CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pfasta);
18                 CommandParameter pname("name", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pname);
19                 CommandParameter pgroup("group", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pgroup);
20                 CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors);
21                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
22                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
23                 CommandParameter psigma("sigma", "Number", "", "0.01", "", "", "",false,false); parameters.push_back(psigma);
24                 
25                 vector<string> myArray;
26                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
27                 return myArray;
28         }
29         catch(exception& e) {
30                 m->errorOut(e, "ShhhSeqsCommand", "setParameters");
31                 exit(1);
32         }
33 }
34 //**********************************************************************************************************************
35 string ShhhSeqsCommand::getHelpString(){        
36         try {
37                 string helpString = "";
38                 helpString += "The shhh.seqs command reads a fasta and name file and ....\n";
39                 helpString += "The shhh.seqs command parameters are fasta, name, group, sigma and processors.\n";
40                 helpString += "The fasta parameter allows you to enter the fasta file containing your sequences, and is required, unless you have a valid current fasta file. \n";
41                 helpString += "The name parameter allows you to provide a name file associated with your fasta file. It is required. \n";
42                 helpString += "The group parameter allows you to provide a group file.  When checking sequences, only sequences from the same group as the query sequence will be used as the reference. \n";
43                 helpString += "The processors parameter allows you to specify how many processors you would like to use.  The default is 1. \n";
44                 helpString += "The sigma parameter ....  The default is 0.01. \n";
45                 helpString += "The shhh.seqs command should be in the following format: \n";
46                 helpString += "shhh.seqs(fasta=yourFastaFile, name=yourNameFile) \n";
47                 helpString += "Example: shhh.seqs(fasta=AD.align, name=AD.names) \n";
48                 helpString += "Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFastaFile).\n";       
49                 return helpString;
50                 
51         }
52         catch(exception& e) {
53                 m->errorOut(e, "ShhhSeqsCommand", "getHelpString");
54                 exit(1);
55         }
56 }
57 //**********************************************************************************************************************
58
59 ShhhSeqsCommand::ShhhSeqsCommand(){     
60         try {
61                 abort = true; calledHelp = true;
62                 setParameters();
63                 vector<string> tempOutNames;
64                 outputTypes["fasta"] = tempOutNames;
65                 outputTypes["name"] = tempOutNames;
66                 outputTypes["map"] = tempOutNames;
67         }
68         catch(exception& e) {
69                 m->errorOut(e, "ShhhSeqsCommand", "ShhhSeqsCommand");
70                 exit(1);
71         }
72 }
73
74 //**********************************************************************************************************************
75 ShhhSeqsCommand::ShhhSeqsCommand(string option) {
76         try {
77                 abort = false; calledHelp = false;   
78                 
79                 //allow user to run help
80                 if(option == "help") { help(); abort = true; calledHelp = true; }
81                 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
82                 
83                 else {
84                         vector<string> myArray = setParameters();
85                         
86                         OptionParser parser(option);
87                         map<string, string> parameters = parser.getParameters();
88                         
89                         ValidParameters validParameter;
90                         map<string, string>::iterator it;
91                         
92                         //check to make sure all parameters are valid for command
93                         for (map<string, string>::iterator it2 = parameters.begin(); it2 != parameters.end(); it2++) { 
94                                 if (validParameter.isValidParameter(it2->first, myArray, it2->second) != true) {  abort = true;  }
95                         }
96                         
97                         //initialize outputTypes
98                         vector<string> tempOutNames;
99                         outputTypes["fasta"] = tempOutNames;
100                         outputTypes["name"] = tempOutNames;
101                         outputTypes["map"] = tempOutNames;
102                         
103                         //if the user changes the input directory command factory will send this info to us in the output parameter 
104                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
105                         if (inputDir == "not found"){   inputDir = "";          }
106                         else {
107                                 string path;
108                                 it = parameters.find("fasta");
109                                 //user has given a template file
110                                 if(it != parameters.end()){ 
111                                         path = m->hasPath(it->second);
112                                         //if the user has not given a path then, add inputdir. else leave path alone.
113                                         if (path == "") {       parameters["fasta"] = inputDir + it->second;            }
114                                 }
115                                 
116                                 it = parameters.find("name");
117                                 //user has given a template file
118                                 if(it != parameters.end()){ 
119                                         path = m->hasPath(it->second);
120                                         //if the user has not given a path then, add inputdir. else leave path alone.
121                                         if (path == "") {       parameters["name"] = inputDir + it->second;             }
122                                 }
123                                 
124                                 it = parameters.find("group");
125                                 //user has given a template file
126                                 if(it != parameters.end()){ 
127                                         path = m->hasPath(it->second);
128                                         //if the user has not given a path then, add inputdir. else leave path alone.
129                                         if (path == "") {       parameters["group"] = inputDir + it->second;            }
130                                 }
131                         }
132                         
133                         //check for required parameters
134                         fastafile = validParameter.validFile(parameters, "fasta", true);
135                         if (fastafile == "not found") {                                 
136                                 fastafile = m->getFastaFile(); 
137                                 if (fastafile != "") { m->mothurOut("Using " + fastafile + " as input file for the fasta parameter."); m->mothurOutEndLine(); }
138                                 else {  m->mothurOut("You have no current fastafile and the fasta parameter is required."); m->mothurOutEndLine(); abort = true; }
139                         }
140                         else if (fastafile == "not open") { abort = true; }     
141                         else { m->setFastaFile(fastafile); }
142                         
143                         //if the user changes the output directory command factory will send this info to us in the output parameter 
144                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = ""; }
145                         
146                         //check for optional parameter and set defaults
147                         // ...at some point should added some additional type checking...
148                         namefile = validParameter.validFile(parameters, "name", true);
149                         if (namefile == "not found") {                  
150                                 namefile = m->getNameFile(); 
151                                 if (namefile != "") { m->mothurOut("Using " + namefile + " as input file for the name parameter."); m->mothurOutEndLine(); }
152                                 else {  m->mothurOut("You have no current namefile and the name parameter is required."); m->mothurOutEndLine(); abort = true; }
153                         }
154                         else if (namefile == "not open") { namefile =  ""; abort = true; }      
155                         else {  m->setNameFile(namefile); }
156                         
157                         groupfile = validParameter.validFile(parameters, "group", true);
158                         if (groupfile == "not found") { groupfile =  "";   }
159                         else if (groupfile == "not open") { abort = true; groupfile =  ""; }    
160                         else {   m->setGroupFile(groupfile);  }
161                         
162                         string temp     = validParameter.validFile(parameters, "sigma", false);         if(temp == "not found"){        temp = "0.01"; }
163                         m->mothurConvert(temp, sigma); 
164                         sigma = 1/sigma;
165             
166                         temp = validParameter.validFile(parameters, "processors", false);       if (temp == "not found"){       temp = m->getProcessors();      }
167                         m->setProcessors(temp);
168                         m->mothurConvert(temp, processors);
169                         
170                         if (namefile == "") {
171                                 vector<string> files; files.push_back(fastafile);
172                                 parser.getNameFile(files);
173                         }
174                 }
175         }
176         catch(exception& e) {
177                 m->errorOut(e, "ShhhSeqsCommand", "ShhhSeqsCommand");
178                 exit(1);
179         }
180 }
181 //**********************************************************************************************************************
182 int ShhhSeqsCommand::execute() {
183         try {
184                 
185                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
186                 
187                 if (outputDir == "") { outputDir = m->hasPath(fastafile);  }//if user entered a file with a path then preserve it                               
188                 string outputFileName = outputDir + m->getRootName(m->getSimpleName(fastafile)) + "shhh.fasta";
189                 string nameFileName = outputDir + m->getRootName(m->getSimpleName(fastafile))  + "shhh.names";
190                 string mapFileName = outputDir + m->getRootName(m->getSimpleName(fastafile))  + "shhh.map";
191                 
192                 if (groupfile != "") {
193                         //Parse sequences by group
194                         SequenceParser parser(groupfile, fastafile, namefile);
195                         vector<string> groups = parser.getNamesOfGroups();
196                         
197                         if (m->control_pressed) {  return 0; }
198                         
199                         //clears files
200                         ofstream out, out1, out2;
201                         m->openOutputFile(outputFileName, out); out.close(); 
202                         m->openOutputFile(nameFileName, out1); out1.close();
203                         mapFileName = outputDir + m->getRootName(m->getSimpleName(fastafile))  + "shhh.";
204                         
205                         vector<string> mapFileNames;
206                         if(processors == 1)     {       mapFileNames = driverGroups(parser, outputFileName, nameFileName, mapFileName, 0, groups.size(), groups);       }
207                         else                            {       mapFileNames = createProcessesGroups(parser, outputFileName, nameFileName, mapFileName, groups);                        }
208                         
209                         if (m->control_pressed) {    return 0;  }       
210                         
211                         for (int j = 0; j < mapFileNames.size(); j++) { outputNames.push_back(mapFileNames[j]); outputTypes["map"].push_back(mapFileNames[j]); }
212                         
213                         //deconvolute results by running unique.seqs
214                         deconvoluteResults(outputFileName, nameFileName);
215                         
216                         if (m->control_pressed) {   return 0;   }                               
217                         
218                 }else{  
219                         vector<string> sequences;
220                         vector<string> uniqueNames;
221                         vector<string> redundantNames;
222                         vector<int> seqFreq;
223                         
224                         seqNoise noise;
225                         correctDist* correct = new correctDist(processors);
226                         
227                         //reads fasta and name file and loads them in order
228                         readData(correct, noise, sequences, uniqueNames, redundantNames, seqFreq);
229                         if (m->control_pressed) { return 0; }
230                         
231                         //calc distances for cluster
232                         string distFileName = outputDir + m->getRootName(m->getSimpleName(fastafile)) + "shhh.dist";
233                         correct->execute(distFileName);
234                         delete correct;
235                         
236                         if (m->control_pressed) { m->mothurRemove(distFileName); return 0; }
237                         
238                         driver(noise, sequences, uniqueNames, redundantNames, seqFreq, distFileName, outputFileName, nameFileName, mapFileName); 
239                         outputNames.push_back(mapFileName); outputTypes["map"].push_back(mapFileName);
240                 }
241                 
242                 if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) {        m->mothurRemove(outputNames[j]);        } return 0; }
243                 
244                 outputNames.push_back(outputFileName); outputTypes["fasta"].push_back(outputFileName);
245                 outputNames.push_back(nameFileName); outputTypes["name"].push_back(nameFileName);
246                 
247                 m->mothurOutEndLine();
248                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
249                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }       
250                 m->mothurOutEndLine();
251                 
252                 //set accnos file as new current accnosfile
253                 string current = "";
254                 itTypes = outputTypes.find("fasta");
255                 if (itTypes != outputTypes.end()) {
256                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setFastaFile(current); }
257                 }
258                 
259                 itTypes = outputTypes.find("name");
260                 if (itTypes != outputTypes.end()) {
261                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setNameFile(current); }
262                 }
263                 
264                 
265                 return 0;
266         }
267         catch(exception& e) {
268                 m->errorOut(e, "ShhhSeqsCommand", "execute");
269                 exit(1);
270         }
271 }
272 //**********************************************************************************************************************
273 int ShhhSeqsCommand::readData(correctDist* correct, seqNoise& noise, vector<string>& seqs, vector<string>& uNames, vector<string>& rNames, vector<int>& freq) {
274         try {
275                 map<string, string> nameMap; 
276                 map<string, string>::iterator it;
277                 m->readNames(namefile, nameMap);
278                 bool error = false;
279                 
280                 ifstream in;
281                 m->openInputFile(fastafile, in);
282                 
283                 while (!in.eof()) {
284                         
285                         if (m->control_pressed) { in.close(); return 0; }
286                         
287                         Sequence seq(in); m->gobble(in);
288                         
289                         if (seq.getName() != "") {
290                                 correct->addSeq(seq.getName(), seq.getAligned());
291                                 
292                                 it = nameMap.find(seq.getName());
293                                 if (it != nameMap.end()) {
294                                         noise.addSeq(seq.getAligned(), seqs);
295                                         noise.addRedundantName(it->first, it->second, uNames, rNames, freq);
296                                 }else {
297                                         m->mothurOut("[ERROR]: " + seq.getName() + " is in your fasta file and not in your namefile, please correct.");
298                                         error = true;
299                                 }
300                         }
301                 }
302                 in.close();
303                 
304                 if (error) { m->control_pressed = true; }
305                 
306                 return seqs.size();
307                 
308         }catch(exception& e) {
309                 m->errorOut(e, "ShhhSeqsCommand", "readData");
310                 exit(1);
311         }
312 }
313 //**********************************************************************************************************************
314 int ShhhSeqsCommand::loadData(correctDist* correct, seqNoise& noise, vector<string>& seqs, vector<string>& uNames, vector<string>& rNames, vector<int>& freq, map<string, string>& nameMap, vector<Sequence>& sequences) {
315         try {
316                 bool error = false;
317                 map<string, string>::iterator it;
318                 
319                 for (int i = 0; i < sequences.size(); i++) {
320                         
321                         if (m->control_pressed) { return 0; }
322                         
323                         if (sequences[i].getName() != "") {
324                                 correct->addSeq(sequences[i].getName(), sequences[i].getAligned());
325                                 
326                                 it = nameMap.find(sequences[i].getName());
327                                 if (it != nameMap.end()) {
328                                         noise.addSeq(sequences[i].getAligned(), seqs);
329                                         noise.addRedundantName(it->first, it->second, uNames, rNames, freq);
330                                 }else {
331                                         m->mothurOut("[ERROR]: " + sequences[i].getName() + " is in your fasta file and not in your namefile, please correct.");
332                                         error = true;
333                                 }
334                         }
335                 }
336                                 
337                 if (error) { m->control_pressed = true; }
338                 
339                 return seqs.size();
340                 
341         }catch(exception& e) {
342                 m->errorOut(e, "ShhhSeqsCommand", "loadData");
343                 exit(1);
344         }
345 }
346 /**************************************************************************************************/
347 vector<string> ShhhSeqsCommand::createProcessesGroups(SequenceParser& parser, string newFName, string newNName, string newMName, vector<string> groups) {
348         try {
349                 
350                 vector<int> processIDS;
351                 int process = 1;
352                 vector<string> mapfileNames;
353                 
354                 //sanity check
355                 if (groups.size() < processors) { processors = groups.size(); }
356                 
357                 //divide the groups between the processors
358                 vector<linePair> lines;
359                 int numGroupsPerProcessor = groups.size() / processors;
360                 for (int i = 0; i < processors; i++) {
361                         int startIndex =  i * numGroupsPerProcessor;
362                         int endIndex = (i+1) * numGroupsPerProcessor;
363                         if(i == (processors - 1)){      endIndex = groups.size();       }
364                         lines.push_back(linePair(startIndex, endIndex));
365                 }
366                 
367 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)          
368                 
369                 //loop through and create all the processes you want
370                 while (process != processors) {
371                         int pid = fork();
372                         
373                         if (pid > 0) {
374                                 processIDS.push_back(pid);  //create map from line number to pid so you can append files in correct order later
375                                 process++;
376                         }else if (pid == 0){
377                                 mapfileNames = driverGroups(parser, newFName + toString(getpid()) + ".temp", newNName + toString(getpid()) + ".temp", newMName, lines[process].start, lines[process].end, groups);
378                                 
379                                 //pass filenames to parent
380                                 ofstream out;
381                                 string tempFile = newMName + toString(getpid()) + ".temp";
382                                 m->openOutputFile(tempFile, out);
383                                 out << mapfileNames.size() << endl;
384                                 for (int i = 0; i < mapfileNames.size(); i++) {
385                                         out << mapfileNames[i] << endl;
386                                 }
387                                 out.close();
388                                 
389                                 exit(0);
390                         }else { 
391                                 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine(); 
392                                 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
393                                 exit(0);
394                         }
395                 }
396                 
397                 //do my part
398                 mapfileNames = driverGroups(parser, newFName, newNName, newMName, lines[0].start, lines[0].end, groups);
399                 
400                 //force parent to wait until all the processes are done
401                 for (int i=0;i<processIDS.size();i++) { 
402                         int temp = processIDS[i];
403                         wait(&temp);
404                 }
405                 
406                 //append output files
407                 for(int i=0;i<processIDS.size();i++){
408                         ifstream in;
409                         string tempFile =  newMName + toString(processIDS[i]) + ".temp";
410                         m->openInputFile(tempFile, in);
411                         if (!in.eof()) { 
412                                 int tempNum = 0; in >> tempNum;  m->gobble(in);
413                                 for (int j = 0; j < tempNum; j++) {
414                                         string filename;
415                                         in >> filename; m->gobble(in);
416                                         mapfileNames.push_back(filename);
417                                 }
418                         }
419                         in.close(); m->mothurRemove(tempFile);
420                         
421                 }
422 #else
423                 
424                 //////////////////////////////////////////////////////////////////////////////////////////////////////
425                 //Windows version shared memory, so be careful when passing variables through the shhhseqsData struct. 
426                 //Above fork() will clone, so memory is separate, but that's not the case with windows, 
427                 //////////////////////////////////////////////////////////////////////////////////////////////////////
428                 
429                 vector<shhhseqsData*> pDataArray; 
430                 DWORD   dwThreadIdArray[processors-1];
431                 HANDLE  hThreadArray[processors-1]; 
432                 
433                 //Create processor worker threads.
434                 for( int i=1; i<processors; i++ ){
435                         // Allocate memory for thread data.
436                         string extension = toString(i) + ".temp";
437
438                         shhhseqsData* tempShhhseqs = new shhhseqsData(fastafile, namefile, groupfile, (newFName+extension), (newNName+extension), newMName, groups, m, lines[i].start, lines[i].end, sigma, i);
439                         pDataArray.push_back(tempShhhseqs);
440                         processIDS.push_back(i);
441                         
442                         //MySeqSumThreadFunction is in header. It must be global or static to work with the threads.
443                         //default security attributes, thread function name, argument to thread function, use default creation flags, returns the thread identifier
444                         hThreadArray[i-1] = CreateThread(NULL, 0, MyShhhSeqsThreadFunction, pDataArray[i-1], 0, &dwThreadIdArray[i-1]);   
445                 }
446                 
447                 
448                 //using the main process as a worker saves time and memory
449                 mapfileNames = driverGroups(parser, newFName, newNName, newMName, lines[0].start, lines[0].end, groups);
450                 
451                 //Wait until all threads have terminated.
452                 WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE);
453                 
454                 //Close all thread handles and free memory allocations.
455                 for(int i=0; i < pDataArray.size(); i++){
456                         for (int j = 0; j < pDataArray[i]->mapfileNames.size(); j++) {
457                                 mapfileNames.push_back(pDataArray[i]->mapfileNames[j]);
458                         }
459                         CloseHandle(hThreadArray[i]);
460                         delete pDataArray[i];
461                 }
462                 
463 #endif          
464                 
465                 //append output files
466                 for(int i=0;i<processIDS.size();i++){
467                         m->appendFiles((newFName + toString(processIDS[i]) + ".temp"), newFName);
468                         m->mothurRemove((newFName + toString(processIDS[i]) + ".temp"));
469                         
470                         m->appendFiles((newNName + toString(processIDS[i]) + ".temp"), newNName);
471                         m->mothurRemove((newNName + toString(processIDS[i]) + ".temp"));
472                 }
473                 
474                 return mapfileNames;    
475                 
476         }
477         catch(exception& e) {
478                 m->errorOut(e, "ShhhSeqsCommand", "createProcessesGroups");
479                 exit(1);
480         }
481 }
482 /**************************************************************************************************/
483 vector<string> ShhhSeqsCommand::driverGroups(SequenceParser& parser, string newFFile, string newNFile, string newMFile, int start, int end, vector<string> groups){
484         try {
485                 
486                 vector<string> mapFileNames;
487                 
488                 for (int i = start; i < end; i++) {
489                         
490                         start = time(NULL);
491                         
492                         if (m->control_pressed) {  return mapFileNames; }
493                         
494                         m->mothurOutEndLine(); m->mothurOut("Processing group " + groups[i] + ":"); m->mothurOutEndLine();
495                         
496                         map<string, string> thisNameMap;
497                         thisNameMap = parser.getNameMap(groups[i]); 
498                         vector<Sequence> thisSeqs = parser.getSeqs(groups[i]);
499                         
500                         vector<string> sequences;
501                         vector<string> uniqueNames;
502                         vector<string> redundantNames;
503                         vector<int> seqFreq;
504                         
505                         seqNoise noise;
506                         correctDist* correct = new correctDist(1); //we use one processor since we already split up the work load.
507                         
508                         //load this groups info in order
509                         loadData(correct, noise, sequences, uniqueNames, redundantNames, seqFreq, thisNameMap, thisSeqs);
510                         if (m->control_pressed) { return mapFileNames; }
511                         
512                         //calc distances for cluster
513                         string distFileName = outputDir + m->getRootName(m->getSimpleName(fastafile)) + groups[i] + ".shhh.dist";
514                         correct->execute(distFileName);
515                         delete correct;
516                         
517                         if (m->control_pressed) { m->mothurRemove(distFileName); return mapFileNames; }
518                         
519                         driver(noise, sequences, uniqueNames, redundantNames, seqFreq, distFileName, newFFile+groups[i], newNFile+groups[i], newMFile+groups[i]+".map"); 
520                         
521                         if (m->control_pressed) { return mapFileNames; }
522                         
523                         m->appendFiles(newFFile+groups[i], newFFile); m->mothurRemove(newFFile+groups[i]);
524                         m->appendFiles(newNFile+groups[i], newNFile); m->mothurRemove(newNFile+groups[i]);
525                         mapFileNames.push_back(newMFile+groups[i]+".map");
526                         
527                         m->mothurOut("It took " + toString(time(NULL) - start) + " secs to process group " + groups[i] + "."); m->mothurOutEndLine(); 
528                 }
529                 
530                 return mapFileNames;
531         }
532         catch(exception& e) {
533                 m->errorOut(e, "ShhhSeqsCommand", "driverGroups");
534                 exit(1);
535         }
536 }
537 //**********************************************************************************************************************
538 int ShhhSeqsCommand::driver(seqNoise& noise, 
539                                                         vector<string>& sequences, 
540                                                         vector<string>& uniqueNames, 
541                                                         vector<string>& redundantNames, 
542                                                         vector<int>& seqFreq, 
543                                                         string distFileName, string outputFileName, string nameFileName, string mapFileName) {
544         try {
545                 double cutOff = 0.08;
546                 int minIter = 10;
547                 int maxIter = 1000;
548                 double minDelta = 1e-6;
549                 int numIters = 0;
550                 double maxDelta = 1e6;
551                 int numSeqs = sequences.size();
552                                 
553                 //run cluster command
554                 string inputString = "phylip=" + distFileName + ", method=furthest, cutoff=0.08";
555                 m->mothurOut("/******************************************/"); m->mothurOutEndLine(); 
556                 m->mothurOut("Running command: cluster(" + inputString + ")"); m->mothurOutEndLine(); 
557                 
558                 Command* clusterCommand = new ClusterCommand(inputString);
559                 clusterCommand->execute();
560                 
561                 map<string, vector<string> > filenames = clusterCommand->getOutputFiles();
562                 string listFileName = filenames["list"][0];
563                 string rabundFileName = filenames["rabund"][0]; m->mothurRemove(rabundFileName);
564                 string sabundFileName = filenames["sabund"][0]; m->mothurRemove(sabundFileName);
565         
566                 delete clusterCommand;
567                 m->mothurOut("/******************************************/"); m->mothurOutEndLine(); 
568                 
569                 if (m->control_pressed) { m->mothurRemove(distFileName); m->mothurRemove(listFileName); return 0; }
570                 
571                 vector<double> distances(numSeqs * numSeqs);
572                 noise.getDistanceData(distFileName, distances);
573                 m->mothurRemove(distFileName); 
574                 if (m->control_pressed) { m->mothurRemove(listFileName); return 0; }
575                 
576                 vector<int> otuData(numSeqs);
577                 vector<int> otuFreq;
578                 vector<vector<int> > otuBySeqLookUp;
579                 noise.getListData(listFileName, cutOff, otuData, otuFreq, otuBySeqLookUp);
580                 m->mothurRemove(listFileName);
581                 if (m->control_pressed) { return 0; }
582                 
583                 int numOTUs = otuFreq.size();
584                 
585                 vector<double> weights(numOTUs, 0);
586                 vector<int> change(numOTUs, 1);
587                 vector<int> centroids(numOTUs, -1);
588                 vector<int> cumCount(numOTUs, 0);
589                 
590                 vector<double> tau(numSeqs, 1);
591                 vector<int> anP(numSeqs, 0);
592                 vector<int> anI(numSeqs, 0);
593                 vector<int> anN(numSeqs, 0);
594                 vector<vector<int> > aanI = otuBySeqLookUp;
595                 
596                 while(numIters < minIter || ((maxDelta > minDelta) && (numIters < maxIter))){
597                         
598                         if (m->control_pressed) { return 0; }
599                         
600                         noise.updateOTUCountData(otuFreq, otuBySeqLookUp, aanI, anP, anI, cumCount); if (m->control_pressed) { return 0; }
601                         maxDelta = noise.calcNewWeights(weights, seqFreq, anI, cumCount, anP, otuFreq, tau);  if (m->control_pressed) { return 0; }
602                         
603                         noise.calcCentroids(anI, anP, change, centroids, cumCount, distances, seqFreq, otuFreq, tau); if (m->control_pressed) { return 0; }
604                         noise.checkCentroids(weights, centroids); if (m->control_pressed) { return 0; }
605                          
606                         otuFreq.assign(numOTUs, 0);
607                         
608                         int total = 0;
609                         
610                         for(int i=0;i<numSeqs;i++){
611                                 if (m->control_pressed) { return 0; }
612                                 
613                                 double offset = 1e6;
614                                 double norm = 0.0000;
615                                 double minWeight = 0.1;
616                                 vector<double> currentTau(numOTUs);
617                                 
618                                 for(int j=0;j<numOTUs;j++){
619                                         if (m->control_pressed) { return 0; }
620                                         if(weights[j] > minWeight && distances[i * numSeqs+centroids[j]] < offset){
621                                                 offset = distances[i * numSeqs+centroids[j]];
622                                         }
623                                 }
624                                 
625                                 for(int j=0;j<numOTUs;j++){
626                                         if (m->control_pressed) { return 0; }
627                                         if(weights[j] > minWeight){
628                                                 currentTau[j] = exp(sigma * (-distances[(i * numSeqs + centroids[j])] + offset)) * weights[j];
629                                                 norm += currentTau[j];
630                                         }
631                                         else{
632                                                 currentTau[j] = 0.0000;
633                                         }
634                                 }                       
635                                 
636                                 for(int j=0;j<numOTUs;j++){
637                                         if (m->control_pressed) { return 0; }
638                                         currentTau[j] /= norm;
639                                 }
640                                 
641                                 for(int j=0;j<numOTUs;j++){
642                                         if (m->control_pressed) { return 0; }
643                                         
644                                         if(currentTau[j] > 1.0e-4){
645                                                 int oldTotal = total;
646                                                 total++;
647                                                 
648                                                 tau.resize(oldTotal+1);
649                                                 tau[oldTotal] = currentTau[j];
650                                                 otuBySeqLookUp[j][otuFreq[j]] = oldTotal;
651                                                 aanI[j][otuFreq[j]] = i;
652                                                 otuFreq[j]++;
653                                                 
654                                         }
655                                 }
656                                 
657                                 anP.resize(total);
658                                 anI.resize(total);
659                         }
660                         
661                         numIters++;
662                 }
663                 
664                 noise.updateOTUCountData(otuFreq, otuBySeqLookUp, aanI, anP, anI, cumCount);  if (m->control_pressed) { return 0; }
665                 
666                 vector<double> percentage(numSeqs);
667                 noise.setUpOTUData(otuData, percentage, cumCount, tau, otuFreq, anP, anI);  if (m->control_pressed) { return 0; }
668                 noise.finishOTUData(otuData, otuFreq, anP, anI, cumCount, otuBySeqLookUp, aanI, tau);  if (m->control_pressed) { return 0; }
669                 
670                 change.assign(numOTUs, 1);
671                 noise.calcCentroids(anI, anP, change, centroids, cumCount, distances, seqFreq, otuFreq, tau); if (m->control_pressed) { return 0; }
672                 
673                 
674                 vector<int> finalTau(numOTUs, 0);
675                 for(int i=0;i<numSeqs;i++){
676                         if (m->control_pressed) { return 0; }
677                         finalTau[otuData[i]] += int(seqFreq[i]);
678                 }
679                 
680                 noise.writeOutput(outputFileName, nameFileName, mapFileName, finalTau, centroids, otuData, sequences, uniqueNames, redundantNames, seqFreq, distances);
681                 
682                 return 0;
683                 
684         }catch(exception& e) {
685                 m->errorOut(e, "ShhhSeqsCommand", "driver");
686                 exit(1);
687         }
688 }       
689 //**********************************************************************************************************************
690 int ShhhSeqsCommand::deconvoluteResults(string fastaFile, string nameFile){
691         try {
692                 m->mothurOutEndLine(); m->mothurOut("Deconvoluting results:"); m->mothurOutEndLine(); m->mothurOutEndLine();
693                 
694                 //use unique.seqs to create new name and fastafile
695                 string inputString = "fasta=" + fastaFile + ", name=" + nameFile;
696                 m->mothurOut("/******************************************/"); m->mothurOutEndLine(); 
697                 m->mothurOut("Running command: unique.seqs(" + inputString + ")"); m->mothurOutEndLine(); 
698                 m->mothurCalling = true;
699         
700                 Command* uniqueCommand = new DeconvoluteCommand(inputString);
701                 uniqueCommand->execute();
702                 
703                 map<string, vector<string> > filenames = uniqueCommand->getOutputFiles();
704                 
705                 delete uniqueCommand;
706                 m->mothurCalling = false;
707                 m->mothurOut("/******************************************/"); m->mothurOutEndLine(); 
708                 
709                 string newnameFile = filenames["name"][0];
710                 string newfastaFile = filenames["fasta"][0];
711                 
712                 m->mothurRemove(fastaFile); rename(newfastaFile.c_str(), fastaFile.c_str()); 
713                 if (nameFile != newnameFile) { m->mothurRemove(nameFile); rename(newnameFile.c_str(), nameFile.c_str()); }
714                 
715                 return 0;
716         }
717         catch(exception& e) {
718                 m->errorOut(e, "ShhhSeqsCommand", "deconvoluteResults");
719                 exit(1);
720         }
721 }
722 //**********************************************************************************************************************
723
724
725