]> git.donarmstrong.com Git - mothur.git/blob - shhhseqscommand.cpp
added modify names parameter to set.dir
[mothur.git] / shhhseqscommand.cpp
1 /*
2  *  shhhseqscommand.cpp
3  *  Mothur
4  *
5  *  Created by westcott on 11/8/11.
6  *  Copyright 2011 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "shhhseqscommand.h"
11
12
13
14 //**********************************************************************************************************************
15 vector<string> ShhhSeqsCommand::setParameters(){        
16         try {
17                 CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none","fasta-map",false,true,true); parameters.push_back(pfasta);
18                 CommandParameter pname("name", "InputTypes", "", "", "none", "none", "none","name",false,true,true); parameters.push_back(pname);
19                 CommandParameter pgroup("group", "InputTypes", "", "", "none", "none", "none","",false,false); parameters.push_back(pgroup);
20                 CommandParameter pprocessors("processors", "Number", "", "1", "", "", "","",false,false,true); parameters.push_back(pprocessors);
21                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
22                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
23                 CommandParameter psigma("sigma", "Number", "", "0.01", "", "", "","",false,false); parameters.push_back(psigma);
24                 
25                 vector<string> myArray;
26                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
27                 return myArray;
28         }
29         catch(exception& e) {
30                 m->errorOut(e, "ShhhSeqsCommand", "setParameters");
31                 exit(1);
32         }
33 }
34 //**********************************************************************************************************************
35 string ShhhSeqsCommand::getHelpString(){        
36         try {
37                 string helpString = "";
38                 helpString += "The shhh.seqs command reads a fasta and name file and ....\n";
39                 helpString += "The shhh.seqs command parameters are fasta, name, group, sigma and processors.\n";
40                 helpString += "The fasta parameter allows you to enter the fasta file containing your sequences, and is required, unless you have a valid current fasta file. \n";
41                 helpString += "The name parameter allows you to provide a name file associated with your fasta file. It is required. \n";
42                 helpString += "The group parameter allows you to provide a group file.  When checking sequences, only sequences from the same group as the query sequence will be used as the reference. \n";
43                 helpString += "The processors parameter allows you to specify how many processors you would like to use.  The default is 1. \n";
44                 helpString += "The sigma parameter ....  The default is 0.01. \n";
45                 helpString += "The shhh.seqs command should be in the following format: \n";
46                 helpString += "shhh.seqs(fasta=yourFastaFile, name=yourNameFile) \n";
47                 helpString += "Example: shhh.seqs(fasta=AD.align, name=AD.names) \n";
48                 helpString += "Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFastaFile).\n";       
49                 return helpString;
50                 
51         }
52         catch(exception& e) {
53                 m->errorOut(e, "ShhhSeqsCommand", "getHelpString");
54                 exit(1);
55         }
56 }
57 //**********************************************************************************************************************
58 string ShhhSeqsCommand::getOutputPattern(string type) {
59     try {
60         string pattern = "";
61         
62         if (type == "fasta")            {   pattern = "[filename],shhh_seqs.fasta";   }
63         else if (type == "name")    {   pattern = "[filename],shhh_seqs.names";   }
64         else if (type == "map")        {   pattern = "[filename],shhh_seqs.map";   }
65         else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true;  }
66         
67         return pattern;
68     }
69     catch(exception& e) {
70         m->errorOut(e, "ShhhSeqsCommand", "getOutputPattern");
71         exit(1);
72     }
73 }
74
75 //**********************************************************************************************************************
76
77 ShhhSeqsCommand::ShhhSeqsCommand(){     
78         try {
79                 abort = true; calledHelp = true;
80                 setParameters();
81                 vector<string> tempOutNames;
82                 outputTypes["fasta"] = tempOutNames;
83                 outputTypes["name"] = tempOutNames;
84                 outputTypes["map"] = tempOutNames;
85         }
86         catch(exception& e) {
87                 m->errorOut(e, "ShhhSeqsCommand", "ShhhSeqsCommand");
88                 exit(1);
89         }
90 }
91
92 //**********************************************************************************************************************
93 ShhhSeqsCommand::ShhhSeqsCommand(string option) {
94         try {
95                 abort = false; calledHelp = false;   
96                 
97                 //allow user to run help
98                 if(option == "help") { help(); abort = true; calledHelp = true; }
99                 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
100                 
101                 else {
102                         vector<string> myArray = setParameters();
103                         
104                         OptionParser parser(option);
105                         map<string, string> parameters = parser.getParameters();
106                         
107                         ValidParameters validParameter;
108                         map<string, string>::iterator it;
109                         
110                         //check to make sure all parameters are valid for command
111                         for (map<string, string>::iterator it2 = parameters.begin(); it2 != parameters.end(); it2++) { 
112                                 if (validParameter.isValidParameter(it2->first, myArray, it2->second) != true) {  abort = true;  }
113                         }
114                         
115                         //initialize outputTypes
116                         vector<string> tempOutNames;
117                         outputTypes["fasta"] = tempOutNames;
118                         outputTypes["name"] = tempOutNames;
119                         outputTypes["map"] = tempOutNames;
120                         
121                         //if the user changes the input directory command factory will send this info to us in the output parameter 
122                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
123                         if (inputDir == "not found"){   inputDir = "";          }
124                         else {
125                                 string path;
126                                 it = parameters.find("fasta");
127                                 //user has given a template file
128                                 if(it != parameters.end()){ 
129                                         path = m->hasPath(it->second);
130                                         //if the user has not given a path then, add inputdir. else leave path alone.
131                                         if (path == "") {       parameters["fasta"] = inputDir + it->second;            }
132                                 }
133                                 
134                                 it = parameters.find("name");
135                                 //user has given a template file
136                                 if(it != parameters.end()){ 
137                                         path = m->hasPath(it->second);
138                                         //if the user has not given a path then, add inputdir. else leave path alone.
139                                         if (path == "") {       parameters["name"] = inputDir + it->second;             }
140                                 }
141                                 
142                                 it = parameters.find("group");
143                                 //user has given a template file
144                                 if(it != parameters.end()){ 
145                                         path = m->hasPath(it->second);
146                                         //if the user has not given a path then, add inputdir. else leave path alone.
147                                         if (path == "") {       parameters["group"] = inputDir + it->second;            }
148                                 }
149                         }
150                         
151                         //check for required parameters
152                         fastafile = validParameter.validFile(parameters, "fasta", true);
153                         if (fastafile == "not found") {                                 
154                                 fastafile = m->getFastaFile(); 
155                                 if (fastafile != "") { m->mothurOut("Using " + fastafile + " as input file for the fasta parameter."); m->mothurOutEndLine(); }
156                                 else {  m->mothurOut("You have no current fastafile and the fasta parameter is required."); m->mothurOutEndLine(); abort = true; }
157                         }
158                         else if (fastafile == "not open") { abort = true; }     
159                         else { m->setFastaFile(fastafile); }
160                         
161                         //if the user changes the output directory command factory will send this info to us in the output parameter 
162                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = ""; }
163                         
164                         //check for optional parameter and set defaults
165                         // ...at some point should added some additional type checking...
166                         namefile = validParameter.validFile(parameters, "name", true);
167                         if (namefile == "not found") {                  
168                                 namefile = m->getNameFile(); 
169                                 if (namefile != "") { m->mothurOut("Using " + namefile + " as input file for the name parameter."); m->mothurOutEndLine(); }
170                                 else {  m->mothurOut("You have no current namefile and the name parameter is required."); m->mothurOutEndLine(); abort = true; }
171                         }
172                         else if (namefile == "not open") { namefile =  ""; abort = true; }      
173                         else {  m->setNameFile(namefile); }
174                         
175                         groupfile = validParameter.validFile(parameters, "group", true);
176                         if (groupfile == "not found") { groupfile =  "";   }
177                         else if (groupfile == "not open") { abort = true; groupfile =  ""; }    
178                         else {   m->setGroupFile(groupfile);  }
179                         
180                         string temp     = validParameter.validFile(parameters, "sigma", false);         if(temp == "not found"){        temp = "0.01"; }
181                         m->mothurConvert(temp, sigma); 
182                         sigma = 1/sigma;
183             
184                         temp = validParameter.validFile(parameters, "processors", false);       if (temp == "not found"){       temp = m->getProcessors();      }
185                         m->setProcessors(temp);
186                         m->mothurConvert(temp, processors);
187                         
188                         if (namefile == "") {
189                                 vector<string> files; files.push_back(fastafile);
190                                 parser.getNameFile(files);
191                         }
192                 }
193         }
194         catch(exception& e) {
195                 m->errorOut(e, "ShhhSeqsCommand", "ShhhSeqsCommand");
196                 exit(1);
197         }
198 }
199 //**********************************************************************************************************************
200 int ShhhSeqsCommand::execute() {
201         try {
202                 
203                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
204                 
205                 if (outputDir == "") { outputDir = m->hasPath(fastafile);  }//if user entered a file with a path then preserve it               
206                 
207         map<string, string> variables; 
208                 variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(fastafile));
209                 string outputFileName = getOutputFileName("fasta",variables);
210                 string nameFileName = getOutputFileName("name",variables);
211                 string mapFileName = getOutputFileName("map",variables);
212                 
213                 if (groupfile != "") {
214                         //Parse sequences by group
215                         SequenceParser parser(groupfile, fastafile, namefile);
216                         vector<string> groups = parser.getNamesOfGroups();
217                         
218                         if (m->control_pressed) {  return 0; }
219                         
220                         //clears files
221                         ofstream out, out1, out2;
222                         m->openOutputFile(outputFileName, out); out.close(); 
223                         m->openOutputFile(nameFileName, out1); out1.close();
224                         mapFileName = outputDir + m->getRootName(m->getSimpleName(fastafile))  + "shhh.";
225                         
226                         vector<string> mapFileNames;
227                         if(processors == 1)     {       mapFileNames = driverGroups(parser, outputFileName, nameFileName, mapFileName, 0, groups.size(), groups);       }
228                         else                            {       mapFileNames = createProcessesGroups(parser, outputFileName, nameFileName, mapFileName, groups);                        }
229                         
230                         if (m->control_pressed) {    return 0;  }       
231                         
232                         for (int j = 0; j < mapFileNames.size(); j++) { outputNames.push_back(mapFileNames[j]); outputTypes["map"].push_back(mapFileNames[j]); }
233                         
234                         //deconvolute results by running unique.seqs
235                         deconvoluteResults(outputFileName, nameFileName);
236                         
237                         if (m->control_pressed) {   return 0;   }                               
238                         
239                 }else{  
240                         vector<string> sequences;
241                         vector<string> uniqueNames;
242                         vector<string> redundantNames;
243                         vector<int> seqFreq;
244                         
245                         seqNoise noise;
246                         correctDist* correct = new correctDist(processors);
247                         
248                         //reads fasta and name file and loads them in order
249                         readData(correct, noise, sequences, uniqueNames, redundantNames, seqFreq);
250                         if (m->control_pressed) { return 0; }
251                         
252                         //calc distances for cluster
253                         string distFileName = outputDir + m->getRootName(m->getSimpleName(fastafile)) + "shhh.dist";
254                         correct->execute(distFileName);
255                         delete correct;
256                         
257                         if (m->control_pressed) { m->mothurRemove(distFileName); return 0; }
258                         
259                         driver(noise, sequences, uniqueNames, redundantNames, seqFreq, distFileName, outputFileName, nameFileName, mapFileName); 
260                         outputNames.push_back(mapFileName); outputTypes["map"].push_back(mapFileName);
261                 }
262                 
263                 if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) {        m->mothurRemove(outputNames[j]);        } return 0; }
264                 
265                 outputNames.push_back(outputFileName); outputTypes["fasta"].push_back(outputFileName);
266                 outputNames.push_back(nameFileName); outputTypes["name"].push_back(nameFileName);
267                 
268                 m->mothurOutEndLine();
269                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
270                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }       
271                 m->mothurOutEndLine();
272                 
273                 //set accnos file as new current accnosfile
274                 string current = "";
275                 itTypes = outputTypes.find("fasta");
276                 if (itTypes != outputTypes.end()) {
277                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setFastaFile(current); }
278                 }
279                 
280                 itTypes = outputTypes.find("name");
281                 if (itTypes != outputTypes.end()) {
282                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setNameFile(current); }
283                 }
284                 
285                 
286                 return 0;
287         }
288         catch(exception& e) {
289                 m->errorOut(e, "ShhhSeqsCommand", "execute");
290                 exit(1);
291         }
292 }
293 //**********************************************************************************************************************
294 int ShhhSeqsCommand::readData(correctDist* correct, seqNoise& noise, vector<string>& seqs, vector<string>& uNames, vector<string>& rNames, vector<int>& freq) {
295         try {
296                 map<string, string> nameMap; 
297                 map<string, string>::iterator it;
298                 m->readNames(namefile, nameMap);
299                 bool error = false;
300                 
301                 ifstream in;
302                 m->openInputFile(fastafile, in);
303                 
304                 while (!in.eof()) {
305                         
306                         if (m->control_pressed) { in.close(); return 0; }
307                         
308                         Sequence seq(in); m->gobble(in);
309                         
310                         if (seq.getName() != "") {
311                                 correct->addSeq(seq.getName(), seq.getAligned());
312                                 
313                                 it = nameMap.find(seq.getName());
314                                 if (it != nameMap.end()) {
315                                         noise.addSeq(seq.getAligned(), seqs);
316                                         noise.addRedundantName(it->first, it->second, uNames, rNames, freq);
317                                 }else {
318                                         m->mothurOut("[ERROR]: " + seq.getName() + " is in your fasta file and not in your namefile, please correct.");
319                                         error = true;
320                                 }
321                         }
322                 }
323                 in.close();
324                 
325                 if (error) { m->control_pressed = true; }
326                 
327                 return seqs.size();
328                 
329         }catch(exception& e) {
330                 m->errorOut(e, "ShhhSeqsCommand", "readData");
331                 exit(1);
332         }
333 }
334 //**********************************************************************************************************************
335 int ShhhSeqsCommand::loadData(correctDist* correct, seqNoise& noise, vector<string>& seqs, vector<string>& uNames, vector<string>& rNames, vector<int>& freq, map<string, string>& nameMap, vector<Sequence>& sequences) {
336         try {
337                 bool error = false;
338                 map<string, string>::iterator it;
339                 
340                 for (int i = 0; i < sequences.size(); i++) {
341                         
342                         if (m->control_pressed) { return 0; }
343                         
344                         if (sequences[i].getName() != "") {
345                                 correct->addSeq(sequences[i].getName(), sequences[i].getAligned());
346                                 
347                                 it = nameMap.find(sequences[i].getName());
348                                 if (it != nameMap.end()) {
349                                         noise.addSeq(sequences[i].getAligned(), seqs);
350                                         noise.addRedundantName(it->first, it->second, uNames, rNames, freq);
351                                 }else {
352                                         m->mothurOut("[ERROR]: " + sequences[i].getName() + " is in your fasta file and not in your namefile, please correct.");
353                                         error = true;
354                                 }
355                         }
356                 }
357                                 
358                 if (error) { m->control_pressed = true; }
359                 
360                 return seqs.size();
361                 
362         }catch(exception& e) {
363                 m->errorOut(e, "ShhhSeqsCommand", "loadData");
364                 exit(1);
365         }
366 }
367 /**************************************************************************************************/
368 vector<string> ShhhSeqsCommand::createProcessesGroups(SequenceParser& parser, string newFName, string newNName, string newMName, vector<string> groups) {
369         try {
370                 
371                 vector<int> processIDS;
372                 int process = 1;
373                 vector<string> mapfileNames;
374                 
375                 //sanity check
376                 if (groups.size() < processors) { processors = groups.size(); }
377                 
378                 //divide the groups between the processors
379                 vector<linePair> lines;
380                 int numGroupsPerProcessor = groups.size() / processors;
381                 for (int i = 0; i < processors; i++) {
382                         int startIndex =  i * numGroupsPerProcessor;
383                         int endIndex = (i+1) * numGroupsPerProcessor;
384                         if(i == (processors - 1)){      endIndex = groups.size();       }
385                         lines.push_back(linePair(startIndex, endIndex));
386                 }
387                 
388 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)          
389                 
390                 //loop through and create all the processes you want
391                 while (process != processors) {
392                         int pid = fork();
393                         
394                         if (pid > 0) {
395                                 processIDS.push_back(pid);  //create map from line number to pid so you can append files in correct order later
396                                 process++;
397                         }else if (pid == 0){
398                                 mapfileNames = driverGroups(parser, newFName + toString(getpid()) + ".temp", newNName + toString(getpid()) + ".temp", newMName, lines[process].start, lines[process].end, groups);
399                                 
400                                 //pass filenames to parent
401                                 ofstream out;
402                                 string tempFile = newMName + toString(getpid()) + ".temp";
403                                 m->openOutputFile(tempFile, out);
404                                 out << mapfileNames.size() << endl;
405                                 for (int i = 0; i < mapfileNames.size(); i++) {
406                                         out << mapfileNames[i] << endl;
407                                 }
408                                 out.close();
409                                 
410                                 exit(0);
411                         }else { 
412                                 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine(); 
413                                 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
414                                 exit(0);
415                         }
416                 }
417                 
418                 //do my part
419                 mapfileNames = driverGroups(parser, newFName, newNName, newMName, lines[0].start, lines[0].end, groups);
420                 
421                 //force parent to wait until all the processes are done
422                 for (int i=0;i<processIDS.size();i++) { 
423                         int temp = processIDS[i];
424                         wait(&temp);
425                 }
426                 
427                 //append output files
428                 for(int i=0;i<processIDS.size();i++){
429                         ifstream in;
430                         string tempFile =  newMName + toString(processIDS[i]) + ".temp";
431                         m->openInputFile(tempFile, in);
432                         if (!in.eof()) { 
433                                 int tempNum = 0; in >> tempNum;  m->gobble(in);
434                                 for (int j = 0; j < tempNum; j++) {
435                                         string filename;
436                                         in >> filename; m->gobble(in);
437                                         mapfileNames.push_back(filename);
438                                 }
439                         }
440                         in.close(); m->mothurRemove(tempFile);
441                         
442                 }
443 #else
444                 
445                 //////////////////////////////////////////////////////////////////////////////////////////////////////
446                 //Windows version shared memory, so be careful when passing variables through the shhhseqsData struct. 
447                 //Above fork() will clone, so memory is separate, but that's not the case with windows, 
448                 //////////////////////////////////////////////////////////////////////////////////////////////////////
449                 
450                 vector<shhhseqsData*> pDataArray; 
451                 DWORD   dwThreadIdArray[processors-1];
452                 HANDLE  hThreadArray[processors-1]; 
453                 
454                 //Create processor worker threads.
455                 for( int i=1; i<processors; i++ ){
456                         // Allocate memory for thread data.
457                         string extension = toString(i) + ".temp";
458
459                         shhhseqsData* tempShhhseqs = new shhhseqsData(fastafile, namefile, groupfile, (newFName+extension), (newNName+extension), newMName, groups, m, lines[i].start, lines[i].end, sigma, i);
460                         pDataArray.push_back(tempShhhseqs);
461                         processIDS.push_back(i);
462                         
463                         //MySeqSumThreadFunction is in header. It must be global or static to work with the threads.
464                         //default security attributes, thread function name, argument to thread function, use default creation flags, returns the thread identifier
465                         hThreadArray[i-1] = CreateThread(NULL, 0, MyShhhSeqsThreadFunction, pDataArray[i-1], 0, &dwThreadIdArray[i-1]);   
466                 }
467                 
468                 
469                 //using the main process as a worker saves time and memory
470                 mapfileNames = driverGroups(parser, newFName, newNName, newMName, lines[0].start, lines[0].end, groups);
471                 
472                 //Wait until all threads have terminated.
473                 WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE);
474                 
475                 //Close all thread handles and free memory allocations.
476                 for(int i=0; i < pDataArray.size(); i++){
477             if (pDataArray[i]->count != (pDataArray[i]->end-pDataArray[i]->start)) {
478                 m->mothurOut("[ERROR]: process " + toString(i) + " only processed " + toString(pDataArray[i]->count) + " of " + toString(pDataArray[i]->end-pDataArray[i]->start) + " groups assigned to it, quitting. \n"); m->control_pressed = true; 
479             }
480                         for (int j = 0; j < pDataArray[i]->mapfileNames.size(); j++) {
481                                 mapfileNames.push_back(pDataArray[i]->mapfileNames[j]);
482                         }
483                         CloseHandle(hThreadArray[i]);
484                         delete pDataArray[i];
485                 }
486                 
487 #endif          
488                 
489                 //append output files
490                 for(int i=0;i<processIDS.size();i++){
491                         m->appendFiles((newFName + toString(processIDS[i]) + ".temp"), newFName);
492                         m->mothurRemove((newFName + toString(processIDS[i]) + ".temp"));
493                         
494                         m->appendFiles((newNName + toString(processIDS[i]) + ".temp"), newNName);
495                         m->mothurRemove((newNName + toString(processIDS[i]) + ".temp"));
496                 }
497                 
498                 return mapfileNames;    
499                 
500         }
501         catch(exception& e) {
502                 m->errorOut(e, "ShhhSeqsCommand", "createProcessesGroups");
503                 exit(1);
504         }
505 }
506 /**************************************************************************************************/
507 vector<string> ShhhSeqsCommand::driverGroups(SequenceParser& parser, string newFFile, string newNFile, string newMFile, int start, int end, vector<string> groups){
508         try {
509                 
510                 vector<string> mapFileNames;
511                 
512                 for (int i = start; i < end; i++) {
513                         
514                         start = time(NULL);
515                         
516                         if (m->control_pressed) {  return mapFileNames; }
517                         
518                         m->mothurOutEndLine(); m->mothurOut("Processing group " + groups[i] + ":"); m->mothurOutEndLine();
519                         
520                         map<string, string> thisNameMap;
521                         thisNameMap = parser.getNameMap(groups[i]); 
522                         vector<Sequence> thisSeqs = parser.getSeqs(groups[i]);
523                         
524                         vector<string> sequences;
525                         vector<string> uniqueNames;
526                         vector<string> redundantNames;
527                         vector<int> seqFreq;
528                         
529                         seqNoise noise;
530                         correctDist* correct = new correctDist(1); //we use one processor since we already split up the work load.
531                         
532                         //load this groups info in order
533                         loadData(correct, noise, sequences, uniqueNames, redundantNames, seqFreq, thisNameMap, thisSeqs);
534                         if (m->control_pressed) { return mapFileNames; }
535                         
536                         //calc distances for cluster
537                         string distFileName = outputDir + m->getRootName(m->getSimpleName(fastafile)) + groups[i] + ".shhh.dist";
538                         correct->execute(distFileName);
539                         delete correct;
540                         
541                         if (m->control_pressed) { m->mothurRemove(distFileName); return mapFileNames; }
542                         
543                         driver(noise, sequences, uniqueNames, redundantNames, seqFreq, distFileName, newFFile+groups[i], newNFile+groups[i], newMFile+groups[i]+".map"); 
544                         
545                         if (m->control_pressed) { return mapFileNames; }
546                         
547                         m->appendFiles(newFFile+groups[i], newFFile); m->mothurRemove(newFFile+groups[i]);
548                         m->appendFiles(newNFile+groups[i], newNFile); m->mothurRemove(newNFile+groups[i]);
549                         mapFileNames.push_back(newMFile+groups[i]+".map");
550                         
551                         m->mothurOut("It took " + toString(time(NULL) - start) + " secs to process group " + groups[i] + "."); m->mothurOutEndLine(); 
552                 }
553                 
554                 return mapFileNames;
555         }
556         catch(exception& e) {
557                 m->errorOut(e, "ShhhSeqsCommand", "driverGroups");
558                 exit(1);
559         }
560 }
561 //**********************************************************************************************************************
562 int ShhhSeqsCommand::driver(seqNoise& noise, 
563                                                         vector<string>& sequences, 
564                                                         vector<string>& uniqueNames, 
565                                                         vector<string>& redundantNames, 
566                                                         vector<int>& seqFreq, 
567                                                         string distFileName, string outputFileName, string nameFileName, string mapFileName) {
568         try {
569                 double cutOff = 0.08;
570                 int minIter = 10;
571                 int maxIter = 1000;
572                 double minDelta = 1e-6;
573                 int numIters = 0;
574                 double maxDelta = 1e6;
575                 int numSeqs = sequences.size();
576                                 
577                 //run cluster command
578                 string inputString = "phylip=" + distFileName + ", method=furthest, cutoff=0.08";
579                 m->mothurOut("/******************************************/"); m->mothurOutEndLine(); 
580                 m->mothurOut("Running command: cluster(" + inputString + ")"); m->mothurOutEndLine(); 
581                 
582                 Command* clusterCommand = new ClusterCommand(inputString);
583                 clusterCommand->execute();
584                 
585                 map<string, vector<string> > filenames = clusterCommand->getOutputFiles();
586                 string listFileName = filenames["list"][0];
587                 string rabundFileName = filenames["rabund"][0]; m->mothurRemove(rabundFileName);
588                 string sabundFileName = filenames["sabund"][0]; m->mothurRemove(sabundFileName);
589         
590                 delete clusterCommand;
591                 m->mothurOut("/******************************************/"); m->mothurOutEndLine(); 
592                 
593                 if (m->control_pressed) { m->mothurRemove(distFileName); m->mothurRemove(listFileName); return 0; }
594                 
595                 vector<double> distances(numSeqs * numSeqs);
596                 noise.getDistanceData(distFileName, distances);
597                 m->mothurRemove(distFileName); 
598                 if (m->control_pressed) { m->mothurRemove(listFileName); return 0; }
599                 
600                 vector<int> otuData(numSeqs);
601                 vector<int> otuFreq;
602                 vector<vector<int> > otuBySeqLookUp;
603                 noise.getListData(listFileName, cutOff, otuData, otuFreq, otuBySeqLookUp);
604                 m->mothurRemove(listFileName);
605                 if (m->control_pressed) { return 0; }
606                 
607                 int numOTUs = otuFreq.size();
608                 
609                 vector<double> weights(numOTUs, 0);
610                 vector<int> change(numOTUs, 1);
611                 vector<int> centroids(numOTUs, -1);
612                 vector<int> cumCount(numOTUs, 0);
613                 
614                 vector<double> tau(numSeqs, 1);
615                 vector<int> anP(numSeqs, 0);
616                 vector<int> anI(numSeqs, 0);
617                 vector<int> anN(numSeqs, 0);
618                 vector<vector<int> > aanI = otuBySeqLookUp;
619                 
620                 while(numIters < minIter || ((maxDelta > minDelta) && (numIters < maxIter))){
621                         
622                         if (m->control_pressed) { return 0; }
623                         
624                         noise.updateOTUCountData(otuFreq, otuBySeqLookUp, aanI, anP, anI, cumCount); if (m->control_pressed) { return 0; }
625                         maxDelta = noise.calcNewWeights(weights, seqFreq, anI, cumCount, anP, otuFreq, tau);  if (m->control_pressed) { return 0; }
626                         
627                         noise.calcCentroids(anI, anP, change, centroids, cumCount, distances, seqFreq, otuFreq, tau); if (m->control_pressed) { return 0; }
628                         noise.checkCentroids(weights, centroids); if (m->control_pressed) { return 0; }
629                          
630                         otuFreq.assign(numOTUs, 0);
631                         
632                         int total = 0;
633                         
634                         for(int i=0;i<numSeqs;i++){
635                                 if (m->control_pressed) { return 0; }
636                                 
637                                 double offset = 1e6;
638                                 double norm = 0.0000;
639                                 double minWeight = 0.1;
640                                 vector<double> currentTau(numOTUs);
641                                 
642                                 for(int j=0;j<numOTUs;j++){
643                                         if (m->control_pressed) { return 0; }
644                                         if(weights[j] > minWeight && distances[i * numSeqs+centroids[j]] < offset){
645                                                 offset = distances[i * numSeqs+centroids[j]];
646                                         }
647                                 }
648                                 
649                                 for(int j=0;j<numOTUs;j++){
650                                         if (m->control_pressed) { return 0; }
651                                         if(weights[j] > minWeight){
652                                                 currentTau[j] = exp(sigma * (-distances[(i * numSeqs + centroids[j])] + offset)) * weights[j];
653                                                 norm += currentTau[j];
654                                         }
655                                         else{
656                                                 currentTau[j] = 0.0000;
657                                         }
658                                 }                       
659                                 
660                                 for(int j=0;j<numOTUs;j++){
661                                         if (m->control_pressed) { return 0; }
662                                         currentTau[j] /= norm;
663                                 }
664                                 
665                                 for(int j=0;j<numOTUs;j++){
666                                         if (m->control_pressed) { return 0; }
667                                         
668                                         if(currentTau[j] > 1.0e-4){
669                                                 int oldTotal = total;
670                                                 total++;
671                                                 
672                                                 tau.resize(oldTotal+1);
673                                                 tau[oldTotal] = currentTau[j];
674                                                 otuBySeqLookUp[j][otuFreq[j]] = oldTotal;
675                                                 aanI[j][otuFreq[j]] = i;
676                                                 otuFreq[j]++;
677                                                 
678                                         }
679                                 }
680                                 
681                                 anP.resize(total);
682                                 anI.resize(total);
683                         }
684                         
685                         numIters++;
686                 }
687                 
688                 noise.updateOTUCountData(otuFreq, otuBySeqLookUp, aanI, anP, anI, cumCount);  if (m->control_pressed) { return 0; }
689                 
690                 vector<double> percentage(numSeqs);
691                 noise.setUpOTUData(otuData, percentage, cumCount, tau, otuFreq, anP, anI);  if (m->control_pressed) { return 0; }
692                 noise.finishOTUData(otuData, otuFreq, anP, anI, cumCount, otuBySeqLookUp, aanI, tau);  if (m->control_pressed) { return 0; }
693                 
694                 change.assign(numOTUs, 1);
695                 noise.calcCentroids(anI, anP, change, centroids, cumCount, distances, seqFreq, otuFreq, tau); if (m->control_pressed) { return 0; }
696                 
697                 
698                 vector<int> finalTau(numOTUs, 0);
699                 for(int i=0;i<numSeqs;i++){
700                         if (m->control_pressed) { return 0; }
701                         finalTau[otuData[i]] += int(seqFreq[i]);
702                 }
703                 
704                 noise.writeOutput(outputFileName, nameFileName, mapFileName, finalTau, centroids, otuData, sequences, uniqueNames, redundantNames, seqFreq, distances);
705                 
706                 return 0;
707                 
708         }catch(exception& e) {
709                 m->errorOut(e, "ShhhSeqsCommand", "driver");
710                 exit(1);
711         }
712 }       
713 //**********************************************************************************************************************
714 int ShhhSeqsCommand::deconvoluteResults(string fastaFile, string nameFile){
715         try {
716                 m->mothurOutEndLine(); m->mothurOut("Deconvoluting results:"); m->mothurOutEndLine(); m->mothurOutEndLine();
717                 
718                 //use unique.seqs to create new name and fastafile
719                 string inputString = "fasta=" + fastaFile + ", name=" + nameFile;
720                 m->mothurOut("/******************************************/"); m->mothurOutEndLine(); 
721                 m->mothurOut("Running command: unique.seqs(" + inputString + ")"); m->mothurOutEndLine(); 
722                 m->mothurCalling = true;
723         
724                 Command* uniqueCommand = new DeconvoluteCommand(inputString);
725                 uniqueCommand->execute();
726                 
727                 map<string, vector<string> > filenames = uniqueCommand->getOutputFiles();
728                 
729                 delete uniqueCommand;
730                 m->mothurCalling = false;
731                 m->mothurOut("/******************************************/"); m->mothurOutEndLine(); 
732                 
733                 string newnameFile = filenames["name"][0];
734                 string newfastaFile = filenames["fasta"][0];
735                 
736                 m->mothurRemove(fastaFile); rename(newfastaFile.c_str(), fastaFile.c_str()); 
737                 if (nameFile != newnameFile) { m->mothurRemove(nameFile); rename(newnameFile.c_str(), nameFile.c_str()); }
738                 
739                 return 0;
740         }
741         catch(exception& e) {
742                 m->errorOut(e, "ShhhSeqsCommand", "deconvoluteResults");
743                 exit(1);
744         }
745 }
746 //**********************************************************************************************************************
747
748
749