]> git.donarmstrong.com Git - mothur.git/blob - pipelinepdscommand.cpp
done testing 1.14.0
[mothur.git] / pipelinepdscommand.cpp
1 /*
2  *  pipelinepdscommand.cpp
3  *  Mothur
4  *
5  *  Created by westcott on 10/5/10.
6  *  Copyright 2010 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "pipelinepdscommand.h"
11 #include "sffinfocommand.h"
12 #include "commandoptionparser.hpp"
13
14 //**********************************************************************************************************************
15 vector<string> PipelineCommand::getValidParameters(){   
16         try {
17                 string Array[] =  {"sff","oligos","align","chimera","classify","taxonomy","pipeline","processors","outputdir","inputdir"};
18                 vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
19                 return myArray;
20         }
21         catch(exception& e) {
22                 m->errorOut(e, "PipelineCommand", "getValidParameters");
23                 exit(1);
24         }
25 }
26 //**********************************************************************************************************************
27 vector<string> PipelineCommand::getRequiredParameters(){        
28         try {
29                 vector<string> myArray;
30                 return myArray;
31         }
32         catch(exception& e) {
33                 m->errorOut(e, "PipelineCommand", "getRequiredParameters");
34                 exit(1);
35         }
36 }
37 //**********************************************************************************************************************
38 vector<string> PipelineCommand::getRequiredFiles(){     
39         try {
40                 vector<string> myArray;
41                 return myArray;
42         }
43         catch(exception& e) {
44                 m->errorOut(e, "PipelineCommand", "getRequiredFiles");
45                 exit(1);
46         }
47 }
48 //**********************************************************************************************************************
49 PipelineCommand::PipelineCommand(string option) {
50         try {
51                 cFactory = CommandFactory::getInstance();
52                 abort = false;
53                 
54                 //allow user to run help
55                 if(option == "help") { help(); abort = true; }
56                 
57                 else {
58                         
59                         //valid paramters for this command
60                         string AlignArray[] =  {"sff","oligos","align","chimera","classify","taxonomy","pipeline","processors","outputdir","inputdir"};
61                         vector<string> myArray (AlignArray, AlignArray+(sizeof(AlignArray)/sizeof(string)));
62                         
63                         OptionParser parser(option);
64                         map<string, string> parameters = parser.getParameters(); 
65                         
66                         ValidParameters validParameter;
67                         map<string, string>::iterator it;
68                         
69                         //check to make sure all parameters are valid for command
70                         for (it = parameters.begin(); it != parameters.end(); it++) { 
71                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
72                         }
73                         
74                         //if the user changes the input directory command factory will send this info to us in the output parameter 
75                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
76                         if (inputDir == "not found"){   inputDir = "";          }
77                         else {
78                                 string path;
79                                 it = parameters.find("sff");
80                                 //user has given a template file
81                                 if(it != parameters.end()){ 
82                                         path = m->hasPath(it->second);
83                                         //if the user has not given a path then, add inputdir. else leave path alone.
84                                         if (path == "") {       parameters["sff"] = inputDir + it->second;              }
85                                 }
86                                 
87                                 it = parameters.find("oligos");
88                                 //user has given a template file
89                                 if(it != parameters.end()){ 
90                                         path = m->hasPath(it->second);
91                                         //if the user has not given a path then, add inputdir. else leave path alone.
92                                         if (path == "") {       parameters["oligos"] = inputDir + it->second;           }
93                                 }
94                                 
95                                 it = parameters.find("align");
96                                 //user has given a template file
97                                 if(it != parameters.end()){ 
98                                         path = m->hasPath(it->second);
99                                         //if the user has not given a path then, add inputdir. else leave path alone.
100                                         if (path == "") {       parameters["align"] = inputDir + it->second;            }
101                                 }
102                                 
103                                 it = parameters.find("chimera");
104                                 //user has given a template file
105                                 if(it != parameters.end()){ 
106                                         path = m->hasPath(it->second);
107                                         //if the user has not given a path then, add inputdir. else leave path alone.
108                                         if (path == "") {       parameters["chimera"] = inputDir + it->second;          }
109                                 }
110                                 
111                                 it = parameters.find("classify");
112                                 //user has given a template file
113                                 if(it != parameters.end()){ 
114                                         path = m->hasPath(it->second);
115                                         //if the user has not given a path then, add inputdir. else leave path alone.
116                                         if (path == "") {       parameters["classify"] = inputDir + it->second;         }
117                                 }
118                                 
119                                 it = parameters.find("taxonomy");
120                                 //user has given a template file
121                                 if(it != parameters.end()){ 
122                                         path = m->hasPath(it->second);
123                                         //if the user has not given a path then, add inputdir. else leave path alone.
124                                         if (path == "") {       parameters["taxonomy"] = inputDir + it->second;         }
125                                 }
126                                 
127                                 it = parameters.find("pipeline");
128                                 //user has given a template file
129                                 if(it != parameters.end()){ 
130                                         path = m->hasPath(it->second);
131                                         //if the user has not given a path then, add inputdir. else leave path alone.
132                                         if (path == "") {       parameters["pipeline"] = inputDir + it->second;         }
133                                 }
134                         }
135                         
136                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = ""; }
137                         
138                         pipeFilename = validParameter.validFile(parameters, "pipeline", true);
139                         if (pipeFilename == "not found") { pipeFilename = "";  }
140                         else if (pipeFilename == "not open") { pipeFilename = ""; abort = true; }
141                         
142                         string temp = validParameter.validFile(parameters, "processors", false);        if (temp == "not found"){       temp = "1";                             }
143                         convert(temp, processors); 
144                         
145                         if (pipeFilename != "") {
146                                 abort = readUsersPipeline();
147                         }else{
148                                 sffFile = validParameter.validFile(parameters, "sff", true);
149                                 if (sffFile == "not found") { m->mothurOut("sff is a required parameter for the pipeline command."); m->mothurOutEndLine(); abort = true;  }
150                                 else if (sffFile == "not open") { sffFile = ""; abort = true; }
151                                         
152                                 oligosFile = validParameter.validFile(parameters, "oligos", true);
153                                 if (oligosFile == "not found") { m->mothurOut("oligos is a required parameter for the pipeline command."); m->mothurOutEndLine(); abort = true;  }
154                                 else if (oligosFile == "not open") { oligosFile = ""; abort = true; }
155                                         
156                                 alignFile = validParameter.validFile(parameters, "align", true);
157                                 if (alignFile == "not found") { m->mothurOut("align is a required parameter for the pipeline command. Please provide the template to align with."); m->mothurOutEndLine(); abort = true;  }
158                                 else if (alignFile == "not open") { alignFile = ""; abort = true; }
159
160                                 chimeraFile = validParameter.validFile(parameters, "chimera", true);
161                                 if (chimeraFile == "not found") { m->mothurOut("chimera is a required parameter for the pipeline command. Please provide the template to check for chimeras with."); m->mothurOutEndLine(); abort = true;  }
162                                 else if (chimeraFile == "not open") { chimeraFile = ""; abort = true; }
163
164                                 classifyFile = validParameter.validFile(parameters, "classify", true);
165                                 if (classifyFile == "not found") { m->mothurOut("classify is a required parameter for the pipeline command. Please provide the template to use with the classifier."); m->mothurOutEndLine(); abort = true;  }
166                                 else if (classifyFile == "not open") { classifyFile = ""; abort = true; }
167
168                                 taxonomyFile = validParameter.validFile(parameters, "taxonomy", true);
169                                 if (taxonomyFile == "not found") { m->mothurOut("taxonomy is a required parameter for the pipeline command."); m->mothurOutEndLine(); abort = true;  }
170                                 else if (taxonomyFile == "not open") { taxonomyFile = ""; abort = true; }
171                         }
172                 }
173
174         }
175         catch(exception& e) {
176                 m->errorOut(e, "PipelineCommand", "PipelineCommand");
177                 exit(1);
178         }
179 }
180 //**********************************************************************************************************************
181
182 void PipelineCommand::help(){
183         try {
184                  m->mothurOut("The pipeline command is designed to guide you through your analysis using mothur.\n"); 
185                  m->mothurOut("The pipeline command parameters are pipeline, sff, oligos, align, chimera, classify, taxonomy and processors.\n"); 
186                  m->mothurOut("The sff parameter allows you to enter your sff file. It is required.\n"); 
187                  m->mothurOut("The oligos parameter allows you to enter your oligos file. It is required.\n"); 
188                  m->mothurOut("The align parameter allows you to enter a template to use with the aligner. It is required.\n"); 
189                  m->mothurOut("The chimera parameter allows you to enter a template to use for chimera detection. It is required.\n"); 
190                  m->mothurOut("The classify parameter allows you to enter a template to use for classification. It is required.\n"); 
191                  m->mothurOut("The taxonomy parameter allows you to enter a taxonomy file for the classify template to use for classification. It is required.\n"); 
192                  m->mothurOut("The processors parameter allows you to specify the number of processors to use. The default is 1.\n");
193                  m->mothurOut("The pipeline parameter allows you to enter your own pipeline file. This file should look like a mothur batchfile, but where you would be using a mothur generated file, you can use mothurmade instead.\n"); 
194                  m->mothurOut("First column contains the command name, and the second column contains the parameter options or 'defaults', meaning use defaults. You may leave out file options.\n");
195                  m->mothurOut("Example: trim.seqs(processors=8, allfiles=T, maxambig=0, maxhomop=8, flip=T, bdiffs=1, pdiffs=2, qwindowaverage=35, qwindowsize=50, fasta=may1.v13.fasta, oligos=may1.v13.oligos, qfile=may1.v13.qual)\n");
196                  m->mothurOut("then, you could enter unique.seqs(fasta=mothurmade), and mothur would use the .trim.fasta file from the trim.seqs command. \n");
197                  m->mothurOut("then you could enter align.seqs(candidate=mothurmade, template=silva.v13.align, processors=8). , and mothur would use the .trim.unique.fasta file from the unique.seqs command. \n");
198                  m->mothurOut("If no pipeline file is given then mothur will use Pat's pipeline. \n\n");
199                  m->mothurOut("Here is a list of the commands used in Pat's pipeline.\n"); 
200                  m->mothurOut("All paralellized commands will use the processors you entered.\n"); 
201                  m->mothurOut("The sffinfo command takes your sff file and extracts the fasta and quality files.\n"); 
202                  m->mothurOut("The trim.seqs command uses your oligos file and the quality and fasta files generated by sffinfo.\n"); 
203                  m->mothurOut("The trim.seqs command sets the following parameters: allfiles=T, maxambig=0, maxhomop=8, flip=T, bdiffs=1, pdiffs=2, qwindowaverage=35, qwindowsize=50.\n"); 
204                  m->mothurOut("The unique.seqs command uses the trimmed fasta file and removes redundant sequences, don't worry the names file generated by unique.seqs will be used in the pipeline to make sure they are included.\n"); 
205                  m->mothurOut("The align.seqs command aligns the unique sequences using the aligners default options. \n"); 
206                  m->mothurOut("The screen.seqs command screens the sequences using optimize=end-minlength. \n"); 
207                  m->mothurOut("The pipeline uses chimera.slayer to detect chimeras using the default options. \n");
208                  m->mothurOut("The pipeline removes all sequences determined to be chimeric by chimera.slayer. \n");
209                  m->mothurOut("The filter.seqs command filters the sequences using vertical=T, trump=. \n"); 
210                  m->mothurOut("The unique.seqs command uses the filtered fasta file and name file to remove sequences that have become redundant after filtering.\n"); 
211                  m->mothurOut("The pre.cluster command clusters sequences that have no more than 2 differences.\n"); 
212                  m->mothurOut("The dist.seqs command is used to generate a column and phylip formatted distance matrix using cutoff=0.20 for column.\n"); 
213                  m->mothurOut("The pipeline uses cluster with method=average, hard=T. \n");
214                  m->mothurOut("The classify.seqs command is used to classify the sequences using the bayesian method with a cutoff of 80.\n"); 
215                  m->mothurOut("The phylotype command is used to cluster the sequences based on their classification.\n"); 
216                  m->mothurOut("The clearcut command is used to generate a tree using neighbor=T. \n");
217                  m->mothurOut("The summary.single and summary.shared commands are run on the otu files from cluster and phylotype commands. \n");
218                  m->mothurOut("The summary.shared command uses calc=sharednseqs-sharedsobs-sharedchao-sharedace-anderberg-jclass-jest-kulczynski-kulczynskicody-lennon-ochiai-sorclass-sorest-whittaker-braycurtis-jabund-morisitahorn-sorabund-thetan-thetayc. \n");
219                  m->mothurOut("The summary.single command uses calc=nseqs-sobs-coverage-bergerparker-chao-ace-jack-bootstrap-boneh-efron-shen-solow-shannon-npshannon-invsimpson-qstat-simpsoneven-shannoneven-heip-smithwilson. \n");
220                  m->mothurOut("The classify.otu command is used to get the concensus taxonomy for otu files from cluster and phylotype commands. \n");
221                  m->mothurOut("The phylo.diversity command run on the tree generated by clearcut with rarefy=T, iters=100. \n");
222                  m->mothurOut("The unifrac commands are also run on the tree generated by clearcut with random=F, distance=T. \n");
223                  m->mothurOut("\n\n");
224         }
225         catch(exception& e) {
226                 m->errorOut(e, "PipelineCommand", "help");
227                 exit(1);
228         }
229 }
230
231 //**********************************************************************************************************************
232
233 PipelineCommand::~PipelineCommand(){}
234
235 //**********************************************************************************************************************
236
237 int PipelineCommand::execute(){
238         try {
239                 if (abort == true) { return 0; }
240                 
241                 int start = time(NULL);
242                 
243                 if (pipeFilename == "") { 
244                         createPatsPipeline(); 
245                         
246                         //run Pats pipeline
247                         for (int i = 0; i < commands.size(); i++) {
248                                 m->mothurOutEndLine(); m->mothurOut("mothur > " + commands[i]); m->mothurOutEndLine();
249                                                         
250                                 if (m->control_pressed) { return 0; }
251
252                                 CommandOptionParser parser(commands[i]);
253                                 string commandName = parser.getCommandString();
254                                 string options = parser.getOptionString();
255                                         
256                                 #ifdef USE_MPI
257                                         int pid;
258                                         MPI_Comm_rank(MPI_COMM_WORLD, &pid); 
259                                         
260                                         if ((cFactory->MPIEnabled(commandName)) || (pid == 0)) {
261                                 #endif
262                                 
263                                 //executes valid command
264                                 Command* command = cFactory->getCommand(commandName, options, "pipe");
265                                 command->execute();
266                                 
267                                 //add output files to list
268                                 map<string, vector<string> > thisCommandsFile = command->getOutputFiles();
269                                 map<string, vector<string> >::iterator itMade;
270                                 for (itMade = thisCommandsFile.begin(); itMade != thisCommandsFile.end(); itMade++) { 
271                                         vector<string> temp = itMade->second; 
272                                         for (int j = 0; j < temp.size(); j++) { outputNames.push_back(temp[j]); }
273                                 }
274                                                                         
275                                 #ifdef USE_MPI
276                                         }
277                                 #endif
278                         }
279                         
280                 }else {  runUsersPipeline(); }
281                 
282                 if (m->control_pressed) { return 0; }
283                 
284                 m->mothurOut("It took " + toString(time(NULL) - start) + " secs to run the pipeline analysis."); m->mothurOutEndLine(); m->mothurOutEndLine();
285                 
286                 m->mothurOutEndLine();
287                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
288                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
289                 m->mothurOutEndLine();
290                 
291                 return 0;
292         }
293         catch(exception& e) {
294                 m->errorOut(e, "PipelineCommand", "execute");
295                 exit(1);
296         }
297 }
298 //**********************************************************************************************************************
299
300 bool PipelineCommand::readUsersPipeline(){
301         try {
302                 
303                 ifstream in;
304                 m->openInputFile(pipeFilename, in);
305                 
306                 string nextCommand = "";
307                 
308                 map<string, vector<string> > mothurMadeFiles;
309                 
310                 while(!in.eof()) {
311                         nextCommand = m->getline(in); m->gobble(in);
312
313                         if (nextCommand[0] != '#') {
314                                 bool error = false;
315                                 
316                                 string commandName, options;
317                                 error = parseCommand(nextCommand, commandName, options);
318                                 
319                                 if (error) { in.close(); return error; }
320                                 if (commandName == "pipeline.pds") { m->mothurOut("Cannot run the pipeline.pds command from inside the pipeline.pds command."); m->mothurOutEndLine(); in.close(); return true; }
321                                 
322                                 error = checkForValidAndRequiredParameters(commandName, options, mothurMadeFiles);
323                                 
324                                 if (error) { in.close(); return error; }
325                         }
326                 }
327                 
328                 in.close();
329                 
330                 return false;
331         }
332         catch(exception& e) {
333                 m->errorOut(e, "PipelineCommand", "readUsersPipeline");
334                 exit(1);
335         }
336 }
337 //**********************************************************************************************************************
338
339 bool PipelineCommand::parseCommand(string nextCommand, string& name, string& options){
340         try {
341                 CommandOptionParser parser(nextCommand);
342                 name = parser.getCommandString();
343                 options = parser.getOptionString();
344                 
345                 if (name == "") { return true; } //name == "" if () are not right
346                 
347                 return false;
348         }
349         catch(exception& e) {
350                 m->errorOut(e, "PipelineCommand", "parseCommand");
351                 exit(1);
352         }
353 }
354 //**********************************************************************************************************************
355
356 bool PipelineCommand::checkForValidAndRequiredParameters(string name, string options, map<string, vector<string> >& mothurMadeFiles){
357         try {
358                                 
359                 if (name == "system") { return false; }
360                 
361                 if (name == "system") { return false; }
362                 
363                 //get shell of the command so we can check to make sure its valid without running it
364                 Command* command = cFactory->getCommand(name);
365                         
366                 //check to make sure all parameters are valid for command
367                 vector<string> validParameters = command->getValidParameters();
368                 
369                 OptionParser parser(options);
370                 map<string, string> parameters = parser.getParameters(); 
371                         
372                 ValidParameters validParameter;
373                 map<string, string>::iterator it;
374                 map<string, vector<string> >::iterator itMade;
375                         
376                 for (it = parameters.begin(); it != parameters.end(); it++) { 
377                         
378                         if (validParameter.isValidParameter(it->first, validParameters, it->second) != true) {  return true;  } // not valid
379                         if (it->second == "mothurmade") {
380                                 itMade = mothurMadeFiles.find(it->first);
381                                 
382                                 if (itMade == mothurMadeFiles.end()) {  
383                                         if ((name == "align.seqs") && (it->first == "candidate")) {} //do nothing about candidate
384                                         else {
385                                                 m->mothurOut("You have the " + it->first + " listed as a mothurmade file for the " + name + " command, but it seems mothur will not make that file in your current pipeline, please correct."); m->mothurOutEndLine();
386                                                 return true;
387                                         }
388                                 }
389                         }
390                 }
391                         
392                 //is the command missing any required
393                 vector<string> requiredParameters = command->getRequiredParameters();
394                 
395                 //check for or
396                 bool hasOr = false;
397                 int numFound = 0;
398                 if (requiredParameters.size() > 2) {  
399                         if (requiredParameters[(requiredParameters.size()-1)] == "or") { hasOr = true; }
400                 }
401                         
402                 for (int i = 0; i < requiredParameters.size(); i++) { 
403                         it = parameters.find(requiredParameters[i]);
404                         
405                         if (it != parameters.end()) { numFound++; }
406                         else {
407                                 if (!hasOr) { m->mothurOut(name + " requires the " + requiredParameters[i] + " parameter, please correct."); m->mothurOutEndLine(); }
408                         }
409                 }
410                         
411                 // if all are needed and not all are found
412                 if ((!hasOr) && (numFound != requiredParameters.size())) { return true; }
413                 //if one is needed and none are found
414                 else if ((hasOr) && (numFound == 0)) { return true; }
415                 
416                 //update MothurMade
417                 map<string, vector<string> > thisCommandsFile = command->getOutputFiles();
418                 for (itMade = thisCommandsFile.begin(); itMade != thisCommandsFile.end(); itMade++) { 
419                         mothurMadeFiles[itMade->first] = itMade->second; //adds any new types
420                 }
421                         
422                 return false;
423         }
424         catch(exception& e) {
425                 m->errorOut(e, "PipelineCommand", "checkForValidAndRequiredParameters");
426                 exit(1);
427         }
428 }
429 //**********************************************************************************************************************
430 int PipelineCommand::runUsersPipeline(){
431         try {
432                 ifstream in;
433                 m->openInputFile(pipeFilename, in);
434                 
435                 string nextCommand = "";
436                 
437                 map<string, vector<string> > mothurMadeFiles;
438                 
439                 while(!in.eof()) {
440                         nextCommand = m->getline(in);  m->gobble(in);
441                 
442                         if (nextCommand[0] != '#') {
443                                 CommandOptionParser parser(nextCommand);
444                                 string commandName = parser.getCommandString();
445                                 string options = parser.getOptionString();
446                                         
447                                 if ((options != "") && (commandName != "system")) {
448                                         bool error = fillInMothurMade(options, mothurMadeFiles);
449                                         if (error) { in.close(); return 0; }
450                                 }
451                                 
452                                 m->mothurOutEndLine(); m->mothurOut("mothur > " + commandName + "(" + options + ")"); m->mothurOutEndLine();
453                                                                 
454                                 if (m->control_pressed) { return 0; }
455                                         
456                                 #ifdef USE_MPI
457                                         int pid;
458                                         MPI_Comm_rank(MPI_COMM_WORLD, &pid); 
459                                         
460                                         if ((cFactory->MPIEnabled(commandName)) || (pid == 0)) {
461                                 #endif
462                                 
463                                 //executes valid command
464                                 Command* command = cFactory->getCommand(commandName, options, "pipe");
465                                 command->execute();
466                                 
467                                 //add output files to list
468                                 map<string, vector<string> > thisCommandsFile = command->getOutputFiles();
469                                 map<string, vector<string> >::iterator itMade;
470                                 map<string, vector<string> >::iterator it;
471                                 for (itMade = thisCommandsFile.begin(); itMade != thisCommandsFile.end(); itMade++) { 
472                 
473                                         vector<string> temp = itMade->second;
474                                         for (int k = 0; k < temp.size(); k++) { outputNames.push_back(temp[k]); }  //
475                                         
476                                         //update Mothur Made for each file
477                                         it = mothurMadeFiles.find(itMade->first);
478                                         
479                                         if (it == mothurMadeFiles.end()) { //new type
480                         
481                                                 mothurMadeFiles[itMade->first] = temp;
482                                 
483                                         }else{ //update existing type
484                                                 vector<string> oldFileNames = it->second;
485                                                 //look at new files, see if an old version of the file exists, if so update, else just add.
486                                                 //for example you may have abrecovery.fasta and amazon.fasta as old files and you created a new amazon.trim.fasta.
487                                                 
488                                                 for (int k = 0; k < temp.size(); k++) {
489                                                         
490                                                         //get base name
491                                                         string root = m->getSimpleName(temp[k]);
492                                                         string individual = "";
493                                                         for(int i=0;i<root.length();i++){
494                                                                 if(root[i] == '.'){
495                                                                         root = individual;
496                                                                         break;
497                                                                 }else{
498                                                                         individual += root[i];
499                                                                 }
500                                                         }
501                                                         
502                                                         //look for that base name in oldfiles
503                                                         int spot = -1;
504                                                         for (int l = 0; l < oldFileNames.size(); l++) {
505                                                                 int pos = oldFileNames[l].find(root);
506                                                                 if (pos != string::npos) {
507                                                                         spot = l;
508                                                                         break;
509                                                                 }
510                                                         }
511                                                         
512                                                         //if you found it update it, else add it
513                                                         if (spot != -1) {
514                                                                 mothurMadeFiles[it->first][spot] = temp[k];
515                                                         }else{
516                                                                 mothurMadeFiles[it->first].push_back(temp[k]);
517                                                         }
518                                                 }
519                                         }
520                                 }
521                                                                         
522                                 #ifdef USE_MPI
523                                         }
524                                 #endif
525                         }
526                 }
527                 
528                 in.close();
529         }
530         catch(exception& e) {
531                 m->errorOut(e, "PipelineCommand", "runUsersPipeline");
532                 exit(1);
533         }
534 }
535 //**********************************************************************************************************************
536 bool PipelineCommand::fillInMothurMade(string& options, map<string, vector<string> >& mothurMadeFiles){
537         try {
538                 
539                 OptionParser parser(options);
540                 map<string, string> parameters = parser.getParameters(); 
541                 map<string, string>::iterator it;
542                 map<string, vector<string> >::iterator itMade;
543                 
544                 options = "";
545                 
546                 //fill in mothurmade filenames
547                 for (it = parameters.begin(); it != parameters.end(); it++) { 
548                         string paraType = it->first;
549                         string tempOption = it->second;
550                         
551                         if (tempOption == "mothurmade") {
552                                 
553                                 if (it->first == "candidate") { paraType = "fasta"; }
554                         
555                                 itMade = mothurMadeFiles.find(paraType);
556                                 
557                                 if (itMade == mothurMadeFiles.end()) { 
558                                         m->mothurOut("Looking for a mothurmade " + paraType + " file, but it seems mothur has not made that file type in your current pipeline, please correct."); m->mothurOutEndLine();
559                                         return true;
560                                 }else{
561                                         vector<string> temp = itMade->second;
562                                         
563                                         if (temp.size() > 1) {
564                                                 //ask user which file to use
565                                                 m->mothurOut("More than one file has been created for the " + paraType + " parameter. "); m->mothurOutEndLine();
566                                                 for (int i = 0; i < temp.size(); i++) {
567                                                         m->mothurOut(toString(i) + " - " + temp[i]); m->mothurOutEndLine();
568                                                 }
569                                                 
570                                                 m->mothurOut("Please select the number of the file you would like to use: ");
571                                                 int num = 0;
572                                                 cin >> num;
573                                                 m->mothurOutJustToLog(toString(num)); m->mothurOutEndLine();
574                                                 
575                                                 if ((num < 0) || (num > (temp.size()-1))) { m->mothurOut("Not a valid response, quitting."); m->mothurOutEndLine(); return true; }
576                                                 else {
577                                                         tempOption = temp[num];
578                                                 }
579                                 
580                                                 //clears buffer so next command doesn't have error
581                                                 string s;       
582                                                 getline(cin, s);
583                                                 
584                                                 vector<string> newTemp;
585                                                 for (int i = 0; i < temp.size(); i++) {
586                                                         if (i == num) { newTemp.push_back(temp[i]); }
587                                                         else {
588                                                                 m->mothurOut("Would you like to remove " + temp[i] + " as an option for " + paraType + ", (y/n): "); m->mothurOutEndLine();
589                                                                 string response;
590                                                                 cin >> response;
591                                                                 m->mothurOutJustToLog(response); m->mothurOutEndLine();
592                                                         
593                                                                 if (response == "n") {  newTemp.push_back(temp[i]); }
594                                                         
595                                                                 //clears buffer so next command doesn't have error
596                                                                 string s;       
597                                                                 getline(cin, s);
598                                                         }
599                                                 }
600                                                 
601                                                 mothurMadeFiles[paraType] = newTemp;
602                                                 
603                                                 
604                                         }else if (temp.size() == 0){
605                                                 m->mothurOut("Sorry, we seem to think you created a " + paraType + " file, but it seems mothur doesn't have a filename."); m->mothurOutEndLine();
606                                                 return true;
607                                         }else{
608                                                 tempOption = temp[0];
609                                         }
610                                 }
611                         }
612                         
613                         options += it->first + "=" + tempOption + ", ";
614                 }
615                 
616                 //rip off extra comma
617                 options = options.substr(0, (options.length()-2));
618                 
619                 return false;
620         }
621         catch(exception& e) {
622                 m->errorOut(e, "PipelineCommand", "fillInMothurMade");
623                 exit(1);
624         }
625 }
626
627 //**********************************************************************************************************************
628 void PipelineCommand::createPatsPipeline(){
629         try {
630                 
631                 //sff.info command
632                 string thisCommand = "sffinfo(sff=" + sffFile + ")";
633                 commands.push_back(thisCommand);
634                 
635                 //trim.seqs command
636                 string fastaFile = m->getRootName(m->getSimpleName(sffFile)) + "fasta";
637                 string qualFile = m->getRootName(m->getSimpleName(sffFile)) + "qual";
638                 thisCommand = "trim.seqs(processors=" + toString(processors) + ", fasta=" + fastaFile + ", allfiles=T, maxambig=0, maxhomop=8, flip=T, bdiffs=1, pdiffs=2, qwindowaverage=35, qwindowsize=50, oligos=" + oligosFile + ", qfile=" + qualFile + ")";
639                 commands.push_back(thisCommand);
640                 
641                 //unique.seqs
642                 string groupFile = m->getRootName(m->getSimpleName(fastaFile)) + "groups";
643                 qualFile =  m->getRootName(m->getSimpleName(fastaFile)) + "trim.qual";
644                 fastaFile =  m->getRootName(m->getSimpleName(fastaFile)) + "trim.fasta";
645                 thisCommand = "unique.seqs(fasta=" + fastaFile + ")"; 
646                 commands.push_back(thisCommand);
647                 
648                 //align.seqs
649                 string nameFile = m->getRootName(m->getSimpleName(fastaFile)) + "names";
650                 fastaFile = m->getRootName(m->getSimpleName(fastaFile)) + "unique" + m->getExtension(fastaFile);
651                 thisCommand = "align.seqs(processors=" + toString(processors) + ", candidate=" + fastaFile + ", template=" + alignFile + ")";
652                 commands.push_back(thisCommand);
653                 
654                 //screen.seqs
655                 fastaFile = m->getRootName(m->getSimpleName(fastaFile)) + "align";
656                 thisCommand = "screen.seqs(processors=" + toString(processors) + ", fasta=" + fastaFile + ", name=" + nameFile + ", group=" + groupFile + ", optimize=end-minlength)";
657                 commands.push_back(thisCommand);
658                 
659                 //chimera.slayer
660                 fastaFile = m->getRootName(m->getSimpleName(fastaFile)) + "good" + m->getExtension(fastaFile);
661                 nameFile = m->getRootName(m->getSimpleName(nameFile)) + "good" + m->getExtension(nameFile);
662                 groupFile = m->getRootName(m->getSimpleName(groupFile)) + "good" + m->getExtension(groupFile);
663                 thisCommand = "chimera.slayer(processors=" + toString(processors) + ", fasta=" + fastaFile + ", template=" + chimeraFile + ")";
664                 commands.push_back(thisCommand);
665                 
666                 //remove.seqs
667                 string accnosFile = m->getRootName(m->getSimpleName(fastaFile))  + "slayer.accnos";
668                 thisCommand = "remove.seqs(fasta=" + fastaFile + ", name=" + nameFile + ", group=" + groupFile + ", accnos=" + accnosFile + ", dups=T)";
669                 commands.push_back(thisCommand);
670                 
671                 //filter.seqs
672                 nameFile = m->getRootName(m->getSimpleName(nameFile)) + "pick" + m->getExtension(nameFile);
673                 groupFile = m->getRootName(m->getSimpleName(groupFile)) + "pick" + m->getExtension(groupFile);
674                 fastaFile = m->getRootName(m->getSimpleName(fastaFile)) + "pick" + m->getExtension(fastaFile);
675                 thisCommand = "filter.seqs(processors=" + toString(processors) + ", fasta=" + fastaFile + ", vertical=T, trump=.)";
676                 commands.push_back(thisCommand);
677                 
678                 //unique.seqs
679                 fastaFile =  m->getRootName(m->getSimpleName(fastaFile)) + "filter.fasta";
680                 thisCommand = "unique.seqs(fasta=" + fastaFile + ", name=" + nameFile + ")"; 
681                 commands.push_back(thisCommand);
682                 
683                 //pre.cluster
684                 nameFile = m->getRootName(m->getSimpleName(fastaFile)) + "names";
685                 fastaFile = m->getRootName(m->getSimpleName(fastaFile)) + "unique" + m->getExtension(fastaFile);
686                 thisCommand = "pre.cluster(fasta=" + fastaFile + ", name=" + nameFile + ", diffs=2)"; 
687                 commands.push_back(thisCommand);
688                 
689                 //dist.seqs
690                 nameFile = m->getRootName(m->getSimpleName(fastaFile)) + "precluster.names";
691                 fastaFile = m->getRootName(m->getSimpleName(fastaFile)) + "precluster" + m->getExtension(fastaFile);
692                 thisCommand = "dist.seqs(processors=" + toString(processors) + ", fasta=" + fastaFile + ", cutoff=0.20)";
693                 commands.push_back(thisCommand);
694                 
695                 //dist.seqs
696                 string columnFile = m->getRootName(m->getSimpleName(fastaFile)) + "dist";
697                 thisCommand = "dist.seqs(processors=" + toString(processors) + ", fasta=" + fastaFile + ", output=lt)";
698                 commands.push_back(thisCommand);
699                 
700                 //read.dist
701                 string phylipFile = m->getRootName(m->getSimpleName(fastaFile)) + "phylip.dist";
702                 thisCommand = "read.dist(column=" + columnFile + ", name=" + nameFile + ")";
703                 commands.push_back(thisCommand);
704                 
705                 //cluster
706                 thisCommand = "cluster(method=average, hard=T)";
707                 commands.push_back(thisCommand);
708                 
709                 string listFile = m->getRootName(m->getSimpleName(columnFile)) + "an.list";
710                 string rabundFile = m->getRootName(m->getSimpleName(columnFile)) + "an.rabund";
711                 
712                 //degap.seqs
713                 thisCommand = "degap.seqs(fasta=" + fastaFile + ")";
714                 commands.push_back(thisCommand);
715                 
716                 //classify.seqs
717                 fastaFile = m->getRootName(m->getSimpleName(fastaFile)) + "ng.fasta";
718                 thisCommand = "classify.seqs(processors=" + toString(processors) + ", fasta=" + fastaFile + ", name=" + nameFile + ", template=" + classifyFile + ", taxonomy=" + taxonomyFile + ", cutoff=80)";
719                 commands.push_back(thisCommand);
720                 
721                 string RippedTaxName = m->getRootName(m->getSimpleName(taxonomyFile));
722                 RippedTaxName = m->getExtension(RippedTaxName.substr(0, RippedTaxName.length()-1));
723                 if (RippedTaxName[0] == '.') { RippedTaxName = RippedTaxName.substr(1, RippedTaxName.length()); }
724                 RippedTaxName +=  "."; 
725                 
726                 string fastaTaxFile = m->getRootName(m->getSimpleName(fastaFile)) + RippedTaxName + "taxonomy";
727                 string taxSummaryFile = m->getRootName(m->getSimpleName(fastaFile)) + RippedTaxName + "tax.summary";
728                 
729                 //phylotype
730                 thisCommand = "phylotype(taxonomy=" + fastaTaxFile + ", name=" + nameFile + ")";
731                 commands.push_back(thisCommand);
732                 
733                 string phyloListFile = m->getRootName(m->getSimpleName(fastaTaxFile)) + "tx.list";
734                 string phyloRabundFile = m->getRootName(m->getSimpleName(fastaTaxFile)) + "tx.rabund";
735                 
736                 //clearcut
737                 thisCommand = "clearcut(phylip=" + phylipFile + ", neighbor=T)";
738                 commands.push_back(thisCommand);
739                 
740                 string treeFile = m->getRootName(m->getSimpleName(phylipFile)) + "tre";
741                 
742                 //read.otu
743                 thisCommand = "read.otu(list=" + listFile + ", group=" + groupFile + ", label=0.03)";
744                 commands.push_back(thisCommand);
745                 
746                 string sharedFile = m->getRootName(m->getSimpleName(listFile)) + "shared";
747                 
748                 //read.otu
749                 thisCommand = "read.otu(list=" + phyloListFile + ", group=" + groupFile + ", label=1)";
750                 commands.push_back(thisCommand);
751                 
752                 string phyloSharedFile = m->getRootName(m->getSimpleName(phyloListFile)) + "shared";
753                 
754                 //read.otu
755                 thisCommand = "read.otu(shared=" + sharedFile + ")";
756                 commands.push_back(thisCommand);
757
758                 //summary.single
759                 thisCommand = "summary.single(calc=nseqs-sobs-coverage-bergerparker-chao-ace-jack-bootstrap-boneh-efron-shen-solow-shannon-npshannon-invsimpson-qstat-simpsoneven-shannoneven-heip-smithwilson, size=5000)";
760                 commands.push_back(thisCommand);
761                 
762                 //summary.shared
763                 thisCommand = "summary.shared(calc=sharednseqs-sharedsobs-sharedchao-sharedace-anderberg-jclass-jest-kulczynski-kulczynskicody-lennon-ochiai-sorclass-sorest-whittaker-braycurtis-jabund-morisitahorn-sorabund-thetan-thetayc)";
764                 commands.push_back(thisCommand);
765                 
766                 //read.otu
767                 thisCommand = "read.otu(rabund=" + rabundFile + ", label=0.03)";
768                 commands.push_back(thisCommand);
769                 
770                 //summary.single
771                 thisCommand = "summary.single(calc=nseqs-sobs-coverage-bergerparker-chao-ace-jack-bootstrap-boneh-efron-shen-solow-shannon-npshannon-invsimpson-qstat-simpsoneven-shannoneven-heip-smithwilson, size=5000)";
772                 commands.push_back(thisCommand);
773                 
774                 //read.otu
775                 thisCommand = "read.otu(shared=" + phyloSharedFile + ")";
776                 commands.push_back(thisCommand);
777                 
778                 //summary.single
779                 thisCommand = "summary.single(calc=nseqs-sobs-coverage-bergerparker-chao-ace-jack-bootstrap-boneh-efron-shen-solow-shannon-npshannon-invsimpson-qstat-simpsoneven-shannoneven-heip-smithwilson, size=5000)";
780                 commands.push_back(thisCommand);
781                 
782                 //summary.shared
783                 thisCommand = "summary.shared(calc=sharednseqs-sharedsobs-sharedchao-sharedace-anderberg-jclass-jest-kulczynski-kulczynskicody-lennon-ochiai-sorclass-sorest-whittaker-braycurtis-jabund-morisitahorn-sorabund-thetan-thetayc)";
784                 commands.push_back(thisCommand);
785
786                 //read.otu
787                 thisCommand = "read.otu(rabund=" + phyloRabundFile + ", label=1)";
788                 commands.push_back(thisCommand);
789                 
790                 //summary.single
791                 thisCommand = "summary.single(calc=nseqs-sobs-coverage-bergerparker-chao-ace-jack-bootstrap-boneh-efron-shen-solow-shannon-npshannon-invsimpson-qstat-simpsoneven-shannoneven-heip-smithwilson, size=5000)";
792                 commands.push_back(thisCommand);
793                 
794                 //classify.otu
795                 thisCommand = "classify.otu(taxonomy=" + fastaTaxFile + ", name=" + nameFile + ", list=" + listFile + ", cutoff=51, label=0.03)";
796                 commands.push_back(thisCommand);
797                 
798                 //classify.otu
799                 thisCommand = "classify.otu(taxonomy=" + fastaTaxFile + ", name=" + nameFile + ", list=" + phyloListFile + ", cutoff=51, label=1)";
800                 commands.push_back(thisCommand);
801                 
802                 //read.tree
803                 thisCommand = "read.tree(tree=" + treeFile + ", name=" + nameFile + ", group=" + groupFile + ")";
804                 commands.push_back(thisCommand);
805                 
806                 //phylo.diversity
807                 thisCommand = "phylo.diversity(iters=100,rarefy=T)";
808                 commands.push_back(thisCommand);
809                 
810                 //unifrac.weighted
811                 thisCommand = "unifrac.weighted(random=false, distance=true, groups=all, processors=" + toString(processors) + ")";
812                 commands.push_back(thisCommand);
813                 
814                 //unifrac.unweighted
815                 thisCommand = "unifrac.unweighted(random=false, distance=true, processors=" + toString(processors) + ")";
816                 commands.push_back(thisCommand);
817                 
818                 
819         }
820         catch(exception& e) {
821                 m->errorOut(e, "PipelineCommand", "createPatsPipeline");
822                 exit(1);
823         }
824 }
825
826 //**********************************************************************************************************************