2 * pipelinepdscommand.cpp
5 * Created by westcott on 10/5/10.
6 * Copyright 2010 Schloss Lab. All rights reserved.
10 #include "pipelinepdscommand.h"
11 #include "sffinfocommand.h"
12 #include "commandoptionparser.hpp"
14 //**********************************************************************************************************************
15 vector<string> PipelineCommand::getValidParameters(){
17 string Array[] = {"sff","oligos","align","chimera","classify","taxonomy","pipeline","processors","outputdir","inputdir"};
18 vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
22 m->errorOut(e, "PipelineCommand", "getValidParameters");
26 //**********************************************************************************************************************
27 vector<string> PipelineCommand::getRequiredParameters(){
29 vector<string> myArray;
33 m->errorOut(e, "PipelineCommand", "getRequiredParameters");
37 //**********************************************************************************************************************
38 vector<string> PipelineCommand::getRequiredFiles(){
40 vector<string> myArray;
44 m->errorOut(e, "PipelineCommand", "getRequiredFiles");
48 //**********************************************************************************************************************
49 PipelineCommand::PipelineCommand(string option) {
51 cFactory = CommandFactory::getInstance();
54 //allow user to run help
55 if(option == "help") { help(); abort = true; }
59 //valid paramters for this command
60 string AlignArray[] = {"sff","oligos","align","chimera","classify","taxonomy","pipeline","processors","outputdir","inputdir"};
61 vector<string> myArray (AlignArray, AlignArray+(sizeof(AlignArray)/sizeof(string)));
63 OptionParser parser(option);
64 map<string, string> parameters = parser.getParameters();
66 ValidParameters validParameter;
67 map<string, string>::iterator it;
69 //check to make sure all parameters are valid for command
70 for (it = parameters.begin(); it != parameters.end(); it++) {
71 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
74 //if the user changes the input directory command factory will send this info to us in the output parameter
75 string inputDir = validParameter.validFile(parameters, "inputdir", false);
76 if (inputDir == "not found"){ inputDir = ""; }
79 it = parameters.find("sff");
80 //user has given a template file
81 if(it != parameters.end()){
82 path = m->hasPath(it->second);
83 //if the user has not given a path then, add inputdir. else leave path alone.
84 if (path == "") { parameters["sff"] = inputDir + it->second; }
87 it = parameters.find("oligos");
88 //user has given a template file
89 if(it != parameters.end()){
90 path = m->hasPath(it->second);
91 //if the user has not given a path then, add inputdir. else leave path alone.
92 if (path == "") { parameters["oligos"] = inputDir + it->second; }
95 it = parameters.find("align");
96 //user has given a template file
97 if(it != parameters.end()){
98 path = m->hasPath(it->second);
99 //if the user has not given a path then, add inputdir. else leave path alone.
100 if (path == "") { parameters["align"] = inputDir + it->second; }
103 it = parameters.find("chimera");
104 //user has given a template file
105 if(it != parameters.end()){
106 path = m->hasPath(it->second);
107 //if the user has not given a path then, add inputdir. else leave path alone.
108 if (path == "") { parameters["chimera"] = inputDir + it->second; }
111 it = parameters.find("classify");
112 //user has given a template file
113 if(it != parameters.end()){
114 path = m->hasPath(it->second);
115 //if the user has not given a path then, add inputdir. else leave path alone.
116 if (path == "") { parameters["classify"] = inputDir + it->second; }
119 it = parameters.find("taxonomy");
120 //user has given a template file
121 if(it != parameters.end()){
122 path = m->hasPath(it->second);
123 //if the user has not given a path then, add inputdir. else leave path alone.
124 if (path == "") { parameters["taxonomy"] = inputDir + it->second; }
127 it = parameters.find("pipeline");
128 //user has given a template file
129 if(it != parameters.end()){
130 path = m->hasPath(it->second);
131 //if the user has not given a path then, add inputdir. else leave path alone.
132 if (path == "") { parameters["pipeline"] = inputDir + it->second; }
136 outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = ""; }
138 pipeFilename = validParameter.validFile(parameters, "pipeline", true);
139 if (pipeFilename == "not found") { pipeFilename = ""; }
140 else if (pipeFilename == "not open") { pipeFilename = ""; abort = true; }
142 string temp = validParameter.validFile(parameters, "processors", false); if (temp == "not found"){ temp = "1"; }
143 convert(temp, processors);
145 if (pipeFilename != "") {
146 abort = readUsersPipeline();
148 sffFile = validParameter.validFile(parameters, "sff", true);
149 if (sffFile == "not found") { m->mothurOut("sff is a required parameter for the pipeline command."); m->mothurOutEndLine(); abort = true; }
150 else if (sffFile == "not open") { sffFile = ""; abort = true; }
152 oligosFile = validParameter.validFile(parameters, "oligos", true);
153 if (oligosFile == "not found") { m->mothurOut("oligos is a required parameter for the pipeline command."); m->mothurOutEndLine(); abort = true; }
154 else if (oligosFile == "not open") { oligosFile = ""; abort = true; }
156 alignFile = validParameter.validFile(parameters, "align", true);
157 if (alignFile == "not found") { m->mothurOut("align is a required parameter for the pipeline command. Please provide the template to align with."); m->mothurOutEndLine(); abort = true; }
158 else if (alignFile == "not open") { alignFile = ""; abort = true; }
160 chimeraFile = validParameter.validFile(parameters, "chimera", true);
161 if (chimeraFile == "not found") { m->mothurOut("chimera is a required parameter for the pipeline command. Please provide the template to check for chimeras with."); m->mothurOutEndLine(); abort = true; }
162 else if (chimeraFile == "not open") { chimeraFile = ""; abort = true; }
164 classifyFile = validParameter.validFile(parameters, "classify", true);
165 if (classifyFile == "not found") { m->mothurOut("classify is a required parameter for the pipeline command. Please provide the template to use with the classifier."); m->mothurOutEndLine(); abort = true; }
166 else if (classifyFile == "not open") { classifyFile = ""; abort = true; }
168 taxonomyFile = validParameter.validFile(parameters, "taxonomy", true);
169 if (taxonomyFile == "not found") { m->mothurOut("taxonomy is a required parameter for the pipeline command."); m->mothurOutEndLine(); abort = true; }
170 else if (taxonomyFile == "not open") { taxonomyFile = ""; abort = true; }
175 catch(exception& e) {
176 m->errorOut(e, "PipelineCommand", "PipelineCommand");
180 //**********************************************************************************************************************
182 void PipelineCommand::help(){
184 m->mothurOut("The pipeline command is designed to guide you through your analysis using mothur.\n");
185 m->mothurOut("The pipeline command parameters are pipeline, sff, oligos, align, chimera, classify, taxonomy and processors.\n");
186 m->mothurOut("The sff parameter allows you to enter your sff file. It is required.\n");
187 m->mothurOut("The oligos parameter allows you to enter your oligos file. It is required.\n");
188 m->mothurOut("The align parameter allows you to enter a template to use with the aligner. It is required.\n");
189 m->mothurOut("The chimera parameter allows you to enter a template to use for chimera detection. It is required.\n");
190 m->mothurOut("The classify parameter allows you to enter a template to use for classification. It is required.\n");
191 m->mothurOut("The taxonomy parameter allows you to enter a taxonomy file for the classify template to use for classification. It is required.\n");
192 m->mothurOut("The processors parameter allows you to specify the number of processors to use. The default is 1.\n");
193 m->mothurOut("The pipeline parameter allows you to enter your own pipeline file. This file should look like a mothur batchfile, but where you would be using a mothur generated file, you can use mothurmade instead.\n");
194 m->mothurOut("First column contains the command name, and the second column contains the parameter options or 'defaults', meaning use defaults. You may leave out file options.\n");
195 m->mothurOut("Example: trim.seqs(processors=8, allfiles=T, maxambig=0, maxhomop=8, flip=T, bdiffs=1, pdiffs=2, qwindowaverage=35, qwindowsize=50, fasta=may1.v13.fasta, oligos=may1.v13.oligos, qfile=may1.v13.qual)\n");
196 m->mothurOut("then, you could enter unique.seqs(fasta=mothurmade), and mothur would use the .trim.fasta file from the trim.seqs command. \n");
197 m->mothurOut("then you could enter align.seqs(candidate=mothurmade, template=silva.v13.align, processors=8). , and mothur would use the .trim.unique.fasta file from the unique.seqs command. \n");
198 m->mothurOut("If no pipeline file is given then mothur will use Pat's pipeline. \n\n");
199 m->mothurOut("Here is a list of the commands used in Pat's pipeline.\n");
200 m->mothurOut("All paralellized commands will use the processors you entered.\n");
201 m->mothurOut("The sffinfo command takes your sff file and extracts the fasta and quality files.\n");
202 m->mothurOut("The trim.seqs command uses your oligos file and the quality and fasta files generated by sffinfo.\n");
203 m->mothurOut("The trim.seqs command sets the following parameters: allfiles=T, maxambig=0, maxhomop=8, flip=T, bdiffs=1, pdiffs=2, qwindowaverage=35, qwindowsize=50.\n");
204 m->mothurOut("The unique.seqs command uses the trimmed fasta file and removes redundant sequences, don't worry the names file generated by unique.seqs will be used in the pipeline to make sure they are included.\n");
205 m->mothurOut("The align.seqs command aligns the unique sequences using the aligners default options. \n");
206 m->mothurOut("The screen.seqs command screens the sequences using optimize=end-minlength. \n");
207 m->mothurOut("The pipeline uses chimera.slayer to detect chimeras using the default options. \n");
208 m->mothurOut("The pipeline removes all sequences determined to be chimeric by chimera.slayer. \n");
209 m->mothurOut("The filter.seqs command filters the sequences using vertical=T, trump=. \n");
210 m->mothurOut("The unique.seqs command uses the filtered fasta file and name file to remove sequences that have become redundant after filtering.\n");
211 m->mothurOut("The pre.cluster command clusters sequences that have no more than 2 differences.\n");
212 m->mothurOut("The dist.seqs command is used to generate a column and phylip formatted distance matrix using cutoff=0.20 for column.\n");
213 m->mothurOut("The pipeline uses cluster with method=average, hard=T. \n");
214 m->mothurOut("The classify.seqs command is used to classify the sequences using the bayesian method with a cutoff of 80.\n");
215 m->mothurOut("The phylotype command is used to cluster the sequences based on their classification.\n");
216 m->mothurOut("The clearcut command is used to generate a tree using neighbor=T. \n");
217 m->mothurOut("The summary.single and summary.shared commands are run on the otu files from cluster and phylotype commands. \n");
218 m->mothurOut("The summary.shared command uses calc=sharednseqs-sharedsobs-sharedchao-sharedace-anderberg-jclass-jest-kulczynski-kulczynskicody-lennon-ochiai-sorclass-sorest-whittaker-braycurtis-jabund-morisitahorn-sorabund-thetan-thetayc. \n");
219 m->mothurOut("The summary.single command uses calc=nseqs-sobs-coverage-bergerparker-chao-ace-jack-bootstrap-boneh-efron-shen-solow-shannon-npshannon-invsimpson-qstat-simpsoneven-shannoneven-heip-smithwilson. \n");
220 m->mothurOut("The classify.otu command is used to get the concensus taxonomy for otu files from cluster and phylotype commands. \n");
221 m->mothurOut("The phylo.diversity command run on the tree generated by clearcut with rarefy=T, iters=100. \n");
222 m->mothurOut("The unifrac commands are also run on the tree generated by clearcut with random=F, distance=T. \n");
223 m->mothurOut("\n\n");
225 catch(exception& e) {
226 m->errorOut(e, "PipelineCommand", "help");
231 //**********************************************************************************************************************
233 PipelineCommand::~PipelineCommand(){}
235 //**********************************************************************************************************************
237 int PipelineCommand::execute(){
239 if (abort == true) { return 0; }
241 int start = time(NULL);
243 if (pipeFilename == "") {
244 createPatsPipeline();
247 for (int i = 0; i < commands.size(); i++) {
248 m->mothurOutEndLine(); m->mothurOut("mothur > " + commands[i]); m->mothurOutEndLine();
250 if (m->control_pressed) { return 0; }
252 CommandOptionParser parser(commands[i]);
253 string commandName = parser.getCommandString();
254 string options = parser.getOptionString();
258 MPI_Comm_rank(MPI_COMM_WORLD, &pid);
260 if ((cFactory->MPIEnabled(commandName)) || (pid == 0)) {
263 //executes valid command
264 Command* command = cFactory->getCommand(commandName, options, "pipe");
267 //add output files to list
268 map<string, vector<string> > thisCommandsFile = command->getOutputFiles();
269 map<string, vector<string> >::iterator itMade;
270 for (itMade = thisCommandsFile.begin(); itMade != thisCommandsFile.end(); itMade++) {
271 vector<string> temp = itMade->second;
272 for (int j = 0; j < temp.size(); j++) { outputNames.push_back(temp[j]); }
280 }else { runUsersPipeline(); }
282 if (m->control_pressed) { return 0; }
284 m->mothurOut("It took " + toString(time(NULL) - start) + " secs to run the pipeline analysis."); m->mothurOutEndLine(); m->mothurOutEndLine();
286 m->mothurOutEndLine();
287 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
288 for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); }
289 m->mothurOutEndLine();
293 catch(exception& e) {
294 m->errorOut(e, "PipelineCommand", "execute");
298 //**********************************************************************************************************************
300 bool PipelineCommand::readUsersPipeline(){
304 m->openInputFile(pipeFilename, in);
306 string nextCommand = "";
308 map<string, vector<string> > mothurMadeFiles;
311 nextCommand = m->getline(in); m->gobble(in);
313 if (nextCommand[0] != '#') {
316 string commandName, options;
317 error = parseCommand(nextCommand, commandName, options);
319 if (error) { in.close(); return error; }
320 if (commandName == "pipeline.pds") { m->mothurOut("Cannot run the pipeline.pds command from inside the pipeline.pds command."); m->mothurOutEndLine(); in.close(); return true; }
322 error = checkForValidAndRequiredParameters(commandName, options, mothurMadeFiles);
324 if (error) { in.close(); return error; }
332 catch(exception& e) {
333 m->errorOut(e, "PipelineCommand", "readUsersPipeline");
337 //**********************************************************************************************************************
339 bool PipelineCommand::parseCommand(string nextCommand, string& name, string& options){
341 CommandOptionParser parser(nextCommand);
342 name = parser.getCommandString();
343 options = parser.getOptionString();
345 if (name == "") { return true; } //name == "" if () are not right
349 catch(exception& e) {
350 m->errorOut(e, "PipelineCommand", "parseCommand");
354 //**********************************************************************************************************************
356 bool PipelineCommand::checkForValidAndRequiredParameters(string name, string options, map<string, vector<string> >& mothurMadeFiles){
359 if (name == "system") { return false; }
361 if (name == "system") { return false; }
363 //get shell of the command so we can check to make sure its valid without running it
364 Command* command = cFactory->getCommand(name);
366 //check to make sure all parameters are valid for command
367 vector<string> validParameters = command->getValidParameters();
369 OptionParser parser(options);
370 map<string, string> parameters = parser.getParameters();
372 ValidParameters validParameter;
373 map<string, string>::iterator it;
374 map<string, vector<string> >::iterator itMade;
376 for (it = parameters.begin(); it != parameters.end(); it++) {
378 if (validParameter.isValidParameter(it->first, validParameters, it->second) != true) { return true; } // not valid
379 if (it->second == "mothurmade") {
380 itMade = mothurMadeFiles.find(it->first);
382 if (itMade == mothurMadeFiles.end()) {
383 if ((name == "align.seqs") && (it->first == "candidate")) {} //do nothing about candidate
385 m->mothurOut("You have the " + it->first + " listed as a mothurmade file for the " + name + " command, but it seems mothur will not make that file in your current pipeline, please correct."); m->mothurOutEndLine();
392 //is the command missing any required
393 vector<string> requiredParameters = command->getRequiredParameters();
398 if (requiredParameters.size() > 2) {
399 if (requiredParameters[(requiredParameters.size()-1)] == "or") { hasOr = true; }
402 for (int i = 0; i < requiredParameters.size(); i++) {
403 it = parameters.find(requiredParameters[i]);
405 if (it != parameters.end()) { numFound++; }
407 if (!hasOr) { m->mothurOut(name + " requires the " + requiredParameters[i] + " parameter, please correct."); m->mothurOutEndLine(); }
411 // if all are needed and not all are found
412 if ((!hasOr) && (numFound != requiredParameters.size())) { return true; }
413 //if one is needed and none are found
414 else if ((hasOr) && (numFound == 0)) { return true; }
417 map<string, vector<string> > thisCommandsFile = command->getOutputFiles();
418 for (itMade = thisCommandsFile.begin(); itMade != thisCommandsFile.end(); itMade++) {
419 mothurMadeFiles[itMade->first] = itMade->second; //adds any new types
424 catch(exception& e) {
425 m->errorOut(e, "PipelineCommand", "checkForValidAndRequiredParameters");
429 //**********************************************************************************************************************
430 int PipelineCommand::runUsersPipeline(){
433 m->openInputFile(pipeFilename, in);
435 string nextCommand = "";
437 map<string, vector<string> > mothurMadeFiles;
440 nextCommand = m->getline(in); m->gobble(in);
442 if (nextCommand[0] != '#') {
443 CommandOptionParser parser(nextCommand);
444 string commandName = parser.getCommandString();
445 string options = parser.getOptionString();
447 if ((options != "") && (commandName != "system")) {
448 bool error = fillInMothurMade(options, mothurMadeFiles);
449 if (error) { in.close(); return 0; }
452 m->mothurOutEndLine(); m->mothurOut("mothur > " + commandName + "(" + options + ")"); m->mothurOutEndLine();
454 if (m->control_pressed) { return 0; }
458 MPI_Comm_rank(MPI_COMM_WORLD, &pid);
460 if ((cFactory->MPIEnabled(commandName)) || (pid == 0)) {
463 //executes valid command
464 Command* command = cFactory->getCommand(commandName, options, "pipe");
467 //add output files to list
468 map<string, vector<string> > thisCommandsFile = command->getOutputFiles();
469 map<string, vector<string> >::iterator itMade;
470 map<string, vector<string> >::iterator it;
471 for (itMade = thisCommandsFile.begin(); itMade != thisCommandsFile.end(); itMade++) {
473 vector<string> temp = itMade->second;
474 for (int k = 0; k < temp.size(); k++) { outputNames.push_back(temp[k]); } //
476 //update Mothur Made for each file
477 it = mothurMadeFiles.find(itMade->first);
479 if (it == mothurMadeFiles.end()) { //new type
481 mothurMadeFiles[itMade->first] = temp;
483 }else{ //update existing type
484 vector<string> oldFileNames = it->second;
485 //look at new files, see if an old version of the file exists, if so update, else just add.
486 //for example you may have abrecovery.fasta and amazon.fasta as old files and you created a new amazon.trim.fasta.
488 for (int k = 0; k < temp.size(); k++) {
491 string root = m->getSimpleName(temp[k]);
492 string individual = "";
493 for(int i=0;i<root.length();i++){
498 individual += root[i];
502 //look for that base name in oldfiles
504 for (int l = 0; l < oldFileNames.size(); l++) {
505 int pos = oldFileNames[l].find(root);
506 if (pos != string::npos) {
512 //if you found it update it, else add it
514 mothurMadeFiles[it->first][spot] = temp[k];
516 mothurMadeFiles[it->first].push_back(temp[k]);
530 catch(exception& e) {
531 m->errorOut(e, "PipelineCommand", "runUsersPipeline");
535 //**********************************************************************************************************************
536 bool PipelineCommand::fillInMothurMade(string& options, map<string, vector<string> >& mothurMadeFiles){
539 OptionParser parser(options);
540 map<string, string> parameters = parser.getParameters();
541 map<string, string>::iterator it;
542 map<string, vector<string> >::iterator itMade;
546 //fill in mothurmade filenames
547 for (it = parameters.begin(); it != parameters.end(); it++) {
548 string paraType = it->first;
549 string tempOption = it->second;
551 if (tempOption == "mothurmade") {
553 if (it->first == "candidate") { paraType = "fasta"; }
555 itMade = mothurMadeFiles.find(paraType);
557 if (itMade == mothurMadeFiles.end()) {
558 m->mothurOut("Looking for a mothurmade " + paraType + " file, but it seems mothur has not made that file type in your current pipeline, please correct."); m->mothurOutEndLine();
561 vector<string> temp = itMade->second;
563 if (temp.size() > 1) {
564 //ask user which file to use
565 m->mothurOut("More than one file has been created for the " + paraType + " parameter. "); m->mothurOutEndLine();
566 for (int i = 0; i < temp.size(); i++) {
567 m->mothurOut(toString(i) + " - " + temp[i]); m->mothurOutEndLine();
570 m->mothurOut("Please select the number of the file you would like to use: ");
573 m->mothurOutJustToLog(toString(num)); m->mothurOutEndLine();
575 if ((num < 0) || (num > (temp.size()-1))) { m->mothurOut("Not a valid response, quitting."); m->mothurOutEndLine(); return true; }
577 tempOption = temp[num];
580 //clears buffer so next command doesn't have error
584 vector<string> newTemp;
585 for (int i = 0; i < temp.size(); i++) {
586 if (i == num) { newTemp.push_back(temp[i]); }
588 m->mothurOut("Would you like to remove " + temp[i] + " as an option for " + paraType + ", (y/n): "); m->mothurOutEndLine();
591 m->mothurOutJustToLog(response); m->mothurOutEndLine();
593 if (response == "n") { newTemp.push_back(temp[i]); }
595 //clears buffer so next command doesn't have error
601 mothurMadeFiles[paraType] = newTemp;
604 }else if (temp.size() == 0){
605 m->mothurOut("Sorry, we seem to think you created a " + paraType + " file, but it seems mothur doesn't have a filename."); m->mothurOutEndLine();
608 tempOption = temp[0];
613 options += it->first + "=" + tempOption + ", ";
616 //rip off extra comma
617 options = options.substr(0, (options.length()-2));
621 catch(exception& e) {
622 m->errorOut(e, "PipelineCommand", "fillInMothurMade");
627 //**********************************************************************************************************************
628 void PipelineCommand::createPatsPipeline(){
632 string thisCommand = "sffinfo(sff=" + sffFile + ")";
633 commands.push_back(thisCommand);
636 string fastaFile = m->getRootName(m->getSimpleName(sffFile)) + "fasta";
637 string qualFile = m->getRootName(m->getSimpleName(sffFile)) + "qual";
638 thisCommand = "trim.seqs(processors=" + toString(processors) + ", fasta=" + fastaFile + ", allfiles=T, maxambig=0, maxhomop=8, flip=T, bdiffs=1, pdiffs=2, qwindowaverage=35, qwindowsize=50, oligos=" + oligosFile + ", qfile=" + qualFile + ")";
639 commands.push_back(thisCommand);
642 string groupFile = m->getRootName(m->getSimpleName(fastaFile)) + "groups";
643 qualFile = m->getRootName(m->getSimpleName(fastaFile)) + "trim.qual";
644 fastaFile = m->getRootName(m->getSimpleName(fastaFile)) + "trim.fasta";
645 thisCommand = "unique.seqs(fasta=" + fastaFile + ")";
646 commands.push_back(thisCommand);
649 string nameFile = m->getRootName(m->getSimpleName(fastaFile)) + "names";
650 fastaFile = m->getRootName(m->getSimpleName(fastaFile)) + "unique" + m->getExtension(fastaFile);
651 thisCommand = "align.seqs(processors=" + toString(processors) + ", candidate=" + fastaFile + ", template=" + alignFile + ")";
652 commands.push_back(thisCommand);
655 fastaFile = m->getRootName(m->getSimpleName(fastaFile)) + "align";
656 thisCommand = "screen.seqs(processors=" + toString(processors) + ", fasta=" + fastaFile + ", name=" + nameFile + ", group=" + groupFile + ", optimize=end-minlength)";
657 commands.push_back(thisCommand);
660 fastaFile = m->getRootName(m->getSimpleName(fastaFile)) + "good" + m->getExtension(fastaFile);
661 nameFile = m->getRootName(m->getSimpleName(nameFile)) + "good" + m->getExtension(nameFile);
662 groupFile = m->getRootName(m->getSimpleName(groupFile)) + "good" + m->getExtension(groupFile);
663 thisCommand = "chimera.slayer(processors=" + toString(processors) + ", fasta=" + fastaFile + ", template=" + chimeraFile + ")";
664 commands.push_back(thisCommand);
667 string accnosFile = m->getRootName(m->getSimpleName(fastaFile)) + "slayer.accnos";
668 thisCommand = "remove.seqs(fasta=" + fastaFile + ", name=" + nameFile + ", group=" + groupFile + ", accnos=" + accnosFile + ", dups=T)";
669 commands.push_back(thisCommand);
672 nameFile = m->getRootName(m->getSimpleName(nameFile)) + "pick" + m->getExtension(nameFile);
673 groupFile = m->getRootName(m->getSimpleName(groupFile)) + "pick" + m->getExtension(groupFile);
674 fastaFile = m->getRootName(m->getSimpleName(fastaFile)) + "pick" + m->getExtension(fastaFile);
675 thisCommand = "filter.seqs(processors=" + toString(processors) + ", fasta=" + fastaFile + ", vertical=T, trump=.)";
676 commands.push_back(thisCommand);
679 fastaFile = m->getRootName(m->getSimpleName(fastaFile)) + "filter.fasta";
680 thisCommand = "unique.seqs(fasta=" + fastaFile + ", name=" + nameFile + ")";
681 commands.push_back(thisCommand);
684 nameFile = m->getRootName(m->getSimpleName(fastaFile)) + "names";
685 fastaFile = m->getRootName(m->getSimpleName(fastaFile)) + "unique" + m->getExtension(fastaFile);
686 thisCommand = "pre.cluster(fasta=" + fastaFile + ", name=" + nameFile + ", diffs=2)";
687 commands.push_back(thisCommand);
690 nameFile = m->getRootName(m->getSimpleName(fastaFile)) + "precluster.names";
691 fastaFile = m->getRootName(m->getSimpleName(fastaFile)) + "precluster" + m->getExtension(fastaFile);
692 thisCommand = "dist.seqs(processors=" + toString(processors) + ", fasta=" + fastaFile + ", cutoff=0.20)";
693 commands.push_back(thisCommand);
696 string columnFile = m->getRootName(m->getSimpleName(fastaFile)) + "dist";
697 thisCommand = "dist.seqs(processors=" + toString(processors) + ", fasta=" + fastaFile + ", output=lt)";
698 commands.push_back(thisCommand);
701 string phylipFile = m->getRootName(m->getSimpleName(fastaFile)) + "phylip.dist";
702 thisCommand = "read.dist(column=" + columnFile + ", name=" + nameFile + ")";
703 commands.push_back(thisCommand);
706 thisCommand = "cluster(method=average, hard=T)";
707 commands.push_back(thisCommand);
709 string listFile = m->getRootName(m->getSimpleName(columnFile)) + "an.list";
710 string rabundFile = m->getRootName(m->getSimpleName(columnFile)) + "an.rabund";
713 thisCommand = "degap.seqs(fasta=" + fastaFile + ")";
714 commands.push_back(thisCommand);
717 fastaFile = m->getRootName(m->getSimpleName(fastaFile)) + "ng.fasta";
718 thisCommand = "classify.seqs(processors=" + toString(processors) + ", fasta=" + fastaFile + ", name=" + nameFile + ", template=" + classifyFile + ", taxonomy=" + taxonomyFile + ", cutoff=80)";
719 commands.push_back(thisCommand);
721 string RippedTaxName = m->getRootName(m->getSimpleName(taxonomyFile));
722 RippedTaxName = m->getExtension(RippedTaxName.substr(0, RippedTaxName.length()-1));
723 if (RippedTaxName[0] == '.') { RippedTaxName = RippedTaxName.substr(1, RippedTaxName.length()); }
724 RippedTaxName += ".";
726 string fastaTaxFile = m->getRootName(m->getSimpleName(fastaFile)) + RippedTaxName + "taxonomy";
727 string taxSummaryFile = m->getRootName(m->getSimpleName(fastaFile)) + RippedTaxName + "tax.summary";
730 thisCommand = "phylotype(taxonomy=" + fastaTaxFile + ", name=" + nameFile + ")";
731 commands.push_back(thisCommand);
733 string phyloListFile = m->getRootName(m->getSimpleName(fastaTaxFile)) + "tx.list";
734 string phyloRabundFile = m->getRootName(m->getSimpleName(fastaTaxFile)) + "tx.rabund";
737 thisCommand = "clearcut(phylip=" + phylipFile + ", neighbor=T)";
738 commands.push_back(thisCommand);
740 string treeFile = m->getRootName(m->getSimpleName(phylipFile)) + "tre";
743 thisCommand = "read.otu(list=" + listFile + ", group=" + groupFile + ", label=0.03)";
744 commands.push_back(thisCommand);
746 string sharedFile = m->getRootName(m->getSimpleName(listFile)) + "shared";
749 thisCommand = "read.otu(list=" + phyloListFile + ", group=" + groupFile + ", label=1)";
750 commands.push_back(thisCommand);
752 string phyloSharedFile = m->getRootName(m->getSimpleName(phyloListFile)) + "shared";
755 thisCommand = "read.otu(shared=" + sharedFile + ")";
756 commands.push_back(thisCommand);
759 thisCommand = "summary.single(calc=nseqs-sobs-coverage-bergerparker-chao-ace-jack-bootstrap-boneh-efron-shen-solow-shannon-npshannon-invsimpson-qstat-simpsoneven-shannoneven-heip-smithwilson, size=5000)";
760 commands.push_back(thisCommand);
763 thisCommand = "summary.shared(calc=sharednseqs-sharedsobs-sharedchao-sharedace-anderberg-jclass-jest-kulczynski-kulczynskicody-lennon-ochiai-sorclass-sorest-whittaker-braycurtis-jabund-morisitahorn-sorabund-thetan-thetayc)";
764 commands.push_back(thisCommand);
767 thisCommand = "read.otu(rabund=" + rabundFile + ", label=0.03)";
768 commands.push_back(thisCommand);
771 thisCommand = "summary.single(calc=nseqs-sobs-coverage-bergerparker-chao-ace-jack-bootstrap-boneh-efron-shen-solow-shannon-npshannon-invsimpson-qstat-simpsoneven-shannoneven-heip-smithwilson, size=5000)";
772 commands.push_back(thisCommand);
775 thisCommand = "read.otu(shared=" + phyloSharedFile + ")";
776 commands.push_back(thisCommand);
779 thisCommand = "summary.single(calc=nseqs-sobs-coverage-bergerparker-chao-ace-jack-bootstrap-boneh-efron-shen-solow-shannon-npshannon-invsimpson-qstat-simpsoneven-shannoneven-heip-smithwilson, size=5000)";
780 commands.push_back(thisCommand);
783 thisCommand = "summary.shared(calc=sharednseqs-sharedsobs-sharedchao-sharedace-anderberg-jclass-jest-kulczynski-kulczynskicody-lennon-ochiai-sorclass-sorest-whittaker-braycurtis-jabund-morisitahorn-sorabund-thetan-thetayc)";
784 commands.push_back(thisCommand);
787 thisCommand = "read.otu(rabund=" + phyloRabundFile + ", label=1)";
788 commands.push_back(thisCommand);
791 thisCommand = "summary.single(calc=nseqs-sobs-coverage-bergerparker-chao-ace-jack-bootstrap-boneh-efron-shen-solow-shannon-npshannon-invsimpson-qstat-simpsoneven-shannoneven-heip-smithwilson, size=5000)";
792 commands.push_back(thisCommand);
795 thisCommand = "classify.otu(taxonomy=" + fastaTaxFile + ", name=" + nameFile + ", list=" + listFile + ", cutoff=51, label=0.03)";
796 commands.push_back(thisCommand);
799 thisCommand = "classify.otu(taxonomy=" + fastaTaxFile + ", name=" + nameFile + ", list=" + phyloListFile + ", cutoff=51, label=1)";
800 commands.push_back(thisCommand);
803 thisCommand = "read.tree(tree=" + treeFile + ", name=" + nameFile + ", group=" + groupFile + ")";
804 commands.push_back(thisCommand);
807 thisCommand = "phylo.diversity(iters=100,rarefy=T)";
808 commands.push_back(thisCommand);
811 thisCommand = "unifrac.weighted(random=false, distance=true, groups=all, processors=" + toString(processors) + ")";
812 commands.push_back(thisCommand);
815 thisCommand = "unifrac.unweighted(random=false, distance=true, processors=" + toString(processors) + ")";
816 commands.push_back(thisCommand);
820 catch(exception& e) {
821 m->errorOut(e, "PipelineCommand", "createPatsPipeline");
826 //**********************************************************************************************************************