2 * pipelinepdscommand.cpp
5 * Created by westcott on 10/5/10.
6 * Copyright 2010 Schloss Lab. All rights reserved.
10 #include "pipelinepdscommand.h"
11 #include "sffinfocommand.h"
12 #include "commandoptionparser.hpp"
14 //**********************************************************************************************************************
15 vector<string> PipelineCommand::getValidParameters(){
17 string Array[] = {"sff","oligos","align","chimera","classify","taxonomy","pipeline","processors","outputdir","inputdir"};
18 vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
22 m->errorOut(e, "PipelineCommand", "getValidParameters");
26 //**********************************************************************************************************************
27 vector<string> PipelineCommand::getRequiredParameters(){
29 vector<string> myArray;
33 m->errorOut(e, "PipelineCommand", "getRequiredParameters");
37 //**********************************************************************************************************************
38 vector<string> PipelineCommand::getRequiredFiles(){
40 vector<string> myArray;
44 m->errorOut(e, "PipelineCommand", "getRequiredFiles");
48 //**********************************************************************************************************************
49 PipelineCommand::PipelineCommand(string option) {
51 cFactory = CommandFactory::getInstance();
54 //allow user to run help
55 if(option == "help") { help(); abort = true; }
59 //valid paramters for this command
60 string AlignArray[] = {"sff","oligos","align","chimera","classify","taxonomy","pipeline","processors","outputdir","inputdir"};
61 vector<string> myArray (AlignArray, AlignArray+(sizeof(AlignArray)/sizeof(string)));
63 OptionParser parser(option);
64 map<string, string> parameters = parser.getParameters();
66 ValidParameters validParameter;
67 map<string, string>::iterator it;
69 //check to make sure all parameters are valid for command
70 for (it = parameters.begin(); it != parameters.end(); it++) {
71 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
74 //if the user changes the input directory command factory will send this info to us in the output parameter
75 string inputDir = validParameter.validFile(parameters, "inputdir", false);
76 if (inputDir == "not found"){ inputDir = ""; }
79 it = parameters.find("sff");
80 //user has given a template file
81 if(it != parameters.end()){
82 path = m->hasPath(it->second);
83 //if the user has not given a path then, add inputdir. else leave path alone.
84 if (path == "") { parameters["sff"] = inputDir + it->second; }
87 it = parameters.find("oligos");
88 //user has given a template file
89 if(it != parameters.end()){
90 path = m->hasPath(it->second);
91 //if the user has not given a path then, add inputdir. else leave path alone.
92 if (path == "") { parameters["oligos"] = inputDir + it->second; }
95 it = parameters.find("align");
96 //user has given a template file
97 if(it != parameters.end()){
98 path = m->hasPath(it->second);
99 //if the user has not given a path then, add inputdir. else leave path alone.
100 if (path == "") { parameters["align"] = inputDir + it->second; }
103 it = parameters.find("chimera");
104 //user has given a template file
105 if(it != parameters.end()){
106 path = m->hasPath(it->second);
107 //if the user has not given a path then, add inputdir. else leave path alone.
108 if (path == "") { parameters["chimera"] = inputDir + it->second; }
111 it = parameters.find("classify");
112 //user has given a template file
113 if(it != parameters.end()){
114 path = m->hasPath(it->second);
115 //if the user has not given a path then, add inputdir. else leave path alone.
116 if (path == "") { parameters["classify"] = inputDir + it->second; }
119 it = parameters.find("taxonomy");
120 //user has given a template file
121 if(it != parameters.end()){
122 path = m->hasPath(it->second);
123 //if the user has not given a path then, add inputdir. else leave path alone.
124 if (path == "") { parameters["taxonomy"] = inputDir + it->second; }
127 it = parameters.find("pipeline");
128 //user has given a template file
129 if(it != parameters.end()){
130 path = m->hasPath(it->second);
131 //if the user has not given a path then, add inputdir. else leave path alone.
132 if (path == "") { parameters["pipeline"] = inputDir + it->second; }
136 outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = ""; }
138 pipeFilename = validParameter.validFile(parameters, "pipeline", true);
139 if (pipeFilename == "not found") { pipeFilename = ""; }
140 else if (pipeFilename == "not open") { pipeFilename = ""; abort = true; }
142 string temp = validParameter.validFile(parameters, "processors", false); if (temp == "not found"){ temp = "1"; }
143 convert(temp, processors);
145 if (pipeFilename != "") {
146 abort = readUsersPipeline();
148 sffFile = validParameter.validFile(parameters, "sff", true);
149 if (sffFile == "not found") { m->mothurOut("sff is a required parameter for the pipeline command."); m->mothurOutEndLine(); abort = true; }
150 else if (sffFile == "not open") { sffFile = ""; abort = true; }
152 oligosFile = validParameter.validFile(parameters, "oligos", true);
153 if (oligosFile == "not found") { m->mothurOut("oligos is a required parameter for the pipeline command."); m->mothurOutEndLine(); abort = true; }
154 else if (oligosFile == "not open") { oligosFile = ""; abort = true; }
156 alignFile = validParameter.validFile(parameters, "align", true);
157 if (alignFile == "not found") { m->mothurOut("align is a required parameter for the pipeline command. Please provide the template to align with."); m->mothurOutEndLine(); abort = true; }
158 else if (alignFile == "not open") { alignFile = ""; abort = true; }
160 chimeraFile = validParameter.validFile(parameters, "chimera", true);
161 if (chimeraFile == "not found") { m->mothurOut("chimera is a required parameter for the pipeline command. Please provide the template to check for chimeras with."); m->mothurOutEndLine(); abort = true; }
162 else if (chimeraFile == "not open") { chimeraFile = ""; abort = true; }
164 classifyFile = validParameter.validFile(parameters, "classify", true);
165 if (classifyFile == "not found") { m->mothurOut("classify is a required parameter for the pipeline command. Please provide the template to use with the classifier."); m->mothurOutEndLine(); abort = true; }
166 else if (classifyFile == "not open") { classifyFile = ""; abort = true; }
168 taxonomyFile = validParameter.validFile(parameters, "taxonomy", true);
169 if (taxonomyFile == "not found") { m->mothurOut("taxonomy is a required parameter for the pipeline command."); m->mothurOutEndLine(); abort = true; }
170 else if (taxonomyFile == "not open") { taxonomyFile = ""; abort = true; }
175 catch(exception& e) {
176 m->errorOut(e, "PipelineCommand", "PipelineCommand");
180 //**********************************************************************************************************************
182 void PipelineCommand::help(){
184 m->mothurOut("The pipeline command is designed to guide you through your analysis using mothur.\n");
185 m->mothurOut("The pipeline command parameters are pipeline, sff, oligos, align, chimera, classify, taxonomy and processors.\n");
186 m->mothurOut("The sff parameter allows you to enter your sff file. It is required.\n");
187 m->mothurOut("The oligos parameter allows you to enter your oligos file. It is required.\n");
188 m->mothurOut("The align parameter allows you to enter a template to use with the aligner. It is required.\n");
189 m->mothurOut("The chimera parameter allows you to enter a template to use for chimera detection. It is required.\n");
190 m->mothurOut("The classify parameter allows you to enter a template to use for classification. It is required.\n");
191 m->mothurOut("The taxonomy parameter allows you to enter a taxonomy file for the classify template to use for classification. It is required.\n");
192 m->mothurOut("The processors parameter allows you to specify the number of processors to use. The default is 1.\n");
193 m->mothurOut("The pipeline parameter allows you to enter your own pipeline file. This file should look like a mothur batchfile, but where you would be using a mothur generated file, you can use mothurmade instead.\n");
194 m->mothurOut("First column contains the command name, and the second column contains the parameter options or 'defaults', meaning use defaults. You may leave out file options.\n");
195 m->mothurOut("Example: trim.seqs(processors=8, allfiles=T, maxambig=0, maxhomop=8, flip=T, bdiffs=1, pdiffs=2, qwindowaverage=35, qwindowsize=50, fasta=may1.v13.fasta, oligos=may1.v13.oligos, qfile=may1.v13.qual)\n");
196 m->mothurOut("then, you could enter unique.seqs(fasta=mothurmade), and mothur would use the .trim.fasta file from the trim.seqs command. \n");
197 m->mothurOut("then you could enter align.seqs(candidate=mothurmade, template=silva.v13.align, processors=8). , and mothur would use the .trim.unique.fasta file from the unique.seqs command. \n");
198 m->mothurOut("If no pipeline file is given then mothur will use Pat's pipeline. \n\n");
199 m->mothurOut("Here is a list of the commands used in Pat's pipeline.\n");
200 m->mothurOut("All paralellized commands will use the processors you entered.\n");
201 m->mothurOut("The sffinfo command takes your sff file and extracts the fasta and quality files.\n");
202 m->mothurOut("The trim.seqs command uses your oligos file and the quality and fasta files generated by sffinfo.\n");
203 m->mothurOut("The trim.seqs command sets the following parameters: allfiles=T, maxambig=0, maxhomop=8, flip=T, bdiffs=1, pdiffs=2, qwindowaverage=35, qwindowsize=50.\n");
204 m->mothurOut("The unique.seqs command uses the trimmed fasta file and removes redundant sequences, don't worry the names file generated by unique.seqs will be used in the pipeline to make sure they are included.\n");
205 m->mothurOut("The align.seqs command aligns the unique sequences using the aligners default options. \n");
206 m->mothurOut("The screen.seqs command screens the sequences using optimize=end-minlength. \n");
207 m->mothurOut("The pipeline uses chimera.slayer to detect chimeras using the default options. \n");
208 m->mothurOut("The pipeline removes all sequences determined to be chimeric by chimera.slayer. \n");
209 m->mothurOut("The filter.seqs command filters the sequences using vertical=T, trump=. \n");
210 m->mothurOut("The unique.seqs command uses the filtered fasta file and name file to remove sequences that have become redundant after filtering.\n");
211 m->mothurOut("The pre.cluster command clusters sequences that have no more than 2 differences.\n");
212 m->mothurOut("The dist.seqs command is used to generate a column and phylip formatted distance matrix using cutoff=0.20 for column.\n");
213 m->mothurOut("The pipeline uses cluster with method=average, hard=T. \n");
214 m->mothurOut("The classify.seqs command is used to classify the sequences using the bayesian method with a cutoff of 80.\n");
215 m->mothurOut("The phylotype command is used to cluster the sequences based on their classification.\n");
216 m->mothurOut("The clearcut command is used to generate a tree using neighbor=T. \n");
217 m->mothurOut("The summary.single and summary.shared commands are run on the otu files from cluster and phylotype commands. \n");
218 m->mothurOut("The summary.shared command uses calc=sharednseqs-sharedsobs-sharedchao-sharedace-anderberg-jclass-jest-kulczynski-kulczynskicody-lennon-ochiai-sorclass-sorest-whittaker-braycurtis-jabund-morisitahorn-sorabund-thetan-thetayc. \n");
219 m->mothurOut("The summary.single command uses calc=nseqs-sobs-coverage-bergerparker-chao-ace-jack-bootstrap-boneh-efron-shen-solow-shannon-npshannon-invsimpson-qstat-simpsoneven-shannoneven-heip-smithwilson. \n");
220 m->mothurOut("The classify.otu command is used to get the concensus taxonomy for otu files from cluster and phylotype commands. \n");
221 m->mothurOut("The phylo.diversity command run on the tree generated by clearcut with rarefy=T, iters=100. \n");
222 m->mothurOut("The unifrac commands are also run on the tree generated by clearcut with random=F, distance=T. \n");
223 m->mothurOut("\n\n");
225 catch(exception& e) {
226 m->errorOut(e, "PipelineCommand", "help");
231 //**********************************************************************************************************************
233 PipelineCommand::~PipelineCommand(){}
235 //**********************************************************************************************************************
237 int PipelineCommand::execute(){
239 if (abort == true) { return 0; }
241 if (pipeFilename == "") {
242 createPatsPipeline();
245 for (int i = 0; i < commands.size(); i++) {
246 m->mothurOutEndLine(); m->mothurOut("mothur > " + commands[i]); m->mothurOutEndLine();
248 if (m->control_pressed) { return 0; }
250 CommandOptionParser parser(commands[i]);
251 string commandName = parser.getCommandString();
252 string options = parser.getOptionString();
256 MPI_Comm_rank(MPI_COMM_WORLD, &pid);
258 if ((cFactory->MPIEnabled(commandName)) || (pid == 0)) {
261 //executes valid command
262 Command* command = cFactory->getCommand(commandName, options, "pipe");
265 //add output files to list
266 map<string, vector<string> > thisCommandsFile = command->getOutputFiles();
267 map<string, vector<string> >::iterator itMade;
268 for (itMade = thisCommandsFile.begin(); itMade != thisCommandsFile.end(); itMade++) {
269 vector<string> temp = itMade->second;
270 for (int j = 0; j < temp.size(); j++) { outputNames.push_back(temp[j]); }
278 }else { runUsersPipeline(); }
280 if (m->control_pressed) { return 0; }
282 m->mothurOutEndLine();
283 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
284 for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); }
285 m->mothurOutEndLine();
289 catch(exception& e) {
290 m->errorOut(e, "PipelineCommand", "execute");
294 //**********************************************************************************************************************
296 bool PipelineCommand::readUsersPipeline(){
300 m->openInputFile(pipeFilename, in);
302 string nextCommand = "";
304 map<string, vector<string> > mothurMadeFiles;
307 nextCommand = m->getline(in); m->gobble(in);
309 if (nextCommand[0] != '#') {
312 string commandName, options;
313 error = parseCommand(nextCommand, commandName, options);
315 if (error) { in.close(); return error; }
316 if (commandName == "pipeline.pds") { m->mothurOut("Cannot run the pipeline.pds command from inside the pipeline.pds command."); m->mothurOutEndLine(); in.close(); return true; }
318 error = checkForValidAndRequiredParameters(commandName, options, mothurMadeFiles);
320 if (error) { in.close(); return error; }
328 catch(exception& e) {
329 m->errorOut(e, "PipelineCommand", "readUsersPipeline");
333 //**********************************************************************************************************************
335 bool PipelineCommand::parseCommand(string nextCommand, string& name, string& options){
337 CommandOptionParser parser(nextCommand);
338 name = parser.getCommandString();
339 options = parser.getOptionString();
341 if (name == "") { return true; } //name == "" if () are not right
345 catch(exception& e) {
346 m->errorOut(e, "PipelineCommand", "parseCommand");
350 //**********************************************************************************************************************
352 bool PipelineCommand::checkForValidAndRequiredParameters(string name, string options, map<string, vector<string> >& mothurMadeFiles){
355 //get shell of the command so we can check to make sure its valid without running it
356 Command* command = cFactory->getCommand(name);
358 //check to make sure all parameters are valid for command
359 vector<string> validParameters = command->getValidParameters();
361 OptionParser parser(options);
362 map<string, string> parameters = parser.getParameters();
364 ValidParameters validParameter;
365 map<string, string>::iterator it;
366 map<string, vector<string> >::iterator itMade;
368 for (it = parameters.begin(); it != parameters.end(); it++) {
369 if (validParameter.isValidParameter(it->first, validParameters, it->second) != true) { return true; } // not valid
370 if (it->second == "mothurmade") {
371 itMade = mothurMadeFiles.find(it->first);
373 if (itMade == mothurMadeFiles.end()) {
374 m->mothurOut("You have the " + it->first + " listed as a mothurmade file for the " + name + " command, but it seems mothur will not make that file in your current pipeline, please correct."); m->mothurOutEndLine();
380 //is the command missing any required
381 vector<string> requiredParameters = command->getRequiredParameters();
386 if (requiredParameters.size() > 2) {
387 if (requiredParameters[(requiredParameters.size()-1)] == "or") { hasOr = true; }
390 for (int i = 0; i < requiredParameters.size(); i++) {
391 it = parameters.find(requiredParameters[i]);
393 if (it != parameters.end()) { numFound++; }
395 if (!hasOr) { m->mothurOut(name + " requires the " + requiredParameters[i] + " parameter, please correct."); m->mothurOutEndLine(); }
399 // if all are needed and not all are found
400 if ((!hasOr) && (numFound != requiredParameters.size())) { return true; }
401 //if one is needed and none are found
402 else if ((hasOr) && (numFound == 0)) { return true; }
405 map<string, vector<string> > thisCommandsFile = command->getOutputFiles();
406 for (itMade = thisCommandsFile.begin(); itMade != thisCommandsFile.end(); itMade++) {
407 mothurMadeFiles[itMade->first] = itMade->second; //adds any new types
412 catch(exception& e) {
413 m->errorOut(e, "PipelineCommand", "checkForValidAndRequiredParameters");
417 //**********************************************************************************************************************
418 int PipelineCommand::runUsersPipeline(){
421 m->openInputFile(pipeFilename, in);
423 string nextCommand = "";
425 map<string, vector<string> > mothurMadeFiles;
428 nextCommand = m->getline(in); m->gobble(in);
430 if (nextCommand[0] != '#') {
431 CommandOptionParser parser(nextCommand);
432 string commandName = parser.getCommandString();
433 string options = parser.getOptionString();
436 bool error = fillInMothurMade(options, mothurMadeFiles);
437 if (error) { in.close(); return 0; }
440 m->mothurOutEndLine(); m->mothurOut("mothur > " + commandName + "(" + options + ")"); m->mothurOutEndLine();
442 if (m->control_pressed) { return 0; }
446 MPI_Comm_rank(MPI_COMM_WORLD, &pid);
448 if ((cFactory->MPIEnabled(commandName)) || (pid == 0)) {
451 //executes valid command
452 Command* command = cFactory->getCommand(commandName, options, "pipe");
455 //add output files to list
456 map<string, vector<string> > thisCommandsFile = command->getOutputFiles();
457 map<string, vector<string> >::iterator itMade;
458 map<string, vector<string> >::iterator it;
459 for (itMade = thisCommandsFile.begin(); itMade != thisCommandsFile.end(); itMade++) {
461 vector<string> temp = itMade->second;
462 for (int k = 0; k < temp.size(); k++) { outputNames.push_back(temp[k]); } //
464 //update Mothur Made for each file
465 it = mothurMadeFiles.find(itMade->first);
467 if (it == mothurMadeFiles.end()) { //new type
469 mothurMadeFiles[itMade->first] = temp;
471 }else{ //update existing type
472 vector<string> oldFileNames = it->second;
473 //look at new files, see if an old version of the file exists, if so update, else just add.
474 //for example you may have abrecovery.fasta and amazon.fasta as old files and you created a new amazon.trim.fasta.
476 for (int k = 0; k < temp.size(); k++) {
479 string root = m->getSimpleName(temp[k]);
480 string individual = "";
481 for(int i=0;i<root.length();i++){
486 individual += root[i];
490 //look for that base name in oldfiles
492 for (int l = 0; l < oldFileNames.size(); l++) {
493 int pos = oldFileNames[l].find(root);
494 if (pos != string::npos) {
500 //if you found it update it, else add it
502 mothurMadeFiles[it->first][spot] = temp[k];
504 mothurMadeFiles[it->first].push_back(temp[k]);
518 catch(exception& e) {
519 m->errorOut(e, "PipelineCommand", "runUsersPipeline");
523 //**********************************************************************************************************************
524 bool PipelineCommand::fillInMothurMade(string& options, map<string, vector<string> > mothurMadeFiles){
526 OptionParser parser(options);
527 map<string, string> parameters = parser.getParameters();
528 map<string, string>::iterator it;
529 map<string, vector<string> >::iterator itMade;
533 //fill in mothurmade filenames
534 for (it = parameters.begin(); it != parameters.end(); it++) {
535 if (it->second == "mothurmade") {
536 itMade = mothurMadeFiles.find(it->first);
538 if (itMade == mothurMadeFiles.end()) {
539 m->mothurOut("Looking for a mothurmade " + it->first + " file, but it seems mothur has not made that file type in your current pipeline, please correct."); m->mothurOutEndLine();
542 vector<string> temp = itMade->second;
544 if (temp.size() > 1) {
545 //ask user which file to use
546 m->mothurOut("More than one file has been created for the " + it->first + " parameter. "); m->mothurOutEndLine();
547 for (int i = 0; i < temp.size(); i++) {
548 m->mothurOut(toString(i) + " - " + temp[i]); m->mothurOutEndLine();
550 m->mothurOut("Please select the number of the file you would like to use: ");
553 m->mothurOutJustToLog(toString(num)); m->mothurOutEndLine();
555 if ((num < 0) || (num > (temp.size()-1))) { m->mothurOut("Not a valid response, quitting."); m->mothurOutEndLine(); return true; }
557 parameters[it->first] = temp[num];
560 //clears buffer so next command doesn't have error
564 }else if (temp.size() == 0){
565 m->mothurOut("Sorry, we seem to think you created a " + it->first + " file, but it seems mothur doesn't have a filename."); m->mothurOutEndLine();
568 parameters[it->first] = temp[0];
573 options += it->first + "=" + parameters[it->first] + ", ";
576 //rip off extra comma
577 options = options.substr(0, (options.length()-2));
581 catch(exception& e) {
582 m->errorOut(e, "PipelineCommand", "fillInMothurMade");
587 //**********************************************************************************************************************
588 void PipelineCommand::createPatsPipeline(){
592 string thisCommand = "sffinfo(sff=" + sffFile + ")";
593 commands.push_back(thisCommand);
596 string fastaFile = m->getRootName(m->getSimpleName(sffFile)) + "fasta";
597 string qualFile = m->getRootName(m->getSimpleName(sffFile)) + "qual";
598 thisCommand = "trim.seqs(processors=" + toString(processors) + ", fasta=" + fastaFile + ", allfiles=F, maxambig=0, maxhomop=8, flip=T, bdiffs=1, pdiffs=2, qwindowaverage=35, qwindowsize=50, oligos=" + oligosFile + ", qfile=" + qualFile + ")";
599 commands.push_back(thisCommand);
602 string groupFile = m->getRootName(m->getSimpleName(fastaFile)) + "groups";
603 qualFile = m->getRootName(m->getSimpleName(fastaFile)) + "trim.qual";
604 fastaFile = m->getRootName(m->getSimpleName(fastaFile)) + "trim.fasta";
605 thisCommand = "unique.seqs(fasta=" + fastaFile + ")";
606 commands.push_back(thisCommand);
609 string nameFile = m->getRootName(m->getSimpleName(fastaFile)) + "names";
610 fastaFile = m->getRootName(m->getSimpleName(fastaFile)) + "unique" + m->getExtension(fastaFile);
611 thisCommand = "align.seqs(processors=" + toString(processors) + ", candidate=" + fastaFile + ", template=" + alignFile + ")";
612 commands.push_back(thisCommand);
615 fastaFile = m->getRootName(m->getSimpleName(fastaFile)) + "align";
616 thisCommand = "screen.seqs(processors=" + toString(processors) + ", fasta=" + fastaFile + ", name=" + nameFile + "group=" + groupFile + ", optimize=end-minlength)";
617 commands.push_back(thisCommand);
620 fastaFile = m->getRootName(m->getSimpleName(fastaFile)) + "good" + m->getExtension(fastaFile);
621 nameFile = m->getRootName(m->getSimpleName(nameFile)) + "good" + m->getExtension(nameFile);
622 groupFile = m->getRootName(m->getSimpleName(groupFile)) + "good" + m->getExtension(groupFile);
623 thisCommand = "chimera.slayer(processors=" + toString(processors) + ", fasta=" + fastaFile + ", template=" + chimeraFile + ")";
624 commands.push_back(thisCommand);
627 string accnosFile = m->getRootName(m->getSimpleName(fastaFile)) + "slayer.accnos";
628 fastaFile = m->getRootName(m->getSimpleName(fastaFile)) + "slayer.chimeras";
629 thisCommand = "remove.seqs(fasta=" + fastaFile + ", name=" + nameFile + ", group=" + groupFile + ", accnos=" + accnosFile + ", dups=T)";
630 commands.push_back(thisCommand);
633 nameFile = m->getRootName(m->getSimpleName(nameFile)) + "pick" + m->getExtension(nameFile);
634 groupFile = m->getRootName(m->getSimpleName(groupFile)) + "pick" + m->getExtension(groupFile);
635 fastaFile = m->getRootName(m->getSimpleName(fastaFile)) + "pick" + m->getExtension(fastaFile);
636 thisCommand = "filter.seqs(processors=" + toString(processors) + ", fasta=" + fastaFile + ", vertical=T, trump=.)";
637 commands.push_back(thisCommand);
640 fastaFile = m->getRootName(m->getSimpleName(fastaFile)) + "filter.fasta";
641 thisCommand = "unique.seqs(fasta=" + fastaFile + ", name=" + nameFile + ")";
642 commands.push_back(thisCommand);
645 nameFile = m->getRootName(m->getSimpleName(fastaFile)) + "names";
646 fastaFile = m->getRootName(m->getSimpleName(fastaFile)) + "unique" + m->getExtension(fastaFile);
647 thisCommand = "pre.cluster(fasta=" + fastaFile + ", name=" + nameFile + ", diffs=2)";
648 commands.push_back(thisCommand);
651 nameFile = m->getRootName(m->getSimpleName(fastaFile)) + "precluster.names";
652 fastaFile = m->getRootName(m->getSimpleName(fastaFile)) + "precluster" + m->getExtension(fastaFile);
653 thisCommand = "dist.seqs(processors=" + toString(processors) + ", fasta=" + fastaFile + ", cutoff=0.20)";
654 commands.push_back(thisCommand);
657 string columnFile = m->getRootName(m->getSimpleName(fastaFile)) + "dist";
658 thisCommand = "dist.seqs(processors=" + toString(processors) + ", fasta=" + fastaFile + ", output=lt)";
659 commands.push_back(thisCommand);
662 string phylipFile = m->getRootName(m->getSimpleName(fastaFile)) + "phylip.dist";
663 thisCommand = "read.dist(column=" + columnFile + ", name=" + nameFile + ")";
664 commands.push_back(thisCommand);
667 thisCommand = "cluster(method=average, hard=T)";
668 commands.push_back(thisCommand);
670 string listFile = m->getRootName(m->getSimpleName(columnFile)) + "an.list";
671 string rabundFile = m->getRootName(m->getSimpleName(columnFile)) + "an.rabund";
674 thisCommand = "degap.seqs(fasta=" + fastaFile + ")";
675 commands.push_back(thisCommand);
678 fastaFile = m->getRootName(m->getSimpleName(fastaFile)) + "ng.fasta";
679 thisCommand = "classify.seqs(processors=" + toString(processors) + ", fasta=" + fastaFile + ", name=" + nameFile + ", template=" + classifyFile + ", taxonomy=" + taxonomyFile + ", cutoff=80)";
680 commands.push_back(thisCommand);
682 string RippedTaxName = m->getRootName(m->getSimpleName(taxonomyFile));
683 RippedTaxName = m->getExtension(RippedTaxName.substr(0, RippedTaxName.length()-1));
684 if (RippedTaxName[0] == '.') { RippedTaxName = RippedTaxName.substr(1, RippedTaxName.length()); }
685 RippedTaxName += ".";
687 string fastaTaxFile = m->getRootName(m->getSimpleName(fastaFile)) + RippedTaxName + "taxonomy";
688 string taxSummaryFile = m->getRootName(m->getSimpleName(fastaFile)) + RippedTaxName + "tax.summary";
691 thisCommand = "phylotype(taxonomy=" + fastaTaxFile + ", name=" + nameFile + ")";
692 commands.push_back(thisCommand);
694 string phyloListFile = m->getRootName(m->getSimpleName(fastaTaxFile)) + "tx.list";
695 string phyloRabundFile = m->getRootName(m->getSimpleName(fastaTaxFile)) + "tx.rabund";
698 thisCommand = "clearcut(phylip=" + phylipFile + ", neighbor=T)";
699 commands.push_back(thisCommand);
701 string treeFile = m->getRootName(m->getSimpleName(phylipFile)) + "tre";
704 thisCommand = "read.otu(list=" + listFile + ", group=" + groupFile + ", label=0.03)";
705 commands.push_back(thisCommand);
707 string sharedFile = m->getRootName(m->getSimpleName(listFile)) + "shared";
710 thisCommand = "read.otu(list=" + phyloListFile + ", group=" + groupFile + ", label=1)";
711 commands.push_back(thisCommand);
713 string phyloSharedFile = m->getRootName(m->getSimpleName(phyloListFile)) + "shared";
716 thisCommand = "read.otu(shared=" + sharedFile + ")";
717 commands.push_back(thisCommand);
720 thisCommand = "summary.single(calc=nseqs-sobs-coverage-bergerparker-chao-ace-jack-bootstrap-boneh-efron-shen-solow-shannon-npshannon-invsimpson-qstat-simpsoneven-shannoneven-heip-smithwilson, size=5000)";
721 commands.push_back(thisCommand);
724 thisCommand = "summary.shared(calc=sharednseqs-sharedsobs-sharedchao-sharedace-anderberg-jclass-jest-kulczynski-kulczynskicody-lennon-ochiai-sorclass-sorest-whittaker-braycurtis-jabund-morisitahorn-sorabund-thetan-thetayc)";
725 commands.push_back(thisCommand);
728 thisCommand = "read.otu(rabund=" + rabundFile + ", label=0.03)";
729 commands.push_back(thisCommand);
732 thisCommand = "summary.single(calc=nseqs-sobs-coverage-bergerparker-chao-ace-jack-bootstrap-boneh-efron-shen-solow-shannon-npshannon-invsimpson-qstat-simpsoneven-shannoneven-heip-smithwilson, size=5000)";
733 commands.push_back(thisCommand);
736 thisCommand = "read.otu(shared=" + phyloSharedFile + ")";
737 commands.push_back(thisCommand);
740 thisCommand = "summary.single(calc=nseqs-sobs-coverage-bergerparker-chao-ace-jack-bootstrap-boneh-efron-shen-solow-shannon-npshannon-invsimpson-qstat-simpsoneven-shannoneven-heip-smithwilson, size=5000)";
741 commands.push_back(thisCommand);
744 thisCommand = "summary.shared(calc=sharednseqs-sharedsobs-sharedchao-sharedace-anderberg-jclass-jest-kulczynski-kulczynskicody-lennon-ochiai-sorclass-sorest-whittaker-braycurtis-jabund-morisitahorn-sorabund-thetan-thetayc)";
745 commands.push_back(thisCommand);
748 thisCommand = "read.otu(rabund=" + phyloRabundFile + ", label=1)";
749 commands.push_back(thisCommand);
752 thisCommand = "summary.single(calc=nseqs-sobs-coverage-bergerparker-chao-ace-jack-bootstrap-boneh-efron-shen-solow-shannon-npshannon-invsimpson-qstat-simpsoneven-shannoneven-heip-smithwilson, size=5000)";
753 commands.push_back(thisCommand);
756 thisCommand = "classify.otu(taxonomy=" + fastaTaxFile + ", name=" + nameFile + ", list=" + listFile + ", cutoff=51, label=0.03)";
757 commands.push_back(thisCommand);
760 thisCommand = "classify.otu(taxonomy=" + fastaTaxFile + ", name=" + nameFile + ", list=" + phyloListFile + ", cutoff=51, label=1)";
761 commands.push_back(thisCommand);
764 thisCommand = "read.tree(tree=" + treeFile + ", name=" + nameFile + ", group=" + groupFile + ")";
765 commands.push_back(thisCommand);
768 thisCommand = "phylo.diversity(iters=100,rarefy=T)";
769 commands.push_back(thisCommand);
772 thisCommand = "unifrac.weighted(random=false, distance=true, groups=all, processors=" + toString(processors) + ")";
773 commands.push_back(thisCommand);
776 thisCommand = "unifrac.unweighted(random=false, distance=true, processors=" + toString(processors) + ")";
777 commands.push_back(thisCommand);
781 catch(exception& e) {
782 m->errorOut(e, "PipelineCommand", "createPatsPipeline");
787 //**********************************************************************************************************************