2 * pipelinepdscommand.cpp
5 * Created by westcott on 10/5/10.
6 * Copyright 2010 Schloss Lab. All rights reserved.
10 #include "pipelinepdscommand.h"
11 #include "sffinfocommand.h"
12 #include "commandoptionparser.hpp"
14 //**********************************************************************************************************************
15 vector<string> PipelineCommand::setParameters(){
17 CommandParameter psff("sff", "InputTypes", "", "", "none", "oneRequired", "pipe",false,false); parameters.push_back(psff);
18 CommandParameter poligos("oligos", "InputTypes", "", "", "none", "oneRequired", "pipe",false,false); parameters.push_back(poligos);
19 CommandParameter palign("align", "InputTypes", "", "", "none", "oneRequired", "pipe",false,false); parameters.push_back(palign);
20 CommandParameter pchimera("chimera", "InputTypes", "", "", "none", "oneRequired", "pipe",false,false); parameters.push_back(pchimera);
21 CommandParameter pclassify("classify", "InputTypes", "", "", "none", "oneRequired", "pipe",false,false); parameters.push_back(pclassify);
22 CommandParameter ptaxonomy("taxonomy", "InputTypes", "", "", "none", "oneRequired", "pipe",false,false); parameters.push_back(ptaxonomy);
23 CommandParameter ppipeline("pipeline", "InputTypes", "", "", "none", "oneRequired", "none",false,false); parameters.push_back(ppipeline);
24 CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors);
25 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
26 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
28 vector<string> myArray;
29 for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); }
33 m->errorOut(e, "PipelineCommand", "setParameters");
37 //**********************************************************************************************************************
38 string PipelineCommand::getHelpString(){
40 string helpString = "";
41 helpString += "The pipeline.pds command is designed to guide you through your analysis using mothur.\n";
42 helpString += "The pipeline.pds command parameters are pipeline, sff, oligos, align, chimera, classify, taxonomy and processors.\n";
43 helpString += "The sff parameter allows you to enter your sff file. It is required, if not using pipeline parameter.\n";
44 helpString += "The oligos parameter allows you to enter your oligos file. It is required, if not using pipeline parameter.\n";
45 helpString += "The align parameter allows you to enter a template to use with the aligner. It is required, if not using pipeline parameter.\n";
46 helpString += "The chimera parameter allows you to enter a template to use for chimera detection. It is required, if not using pipeline parameter.\n";
47 helpString += "The classify parameter allows you to enter a template to use for classification. It is required, if not using pipeline parameter.\n";
48 helpString += "The taxonomy parameter allows you to enter a taxonomy file for the classify template to use for classification. It is required, if not using pipeline parameter.\n";
49 helpString += "The processors parameter allows you to specify the number of processors to use. The default is 1.\n";
50 helpString += "The pipeline parameter allows you to enter your own pipeline file. This file should look like a mothur batchfile, but where you would be using a mothur generated file, you can use current instead.\n";
51 helpString += "Example: trim.seqs(processors=8, allfiles=T, maxambig=0, maxhomop=8, flip=T, bdiffs=1, pdiffs=2, qwindowaverage=35, qwindowsize=50, fasta=may1.v13.fasta, oligos=may1.v13.oligos, qfile=may1.v13.qual)\n";
52 helpString += "then, you could enter unique.seqs(fasta=current), and mothur would use the .trim.fasta file from the trim.seqs command. \n";
53 helpString += "then you could enter align.seqs(candidate=current, template=silva.v13.align, processors=8). , and mothur would use the .trim.unique.fasta file from the unique.seqs command. \n";
54 helpString += "If no pipeline file is given then mothur will use Pat's pipeline. \n";
55 helpString += "Here is a list of the commands used in Pat's pipeline.\n";
56 helpString += "All paralellized commands will use the processors you entered.\n";
57 helpString += "The sffinfo command takes your sff file and extracts the fasta and quality files.\n";
58 helpString += "The trim.seqs command uses your oligos file and the quality and fasta files generated by sffinfo.\n";
59 helpString += "The trim.seqs command sets the following parameters: allfiles=T, maxambig=0, maxhomop=8, flip=T, bdiffs=1, pdiffs=2, qwindowaverage=35, qwindowsize=50.\n";
60 helpString += "The unique.seqs command uses the trimmed fasta file and removes redundant sequences, don't worry the names file generated by unique.seqs will be used in the pipeline to make sure they are included.\n";
61 helpString += "The align.seqs command aligns the unique sequences using the aligners default options. \n";
62 helpString += "The screen.seqs command screens the sequences using optimize=end-minlength. \n";
63 helpString += "The pipeline uses chimera.slayer to detect chimeras using the default options. \n";
64 helpString += "The pipeline removes all sequences determined to be chimeric by chimera.slayer. \n";
65 helpString += "The filter.seqs command filters the sequences using vertical=T, trump=. \n";
66 helpString += "The unique.seqs command uses the filtered fasta file and name file to remove sequences that have become redundant after filtering.\n";
67 helpString += "The pre.cluster command clusters sequences that have no more than 2 differences.\n";
68 helpString += "The dist.seqs command is used to generate a column and phylip formatted distance matrix using cutoff=0.20 for column.\n";
69 helpString += "The pipeline uses cluster with method=average, hard=T. \n";
70 helpString += "The classify.seqs command is used to classify the sequences using the bayesian method with a cutoff of 80.\n";
71 helpString += "The phylotype command is used to cluster the sequences based on their classification.\n";
72 helpString += "The clearcut command is used to generate a tree using neighbor=T. \n";
73 helpString += "The summary.single and summary.shared commands are run on the otu files from cluster and phylotype commands. \n";
74 helpString += "The summary.shared command uses calc=sharednseqs-sharedsobs-sharedchao-sharedace-anderberg-jclass-jest-kulczynski-kulczynskicody-lennon-ochiai-sorclass-sorest-whittaker-braycurtis-jabund-morisitahorn-sorabund-thetan-thetayc. \n";
75 helpString += "The summary.single command uses calc=nseqs-sobs-coverage-bergerparker-chao-ace-jack-bootstrap-boneh-efron-shen-solow-shannon-npshannon-invsimpson-qstat-simpsoneven-shannoneven-heip-smithwilson. \n";
76 helpString += "The classify.otu command is used to get the concensus taxonomy for otu files from cluster and phylotype commands. \n";
77 helpString += "The phylo.diversity command run on the tree generated by clearcut with rarefy=T, iters=100. \n";
78 helpString += "The unifrac commands are also run on the tree generated by clearcut with random=F, distance=T. \n";
83 m->errorOut(e, "PipelineCommand", "getHelpString");
89 //**********************************************************************************************************************
90 PipelineCommand::PipelineCommand(string option) {
92 cFactory = CommandFactory::getInstance();
93 abort = false; calledHelp = false;
95 //allow user to run help
96 if(option == "help") { help(); abort = true; calledHelp = true; }
99 vector<string> myArray = setParameters();
101 OptionParser parser(option);
102 map<string, string> parameters = parser.getParameters();
104 ValidParameters validParameter;
105 map<string, string>::iterator it;
107 //check to make sure all parameters are valid for command
108 for (it = parameters.begin(); it != parameters.end(); it++) {
109 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
112 //if the user changes the input directory command factory will send this info to us in the output parameter
113 string inputDir = validParameter.validFile(parameters, "inputdir", false);
114 if (inputDir == "not found"){ inputDir = ""; }
117 it = parameters.find("sff");
118 //user has given a template file
119 if(it != parameters.end()){
120 path = m->hasPath(it->second);
121 //if the user has not given a path then, add inputdir. else leave path alone.
122 if (path == "") { parameters["sff"] = inputDir + it->second; }
125 it = parameters.find("oligos");
126 //user has given a template file
127 if(it != parameters.end()){
128 path = m->hasPath(it->second);
129 //if the user has not given a path then, add inputdir. else leave path alone.
130 if (path == "") { parameters["oligos"] = inputDir + it->second; }
133 it = parameters.find("align");
134 //user has given a template file
135 if(it != parameters.end()){
136 path = m->hasPath(it->second);
137 //if the user has not given a path then, add inputdir. else leave path alone.
138 if (path == "") { parameters["align"] = inputDir + it->second; }
141 it = parameters.find("chimera");
142 //user has given a template file
143 if(it != parameters.end()){
144 path = m->hasPath(it->second);
145 //if the user has not given a path then, add inputdir. else leave path alone.
146 if (path == "") { parameters["chimera"] = inputDir + it->second; }
149 it = parameters.find("classify");
150 //user has given a template file
151 if(it != parameters.end()){
152 path = m->hasPath(it->second);
153 //if the user has not given a path then, add inputdir. else leave path alone.
154 if (path == "") { parameters["classify"] = inputDir + it->second; }
157 it = parameters.find("taxonomy");
158 //user has given a template file
159 if(it != parameters.end()){
160 path = m->hasPath(it->second);
161 //if the user has not given a path then, add inputdir. else leave path alone.
162 if (path == "") { parameters["taxonomy"] = inputDir + it->second; }
165 it = parameters.find("pipeline");
166 //user has given a template file
167 if(it != parameters.end()){
168 path = m->hasPath(it->second);
169 //if the user has not given a path then, add inputdir. else leave path alone.
170 if (path == "") { parameters["pipeline"] = inputDir + it->second; }
174 outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = ""; }
176 pipeFilename = validParameter.validFile(parameters, "pipeline", true);
177 if (pipeFilename == "not found") { pipeFilename = ""; }
178 else if (pipeFilename == "not open") { pipeFilename = ""; abort = true; }
180 string temp = validParameter.validFile(parameters, "processors", false); if (temp == "not found"){ temp = m->getProcessors(); }
181 m->setProcessors(temp);
182 convert(temp, processors);
184 if (pipeFilename != "") {
185 abort = readUsersPipeline();
187 sffFile = validParameter.validFile(parameters, "sff", true);
188 if (sffFile == "not found") { m->mothurOut("sff is a required parameter for the pipeline command."); m->mothurOutEndLine(); abort = true; }
189 else if (sffFile == "not open") { sffFile = ""; abort = true; }
191 oligosFile = validParameter.validFile(parameters, "oligos", true);
192 if (oligosFile == "not found") { m->mothurOut("oligos is a required parameter for the pipeline command."); m->mothurOutEndLine(); abort = true; }
193 else if (oligosFile == "not open") { oligosFile = ""; abort = true; }
195 alignFile = validParameter.validFile(parameters, "align", true);
196 if (alignFile == "not found") { m->mothurOut("align is a required parameter for the pipeline command. Please provide the template to align with."); m->mothurOutEndLine(); abort = true; }
197 else if (alignFile == "not open") { alignFile = ""; abort = true; }
199 chimeraFile = validParameter.validFile(parameters, "chimera", true);
200 if (chimeraFile == "not found") { m->mothurOut("chimera is a required parameter for the pipeline command. Please provide the template to check for chimeras with."); m->mothurOutEndLine(); abort = true; }
201 else if (chimeraFile == "not open") { chimeraFile = ""; abort = true; }
203 classifyFile = validParameter.validFile(parameters, "classify", true);
204 if (classifyFile == "not found") { m->mothurOut("classify is a required parameter for the pipeline command. Please provide the template to use with the classifier."); m->mothurOutEndLine(); abort = true; }
205 else if (classifyFile == "not open") { classifyFile = ""; abort = true; }
207 taxonomyFile = validParameter.validFile(parameters, "taxonomy", true);
208 if (taxonomyFile == "not found") { m->mothurOut("taxonomy is a required parameter for the pipeline command."); m->mothurOutEndLine(); abort = true; }
209 else if (taxonomyFile == "not open") { taxonomyFile = ""; abort = true; }
214 catch(exception& e) {
215 m->errorOut(e, "PipelineCommand", "PipelineCommand");
219 //**********************************************************************************************************************
221 int PipelineCommand::execute(){
223 if (abort == true) { if (calledHelp) { return 0; } return 2; }
225 int start = time(NULL);
227 if (pipeFilename == "") {
228 createPatsPipeline();
231 for (int i = 0; i < commands.size(); i++) {
232 m->mothurOutEndLine(); m->mothurOut("mothur > " + commands[i]); m->mothurOutEndLine();
234 if (m->control_pressed) { return 0; }
236 CommandOptionParser parser(commands[i]);
237 string commandName = parser.getCommandString();
238 string options = parser.getOptionString();
242 MPI_Comm_rank(MPI_COMM_WORLD, &pid);
244 if ((cFactory->MPIEnabled(commandName)) || (pid == 0)) {
247 //executes valid command
248 Command* command = cFactory->getCommand(commandName, options, "pipe");
251 //add output files to list
252 map<string, vector<string> > thisCommandsFile = command->getOutputFiles();
253 map<string, vector<string> >::iterator itMade;
254 for (itMade = thisCommandsFile.begin(); itMade != thisCommandsFile.end(); itMade++) {
255 vector<string> temp = itMade->second;
256 for (int j = 0; j < temp.size(); j++) { outputNames.push_back(temp[j]); }
264 }else { runUsersPipeline(); }
266 if (m->control_pressed) { return 0; }
268 m->mothurOut("It took " + toString(time(NULL) - start) + " secs to run the pipeline analysis."); m->mothurOutEndLine(); m->mothurOutEndLine();
270 m->mothurOutEndLine();
271 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
272 for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); }
273 m->mothurOutEndLine();
277 catch(exception& e) {
278 m->errorOut(e, "PipelineCommand", "execute");
282 //**********************************************************************************************************************
284 bool PipelineCommand::readUsersPipeline(){
288 m->openInputFile(pipeFilename, in);
290 string nextCommand = "";
292 map<string, vector<string> > mothurMadeFiles;
295 nextCommand = m->getline(in); m->gobble(in);
297 if (nextCommand[0] != '#') {
300 string commandName, options;
301 error = parseCommand(nextCommand, commandName, options);
303 if (error) { in.close(); return error; }
304 if (commandName == "pipeline.pds") { m->mothurOut("Cannot run the pipeline.pds command from inside the pipeline.pds command."); m->mothurOutEndLine(); in.close(); return true; }
306 error = checkForValidAndRequiredParameters(commandName, options, mothurMadeFiles);
308 if (error) { in.close(); return error; }
316 catch(exception& e) {
317 m->errorOut(e, "PipelineCommand", "readUsersPipeline");
321 //**********************************************************************************************************************
323 bool PipelineCommand::parseCommand(string nextCommand, string& name, string& options){
325 CommandOptionParser parser(nextCommand);
326 name = parser.getCommandString();
327 options = parser.getOptionString();
329 if (name == "") { return true; } //name == "" if () are not right
333 catch(exception& e) {
334 m->errorOut(e, "PipelineCommand", "parseCommand");
338 //**********************************************************************************************************************
340 bool PipelineCommand::checkForValidAndRequiredParameters(string name, string options, map<string, vector<string> >& mothurMadeFiles){
343 if (name == "system") { return false; }
345 //get shell of the command so we can check to make sure its valid without running it
346 Command* command = cFactory->getCommand(name);
348 //check to make sure all parameters are valid for command
349 vector<string> validParameters = command->setParameters();
351 OptionParser parser(options);
352 map<string, string> parameters = parser.getParameters();
354 ValidParameters validParameter;
355 map<string, string>::iterator it;
356 map<string, vector<string> >::iterator itMade;
358 for (it = parameters.begin(); it != parameters.end(); it++) {
360 if (validParameter.isValidParameter(it->first, validParameters, it->second) != true) { return true; } // not valid
361 if (it->second == "current") {
362 itMade = mothurMadeFiles.find(it->first);
364 if (itMade == mothurMadeFiles.end()) {
365 m->mothurOut("You have the " + it->first + " listed as a current file for the " + name + " command, but it seems mothur will not make that file in your current pipeline, please correct."); m->mothurOutEndLine();
371 //is the command missing any required
372 vector<CommandParameter> commandParameters = command->getParameters();
373 vector<string> requiredParameters;
374 for (int i = 0; i < commandParameters.size(); i++) {
375 if (commandParameters[i].required) {
376 requiredParameters.push_back(commandParameters[i].name);
380 for (int i = 0; i < requiredParameters.size(); i++) {
381 it = parameters.find(requiredParameters[i]);
383 if (it == parameters.end()) {
385 string paraToLookFor = requiredParameters[i];
387 //does mothur have a current file for this?
388 itMade = mothurMadeFiles.find(requiredParameters[i]);
390 if (itMade == mothurMadeFiles.end()) {
391 m->mothurOut(name + " requires the " + requiredParameters[i] + " parameter, please correct."); m->mothurOutEndLine();
399 map<string, vector<string> > thisCommandsFile = command->getOutputFiles();
400 for (itMade = thisCommandsFile.begin(); itMade != thisCommandsFile.end(); itMade++) {
401 mothurMadeFiles[itMade->first] = itMade->second; //adds any new types
406 catch(exception& e) {
407 m->errorOut(e, "PipelineCommand", "checkForValidAndRequiredParameters");
411 //**********************************************************************************************************************
412 int PipelineCommand::runUsersPipeline(){
415 m->openInputFile(pipeFilename, in);
417 string nextCommand = "";
419 map<string, vector<string> > mothurMadeFiles;
422 nextCommand = m->getline(in); m->gobble(in);
424 if (nextCommand[0] != '#') {
425 CommandOptionParser parser(nextCommand);
426 string commandName = parser.getCommandString();
427 string options = parser.getOptionString();
429 if ((options != "") && (commandName != "system")) {
430 bool error = fillInMothurMade(options, mothurMadeFiles);
431 if (error) { in.close(); return 0; }
434 m->mothurOutEndLine(); m->mothurOut("mothur > " + commandName + "(" + options + ")"); m->mothurOutEndLine();
436 if (m->control_pressed) { return 0; }
440 MPI_Comm_rank(MPI_COMM_WORLD, &pid);
442 if ((cFactory->MPIEnabled(commandName)) || (pid == 0)) {
445 //executes valid command
446 Command* command = cFactory->getCommand(commandName, options, "pipe");
449 //add output files to list
450 map<string, vector<string> > thisCommandsFile = command->getOutputFiles();
451 map<string, vector<string> >::iterator itMade;
452 map<string, vector<string> >::iterator it;
453 for (itMade = thisCommandsFile.begin(); itMade != thisCommandsFile.end(); itMade++) {
455 vector<string> temp = itMade->second;
456 for (int k = 0; k < temp.size(); k++) { outputNames.push_back(temp[k]); } //
458 //update Mothur Made for each file
459 it = mothurMadeFiles.find(itMade->first);
461 if (it == mothurMadeFiles.end()) { //new type
463 mothurMadeFiles[itMade->first] = temp;
465 }else{ //update existing type
466 vector<string> oldFileNames = it->second;
467 //look at new files, see if an old version of the file exists, if so update, else just add.
468 //for example you may have abrecovery.fasta and amazon.fasta as old files and you created a new amazon.trim.fasta.
470 for (int k = 0; k < temp.size(); k++) {
473 string root = m->getSimpleName(temp[k]);
474 string individual = "";
475 for(int i=0;i<root.length();i++){
480 individual += root[i];
484 //look for that base name in oldfiles
486 for (int l = 0; l < oldFileNames.size(); l++) {
487 int pos = oldFileNames[l].find(root);
488 if (pos != string::npos) {
494 //if you found it update it, else add it
496 mothurMadeFiles[it->first][spot] = temp[k];
498 mothurMadeFiles[it->first].push_back(temp[k]);
514 catch(exception& e) {
515 m->errorOut(e, "PipelineCommand", "runUsersPipeline");
519 //**********************************************************************************************************************
520 bool PipelineCommand::fillInMothurMade(string& options, map<string, vector<string> >& mothurMadeFiles){
523 OptionParser parser(options);
524 map<string, string> parameters = parser.getParameters();
525 map<string, string>::iterator it;
526 map<string, vector<string> >::iterator itMade;
530 //fill in mothurmade filenames
531 for (it = parameters.begin(); it != parameters.end(); it++) {
532 string paraType = it->first;
533 string tempOption = it->second;
535 if (tempOption == "current") {
537 itMade = mothurMadeFiles.find(paraType);
539 if (itMade == mothurMadeFiles.end()) {
540 m->mothurOut("Looking for a current " + paraType + " file, but it seems mothur has not made that file type in your current pipeline, please correct."); m->mothurOutEndLine();
543 vector<string> temp = itMade->second;
545 if (temp.size() > 1) {
546 //ask user which file to use
547 m->mothurOut("More than one file has been created for the " + paraType + " parameter. "); m->mothurOutEndLine();
548 for (int i = 0; i < temp.size(); i++) {
549 m->mothurOut(toString(i) + " - " + temp[i]); m->mothurOutEndLine();
552 m->mothurOut("Please select the number of the file you would like to use: ");
555 m->mothurOutJustToLog(toString(num)); m->mothurOutEndLine();
557 if ((num < 0) || (num > (temp.size()-1))) { m->mothurOut("Not a valid response, quitting."); m->mothurOutEndLine(); return true; }
559 tempOption = temp[num];
562 //clears buffer so next command doesn't have error
566 vector<string> newTemp;
567 for (int i = 0; i < temp.size(); i++) {
568 if (i == num) { newTemp.push_back(temp[i]); }
570 m->mothurOut("Would you like to remove " + temp[i] + " as an option for " + paraType + ", (y/n): "); m->mothurOutEndLine();
573 m->mothurOutJustToLog(response); m->mothurOutEndLine();
575 if (response == "n") { newTemp.push_back(temp[i]); }
577 //clears buffer so next command doesn't have error
583 mothurMadeFiles[paraType] = newTemp;
586 }else if (temp.size() == 0){
587 m->mothurOut("Sorry, we seem to think you created a " + paraType + " file, but it seems mothur doesn't have a filename."); m->mothurOutEndLine();
590 tempOption = temp[0];
595 options += it->first + "=" + tempOption + ", ";
598 //rip off extra comma
599 options = options.substr(0, (options.length()-2));
603 catch(exception& e) {
604 m->errorOut(e, "PipelineCommand", "fillInMothurMade");
609 //**********************************************************************************************************************
610 void PipelineCommand::createPatsPipeline(){
614 string thisCommand = "sffinfo(sff=" + sffFile + ")";
615 commands.push_back(thisCommand);
618 string fastaFile = m->getRootName(m->getSimpleName(sffFile)) + "fasta";
619 string qualFile = m->getRootName(m->getSimpleName(sffFile)) + "qual";
620 thisCommand = "trim.seqs(processors=" + toString(processors) + ", fasta=current, allfiles=T, maxambig=0, maxhomop=8, flip=T, bdiffs=1, pdiffs=2, qwindowaverage=35, qwindowsize=50, oligos=" + oligosFile + ", qfile=current)";
621 commands.push_back(thisCommand);
624 string groupFile = m->getRootName(m->getSimpleName(fastaFile)) + "groups";
625 qualFile = m->getRootName(m->getSimpleName(fastaFile)) + "trim.qual";
626 fastaFile = m->getRootName(m->getSimpleName(fastaFile)) + "trim.fasta";
627 thisCommand = "unique.seqs(fasta=current)";
628 commands.push_back(thisCommand);
631 string nameFile = m->getRootName(m->getSimpleName(fastaFile)) + "names";
632 fastaFile = m->getRootName(m->getSimpleName(fastaFile)) + "unique" + m->getExtension(fastaFile);
633 thisCommand = "align.seqs(processors=" + toString(processors) + ", candidate=current, template=" + alignFile + ")";
634 commands.push_back(thisCommand);
637 fastaFile = m->getRootName(m->getSimpleName(fastaFile)) + "align";
638 thisCommand = "screen.seqs(processors=" + toString(processors) + ", fasta=current, name=current, group=current, optimize=end-minlength)";
639 commands.push_back(thisCommand);
642 fastaFile = m->getRootName(m->getSimpleName(fastaFile)) + "good" + m->getExtension(fastaFile);
643 nameFile = m->getRootName(m->getSimpleName(nameFile)) + "good" + m->getExtension(nameFile);
644 groupFile = m->getRootName(m->getSimpleName(groupFile)) + "good" + m->getExtension(groupFile);
645 thisCommand = "chimera.slayer(processors=" + toString(processors) + ", fasta=current, template=" + chimeraFile + ")";
646 commands.push_back(thisCommand);
649 string accnosFile = m->getRootName(m->getSimpleName(fastaFile)) + "slayer.accnos";
650 thisCommand = "remove.seqs(fasta=current, name=current, group=current, accnos=current, dups=T)";
651 commands.push_back(thisCommand);
654 nameFile = m->getRootName(m->getSimpleName(nameFile)) + "pick" + m->getExtension(nameFile);
655 groupFile = m->getRootName(m->getSimpleName(groupFile)) + "pick" + m->getExtension(groupFile);
656 fastaFile = m->getRootName(m->getSimpleName(fastaFile)) + "pick" + m->getExtension(fastaFile);
657 thisCommand = "filter.seqs(processors=" + toString(processors) + ", fasta=current, vertical=T, trump=.)";
658 commands.push_back(thisCommand);
661 fastaFile = m->getRootName(m->getSimpleName(fastaFile)) + "filter.fasta";
662 thisCommand = "unique.seqs(fasta=current, name=current)";
663 commands.push_back(thisCommand);
666 nameFile = m->getRootName(m->getSimpleName(fastaFile)) + "names";
667 fastaFile = m->getRootName(m->getSimpleName(fastaFile)) + "unique" + m->getExtension(fastaFile);
668 thisCommand = "pre.cluster(fasta=current, name=current, diffs=2)";
669 commands.push_back(thisCommand);
672 nameFile = m->getRootName(m->getSimpleName(fastaFile)) + "precluster.names";
673 fastaFile = m->getRootName(m->getSimpleName(fastaFile)) + "precluster" + m->getExtension(fastaFile);
674 thisCommand = "dist.seqs(processors=" + toString(processors) + ", fasta=current, cutoff=0.20)";
675 commands.push_back(thisCommand);
678 string columnFile = m->getRootName(m->getSimpleName(fastaFile)) + "dist";
679 thisCommand = "dist.seqs(processors=" + toString(processors) + ", fasta=current, output=lt)";
680 commands.push_back(thisCommand);
683 string phylipFile = m->getRootName(m->getSimpleName(fastaFile)) + "phylip.dist";
684 thisCommand = "read.dist(column=current, name=current)";
685 commands.push_back(thisCommand);
688 thisCommand = "cluster(method=average, hard=T)";
689 commands.push_back(thisCommand);
691 string listFile = m->getRootName(m->getSimpleName(columnFile)) + "an.list";
692 string rabundFile = m->getRootName(m->getSimpleName(columnFile)) + "an.rabund";
695 thisCommand = "degap.seqs(fasta=current)";
696 commands.push_back(thisCommand);
699 fastaFile = m->getRootName(m->getSimpleName(fastaFile)) + "ng.fasta";
700 thisCommand = "classify.seqs(processors=" + toString(processors) + ", fasta=current, name=current, template=" + classifyFile + ", taxonomy=" + taxonomyFile + ", cutoff=80)";
701 commands.push_back(thisCommand);
703 string RippedTaxName = m->getRootName(m->getSimpleName(taxonomyFile));
704 RippedTaxName = m->getExtension(RippedTaxName.substr(0, RippedTaxName.length()-1));
705 if (RippedTaxName[0] == '.') { RippedTaxName = RippedTaxName.substr(1, RippedTaxName.length()); }
706 RippedTaxName += ".";
708 string fastaTaxFile = m->getRootName(m->getSimpleName(fastaFile)) + RippedTaxName + "taxonomy";
709 string taxSummaryFile = m->getRootName(m->getSimpleName(fastaFile)) + RippedTaxName + "tax.summary";
712 thisCommand = "phylotype(taxonomy=current, name=current)";
713 commands.push_back(thisCommand);
715 string phyloListFile = m->getRootName(m->getSimpleName(fastaTaxFile)) + "tx.list";
716 string phyloRabundFile = m->getRootName(m->getSimpleName(fastaTaxFile)) + "tx.rabund";
719 thisCommand = "clearcut(phylip=current, neighbor=T)";
720 commands.push_back(thisCommand);
722 string treeFile = m->getRootName(m->getSimpleName(phylipFile)) + "tre";
725 thisCommand = "make.shared(list=" + listFile + ", group=" + groupFile + ", label=0.03)";
726 commands.push_back(thisCommand);
728 string sharedFile = m->getRootName(m->getSimpleName(listFile)) + "shared";
731 thisCommand = "make.shared(list=" + phyloListFile + ", group=" + groupFile + ", label=1)";
732 commands.push_back(thisCommand);
734 string phyloSharedFile = m->getRootName(m->getSimpleName(phyloListFile)) + "shared";
737 thisCommand = "set.current(shared=" + sharedFile + ")";
738 commands.push_back(thisCommand);
741 thisCommand = "summary.single(shared=current, calc=nseqs-sobs-coverage-bergerparker-chao-ace-jack-bootstrap-boneh-efron-shen-solow-shannon-npshannon-invsimpson-qstat-simpsoneven-shannoneven-heip-smithwilson, size=5000)";
742 commands.push_back(thisCommand);
745 thisCommand = "summary.shared(shared=current, calc=sharednseqs-sharedsobs-sharedchao-sharedace-anderberg-jclass-jest-kulczynski-kulczynskicody-lennon-ochiai-sorclass-sorest-whittaker-braycurtis-jabund-morisitahorn-sorabund-thetan-thetayc)";
746 commands.push_back(thisCommand);
749 //thisCommand = "read.otu(rabund=" + rabundFile + ", label=0.03)";
750 //commands.push_back(thisCommand);
753 thisCommand = "summary.single(rabund=" + rabundFile + ", label=0.03, calc=nseqs-sobs-coverage-bergerparker-chao-ace-jack-bootstrap-boneh-efron-shen-solow-shannon-npshannon-invsimpson-qstat-simpsoneven-shannoneven-heip-smithwilson, size=5000)";
754 commands.push_back(thisCommand);
757 thisCommand = "set.current(shared=" + phyloSharedFile + ")";
758 commands.push_back(thisCommand);
761 thisCommand = "summary.single(shared=current, calc=nseqs-sobs-coverage-bergerparker-chao-ace-jack-bootstrap-boneh-efron-shen-solow-shannon-npshannon-invsimpson-qstat-simpsoneven-shannoneven-heip-smithwilson, size=5000)";
762 commands.push_back(thisCommand);
765 thisCommand = "summary.shared(shared=current, calc=sharednseqs-sharedsobs-sharedchao-sharedace-anderberg-jclass-jest-kulczynski-kulczynskicody-lennon-ochiai-sorclass-sorest-whittaker-braycurtis-jabund-morisitahorn-sorabund-thetan-thetayc)";
766 commands.push_back(thisCommand);
769 //thisCommand = "read.otu(rabund=" + phyloRabundFile + ", label=1)";
770 //commands.push_back(thisCommand);
773 thisCommand = "summary.single(rabund=" + phyloRabundFile + ", label=1, calc=nseqs-sobs-coverage-bergerparker-chao-ace-jack-bootstrap-boneh-efron-shen-solow-shannon-npshannon-invsimpson-qstat-simpsoneven-shannoneven-heip-smithwilson, size=5000)";
774 commands.push_back(thisCommand);
777 thisCommand = "classify.otu(taxonomy=" + fastaTaxFile + ", name=" + nameFile + ", list=" + listFile + ", cutoff=51, label=0.03)";
778 commands.push_back(thisCommand);
781 thisCommand = "classify.otu(taxonomy=" + fastaTaxFile + ", name=" + nameFile + ", list=" + phyloListFile + ", cutoff=51, label=1)";
782 commands.push_back(thisCommand);
785 thisCommand = "set.current(tree=" + treeFile + ", name=" + nameFile + ", group=" + groupFile + ")";
786 commands.push_back(thisCommand);
789 thisCommand = "phylo.diversity(tree=current, group=current, name=current, iters=100,rarefy=T)";
790 commands.push_back(thisCommand);
793 thisCommand = "unifrac.weighted(tree=current, group=current, name=current, random=false, distance=true, groups=all, processors=" + toString(processors) + ")";
794 commands.push_back(thisCommand);
797 thisCommand = "unifrac.unweighted(tree=current, group=current, name=current, random=false, distance=true, processors=" + toString(processors) + ")";
798 commands.push_back(thisCommand);
802 catch(exception& e) {
803 m->errorOut(e, "PipelineCommand", "createPatsPipeline");
808 //**********************************************************************************************************************