2 * pipelinepdscommand.cpp
5 * Created by westcott on 10/5/10.
6 * Copyright 2010 Schloss Lab. All rights reserved.
10 #include "pipelinepdscommand.h"
11 #include "sffinfocommand.h"
12 #include "commandoptionparser.hpp"
14 //**********************************************************************************************************************
15 vector<string> PipelineCommand::setParameters(){
17 CommandParameter psff("sff", "InputTypes", "", "", "none", "oneRequired", "pipe",false,false); parameters.push_back(psff);
18 CommandParameter poligos("oligos", "InputTypes", "", "", "none", "oneRequired", "pipe",false,false); parameters.push_back(poligos);
19 CommandParameter palign("align", "InputTypes", "", "", "none", "oneRequired", "pipe",false,false); parameters.push_back(palign);
20 CommandParameter pchimera("chimera", "InputTypes", "", "", "none", "oneRequired", "pipe",false,false); parameters.push_back(pchimera);
21 CommandParameter pclassify("classify", "InputTypes", "", "", "none", "oneRequired", "pipe",false,false); parameters.push_back(pclassify);
22 CommandParameter ptaxonomy("taxonomy", "InputTypes", "", "", "none", "oneRequired", "pipe",false,false); parameters.push_back(ptaxonomy);
23 CommandParameter ppipeline("pipeline", "InputTypes", "", "", "none", "oneRequired", "none",false,false); parameters.push_back(ppipeline);
24 CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors);
25 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
26 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
28 vector<string> myArray;
29 for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); }
33 m->errorOut(e, "PipelineCommand", "setParameters");
37 //**********************************************************************************************************************
38 string PipelineCommand::getHelpString(){
40 string helpString = "";
41 helpString += "The pipeline.pds command is designed to guide you through your analysis using mothur.\n";
42 helpString += "The pipeline.pds command parameters are pipeline, sff, oligos, align, chimera, classify, taxonomy and processors.\n";
43 helpString += "The sff parameter allows you to enter your sff file. It is required, if not using pipeline parameter.\n";
44 helpString += "The oligos parameter allows you to enter your oligos file. It is required, if not using pipeline parameter.\n";
45 helpString += "The align parameter allows you to enter a template to use with the aligner. It is required, if not using pipeline parameter.\n";
46 helpString += "The chimera parameter allows you to enter a template to use for chimera detection. It is required, if not using pipeline parameter.\n";
47 helpString += "The classify parameter allows you to enter a template to use for classification. It is required, if not using pipeline parameter.\n";
48 helpString += "The taxonomy parameter allows you to enter a taxonomy file for the classify template to use for classification. It is required, if not using pipeline parameter.\n";
49 helpString += "The processors parameter allows you to specify the number of processors to use. The default is 1.\n";
50 helpString += "The pipeline parameter allows you to enter your own pipeline file. This file should look like a mothur batchfile, but where you would be using a mothur generated file, you can use current instead.\n";
51 helpString += "Example: trim.seqs(processors=8, allfiles=T, maxambig=0, maxhomop=8, flip=T, bdiffs=1, pdiffs=2, qwindowaverage=35, qwindowsize=50, fasta=may1.v13.fasta, oligos=may1.v13.oligos, qfile=may1.v13.qual)\n";
52 helpString += "then, you could enter unique.seqs(fasta=current), and mothur would use the .trim.fasta file from the trim.seqs command. \n";
53 helpString += "then you could enter align.seqs(candidate=current, template=silva.v13.align, processors=8). , and mothur would use the .trim.unique.fasta file from the unique.seqs command. \n";
54 helpString += "If no pipeline file is given then mothur will use Pat's pipeline. \n";
55 helpString += "Here is a list of the commands used in Pat's pipeline.\n";
56 helpString += "All paralellized commands will use the processors you entered.\n";
57 helpString += "The sffinfo command takes your sff file and extracts the fasta and quality files.\n";
58 helpString += "The trim.seqs command uses your oligos file and the quality and fasta files generated by sffinfo.\n";
59 helpString += "The trim.seqs command sets the following parameters: allfiles=T, maxambig=0, maxhomop=8, flip=T, bdiffs=1, pdiffs=2, qwindowaverage=35, qwindowsize=50.\n";
60 helpString += "The unique.seqs command uses the trimmed fasta file and removes redundant sequences, don't worry the names file generated by unique.seqs will be used in the pipeline to make sure they are included.\n";
61 helpString += "The align.seqs command aligns the unique sequences using the aligners default options. \n";
62 helpString += "The screen.seqs command screens the sequences using optimize=end-minlength. \n";
63 helpString += "The pipeline uses chimera.slayer to detect chimeras using the default options. \n";
64 helpString += "The pipeline removes all sequences determined to be chimeric by chimera.slayer. \n";
65 helpString += "The filter.seqs command filters the sequences using vertical=T, trump=. \n";
66 helpString += "The unique.seqs command uses the filtered fasta file and name file to remove sequences that have become redundant after filtering.\n";
67 helpString += "The pre.cluster command clusters sequences that have no more than 2 differences.\n";
68 helpString += "The dist.seqs command is used to generate a column and phylip formatted distance matrix using cutoff=0.20 for column.\n";
69 helpString += "The pipeline uses cluster with method=average, hard=T. \n";
70 helpString += "The classify.seqs command is used to classify the sequences using the bayesian method with a cutoff of 80.\n";
71 helpString += "The phylotype command is used to cluster the sequences based on their classification.\n";
72 helpString += "The clearcut command is used to generate a tree using neighbor=T. \n";
73 helpString += "The summary.single and summary.shared commands are run on the otu files from cluster and phylotype commands. \n";
74 helpString += "The summary.shared command uses calc=sharednseqs-sharedsobs-sharedchao-sharedace-anderberg-jclass-jest-kulczynski-kulczynskicody-lennon-ochiai-sorclass-sorest-whittaker-braycurtis-jabund-morisitahorn-sorabund-thetan-thetayc. \n";
75 helpString += "The summary.single command uses calc=nseqs-sobs-coverage-bergerparker-chao-ace-jack-bootstrap-boneh-efron-shen-solow-shannon-npshannon-invsimpson-qstat-simpsoneven-shannoneven-heip-smithwilson. \n";
76 helpString += "The classify.otu command is used to get the concensus taxonomy for otu files from cluster and phylotype commands. \n";
77 helpString += "The phylo.diversity command run on the tree generated by clearcut with rarefy=T, iters=100. \n";
78 helpString += "The unifrac commands are also run on the tree generated by clearcut with random=F, distance=T. \n";
83 m->errorOut(e, "PipelineCommand", "getHelpString");
89 //**********************************************************************************************************************
90 PipelineCommand::PipelineCommand(string option) {
92 cFactory = CommandFactory::getInstance();
93 abort = false; calledHelp = false;
95 //allow user to run help
96 if(option == "help") { help(); abort = true; calledHelp = true; }
97 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
100 vector<string> myArray = setParameters();
102 OptionParser parser(option);
103 map<string, string> parameters = parser.getParameters();
105 ValidParameters validParameter;
106 map<string, string>::iterator it;
108 //check to make sure all parameters are valid for command
109 for (it = parameters.begin(); it != parameters.end(); it++) {
110 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
113 //if the user changes the input directory command factory will send this info to us in the output parameter
114 string inputDir = validParameter.validFile(parameters, "inputdir", false);
115 if (inputDir == "not found"){ inputDir = ""; }
118 it = parameters.find("sff");
119 //user has given a template file
120 if(it != parameters.end()){
121 path = m->hasPath(it->second);
122 //if the user has not given a path then, add inputdir. else leave path alone.
123 if (path == "") { parameters["sff"] = inputDir + it->second; }
126 it = parameters.find("oligos");
127 //user has given a template file
128 if(it != parameters.end()){
129 path = m->hasPath(it->second);
130 //if the user has not given a path then, add inputdir. else leave path alone.
131 if (path == "") { parameters["oligos"] = inputDir + it->second; }
134 it = parameters.find("align");
135 //user has given a template file
136 if(it != parameters.end()){
137 path = m->hasPath(it->second);
138 //if the user has not given a path then, add inputdir. else leave path alone.
139 if (path == "") { parameters["align"] = inputDir + it->second; }
142 it = parameters.find("chimera");
143 //user has given a template file
144 if(it != parameters.end()){
145 path = m->hasPath(it->second);
146 //if the user has not given a path then, add inputdir. else leave path alone.
147 if (path == "") { parameters["chimera"] = inputDir + it->second; }
150 it = parameters.find("classify");
151 //user has given a template file
152 if(it != parameters.end()){
153 path = m->hasPath(it->second);
154 //if the user has not given a path then, add inputdir. else leave path alone.
155 if (path == "") { parameters["classify"] = inputDir + it->second; }
158 it = parameters.find("taxonomy");
159 //user has given a template file
160 if(it != parameters.end()){
161 path = m->hasPath(it->second);
162 //if the user has not given a path then, add inputdir. else leave path alone.
163 if (path == "") { parameters["taxonomy"] = inputDir + it->second; }
166 it = parameters.find("pipeline");
167 //user has given a template file
168 if(it != parameters.end()){
169 path = m->hasPath(it->second);
170 //if the user has not given a path then, add inputdir. else leave path alone.
171 if (path == "") { parameters["pipeline"] = inputDir + it->second; }
175 outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = ""; }
177 pipeFilename = validParameter.validFile(parameters, "pipeline", true);
178 if (pipeFilename == "not found") { pipeFilename = ""; }
179 else if (pipeFilename == "not open") { pipeFilename = ""; abort = true; }
181 string temp = validParameter.validFile(parameters, "processors", false); if (temp == "not found"){ temp = m->getProcessors(); }
182 m->setProcessors(temp);
183 convert(temp, processors);
185 if (pipeFilename != "") {
186 abort = readUsersPipeline();
188 sffFile = validParameter.validFile(parameters, "sff", true);
189 if (sffFile == "not found") { m->mothurOut("sff is a required parameter for the pipeline command."); m->mothurOutEndLine(); abort = true; }
190 else if (sffFile == "not open") { sffFile = ""; abort = true; }
192 oligosFile = validParameter.validFile(parameters, "oligos", true);
193 if (oligosFile == "not found") { m->mothurOut("oligos is a required parameter for the pipeline command."); m->mothurOutEndLine(); abort = true; }
194 else if (oligosFile == "not open") { oligosFile = ""; abort = true; }
196 alignFile = validParameter.validFile(parameters, "align", true);
197 if (alignFile == "not found") { m->mothurOut("align is a required parameter for the pipeline command. Please provide the template to align with."); m->mothurOutEndLine(); abort = true; }
198 else if (alignFile == "not open") { alignFile = ""; abort = true; }
200 chimeraFile = validParameter.validFile(parameters, "chimera", true);
201 if (chimeraFile == "not found") { m->mothurOut("chimera is a required parameter for the pipeline command. Please provide the template to check for chimeras with."); m->mothurOutEndLine(); abort = true; }
202 else if (chimeraFile == "not open") { chimeraFile = ""; abort = true; }
204 classifyFile = validParameter.validFile(parameters, "classify", true);
205 if (classifyFile == "not found") { m->mothurOut("classify is a required parameter for the pipeline command. Please provide the template to use with the classifier."); m->mothurOutEndLine(); abort = true; }
206 else if (classifyFile == "not open") { classifyFile = ""; abort = true; }
208 taxonomyFile = validParameter.validFile(parameters, "taxonomy", true);
209 if (taxonomyFile == "not found") { m->mothurOut("taxonomy is a required parameter for the pipeline command."); m->mothurOutEndLine(); abort = true; }
210 else if (taxonomyFile == "not open") { taxonomyFile = ""; abort = true; }
215 catch(exception& e) {
216 m->errorOut(e, "PipelineCommand", "PipelineCommand");
220 //**********************************************************************************************************************
222 int PipelineCommand::execute(){
224 if (abort == true) { if (calledHelp) { return 0; } return 2; }
226 int start = time(NULL);
228 if (pipeFilename == "") {
229 createPatsPipeline();
232 for (int i = 0; i < commands.size(); i++) {
233 m->mothurOutEndLine(); m->mothurOut("mothur > " + commands[i]); m->mothurOutEndLine();
235 if (m->control_pressed) { return 0; }
237 CommandOptionParser parser(commands[i]);
238 string commandName = parser.getCommandString();
239 string options = parser.getOptionString();
243 MPI_Comm_rank(MPI_COMM_WORLD, &pid);
245 if ((cFactory->MPIEnabled(commandName)) || (pid == 0)) {
248 //executes valid command
249 Command* command = cFactory->getCommand(commandName, options, "pipe");
252 //add output files to list
253 map<string, vector<string> > thisCommandsFile = command->getOutputFiles();
254 map<string, vector<string> >::iterator itMade;
255 for (itMade = thisCommandsFile.begin(); itMade != thisCommandsFile.end(); itMade++) {
256 vector<string> temp = itMade->second;
257 for (int j = 0; j < temp.size(); j++) { outputNames.push_back(temp[j]); }
265 }else { runUsersPipeline(); }
267 if (m->control_pressed) { return 0; }
269 m->mothurOut("It took " + toString(time(NULL) - start) + " secs to run the pipeline analysis."); m->mothurOutEndLine(); m->mothurOutEndLine();
271 m->mothurOutEndLine();
272 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
273 for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); }
274 m->mothurOutEndLine();
278 catch(exception& e) {
279 m->errorOut(e, "PipelineCommand", "execute");
283 //**********************************************************************************************************************
285 bool PipelineCommand::readUsersPipeline(){
289 m->openInputFile(pipeFilename, in);
291 string nextCommand = "";
293 map<string, vector<string> > mothurMadeFiles;
296 nextCommand = m->getline(in); m->gobble(in);
298 if (nextCommand[0] != '#') {
301 string commandName, options;
302 error = parseCommand(nextCommand, commandName, options);
304 if (error) { in.close(); return error; }
305 if (commandName == "pipeline.pds") { m->mothurOut("Cannot run the pipeline.pds command from inside the pipeline.pds command."); m->mothurOutEndLine(); in.close(); return true; }
307 error = checkForValidAndRequiredParameters(commandName, options, mothurMadeFiles);
309 if (error) { in.close(); return error; }
317 catch(exception& e) {
318 m->errorOut(e, "PipelineCommand", "readUsersPipeline");
322 //**********************************************************************************************************************
324 bool PipelineCommand::parseCommand(string nextCommand, string& name, string& options){
326 CommandOptionParser parser(nextCommand);
327 name = parser.getCommandString();
328 options = parser.getOptionString();
330 if (name == "") { return true; } //name == "" if () are not right
334 catch(exception& e) {
335 m->errorOut(e, "PipelineCommand", "parseCommand");
339 //**********************************************************************************************************************
341 bool PipelineCommand::checkForValidAndRequiredParameters(string name, string options, map<string, vector<string> >& mothurMadeFiles){
344 if (name == "system") { return false; }
346 //get shell of the command so we can check to make sure its valid without running it
347 Command* command = cFactory->getCommand(name);
349 //check to make sure all parameters are valid for command
350 vector<string> validParameters = command->setParameters();
352 OptionParser parser(options);
353 map<string, string> parameters = parser.getParameters();
355 ValidParameters validParameter;
356 map<string, string>::iterator it;
357 map<string, vector<string> >::iterator itMade;
359 for (it = parameters.begin(); it != parameters.end(); it++) {
361 if (validParameter.isValidParameter(it->first, validParameters, it->second) != true) { return true; } // not valid
362 if (it->second == "current") {
363 itMade = mothurMadeFiles.find(it->first);
365 if (itMade == mothurMadeFiles.end()) {
366 m->mothurOut("You have the " + it->first + " listed as a current file for the " + name + " command, but it seems mothur will not make that file in your current pipeline, please correct."); m->mothurOutEndLine();
372 //is the command missing any required
373 vector<CommandParameter> commandParameters = command->getParameters();
374 vector<string> requiredParameters;
375 for (int i = 0; i < commandParameters.size(); i++) {
376 if (commandParameters[i].required) {
377 requiredParameters.push_back(commandParameters[i].name);
381 for (int i = 0; i < requiredParameters.size(); i++) {
382 it = parameters.find(requiredParameters[i]);
384 if (it == parameters.end()) {
386 string paraToLookFor = requiredParameters[i];
388 //does mothur have a current file for this?
389 itMade = mothurMadeFiles.find(requiredParameters[i]);
391 if (itMade == mothurMadeFiles.end()) {
392 m->mothurOut(name + " requires the " + requiredParameters[i] + " parameter, please correct."); m->mothurOutEndLine();
400 map<string, vector<string> > thisCommandsFile = command->getOutputFiles();
401 for (itMade = thisCommandsFile.begin(); itMade != thisCommandsFile.end(); itMade++) {
402 mothurMadeFiles[itMade->first] = itMade->second; //adds any new types
407 catch(exception& e) {
408 m->errorOut(e, "PipelineCommand", "checkForValidAndRequiredParameters");
412 //**********************************************************************************************************************
413 int PipelineCommand::runUsersPipeline(){
416 m->openInputFile(pipeFilename, in);
418 string nextCommand = "";
420 map<string, vector<string> > mothurMadeFiles;
423 nextCommand = m->getline(in); m->gobble(in);
425 if (nextCommand[0] != '#') {
426 CommandOptionParser parser(nextCommand);
427 string commandName = parser.getCommandString();
428 string options = parser.getOptionString();
430 if ((options != "") && (commandName != "system")) {
431 bool error = fillInMothurMade(options, mothurMadeFiles);
432 if (error) { in.close(); return 0; }
435 m->mothurOutEndLine(); m->mothurOut("mothur > " + commandName + "(" + options + ")"); m->mothurOutEndLine();
437 if (m->control_pressed) { return 0; }
441 MPI_Comm_rank(MPI_COMM_WORLD, &pid);
443 if ((cFactory->MPIEnabled(commandName)) || (pid == 0)) {
446 //executes valid command
447 Command* command = cFactory->getCommand(commandName, options, "pipe");
450 //add output files to list
451 map<string, vector<string> > thisCommandsFile = command->getOutputFiles();
452 map<string, vector<string> >::iterator itMade;
453 map<string, vector<string> >::iterator it;
454 for (itMade = thisCommandsFile.begin(); itMade != thisCommandsFile.end(); itMade++) {
456 vector<string> temp = itMade->second;
457 for (int k = 0; k < temp.size(); k++) { outputNames.push_back(temp[k]); } //
459 //update Mothur Made for each file
460 it = mothurMadeFiles.find(itMade->first);
462 if (it == mothurMadeFiles.end()) { //new type
464 mothurMadeFiles[itMade->first] = temp;
466 }else{ //update existing type
467 vector<string> oldFileNames = it->second;
468 //look at new files, see if an old version of the file exists, if so update, else just add.
469 //for example you may have abrecovery.fasta and amazon.fasta as old files and you created a new amazon.trim.fasta.
471 for (int k = 0; k < temp.size(); k++) {
474 string root = m->getSimpleName(temp[k]);
475 string individual = "";
476 for(int i=0;i<root.length();i++){
481 individual += root[i];
485 //look for that base name in oldfiles
487 for (int l = 0; l < oldFileNames.size(); l++) {
488 int pos = oldFileNames[l].find(root);
489 if (pos != string::npos) {
495 //if you found it update it, else add it
497 mothurMadeFiles[it->first][spot] = temp[k];
499 mothurMadeFiles[it->first].push_back(temp[k]);
515 catch(exception& e) {
516 m->errorOut(e, "PipelineCommand", "runUsersPipeline");
520 //**********************************************************************************************************************
521 bool PipelineCommand::fillInMothurMade(string& options, map<string, vector<string> >& mothurMadeFiles){
524 OptionParser parser(options);
525 map<string, string> parameters = parser.getParameters();
526 map<string, string>::iterator it;
527 map<string, vector<string> >::iterator itMade;
531 //fill in mothurmade filenames
532 for (it = parameters.begin(); it != parameters.end(); it++) {
533 string paraType = it->first;
534 string tempOption = it->second;
536 if (tempOption == "current") {
538 itMade = mothurMadeFiles.find(paraType);
540 if (itMade == mothurMadeFiles.end()) {
541 m->mothurOut("Looking for a current " + paraType + " file, but it seems mothur has not made that file type in your current pipeline, please correct."); m->mothurOutEndLine();
544 vector<string> temp = itMade->second;
546 if (temp.size() > 1) {
547 //ask user which file to use
548 m->mothurOut("More than one file has been created for the " + paraType + " parameter. "); m->mothurOutEndLine();
549 for (int i = 0; i < temp.size(); i++) {
550 m->mothurOut(toString(i) + " - " + temp[i]); m->mothurOutEndLine();
553 m->mothurOut("Please select the number of the file you would like to use: ");
556 m->mothurOutJustToLog(toString(num)); m->mothurOutEndLine();
558 if ((num < 0) || (num > (temp.size()-1))) { m->mothurOut("Not a valid response, quitting."); m->mothurOutEndLine(); return true; }
560 tempOption = temp[num];
563 //clears buffer so next command doesn't have error
567 vector<string> newTemp;
568 for (int i = 0; i < temp.size(); i++) {
569 if (i == num) { newTemp.push_back(temp[i]); }
571 m->mothurOut("Would you like to remove " + temp[i] + " as an option for " + paraType + ", (y/n): "); m->mothurOutEndLine();
574 m->mothurOutJustToLog(response); m->mothurOutEndLine();
576 if (response == "n") { newTemp.push_back(temp[i]); }
578 //clears buffer so next command doesn't have error
584 mothurMadeFiles[paraType] = newTemp;
587 }else if (temp.size() == 0){
588 m->mothurOut("Sorry, we seem to think you created a " + paraType + " file, but it seems mothur doesn't have a filename."); m->mothurOutEndLine();
591 tempOption = temp[0];
596 options += it->first + "=" + tempOption + ", ";
599 //rip off extra comma
600 options = options.substr(0, (options.length()-2));
604 catch(exception& e) {
605 m->errorOut(e, "PipelineCommand", "fillInMothurMade");
610 //**********************************************************************************************************************
611 void PipelineCommand::createPatsPipeline(){
615 string thisCommand = "sffinfo(sff=" + sffFile + ")";
616 commands.push_back(thisCommand);
619 string fastaFile = m->getRootName(m->getSimpleName(sffFile)) + "fasta";
620 string qualFile = m->getRootName(m->getSimpleName(sffFile)) + "qual";
621 thisCommand = "trim.seqs(processors=" + toString(processors) + ", fasta=current, allfiles=T, maxambig=0, maxhomop=8, flip=T, bdiffs=1, pdiffs=2, qwindowaverage=35, qwindowsize=50, oligos=" + oligosFile + ", qfile=current)";
622 commands.push_back(thisCommand);
625 string groupFile = m->getRootName(m->getSimpleName(fastaFile)) + "groups";
626 qualFile = m->getRootName(m->getSimpleName(fastaFile)) + "trim.qual";
627 fastaFile = m->getRootName(m->getSimpleName(fastaFile)) + "trim.fasta";
628 thisCommand = "unique.seqs(fasta=current)";
629 commands.push_back(thisCommand);
632 string nameFile = m->getRootName(m->getSimpleName(fastaFile)) + "names";
633 fastaFile = m->getRootName(m->getSimpleName(fastaFile)) + "unique" + m->getExtension(fastaFile);
634 thisCommand = "align.seqs(processors=" + toString(processors) + ", candidate=current, template=" + alignFile + ")";
635 commands.push_back(thisCommand);
638 fastaFile = m->getRootName(m->getSimpleName(fastaFile)) + "align";
639 thisCommand = "screen.seqs(processors=" + toString(processors) + ", fasta=current, name=current, group=current, optimize=end-minlength)";
640 commands.push_back(thisCommand);
643 fastaFile = m->getRootName(m->getSimpleName(fastaFile)) + "good" + m->getExtension(fastaFile);
644 nameFile = m->getRootName(m->getSimpleName(nameFile)) + "good" + m->getExtension(nameFile);
645 groupFile = m->getRootName(m->getSimpleName(groupFile)) + "good" + m->getExtension(groupFile);
646 thisCommand = "chimera.slayer(processors=" + toString(processors) + ", fasta=current, template=" + chimeraFile + ")";
647 commands.push_back(thisCommand);
650 string accnosFile = m->getRootName(m->getSimpleName(fastaFile)) + "slayer.accnos";
651 thisCommand = "remove.seqs(fasta=current, name=current, group=current, accnos=current, dups=T)";
652 commands.push_back(thisCommand);
655 nameFile = m->getRootName(m->getSimpleName(nameFile)) + "pick" + m->getExtension(nameFile);
656 groupFile = m->getRootName(m->getSimpleName(groupFile)) + "pick" + m->getExtension(groupFile);
657 fastaFile = m->getRootName(m->getSimpleName(fastaFile)) + "pick" + m->getExtension(fastaFile);
658 thisCommand = "filter.seqs(processors=" + toString(processors) + ", fasta=current, vertical=T, trump=.)";
659 commands.push_back(thisCommand);
662 fastaFile = m->getRootName(m->getSimpleName(fastaFile)) + "filter.fasta";
663 thisCommand = "unique.seqs(fasta=current, name=current)";
664 commands.push_back(thisCommand);
667 nameFile = m->getRootName(m->getSimpleName(fastaFile)) + "names";
668 fastaFile = m->getRootName(m->getSimpleName(fastaFile)) + "unique" + m->getExtension(fastaFile);
669 thisCommand = "pre.cluster(fasta=current, name=current, diffs=2)";
670 commands.push_back(thisCommand);
673 nameFile = m->getRootName(m->getSimpleName(fastaFile)) + "precluster.names";
674 fastaFile = m->getRootName(m->getSimpleName(fastaFile)) + "precluster" + m->getExtension(fastaFile);
675 thisCommand = "dist.seqs(processors=" + toString(processors) + ", fasta=current, cutoff=0.20)";
676 commands.push_back(thisCommand);
679 string columnFile = m->getRootName(m->getSimpleName(fastaFile)) + "dist";
680 thisCommand = "dist.seqs(processors=" + toString(processors) + ", fasta=current, output=lt)";
681 commands.push_back(thisCommand);
684 string phylipFile = m->getRootName(m->getSimpleName(fastaFile)) + "phylip.dist";
685 thisCommand = "read.dist(column=current, name=current)";
686 commands.push_back(thisCommand);
689 thisCommand = "cluster(method=average, hard=T)";
690 commands.push_back(thisCommand);
692 string listFile = m->getRootName(m->getSimpleName(columnFile)) + "an.list";
693 string rabundFile = m->getRootName(m->getSimpleName(columnFile)) + "an.rabund";
696 thisCommand = "degap.seqs(fasta=current)";
697 commands.push_back(thisCommand);
700 fastaFile = m->getRootName(m->getSimpleName(fastaFile)) + "ng.fasta";
701 thisCommand = "classify.seqs(processors=" + toString(processors) + ", fasta=current, name=current, template=" + classifyFile + ", taxonomy=" + taxonomyFile + ", cutoff=80)";
702 commands.push_back(thisCommand);
704 string RippedTaxName = m->getRootName(m->getSimpleName(taxonomyFile));
705 RippedTaxName = m->getExtension(RippedTaxName.substr(0, RippedTaxName.length()-1));
706 if (RippedTaxName[0] == '.') { RippedTaxName = RippedTaxName.substr(1, RippedTaxName.length()); }
707 RippedTaxName += ".";
709 string fastaTaxFile = m->getRootName(m->getSimpleName(fastaFile)) + RippedTaxName + "taxonomy";
710 string taxSummaryFile = m->getRootName(m->getSimpleName(fastaFile)) + RippedTaxName + "tax.summary";
713 thisCommand = "phylotype(taxonomy=current, name=current)";
714 commands.push_back(thisCommand);
716 string phyloListFile = m->getRootName(m->getSimpleName(fastaTaxFile)) + "tx.list";
717 string phyloRabundFile = m->getRootName(m->getSimpleName(fastaTaxFile)) + "tx.rabund";
720 thisCommand = "clearcut(phylip=current, neighbor=T)";
721 commands.push_back(thisCommand);
723 string treeFile = m->getRootName(m->getSimpleName(phylipFile)) + "tre";
726 thisCommand = "make.shared(list=" + listFile + ", group=" + groupFile + ", label=0.03)";
727 commands.push_back(thisCommand);
729 string sharedFile = m->getRootName(m->getSimpleName(listFile)) + "shared";
732 thisCommand = "make.shared(list=" + phyloListFile + ", group=" + groupFile + ", label=1)";
733 commands.push_back(thisCommand);
735 string phyloSharedFile = m->getRootName(m->getSimpleName(phyloListFile)) + "shared";
738 thisCommand = "set.current(shared=" + sharedFile + ")";
739 commands.push_back(thisCommand);
742 thisCommand = "summary.single(shared=current, calc=nseqs-sobs-coverage-bergerparker-chao-ace-jack-bootstrap-boneh-efron-shen-solow-shannon-npshannon-invsimpson-qstat-simpsoneven-shannoneven-heip-smithwilson, size=5000)";
743 commands.push_back(thisCommand);
746 thisCommand = "summary.shared(shared=current, calc=sharednseqs-sharedsobs-sharedchao-sharedace-anderberg-jclass-jest-kulczynski-kulczynskicody-lennon-ochiai-sorclass-sorest-whittaker-braycurtis-jabund-morisitahorn-sorabund-thetan-thetayc)";
747 commands.push_back(thisCommand);
750 //thisCommand = "read.otu(rabund=" + rabundFile + ", label=0.03)";
751 //commands.push_back(thisCommand);
754 thisCommand = "summary.single(rabund=" + rabundFile + ", label=0.03, calc=nseqs-sobs-coverage-bergerparker-chao-ace-jack-bootstrap-boneh-efron-shen-solow-shannon-npshannon-invsimpson-qstat-simpsoneven-shannoneven-heip-smithwilson, size=5000)";
755 commands.push_back(thisCommand);
758 thisCommand = "set.current(shared=" + phyloSharedFile + ")";
759 commands.push_back(thisCommand);
762 thisCommand = "summary.single(shared=current, calc=nseqs-sobs-coverage-bergerparker-chao-ace-jack-bootstrap-boneh-efron-shen-solow-shannon-npshannon-invsimpson-qstat-simpsoneven-shannoneven-heip-smithwilson, size=5000)";
763 commands.push_back(thisCommand);
766 thisCommand = "summary.shared(shared=current, calc=sharednseqs-sharedsobs-sharedchao-sharedace-anderberg-jclass-jest-kulczynski-kulczynskicody-lennon-ochiai-sorclass-sorest-whittaker-braycurtis-jabund-morisitahorn-sorabund-thetan-thetayc)";
767 commands.push_back(thisCommand);
770 //thisCommand = "read.otu(rabund=" + phyloRabundFile + ", label=1)";
771 //commands.push_back(thisCommand);
774 thisCommand = "summary.single(rabund=" + phyloRabundFile + ", label=1, calc=nseqs-sobs-coverage-bergerparker-chao-ace-jack-bootstrap-boneh-efron-shen-solow-shannon-npshannon-invsimpson-qstat-simpsoneven-shannoneven-heip-smithwilson, size=5000)";
775 commands.push_back(thisCommand);
778 thisCommand = "classify.otu(taxonomy=" + fastaTaxFile + ", name=" + nameFile + ", list=" + listFile + ", cutoff=51, label=0.03)";
779 commands.push_back(thisCommand);
782 thisCommand = "classify.otu(taxonomy=" + fastaTaxFile + ", name=" + nameFile + ", list=" + phyloListFile + ", cutoff=51, label=1)";
783 commands.push_back(thisCommand);
786 thisCommand = "set.current(tree=" + treeFile + ", name=" + nameFile + ", group=" + groupFile + ")";
787 commands.push_back(thisCommand);
790 thisCommand = "phylo.diversity(tree=current, group=current, name=current, iters=100,rarefy=T)";
791 commands.push_back(thisCommand);
794 thisCommand = "unifrac.weighted(tree=current, group=current, name=current, random=false, distance=true, groups=all, processors=" + toString(processors) + ")";
795 commands.push_back(thisCommand);
798 thisCommand = "unifrac.unweighted(tree=current, group=current, name=current, random=false, distance=true, processors=" + toString(processors) + ")";
799 commands.push_back(thisCommand);
803 catch(exception& e) {
804 m->errorOut(e, "PipelineCommand", "createPatsPipeline");
809 //**********************************************************************************************************************