2 * pipelinepdscommand.cpp
5 * Created by westcott on 10/5/10.
6 * Copyright 2010 Schloss Lab. All rights reserved.
10 #include "pipelinepdscommand.h"
11 #include "sffinfocommand.h"
12 #include "commandoptionparser.hpp"
14 //**********************************************************************************************************************
15 vector<string> PipelineCommand::getValidParameters(){
17 string Array[] = {"sff","oligos","align","chimera","classify","taxonomy","pipeline","processors","outputdir","inputdir"};
18 vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
22 m->errorOut(e, "PipelineCommand", "getValidParameters");
26 //**********************************************************************************************************************
27 vector<string> PipelineCommand::getRequiredParameters(){
29 vector<string> myArray;
33 m->errorOut(e, "PipelineCommand", "getRequiredParameters");
37 //**********************************************************************************************************************
38 vector<string> PipelineCommand::getRequiredFiles(){
40 vector<string> myArray;
44 m->errorOut(e, "PipelineCommand", "getRequiredFiles");
48 //**********************************************************************************************************************
49 PipelineCommand::PipelineCommand(string option) {
51 cFactory = CommandFactory::getInstance();
52 abort = false; calledHelp = false;
54 //allow user to run help
55 if(option == "help") { help(); abort = true; calledHelp = true; }
59 //valid paramters for this command
60 string AlignArray[] = {"sff","oligos","align","chimera","classify","taxonomy","pipeline","processors","outputdir","inputdir"};
61 vector<string> myArray (AlignArray, AlignArray+(sizeof(AlignArray)/sizeof(string)));
63 OptionParser parser(option);
64 map<string, string> parameters = parser.getParameters();
66 ValidParameters validParameter;
67 map<string, string>::iterator it;
69 //check to make sure all parameters are valid for command
70 for (it = parameters.begin(); it != parameters.end(); it++) {
71 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
74 //if the user changes the input directory command factory will send this info to us in the output parameter
75 string inputDir = validParameter.validFile(parameters, "inputdir", false);
76 if (inputDir == "not found"){ inputDir = ""; }
79 it = parameters.find("sff");
80 //user has given a template file
81 if(it != parameters.end()){
82 path = m->hasPath(it->second);
83 //if the user has not given a path then, add inputdir. else leave path alone.
84 if (path == "") { parameters["sff"] = inputDir + it->second; }
87 it = parameters.find("oligos");
88 //user has given a template file
89 if(it != parameters.end()){
90 path = m->hasPath(it->second);
91 //if the user has not given a path then, add inputdir. else leave path alone.
92 if (path == "") { parameters["oligos"] = inputDir + it->second; }
95 it = parameters.find("align");
96 //user has given a template file
97 if(it != parameters.end()){
98 path = m->hasPath(it->second);
99 //if the user has not given a path then, add inputdir. else leave path alone.
100 if (path == "") { parameters["align"] = inputDir + it->second; }
103 it = parameters.find("chimera");
104 //user has given a template file
105 if(it != parameters.end()){
106 path = m->hasPath(it->second);
107 //if the user has not given a path then, add inputdir. else leave path alone.
108 if (path == "") { parameters["chimera"] = inputDir + it->second; }
111 it = parameters.find("classify");
112 //user has given a template file
113 if(it != parameters.end()){
114 path = m->hasPath(it->second);
115 //if the user has not given a path then, add inputdir. else leave path alone.
116 if (path == "") { parameters["classify"] = inputDir + it->second; }
119 it = parameters.find("taxonomy");
120 //user has given a template file
121 if(it != parameters.end()){
122 path = m->hasPath(it->second);
123 //if the user has not given a path then, add inputdir. else leave path alone.
124 if (path == "") { parameters["taxonomy"] = inputDir + it->second; }
127 it = parameters.find("pipeline");
128 //user has given a template file
129 if(it != parameters.end()){
130 path = m->hasPath(it->second);
131 //if the user has not given a path then, add inputdir. else leave path alone.
132 if (path == "") { parameters["pipeline"] = inputDir + it->second; }
136 outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = ""; }
138 pipeFilename = validParameter.validFile(parameters, "pipeline", true);
139 if (pipeFilename == "not found") { pipeFilename = ""; }
140 else if (pipeFilename == "not open") { pipeFilename = ""; abort = true; }
142 string temp = validParameter.validFile(parameters, "processors", false); if (temp == "not found"){ temp = "1"; }
143 convert(temp, processors);
145 if (pipeFilename != "") {
146 abort = readUsersPipeline();
148 sffFile = validParameter.validFile(parameters, "sff", true);
149 if (sffFile == "not found") { m->mothurOut("sff is a required parameter for the pipeline command."); m->mothurOutEndLine(); abort = true; }
150 else if (sffFile == "not open") { sffFile = ""; abort = true; }
152 oligosFile = validParameter.validFile(parameters, "oligos", true);
153 if (oligosFile == "not found") { m->mothurOut("oligos is a required parameter for the pipeline command."); m->mothurOutEndLine(); abort = true; }
154 else if (oligosFile == "not open") { oligosFile = ""; abort = true; }
156 alignFile = validParameter.validFile(parameters, "align", true);
157 if (alignFile == "not found") { m->mothurOut("align is a required parameter for the pipeline command. Please provide the template to align with."); m->mothurOutEndLine(); abort = true; }
158 else if (alignFile == "not open") { alignFile = ""; abort = true; }
160 chimeraFile = validParameter.validFile(parameters, "chimera", true);
161 if (chimeraFile == "not found") { m->mothurOut("chimera is a required parameter for the pipeline command. Please provide the template to check for chimeras with."); m->mothurOutEndLine(); abort = true; }
162 else if (chimeraFile == "not open") { chimeraFile = ""; abort = true; }
164 classifyFile = validParameter.validFile(parameters, "classify", true);
165 if (classifyFile == "not found") { m->mothurOut("classify is a required parameter for the pipeline command. Please provide the template to use with the classifier."); m->mothurOutEndLine(); abort = true; }
166 else if (classifyFile == "not open") { classifyFile = ""; abort = true; }
168 taxonomyFile = validParameter.validFile(parameters, "taxonomy", true);
169 if (taxonomyFile == "not found") { m->mothurOut("taxonomy is a required parameter for the pipeline command."); m->mothurOutEndLine(); abort = true; }
170 else if (taxonomyFile == "not open") { taxonomyFile = ""; abort = true; }
175 catch(exception& e) {
176 m->errorOut(e, "PipelineCommand", "PipelineCommand");
180 //**********************************************************************************************************************
182 void PipelineCommand::help(){
184 m->mothurOut("The pipeline command is designed to guide you through your analysis using mothur.\n");
185 m->mothurOut("The pipeline command parameters are pipeline, sff, oligos, align, chimera, classify, taxonomy and processors.\n");
186 m->mothurOut("The sff parameter allows you to enter your sff file. It is required, if not using pipeline parameter.\n");
187 m->mothurOut("The oligos parameter allows you to enter your oligos file. It is required, if not using pipeline parameter.\n");
188 m->mothurOut("The align parameter allows you to enter a template to use with the aligner. It is required, if not using pipeline parameter.\n");
189 m->mothurOut("The chimera parameter allows you to enter a template to use for chimera detection. It is required, if not using pipeline parameter.\n");
190 m->mothurOut("The classify parameter allows you to enter a template to use for classification. It is required, if not using pipeline parameter.\n");
191 m->mothurOut("The taxonomy parameter allows you to enter a taxonomy file for the classify template to use for classification. It is required, if not using pipeline parameter.\n");
192 m->mothurOut("The processors parameter allows you to specify the number of processors to use. The default is 1.\n");
193 m->mothurOut("The pipeline parameter allows you to enter your own pipeline file. This file should look like a mothur batchfile, but where you would be using a mothur generated file, you can use mothurmade instead.\n");
194 m->mothurOut("Example: trim.seqs(processors=8, allfiles=T, maxambig=0, maxhomop=8, flip=T, bdiffs=1, pdiffs=2, qwindowaverage=35, qwindowsize=50, fasta=may1.v13.fasta, oligos=may1.v13.oligos, qfile=may1.v13.qual)\n");
195 m->mothurOut("then, you could enter unique.seqs(fasta=mothurmade), and mothur would use the .trim.fasta file from the trim.seqs command. \n");
196 m->mothurOut("then you could enter align.seqs(candidate=mothurmade, template=silva.v13.align, processors=8). , and mothur would use the .trim.unique.fasta file from the unique.seqs command. \n");
197 m->mothurOut("If no pipeline file is given then mothur will use Pat's pipeline. \n\n");
198 m->mothurOut("Here is a list of the commands used in Pat's pipeline.\n");
199 m->mothurOut("All paralellized commands will use the processors you entered.\n");
200 m->mothurOut("The sffinfo command takes your sff file and extracts the fasta and quality files.\n");
201 m->mothurOut("The trim.seqs command uses your oligos file and the quality and fasta files generated by sffinfo.\n");
202 m->mothurOut("The trim.seqs command sets the following parameters: allfiles=T, maxambig=0, maxhomop=8, flip=T, bdiffs=1, pdiffs=2, qwindowaverage=35, qwindowsize=50.\n");
203 m->mothurOut("The unique.seqs command uses the trimmed fasta file and removes redundant sequences, don't worry the names file generated by unique.seqs will be used in the pipeline to make sure they are included.\n");
204 m->mothurOut("The align.seqs command aligns the unique sequences using the aligners default options. \n");
205 m->mothurOut("The screen.seqs command screens the sequences using optimize=end-minlength. \n");
206 m->mothurOut("The pipeline uses chimera.slayer to detect chimeras using the default options. \n");
207 m->mothurOut("The pipeline removes all sequences determined to be chimeric by chimera.slayer. \n");
208 m->mothurOut("The filter.seqs command filters the sequences using vertical=T, trump=. \n");
209 m->mothurOut("The unique.seqs command uses the filtered fasta file and name file to remove sequences that have become redundant after filtering.\n");
210 m->mothurOut("The pre.cluster command clusters sequences that have no more than 2 differences.\n");
211 m->mothurOut("The dist.seqs command is used to generate a column and phylip formatted distance matrix using cutoff=0.20 for column.\n");
212 m->mothurOut("The pipeline uses cluster with method=average, hard=T. \n");
213 m->mothurOut("The classify.seqs command is used to classify the sequences using the bayesian method with a cutoff of 80.\n");
214 m->mothurOut("The phylotype command is used to cluster the sequences based on their classification.\n");
215 m->mothurOut("The clearcut command is used to generate a tree using neighbor=T. \n");
216 m->mothurOut("The summary.single and summary.shared commands are run on the otu files from cluster and phylotype commands. \n");
217 m->mothurOut("The summary.shared command uses calc=sharednseqs-sharedsobs-sharedchao-sharedace-anderberg-jclass-jest-kulczynski-kulczynskicody-lennon-ochiai-sorclass-sorest-whittaker-braycurtis-jabund-morisitahorn-sorabund-thetan-thetayc. \n");
218 m->mothurOut("The summary.single command uses calc=nseqs-sobs-coverage-bergerparker-chao-ace-jack-bootstrap-boneh-efron-shen-solow-shannon-npshannon-invsimpson-qstat-simpsoneven-shannoneven-heip-smithwilson. \n");
219 m->mothurOut("The classify.otu command is used to get the concensus taxonomy for otu files from cluster and phylotype commands. \n");
220 m->mothurOut("The phylo.diversity command run on the tree generated by clearcut with rarefy=T, iters=100. \n");
221 m->mothurOut("The unifrac commands are also run on the tree generated by clearcut with random=F, distance=T. \n");
222 m->mothurOut("\n\n");
224 catch(exception& e) {
225 m->errorOut(e, "PipelineCommand", "help");
230 //**********************************************************************************************************************
232 PipelineCommand::~PipelineCommand(){}
234 //**********************************************************************************************************************
236 int PipelineCommand::execute(){
238 if (abort == true) { if (calledHelp) { return 0; } return 2; }
240 int start = time(NULL);
242 if (pipeFilename == "") {
243 createPatsPipeline();
246 for (int i = 0; i < commands.size(); i++) {
247 m->mothurOutEndLine(); m->mothurOut("mothur > " + commands[i]); m->mothurOutEndLine();
249 if (m->control_pressed) { return 0; }
251 CommandOptionParser parser(commands[i]);
252 string commandName = parser.getCommandString();
253 string options = parser.getOptionString();
257 MPI_Comm_rank(MPI_COMM_WORLD, &pid);
259 if ((cFactory->MPIEnabled(commandName)) || (pid == 0)) {
262 //executes valid command
263 Command* command = cFactory->getCommand(commandName, options, "pipe");
266 //add output files to list
267 map<string, vector<string> > thisCommandsFile = command->getOutputFiles();
268 map<string, vector<string> >::iterator itMade;
269 for (itMade = thisCommandsFile.begin(); itMade != thisCommandsFile.end(); itMade++) {
270 vector<string> temp = itMade->second;
271 for (int j = 0; j < temp.size(); j++) { outputNames.push_back(temp[j]); }
279 }else { runUsersPipeline(); }
281 if (m->control_pressed) { return 0; }
283 m->mothurOut("It took " + toString(time(NULL) - start) + " secs to run the pipeline analysis."); m->mothurOutEndLine(); m->mothurOutEndLine();
285 m->mothurOutEndLine();
286 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
287 for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); }
288 m->mothurOutEndLine();
292 catch(exception& e) {
293 m->errorOut(e, "PipelineCommand", "execute");
297 //**********************************************************************************************************************
299 bool PipelineCommand::readUsersPipeline(){
303 m->openInputFile(pipeFilename, in);
305 string nextCommand = "";
307 map<string, vector<string> > mothurMadeFiles;
310 nextCommand = m->getline(in); m->gobble(in);
312 if (nextCommand[0] != '#') {
315 string commandName, options;
316 error = parseCommand(nextCommand, commandName, options);
318 if (error) { in.close(); return error; }
319 if (commandName == "pipeline.pds") { m->mothurOut("Cannot run the pipeline.pds command from inside the pipeline.pds command."); m->mothurOutEndLine(); in.close(); return true; }
321 error = checkForValidAndRequiredParameters(commandName, options, mothurMadeFiles);
323 if (error) { in.close(); return error; }
331 catch(exception& e) {
332 m->errorOut(e, "PipelineCommand", "readUsersPipeline");
336 //**********************************************************************************************************************
338 bool PipelineCommand::parseCommand(string nextCommand, string& name, string& options){
340 CommandOptionParser parser(nextCommand);
341 name = parser.getCommandString();
342 options = parser.getOptionString();
344 if (name == "") { return true; } //name == "" if () are not right
348 catch(exception& e) {
349 m->errorOut(e, "PipelineCommand", "parseCommand");
353 //**********************************************************************************************************************
355 bool PipelineCommand::checkForValidAndRequiredParameters(string name, string options, map<string, vector<string> >& mothurMadeFiles){
358 if (name == "system") { return false; }
360 if (name == "system") { return false; }
362 //get shell of the command so we can check to make sure its valid without running it
363 Command* command = cFactory->getCommand(name);
365 //check to make sure all parameters are valid for command
366 vector<string> validParameters = command->getValidParameters();
368 OptionParser parser(options);
369 map<string, string> parameters = parser.getParameters();
371 ValidParameters validParameter;
372 map<string, string>::iterator it;
373 map<string, vector<string> >::iterator itMade;
375 for (it = parameters.begin(); it != parameters.end(); it++) {
377 if (validParameter.isValidParameter(it->first, validParameters, it->second) != true) { return true; } // not valid
378 if (it->second == "mothurmade") {
379 itMade = mothurMadeFiles.find(it->first);
381 if (itMade == mothurMadeFiles.end()) {
382 if ((name == "align.seqs") && (it->first == "candidate")) {} //do nothing about candidate
384 m->mothurOut("You have the " + it->first + " listed as a mothurmade file for the " + name + " command, but it seems mothur will not make that file in your current pipeline, please correct."); m->mothurOutEndLine();
391 //is the command missing any required
392 vector<string> requiredParameters = command->getRequiredParameters();
397 if (requiredParameters.size() > 2) {
398 if (requiredParameters[(requiredParameters.size()-1)] == "or") { hasOr = true; }
401 for (int i = 0; i < requiredParameters.size(); i++) {
402 it = parameters.find(requiredParameters[i]);
404 if (it != parameters.end()) { numFound++; }
406 if (!hasOr) { m->mothurOut(name + " requires the " + requiredParameters[i] + " parameter, please correct."); m->mothurOutEndLine(); }
410 // if all are needed and not all are found
411 if ((!hasOr) && (numFound != requiredParameters.size())) { return true; }
412 //if one is needed and none are found
413 else if ((hasOr) && (numFound == 0)) { return true; }
416 map<string, vector<string> > thisCommandsFile = command->getOutputFiles();
417 for (itMade = thisCommandsFile.begin(); itMade != thisCommandsFile.end(); itMade++) {
418 mothurMadeFiles[itMade->first] = itMade->second; //adds any new types
423 catch(exception& e) {
424 m->errorOut(e, "PipelineCommand", "checkForValidAndRequiredParameters");
428 //**********************************************************************************************************************
429 int PipelineCommand::runUsersPipeline(){
432 m->openInputFile(pipeFilename, in);
434 string nextCommand = "";
436 map<string, vector<string> > mothurMadeFiles;
439 nextCommand = m->getline(in); m->gobble(in);
441 if (nextCommand[0] != '#') {
442 CommandOptionParser parser(nextCommand);
443 string commandName = parser.getCommandString();
444 string options = parser.getOptionString();
446 if ((options != "") && (commandName != "system")) {
447 bool error = fillInMothurMade(options, mothurMadeFiles);
448 if (error) { in.close(); return 0; }
451 m->mothurOutEndLine(); m->mothurOut("mothur > " + commandName + "(" + options + ")"); m->mothurOutEndLine();
453 if (m->control_pressed) { return 0; }
457 MPI_Comm_rank(MPI_COMM_WORLD, &pid);
459 if ((cFactory->MPIEnabled(commandName)) || (pid == 0)) {
462 //executes valid command
463 Command* command = cFactory->getCommand(commandName, options, "pipe");
466 //add output files to list
467 map<string, vector<string> > thisCommandsFile = command->getOutputFiles();
468 map<string, vector<string> >::iterator itMade;
469 map<string, vector<string> >::iterator it;
470 for (itMade = thisCommandsFile.begin(); itMade != thisCommandsFile.end(); itMade++) {
472 vector<string> temp = itMade->second;
473 for (int k = 0; k < temp.size(); k++) { outputNames.push_back(temp[k]); } //
475 //update Mothur Made for each file
476 it = mothurMadeFiles.find(itMade->first);
478 if (it == mothurMadeFiles.end()) { //new type
480 mothurMadeFiles[itMade->first] = temp;
482 }else{ //update existing type
483 vector<string> oldFileNames = it->second;
484 //look at new files, see if an old version of the file exists, if so update, else just add.
485 //for example you may have abrecovery.fasta and amazon.fasta as old files and you created a new amazon.trim.fasta.
487 for (int k = 0; k < temp.size(); k++) {
490 string root = m->getSimpleName(temp[k]);
491 string individual = "";
492 for(int i=0;i<root.length();i++){
497 individual += root[i];
501 //look for that base name in oldfiles
503 for (int l = 0; l < oldFileNames.size(); l++) {
504 int pos = oldFileNames[l].find(root);
505 if (pos != string::npos) {
511 //if you found it update it, else add it
513 mothurMadeFiles[it->first][spot] = temp[k];
515 mothurMadeFiles[it->first].push_back(temp[k]);
531 catch(exception& e) {
532 m->errorOut(e, "PipelineCommand", "runUsersPipeline");
536 //**********************************************************************************************************************
537 bool PipelineCommand::fillInMothurMade(string& options, map<string, vector<string> >& mothurMadeFiles){
540 OptionParser parser(options);
541 map<string, string> parameters = parser.getParameters();
542 map<string, string>::iterator it;
543 map<string, vector<string> >::iterator itMade;
547 //fill in mothurmade filenames
548 for (it = parameters.begin(); it != parameters.end(); it++) {
549 string paraType = it->first;
550 string tempOption = it->second;
552 if (tempOption == "mothurmade") {
554 if (it->first == "candidate") { paraType = "fasta"; }
556 itMade = mothurMadeFiles.find(paraType);
558 if (itMade == mothurMadeFiles.end()) {
559 m->mothurOut("Looking for a mothurmade " + paraType + " file, but it seems mothur has not made that file type in your current pipeline, please correct."); m->mothurOutEndLine();
562 vector<string> temp = itMade->second;
564 if (temp.size() > 1) {
565 //ask user which file to use
566 m->mothurOut("More than one file has been created for the " + paraType + " parameter. "); m->mothurOutEndLine();
567 for (int i = 0; i < temp.size(); i++) {
568 m->mothurOut(toString(i) + " - " + temp[i]); m->mothurOutEndLine();
571 m->mothurOut("Please select the number of the file you would like to use: ");
574 m->mothurOutJustToLog(toString(num)); m->mothurOutEndLine();
576 if ((num < 0) || (num > (temp.size()-1))) { m->mothurOut("Not a valid response, quitting."); m->mothurOutEndLine(); return true; }
578 tempOption = temp[num];
581 //clears buffer so next command doesn't have error
585 vector<string> newTemp;
586 for (int i = 0; i < temp.size(); i++) {
587 if (i == num) { newTemp.push_back(temp[i]); }
589 m->mothurOut("Would you like to remove " + temp[i] + " as an option for " + paraType + ", (y/n): "); m->mothurOutEndLine();
592 m->mothurOutJustToLog(response); m->mothurOutEndLine();
594 if (response == "n") { newTemp.push_back(temp[i]); }
596 //clears buffer so next command doesn't have error
602 mothurMadeFiles[paraType] = newTemp;
605 }else if (temp.size() == 0){
606 m->mothurOut("Sorry, we seem to think you created a " + paraType + " file, but it seems mothur doesn't have a filename."); m->mothurOutEndLine();
609 tempOption = temp[0];
614 options += it->first + "=" + tempOption + ", ";
617 //rip off extra comma
618 options = options.substr(0, (options.length()-2));
622 catch(exception& e) {
623 m->errorOut(e, "PipelineCommand", "fillInMothurMade");
628 //**********************************************************************************************************************
629 void PipelineCommand::createPatsPipeline(){
633 string thisCommand = "sffinfo(sff=" + sffFile + ")";
634 commands.push_back(thisCommand);
637 string fastaFile = m->getRootName(m->getSimpleName(sffFile)) + "fasta";
638 string qualFile = m->getRootName(m->getSimpleName(sffFile)) + "qual";
639 thisCommand = "trim.seqs(processors=" + toString(processors) + ", fasta=" + fastaFile + ", allfiles=T, maxambig=0, maxhomop=8, flip=T, bdiffs=1, pdiffs=2, qwindowaverage=35, qwindowsize=50, oligos=" + oligosFile + ", qfile=" + qualFile + ")";
640 commands.push_back(thisCommand);
643 string groupFile = m->getRootName(m->getSimpleName(fastaFile)) + "groups";
644 qualFile = m->getRootName(m->getSimpleName(fastaFile)) + "trim.qual";
645 fastaFile = m->getRootName(m->getSimpleName(fastaFile)) + "trim.fasta";
646 thisCommand = "unique.seqs(fasta=" + fastaFile + ")";
647 commands.push_back(thisCommand);
650 string nameFile = m->getRootName(m->getSimpleName(fastaFile)) + "names";
651 fastaFile = m->getRootName(m->getSimpleName(fastaFile)) + "unique" + m->getExtension(fastaFile);
652 thisCommand = "align.seqs(processors=" + toString(processors) + ", candidate=" + fastaFile + ", template=" + alignFile + ")";
653 commands.push_back(thisCommand);
656 fastaFile = m->getRootName(m->getSimpleName(fastaFile)) + "align";
657 thisCommand = "screen.seqs(processors=" + toString(processors) + ", fasta=" + fastaFile + ", name=" + nameFile + ", group=" + groupFile + ", optimize=end-minlength)";
658 commands.push_back(thisCommand);
661 fastaFile = m->getRootName(m->getSimpleName(fastaFile)) + "good" + m->getExtension(fastaFile);
662 nameFile = m->getRootName(m->getSimpleName(nameFile)) + "good" + m->getExtension(nameFile);
663 groupFile = m->getRootName(m->getSimpleName(groupFile)) + "good" + m->getExtension(groupFile);
664 thisCommand = "chimera.slayer(processors=" + toString(processors) + ", fasta=" + fastaFile + ", template=" + chimeraFile + ")";
665 commands.push_back(thisCommand);
668 string accnosFile = m->getRootName(m->getSimpleName(fastaFile)) + "slayer.accnos";
669 thisCommand = "remove.seqs(fasta=" + fastaFile + ", name=" + nameFile + ", group=" + groupFile + ", accnos=" + accnosFile + ", dups=T)";
670 commands.push_back(thisCommand);
673 nameFile = m->getRootName(m->getSimpleName(nameFile)) + "pick" + m->getExtension(nameFile);
674 groupFile = m->getRootName(m->getSimpleName(groupFile)) + "pick" + m->getExtension(groupFile);
675 fastaFile = m->getRootName(m->getSimpleName(fastaFile)) + "pick" + m->getExtension(fastaFile);
676 thisCommand = "filter.seqs(processors=" + toString(processors) + ", fasta=" + fastaFile + ", vertical=T, trump=.)";
677 commands.push_back(thisCommand);
680 fastaFile = m->getRootName(m->getSimpleName(fastaFile)) + "filter.fasta";
681 thisCommand = "unique.seqs(fasta=" + fastaFile + ", name=" + nameFile + ")";
682 commands.push_back(thisCommand);
685 nameFile = m->getRootName(m->getSimpleName(fastaFile)) + "names";
686 fastaFile = m->getRootName(m->getSimpleName(fastaFile)) + "unique" + m->getExtension(fastaFile);
687 thisCommand = "pre.cluster(fasta=" + fastaFile + ", name=" + nameFile + ", diffs=2)";
688 commands.push_back(thisCommand);
691 nameFile = m->getRootName(m->getSimpleName(fastaFile)) + "precluster.names";
692 fastaFile = m->getRootName(m->getSimpleName(fastaFile)) + "precluster" + m->getExtension(fastaFile);
693 thisCommand = "dist.seqs(processors=" + toString(processors) + ", fasta=" + fastaFile + ", cutoff=0.20)";
694 commands.push_back(thisCommand);
697 string columnFile = m->getRootName(m->getSimpleName(fastaFile)) + "dist";
698 thisCommand = "dist.seqs(processors=" + toString(processors) + ", fasta=" + fastaFile + ", output=lt)";
699 commands.push_back(thisCommand);
702 string phylipFile = m->getRootName(m->getSimpleName(fastaFile)) + "phylip.dist";
703 thisCommand = "read.dist(column=" + columnFile + ", name=" + nameFile + ")";
704 commands.push_back(thisCommand);
707 thisCommand = "cluster(method=average, hard=T)";
708 commands.push_back(thisCommand);
710 string listFile = m->getRootName(m->getSimpleName(columnFile)) + "an.list";
711 string rabundFile = m->getRootName(m->getSimpleName(columnFile)) + "an.rabund";
714 thisCommand = "degap.seqs(fasta=" + fastaFile + ")";
715 commands.push_back(thisCommand);
718 fastaFile = m->getRootName(m->getSimpleName(fastaFile)) + "ng.fasta";
719 thisCommand = "classify.seqs(processors=" + toString(processors) + ", fasta=" + fastaFile + ", name=" + nameFile + ", template=" + classifyFile + ", taxonomy=" + taxonomyFile + ", cutoff=80)";
720 commands.push_back(thisCommand);
722 string RippedTaxName = m->getRootName(m->getSimpleName(taxonomyFile));
723 RippedTaxName = m->getExtension(RippedTaxName.substr(0, RippedTaxName.length()-1));
724 if (RippedTaxName[0] == '.') { RippedTaxName = RippedTaxName.substr(1, RippedTaxName.length()); }
725 RippedTaxName += ".";
727 string fastaTaxFile = m->getRootName(m->getSimpleName(fastaFile)) + RippedTaxName + "taxonomy";
728 string taxSummaryFile = m->getRootName(m->getSimpleName(fastaFile)) + RippedTaxName + "tax.summary";
731 thisCommand = "phylotype(taxonomy=" + fastaTaxFile + ", name=" + nameFile + ")";
732 commands.push_back(thisCommand);
734 string phyloListFile = m->getRootName(m->getSimpleName(fastaTaxFile)) + "tx.list";
735 string phyloRabundFile = m->getRootName(m->getSimpleName(fastaTaxFile)) + "tx.rabund";
738 thisCommand = "clearcut(phylip=" + phylipFile + ", neighbor=T)";
739 commands.push_back(thisCommand);
741 string treeFile = m->getRootName(m->getSimpleName(phylipFile)) + "tre";
744 thisCommand = "read.otu(list=" + listFile + ", group=" + groupFile + ", label=0.03)";
745 commands.push_back(thisCommand);
747 string sharedFile = m->getRootName(m->getSimpleName(listFile)) + "shared";
750 thisCommand = "read.otu(list=" + phyloListFile + ", group=" + groupFile + ", label=1)";
751 commands.push_back(thisCommand);
753 string phyloSharedFile = m->getRootName(m->getSimpleName(phyloListFile)) + "shared";
756 thisCommand = "read.otu(shared=" + sharedFile + ")";
757 commands.push_back(thisCommand);
760 thisCommand = "summary.single(calc=nseqs-sobs-coverage-bergerparker-chao-ace-jack-bootstrap-boneh-efron-shen-solow-shannon-npshannon-invsimpson-qstat-simpsoneven-shannoneven-heip-smithwilson, size=5000)";
761 commands.push_back(thisCommand);
764 thisCommand = "summary.shared(calc=sharednseqs-sharedsobs-sharedchao-sharedace-anderberg-jclass-jest-kulczynski-kulczynskicody-lennon-ochiai-sorclass-sorest-whittaker-braycurtis-jabund-morisitahorn-sorabund-thetan-thetayc)";
765 commands.push_back(thisCommand);
768 thisCommand = "read.otu(rabund=" + rabundFile + ", label=0.03)";
769 commands.push_back(thisCommand);
772 thisCommand = "summary.single(calc=nseqs-sobs-coverage-bergerparker-chao-ace-jack-bootstrap-boneh-efron-shen-solow-shannon-npshannon-invsimpson-qstat-simpsoneven-shannoneven-heip-smithwilson, size=5000)";
773 commands.push_back(thisCommand);
776 thisCommand = "read.otu(shared=" + phyloSharedFile + ")";
777 commands.push_back(thisCommand);
780 thisCommand = "summary.single(calc=nseqs-sobs-coverage-bergerparker-chao-ace-jack-bootstrap-boneh-efron-shen-solow-shannon-npshannon-invsimpson-qstat-simpsoneven-shannoneven-heip-smithwilson, size=5000)";
781 commands.push_back(thisCommand);
784 thisCommand = "summary.shared(calc=sharednseqs-sharedsobs-sharedchao-sharedace-anderberg-jclass-jest-kulczynski-kulczynskicody-lennon-ochiai-sorclass-sorest-whittaker-braycurtis-jabund-morisitahorn-sorabund-thetan-thetayc)";
785 commands.push_back(thisCommand);
788 thisCommand = "read.otu(rabund=" + phyloRabundFile + ", label=1)";
789 commands.push_back(thisCommand);
792 thisCommand = "summary.single(calc=nseqs-sobs-coverage-bergerparker-chao-ace-jack-bootstrap-boneh-efron-shen-solow-shannon-npshannon-invsimpson-qstat-simpsoneven-shannoneven-heip-smithwilson, size=5000)";
793 commands.push_back(thisCommand);
796 thisCommand = "classify.otu(taxonomy=" + fastaTaxFile + ", name=" + nameFile + ", list=" + listFile + ", cutoff=51, label=0.03)";
797 commands.push_back(thisCommand);
800 thisCommand = "classify.otu(taxonomy=" + fastaTaxFile + ", name=" + nameFile + ", list=" + phyloListFile + ", cutoff=51, label=1)";
801 commands.push_back(thisCommand);
804 thisCommand = "read.tree(tree=" + treeFile + ", name=" + nameFile + ", group=" + groupFile + ")";
805 commands.push_back(thisCommand);
808 thisCommand = "phylo.diversity(iters=100,rarefy=T)";
809 commands.push_back(thisCommand);
812 thisCommand = "unifrac.weighted(random=false, distance=true, groups=all, processors=" + toString(processors) + ")";
813 commands.push_back(thisCommand);
816 thisCommand = "unifrac.unweighted(random=false, distance=true, processors=" + toString(processors) + ")";
817 commands.push_back(thisCommand);
821 catch(exception& e) {
822 m->errorOut(e, "PipelineCommand", "createPatsPipeline");
827 //**********************************************************************************************************************