2 * pipelinepdscommand.cpp
5 * Created by westcott on 10/5/10.
6 * Copyright 2010 Schloss Lab. All rights reserved.
10 #include "pipelinepdscommand.h"
11 #include "sffinfocommand.h"
12 #include "commandoptionparser.hpp"
14 //**********************************************************************************************************************
15 vector<string> PipelineCommand::setParameters(){
17 CommandParameter psff("sff", "InputTypes", "", "", "none", "oneRequired", "pipe",false,false); parameters.push_back(psff);
18 CommandParameter poligos("oligos", "InputTypes", "", "", "none", "oneRequired", "pipe",false,false); parameters.push_back(poligos);
19 CommandParameter palign("align", "InputTypes", "", "", "none", "oneRequired", "pipe",false,false); parameters.push_back(palign);
20 CommandParameter pchimera("chimera", "InputTypes", "", "", "none", "oneRequired", "pipe",false,false); parameters.push_back(pchimera);
21 CommandParameter pclassify("classify", "InputTypes", "", "", "none", "oneRequired", "pipe",false,false); parameters.push_back(pclassify);
22 CommandParameter ptaxonomy("taxonomy", "InputTypes", "", "", "none", "oneRequired", "pipe",false,false); parameters.push_back(ptaxonomy);
23 CommandParameter ppipeline("pipeline", "InputTypes", "", "", "none", "oneRequired", "none",false,false); parameters.push_back(ppipeline);
24 CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors);
25 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
26 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
28 vector<string> myArray;
29 for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); }
33 m->errorOut(e, "PipelineCommand", "setParameters");
37 //**********************************************************************************************************************
38 string PipelineCommand::getHelpString(){
40 string helpString = "";
41 helpString += "The pipeline.pds command is designed to guide you through your analysis using mothur.\n";
42 helpString += "The pipeline.pds command parameters are pipeline, sff, oligos, align, chimera, classify, taxonomy and processors.\n";
43 helpString += "The sff parameter allows you to enter your sff file. It is required, if not using pipeline parameter.\n";
44 helpString += "The oligos parameter allows you to enter your oligos file. It is required, if not using pipeline parameter.\n";
45 helpString += "The align parameter allows you to enter a template to use with the aligner. It is required, if not using pipeline parameter.\n";
46 helpString += "The chimera parameter allows you to enter a template to use for chimera detection. It is required, if not using pipeline parameter.\n";
47 helpString += "The classify parameter allows you to enter a template to use for classification. It is required, if not using pipeline parameter.\n";
48 helpString += "The taxonomy parameter allows you to enter a taxonomy file for the classify template to use for classification. It is required, if not using pipeline parameter.\n";
49 helpString += "The processors parameter allows you to specify the number of processors to use. The default is 1.\n";
50 helpString += "The pipeline parameter allows you to enter your own pipeline file. This file should look like a mothur batchfile, but where you would be using a mothur generated file, you can use current instead.\n";
51 helpString += "Example: trim.seqs(processors=8, allfiles=T, maxambig=0, maxhomop=8, flip=T, bdiffs=1, pdiffs=2, qwindowaverage=35, qwindowsize=50, fasta=may1.v13.fasta, oligos=may1.v13.oligos, qfile=may1.v13.qual)\n";
52 helpString += "then, you could enter unique.seqs(fasta=current), and mothur would use the .trim.fasta file from the trim.seqs command. \n";
53 helpString += "then you could enter align.seqs(candidate=current, template=silva.v13.align, processors=8). , and mothur would use the .trim.unique.fasta file from the unique.seqs command. \n";
54 helpString += "If no pipeline file is given then mothur will use Pat's pipeline. \n";
55 helpString += "Here is a list of the commands used in Pat's pipeline.\n";
56 helpString += "All paralellized commands will use the processors you entered.\n";
57 helpString += "The sffinfo command takes your sff file and extracts the fasta and quality files.\n";
58 helpString += "The trim.seqs command uses your oligos file and the quality and fasta files generated by sffinfo.\n";
59 helpString += "The trim.seqs command sets the following parameters: allfiles=T, maxambig=0, maxhomop=8, flip=T, bdiffs=1, pdiffs=2, qwindowaverage=35, qwindowsize=50.\n";
60 helpString += "The unique.seqs command uses the trimmed fasta file and removes redundant sequences, don't worry the names file generated by unique.seqs will be used in the pipeline to make sure they are included.\n";
61 helpString += "The align.seqs command aligns the unique sequences using the aligners default options. \n";
62 helpString += "The screen.seqs command screens the sequences using optimize=end-minlength. \n";
63 helpString += "The pipeline uses chimera.slayer to detect chimeras using the default options. \n";
64 helpString += "The pipeline removes all sequences determined to be chimeric by chimera.slayer. \n";
65 helpString += "The filter.seqs command filters the sequences using vertical=T, trump=. \n";
66 helpString += "The unique.seqs command uses the filtered fasta file and name file to remove sequences that have become redundant after filtering.\n";
67 helpString += "The pre.cluster command clusters sequences that have no more than 2 differences.\n";
68 helpString += "The dist.seqs command is used to generate a column and phylip formatted distance matrix using cutoff=0.20 for column.\n";
69 helpString += "The pipeline uses cluster with method=average, hard=T. \n";
70 helpString += "The classify.seqs command is used to classify the sequences using the bayesian method with a cutoff of 80.\n";
71 helpString += "The phylotype command is used to cluster the sequences based on their classification.\n";
72 helpString += "The clearcut command is used to generate a tree using neighbor=T. \n";
73 helpString += "The summary.single and summary.shared commands are run on the otu files from cluster and phylotype commands. \n";
74 helpString += "The summary.shared command uses calc=sharednseqs-sharedsobs-sharedchao-sharedace-anderberg-jclass-jest-kulczynski-kulczynskicody-lennon-ochiai-sorclass-sorest-whittaker-braycurtis-jabund-morisitahorn-sorabund-thetan-thetayc. \n";
75 helpString += "The summary.single command uses calc=nseqs-sobs-coverage-bergerparker-chao-ace-jack-bootstrap-boneh-efron-shen-solow-shannon-npshannon-invsimpson-qstat-simpsoneven-shannoneven-heip-smithwilson. \n";
76 helpString += "The classify.otu command is used to get the concensus taxonomy for otu files from cluster and phylotype commands. \n";
77 helpString += "The phylo.diversity command run on the tree generated by clearcut with rarefy=T, iters=100. \n";
78 helpString += "The unifrac commands are also run on the tree generated by clearcut with random=F, distance=T. \n";
83 m->errorOut(e, "PipelineCommand", "getHelpString");
89 //**********************************************************************************************************************
90 PipelineCommand::PipelineCommand(string option) {
92 cFactory = CommandFactory::getInstance();
93 abort = false; calledHelp = false;
95 //allow user to run help
96 if(option == "help") { help(); abort = true; calledHelp = true; }
97 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
100 vector<string> myArray = setParameters();
102 OptionParser parser(option);
103 map<string, string> parameters = parser.getParameters();
105 ValidParameters validParameter;
106 map<string, string>::iterator it;
108 //check to make sure all parameters are valid for command
109 for (it = parameters.begin(); it != parameters.end(); it++) {
110 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
113 //if the user changes the input directory command factory will send this info to us in the output parameter
114 string inputDir = validParameter.validFile(parameters, "inputdir", false);
115 if (inputDir == "not found"){ inputDir = ""; }
118 it = parameters.find("sff");
119 //user has given a template file
120 if(it != parameters.end()){
121 path = m->hasPath(it->second);
122 //if the user has not given a path then, add inputdir. else leave path alone.
123 if (path == "") { parameters["sff"] = inputDir + it->second; }
126 it = parameters.find("oligos");
127 //user has given a template file
128 if(it != parameters.end()){
129 path = m->hasPath(it->second);
130 //if the user has not given a path then, add inputdir. else leave path alone.
131 if (path == "") { parameters["oligos"] = inputDir + it->second; }
134 it = parameters.find("align");
135 //user has given a template file
136 if(it != parameters.end()){
137 path = m->hasPath(it->second);
138 //if the user has not given a path then, add inputdir. else leave path alone.
139 if (path == "") { parameters["align"] = inputDir + it->second; }
142 it = parameters.find("chimera");
143 //user has given a template file
144 if(it != parameters.end()){
145 path = m->hasPath(it->second);
146 //if the user has not given a path then, add inputdir. else leave path alone.
147 if (path == "") { parameters["chimera"] = inputDir + it->second; }
150 it = parameters.find("classify");
151 //user has given a template file
152 if(it != parameters.end()){
153 path = m->hasPath(it->second);
154 //if the user has not given a path then, add inputdir. else leave path alone.
155 if (path == "") { parameters["classify"] = inputDir + it->second; }
158 it = parameters.find("taxonomy");
159 //user has given a template file
160 if(it != parameters.end()){
161 path = m->hasPath(it->second);
162 //if the user has not given a path then, add inputdir. else leave path alone.
163 if (path == "") { parameters["taxonomy"] = inputDir + it->second; }
166 it = parameters.find("pipeline");
167 //user has given a template file
168 if(it != parameters.end()){
169 path = m->hasPath(it->second);
170 //if the user has not given a path then, add inputdir. else leave path alone.
171 if (path == "") { parameters["pipeline"] = inputDir + it->second; }
175 outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = ""; }
177 pipeFilename = validParameter.validFile(parameters, "pipeline", true);
178 if (pipeFilename == "not found") { pipeFilename = ""; }
179 else if (pipeFilename == "not open") { pipeFilename = ""; abort = true; }
181 string temp = validParameter.validFile(parameters, "processors", false); if (temp == "not found"){ temp = m->getProcessors(); }
182 m->setProcessors(temp);
183 m->mothurConvert(temp, processors);
185 if (pipeFilename != "") {
186 abort = readUsersPipeline();
188 sffFile = validParameter.validFile(parameters, "sff", true);
189 if (sffFile == "not found") { m->mothurOut("sff is a required parameter for the pipeline command."); m->mothurOutEndLine(); abort = true; }
190 else if (sffFile == "not open") { sffFile = ""; abort = true; }
191 else { m->setSFFFile(sffFile); }
193 oligosFile = validParameter.validFile(parameters, "oligos", true);
194 if (oligosFile == "not found") { m->mothurOut("oligos is a required parameter for the pipeline command."); m->mothurOutEndLine(); abort = true; }
195 else if (oligosFile == "not open") { oligosFile = ""; abort = true; }
197 alignFile = validParameter.validFile(parameters, "align", true);
198 if (alignFile == "not found") { m->mothurOut("align is a required parameter for the pipeline command. Please provide the template to align with."); m->mothurOutEndLine(); abort = true; }
199 else if (alignFile == "not open") { alignFile = ""; abort = true; }
201 chimeraFile = validParameter.validFile(parameters, "chimera", true);
202 if (chimeraFile == "not found") { m->mothurOut("chimera is a required parameter for the pipeline command. Please provide the template to check for chimeras with."); m->mothurOutEndLine(); abort = true; }
203 else if (chimeraFile == "not open") { chimeraFile = ""; abort = true; }
205 classifyFile = validParameter.validFile(parameters, "classify", true);
206 if (classifyFile == "not found") { m->mothurOut("classify is a required parameter for the pipeline command. Please provide the template to use with the classifier."); m->mothurOutEndLine(); abort = true; }
207 else if (classifyFile == "not open") { classifyFile = ""; abort = true; }
209 taxonomyFile = validParameter.validFile(parameters, "taxonomy", true);
210 if (taxonomyFile == "not found") { m->mothurOut("taxonomy is a required parameter for the pipeline command."); m->mothurOutEndLine(); abort = true; }
211 else if (taxonomyFile == "not open") { taxonomyFile = ""; abort = true; }
216 catch(exception& e) {
217 m->errorOut(e, "PipelineCommand", "PipelineCommand");
221 //**********************************************************************************************************************
223 int PipelineCommand::execute(){
225 if (abort == true) { if (calledHelp) { return 0; } return 2; }
227 int start = time(NULL);
229 if (pipeFilename == "") {
230 createPatsPipeline();
233 for (int i = 0; i < commands.size(); i++) {
234 m->mothurOutEndLine(); m->mothurOut("mothur > " + commands[i]); m->mothurOutEndLine();
236 if (m->control_pressed) { return 0; }
238 CommandOptionParser parser(commands[i]);
239 string commandName = parser.getCommandString();
240 string options = parser.getOptionString();
244 MPI_Comm_rank(MPI_COMM_WORLD, &pid);
246 if ((cFactory->MPIEnabled(commandName)) || (pid == 0)) {
249 //executes valid command
250 Command* command = cFactory->getCommand(commandName, options, "pipe");
253 //add output files to list
254 map<string, vector<string> > thisCommandsFile = command->getOutputFiles();
255 map<string, vector<string> >::iterator itMade;
256 for (itMade = thisCommandsFile.begin(); itMade != thisCommandsFile.end(); itMade++) {
257 vector<string> temp = itMade->second;
258 for (int j = 0; j < temp.size(); j++) { outputNames.push_back(temp[j]); }
266 }else { runUsersPipeline(); }
268 if (m->control_pressed) { return 0; }
270 m->mothurOut("It took " + toString(time(NULL) - start) + " secs to run the pipeline analysis."); m->mothurOutEndLine(); m->mothurOutEndLine();
272 m->mothurOutEndLine();
273 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
274 for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); }
275 m->mothurOutEndLine();
279 catch(exception& e) {
280 m->errorOut(e, "PipelineCommand", "execute");
284 //**********************************************************************************************************************
286 bool PipelineCommand::readUsersPipeline(){
290 m->openInputFile(pipeFilename, in);
292 string nextCommand = "";
294 map<string, vector<string> > mothurMadeFiles;
297 nextCommand = m->getline(in); m->gobble(in);
299 if (nextCommand[0] != '#') {
302 string commandName, options;
303 error = parseCommand(nextCommand, commandName, options);
305 if (error) { in.close(); return error; }
306 if (commandName == "pipeline.pds") { m->mothurOut("Cannot run the pipeline.pds command from inside the pipeline.pds command."); m->mothurOutEndLine(); in.close(); return true; }
308 error = checkForValidAndRequiredParameters(commandName, options, mothurMadeFiles);
310 if (error) { in.close(); return error; }
318 catch(exception& e) {
319 m->errorOut(e, "PipelineCommand", "readUsersPipeline");
323 //**********************************************************************************************************************
325 bool PipelineCommand::parseCommand(string nextCommand, string& name, string& options){
327 CommandOptionParser parser(nextCommand);
328 name = parser.getCommandString();
329 options = parser.getOptionString();
331 if (name == "") { return true; } //name == "" if () are not right
335 catch(exception& e) {
336 m->errorOut(e, "PipelineCommand", "parseCommand");
340 //**********************************************************************************************************************
342 bool PipelineCommand::checkForValidAndRequiredParameters(string name, string options, map<string, vector<string> >& mothurMadeFiles){
345 if (name == "system") { return false; }
347 //get shell of the command so we can check to make sure its valid without running it
348 Command* command = cFactory->getCommand(name);
350 //check to make sure all parameters are valid for command
351 vector<string> validParameters = command->setParameters();
353 OptionParser parser(options);
354 map<string, string> parameters = parser.getParameters();
356 ValidParameters validParameter;
357 map<string, string>::iterator it;
358 map<string, vector<string> >::iterator itMade;
360 for (it = parameters.begin(); it != parameters.end(); it++) {
362 if (validParameter.isValidParameter(it->first, validParameters, it->second) != true) { return true; } // not valid
363 if (it->second == "current") {
364 itMade = mothurMadeFiles.find(it->first);
366 if (itMade == mothurMadeFiles.end()) {
367 m->mothurOut("You have the " + it->first + " listed as a current file for the " + name + " command, but it seems mothur will not make that file in your current pipeline, please correct."); m->mothurOutEndLine();
373 //is the command missing any required
374 vector<CommandParameter> commandParameters = command->getParameters();
375 vector<string> requiredParameters;
376 for (int i = 0; i < commandParameters.size(); i++) {
377 if (commandParameters[i].required) {
378 requiredParameters.push_back(commandParameters[i].name);
382 for (int i = 0; i < requiredParameters.size(); i++) {
383 it = parameters.find(requiredParameters[i]);
385 if (it == parameters.end()) {
387 string paraToLookFor = requiredParameters[i];
389 //does mothur have a current file for this?
390 itMade = mothurMadeFiles.find(requiredParameters[i]);
392 if (itMade == mothurMadeFiles.end()) {
393 m->mothurOut(name + " requires the " + requiredParameters[i] + " parameter, please correct."); m->mothurOutEndLine();
401 map<string, vector<string> > thisCommandsFile = command->getOutputFiles();
402 for (itMade = thisCommandsFile.begin(); itMade != thisCommandsFile.end(); itMade++) {
403 mothurMadeFiles[itMade->first] = itMade->second; //adds any new types
408 catch(exception& e) {
409 m->errorOut(e, "PipelineCommand", "checkForValidAndRequiredParameters");
413 //**********************************************************************************************************************
414 int PipelineCommand::runUsersPipeline(){
417 m->openInputFile(pipeFilename, in);
419 string nextCommand = "";
421 map<string, vector<string> > mothurMadeFiles;
424 nextCommand = m->getline(in); m->gobble(in);
426 if (nextCommand[0] != '#') {
427 CommandOptionParser parser(nextCommand);
428 string commandName = parser.getCommandString();
429 string options = parser.getOptionString();
431 if ((options != "") && (commandName != "system")) {
432 bool error = fillInMothurMade(options, mothurMadeFiles);
433 if (error) { in.close(); return 0; }
436 m->mothurOutEndLine(); m->mothurOut("mothur > " + commandName + "(" + options + ")"); m->mothurOutEndLine();
438 if (m->control_pressed) { return 0; }
442 MPI_Comm_rank(MPI_COMM_WORLD, &pid);
444 if ((cFactory->MPIEnabled(commandName)) || (pid == 0)) {
447 //executes valid command
448 Command* command = cFactory->getCommand(commandName, options, "pipe");
451 //add output files to list
452 map<string, vector<string> > thisCommandsFile = command->getOutputFiles();
453 map<string, vector<string> >::iterator itMade;
454 map<string, vector<string> >::iterator it;
455 for (itMade = thisCommandsFile.begin(); itMade != thisCommandsFile.end(); itMade++) {
457 vector<string> temp = itMade->second;
458 for (int k = 0; k < temp.size(); k++) { outputNames.push_back(temp[k]); } //
460 //update Mothur Made for each file
461 it = mothurMadeFiles.find(itMade->first);
463 if (it == mothurMadeFiles.end()) { //new type
465 mothurMadeFiles[itMade->first] = temp;
467 }else{ //update existing type
468 vector<string> oldFileNames = it->second;
469 //look at new files, see if an old version of the file exists, if so update, else just add.
470 //for example you may have abrecovery.fasta and amazon.fasta as old files and you created a new amazon.trim.fasta.
472 for (int k = 0; k < temp.size(); k++) {
475 string root = m->getSimpleName(temp[k]);
476 string individual = "";
477 for(int i=0;i<root.length();i++){
482 individual += root[i];
486 //look for that base name in oldfiles
488 for (int l = 0; l < oldFileNames.size(); l++) {
489 int pos = oldFileNames[l].find(root);
490 if (pos != string::npos) {
496 //if you found it update it, else add it
498 mothurMadeFiles[it->first][spot] = temp[k];
500 mothurMadeFiles[it->first].push_back(temp[k]);
516 catch(exception& e) {
517 m->errorOut(e, "PipelineCommand", "runUsersPipeline");
521 //**********************************************************************************************************************
522 bool PipelineCommand::fillInMothurMade(string& options, map<string, vector<string> >& mothurMadeFiles){
525 OptionParser parser(options);
526 map<string, string> parameters = parser.getParameters();
527 map<string, string>::iterator it;
528 map<string, vector<string> >::iterator itMade;
532 //fill in mothurmade filenames
533 for (it = parameters.begin(); it != parameters.end(); it++) {
534 string paraType = it->first;
535 string tempOption = it->second;
537 if (tempOption == "current") {
539 itMade = mothurMadeFiles.find(paraType);
541 if (itMade == mothurMadeFiles.end()) {
542 m->mothurOut("Looking for a current " + paraType + " file, but it seems mothur has not made that file type in your current pipeline, please correct."); m->mothurOutEndLine();
545 vector<string> temp = itMade->second;
547 if (temp.size() > 1) {
548 //ask user which file to use
549 m->mothurOut("More than one file has been created for the " + paraType + " parameter. "); m->mothurOutEndLine();
550 for (int i = 0; i < temp.size(); i++) {
551 m->mothurOut(toString(i) + " - " + temp[i]); m->mothurOutEndLine();
554 m->mothurOut("Please select the number of the file you would like to use: ");
557 m->mothurOutJustToLog(toString(num)); m->mothurOutEndLine();
559 if ((num < 0) || (num > (temp.size()-1))) { m->mothurOut("Not a valid response, quitting."); m->mothurOutEndLine(); return true; }
561 tempOption = temp[num];
564 //clears buffer so next command doesn't have error
568 vector<string> newTemp;
569 for (int i = 0; i < temp.size(); i++) {
570 if (i == num) { newTemp.push_back(temp[i]); }
572 m->mothurOut("Would you like to remove " + temp[i] + " as an option for " + paraType + ", (y/n): "); m->mothurOutEndLine();
575 m->mothurOutJustToLog(response); m->mothurOutEndLine();
577 if (response == "n") { newTemp.push_back(temp[i]); }
579 //clears buffer so next command doesn't have error
585 mothurMadeFiles[paraType] = newTemp;
588 }else if (temp.size() == 0){
589 m->mothurOut("Sorry, we seem to think you created a " + paraType + " file, but it seems mothur doesn't have a filename."); m->mothurOutEndLine();
592 tempOption = temp[0];
597 options += it->first + "=" + tempOption + ", ";
600 //rip off extra comma
601 options = options.substr(0, (options.length()-2));
605 catch(exception& e) {
606 m->errorOut(e, "PipelineCommand", "fillInMothurMade");
611 //**********************************************************************************************************************
612 void PipelineCommand::createPatsPipeline(){
616 string thisCommand = "sffinfo(sff=" + sffFile + ")";
617 commands.push_back(thisCommand);
620 string fastaFile = m->getRootName(m->getSimpleName(sffFile)) + "fasta";
621 string qualFile = m->getRootName(m->getSimpleName(sffFile)) + "qual";
622 thisCommand = "trim.seqs(processors=" + toString(processors) + ", fasta=current, allfiles=T, maxambig=0, maxhomop=8, flip=T, bdiffs=1, pdiffs=2, qwindowaverage=35, qwindowsize=50, oligos=" + oligosFile + ", qfile=current)";
623 commands.push_back(thisCommand);
626 string groupFile = m->getRootName(m->getSimpleName(fastaFile)) + "groups";
627 qualFile = m->getRootName(m->getSimpleName(fastaFile)) + "trim.qual";
628 fastaFile = m->getRootName(m->getSimpleName(fastaFile)) + "trim.fasta";
629 thisCommand = "unique.seqs(fasta=current)";
630 commands.push_back(thisCommand);
633 string nameFile = m->getRootName(m->getSimpleName(fastaFile)) + "names";
634 fastaFile = m->getRootName(m->getSimpleName(fastaFile)) + "unique" + m->getExtension(fastaFile);
635 thisCommand = "align.seqs(processors=" + toString(processors) + ", candidate=current, template=" + alignFile + ")";
636 commands.push_back(thisCommand);
639 fastaFile = m->getRootName(m->getSimpleName(fastaFile)) + "align";
640 thisCommand = "screen.seqs(processors=" + toString(processors) + ", fasta=current, name=current, group=current, optimize=end-minlength)";
641 commands.push_back(thisCommand);
644 fastaFile = m->getRootName(m->getSimpleName(fastaFile)) + "good" + m->getExtension(fastaFile);
645 nameFile = m->getRootName(m->getSimpleName(nameFile)) + "good" + m->getExtension(nameFile);
646 groupFile = m->getRootName(m->getSimpleName(groupFile)) + "good" + m->getExtension(groupFile);
647 thisCommand = "chimera.slayer(processors=" + toString(processors) + ", fasta=current, template=" + chimeraFile + ")";
648 commands.push_back(thisCommand);
651 string accnosFile = m->getRootName(m->getSimpleName(fastaFile)) + "slayer.accnos";
652 thisCommand = "remove.seqs(fasta=current, name=current, group=current, accnos=current, dups=T)";
653 commands.push_back(thisCommand);
656 nameFile = m->getRootName(m->getSimpleName(nameFile)) + "pick" + m->getExtension(nameFile);
657 groupFile = m->getRootName(m->getSimpleName(groupFile)) + "pick" + m->getExtension(groupFile);
658 fastaFile = m->getRootName(m->getSimpleName(fastaFile)) + "pick" + m->getExtension(fastaFile);
659 thisCommand = "filter.seqs(processors=" + toString(processors) + ", fasta=current, vertical=T, trump=.)";
660 commands.push_back(thisCommand);
663 fastaFile = m->getRootName(m->getSimpleName(fastaFile)) + "filter.fasta";
664 thisCommand = "unique.seqs(fasta=current, name=current)";
665 commands.push_back(thisCommand);
668 nameFile = m->getRootName(m->getSimpleName(fastaFile)) + "names";
669 fastaFile = m->getRootName(m->getSimpleName(fastaFile)) + "unique" + m->getExtension(fastaFile);
670 thisCommand = "pre.cluster(fasta=current, name=current, diffs=2)";
671 commands.push_back(thisCommand);
674 nameFile = m->getRootName(m->getSimpleName(fastaFile)) + "precluster.names";
675 fastaFile = m->getRootName(m->getSimpleName(fastaFile)) + "precluster" + m->getExtension(fastaFile);
676 thisCommand = "dist.seqs(processors=" + toString(processors) + ", fasta=current, cutoff=0.20)";
677 commands.push_back(thisCommand);
680 string columnFile = m->getRootName(m->getSimpleName(fastaFile)) + "dist";
681 thisCommand = "dist.seqs(processors=" + toString(processors) + ", fasta=current, output=lt)";
682 commands.push_back(thisCommand);
685 string phylipFile = m->getRootName(m->getSimpleName(fastaFile)) + "phylip.dist";
686 thisCommand = "read.dist(column=current, name=current)";
687 commands.push_back(thisCommand);
690 thisCommand = "cluster(method=average, hard=T)";
691 commands.push_back(thisCommand);
693 string listFile = m->getRootName(m->getSimpleName(columnFile)) + "an.list";
694 string rabundFile = m->getRootName(m->getSimpleName(columnFile)) + "an.rabund";
697 thisCommand = "degap.seqs(fasta=current)";
698 commands.push_back(thisCommand);
701 fastaFile = m->getRootName(m->getSimpleName(fastaFile)) + "ng.fasta";
702 thisCommand = "classify.seqs(processors=" + toString(processors) + ", fasta=current, name=current, template=" + classifyFile + ", taxonomy=" + taxonomyFile + ", cutoff=80)";
703 commands.push_back(thisCommand);
705 string RippedTaxName = m->getRootName(m->getSimpleName(taxonomyFile));
706 RippedTaxName = m->getExtension(RippedTaxName.substr(0, RippedTaxName.length()-1));
707 if (RippedTaxName[0] == '.') { RippedTaxName = RippedTaxName.substr(1, RippedTaxName.length()); }
708 RippedTaxName += ".";
710 string fastaTaxFile = m->getRootName(m->getSimpleName(fastaFile)) + RippedTaxName + "taxonomy";
711 string taxSummaryFile = m->getRootName(m->getSimpleName(fastaFile)) + RippedTaxName + "tax.summary";
714 thisCommand = "phylotype(taxonomy=current, name=current)";
715 commands.push_back(thisCommand);
717 string phyloListFile = m->getRootName(m->getSimpleName(fastaTaxFile)) + "tx.list";
718 string phyloRabundFile = m->getRootName(m->getSimpleName(fastaTaxFile)) + "tx.rabund";
721 thisCommand = "clearcut(phylip=current, neighbor=T)";
722 commands.push_back(thisCommand);
724 string treeFile = m->getRootName(m->getSimpleName(phylipFile)) + "tre";
727 thisCommand = "make.shared(list=" + listFile + ", group=" + groupFile + ", label=0.03)";
728 commands.push_back(thisCommand);
730 string sharedFile = m->getRootName(m->getSimpleName(listFile)) + "shared";
733 thisCommand = "make.shared(list=" + phyloListFile + ", group=" + groupFile + ", label=1)";
734 commands.push_back(thisCommand);
736 string phyloSharedFile = m->getRootName(m->getSimpleName(phyloListFile)) + "shared";
739 thisCommand = "set.current(shared=" + sharedFile + ")";
740 commands.push_back(thisCommand);
743 thisCommand = "summary.single(shared=current, calc=nseqs-sobs-coverage-bergerparker-chao-ace-jack-bootstrap-boneh-efron-shen-solow-shannon-npshannon-invsimpson-qstat-simpsoneven-shannoneven-heip-smithwilson, size=5000)";
744 commands.push_back(thisCommand);
747 thisCommand = "summary.shared(shared=current, calc=sharednseqs-sharedsobs-sharedchao-sharedace-anderberg-jclass-jest-kulczynski-kulczynskicody-lennon-ochiai-sorclass-sorest-whittaker-braycurtis-jabund-morisitahorn-sorabund-thetan-thetayc)";
748 commands.push_back(thisCommand);
751 //thisCommand = "read.otu(rabund=" + rabundFile + ", label=0.03)";
752 //commands.push_back(thisCommand);
755 thisCommand = "summary.single(rabund=" + rabundFile + ", label=0.03, calc=nseqs-sobs-coverage-bergerparker-chao-ace-jack-bootstrap-boneh-efron-shen-solow-shannon-npshannon-invsimpson-qstat-simpsoneven-shannoneven-heip-smithwilson, size=5000)";
756 commands.push_back(thisCommand);
759 thisCommand = "set.current(shared=" + phyloSharedFile + ")";
760 commands.push_back(thisCommand);
763 thisCommand = "summary.single(shared=current, calc=nseqs-sobs-coverage-bergerparker-chao-ace-jack-bootstrap-boneh-efron-shen-solow-shannon-npshannon-invsimpson-qstat-simpsoneven-shannoneven-heip-smithwilson, size=5000)";
764 commands.push_back(thisCommand);
767 thisCommand = "summary.shared(shared=current, calc=sharednseqs-sharedsobs-sharedchao-sharedace-anderberg-jclass-jest-kulczynski-kulczynskicody-lennon-ochiai-sorclass-sorest-whittaker-braycurtis-jabund-morisitahorn-sorabund-thetan-thetayc)";
768 commands.push_back(thisCommand);
771 //thisCommand = "read.otu(rabund=" + phyloRabundFile + ", label=1)";
772 //commands.push_back(thisCommand);
775 thisCommand = "summary.single(rabund=" + phyloRabundFile + ", label=1, calc=nseqs-sobs-coverage-bergerparker-chao-ace-jack-bootstrap-boneh-efron-shen-solow-shannon-npshannon-invsimpson-qstat-simpsoneven-shannoneven-heip-smithwilson, size=5000)";
776 commands.push_back(thisCommand);
779 thisCommand = "classify.otu(taxonomy=" + fastaTaxFile + ", name=" + nameFile + ", list=" + listFile + ", cutoff=51, label=0.03)";
780 commands.push_back(thisCommand);
783 thisCommand = "classify.otu(taxonomy=" + fastaTaxFile + ", name=" + nameFile + ", list=" + phyloListFile + ", cutoff=51, label=1)";
784 commands.push_back(thisCommand);
787 thisCommand = "set.current(tree=" + treeFile + ", name=" + nameFile + ", group=" + groupFile + ")";
788 commands.push_back(thisCommand);
791 thisCommand = "phylo.diversity(tree=current, group=current, name=current, iters=100,rarefy=T)";
792 commands.push_back(thisCommand);
795 thisCommand = "unifrac.weighted(tree=current, group=current, name=current, random=false, distance=true, groups=all, processors=" + toString(processors) + ")";
796 commands.push_back(thisCommand);
799 thisCommand = "unifrac.unweighted(tree=current, group=current, name=current, random=false, distance=true, processors=" + toString(processors) + ")";
800 commands.push_back(thisCommand);
804 catch(exception& e) {
805 m->errorOut(e, "PipelineCommand", "createPatsPipeline");
810 //**********************************************************************************************************************