/*
 *  chimerauchimecommand.cpp
 *
 *  Created by westcott on 5/13/11.
 *  Copyright 2011 Schloss Lab. All rights reserved.
 */
10 #include "chimerauchimecommand.h"
11 #include "deconvolutecommand.h"
13 #include "sequence.hpp"
16 //**********************************************************************************************************************
17 vector<string> ChimeraUchimeCommand::setParameters(){
19 CommandParameter ptemplate("reference", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(ptemplate);
20 CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pfasta);
21 CommandParameter pname("name", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pname);
22 CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors);
23 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
24 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
26 vector<string> myArray;
27 for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); }
31 m->errorOut(e, "ChimeraUchimeCommand", "setParameters");
35 //**********************************************************************************************************************
36 string ChimeraUchimeCommand::getHelpString(){
38 string helpString = "";
39 helpString += "The chimera.uchime command reads a fastafile and referencefile and outputs potentially chimeric sequences.\n";
40 helpString += "This command is a wrapper for uchime written by Robert C. Edgar.\n";
41 helpString += "The chimera.uchime command parameters are fasta, name, reference and processors.\n";
42 helpString += "The fasta parameter allows you to enter the fasta file containing your potentially chimeric sequences, and is required, unless you have a valid current fasta file. \n";
43 helpString += "The name parameter allows you to provide a name file, if you are using template=self. \n";
44 helpString += "You may enter multiple fasta files by separating their names with dashes. ie. fasta=abrecovery.fasta-amazon.fasta \n";
45 helpString += "The reference parameter allows you to enter a reference file containing known non-chimeric sequences, and is required. You may also set template=self, in this case the abundant sequences will be used as potential parents. \n";
46 helpString += "The processors parameter allows you to specify how many processors you would like to use. The default is 1. \n";
48 helpString += "When using MPI, the processors parameter is set to the number of MPI processes running. \n";
50 helpString += "The chimera.uchime command should be in the following format: \n";
51 helpString += "chimera.uchime(fasta=yourFastaFile, reference=yourTemplate) \n";
52 helpString += "Example: chimera.uchime(fasta=AD.align, reference=silva.gold.align) \n";
53 helpString += "Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFastaFile).\n";
57 m->errorOut(e, "ChimeraUchimeCommand", "getHelpString");
61 //**********************************************************************************************************************
62 ChimeraUchimeCommand::ChimeraUchimeCommand(){
64 abort = true; calledHelp = true;
66 vector<string> tempOutNames;
67 outputTypes["chimera"] = tempOutNames;
68 outputTypes["accnos"] = tempOutNames;
71 m->errorOut(e, "ChimeraUchimeCommand", "ChimeraUchimeCommand");
75 //***************************************************************************************************************
76 ChimeraUchimeCommand::ChimeraUchimeCommand(string option) {
78 abort = false; calledHelp = false;
80 //allow user to run help
81 if(option == "help") { help(); abort = true; calledHelp = true; }
82 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
85 vector<string> myArray = setParameters();
87 OptionParser parser(option);
88 map<string,string> parameters = parser.getParameters();
90 ValidParameters validParameter("chimera.uchime");
91 map<string,string>::iterator it;
93 //check to make sure all parameters are valid for command
94 for (it = parameters.begin(); it != parameters.end(); it++) {
95 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
98 vector<string> tempOutNames;
99 outputTypes["chimera"] = tempOutNames;
100 outputTypes["accnos"] = tempOutNames;
102 //if the user changes the input directory command factory will send this info to us in the output parameter
103 string inputDir = validParameter.validFile(parameters, "inputdir", false);
104 if (inputDir == "not found"){ inputDir = ""; }
106 //check for required parameters
107 fastafile = validParameter.validFile(parameters, "fasta", false);
108 if (fastafile == "not found") {
109 //if there is a current fasta file, use it
110 string filename = m->getFastaFile();
111 if (filename != "") { fastaFileNames.push_back(filename); m->mothurOut("Using " + filename + " as input file for the fasta parameter."); m->mothurOutEndLine(); }
112 else { m->mothurOut("You have no current fastafile and the fasta parameter is required."); m->mothurOutEndLine(); abort = true; }
114 m->splitAtDash(fastafile, fastaFileNames);
116 //go through files and make sure they are good, if not, then disregard them
117 for (int i = 0; i < fastaFileNames.size(); i++) {
120 if (fastaFileNames[i] == "current") {
121 fastaFileNames[i] = m->getFastaFile();
122 if (fastaFileNames[i] != "") { m->mothurOut("Using " + fastaFileNames[i] + " as input file for the fasta parameter where you had given current."); m->mothurOutEndLine(); }
124 m->mothurOut("You have no current fastafile, ignoring current."); m->mothurOutEndLine(); ignore=true;
125 //erase from file list
126 fastaFileNames.erase(fastaFileNames.begin()+i);
133 if (inputDir != "") {
134 string path = m->hasPath(fastaFileNames[i]);
135 //if the user has not given a path then, add inputdir. else leave path alone.
136 if (path == "") { fastaFileNames[i] = inputDir + fastaFileNames[i]; }
142 ableToOpen = m->openInputFile(fastaFileNames[i], in, "noerror");
144 //if you can't open it, try default location
145 if (ableToOpen == 1) {
146 if (m->getDefaultPath() != "") { //default path is set
147 string tryPath = m->getDefaultPath() + m->getSimpleName(fastaFileNames[i]);
148 m->mothurOut("Unable to open " + fastaFileNames[i] + ". Trying default " + tryPath); m->mothurOutEndLine();
150 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
152 fastaFileNames[i] = tryPath;
156 if (ableToOpen == 1) {
157 if (m->getOutputDir() != "") { //default path is set
158 string tryPath = m->getOutputDir() + m->getSimpleName(fastaFileNames[i]);
159 m->mothurOut("Unable to open " + fastaFileNames[i] + ". Trying output directory " + tryPath); m->mothurOutEndLine();
161 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
163 fastaFileNames[i] = tryPath;
169 if (ableToOpen == 1) {
170 m->mothurOut("Unable to open " + fastaFileNames[i] + ". It will be disregarded."); m->mothurOutEndLine();
171 //erase from file list
172 fastaFileNames.erase(fastaFileNames.begin()+i);
178 //make sure there is at least one valid file left
179 if (fastaFileNames.size() == 0) { m->mothurOut("[ERROR]: no valid files."); m->mothurOutEndLine(); abort = true; }
183 //check for required parameters
185 namefile = validParameter.validFile(parameters, "name", false);
186 if (namefile == "not found") { namefile = ""; hasName = false; }
188 m->splitAtDash(namefile, nameFileNames);
190 //go through files and make sure they are good, if not, then disregard them
191 for (int i = 0; i < nameFileNames.size(); i++) {
194 if (nameFileNames[i] == "current") {
195 nameFileNames[i] = m->getNameFile();
196 if (nameFileNames[i] != "") { m->mothurOut("Using " + nameFileNames[i] + " as input file for the name parameter where you had given current."); m->mothurOutEndLine(); }
198 m->mothurOut("You have no current namefile, ignoring current."); m->mothurOutEndLine(); ignore=true;
199 //erase from file list
200 nameFileNames.erase(nameFileNames.begin()+i);
207 if (inputDir != "") {
208 string path = m->hasPath(nameFileNames[i]);
209 //if the user has not given a path then, add inputdir. else leave path alone.
210 if (path == "") { nameFileNames[i] = inputDir + nameFileNames[i]; }
216 ableToOpen = m->openInputFile(nameFileNames[i], in, "noerror");
218 //if you can't open it, try default location
219 if (ableToOpen == 1) {
220 if (m->getDefaultPath() != "") { //default path is set
221 string tryPath = m->getDefaultPath() + m->getSimpleName(nameFileNames[i]);
222 m->mothurOut("Unable to open " + nameFileNames[i] + ". Trying default " + tryPath); m->mothurOutEndLine();
224 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
226 nameFileNames[i] = tryPath;
230 if (ableToOpen == 1) {
231 if (m->getOutputDir() != "") { //default path is set
232 string tryPath = m->getOutputDir() + m->getSimpleName(nameFileNames[i]);
233 m->mothurOut("Unable to open " + nameFileNames[i] + ". Trying output directory " + tryPath); m->mothurOutEndLine();
235 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
237 nameFileNames[i] = tryPath;
243 if (ableToOpen == 1) {
244 m->mothurOut("Unable to open " + nameFileNames[i] + ". It will be disregarded."); m->mothurOutEndLine();
245 //erase from file list
246 nameFileNames.erase(nameFileNames.begin()+i);
252 //make sure there is at least one valid file left
253 if (nameFileNames.size() == 0) { m->mothurOut("[ERROR]: no valid name files."); m->mothurOutEndLine(); abort = true; }
256 if (hasName && (nameFileNames.size() != fastaFileNames.size())) { m->mothurOut("[ERROR]: The number of namefiles does not match the number of fastafiles, please correct."); m->mothurOutEndLine(); abort=true; }
258 //if the user changes the output directory command factory will send this info to us in the output parameter
259 outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = ""; }
263 it = parameters.find("reference");
264 //user has given a template file
265 if(it != parameters.end()){
266 if (it->second == "self") { templatefile = "self"; }
268 path = m->hasPath(it->second);
269 //if the user has not given a path then, add inputdir. else leave path alone.
270 if (path == "") { parameters["reference"] = inputDir + it->second; }
272 templatefile = validParameter.validFile(parameters, "reference", true);
273 if (templatefile == "not open") { abort = true; }
274 else if (templatefile == "not found") { templatefile = ""; m->mothurOut("reference is a required parameter for the chimera.slayer command."); m->mothurOutEndLine(); abort = true; }
278 string temp = validParameter.validFile(parameters, "processors", false); if (temp == "not found"){ temp = m->getProcessors(); }
279 m->setProcessors(temp);
280 convert(temp, processors);
283 catch(exception& e) {
284 m->errorOut(e, "ChimeraSlayerCommand", "ChimeraSlayerCommand");
288 //***************************************************************************************************************
290 int ChimeraUchimeCommand::execute(){
292 if (abort == true) { if (calledHelp) { return 0; } return 2; }
294 for (int s = 0; s < fastaFileNames.size(); s++) {
296 m->mothurOut("Checking sequences from " + fastaFileNames[s] + " ..." ); m->mothurOutEndLine();
298 int start = time(NULL);
299 string nameFile = "";
301 if (templatefile == "self") { //you want to run slayer with a refernce template
305 MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
306 if (pid == 0) { //you are the root process
309 if (processors != 1) { m->mothurOut("When using template=self, mothur can only use 1 processor, continuing."); m->mothurOutEndLine(); processors = 1; }
310 if (nameFileNames.size() != 0) { //you provided a namefile and we don't need to create one
311 nameFile = nameFileNames[s];
313 m->mothurOutEndLine(); m->mothurOut("No namesfile given, running unique.seqs command to generate one."); m->mothurOutEndLine(); m->mothurOutEndLine();
315 //use unique.seqs to create new name and fastafile
316 string inputString = "fasta=" + fastaFileNames[s];
317 m->mothurOut("/******************************************/"); m->mothurOutEndLine();
318 m->mothurOut("Running command: unique.seqs(" + inputString + ")"); m->mothurOutEndLine();
320 Command* uniqueCommand = new DeconvoluteCommand(inputString);
321 uniqueCommand->execute();
323 map<string, vector<string> > filenames = uniqueCommand->getOutputFiles();
325 delete uniqueCommand;
327 m->mothurOut("/******************************************/"); m->mothurOutEndLine();
329 nameFile = filenames["name"][0];
330 fastaFileNames[s] = filenames["fasta"][0];
333 //create input file for uchime
334 //read through fastafile and store info
335 map<string, string> seqs;
337 m->openInputFile(fastaFileNames[s], in);
341 if (m->control_pressed) { in.close(); for (int j = 0; j < outputNames.size(); j++) { remove(outputNames[j].c_str()); } return 0; }
343 Sequence seq(in); m->gobble(in);
344 seqs[seq.getName()] = seq.getAligned();
349 vector<seqPriorityNode> nameMapCount;
350 int error = m->readNames(nameFile, nameMapCount, seqs);
352 if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) { remove(outputNames[j].c_str()); } return 0; }
354 if (error == 1) { for (int j = 0; j < outputNames.size(); j++) { remove(outputNames[j].c_str()); } return 0; }
355 if (seqs.size() != nameMapCount.size()) { m->mothurOut( "The number of sequences in your fastafile does not match the number of sequences in your namefile, aborting."); m->mothurOutEndLine(); for (int j = 0; j < outputNames.size(); j++) { remove(outputNames[j].c_str()); } return 0; }
357 sort(nameMapCount.begin(), nameMapCount.end(), compareSeqPriorityNodes);
359 string newFasta = fastaFileNames[s] + ".temp";
361 m->openOutputFile(newFasta, out);
363 //print new file in order of
364 for (int i = 0; i < nameMapCount.size(); i++) {
365 out << ">" << nameMapCount[i].name << "/ab=" << nameMapCount[i].numIdentical << "/" << endl << nameMapCount[i].seq << endl;
369 fastaFileNames[s] = newFasta;
374 if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) { remove(outputNames[j].c_str()); } return 0; }
377 if (outputDir == "") { outputDir = m->hasPath(fastaFileNames[s]); }//if user entered a file with a path then preserve it
378 string outputFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + "slayer.chimera";
379 string accnosFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + "slayer.accnos";
381 if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) { remove(outputNames[j].c_str()); } return 0; }
383 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
384 if(processors == 1){ driver(outputFileName, fastaFileNames[s], accnosFileName); }
385 else{ createProcesses(outputFileName, fastaFileNames[s], accnosFileName); }
387 driver(outputFileName, fastaFileNames[s], accnosFileName);
389 if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) { remove(outputNames[j].c_str()); } return 0; }
392 outputNames.push_back(outputFileName); outputTypes["chimera"].push_back(outputFileName);
393 outputNames.push_back(accnosFileName); outputTypes["accnos"].push_back(accnosFileName);
395 m->mothurOutEndLine(); m->mothurOut("It took " + toString(time(NULL) - start) + " secs to check your sequences."); m->mothurOutEndLine();
398 //set accnos file as new current accnosfile
400 itTypes = outputTypes.find("accnos");
401 if (itTypes != outputTypes.end()) {
402 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setAccnosFile(current); }
405 m->mothurOutEndLine();
406 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
407 for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); }
408 m->mothurOutEndLine();
413 catch(exception& e) {
414 m->errorOut(e, "ChimeraUchimeCommand", "execute");
418 //**********************************************************************************************************************
420 int ChimeraUchimeCommand::driver(string outputFName, string filename, string accnos){
425 char* tempUchime = new char[8];
426 strcpy(tempUchime, "./uchime ");
427 cPara.push_back(tempUchime);
429 char* tempIn = new char[7];
430 strcpy(tempIn, "--input");
431 cPara.push_back(tempIn);
432 char* temp = new char[filename.length()];
433 strcpy(temp, filename.c_str());
434 cPara.push_back(temp);
436 //are you using a reference file
437 if (templatefile != "self") {
440 char* tempRef = new char[4];
441 strcpy(tempRef, "--db");
442 cPara.push_back(tempRef);
443 char* tempR = new char[templatefile.length()];
444 strcpy(tempR, templatefile.c_str());
445 cPara.push_back(tempR);
448 char* tempO = new char[11];
449 strcpy(tempO, "--uchimeout");
450 cPara.push_back(tempO);
451 char* tempout = new char[outputFName.length()];
452 strcpy(tempout, outputFName.c_str());
453 cPara.push_back(tempout);
455 char** uchimeParameters;
456 uchimeParameters = new char*[cPara.size()];
457 for (int i = 0; i < cPara.size(); i++) { uchimeParameters[i] = cPara[i]; cout << cPara[i]; } cout << endl;
458 int numArgs = cPara.size();
460 uchime_main(numArgs, uchimeParameters);
463 for(int i = 0; i < cPara.size(); i++) { delete[] cPara[i]; }
464 delete[] uchimeParameters;
466 //create accnos file from uchime results
468 m->openInputFile(outputFName, in);
471 m->openOutputFile(accnos, out);
476 if (m->control_pressed) { break; }
479 string chimeraFlag = "";
480 in >> chimeraFlag >> name;
483 if (templatefile != "self") {
484 name = name.substr(0, name.length()-1); //rip off last /
485 name = name.substr(0, name.find_last_of('/'));
488 for (int i = 0; i < 15; i++) { in >> chimeraFlag; }
491 if (chimeraFlag == "Y") { out << name << endl; }
499 catch(exception& e) {
500 m->errorOut(e, "ChimeraUchimeCommand", "driver");
504 /**************************************************************************************************/
506 int ChimeraUchimeCommand::createProcesses(string outputFileName, string filename, string accnos) {
513 //break up file into multiple files
514 vector<string> files;
515 m->divideFile(filename, processors, files);
517 if (m->control_pressed) { return 0; }
520 int pid, numSeqsPerProcessor;
524 MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
525 MPI_Comm_size(MPI_COMM_WORLD, &processors);
527 if (pid == 0) { //you are the root process
528 num = driver(outputFileName, files[0], accnos);
530 if (templatefile != "self") {
532 for(int j = 1; j < processors; j++) {
534 MPI_Recv(&temp, 1, MPI_INT, j, tag, MPI_COMM_WORLD, &status);
537 m->appendFiles((outputFileName + toString(j) + ".temp"), outputFileName);
538 remove((outputFileName + toString(j) + ".temp").c_str());
540 m->appendFiles((accnos + toString(j) + ".temp"), accnos);
541 remove((accnos + toString(j) + ".temp").c_str());
544 }else{ //you are a child process
545 if (templatefile != "self") { //if template=self we can only use 1 processor
546 num = driver(outputFileName+toString(pid) + ".temp", files[pid], accnos+toString(pid) + ".temp");
548 //send numSeqs to parent
549 MPI_Send(&num, 1, MPI_INT, 0, tag, MPI_COMM_WORLD);
553 MPI_Barrier(MPI_COMM_WORLD); //make everyone wait - just in case
556 //loop through and create all the processes you want
557 while (process != processors) {
561 processIDS.push_back(pid); //create map from line number to pid so you can append files in correct order later
564 num = driver(outputFileName + toString(getpid()) + ".temp", files[process], accnos + toString(getpid()) + ".temp");
566 //pass numSeqs to parent
568 string tempFile = outputFileName + toString(getpid()) + ".num.temp";
569 m->openOutputFile(tempFile, out);
575 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine();
576 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
582 num = driver(outputFileName, files[0], accnos);
584 //force parent to wait until all the processes are done
585 for (int i=0;i<processIDS.size();i++) {
586 int temp = processIDS[i];
590 for (int i = 0; i < processIDS.size(); i++) {
592 string tempFile = outputFileName + toString(processIDS[i]) + ".num.temp";
593 m->openInputFile(tempFile, in);
594 if (!in.eof()) { int tempNum = 0; in >> tempNum; num += tempNum; }
595 in.close(); remove(tempFile.c_str());
599 //append output files
600 for(int i=0;i<processIDS[i];i++){
601 m->appendFiles((outputFileName + toString(processIDS[i]) + ".temp"), outputFileName);
602 remove((outputFileName + toString(processIDS[i]) + ".temp").c_str());
604 m->appendFiles((accnos + toString(processIDS[i]) + ".temp"), accnos);
605 remove((accnos + toString(processIDS[i]) + ".temp").c_str());
610 catch(exception& e) {
611 m->errorOut(e, "ChimeraUchimeCommand", "createProcesses");
616 /**************************************************************************************************/