2 * chimerauchimecommand.cpp
5 * Created by westcott on 5/13/11.
6 * Copyright 2011 Schloss Lab. All rights reserved.
10 #include "chimerauchimecommand.h"
11 #include "deconvolutecommand.h"
13 #include "sequence.hpp"
16 //**********************************************************************************************************************
17 vector<string> ChimeraUchimeCommand::setParameters(){
19 CommandParameter ptemplate("reference", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(ptemplate);
20 CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pfasta);
21 CommandParameter pname("name", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pname);
22 CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors);
23 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
24 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
25 CommandParameter pabskew("abskew", "Number", "", "1.9", "", "", "",false,false); parameters.push_back(pabskew);
26 CommandParameter pchimealns("chimealns", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pchimealns);
27 CommandParameter pminh("minh", "Number", "", "0.3", "", "", "",false,false); parameters.push_back(pminh);
28 CommandParameter pmindiv("mindiv", "Number", "", "0.5", "", "", "",false,false); parameters.push_back(pmindiv);
29 CommandParameter pxn("xn", "Number", "", "8.0", "", "", "",false,false); parameters.push_back(pxn);
30 CommandParameter pdn("dn", "Number", "", "1.4", "", "", "",false,false); parameters.push_back(pdn);
31 CommandParameter pxa("xa", "Number", "", "1", "", "", "",false,false); parameters.push_back(pxa);
32 CommandParameter pchunks("chunks", "Number", "", "4", "", "", "",false,false); parameters.push_back(pchunks);
33 CommandParameter pminchunk("minchunk", "Number", "", "64", "", "", "",false,false); parameters.push_back(pminchunk);
34 CommandParameter pidsmoothwindow("idsmoothwindow", "Number", "", "32", "", "", "",false,false); parameters.push_back(pidsmoothwindow);
35 CommandParameter pminsmoothid("minsmoothid", "Number", "", "0.95", "", "", "",false,false); parameters.push_back(pminsmoothid);
36 CommandParameter pmaxp("maxp", "Number", "", "2", "", "", "",false,false); parameters.push_back(pmaxp);
37 CommandParameter pskipgaps("skipgaps", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(pskipgaps);
38 CommandParameter pskipgaps2("skipgaps2", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(pskipgaps2);
39 CommandParameter pminlen("minlen", "Number", "", "10", "", "", "",false,false); parameters.push_back(pminlen);
40 CommandParameter pmaxlen("maxlen", "Number", "", "10000", "", "", "",false,false); parameters.push_back(pmaxlen);
41 CommandParameter pucl("ucl", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pucl);
42 CommandParameter pqueryfract("queryfract", "Number", "", "0.5", "", "", "",false,false); parameters.push_back(pqueryfract);
44 vector<string> myArray;
45 for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); }
49 m->errorOut(e, "ChimeraUchimeCommand", "setParameters");
53 //**********************************************************************************************************************
54 string ChimeraUchimeCommand::getHelpString(){
56 string helpString = "";
57 helpString += "The chimera.uchime command reads a fastafile and referencefile and outputs potentially chimeric sequences.\n";
58 helpString += "This command is a wrapper for uchime written by Robert C. Edgar.\n";
59 helpString += "The chimera.uchime command parameters are fasta, name, reference, processors, abskew, chimealns, minh, mindiv, xn, dn, xa, chunks, minchunk, idsmoothwindow, minsmoothid, maxp, skipgaps, skipgaps2, minlen, maxlen, ucl and queryfact.\n";
60 helpString += "The fasta parameter allows you to enter the fasta file containing your potentially chimeric sequences, and is required, unless you have a valid current fasta file. \n";
61 helpString += "The name parameter allows you to provide a name file, if you are using template=self. \n";
62 helpString += "You may enter multiple fasta files by separating their names with dashes. ie. fasta=abrecovery.fasta-amazon.fasta \n";
63 helpString += "The reference parameter allows you to enter a reference file containing known non-chimeric sequences, and is required. You may also set template=self, in this case the abundant sequences will be used as potential parents. \n";
64 helpString += "The processors parameter allows you to specify how many processors you would like to use. The default is 1. \n";
65 helpString += "The abskew parameter can only be used with template=self. Minimum abundance skew. Default 1.9. Abundance skew is: min [ abund(parent1), abund(parent2) ] / abund(query).\n";
66 helpString += "The chimealns parameter allows you to indicate you would like a file containing multiple alignments of query sequences to parents in human readable format. Alignments show columns with differences that support or contradict a chimeric model.\n";
67 helpString += "The minh parameter - mininum score to report chimera. Default 0.3. Values from 0.1 to 5 might be reasonable. Lower values increase sensitivity but may report more false positives. If you decrease xn you may need to increase minh, and vice versa.\n";
68 helpString += "The mindiv parameter - minimum divergence ratio, default 0.5. Div ratio is 100%% - %%identity between query sequence and the closest candidate for being a parent. If you don't care about very close chimeras, then you could increase mindiv to, say, 1.0 or 2.0, and also decrease minh, say to 0.1, to increase sensitivity. How well this works will depend on your data. Best is to tune parameters on a good benchmark.\n";
69 helpString += "The xn parameter - weight of a no vote. Default 8.0. Decreasing this weight to around 3 or 4 may give better performance on denoised data.\n";
70 helpString += "The dn parameter - pseudo-count prior on number of no votes. Default 1.4. Probably no good reason to change this unless you can retune to a good benchmark for your data. Reasonable values are probably in the range from 0.2 to 2.\n";
71 helpString += "The xa parameter - weight of an abstain vote. Default 1. So far, results do not seem to be very sensitive to this parameter, but if you have a good training set might be worth trying. Reasonable values might range from 0.1 to 2.\n";
72 helpString += "The chunks parameter is the number of chunks to extract from the query sequence when searching for parents. Default 4.\n";
73 helpString += "The minchunk parameter is the minimum length of a chunk. Default 64.\n";
74 helpString += "The idsmoothwindow parameter is the length of id smoothing window. Default 32.\n";
75 helpString += "The minsmoothid parameter - minimum factional identity over smoothed window of candidate parent. Default 0.95.\n";
76 helpString += "The maxp parameter - maximum number of candidate parents to consider. Default 2. In tests so far, increasing maxp gives only a very small improvement in sensivity but tends to increase the error rate quite a bit.\n";
77 helpString += "The skipgaps parameter controls how gapped columns affect counting of diffs. If skipgaps is set to T, columns containing gaps do not found as diffs. Default = T.\n";
78 helpString += "The skipgaps2 parameter controls how gapped columns affect counting of diffs. If skipgaps2 is set to T, if column is immediately adjacent to a column containing a gap, it is not counted as a diff. Default = T.\n";
79 helpString += "The minlen parameter is the minimum unaligned sequence length. Defaults 10. Applies to both query and reference sequences.\n";
80 helpString += "The maxlen parameter is the maximum unaligned sequence length. Defaults 10000. Applies to both query and reference sequences.\n";
81 helpString += "The ucl parameter - use local-X alignments. Default is global-X or false. On tests so far, global-X is always better; this option is retained because it just might work well on some future type of data.\n";
82 helpString += "The queryfract parameter - minimum fraction of the query sequence that must be covered by a local-X alignment. Default 0.5. Applies only when ucl is true.\n";
84 helpString += "When using MPI, the processors parameter is set to the number of MPI processes running. \n";
86 helpString += "The chimera.uchime command should be in the following format: \n";
87 helpString += "chimera.uchime(fasta=yourFastaFile, reference=yourTemplate) \n";
88 helpString += "Example: chimera.uchime(fasta=AD.align, reference=silva.gold.align) \n";
89 helpString += "Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFastaFile).\n";
93 m->errorOut(e, "ChimeraUchimeCommand", "getHelpString");
97 //**********************************************************************************************************************
98 ChimeraUchimeCommand::ChimeraUchimeCommand(){
100 abort = true; calledHelp = true;
102 vector<string> tempOutNames;
103 outputTypes["chimera"] = tempOutNames;
104 outputTypes["accnos"] = tempOutNames;
105 outputTypes["alns"] = tempOutNames;
107 catch(exception& e) {
108 m->errorOut(e, "ChimeraUchimeCommand", "ChimeraUchimeCommand");
112 //***************************************************************************************************************
113 ChimeraUchimeCommand::ChimeraUchimeCommand(string option) {
115 abort = false; calledHelp = false;
117 //allow user to run help
118 if(option == "help") { help(); abort = true; calledHelp = true; }
119 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
122 vector<string> myArray = setParameters();
124 OptionParser parser(option);
125 map<string,string> parameters = parser.getParameters();
127 ValidParameters validParameter("chimera.uchime");
128 map<string,string>::iterator it;
130 //check to make sure all parameters are valid for command
131 for (it = parameters.begin(); it != parameters.end(); it++) {
132 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
135 vector<string> tempOutNames;
136 outputTypes["chimera"] = tempOutNames;
137 outputTypes["accnos"] = tempOutNames;
138 outputTypes["alns"] = tempOutNames;
140 //if the user changes the input directory command factory will send this info to us in the output parameter
141 string inputDir = validParameter.validFile(parameters, "inputdir", false);
142 if (inputDir == "not found"){ inputDir = ""; }
144 //check for required parameters
145 fastafile = validParameter.validFile(parameters, "fasta", false);
146 if (fastafile == "not found") {
147 //if there is a current fasta file, use it
148 string filename = m->getFastaFile();
149 if (filename != "") { fastaFileNames.push_back(filename); m->mothurOut("Using " + filename + " as input file for the fasta parameter."); m->mothurOutEndLine(); }
150 else { m->mothurOut("You have no current fastafile and the fasta parameter is required."); m->mothurOutEndLine(); abort = true; }
152 m->splitAtDash(fastafile, fastaFileNames);
154 //go through files and make sure they are good, if not, then disregard them
155 for (int i = 0; i < fastaFileNames.size(); i++) {
158 if (fastaFileNames[i] == "current") {
159 fastaFileNames[i] = m->getFastaFile();
160 if (fastaFileNames[i] != "") { m->mothurOut("Using " + fastaFileNames[i] + " as input file for the fasta parameter where you had given current."); m->mothurOutEndLine(); }
162 m->mothurOut("You have no current fastafile, ignoring current."); m->mothurOutEndLine(); ignore=true;
163 //erase from file list
164 fastaFileNames.erase(fastaFileNames.begin()+i);
171 if (inputDir != "") {
172 string path = m->hasPath(fastaFileNames[i]);
173 //if the user has not given a path then, add inputdir. else leave path alone.
174 if (path == "") { fastaFileNames[i] = inputDir + fastaFileNames[i]; }
180 ableToOpen = m->openInputFile(fastaFileNames[i], in, "noerror");
182 //if you can't open it, try default location
183 if (ableToOpen == 1) {
184 if (m->getDefaultPath() != "") { //default path is set
185 string tryPath = m->getDefaultPath() + m->getSimpleName(fastaFileNames[i]);
186 m->mothurOut("Unable to open " + fastaFileNames[i] + ". Trying default " + tryPath); m->mothurOutEndLine();
188 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
190 fastaFileNames[i] = tryPath;
194 if (ableToOpen == 1) {
195 if (m->getOutputDir() != "") { //default path is set
196 string tryPath = m->getOutputDir() + m->getSimpleName(fastaFileNames[i]);
197 m->mothurOut("Unable to open " + fastaFileNames[i] + ". Trying output directory " + tryPath); m->mothurOutEndLine();
199 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
201 fastaFileNames[i] = tryPath;
207 if (ableToOpen == 1) {
208 m->mothurOut("Unable to open " + fastaFileNames[i] + ". It will be disregarded."); m->mothurOutEndLine();
209 //erase from file list
210 fastaFileNames.erase(fastaFileNames.begin()+i);
213 m->setFastaFile(fastaFileNames[i]);
218 //make sure there is at least one valid file left
219 if (fastaFileNames.size() == 0) { m->mothurOut("[ERROR]: no valid files."); m->mothurOutEndLine(); abort = true; }
223 //check for required parameters
225 namefile = validParameter.validFile(parameters, "name", false);
226 if (namefile == "not found") { namefile = ""; hasName = false; }
228 m->splitAtDash(namefile, nameFileNames);
230 //go through files and make sure they are good, if not, then disregard them
231 for (int i = 0; i < nameFileNames.size(); i++) {
234 if (nameFileNames[i] == "current") {
235 nameFileNames[i] = m->getNameFile();
236 if (nameFileNames[i] != "") { m->mothurOut("Using " + nameFileNames[i] + " as input file for the name parameter where you had given current."); m->mothurOutEndLine(); }
238 m->mothurOut("You have no current namefile, ignoring current."); m->mothurOutEndLine(); ignore=true;
239 //erase from file list
240 nameFileNames.erase(nameFileNames.begin()+i);
247 if (inputDir != "") {
248 string path = m->hasPath(nameFileNames[i]);
249 //if the user has not given a path then, add inputdir. else leave path alone.
250 if (path == "") { nameFileNames[i] = inputDir + nameFileNames[i]; }
256 ableToOpen = m->openInputFile(nameFileNames[i], in, "noerror");
258 //if you can't open it, try default location
259 if (ableToOpen == 1) {
260 if (m->getDefaultPath() != "") { //default path is set
261 string tryPath = m->getDefaultPath() + m->getSimpleName(nameFileNames[i]);
262 m->mothurOut("Unable to open " + nameFileNames[i] + ". Trying default " + tryPath); m->mothurOutEndLine();
264 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
266 nameFileNames[i] = tryPath;
270 if (ableToOpen == 1) {
271 if (m->getOutputDir() != "") { //default path is set
272 string tryPath = m->getOutputDir() + m->getSimpleName(nameFileNames[i]);
273 m->mothurOut("Unable to open " + nameFileNames[i] + ". Trying output directory " + tryPath); m->mothurOutEndLine();
275 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
277 nameFileNames[i] = tryPath;
283 if (ableToOpen == 1) {
284 m->mothurOut("Unable to open " + nameFileNames[i] + ". It will be disregarded."); m->mothurOutEndLine();
285 //erase from file list
286 nameFileNames.erase(nameFileNames.begin()+i);
289 m->setNameFile(nameFileNames[i]);
294 //make sure there is at least one valid file left
295 if (nameFileNames.size() == 0) { m->mothurOut("[ERROR]: no valid name files."); m->mothurOutEndLine(); abort = true; }
298 if (hasName && (nameFileNames.size() != fastaFileNames.size())) { m->mothurOut("[ERROR]: The number of namefiles does not match the number of fastafiles, please correct."); m->mothurOutEndLine(); abort=true; }
300 //if the user changes the output directory command factory will send this info to us in the output parameter
301 outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = ""; }
305 it = parameters.find("reference");
306 //user has given a template file
307 if(it != parameters.end()){
308 if (it->second == "self") { templatefile = "self"; }
310 path = m->hasPath(it->second);
311 //if the user has not given a path then, add inputdir. else leave path alone.
312 if (path == "") { parameters["reference"] = inputDir + it->second; }
314 templatefile = validParameter.validFile(parameters, "reference", true);
315 if (templatefile == "not open") { abort = true; }
316 else if (templatefile == "not found") { templatefile = ""; m->mothurOut("reference is a required parameter for the chimera.uchime command."); m->mothurOutEndLine(); abort = true; }
318 }else if (hasName) { templatefile = "self"; }
319 else { templatefile = ""; m->mothurOut("reference is a required parameter for the chimera.uchime command, unless you have a namefile."); m->mothurOutEndLine(); abort = true; }
322 string temp = validParameter.validFile(parameters, "processors", false); if (temp == "not found"){ temp = m->getProcessors(); }
323 m->setProcessors(temp);
324 convert(temp, processors);
326 abskew = validParameter.validFile(parameters, "abskew", false); if (abskew == "not found"){ useAbskew = false; abskew = "1.9"; }else{ useAbskew = true; }
327 if (useAbskew && templatefile != "self") { m->mothurOut("The abskew parameter is only valid with template=self, ignoring."); m->mothurOutEndLine(); useAbskew = false; }
329 temp = validParameter.validFile(parameters, "chimealns", false); if (temp == "not found") { temp = "f"; }
330 chimealns = m->isTrue(temp);
332 minh = validParameter.validFile(parameters, "minh", false); if (minh == "not found") { useMinH = false; minh = "0.3"; } else{ useMinH = true; }
333 mindiv = validParameter.validFile(parameters, "mindiv", false); if (mindiv == "not found") { useMindiv = false; mindiv = "0.5"; } else{ useMindiv = true; }
334 xn = validParameter.validFile(parameters, "xn", false); if (xn == "not found") { useXn = false; xn = "8.0"; } else{ useXn = true; }
335 dn = validParameter.validFile(parameters, "dn", false); if (dn == "not found") { useDn = false; dn = "1.4"; } else{ useDn = true; }
336 xa = validParameter.validFile(parameters, "xa", false); if (xa == "not found") { useXa = false; xa = "1"; } else{ useXa = true; }
337 chunks = validParameter.validFile(parameters, "chunks", false); if (chunks == "not found") { useChunks = false; chunks = "4"; } else{ useChunks = true; }
338 minchunk = validParameter.validFile(parameters, "minchunk", false); if (minchunk == "not found") { useMinchunk = false; minchunk = "64"; } else{ useMinchunk = true; }
339 idsmoothwindow = validParameter.validFile(parameters, "idsmoothwindow", false); if (idsmoothwindow == "not found") { useIdsmoothwindow = false; idsmoothwindow = "32"; } else{ useIdsmoothwindow = true; }
340 minsmoothid = validParameter.validFile(parameters, "minsmoothid", false); if (minsmoothid == "not found") { useMinsmoothid = false; minsmoothid = "0.95"; } else{ useMinsmoothid = true; }
341 maxp = validParameter.validFile(parameters, "maxp", false); if (maxp == "not found") { useMaxp = false; maxp = "2"; } else{ useMaxp = true; }
342 minlen = validParameter.validFile(parameters, "minlen", false); if (minlen == "not found") { useMinlen = false; minlen = "10"; } else{ useMinlen = true; }
343 maxlen = validParameter.validFile(parameters, "maxlen", false); if (maxlen == "not found") { useMaxlen = false; maxlen = "10000"; } else{ useMaxlen = true; }
345 temp = validParameter.validFile(parameters, "ucl", false); if (temp == "not found") { temp = "f"; }
346 ucl = m->isTrue(temp);
348 queryfract = validParameter.validFile(parameters, "queryfract", false); if (queryfract == "not found") { useQueryfract = false; queryfract = "0.5"; } else{ useQueryfract = true; }
349 if (!ucl && useQueryfract) { m->mothurOut("queryfact may only be used when ucl=t, ignoring."); m->mothurOutEndLine(); useQueryfract = false; }
351 temp = validParameter.validFile(parameters, "skipgaps", false); if (temp == "not found") { temp = "t"; }
352 skipgaps = m->isTrue(temp);
354 temp = validParameter.validFile(parameters, "skipgaps2", false); if (temp == "not found") { temp = "t"; }
355 skipgaps2 = m->isTrue(temp);
359 catch(exception& e) {
360 m->errorOut(e, "ChimeraSlayerCommand", "ChimeraSlayerCommand");
364 //***************************************************************************************************************
366 int ChimeraUchimeCommand::execute(){
368 if (abort == true) { if (calledHelp) { return 0; } return 2; }
370 m->mothurOut("\nuchime by Robert C. Edgar\nhttp://drive5.com/uchime\nThis code is donated to the public domain.\n\n");
372 for (int s = 0; s < fastaFileNames.size(); s++) {
374 m->mothurOut("Checking sequences from " + fastaFileNames[s] + " ..." ); m->mothurOutEndLine();
376 int start = time(NULL);
377 string nameFile = "";
379 if (templatefile == "self") { //you want to run uchime with a reference template
383 MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
384 if (pid == 0) { //you are the root process
387 if (processors != 1) { m->mothurOut("When using template=self, mothur can only use 1 processor, continuing."); m->mothurOutEndLine(); processors = 1; }
388 if (nameFileNames.size() != 0) { //you provided a namefile and we don't need to create one
389 nameFile = nameFileNames[s];
391 m->mothurOutEndLine(); m->mothurOut("No namesfile given, running unique.seqs command to generate one."); m->mothurOutEndLine(); m->mothurOutEndLine();
393 //use unique.seqs to create new name and fastafile
394 string inputString = "fasta=" + fastaFileNames[s];
395 m->mothurOut("/******************************************/"); m->mothurOutEndLine();
396 m->mothurOut("Running command: unique.seqs(" + inputString + ")"); m->mothurOutEndLine();
398 Command* uniqueCommand = new DeconvoluteCommand(inputString);
399 uniqueCommand->execute();
401 map<string, vector<string> > filenames = uniqueCommand->getOutputFiles();
403 delete uniqueCommand;
405 m->mothurOut("/******************************************/"); m->mothurOutEndLine();
407 nameFile = filenames["name"][0];
408 fastaFileNames[s] = filenames["fasta"][0];
411 //create input file for uchime
412 //read through fastafile and store info
413 map<string, string> seqs;
415 m->openInputFile(fastaFileNames[s], in);
419 if (m->control_pressed) { in.close(); for (int j = 0; j < outputNames.size(); j++) { remove(outputNames[j].c_str()); } return 0; }
421 Sequence seq(in); m->gobble(in);
422 seqs[seq.getName()] = seq.getAligned();
427 vector<seqPriorityNode> nameMapCount;
428 int error = m->readNames(nameFile, nameMapCount, seqs);
430 if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) { remove(outputNames[j].c_str()); } return 0; }
432 if (error == 1) { for (int j = 0; j < outputNames.size(); j++) { remove(outputNames[j].c_str()); } return 0; }
433 if (seqs.size() != nameMapCount.size()) { m->mothurOut( "The number of sequences in your fastafile does not match the number of sequences in your namefile, aborting."); m->mothurOutEndLine(); for (int j = 0; j < outputNames.size(); j++) { remove(outputNames[j].c_str()); } return 0; }
435 sort(nameMapCount.begin(), nameMapCount.end(), compareSeqPriorityNodes);
437 string newFasta = m->getRootName(fastaFileNames[s]) + "temp";
439 m->openOutputFile(newFasta, out);
441 //print new file in order of
442 for (int i = 0; i < nameMapCount.size(); i++) {
443 out << ">" << nameMapCount[i].name << "/ab=" << nameMapCount[i].numIdentical << "/" << endl << nameMapCount[i].seq << endl;
447 fastaFileNames[s] = newFasta;
452 if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) { remove(outputNames[j].c_str()); } return 0; }
455 if (outputDir == "") { outputDir = m->hasPath(fastaFileNames[s]); }//if user entered a file with a path then preserve it
456 string outputFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + "uchime.chimera";
457 string accnosFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + "uchime.accnos";
458 string alnsFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + "uchime.alns";
460 if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) { remove(outputNames[j].c_str()); } return 0; }
463 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
464 if(processors == 1){ numSeqs = driver(outputFileName, fastaFileNames[s], accnosFileName, alnsFileName); }
465 else{ numSeqs = createProcesses(outputFileName, fastaFileNames[s], accnosFileName, alnsFileName); }
467 numSeqs = driver(outputFileName, fastaFileNames[s], accnosFileName, alnsFileName);
469 if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) { remove(outputNames[j].c_str()); } return 0; }
471 //remove file made for uchime
472 if (templatefile == "self") { remove(fastaFileNames[s].c_str()); }
474 outputNames.push_back(outputFileName); outputTypes["chimera"].push_back(outputFileName);
475 outputNames.push_back(accnosFileName); outputTypes["accnos"].push_back(accnosFileName);
476 if (chimealns) { outputNames.push_back(alnsFileName); outputTypes["alns"].push_back(alnsFileName); }
478 m->mothurOutEndLine(); m->mothurOut("It took " + toString(time(NULL) - start) + " secs to check " + toString(numSeqs) + " sequences."); m->mothurOutEndLine();
481 //set accnos file as new current accnosfile
483 itTypes = outputTypes.find("accnos");
484 if (itTypes != outputTypes.end()) {
485 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setAccnosFile(current); }
488 m->mothurOutEndLine();
489 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
490 for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); }
491 m->mothurOutEndLine();
496 catch(exception& e) {
497 m->errorOut(e, "ChimeraUchimeCommand", "execute");
501 //**********************************************************************************************************************
503 int ChimeraUchimeCommand::driver(string outputFName, string filename, string accnos, string alns){
508 char* tempUchime = new char[8];
509 strcpy(tempUchime, "./uchime ");
510 cPara.push_back(tempUchime);
512 char* tempIn = new char[8];
513 *tempIn = '\0'; strncat(tempIn, "--input", 7);
514 //strcpy(tempIn, "--input");
515 cPara.push_back(tempIn);
516 char* temp = new char[filename.length()+1];
517 *temp = '\0'; strncat(temp, filename.c_str(), filename.length());
518 //strcpy(temp, filename.c_str());
519 cPara.push_back(temp);
521 //are you using a reference file
522 if (templatefile != "self") {
524 char* tempRef = new char[5];
525 //strcpy(tempRef, "--db");
526 *tempRef = '\0'; strncat(tempRef, "--db", 4);
527 cPara.push_back(tempRef);
528 char* tempR = new char[templatefile.length()+1];
529 //strcpy(tempR, templatefile.c_str());
530 *tempR = '\0'; strncat(tempR, templatefile.c_str(), templatefile.length());
531 cPara.push_back(tempR);
534 char* tempO = new char[12];
535 *tempO = '\0'; strncat(tempO, "--uchimeout", 11);
536 //strcpy(tempO, "--uchimeout");
537 cPara.push_back(tempO);
538 char* tempout = new char[outputFName.length()+1];
539 //strcpy(tempout, outputFName.c_str());
540 *tempout = '\0'; strncat(tempout, outputFName.c_str(), outputFName.length());
541 cPara.push_back(tempout);
544 char* tempA = new char[13];
545 *tempA = '\0'; strncat(tempA, "--uchimealns", 12);
546 //strcpy(tempA, "--uchimealns");
547 cPara.push_back(tempA);
548 char* tempa = new char[alns.length()+1];
549 //strcpy(tempa, alns.c_str());
550 *tempa = '\0'; strncat(tempa, alns.c_str(), alns.length());
551 cPara.push_back(tempa);
555 char* tempskew = new char[9];
556 *tempskew = '\0'; strncat(tempskew, "--abskew", 8);
557 //strcpy(tempskew, "--abskew");
558 cPara.push_back(tempskew);
559 char* tempSkew = new char[abskew.length()+1];
560 //strcpy(tempSkew, abskew.c_str());
561 *tempSkew = '\0'; strncat(tempSkew, abskew.c_str(), abskew.length());
562 cPara.push_back(tempSkew);
566 char* tempminh = new char[7];
567 *tempminh = '\0'; strncat(tempminh, "--minh", 6);
568 //strcpy(tempminh, "--minh");
569 cPara.push_back(tempminh);
570 char* tempMinH = new char[minh.length()+1];
571 *tempMinH = '\0'; strncat(tempMinH, minh.c_str(), minh.length());
572 //strcpy(tempMinH, minh.c_str());
573 cPara.push_back(tempMinH);
577 char* tempmindiv = new char[9];
578 *tempmindiv = '\0'; strncat(tempmindiv, "--mindiv", 8);
579 //strcpy(tempmindiv, "--mindiv");
580 cPara.push_back(tempmindiv);
581 char* tempMindiv = new char[mindiv.length()+1];
582 *tempMindiv = '\0'; strncat(tempMindiv, mindiv.c_str(), mindiv.length());
583 //strcpy(tempMindiv, mindiv.c_str());
584 cPara.push_back(tempMindiv);
588 char* tempxn = new char[5];
589 //strcpy(tempxn, "--xn");
590 *tempxn = '\0'; strncat(tempxn, "--xn", 4);
591 cPara.push_back(tempxn);
592 char* tempXn = new char[xn.length()+1];
593 //strcpy(tempXn, xn.c_str());
594 *tempXn = '\0'; strncat(tempXn, xn.c_str(), xn.length());
595 cPara.push_back(tempXn);
599 char* tempdn = new char[5];
600 //strcpy(tempdn, "--dn");
601 *tempdn = '\0'; strncat(tempdn, "--dn", 4);
602 cPara.push_back(tempdn);
603 char* tempDn = new char[dn.length()+1];
604 *tempDn = '\0'; strncat(tempDn, dn.c_str(), dn.length());
605 //strcpy(tempDn, dn.c_str());
606 cPara.push_back(tempDn);
610 char* tempxa = new char[5];
611 //strcpy(tempxa, "--xa");
612 *tempxa = '\0'; strncat(tempxa, "--xa", 4);
613 cPara.push_back(tempxa);
614 char* tempXa = new char[xa.length()+1];
615 *tempXa = '\0'; strncat(tempXa, xa.c_str(), xa.length());
616 //strcpy(tempXa, xa.c_str());
617 cPara.push_back(tempXa);
621 char* tempchunks = new char[9];
622 //strcpy(tempchunks, "--chunks");
623 *tempchunks = '\0'; strncat(tempchunks, "--chunks", 8);
624 cPara.push_back(tempchunks);
625 char* tempChunks = new char[chunks.length()+1];
626 *tempChunks = '\0'; strncat(tempChunks, chunks.c_str(), chunks.length());
627 //strcpy(tempChunks, chunks.c_str());
628 cPara.push_back(tempChunks);
632 char* tempminchunk = new char[11];
633 //strcpy(tempminchunk, "--minchunk");
634 *tempminchunk = '\0'; strncat(tempminchunk, "--minchunk", 10);
635 cPara.push_back(tempminchunk);
636 char* tempMinchunk = new char[minchunk.length()+1];
637 *tempMinchunk = '\0'; strncat(tempMinchunk, minchunk.c_str(), minchunk.length());
638 //strcpy(tempMinchunk, minchunk.c_str());
639 cPara.push_back(tempMinchunk);
642 if (useIdsmoothwindow) {
643 char* tempidsmoothwindow = new char[17];
644 *tempidsmoothwindow = '\0'; strncat(tempidsmoothwindow, "--idsmoothwindow", 16);
645 //strcpy(tempidsmoothwindow, "--idsmoothwindow");
646 cPara.push_back(tempidsmoothwindow);
647 char* tempIdsmoothwindow = new char[idsmoothwindow.length()+1];
648 *tempIdsmoothwindow = '\0'; strncat(tempIdsmoothwindow, idsmoothwindow.c_str(), idsmoothwindow.length());
649 //strcpy(tempIdsmoothwindow, idsmoothwindow.c_str());
650 cPara.push_back(tempIdsmoothwindow);
653 if (useMinsmoothid) {
654 char* tempminsmoothid = new char[14];
655 //strcpy(tempminsmoothid, "--minsmoothid");
656 *tempminsmoothid = '\0'; strncat(tempminsmoothid, "--minsmoothid", 13);
657 cPara.push_back(tempminsmoothid);
658 char* tempMinsmoothid = new char[minsmoothid.length()+1];
659 *tempMinsmoothid = '\0'; strncat(tempMinsmoothid, minsmoothid.c_str(), minsmoothid.length());
660 //strcpy(tempMinsmoothid, minsmoothid.c_str());
661 cPara.push_back(tempMinsmoothid);
665 char* tempmaxp = new char[7];
666 //strcpy(tempmaxp, "--maxp");
667 *tempmaxp = '\0'; strncat(tempmaxp, "--maxp", 6);
668 cPara.push_back(tempmaxp);
669 char* tempMaxp = new char[maxp.length()+1];
670 *tempMaxp = '\0'; strncat(tempMaxp, maxp.c_str(), maxp.length());
671 //strcpy(tempMaxp, maxp.c_str());
672 cPara.push_back(tempMaxp);
676 char* tempskipgaps = new char[15];
677 //strcpy(tempskipgaps, "--[no]skipgaps");
678 *tempskipgaps = '\0'; strncat(tempskipgaps, "--[no]skipgaps", 14);
679 cPara.push_back(tempskipgaps);
683 char* tempskipgaps2 = new char[16];
684 //strcpy(tempskipgaps2, "--[no]skipgaps2");
685 *tempskipgaps2 = '\0'; strncat(tempskipgaps2, "--[no]skipgaps2", 15);
686 cPara.push_back(tempskipgaps2);
690 char* tempminlen = new char[9];
691 *tempminlen = '\0'; strncat(tempminlen, "--minlen", 8);
692 //strcpy(tempminlen, "--minlen");
693 cPara.push_back(tempminlen);
694 char* tempMinlen = new char[minlen.length()+1];
695 //strcpy(tempMinlen, minlen.c_str());
696 *tempMinlen = '\0'; strncat(tempMinlen, minlen.c_str(), minlen.length());
697 cPara.push_back(tempMinlen);
701 char* tempmaxlen = new char[9];
702 //strcpy(tempmaxlen, "--maxlen");
703 *tempmaxlen = '\0'; strncat(tempmaxlen, "--maxlen", 8);
704 cPara.push_back(tempmaxlen);
705 char* tempMaxlen = new char[maxlen.length()+1];
706 *tempMaxlen = '\0'; strncat(tempMaxlen, maxlen.c_str(), maxlen.length());
707 //strcpy(tempMaxlen, maxlen.c_str());
708 cPara.push_back(tempMaxlen);
712 char* tempucl = new char[5];
713 strcpy(tempucl, "--ucl");
714 cPara.push_back(tempucl);
718 char* tempqueryfract = new char[13];
719 *tempqueryfract = '\0'; strncat(tempqueryfract, "--queryfract", 12);
720 //strcpy(tempqueryfract, "--queryfract");
721 cPara.push_back(tempqueryfract);
722 char* tempQueryfract = new char[queryfract.length()+1];
723 *tempQueryfract = '\0'; strncat(tempQueryfract, queryfract.c_str(), queryfract.length());
724 //strcpy(tempQueryfract, queryfract.c_str());
725 cPara.push_back(tempQueryfract);
729 char** uchimeParameters;
730 uchimeParameters = new char*[cPara.size()];
731 for (int i = 0; i < cPara.size(); i++) { uchimeParameters[i] = cPara[i]; }
732 int numArgs = cPara.size();
734 uchime_main(numArgs, uchimeParameters);
737 for(int i = 0; i < cPara.size(); i++) { delete[] cPara[i]; }
738 delete[] uchimeParameters;
740 if (m->control_pressed) { return 0; }
742 //create accnos file from uchime results
744 m->openInputFile(outputFName, in);
747 m->openOutputFile(accnos, out);
752 if (m->control_pressed) { break; }
755 string chimeraFlag = "";
756 in >> chimeraFlag >> name;
759 if (templatefile == "self") {
760 name = name.substr(0, name.length()-1); //rip off last /
761 name = name.substr(0, name.find_last_of('/'));
764 for (int i = 0; i < 15; i++) { in >> chimeraFlag; }
767 if (chimeraFlag == "Y") { out << name << endl; }
775 catch(exception& e) {
776 m->errorOut(e, "ChimeraUchimeCommand", "driver");
780 /**************************************************************************************************/
782 int ChimeraUchimeCommand::createProcesses(string outputFileName, string filename, string accnos, string alns) {
788 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
789 //break up file into multiple files
790 vector<string> files;
791 m->divideFile(filename, processors, files);
793 if (m->control_pressed) { return 0; }
796 int pid, numSeqsPerProcessor;
800 MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
801 MPI_Comm_size(MPI_COMM_WORLD, &processors);
803 if (pid == 0) { //you are the root process
804 num = driver(outputFileName, files[0], accnos, alns);
806 if (templatefile != "self") {
808 for(int j = 1; j < processors; j++) {
810 MPI_Recv(&temp, 1, MPI_INT, j, tag, MPI_COMM_WORLD, &status);
813 m->appendFiles((outputFileName + toString(j) + ".temp"), outputFileName);
814 remove((outputFileName + toString(j) + ".temp").c_str());
816 m->appendFiles((accnos + toString(j) + ".temp"), accnos);
817 remove((accnos + toString(j) + ".temp").c_str());
820 m->appendFiles((alns + toString(j) + ".temp"), alns);
821 remove((alns + toString(j) + ".temp").c_str());
825 }else{ //you are a child process
826 if (templatefile != "self") { //if template=self we can only use 1 processor
827 num = driver(outputFileName+toString(pid) + ".temp", files[pid], accnos+toString(pid) + ".temp", alns+toString(pid) + ".temp");
829 //send numSeqs to parent
830 MPI_Send(&num, 1, MPI_INT, 0, tag, MPI_COMM_WORLD);
834 MPI_Barrier(MPI_COMM_WORLD); //make everyone wait - just in case
837 //loop through and create all the processes you want
838 while (process != processors) {
842 processIDS.push_back(pid); //create map from line number to pid so you can append files in correct order later
845 num = driver(outputFileName + toString(getpid()) + ".temp", files[process], accnos + toString(getpid()) + ".temp", alns + toString(getpid()) + ".temp");
847 //pass numSeqs to parent
849 string tempFile = outputFileName + toString(getpid()) + ".num.temp";
850 m->openOutputFile(tempFile, out);
856 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine();
857 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
863 num = driver(outputFileName, files[0], accnos, alns);
865 //force parent to wait until all the processes are done
866 for (int i=0;i<processIDS.size();i++) {
867 int temp = processIDS[i];
871 for (int i = 0; i < processIDS.size(); i++) {
873 string tempFile = outputFileName + toString(processIDS[i]) + ".num.temp";
874 m->openInputFile(tempFile, in);
875 if (!in.eof()) { int tempNum = 0; in >> tempNum; num += tempNum; }
876 in.close(); remove(tempFile.c_str());
880 //append output files
881 for(int i=0;i<processIDS[i];i++){
882 m->appendFiles((outputFileName + toString(processIDS[i]) + ".temp"), outputFileName);
883 remove((outputFileName + toString(processIDS[i]) + ".temp").c_str());
885 m->appendFiles((accnos + toString(processIDS[i]) + ".temp"), accnos);
886 remove((accnos + toString(processIDS[i]) + ".temp").c_str());
889 m->appendFiles((alns + toString(processIDS[i]) + ".temp"), alns);
890 remove((alns + toString(processIDS[i]) + ".temp").c_str());
894 //get rid of the file pieces.
895 for (int i = 0; i < files.size(); i++) { remove(files[i].c_str()); }
899 catch(exception& e) {
900 m->errorOut(e, "ChimeraUchimeCommand", "createProcesses");
905 /**************************************************************************************************/