2 * chimerauchimecommand.cpp
5 * Created by westcott on 5/13/11.
6 * Copyright 2011 Schloss Lab. All rights reserved.
10 #include "chimerauchimecommand.h"
11 #include "deconvolutecommand.h"
13 #include "sequence.hpp"
16 //**********************************************************************************************************************
17 vector<string> ChimeraUchimeCommand::setParameters(){
19 CommandParameter ptemplate("reference", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(ptemplate);
20 CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pfasta);
21 CommandParameter pname("name", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pname);
22 CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors);
23 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
24 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
25 CommandParameter pabskew("abskew", "Number", "", "1.9", "", "", "",false,false); parameters.push_back(pabskew);
26 CommandParameter pchimealns("chimealns", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pchimealns);
27 CommandParameter pminh("minh", "Number", "", "0.3", "", "", "",false,false); parameters.push_back(pminh);
28 CommandParameter pmindiv("mindiv", "Number", "", "0.5", "", "", "",false,false); parameters.push_back(pmindiv);
29 CommandParameter pxn("xn", "Number", "", "8.0", "", "", "",false,false); parameters.push_back(pxn);
30 CommandParameter pdn("dn", "Number", "", "1.4", "", "", "",false,false); parameters.push_back(pdn);
31 CommandParameter pxa("xa", "Number", "", "1", "", "", "",false,false); parameters.push_back(pxa);
32 CommandParameter pchunks("chunks", "Number", "", "4", "", "", "",false,false); parameters.push_back(pchunks);
33 CommandParameter pminchunk("minchunk", "Number", "", "64", "", "", "",false,false); parameters.push_back(pminchunk);
34 CommandParameter pidsmoothwindow("idsmoothwindow", "Number", "", "32", "", "", "",false,false); parameters.push_back(pidsmoothwindow);
35 CommandParameter pminsmoothid("minsmoothid", "Number", "", "0.95", "", "", "",false,false); parameters.push_back(pminsmoothid);
36 CommandParameter pmaxp("maxp", "Number", "", "2", "", "", "",false,false); parameters.push_back(pmaxp);
37 CommandParameter pskipgaps("skipgaps", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(pskipgaps);
38 CommandParameter pskipgaps2("skipgaps2", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(pskipgaps2);
39 CommandParameter pminlen("minlen", "Number", "", "10", "", "", "",false,false); parameters.push_back(pminlen);
40 CommandParameter pmaxlen("maxlen", "Number", "", "10000", "", "", "",false,false); parameters.push_back(pmaxlen);
41 CommandParameter pucl("ucl", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pucl);
42 CommandParameter pqueryfract("queryfract", "Number", "", "0.5", "", "", "",false,false); parameters.push_back(pqueryfract);
44 vector<string> myArray;
45 for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); }
49 m->errorOut(e, "ChimeraUchimeCommand", "setParameters");
53 //**********************************************************************************************************************
54 string ChimeraUchimeCommand::getHelpString(){
56 string helpString = "";
57 helpString += "The chimera.uchime command reads a fastafile and referencefile and outputs potentially chimeric sequences.\n";
58 helpString += "This command is a wrapper for uchime written by Robert C. Edgar.\n";
59 helpString += "The chimera.uchime command parameters are fasta, name, reference, processors, abskew, chimealns, minh, mindiv, xn, dn, xa, chunks, minchunk, idsmoothwindow, minsmoothid, maxp, skipgaps, skipgaps2, minlen, maxlen, ucl and queryfact.\n";
60 helpString += "The fasta parameter allows you to enter the fasta file containing your potentially chimeric sequences, and is required, unless you have a valid current fasta file. \n";
61 helpString += "The name parameter allows you to provide a name file, if you are using template=self. \n";
62 helpString += "You may enter multiple fasta files by separating their names with dashes. ie. fasta=abrecovery.fasta-amazon.fasta \n";
63 helpString += "The reference parameter allows you to enter a reference file containing known non-chimeric sequences, and is required. You may also set template=self, in this case the abundant sequences will be used as potential parents. \n";
64 helpString += "The processors parameter allows you to specify how many processors you would like to use. The default is 1. \n";
65 helpString += "The abskew parameter can only be used with template=self. Minimum abundance skew. Default 1.9. Abundance skew is: min [ abund(parent1), abund(parent2) ] / abund(query).\n";
66 helpString += "The chimealns parameter allows you to indicate you would like a file containing multiple alignments of query sequences to parents in human readable format. Alignments show columns with differences that support or contradict a chimeric model.\n";
67 helpString += "The minh parameter - mininum score to report chimera. Default 0.3. Values from 0.1 to 5 might be reasonable. Lower values increase sensitivity but may report more false positives. If you decrease xn you may need to increase minh, and vice versa.\n";
68 helpString += "The mindiv parameter - minimum divergence ratio, default 0.5. Div ratio is 100%% - %%identity between query sequence and the closest candidate for being a parent. If you don't care about very close chimeras, then you could increase mindiv to, say, 1.0 or 2.0, and also decrease --min h, say to 0.1, to increase sensitivity. How well this works will depend on your data. Best is to tune parameters on a good benchmark.\n";
69 helpString += "The xn parameter - weight of a no vote. Default 8.0. Decreasing this weight to around 3 or 4 may give better performance on denoised data.\n";
70 helpString += "The dn parameter - pseudo-count prior on number of no votes. Default 1.4. Probably no good reason to change this unless you can retune to a good benchmark for your data. Reasonable values are probably in the range from 0.2 to 2.\n";
71 helpString += "The xa parameter - weight of an abstain vote. Default 1. So far, results do not seem to be very sensitive to this parameter, but if you have a good training set might be worth trying. Reasonable values might range from 0.1 to 2.\n";
72 helpString += "The chunks parameter is the number of chunks to extract from the query sequence when searching for parents. Default 4.\n";
73 helpString += "The minchunk parameter is the minimum length of a chunk. Default 64.\n";
74 helpString += "The idsmoothwindow parameter is the length of id smoothing window. Default 32.\n";
75 helpString += "The minsmoothid parameter - minimum factional identity over smoothed window of candidate parent. Default 0.95.\n";
76 helpString += "The maxp parameter - maximum number of candidate parents to consider. Default 2. In tests so far, increasing maxp gives only a very small improvement in sensivity but tends to increase the error rate quite a bit.\n";
77 helpString += "The skipgaps parameter controls how gapped columns affect counting of diffs. If skipgaps is set to T, columns containing gaps do not found as diffs. Default = T.\n";
78 helpString += "The skipgaps2 parameter controls how gapped columns affect counting of diffs. If skipgaps2 is set to T, if column is immediately adjacent to a column containing a gap, it is not counted as a diff. Default = T.\n";
79 helpString += "The minlen parameter is the minimum unaligned sequence length. Defaults 10. Applies to both query and reference sequences.\n";
80 helpString += "The maxlen parameter is the maximum unaligned sequence length. Defaults 10000. Applies to both query and reference sequences.\n";
81 helpString += "The ucl parameter - use local-X alignments. Default is global-X or false. On tests so far, global-X is always better; this option is retained because it just might work well on some future type of data.\n";
82 helpString += "The queryfract parameter - minimum fraction of the query sequence that must be covered by a local-X alignment. Default 0.5. Applies only when ucl is true.\n";
84 helpString += "When using MPI, the processors parameter is set to the number of MPI processes running. \n";
86 helpString += "The chimera.uchime command should be in the following format: \n";
87 helpString += "chimera.uchime(fasta=yourFastaFile, reference=yourTemplate) \n";
88 helpString += "Example: chimera.uchime(fasta=AD.align, reference=silva.gold.align) \n";
89 helpString += "Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFastaFile).\n";
93 m->errorOut(e, "ChimeraUchimeCommand", "getHelpString");
97 //**********************************************************************************************************************
98 ChimeraUchimeCommand::ChimeraUchimeCommand(){
100 abort = true; calledHelp = true;
102 vector<string> tempOutNames;
103 outputTypes["chimera"] = tempOutNames;
104 outputTypes["accnos"] = tempOutNames;
105 outputTypes["alns"] = tempOutNames;
107 catch(exception& e) {
108 m->errorOut(e, "ChimeraUchimeCommand", "ChimeraUchimeCommand");
112 //***************************************************************************************************************
113 ChimeraUchimeCommand::ChimeraUchimeCommand(string option) {
115 abort = false; calledHelp = false;
117 //allow user to run help
118 if(option == "help") { help(); abort = true; calledHelp = true; }
119 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
122 vector<string> myArray = setParameters();
124 OptionParser parser(option);
125 map<string,string> parameters = parser.getParameters();
127 ValidParameters validParameter("chimera.uchime");
128 map<string,string>::iterator it;
130 //check to make sure all parameters are valid for command
131 for (it = parameters.begin(); it != parameters.end(); it++) {
132 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
135 vector<string> tempOutNames;
136 outputTypes["chimera"] = tempOutNames;
137 outputTypes["accnos"] = tempOutNames;
138 outputTypes["alns"] = tempOutNames;
140 //if the user changes the input directory command factory will send this info to us in the output parameter
141 string inputDir = validParameter.validFile(parameters, "inputdir", false);
142 if (inputDir == "not found"){ inputDir = ""; }
144 //check for required parameters
145 fastafile = validParameter.validFile(parameters, "fasta", false);
146 if (fastafile == "not found") {
147 //if there is a current fasta file, use it
148 string filename = m->getFastaFile();
149 if (filename != "") { fastaFileNames.push_back(filename); m->mothurOut("Using " + filename + " as input file for the fasta parameter."); m->mothurOutEndLine(); }
150 else { m->mothurOut("You have no current fastafile and the fasta parameter is required."); m->mothurOutEndLine(); abort = true; }
152 m->splitAtDash(fastafile, fastaFileNames);
154 //go through files and make sure they are good, if not, then disregard them
155 for (int i = 0; i < fastaFileNames.size(); i++) {
158 if (fastaFileNames[i] == "current") {
159 fastaFileNames[i] = m->getFastaFile();
160 if (fastaFileNames[i] != "") { m->mothurOut("Using " + fastaFileNames[i] + " as input file for the fasta parameter where you had given current."); m->mothurOutEndLine(); }
162 m->mothurOut("You have no current fastafile, ignoring current."); m->mothurOutEndLine(); ignore=true;
163 //erase from file list
164 fastaFileNames.erase(fastaFileNames.begin()+i);
171 if (inputDir != "") {
172 string path = m->hasPath(fastaFileNames[i]);
173 //if the user has not given a path then, add inputdir. else leave path alone.
174 if (path == "") { fastaFileNames[i] = inputDir + fastaFileNames[i]; }
180 ableToOpen = m->openInputFile(fastaFileNames[i], in, "noerror");
182 //if you can't open it, try default location
183 if (ableToOpen == 1) {
184 if (m->getDefaultPath() != "") { //default path is set
185 string tryPath = m->getDefaultPath() + m->getSimpleName(fastaFileNames[i]);
186 m->mothurOut("Unable to open " + fastaFileNames[i] + ". Trying default " + tryPath); m->mothurOutEndLine();
188 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
190 fastaFileNames[i] = tryPath;
194 if (ableToOpen == 1) {
195 if (m->getOutputDir() != "") { //default path is set
196 string tryPath = m->getOutputDir() + m->getSimpleName(fastaFileNames[i]);
197 m->mothurOut("Unable to open " + fastaFileNames[i] + ". Trying output directory " + tryPath); m->mothurOutEndLine();
199 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
201 fastaFileNames[i] = tryPath;
207 if (ableToOpen == 1) {
208 m->mothurOut("Unable to open " + fastaFileNames[i] + ". It will be disregarded."); m->mothurOutEndLine();
209 //erase from file list
210 fastaFileNames.erase(fastaFileNames.begin()+i);
213 m->setFastaFile(fastaFileNames[i]);
218 //make sure there is at least one valid file left
219 if (fastaFileNames.size() == 0) { m->mothurOut("[ERROR]: no valid files."); m->mothurOutEndLine(); abort = true; }
223 //check for required parameters
225 namefile = validParameter.validFile(parameters, "name", false);
226 if (namefile == "not found") { namefile = ""; hasName = false; }
228 m->splitAtDash(namefile, nameFileNames);
230 //go through files and make sure they are good, if not, then disregard them
231 for (int i = 0; i < nameFileNames.size(); i++) {
234 if (nameFileNames[i] == "current") {
235 nameFileNames[i] = m->getNameFile();
236 if (nameFileNames[i] != "") { m->mothurOut("Using " + nameFileNames[i] + " as input file for the name parameter where you had given current."); m->mothurOutEndLine(); }
238 m->mothurOut("You have no current namefile, ignoring current."); m->mothurOutEndLine(); ignore=true;
239 //erase from file list
240 nameFileNames.erase(nameFileNames.begin()+i);
247 if (inputDir != "") {
248 string path = m->hasPath(nameFileNames[i]);
249 //if the user has not given a path then, add inputdir. else leave path alone.
250 if (path == "") { nameFileNames[i] = inputDir + nameFileNames[i]; }
256 ableToOpen = m->openInputFile(nameFileNames[i], in, "noerror");
258 //if you can't open it, try default location
259 if (ableToOpen == 1) {
260 if (m->getDefaultPath() != "") { //default path is set
261 string tryPath = m->getDefaultPath() + m->getSimpleName(nameFileNames[i]);
262 m->mothurOut("Unable to open " + nameFileNames[i] + ". Trying default " + tryPath); m->mothurOutEndLine();
264 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
266 nameFileNames[i] = tryPath;
270 if (ableToOpen == 1) {
271 if (m->getOutputDir() != "") { //default path is set
272 string tryPath = m->getOutputDir() + m->getSimpleName(nameFileNames[i]);
273 m->mothurOut("Unable to open " + nameFileNames[i] + ". Trying output directory " + tryPath); m->mothurOutEndLine();
275 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
277 nameFileNames[i] = tryPath;
283 if (ableToOpen == 1) {
284 m->mothurOut("Unable to open " + nameFileNames[i] + ". It will be disregarded."); m->mothurOutEndLine();
285 //erase from file list
286 nameFileNames.erase(nameFileNames.begin()+i);
289 m->setNameFile(nameFileNames[i]);
294 //make sure there is at least one valid file left
295 if (nameFileNames.size() == 0) { m->mothurOut("[ERROR]: no valid name files."); m->mothurOutEndLine(); abort = true; }
298 if (hasName && (nameFileNames.size() != fastaFileNames.size())) { m->mothurOut("[ERROR]: The number of namefiles does not match the number of fastafiles, please correct."); m->mothurOutEndLine(); abort=true; }
300 //if the user changes the output directory command factory will send this info to us in the output parameter
301 outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = ""; }
305 it = parameters.find("reference");
306 //user has given a template file
307 if(it != parameters.end()){
308 if (it->second == "self") { templatefile = "self"; }
310 path = m->hasPath(it->second);
311 //if the user has not given a path then, add inputdir. else leave path alone.
312 if (path == "") { parameters["reference"] = inputDir + it->second; }
314 templatefile = validParameter.validFile(parameters, "reference", true);
315 if (templatefile == "not open") { abort = true; }
316 else if (templatefile == "not found") { templatefile = ""; m->mothurOut("reference is a required parameter for the chimera.uchime command."); m->mothurOutEndLine(); abort = true; }
318 }else if (hasName) { templatefile = "self"; }
319 else { templatefile = ""; m->mothurOut("reference is a required parameter for the chimera.uchime command, unless you have a namefile."); m->mothurOutEndLine(); abort = true; }
322 string temp = validParameter.validFile(parameters, "processors", false); if (temp == "not found"){ temp = m->getProcessors(); }
323 m->setProcessors(temp);
324 convert(temp, processors);
326 abskew = validParameter.validFile(parameters, "abskew", false); if (abskew == "not found"){ useAbskew = false; abskew = "1.9"; }else{ useAbskew = true; }
327 if (useAbskew && templatefile != "self") { m->mothurOut("The abskew parameter is only valid with template=self, ignoring."); m->mothurOutEndLine(); useAbskew = false; }
329 temp = validParameter.validFile(parameters, "chimealns", false); if (temp == "not found") { temp = "f"; }
330 chimealns = m->isTrue(temp);
332 minh = validParameter.validFile(parameters, "minh", false); if (minh == "not found") { useMinH = false; minh = "0.3"; } else{ useMinH = true; }
333 mindiv = validParameter.validFile(parameters, "mindiv", false); if (mindiv == "not found") { useMindiv = false; mindiv = "0.5"; } else{ useMindiv = true; }
334 xn = validParameter.validFile(parameters, "xn", false); if (xn == "not found") { useXn = false; xn = "8.0"; } else{ useXn = true; }
335 dn = validParameter.validFile(parameters, "dn", false); if (dn == "not found") { useDn = false; dn = "1.4"; } else{ useDn = true; }
336 xa = validParameter.validFile(parameters, "xa", false); if (xa == "not found") { useXa = false; xa = "1"; } else{ useXa = true; }
337 chunks = validParameter.validFile(parameters, "chunks", false); if (chunks == "not found") { useChunks = false; chunks = "4"; } else{ useChunks = true; }
338 minchunk = validParameter.validFile(parameters, "minchunk", false); if (minchunk == "not found") { useMinchunk = false; minchunk = "64"; } else{ useMinchunk = true; }
339 idsmoothwindow = validParameter.validFile(parameters, "idsmoothwindow", false); if (idsmoothwindow == "not found") { useIdsmoothwindow = false; idsmoothwindow = "32"; } else{ useIdsmoothwindow = true; }
340 minsmoothid = validParameter.validFile(parameters, "minsmoothid", false); if (minsmoothid == "not found") { useMinsmoothid = false; minsmoothid = "0.95"; } else{ useMinsmoothid = true; }
341 maxp = validParameter.validFile(parameters, "maxp", false); if (maxp == "not found") { useMaxp = false; maxp = "2"; } else{ useMaxp = true; }
342 minlen = validParameter.validFile(parameters, "minlen", false); if (minlen == "not found") { useMinlen = false; minlen = "10"; } else{ useMinlen = true; }
343 maxlen = validParameter.validFile(parameters, "maxlen", false); if (maxlen == "not found") { useMaxlen = false; maxlen = "10000"; } else{ useMaxlen = true; }
345 temp = validParameter.validFile(parameters, "ucl", false); if (temp == "not found") { temp = "f"; }
346 ucl = m->isTrue(temp);
348 queryfract = validParameter.validFile(parameters, "queryfract", false); if (queryfract == "not found") { useQueryfract = false; queryfract = "0.5"; } else{ useQueryfract = true; }
349 if (!ucl && useQueryfract) { m->mothurOut("queryfact may only be used when ucl=t, ignoring."); m->mothurOutEndLine(); useQueryfract = false; }
351 temp = validParameter.validFile(parameters, "skipgaps", false); if (temp == "not found") { temp = "t"; }
352 skipgaps = m->isTrue(temp);
354 temp = validParameter.validFile(parameters, "skipgaps2", false); if (temp == "not found") { temp = "t"; }
355 skipgaps2 = m->isTrue(temp);
359 catch(exception& e) {
360 m->errorOut(e, "ChimeraSlayerCommand", "ChimeraSlayerCommand");
364 //***************************************************************************************************************
366 int ChimeraUchimeCommand::execute(){
368 if (abort == true) { if (calledHelp) { return 0; } return 2; }
370 m->mothurOut("\nuchime by Robert C. Edgar\nhttp://drive5.com/uchime\nThis code is donated to the public domain.\n\n");
372 for (int s = 0; s < fastaFileNames.size(); s++) {
374 m->mothurOut("Checking sequences from " + fastaFileNames[s] + " ..." ); m->mothurOutEndLine();
376 int start = time(NULL);
377 string nameFile = "";
379 if (templatefile == "self") { //you want to run uchime with a refernce template
383 MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
384 if (pid == 0) { //you are the root process
387 if (processors != 1) { m->mothurOut("When using template=self, mothur can only use 1 processor, continuing."); m->mothurOutEndLine(); processors = 1; }
388 if (nameFileNames.size() != 0) { //you provided a namefile and we don't need to create one
389 nameFile = nameFileNames[s];
391 m->mothurOutEndLine(); m->mothurOut("No namesfile given, running unique.seqs command to generate one."); m->mothurOutEndLine(); m->mothurOutEndLine();
393 //use unique.seqs to create new name and fastafile
394 string inputString = "fasta=" + fastaFileNames[s];
395 m->mothurOut("/******************************************/"); m->mothurOutEndLine();
396 m->mothurOut("Running command: unique.seqs(" + inputString + ")"); m->mothurOutEndLine();
398 Command* uniqueCommand = new DeconvoluteCommand(inputString);
399 uniqueCommand->execute();
401 map<string, vector<string> > filenames = uniqueCommand->getOutputFiles();
403 delete uniqueCommand;
405 m->mothurOut("/******************************************/"); m->mothurOutEndLine();
407 nameFile = filenames["name"][0];
408 fastaFileNames[s] = filenames["fasta"][0];
411 //create input file for uchime
412 //read through fastafile and store info
413 map<string, string> seqs;
415 m->openInputFile(fastaFileNames[s], in);
419 if (m->control_pressed) { in.close(); for (int j = 0; j < outputNames.size(); j++) { remove(outputNames[j].c_str()); } return 0; }
421 Sequence seq(in); m->gobble(in);
422 seqs[seq.getName()] = seq.getAligned();
427 vector<seqPriorityNode> nameMapCount;
428 int error = m->readNames(nameFile, nameMapCount, seqs);
430 if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) { remove(outputNames[j].c_str()); } return 0; }
432 if (error == 1) { for (int j = 0; j < outputNames.size(); j++) { remove(outputNames[j].c_str()); } return 0; }
433 if (seqs.size() != nameMapCount.size()) { m->mothurOut( "The number of sequences in your fastafile does not match the number of sequences in your namefile, aborting."); m->mothurOutEndLine(); for (int j = 0; j < outputNames.size(); j++) { remove(outputNames[j].c_str()); } return 0; }
435 sort(nameMapCount.begin(), nameMapCount.end(), compareSeqPriorityNodes);
437 string newFasta = fastaFileNames[s] + ".temp";
439 m->openOutputFile(newFasta, out);
441 //print new file in order of
442 for (int i = 0; i < nameMapCount.size(); i++) {
443 out << ">" << nameMapCount[i].name << "/ab=" << nameMapCount[i].numIdentical << "/" << endl << nameMapCount[i].seq << endl;
447 fastaFileNames[s] = newFasta;
452 if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) { remove(outputNames[j].c_str()); } return 0; }
455 if (outputDir == "") { outputDir = m->hasPath(fastaFileNames[s]); }//if user entered a file with a path then preserve it
456 string outputFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + "uchime.chimera";
457 string accnosFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + "uchime.accnos";
458 string alnsFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + "uchime.alns";
460 if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) { remove(outputNames[j].c_str()); } return 0; }
463 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
464 if(processors == 1){ numSeqs = driver(outputFileName, fastaFileNames[s], accnosFileName, alnsFileName); }
465 else{ numSeqs = createProcesses(outputFileName, fastaFileNames[s], accnosFileName, alnsFileName); }
467 numSeqs = driver(outputFileName, fastaFileNames[s], accnosFileName, alnsFileName);
469 if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) { remove(outputNames[j].c_str()); } return 0; }
471 //remove file made for uchime
472 if (templatefile == "self") { remove(fastaFileNames[s].c_str()); }
474 outputNames.push_back(outputFileName); outputTypes["chimera"].push_back(outputFileName);
475 outputNames.push_back(accnosFileName); outputTypes["accnos"].push_back(accnosFileName);
476 if (chimealns) { outputNames.push_back(alnsFileName); outputTypes["alns"].push_back(alnsFileName); }
478 m->mothurOutEndLine(); m->mothurOut("It took " + toString(time(NULL) - start) + " secs to check " + toString(numSeqs) + " sequences."); m->mothurOutEndLine();
481 //set accnos file as new current accnosfile
483 itTypes = outputTypes.find("accnos");
484 if (itTypes != outputTypes.end()) {
485 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setAccnosFile(current); }
488 m->mothurOutEndLine();
489 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
490 for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); }
491 m->mothurOutEndLine();
496 catch(exception& e) {
497 m->errorOut(e, "ChimeraUchimeCommand", "execute");
501 //**********************************************************************************************************************
503 int ChimeraUchimeCommand::driver(string outputFName, string filename, string accnos, string alns){
508 char* tempUchime = new char[8];
509 strcpy(tempUchime, "./uchime ");
510 cPara.push_back(tempUchime);
512 char* tempIn = new char[7];
513 strcpy(tempIn, "--input");
514 cPara.push_back(tempIn);
515 char* temp = new char[filename.length()];
516 strcpy(temp, filename.c_str());
517 cPara.push_back(temp);
519 //are you using a reference file
520 if (templatefile != "self") {
523 char* tempRef = new char[4];
524 strcpy(tempRef, "--db");
525 cPara.push_back(tempRef);
526 char* tempR = new char[templatefile.length()];
527 strcpy(tempR, templatefile.c_str());
528 cPara.push_back(tempR);
531 char* tempO = new char[11];
532 strcpy(tempO, "--uchimeout");
533 cPara.push_back(tempO);
534 char* tempout = new char[outputFName.length()];
535 strcpy(tempout, outputFName.c_str());
536 cPara.push_back(tempout);
539 char* tempA = new char[12];
540 strcpy(tempA, "--uchimealns");
541 cPara.push_back(tempA);
542 char* tempa = new char[alns.length()];
543 strcpy(tempa, alns.c_str());
544 cPara.push_back(tempa);
548 char* tempskew = new char[8];
549 strcpy(tempskew, "--abskew");
550 cPara.push_back(tempskew);
551 char* tempSkew = new char[abskew.length()];
552 strcpy(tempSkew, abskew.c_str());
553 cPara.push_back(tempSkew);
557 char* tempminh = new char[6];
558 strcpy(tempminh, "--minh");
559 cPara.push_back(tempminh);
560 char* tempMinH = new char[minh.length()];
561 strcpy(tempMinH, minh.c_str());
562 cPara.push_back(tempMinH);
566 char* tempmindiv = new char[8];
567 strcpy(tempmindiv, "--mindiv");
568 cPara.push_back(tempmindiv);
569 char* tempMindiv = new char[mindiv.length()];
570 strcpy(tempMindiv, mindiv.c_str());
571 cPara.push_back(tempMindiv);
575 char* tempxn = new char[4];
576 strcpy(tempxn, "--xn");
577 cPara.push_back(tempxn);
578 char* tempXn = new char[xn.length()];
579 strcpy(tempXn, xn.c_str());
580 cPara.push_back(tempXn);
584 char* tempdn = new char[4];
585 strcpy(tempdn, "--dn");
586 cPara.push_back(tempdn);
587 char* tempDn = new char[dn.length()];
588 strcpy(tempDn, dn.c_str());
589 cPara.push_back(tempDn);
593 char* tempxa = new char[4];
594 strcpy(tempxa, "--xa");
595 cPara.push_back(tempxa);
596 char* tempXa = new char[xa.length()];
597 strcpy(tempXa, xa.c_str());
598 cPara.push_back(tempXa);
602 char* tempchunks = new char[8];
603 strcpy(tempchunks, "--chunks");
604 cPara.push_back(tempchunks);
605 char* tempChunks = new char[chunks.length()];
606 strcpy(tempChunks, chunks.c_str());
607 cPara.push_back(tempChunks);
611 char* tempminchunk = new char[10];
612 strcpy(tempminchunk, "--minchunk");
613 cPara.push_back(tempminchunk);
614 char* tempMinchunk = new char[minchunk.length()];
615 strcpy(tempMinchunk, minchunk.c_str());
616 cPara.push_back(tempMinchunk);
619 if (useIdsmoothwindow) {
620 char* tempidsmoothwindow = new char[16];
621 strcpy(tempidsmoothwindow, "--idsmoothwindow");
622 cPara.push_back(tempidsmoothwindow);
623 char* tempIdsmoothwindow = new char[idsmoothwindow.length()];
624 strcpy(tempIdsmoothwindow, idsmoothwindow.c_str());
625 cPara.push_back(tempIdsmoothwindow);
628 if (useMinsmoothid) {
629 char* tempminsmoothid = new char[13];
630 strcpy(tempminsmoothid, "--minsmoothid");
631 cPara.push_back(tempminsmoothid);
632 char* tempMinsmoothid = new char[minsmoothid.length()];
633 strcpy(tempMinsmoothid, minsmoothid.c_str());
634 cPara.push_back(tempMinsmoothid);
638 char* tempmaxp = new char[6];
639 strcpy(tempmaxp, "--maxp");
640 cPara.push_back(tempmaxp);
641 char* tempMaxp = new char[maxp.length()];
642 strcpy(tempMaxp, maxp.c_str());
643 cPara.push_back(tempMaxp);
647 char* tempskipgaps = new char[14];
648 strcpy(tempskipgaps, "--[no]skipgaps");
649 cPara.push_back(tempskipgaps);
653 char* tempskipgaps2 = new char[15];
654 strcpy(tempskipgaps2, "--[no]skipgaps2");
655 cPara.push_back(tempskipgaps2);
659 char* tempminlen = new char[8];
660 strcpy(tempminlen, "--minlen");
661 cPara.push_back(tempminlen);
662 char* tempMinlen = new char[minlen.length()];
663 strcpy(tempMinlen, minlen.c_str());
664 cPara.push_back(tempMinlen);
668 char* tempmaxlen = new char[8];
669 strcpy(tempmaxlen, "--maxlen");
670 cPara.push_back(tempmaxlen);
671 char* tempMaxlen = new char[maxlen.length()];
672 strcpy(tempMaxlen, maxlen.c_str());
673 cPara.push_back(tempMaxlen);
677 char* tempucl = new char[5];
678 strcpy(tempucl, "--ucl");
679 cPara.push_back(tempucl);
683 char* tempqueryfract = new char[12];
684 strcpy(tempqueryfract, "--queryfract");
685 cPara.push_back(tempqueryfract);
686 char* tempQueryfract = new char[queryfract.length()];
687 strcpy(tempQueryfract, queryfract.c_str());
688 cPara.push_back(tempQueryfract);
692 char** uchimeParameters;
693 uchimeParameters = new char*[cPara.size()];
694 for (int i = 0; i < cPara.size(); i++) { uchimeParameters[i] = cPara[i]; }
695 int numArgs = cPara.size();
697 uchime_main(numArgs, uchimeParameters);
700 for(int i = 0; i < cPara.size(); i++) { delete[] cPara[i]; }
701 delete[] uchimeParameters;
703 if (m->control_pressed) { return 0; }
705 //create accnos file from uchime results
707 m->openInputFile(outputFName, in);
710 m->openOutputFile(accnos, out);
715 if (m->control_pressed) { break; }
718 string chimeraFlag = "";
719 in >> chimeraFlag >> name;
722 if (templatefile == "self") {
723 name = name.substr(0, name.length()-1); //rip off last /
724 name = name.substr(0, name.find_last_of('/'));
727 for (int i = 0; i < 15; i++) { in >> chimeraFlag; }
730 if (chimeraFlag == "Y") { out << name << endl; }
738 catch(exception& e) {
739 m->errorOut(e, "ChimeraUchimeCommand", "driver");
743 /**************************************************************************************************/
745 int ChimeraUchimeCommand::createProcesses(string outputFileName, string filename, string accnos, string alns) {
751 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
752 //break up file into multiple files
753 vector<string> files;
754 m->divideFile(filename, processors, files);
756 if (m->control_pressed) { return 0; }
759 int pid, numSeqsPerProcessor;
763 MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
764 MPI_Comm_size(MPI_COMM_WORLD, &processors);
766 if (pid == 0) { //you are the root process
767 num = driver(outputFileName, files[0], accnos, alns);
769 if (templatefile != "self") {
771 for(int j = 1; j < processors; j++) {
773 MPI_Recv(&temp, 1, MPI_INT, j, tag, MPI_COMM_WORLD, &status);
776 m->appendFiles((outputFileName + toString(j) + ".temp"), outputFileName);
777 remove((outputFileName + toString(j) + ".temp").c_str());
779 m->appendFiles((accnos + toString(j) + ".temp"), accnos);
780 remove((accnos + toString(j) + ".temp").c_str());
783 m->appendFiles((alns + toString(j) + ".temp"), alns);
784 remove((alns + toString(j) + ".temp").c_str());
788 }else{ //you are a child process
789 if (templatefile != "self") { //if template=self we can only use 1 processor
790 num = driver(outputFileName+toString(pid) + ".temp", files[pid], accnos+toString(pid) + ".temp", alns+toString(pid) + ".temp");
792 //send numSeqs to parent
793 MPI_Send(&num, 1, MPI_INT, 0, tag, MPI_COMM_WORLD);
797 MPI_Barrier(MPI_COMM_WORLD); //make everyone wait - just in case
800 //loop through and create all the processes you want
801 while (process != processors) {
805 processIDS.push_back(pid); //create map from line number to pid so you can append files in correct order later
808 num = driver(outputFileName + toString(getpid()) + ".temp", files[process], accnos + toString(getpid()) + ".temp", alns + toString(getpid()) + ".temp");
810 //pass numSeqs to parent
812 string tempFile = outputFileName + toString(getpid()) + ".num.temp";
813 m->openOutputFile(tempFile, out);
819 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine();
820 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
826 num = driver(outputFileName, files[0], accnos, alns);
828 //force parent to wait until all the processes are done
829 for (int i=0;i<processIDS.size();i++) {
830 int temp = processIDS[i];
834 for (int i = 0; i < processIDS.size(); i++) {
836 string tempFile = outputFileName + toString(processIDS[i]) + ".num.temp";
837 m->openInputFile(tempFile, in);
838 if (!in.eof()) { int tempNum = 0; in >> tempNum; num += tempNum; }
839 in.close(); remove(tempFile.c_str());
843 //append output files
844 for(int i=0;i<processIDS[i];i++){
845 m->appendFiles((outputFileName + toString(processIDS[i]) + ".temp"), outputFileName);
846 remove((outputFileName + toString(processIDS[i]) + ".temp").c_str());
848 m->appendFiles((accnos + toString(processIDS[i]) + ".temp"), accnos);
849 remove((accnos + toString(processIDS[i]) + ".temp").c_str());
852 m->appendFiles((alns + toString(processIDS[i]) + ".temp"), alns);
853 remove((alns + toString(processIDS[i]) + ".temp").c_str());
857 //get rid of the file pieces.
858 for (int i = 0; i < files.size(); i++) { remove(files[i].c_str()); }
862 catch(exception& e) {
863 m->errorOut(e, "ChimeraUchimeCommand", "createProcesses");
868 /**************************************************************************************************/