2 * chimerauchimecommand.cpp
5 * Created by westcott on 5/13/11.
6 * Copyright 2011 Schloss Lab. All rights reserved.
10 #include "chimerauchimecommand.h"
11 #include "deconvolutecommand.h"
13 #include "sequence.hpp"
16 //**********************************************************************************************************************
17 vector<string> ChimeraUchimeCommand::setParameters(){
19 CommandParameter ptemplate("reference", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(ptemplate);
20 CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pfasta);
21 CommandParameter pname("name", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pname);
22 CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors);
23 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
24 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
25 CommandParameter pabskew("abskew", "Number", "", "1.9", "", "", "",false,false); parameters.push_back(pabskew);
26 CommandParameter pchimealns("chimealns", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pchimealns);
27 CommandParameter pminh("minh", "Number", "", "0.3", "", "", "",false,false); parameters.push_back(pminh);
28 CommandParameter pmindiv("mindiv", "Number", "", "0.5", "", "", "",false,false); parameters.push_back(pmindiv);
29 CommandParameter pxn("xn", "Number", "", "8.0", "", "", "",false,false); parameters.push_back(pxn);
30 CommandParameter pdn("dn", "Number", "", "1.4", "", "", "",false,false); parameters.push_back(pdn);
31 CommandParameter pxa("xa", "Number", "", "1", "", "", "",false,false); parameters.push_back(pxa);
32 CommandParameter pchunks("chunks", "Number", "", "4", "", "", "",false,false); parameters.push_back(pchunks);
33 CommandParameter pminchunk("minchunk", "Number", "", "64", "", "", "",false,false); parameters.push_back(pminchunk);
34 CommandParameter pidsmoothwindow("idsmoothwindow", "Number", "", "32", "", "", "",false,false); parameters.push_back(pidsmoothwindow);
35 CommandParameter pminsmoothid("minsmoothid", "Number", "", "0.95", "", "", "",false,false); parameters.push_back(pminsmoothid);
36 CommandParameter pmaxp("maxp", "Number", "", "2", "", "", "",false,false); parameters.push_back(pmaxp);
37 CommandParameter pskipgaps("skipgaps", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(pskipgaps);
38 CommandParameter pskipgaps2("skipgaps2", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(pskipgaps2);
39 CommandParameter pminlen("minlen", "Number", "", "10", "", "", "",false,false); parameters.push_back(pminlen);
40 CommandParameter pmaxlen("maxlen", "Number", "", "10000", "", "", "",false,false); parameters.push_back(pmaxlen);
41 CommandParameter pucl("ucl", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pucl);
42 CommandParameter pqueryfract("queryfract", "Number", "", "0.5", "", "", "",false,false); parameters.push_back(pqueryfract);
44 vector<string> myArray;
45 for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); }
49 m->errorOut(e, "ChimeraUchimeCommand", "setParameters");
53 //**********************************************************************************************************************
54 string ChimeraUchimeCommand::getHelpString(){
56 string helpString = "";
57 helpString += "The chimera.uchime command reads a fastafile and referencefile and outputs potentially chimeric sequences.\n";
58 helpString += "This command is a wrapper for uchime written by Robert C. Edgar.\n";
59 helpString += "The chimera.uchime command parameters are fasta, name, reference, processors, abskew, chimealns, minh, mindiv, xn, dn, xa, chunks, minchunk, idsmoothwindow, minsmoothid, maxp, skipgaps, skipgaps2, minlen, maxlen, ucl and queryfact.\n";
60 helpString += "The fasta parameter allows you to enter the fasta file containing your potentially chimeric sequences, and is required, unless you have a valid current fasta file. \n";
61 helpString += "The name parameter allows you to provide a name file, if you are using template=self. \n";
62 helpString += "You may enter multiple fasta files by separating their names with dashes. ie. fasta=abrecovery.fasta-amazon.fasta \n";
63 helpString += "The reference parameter allows you to enter a reference file containing known non-chimeric sequences, and is required. You may also set template=self, in this case the abundant sequences will be used as potential parents. \n";
64 helpString += "The processors parameter allows you to specify how many processors you would like to use. The default is 1. \n";
65 helpString += "The abskew parameter can only be used with template=self. Minimum abundance skew. Default 1.9. Abundance skew is: min [ abund(parent1), abund(parent2) ] / abund(query).\n";
66 helpString += "The chimealns parameter allows you to indicate you would like a file containing multiple alignments of query sequences to parents in human readable format. Alignments show columns with differences that support or contradict a chimeric model.\n";
67 helpString += "The minh parameter - mininum score to report chimera. Default 0.3. Values from 0.1 to 5 might be reasonable. Lower values increase sensitivity but may report more false positives. If you decrease xn you may need to increase minh, and vice versa.\n";
68 helpString += "The mindiv parameter - minimum divergence ratio, default 0.5. Div ratio is 100%% - %%identity between query sequence and the closest candidate for being a parent. If you don't care about very close chimeras, then you could increase mindiv to, say, 1.0 or 2.0, and also decrease minh, say to 0.1, to increase sensitivity. How well this works will depend on your data. Best is to tune parameters on a good benchmark.\n";
69 helpString += "The xn parameter - weight of a no vote. Default 8.0. Decreasing this weight to around 3 or 4 may give better performance on denoised data.\n";
70 helpString += "The dn parameter - pseudo-count prior on number of no votes. Default 1.4. Probably no good reason to change this unless you can retune to a good benchmark for your data. Reasonable values are probably in the range from 0.2 to 2.\n";
71 helpString += "The xa parameter - weight of an abstain vote. Default 1. So far, results do not seem to be very sensitive to this parameter, but if you have a good training set might be worth trying. Reasonable values might range from 0.1 to 2.\n";
72 helpString += "The chunks parameter is the number of chunks to extract from the query sequence when searching for parents. Default 4.\n";
73 helpString += "The minchunk parameter is the minimum length of a chunk. Default 64.\n";
74 helpString += "The idsmoothwindow parameter is the length of id smoothing window. Default 32.\n";
75 helpString += "The minsmoothid parameter - minimum factional identity over smoothed window of candidate parent. Default 0.95.\n";
76 helpString += "The maxp parameter - maximum number of candidate parents to consider. Default 2. In tests so far, increasing maxp gives only a very small improvement in sensivity but tends to increase the error rate quite a bit.\n";
77 helpString += "The skipgaps parameter controls how gapped columns affect counting of diffs. If skipgaps is set to T, columns containing gaps do not found as diffs. Default = T.\n";
78 helpString += "The skipgaps2 parameter controls how gapped columns affect counting of diffs. If skipgaps2 is set to T, if column is immediately adjacent to a column containing a gap, it is not counted as a diff. Default = T.\n";
79 helpString += "The minlen parameter is the minimum unaligned sequence length. Defaults 10. Applies to both query and reference sequences.\n";
80 helpString += "The maxlen parameter is the maximum unaligned sequence length. Defaults 10000. Applies to both query and reference sequences.\n";
81 helpString += "The ucl parameter - use local-X alignments. Default is global-X or false. On tests so far, global-X is always better; this option is retained because it just might work well on some future type of data.\n";
82 helpString += "The queryfract parameter - minimum fraction of the query sequence that must be covered by a local-X alignment. Default 0.5. Applies only when ucl is true.\n";
84 helpString += "When using MPI, the processors parameter is set to the number of MPI processes running. \n";
86 helpString += "The chimera.uchime command should be in the following format: \n";
87 helpString += "chimera.uchime(fasta=yourFastaFile, reference=yourTemplate) \n";
88 helpString += "Example: chimera.uchime(fasta=AD.align, reference=silva.gold.align) \n";
89 helpString += "Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFastaFile).\n";
93 m->errorOut(e, "ChimeraUchimeCommand", "getHelpString");
97 //**********************************************************************************************************************
98 ChimeraUchimeCommand::ChimeraUchimeCommand(){
100 abort = true; calledHelp = true;
102 vector<string> tempOutNames;
103 outputTypes["chimera"] = tempOutNames;
104 outputTypes["accnos"] = tempOutNames;
105 outputTypes["alns"] = tempOutNames;
107 catch(exception& e) {
108 m->errorOut(e, "ChimeraUchimeCommand", "ChimeraUchimeCommand");
112 //***************************************************************************************************************
113 ChimeraUchimeCommand::ChimeraUchimeCommand(string option) {
115 abort = false; calledHelp = false;
117 //allow user to run help
118 if(option == "help") { help(); abort = true; calledHelp = true; }
119 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
122 vector<string> myArray = setParameters();
124 OptionParser parser(option);
125 map<string,string> parameters = parser.getParameters();
127 ValidParameters validParameter("chimera.uchime");
128 map<string,string>::iterator it;
130 //check to make sure all parameters are valid for command
131 for (it = parameters.begin(); it != parameters.end(); it++) {
132 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
135 vector<string> tempOutNames;
136 outputTypes["chimera"] = tempOutNames;
137 outputTypes["accnos"] = tempOutNames;
138 outputTypes["alns"] = tempOutNames;
140 //if the user changes the input directory command factory will send this info to us in the output parameter
141 string inputDir = validParameter.validFile(parameters, "inputdir", false);
142 if (inputDir == "not found"){ inputDir = ""; }
144 //check for required parameters
145 fastafile = validParameter.validFile(parameters, "fasta", false);
146 if (fastafile == "not found") {
147 //if there is a current fasta file, use it
148 string filename = m->getFastaFile();
149 if (filename != "") { fastaFileNames.push_back(filename); m->mothurOut("Using " + filename + " as input file for the fasta parameter."); m->mothurOutEndLine(); }
150 else { m->mothurOut("You have no current fastafile and the fasta parameter is required."); m->mothurOutEndLine(); abort = true; }
152 m->splitAtDash(fastafile, fastaFileNames);
154 //go through files and make sure they are good, if not, then disregard them
155 for (int i = 0; i < fastaFileNames.size(); i++) {
158 if (fastaFileNames[i] == "current") {
159 fastaFileNames[i] = m->getFastaFile();
160 if (fastaFileNames[i] != "") { m->mothurOut("Using " + fastaFileNames[i] + " as input file for the fasta parameter where you had given current."); m->mothurOutEndLine(); }
162 m->mothurOut("You have no current fastafile, ignoring current."); m->mothurOutEndLine(); ignore=true;
163 //erase from file list
164 fastaFileNames.erase(fastaFileNames.begin()+i);
171 if (inputDir != "") {
172 string path = m->hasPath(fastaFileNames[i]);
173 //if the user has not given a path then, add inputdir. else leave path alone.
174 if (path == "") { fastaFileNames[i] = inputDir + fastaFileNames[i]; }
180 ableToOpen = m->openInputFile(fastaFileNames[i], in, "noerror");
182 //if you can't open it, try default location
183 if (ableToOpen == 1) {
184 if (m->getDefaultPath() != "") { //default path is set
185 string tryPath = m->getDefaultPath() + m->getSimpleName(fastaFileNames[i]);
186 m->mothurOut("Unable to open " + fastaFileNames[i] + ". Trying default " + tryPath); m->mothurOutEndLine();
188 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
190 fastaFileNames[i] = tryPath;
194 if (ableToOpen == 1) {
195 if (m->getOutputDir() != "") { //default path is set
196 string tryPath = m->getOutputDir() + m->getSimpleName(fastaFileNames[i]);
197 m->mothurOut("Unable to open " + fastaFileNames[i] + ". Trying output directory " + tryPath); m->mothurOutEndLine();
199 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
201 fastaFileNames[i] = tryPath;
207 if (ableToOpen == 1) {
208 m->mothurOut("Unable to open " + fastaFileNames[i] + ". It will be disregarded."); m->mothurOutEndLine();
209 //erase from file list
210 fastaFileNames.erase(fastaFileNames.begin()+i);
213 m->setFastaFile(fastaFileNames[i]);
218 //make sure there is at least one valid file left
219 if (fastaFileNames.size() == 0) { m->mothurOut("[ERROR]: no valid files."); m->mothurOutEndLine(); abort = true; }
223 //check for required parameters
225 namefile = validParameter.validFile(parameters, "name", false);
226 if (namefile == "not found") { namefile = ""; hasName = false; }
228 m->splitAtDash(namefile, nameFileNames);
230 //go through files and make sure they are good, if not, then disregard them
231 for (int i = 0; i < nameFileNames.size(); i++) {
234 if (nameFileNames[i] == "current") {
235 nameFileNames[i] = m->getNameFile();
236 if (nameFileNames[i] != "") { m->mothurOut("Using " + nameFileNames[i] + " as input file for the name parameter where you had given current."); m->mothurOutEndLine(); }
238 m->mothurOut("You have no current namefile, ignoring current."); m->mothurOutEndLine(); ignore=true;
239 //erase from file list
240 nameFileNames.erase(nameFileNames.begin()+i);
247 if (inputDir != "") {
248 string path = m->hasPath(nameFileNames[i]);
249 //if the user has not given a path then, add inputdir. else leave path alone.
250 if (path == "") { nameFileNames[i] = inputDir + nameFileNames[i]; }
256 ableToOpen = m->openInputFile(nameFileNames[i], in, "noerror");
258 //if you can't open it, try default location
259 if (ableToOpen == 1) {
260 if (m->getDefaultPath() != "") { //default path is set
261 string tryPath = m->getDefaultPath() + m->getSimpleName(nameFileNames[i]);
262 m->mothurOut("Unable to open " + nameFileNames[i] + ". Trying default " + tryPath); m->mothurOutEndLine();
264 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
266 nameFileNames[i] = tryPath;
270 if (ableToOpen == 1) {
271 if (m->getOutputDir() != "") { //default path is set
272 string tryPath = m->getOutputDir() + m->getSimpleName(nameFileNames[i]);
273 m->mothurOut("Unable to open " + nameFileNames[i] + ". Trying output directory " + tryPath); m->mothurOutEndLine();
275 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
277 nameFileNames[i] = tryPath;
283 if (ableToOpen == 1) {
284 m->mothurOut("Unable to open " + nameFileNames[i] + ". It will be disregarded."); m->mothurOutEndLine();
285 //erase from file list
286 nameFileNames.erase(nameFileNames.begin()+i);
289 m->setNameFile(nameFileNames[i]);
294 //make sure there is at least one valid file left
295 if (nameFileNames.size() == 0) { m->mothurOut("[ERROR]: no valid name files."); m->mothurOutEndLine(); abort = true; }
298 if (hasName && (nameFileNames.size() != fastaFileNames.size())) { m->mothurOut("[ERROR]: The number of namefiles does not match the number of fastafiles, please correct."); m->mothurOutEndLine(); abort=true; }
300 //if the user changes the output directory command factory will send this info to us in the output parameter
301 outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = ""; }
305 it = parameters.find("reference");
306 //user has given a template file
307 if(it != parameters.end()){
308 if (it->second == "self") { templatefile = "self"; }
310 path = m->hasPath(it->second);
311 //if the user has not given a path then, add inputdir. else leave path alone.
312 if (path == "") { parameters["reference"] = inputDir + it->second; }
314 templatefile = validParameter.validFile(parameters, "reference", true);
315 if (templatefile == "not open") { abort = true; }
316 else if (templatefile == "not found") { templatefile = ""; m->mothurOut("reference is a required parameter for the chimera.uchime command."); m->mothurOutEndLine(); abort = true; }
318 }else if (hasName) { templatefile = "self"; }
319 else { templatefile = ""; m->mothurOut("reference is a required parameter for the chimera.uchime command, unless you have a namefile."); m->mothurOutEndLine(); abort = true; }
322 string temp = validParameter.validFile(parameters, "processors", false); if (temp == "not found"){ temp = m->getProcessors(); }
323 m->setProcessors(temp);
324 convert(temp, processors);
326 abskew = validParameter.validFile(parameters, "abskew", false); if (abskew == "not found"){ useAbskew = false; abskew = "1.9"; }else{ useAbskew = true; }
327 if (useAbskew && templatefile != "self") { m->mothurOut("The abskew parameter is only valid with template=self, ignoring."); m->mothurOutEndLine(); useAbskew = false; }
329 temp = validParameter.validFile(parameters, "chimealns", false); if (temp == "not found") { temp = "f"; }
330 chimealns = m->isTrue(temp);
332 minh = validParameter.validFile(parameters, "minh", false); if (minh == "not found") { useMinH = false; minh = "0.3"; } else{ useMinH = true; }
333 mindiv = validParameter.validFile(parameters, "mindiv", false); if (mindiv == "not found") { useMindiv = false; mindiv = "0.5"; } else{ useMindiv = true; }
334 xn = validParameter.validFile(parameters, "xn", false); if (xn == "not found") { useXn = false; xn = "8.0"; } else{ useXn = true; }
335 dn = validParameter.validFile(parameters, "dn", false); if (dn == "not found") { useDn = false; dn = "1.4"; } else{ useDn = true; }
336 xa = validParameter.validFile(parameters, "xa", false); if (xa == "not found") { useXa = false; xa = "1"; } else{ useXa = true; }
337 chunks = validParameter.validFile(parameters, "chunks", false); if (chunks == "not found") { useChunks = false; chunks = "4"; } else{ useChunks = true; }
338 minchunk = validParameter.validFile(parameters, "minchunk", false); if (minchunk == "not found") { useMinchunk = false; minchunk = "64"; } else{ useMinchunk = true; }
339 idsmoothwindow = validParameter.validFile(parameters, "idsmoothwindow", false); if (idsmoothwindow == "not found") { useIdsmoothwindow = false; idsmoothwindow = "32"; } else{ useIdsmoothwindow = true; }
340 minsmoothid = validParameter.validFile(parameters, "minsmoothid", false); if (minsmoothid == "not found") { useMinsmoothid = false; minsmoothid = "0.95"; } else{ useMinsmoothid = true; }
341 maxp = validParameter.validFile(parameters, "maxp", false); if (maxp == "not found") { useMaxp = false; maxp = "2"; } else{ useMaxp = true; }
342 minlen = validParameter.validFile(parameters, "minlen", false); if (minlen == "not found") { useMinlen = false; minlen = "10"; } else{ useMinlen = true; }
343 maxlen = validParameter.validFile(parameters, "maxlen", false); if (maxlen == "not found") { useMaxlen = false; maxlen = "10000"; } else{ useMaxlen = true; }
345 temp = validParameter.validFile(parameters, "ucl", false); if (temp == "not found") { temp = "f"; }
346 ucl = m->isTrue(temp);
348 queryfract = validParameter.validFile(parameters, "queryfract", false); if (queryfract == "not found") { useQueryfract = false; queryfract = "0.5"; } else{ useQueryfract = true; }
349 if (!ucl && useQueryfract) { m->mothurOut("queryfact may only be used when ucl=t, ignoring."); m->mothurOutEndLine(); useQueryfract = false; }
351 temp = validParameter.validFile(parameters, "skipgaps", false); if (temp == "not found") { temp = "t"; }
352 skipgaps = m->isTrue(temp);
354 temp = validParameter.validFile(parameters, "skipgaps2", false); if (temp == "not found") { temp = "t"; }
355 skipgaps2 = m->isTrue(temp);
359 catch(exception& e) {
360 m->errorOut(e, "ChimeraSlayerCommand", "ChimeraSlayerCommand");
364 //***************************************************************************************************************
366 int ChimeraUchimeCommand::execute(){
368 if (abort == true) { if (calledHelp) { return 0; } return 2; }
370 m->mothurOut("\nuchime by Robert C. Edgar\nhttp://drive5.com/uchime\nThis code is donated to the public domain.\n\n");
372 for (int s = 0; s < fastaFileNames.size(); s++) {
374 m->mothurOut("Checking sequences from " + fastaFileNames[s] + " ..." ); m->mothurOutEndLine();
376 int start = time(NULL);
377 string nameFile = "";
379 if (templatefile == "self") { //you want to run uchime with a refernce template
383 MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
384 if (pid == 0) { //you are the root process
387 if (processors != 1) { m->mothurOut("When using template=self, mothur can only use 1 processor, continuing."); m->mothurOutEndLine(); processors = 1; }
388 if (nameFileNames.size() != 0) { //you provided a namefile and we don't need to create one
389 nameFile = nameFileNames[s];
391 m->mothurOutEndLine(); m->mothurOut("No namesfile given, running unique.seqs command to generate one."); m->mothurOutEndLine(); m->mothurOutEndLine();
393 //use unique.seqs to create new name and fastafile
394 string inputString = "fasta=" + fastaFileNames[s];
395 m->mothurOut("/******************************************/"); m->mothurOutEndLine();
396 m->mothurOut("Running command: unique.seqs(" + inputString + ")"); m->mothurOutEndLine();
398 Command* uniqueCommand = new DeconvoluteCommand(inputString);
399 uniqueCommand->execute();
401 map<string, vector<string> > filenames = uniqueCommand->getOutputFiles();
403 delete uniqueCommand;
405 m->mothurOut("/******************************************/"); m->mothurOutEndLine();
407 nameFile = filenames["name"][0];
408 fastaFileNames[s] = filenames["fasta"][0];
411 //create input file for uchime
412 //read through fastafile and store info
413 map<string, string> seqs;
415 m->openInputFile(fastaFileNames[s], in);
419 if (m->control_pressed) { in.close(); for (int j = 0; j < outputNames.size(); j++) { remove(outputNames[j].c_str()); } return 0; }
421 Sequence seq(in); m->gobble(in);
422 seqs[seq.getName()] = seq.getAligned();
427 vector<seqPriorityNode> nameMapCount;
428 int error = m->readNames(nameFile, nameMapCount, seqs);
430 if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) { remove(outputNames[j].c_str()); } return 0; }
432 if (error == 1) { for (int j = 0; j < outputNames.size(); j++) { remove(outputNames[j].c_str()); } return 0; }
433 if (seqs.size() != nameMapCount.size()) { m->mothurOut( "The number of sequences in your fastafile does not match the number of sequences in your namefile, aborting."); m->mothurOutEndLine(); for (int j = 0; j < outputNames.size(); j++) { remove(outputNames[j].c_str()); } return 0; }
435 sort(nameMapCount.begin(), nameMapCount.end(), compareSeqPriorityNodes);
437 string newFasta = fastaFileNames[s] + ".temp";
439 m->openOutputFile(newFasta, out);
441 //print new file in order of
442 for (int i = 0; i < nameMapCount.size(); i++) {
443 out << ">" << nameMapCount[i].name << "/ab=" << nameMapCount[i].numIdentical << "/" << endl << nameMapCount[i].seq << endl;
447 fastaFileNames[s] = newFasta;
452 if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) { remove(outputNames[j].c_str()); } return 0; }
455 if (outputDir == "") { outputDir = m->hasPath(fastaFileNames[s]); }//if user entered a file with a path then preserve it
456 string outputFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + "uchime.chimera";
457 string accnosFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + "uchime.accnos";
458 string alnsFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + "uchime.alns";
460 if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) { remove(outputNames[j].c_str()); } return 0; }
463 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
464 if(processors == 1){ numSeqs = driver(outputFileName, fastaFileNames[s], accnosFileName, alnsFileName); }
465 else{ numSeqs = createProcesses(outputFileName, fastaFileNames[s], accnosFileName, alnsFileName); }
467 numSeqs = driver(outputFileName, fastaFileNames[s], accnosFileName, alnsFileName);
469 if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) { remove(outputNames[j].c_str()); } return 0; }
471 //remove file made for uchime
472 if (templatefile == "self") { remove(fastaFileNames[s].c_str()); }
474 outputNames.push_back(outputFileName); outputTypes["chimera"].push_back(outputFileName);
475 outputNames.push_back(accnosFileName); outputTypes["accnos"].push_back(accnosFileName);
476 if (chimealns) { outputNames.push_back(alnsFileName); outputTypes["alns"].push_back(alnsFileName); }
478 m->mothurOutEndLine(); m->mothurOut("It took " + toString(time(NULL) - start) + " secs to check " + toString(numSeqs) + " sequences."); m->mothurOutEndLine();
481 //set accnos file as new current accnosfile
483 itTypes = outputTypes.find("accnos");
484 if (itTypes != outputTypes.end()) {
485 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setAccnosFile(current); }
488 m->mothurOutEndLine();
489 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
490 for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); }
491 m->mothurOutEndLine();
496 catch(exception& e) {
497 m->errorOut(e, "ChimeraUchimeCommand", "execute");
501 //**********************************************************************************************************************
503 int ChimeraUchimeCommand::driver(string outputFName, string filename, string accnos, string alns){
508 char* tempUchime = new char[8];
509 strcpy(tempUchime, "./uchime ");
510 cPara.push_back(tempUchime);
512 char* tempIn = new char[7];
513 strcpy(tempIn, "--input");
514 cPara.push_back(tempIn);
515 char* temp = new char[filename.length()];
516 strcpy(temp, filename.c_str());
517 cPara.push_back(temp);
519 //are you using a reference file
520 if (templatefile != "self") {
523 char* tempRef = new char[4];
524 strcpy(tempRef, "--db");
525 cPara.push_back(tempRef);
526 char* tempR = new char[templatefile.length()];
527 strcpy(tempR, templatefile.c_str());
528 cPara.push_back(tempR);
531 char* tempO = new char[11];
532 strcpy(tempO, "--uchimeout");
533 cPara.push_back(tempO);
534 char* tempout = new char[outputFName.length()];
535 strcpy(tempout, outputFName.c_str());
536 cPara.push_back(tempout);
539 char* tempA = new char[12];
540 strcpy(tempA, "--uchimealns");
541 cPara.push_back(tempA);
542 char* tempa = new char[alns.length()];
543 strcpy(tempa, alns.c_str());
544 cPara.push_back(tempa);
548 char* tempskew = new char[8];
549 strcpy(tempskew, "--abskew");
550 cPara.push_back(tempskew);
551 char* tempSkew = new char[abskew.length()];
552 strcpy(tempSkew, abskew.c_str());
553 cPara.push_back(tempSkew);
557 char* tempminh = new char[6];
558 strcpy(tempminh, "--minh");
559 cPara.push_back(tempminh);
560 char* tempMinH = new char[minh.length()];
561 strcpy(tempMinH, minh.c_str());
562 cPara.push_back(tempMinH);
566 char* tempmindiv = new char[8];
567 strcpy(tempmindiv, "--mindiv");
568 cPara.push_back(tempmindiv);
569 char* tempMindiv = new char[mindiv.length()];
570 strcpy(tempMindiv, mindiv.c_str());
571 cPara.push_back(tempMindiv);
575 char* tempxn = new char[4];
576 strcpy(tempxn, "--xn");
577 cPara.push_back(tempxn);
578 char* tempXn = new char[xn.length()];
579 strcpy(tempXn, xn.c_str());
580 cPara.push_back(tempXn);
584 char* tempdn = new char[4];
585 strcpy(tempdn, "--dn");
586 cPara.push_back(tempdn);
587 char* tempDn = new char[dn.length()];
588 strcpy(tempDn, dn.c_str());
589 cPara.push_back(tempDn);
593 char* tempxa = new char[4];
594 strcpy(tempxa, "--xa");
595 cPara.push_back(tempxa);
596 char* tempXa = new char[xa.length()];
597 strcpy(tempXa, xa.c_str());
598 cPara.push_back(tempXa);
602 char* tempchunks = new char[8];
603 strcpy(tempchunks, "--chunks");
604 cPara.push_back(tempchunks);
605 char* tempChunks = new char[chunks.length()];
606 strcpy(tempChunks, chunks.c_str());
607 cPara.push_back(tempChunks);
611 char* tempminchunk = new char[10];
612 strcpy(tempminchunk, "--minchunk");
613 cPara.push_back(tempminchunk);
614 char* tempMinchunk = new char[minchunk.length()];
615 strcpy(tempMinchunk, minchunk.c_str());
616 cPara.push_back(tempMinchunk);
619 if (useIdsmoothwindow) {
620 char* tempidsmoothwindow = new char[16];
621 strcpy(tempidsmoothwindow, "--idsmoothwindow");
622 cPara.push_back(tempidsmoothwindow);
623 char* tempIdsmoothwindow = new char[idsmoothwindow.length()];
624 strcpy(tempIdsmoothwindow, idsmoothwindow.c_str());
625 cPara.push_back(tempIdsmoothwindow);
628 if (useMinsmoothid) {
629 char* tempminsmoothid = new char[13];
630 strcpy(tempminsmoothid, "--minsmoothid");
631 cPara.push_back(tempminsmoothid);
632 char* tempMinsmoothid = new char[minsmoothid.length()];
633 strcpy(tempMinsmoothid, minsmoothid.c_str());
634 cPara.push_back(tempMinsmoothid);
638 char* tempmaxp = new char[6];
639 strcpy(tempmaxp, "--maxp");
640 cPara.push_back(tempmaxp);
641 char* tempMaxp = new char[maxp.length()];
642 strcpy(tempMaxp, maxp.c_str());
643 cPara.push_back(tempMaxp);
647 char* tempskipgaps = new char[14];
648 strcpy(tempskipgaps, "--[no]skipgaps");
649 cPara.push_back(tempskipgaps);
653 char* tempskipgaps2 = new char[15];
654 strcpy(tempskipgaps2, "--[no]skipgaps2");
655 cPara.push_back(tempskipgaps2);
659 char* tempminlen = new char[8];
660 strcpy(tempminlen, "--minlen");
661 cPara.push_back(tempminlen);
662 char* tempMinlen = new char[minlen.length()];
663 strcpy(tempMinlen, minlen.c_str());
664 cPara.push_back(tempMinlen);
668 char* tempmaxlen = new char[8];
669 strcpy(tempmaxlen, "--maxlen");
670 cPara.push_back(tempmaxlen);
671 char* tempMaxlen = new char[maxlen.length()];
672 strcpy(tempMaxlen, maxlen.c_str());
673 cPara.push_back(tempMaxlen);
677 char* tempucl = new char[5];
678 strcpy(tempucl, "--ucl");
679 cPara.push_back(tempucl);
683 char* tempqueryfract = new char[12];
684 strcpy(tempqueryfract, "--queryfract");
685 cPara.push_back(tempqueryfract);
686 char* tempQueryfract = new char[queryfract.length()];
687 strcpy(tempQueryfract, queryfract.c_str());
688 cPara.push_back(tempQueryfract);
692 char** uchimeParameters;
693 uchimeParameters = new char*[cPara.size()];
694 for (int i = 0; i < cPara.size(); i++) { uchimeParameters[i] = cPara[i]; }
695 int numArgs = cPara.size();
697 uchime_main(numArgs, uchimeParameters);
700 for(int i = 0; i < cPara.size(); i++) { delete[] cPara[i]; }
701 delete[] uchimeParameters;
703 if (m->control_pressed) { return 0; }
705 //create accnos file from uchime results
707 m->openInputFile(outputFName, in);
710 m->openOutputFile(accnos, out);
715 if (m->control_pressed) { break; }
718 string chimeraFlag = "";
719 in >> chimeraFlag >> name;
722 if (templatefile == "self") {
723 name = name.substr(0, name.length()-1); //rip off last /
724 name = name.substr(0, name.find_last_of('/'));
727 for (int i = 0; i < 15; i++) { in >> chimeraFlag; }
730 if (chimeraFlag == "Y") { out << name << endl; }
738 catch(exception& e) {
739 m->errorOut(e, "ChimeraUchimeCommand", "driver");
743 /**************************************************************************************************/
745 int ChimeraUchimeCommand::createProcesses(string outputFileName, string filename, string accnos, string alns) {
751 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
752 //break up file into multiple files
753 vector<string> files;
754 m->divideFile(filename, processors, files);
756 if (m->control_pressed) { return 0; }
759 int pid, numSeqsPerProcessor;
763 MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
764 MPI_Comm_size(MPI_COMM_WORLD, &processors);
766 if (pid == 0) { //you are the root process
767 num = driver(outputFileName, files[0], accnos, alns);
769 if (templatefile != "self") {
771 for(int j = 1; j < processors; j++) {
773 MPI_Recv(&temp, 1, MPI_INT, j, tag, MPI_COMM_WORLD, &status);
776 m->appendFiles((outputFileName + toString(j) + ".temp"), outputFileName);
777 remove((outputFileName + toString(j) + ".temp").c_str());
779 m->appendFiles((accnos + toString(j) + ".temp"), accnos);
780 remove((accnos + toString(j) + ".temp").c_str());
783 m->appendFiles((alns + toString(j) + ".temp"), alns);
784 remove((alns + toString(j) + ".temp").c_str());
788 }else{ //you are a child process
789 if (templatefile != "self") { //if template=self we can only use 1 processor
790 num = driver(outputFileName+toString(pid) + ".temp", files[pid], accnos+toString(pid) + ".temp", alns+toString(pid) + ".temp");
792 //send numSeqs to parent
793 MPI_Send(&num, 1, MPI_INT, 0, tag, MPI_COMM_WORLD);
797 MPI_Barrier(MPI_COMM_WORLD); //make everyone wait - just in case
800 //loop through and create all the processes you want
801 while (process != processors) {
805 processIDS.push_back(pid); //create map from line number to pid so you can append files in correct order later
808 num = driver(outputFileName + toString(getpid()) + ".temp", files[process], accnos + toString(getpid()) + ".temp", alns + toString(getpid()) + ".temp");
810 //pass numSeqs to parent
812 string tempFile = outputFileName + toString(getpid()) + ".num.temp";
813 m->openOutputFile(tempFile, out);
819 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine();
820 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
826 num = driver(outputFileName, files[0], accnos, alns);
828 //force parent to wait until all the processes are done
829 for (int i=0;i<processIDS.size();i++) {
830 int temp = processIDS[i];
834 for (int i = 0; i < processIDS.size(); i++) {
836 string tempFile = outputFileName + toString(processIDS[i]) + ".num.temp";
837 m->openInputFile(tempFile, in);
838 if (!in.eof()) { int tempNum = 0; in >> tempNum; num += tempNum; }
839 in.close(); remove(tempFile.c_str());
843 //append output files
844 for(int i=0;i<processIDS[i];i++){
845 m->appendFiles((outputFileName + toString(processIDS[i]) + ".temp"), outputFileName);
846 remove((outputFileName + toString(processIDS[i]) + ".temp").c_str());
848 m->appendFiles((accnos + toString(processIDS[i]) + ".temp"), accnos);
849 remove((accnos + toString(processIDS[i]) + ".temp").c_str());
852 m->appendFiles((alns + toString(processIDS[i]) + ".temp"), alns);
853 remove((alns + toString(processIDS[i]) + ".temp").c_str());
857 //get rid of the file pieces.
858 for (int i = 0; i < files.size(); i++) { remove(files[i].c_str()); }
862 catch(exception& e) {
863 m->errorOut(e, "ChimeraUchimeCommand", "createProcesses");
868 /**************************************************************************************************/