2 * chimerauchimecommand.cpp
5 * Created by westcott on 5/13/11.
6 * Copyright 2011 Schloss Lab. All rights reserved.
10 #include "chimerauchimecommand.h"
11 #include "deconvolutecommand.h"
13 #include "sequence.hpp"
14 #include "referencedb.h"
17 //**********************************************************************************************************************
18 vector<string> ChimeraUchimeCommand::setParameters(){
20 CommandParameter ptemplate("reference", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(ptemplate);
21 CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pfasta);
22 CommandParameter pname("name", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pname);
23 CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors);
24 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
25 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
26 CommandParameter pabskew("abskew", "Number", "", "1.9", "", "", "",false,false); parameters.push_back(pabskew);
27 CommandParameter pchimealns("chimealns", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pchimealns);
28 CommandParameter pminh("minh", "Number", "", "0.3", "", "", "",false,false); parameters.push_back(pminh);
29 CommandParameter pmindiv("mindiv", "Number", "", "0.5", "", "", "",false,false); parameters.push_back(pmindiv);
30 CommandParameter pxn("xn", "Number", "", "8.0", "", "", "",false,false); parameters.push_back(pxn);
31 CommandParameter pdn("dn", "Number", "", "1.4", "", "", "",false,false); parameters.push_back(pdn);
32 CommandParameter pxa("xa", "Number", "", "1", "", "", "",false,false); parameters.push_back(pxa);
33 CommandParameter pchunks("chunks", "Number", "", "4", "", "", "",false,false); parameters.push_back(pchunks);
34 CommandParameter pminchunk("minchunk", "Number", "", "64", "", "", "",false,false); parameters.push_back(pminchunk);
35 CommandParameter pidsmoothwindow("idsmoothwindow", "Number", "", "32", "", "", "",false,false); parameters.push_back(pidsmoothwindow);
36 //CommandParameter pminsmoothid("minsmoothid", "Number", "", "0.95", "", "", "",false,false); parameters.push_back(pminsmoothid);
37 CommandParameter pmaxp("maxp", "Number", "", "2", "", "", "",false,false); parameters.push_back(pmaxp);
38 CommandParameter pskipgaps("skipgaps", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(pskipgaps);
39 CommandParameter pskipgaps2("skipgaps2", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(pskipgaps2);
40 CommandParameter pminlen("minlen", "Number", "", "10", "", "", "",false,false); parameters.push_back(pminlen);
41 CommandParameter pmaxlen("maxlen", "Number", "", "10000", "", "", "",false,false); parameters.push_back(pmaxlen);
42 CommandParameter pucl("ucl", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pucl);
43 CommandParameter pqueryfract("queryfract", "Number", "", "0.5", "", "", "",false,false); parameters.push_back(pqueryfract);
45 vector<string> myArray;
46 for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); }
50 m->errorOut(e, "ChimeraUchimeCommand", "setParameters");
54 //**********************************************************************************************************************
55 string ChimeraUchimeCommand::getHelpString(){
57 string helpString = "";
58 helpString += "The chimera.uchime command reads a fastafile and referencefile and outputs potentially chimeric sequences.\n";
59 helpString += "This command is a wrapper for uchime written by Robert C. Edgar.\n";
60 helpString += "The chimera.uchime command parameters are fasta, name, reference, processors, abskew, chimealns, minh, mindiv, xn, dn, xa, chunks, minchunk, idsmoothwindow, minsmoothid, maxp, skipgaps, skipgaps2, minlen, maxlen, ucl and queryfact.\n";
61 helpString += "The fasta parameter allows you to enter the fasta file containing your potentially chimeric sequences, and is required, unless you have a valid current fasta file. \n";
62 helpString += "The name parameter allows you to provide a name file, if you are using template=self. \n";
63 helpString += "You may enter multiple fasta files by separating their names with dashes. ie. fasta=abrecovery.fasta-amazon.fasta \n";
64 helpString += "The reference parameter allows you to enter a reference file containing known non-chimeric sequences, and is required. You may also set template=self, in this case the abundant sequences will be used as potential parents. \n";
65 helpString += "The processors parameter allows you to specify how many processors you would like to use. The default is 1. \n";
66 helpString += "The abskew parameter can only be used with template=self. Minimum abundance skew. Default 1.9. Abundance skew is: min [ abund(parent1), abund(parent2) ] / abund(query).\n";
67 helpString += "The chimealns parameter allows you to indicate you would like a file containing multiple alignments of query sequences to parents in human readable format. Alignments show columns with differences that support or contradict a chimeric model.\n";
68 helpString += "The minh parameter - mininum score to report chimera. Default 0.3. Values from 0.1 to 5 might be reasonable. Lower values increase sensitivity but may report more false positives. If you decrease xn you may need to increase minh, and vice versa.\n";
69 helpString += "The mindiv parameter - minimum divergence ratio, default 0.5. Div ratio is 100%% - %%identity between query sequence and the closest candidate for being a parent. If you don't care about very close chimeras, then you could increase mindiv to, say, 1.0 or 2.0, and also decrease minh, say to 0.1, to increase sensitivity. How well this works will depend on your data. Best is to tune parameters on a good benchmark.\n";
70 helpString += "The xn parameter - weight of a no vote. Default 8.0. Decreasing this weight to around 3 or 4 may give better performance on denoised data.\n";
71 helpString += "The dn parameter - pseudo-count prior on number of no votes. Default 1.4. Probably no good reason to change this unless you can retune to a good benchmark for your data. Reasonable values are probably in the range from 0.2 to 2.\n";
72 helpString += "The xa parameter - weight of an abstain vote. Default 1. So far, results do not seem to be very sensitive to this parameter, but if you have a good training set might be worth trying. Reasonable values might range from 0.1 to 2.\n";
73 helpString += "The chunks parameter is the number of chunks to extract from the query sequence when searching for parents. Default 4.\n";
74 helpString += "The minchunk parameter is the minimum length of a chunk. Default 64.\n";
75 helpString += "The idsmoothwindow parameter is the length of id smoothing window. Default 32.\n";
76 //helpString += "The minsmoothid parameter - minimum factional identity over smoothed window of candidate parent. Default 0.95.\n";
77 helpString += "The maxp parameter - maximum number of candidate parents to consider. Default 2. In tests so far, increasing maxp gives only a very small improvement in sensivity but tends to increase the error rate quite a bit.\n";
78 helpString += "The skipgaps parameter controls how gapped columns affect counting of diffs. If skipgaps is set to T, columns containing gaps do not found as diffs. Default = T.\n";
79 helpString += "The skipgaps2 parameter controls how gapped columns affect counting of diffs. If skipgaps2 is set to T, if column is immediately adjacent to a column containing a gap, it is not counted as a diff. Default = T.\n";
80 helpString += "The minlen parameter is the minimum unaligned sequence length. Defaults 10. Applies to both query and reference sequences.\n";
81 helpString += "The maxlen parameter is the maximum unaligned sequence length. Defaults 10000. Applies to both query and reference sequences.\n";
82 helpString += "The ucl parameter - use local-X alignments. Default is global-X or false. On tests so far, global-X is always better; this option is retained because it just might work well on some future type of data.\n";
83 helpString += "The queryfract parameter - minimum fraction of the query sequence that must be covered by a local-X alignment. Default 0.5. Applies only when ucl is true.\n";
85 helpString += "When using MPI, the processors parameter is set to the number of MPI processes running. \n";
87 helpString += "The chimera.uchime command should be in the following format: \n";
88 helpString += "chimera.uchime(fasta=yourFastaFile, reference=yourTemplate) \n";
89 helpString += "Example: chimera.uchime(fasta=AD.align, reference=silva.gold.align) \n";
90 helpString += "Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFastaFile).\n";
94 m->errorOut(e, "ChimeraUchimeCommand", "getHelpString");
98 //**********************************************************************************************************************
99 ChimeraUchimeCommand::ChimeraUchimeCommand(){
101 abort = true; calledHelp = true;
103 vector<string> tempOutNames;
104 outputTypes["chimera"] = tempOutNames;
105 outputTypes["accnos"] = tempOutNames;
106 outputTypes["alns"] = tempOutNames;
108 catch(exception& e) {
109 m->errorOut(e, "ChimeraUchimeCommand", "ChimeraUchimeCommand");
113 //***************************************************************************************************************
114 ChimeraUchimeCommand::ChimeraUchimeCommand(string option) {
116 abort = false; calledHelp = false;
117 ReferenceDB* rdb = ReferenceDB::getInstance();
119 //allow user to run help
120 if(option == "help") { help(); abort = true; calledHelp = true; }
121 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
124 vector<string> myArray = setParameters();
126 OptionParser parser(option);
127 map<string,string> parameters = parser.getParameters();
129 ValidParameters validParameter("chimera.uchime");
130 map<string,string>::iterator it;
132 //check to make sure all parameters are valid for command
133 for (it = parameters.begin(); it != parameters.end(); it++) {
134 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
137 vector<string> tempOutNames;
138 outputTypes["chimera"] = tempOutNames;
139 outputTypes["accnos"] = tempOutNames;
140 outputTypes["alns"] = tempOutNames;
142 //if the user changes the input directory command factory will send this info to us in the output parameter
143 string inputDir = validParameter.validFile(parameters, "inputdir", false);
144 if (inputDir == "not found"){ inputDir = ""; }
146 //check for required parameters
147 fastafile = validParameter.validFile(parameters, "fasta", false);
148 if (fastafile == "not found") {
149 //if there is a current fasta file, use it
150 string filename = m->getFastaFile();
151 if (filename != "") { fastaFileNames.push_back(filename); m->mothurOut("Using " + filename + " as input file for the fasta parameter."); m->mothurOutEndLine(); }
152 else { m->mothurOut("You have no current fastafile and the fasta parameter is required."); m->mothurOutEndLine(); abort = true; }
154 m->splitAtDash(fastafile, fastaFileNames);
156 //go through files and make sure they are good, if not, then disregard them
157 for (int i = 0; i < fastaFileNames.size(); i++) {
160 if (fastaFileNames[i] == "current") {
161 fastaFileNames[i] = m->getFastaFile();
162 if (fastaFileNames[i] != "") { m->mothurOut("Using " + fastaFileNames[i] + " as input file for the fasta parameter where you had given current."); m->mothurOutEndLine(); }
164 m->mothurOut("You have no current fastafile, ignoring current."); m->mothurOutEndLine(); ignore=true;
165 //erase from file list
166 fastaFileNames.erase(fastaFileNames.begin()+i);
173 if (inputDir != "") {
174 string path = m->hasPath(fastaFileNames[i]);
175 //if the user has not given a path then, add inputdir. else leave path alone.
176 if (path == "") { fastaFileNames[i] = inputDir + fastaFileNames[i]; }
182 ableToOpen = m->openInputFile(fastaFileNames[i], in, "noerror");
184 //if you can't open it, try default location
185 if (ableToOpen == 1) {
186 if (m->getDefaultPath() != "") { //default path is set
187 string tryPath = m->getDefaultPath() + m->getSimpleName(fastaFileNames[i]);
188 m->mothurOut("Unable to open " + fastaFileNames[i] + ". Trying default " + tryPath); m->mothurOutEndLine();
190 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
192 fastaFileNames[i] = tryPath;
196 if (ableToOpen == 1) {
197 if (m->getOutputDir() != "") { //default path is set
198 string tryPath = m->getOutputDir() + m->getSimpleName(fastaFileNames[i]);
199 m->mothurOut("Unable to open " + fastaFileNames[i] + ". Trying output directory " + tryPath); m->mothurOutEndLine();
201 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
203 fastaFileNames[i] = tryPath;
209 if (ableToOpen == 1) {
210 m->mothurOut("Unable to open " + fastaFileNames[i] + ". It will be disregarded."); m->mothurOutEndLine();
211 //erase from file list
212 fastaFileNames.erase(fastaFileNames.begin()+i);
215 m->setFastaFile(fastaFileNames[i]);
220 //make sure there is at least one valid file left
221 if (fastaFileNames.size() == 0) { m->mothurOut("[ERROR]: no valid files."); m->mothurOutEndLine(); abort = true; }
225 //check for required parameters
227 namefile = validParameter.validFile(parameters, "name", false);
228 if (namefile == "not found") { namefile = ""; hasName = false; }
230 m->splitAtDash(namefile, nameFileNames);
232 //go through files and make sure they are good, if not, then disregard them
233 for (int i = 0; i < nameFileNames.size(); i++) {
236 if (nameFileNames[i] == "current") {
237 nameFileNames[i] = m->getNameFile();
238 if (nameFileNames[i] != "") { m->mothurOut("Using " + nameFileNames[i] + " as input file for the name parameter where you had given current."); m->mothurOutEndLine(); }
240 m->mothurOut("You have no current namefile, ignoring current."); m->mothurOutEndLine(); ignore=true;
241 //erase from file list
242 nameFileNames.erase(nameFileNames.begin()+i);
249 if (inputDir != "") {
250 string path = m->hasPath(nameFileNames[i]);
251 //if the user has not given a path then, add inputdir. else leave path alone.
252 if (path == "") { nameFileNames[i] = inputDir + nameFileNames[i]; }
258 ableToOpen = m->openInputFile(nameFileNames[i], in, "noerror");
260 //if you can't open it, try default location
261 if (ableToOpen == 1) {
262 if (m->getDefaultPath() != "") { //default path is set
263 string tryPath = m->getDefaultPath() + m->getSimpleName(nameFileNames[i]);
264 m->mothurOut("Unable to open " + nameFileNames[i] + ". Trying default " + tryPath); m->mothurOutEndLine();
266 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
268 nameFileNames[i] = tryPath;
272 if (ableToOpen == 1) {
273 if (m->getOutputDir() != "") { //default path is set
274 string tryPath = m->getOutputDir() + m->getSimpleName(nameFileNames[i]);
275 m->mothurOut("Unable to open " + nameFileNames[i] + ". Trying output directory " + tryPath); m->mothurOutEndLine();
277 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
279 nameFileNames[i] = tryPath;
285 if (ableToOpen == 1) {
286 m->mothurOut("Unable to open " + nameFileNames[i] + ". It will be disregarded."); m->mothurOutEndLine();
287 //erase from file list
288 nameFileNames.erase(nameFileNames.begin()+i);
291 m->setNameFile(nameFileNames[i]);
296 //make sure there is at least one valid file left
297 if (nameFileNames.size() == 0) { m->mothurOut("[ERROR]: no valid name files."); m->mothurOutEndLine(); abort = true; }
300 if (hasName && (nameFileNames.size() != fastaFileNames.size())) { m->mothurOut("[ERROR]: The number of namefiles does not match the number of fastafiles, please correct."); m->mothurOutEndLine(); abort=true; }
302 //if the user changes the output directory command factory will send this info to us in the output parameter
303 outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = ""; }
306 it = parameters.find("reference");
307 //user has given a template file
308 if(it != parameters.end()){
309 if (it->second == "self") { templatefile = "self"; }
311 path = m->hasPath(it->second);
312 //if the user has not given a path then, add inputdir. else leave path alone.
313 if (path == "") { parameters["reference"] = inputDir + it->second; }
315 templatefile = validParameter.validFile(parameters, "reference", true);
316 if (templatefile == "not open") { abort = true; }
317 else if (templatefile == "not found") { //check for saved reference sequences
318 if (rdb->getSavedReference() != "") {
319 templatefile = rdb->getSavedReference();
320 m->mothurOutEndLine(); m->mothurOut("Using sequences from " + rdb->getSavedReference() + "."); m->mothurOutEndLine();
322 m->mothurOut("[ERROR]: You don't have any saved reference sequences and the reference parameter is a required.");
323 m->mothurOutEndLine();
328 }else if (hasName) { templatefile = "self"; }
330 if (rdb->getSavedReference() != "") {
331 templatefile = rdb->getSavedReference();
332 m->mothurOutEndLine(); m->mothurOut("Using sequences from " + rdb->getSavedReference() + "."); m->mothurOutEndLine();
334 m->mothurOut("[ERROR]: You don't have any saved reference sequences and the reference parameter is a required.");
335 m->mothurOutEndLine();
336 templatefile = ""; abort = true;
340 string temp = validParameter.validFile(parameters, "processors", false); if (temp == "not found"){ temp = m->getProcessors(); }
341 m->setProcessors(temp);
342 convert(temp, processors);
344 abskew = validParameter.validFile(parameters, "abskew", false); if (abskew == "not found"){ useAbskew = false; abskew = "1.9"; }else{ useAbskew = true; }
345 if (useAbskew && templatefile != "self") { m->mothurOut("The abskew parameter is only valid with template=self, ignoring."); m->mothurOutEndLine(); useAbskew = false; }
347 temp = validParameter.validFile(parameters, "chimealns", false); if (temp == "not found") { temp = "f"; }
348 chimealns = m->isTrue(temp);
350 minh = validParameter.validFile(parameters, "minh", false); if (minh == "not found") { useMinH = false; minh = "0.3"; } else{ useMinH = true; }
351 mindiv = validParameter.validFile(parameters, "mindiv", false); if (mindiv == "not found") { useMindiv = false; mindiv = "0.5"; } else{ useMindiv = true; }
352 xn = validParameter.validFile(parameters, "xn", false); if (xn == "not found") { useXn = false; xn = "8.0"; } else{ useXn = true; }
353 dn = validParameter.validFile(parameters, "dn", false); if (dn == "not found") { useDn = false; dn = "1.4"; } else{ useDn = true; }
354 xa = validParameter.validFile(parameters, "xa", false); if (xa == "not found") { useXa = false; xa = "1"; } else{ useXa = true; }
355 chunks = validParameter.validFile(parameters, "chunks", false); if (chunks == "not found") { useChunks = false; chunks = "4"; } else{ useChunks = true; }
356 minchunk = validParameter.validFile(parameters, "minchunk", false); if (minchunk == "not found") { useMinchunk = false; minchunk = "64"; } else{ useMinchunk = true; }
357 idsmoothwindow = validParameter.validFile(parameters, "idsmoothwindow", false); if (idsmoothwindow == "not found") { useIdsmoothwindow = false; idsmoothwindow = "32"; } else{ useIdsmoothwindow = true; }
358 //minsmoothid = validParameter.validFile(parameters, "minsmoothid", false); if (minsmoothid == "not found") { useMinsmoothid = false; minsmoothid = "0.95"; } else{ useMinsmoothid = true; }
359 maxp = validParameter.validFile(parameters, "maxp", false); if (maxp == "not found") { useMaxp = false; maxp = "2"; } else{ useMaxp = true; }
360 minlen = validParameter.validFile(parameters, "minlen", false); if (minlen == "not found") { useMinlen = false; minlen = "10"; } else{ useMinlen = true; }
361 maxlen = validParameter.validFile(parameters, "maxlen", false); if (maxlen == "not found") { useMaxlen = false; maxlen = "10000"; } else{ useMaxlen = true; }
363 temp = validParameter.validFile(parameters, "ucl", false); if (temp == "not found") { temp = "f"; }
364 ucl = m->isTrue(temp);
366 queryfract = validParameter.validFile(parameters, "queryfract", false); if (queryfract == "not found") { useQueryfract = false; queryfract = "0.5"; } else{ useQueryfract = true; }
367 if (!ucl && useQueryfract) { m->mothurOut("queryfact may only be used when ucl=t, ignoring."); m->mothurOutEndLine(); useQueryfract = false; }
369 temp = validParameter.validFile(parameters, "skipgaps", false); if (temp == "not found") { temp = "t"; }
370 skipgaps = m->isTrue(temp);
372 temp = validParameter.validFile(parameters, "skipgaps2", false); if (temp == "not found") { temp = "t"; }
373 skipgaps2 = m->isTrue(temp);
375 if (hasName && (templatefile != "self")) { m->mothurOut("You have provided a namefile and the reference parameter is not set to self. I am not sure what reference you are trying to use, aborting."); m->mothurOutEndLine(); abort=true; }
378 catch(exception& e) {
379 m->errorOut(e, "ChimeraSlayerCommand", "ChimeraSlayerCommand");
383 //***************************************************************************************************************
385 int ChimeraUchimeCommand::execute(){
387 if (abort == true) { if (calledHelp) { return 0; } return 2; }
389 m->mothurOut("\nuchime by Robert C. Edgar\nhttp://drive5.com/uchime\nThis code is donated to the public domain.\n\n");
391 for (int s = 0; s < fastaFileNames.size(); s++) {
393 m->mothurOut("Checking sequences from " + fastaFileNames[s] + " ..." ); m->mothurOutEndLine();
395 int start = time(NULL);
396 string nameFile = "";
398 if (templatefile == "self") { //you want to run uchime with a reference template
402 MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
403 if (pid == 0) { //you are the root process
406 if (processors != 1) { m->mothurOut("When using template=self, mothur can only use 1 processor, continuing."); m->mothurOutEndLine(); processors = 1; }
407 if (nameFileNames.size() != 0) { //you provided a namefile and we don't need to create one
408 nameFile = nameFileNames[s];
410 m->mothurOutEndLine(); m->mothurOut("No namesfile given, running unique.seqs command to generate one."); m->mothurOutEndLine(); m->mothurOutEndLine();
412 //use unique.seqs to create new name and fastafile
413 string inputString = "fasta=" + fastaFileNames[s];
414 m->mothurOut("/******************************************/"); m->mothurOutEndLine();
415 m->mothurOut("Running command: unique.seqs(" + inputString + ")"); m->mothurOutEndLine();
417 Command* uniqueCommand = new DeconvoluteCommand(inputString);
418 uniqueCommand->execute();
420 map<string, vector<string> > filenames = uniqueCommand->getOutputFiles();
422 delete uniqueCommand;
424 m->mothurOut("/******************************************/"); m->mothurOutEndLine();
426 nameFile = filenames["name"][0];
427 fastaFileNames[s] = filenames["fasta"][0];
430 //create input file for uchime
431 //read through fastafile and store info
432 map<string, string> seqs;
434 m->openInputFile(fastaFileNames[s], in);
438 if (m->control_pressed) { in.close(); for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } return 0; }
440 Sequence seq(in); m->gobble(in);
441 seqs[seq.getName()] = seq.getAligned();
446 vector<seqPriorityNode> nameMapCount;
447 int error = m->readNames(nameFile, nameMapCount, seqs);
449 if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } return 0; }
451 if (error == 1) { for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } return 0; }
452 if (seqs.size() != nameMapCount.size()) { m->mothurOut( "The number of sequences in your fastafile does not match the number of sequences in your namefile, aborting."); m->mothurOutEndLine(); for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } return 0; }
454 sort(nameMapCount.begin(), nameMapCount.end(), compareSeqPriorityNodes);
456 string newFasta = m->getRootName(fastaFileNames[s]) + "temp";
458 m->openOutputFile(newFasta, out);
460 //print new file in order of
461 for (int i = 0; i < nameMapCount.size(); i++) {
462 out << ">" << nameMapCount[i].name << "/ab=" << nameMapCount[i].numIdentical << "/" << endl << nameMapCount[i].seq << endl;
466 fastaFileNames[s] = newFasta;
471 if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } return 0; }
474 if (outputDir == "") { outputDir = m->hasPath(fastaFileNames[s]); }//if user entered a file with a path then preserve it
475 string outputFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + "uchime.chimera";
476 string accnosFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + "uchime.accnos";
477 string alnsFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + "uchime.alns";
479 if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } return 0; }
482 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
483 if(processors == 1){ numSeqs = driver(outputFileName, fastaFileNames[s], accnosFileName, alnsFileName); }
484 else{ numSeqs = createProcesses(outputFileName, fastaFileNames[s], accnosFileName, alnsFileName); }
486 numSeqs = driver(outputFileName, fastaFileNames[s], accnosFileName, alnsFileName);
488 if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } return 0; }
490 //remove file made for uchime
491 if (templatefile == "self") { m->mothurRemove(fastaFileNames[s]); }
493 outputNames.push_back(outputFileName); outputTypes["chimera"].push_back(outputFileName);
494 outputNames.push_back(accnosFileName); outputTypes["accnos"].push_back(accnosFileName);
495 if (chimealns) { outputNames.push_back(alnsFileName); outputTypes["alns"].push_back(alnsFileName); }
497 m->mothurOutEndLine(); m->mothurOut("It took " + toString(time(NULL) - start) + " secs to check " + toString(numSeqs) + " sequences."); m->mothurOutEndLine();
500 //set accnos file as new current accnosfile
502 itTypes = outputTypes.find("accnos");
503 if (itTypes != outputTypes.end()) {
504 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setAccnosFile(current); }
507 m->mothurOutEndLine();
508 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
509 for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); }
510 m->mothurOutEndLine();
515 catch(exception& e) {
516 m->errorOut(e, "ChimeraUchimeCommand", "execute");
520 //**********************************************************************************************************************
522 int ChimeraUchimeCommand::driver(string outputFName, string filename, string accnos, string alns){
527 char* tempUchime = new char[8];
528 strcpy(tempUchime, "./uchime ");
529 cPara.push_back(tempUchime);
531 char* tempIn = new char[8];
532 *tempIn = '\0'; strncat(tempIn, "--input", 7);
533 //strcpy(tempIn, "--input");
534 cPara.push_back(tempIn);
535 char* temp = new char[filename.length()+1];
536 *temp = '\0'; strncat(temp, filename.c_str(), filename.length());
537 //strcpy(temp, filename.c_str());
538 cPara.push_back(temp);
540 //are you using a reference file
541 if (templatefile != "self") {
543 char* tempRef = new char[5];
544 //strcpy(tempRef, "--db");
545 *tempRef = '\0'; strncat(tempRef, "--db", 4);
546 cPara.push_back(tempRef);
547 char* tempR = new char[templatefile.length()+1];
548 //strcpy(tempR, templatefile.c_str());
549 *tempR = '\0'; strncat(tempR, templatefile.c_str(), templatefile.length());
550 cPara.push_back(tempR);
553 char* tempO = new char[12];
554 *tempO = '\0'; strncat(tempO, "--uchimeout", 11);
555 //strcpy(tempO, "--uchimeout");
556 cPara.push_back(tempO);
557 char* tempout = new char[outputFName.length()+1];
558 //strcpy(tempout, outputFName.c_str());
559 *tempout = '\0'; strncat(tempout, outputFName.c_str(), outputFName.length());
560 cPara.push_back(tempout);
563 char* tempA = new char[13];
564 *tempA = '\0'; strncat(tempA, "--uchimealns", 12);
565 //strcpy(tempA, "--uchimealns");
566 cPara.push_back(tempA);
567 char* tempa = new char[alns.length()+1];
568 //strcpy(tempa, alns.c_str());
569 *tempa = '\0'; strncat(tempa, alns.c_str(), alns.length());
570 cPara.push_back(tempa);
574 char* tempskew = new char[9];
575 *tempskew = '\0'; strncat(tempskew, "--abskew", 8);
576 //strcpy(tempskew, "--abskew");
577 cPara.push_back(tempskew);
578 char* tempSkew = new char[abskew.length()+1];
579 //strcpy(tempSkew, abskew.c_str());
580 *tempSkew = '\0'; strncat(tempSkew, abskew.c_str(), abskew.length());
581 cPara.push_back(tempSkew);
585 char* tempminh = new char[7];
586 *tempminh = '\0'; strncat(tempminh, "--minh", 6);
587 //strcpy(tempminh, "--minh");
588 cPara.push_back(tempminh);
589 char* tempMinH = new char[minh.length()+1];
590 *tempMinH = '\0'; strncat(tempMinH, minh.c_str(), minh.length());
591 //strcpy(tempMinH, minh.c_str());
592 cPara.push_back(tempMinH);
596 char* tempmindiv = new char[9];
597 *tempmindiv = '\0'; strncat(tempmindiv, "--mindiv", 8);
598 //strcpy(tempmindiv, "--mindiv");
599 cPara.push_back(tempmindiv);
600 char* tempMindiv = new char[mindiv.length()+1];
601 *tempMindiv = '\0'; strncat(tempMindiv, mindiv.c_str(), mindiv.length());
602 //strcpy(tempMindiv, mindiv.c_str());
603 cPara.push_back(tempMindiv);
607 char* tempxn = new char[5];
608 //strcpy(tempxn, "--xn");
609 *tempxn = '\0'; strncat(tempxn, "--xn", 4);
610 cPara.push_back(tempxn);
611 char* tempXn = new char[xn.length()+1];
612 //strcpy(tempXn, xn.c_str());
613 *tempXn = '\0'; strncat(tempXn, xn.c_str(), xn.length());
614 cPara.push_back(tempXn);
618 char* tempdn = new char[5];
619 //strcpy(tempdn, "--dn");
620 *tempdn = '\0'; strncat(tempdn, "--dn", 4);
621 cPara.push_back(tempdn);
622 char* tempDn = new char[dn.length()+1];
623 *tempDn = '\0'; strncat(tempDn, dn.c_str(), dn.length());
624 //strcpy(tempDn, dn.c_str());
625 cPara.push_back(tempDn);
629 char* tempxa = new char[5];
630 //strcpy(tempxa, "--xa");
631 *tempxa = '\0'; strncat(tempxa, "--xa", 4);
632 cPara.push_back(tempxa);
633 char* tempXa = new char[xa.length()+1];
634 *tempXa = '\0'; strncat(tempXa, xa.c_str(), xa.length());
635 //strcpy(tempXa, xa.c_str());
636 cPara.push_back(tempXa);
640 char* tempchunks = new char[9];
641 //strcpy(tempchunks, "--chunks");
642 *tempchunks = '\0'; strncat(tempchunks, "--chunks", 8);
643 cPara.push_back(tempchunks);
644 char* tempChunks = new char[chunks.length()+1];
645 *tempChunks = '\0'; strncat(tempChunks, chunks.c_str(), chunks.length());
646 //strcpy(tempChunks, chunks.c_str());
647 cPara.push_back(tempChunks);
651 char* tempminchunk = new char[11];
652 //strcpy(tempminchunk, "--minchunk");
653 *tempminchunk = '\0'; strncat(tempminchunk, "--minchunk", 10);
654 cPara.push_back(tempminchunk);
655 char* tempMinchunk = new char[minchunk.length()+1];
656 *tempMinchunk = '\0'; strncat(tempMinchunk, minchunk.c_str(), minchunk.length());
657 //strcpy(tempMinchunk, minchunk.c_str());
658 cPara.push_back(tempMinchunk);
661 if (useIdsmoothwindow) {
662 char* tempidsmoothwindow = new char[17];
663 *tempidsmoothwindow = '\0'; strncat(tempidsmoothwindow, "--idsmoothwindow", 16);
664 //strcpy(tempidsmoothwindow, "--idsmoothwindow");
665 cPara.push_back(tempidsmoothwindow);
666 char* tempIdsmoothwindow = new char[idsmoothwindow.length()+1];
667 *tempIdsmoothwindow = '\0'; strncat(tempIdsmoothwindow, idsmoothwindow.c_str(), idsmoothwindow.length());
668 //strcpy(tempIdsmoothwindow, idsmoothwindow.c_str());
669 cPara.push_back(tempIdsmoothwindow);
672 /*if (useMinsmoothid) {
673 char* tempminsmoothid = new char[14];
674 //strcpy(tempminsmoothid, "--minsmoothid");
675 *tempminsmoothid = '\0'; strncat(tempminsmoothid, "--minsmoothid", 13);
676 cPara.push_back(tempminsmoothid);
677 char* tempMinsmoothid = new char[minsmoothid.length()+1];
678 *tempMinsmoothid = '\0'; strncat(tempMinsmoothid, minsmoothid.c_str(), minsmoothid.length());
679 //strcpy(tempMinsmoothid, minsmoothid.c_str());
680 cPara.push_back(tempMinsmoothid);
684 char* tempmaxp = new char[7];
685 //strcpy(tempmaxp, "--maxp");
686 *tempmaxp = '\0'; strncat(tempmaxp, "--maxp", 6);
687 cPara.push_back(tempmaxp);
688 char* tempMaxp = new char[maxp.length()+1];
689 *tempMaxp = '\0'; strncat(tempMaxp, maxp.c_str(), maxp.length());
690 //strcpy(tempMaxp, maxp.c_str());
691 cPara.push_back(tempMaxp);
695 char* tempskipgaps = new char[13];
696 //strcpy(tempskipgaps, "--[no]skipgaps");
697 *tempskipgaps = '\0'; strncat(tempskipgaps, "--noskipgaps", 12);
698 cPara.push_back(tempskipgaps);
702 char* tempskipgaps2 = new char[14];
703 //strcpy(tempskipgaps2, "--[no]skipgaps2");
704 *tempskipgaps2 = '\0'; strncat(tempskipgaps2, "--noskipgaps2", 13);
705 cPara.push_back(tempskipgaps2);
709 char* tempminlen = new char[9];
710 *tempminlen = '\0'; strncat(tempminlen, "--minlen", 8);
711 //strcpy(tempminlen, "--minlen");
712 cPara.push_back(tempminlen);
713 char* tempMinlen = new char[minlen.length()+1];
714 //strcpy(tempMinlen, minlen.c_str());
715 *tempMinlen = '\0'; strncat(tempMinlen, minlen.c_str(), minlen.length());
716 cPara.push_back(tempMinlen);
720 char* tempmaxlen = new char[9];
721 //strcpy(tempmaxlen, "--maxlen");
722 *tempmaxlen = '\0'; strncat(tempmaxlen, "--maxlen", 8);
723 cPara.push_back(tempmaxlen);
724 char* tempMaxlen = new char[maxlen.length()+1];
725 *tempMaxlen = '\0'; strncat(tempMaxlen, maxlen.c_str(), maxlen.length());
726 //strcpy(tempMaxlen, maxlen.c_str());
727 cPara.push_back(tempMaxlen);
731 char* tempucl = new char[5];
732 strcpy(tempucl, "--ucl");
733 cPara.push_back(tempucl);
737 char* tempqueryfract = new char[13];
738 *tempqueryfract = '\0'; strncat(tempqueryfract, "--queryfract", 12);
739 //strcpy(tempqueryfract, "--queryfract");
740 cPara.push_back(tempqueryfract);
741 char* tempQueryfract = new char[queryfract.length()+1];
742 *tempQueryfract = '\0'; strncat(tempQueryfract, queryfract.c_str(), queryfract.length());
743 //strcpy(tempQueryfract, queryfract.c_str());
744 cPara.push_back(tempQueryfract);
748 char** uchimeParameters;
749 uchimeParameters = new char*[cPara.size()];
750 for (int i = 0; i < cPara.size(); i++) { uchimeParameters[i] = cPara[i]; }
751 int numArgs = cPara.size();
753 uchime_main(numArgs, uchimeParameters);
756 for(int i = 0; i < cPara.size(); i++) { delete[] cPara[i]; }
757 delete[] uchimeParameters;
759 if (m->control_pressed) { return 0; }
761 //create accnos file from uchime results
763 m->openInputFile(outputFName, in);
766 m->openOutputFile(accnos, out);
771 if (m->control_pressed) { break; }
774 string chimeraFlag = "";
775 in >> chimeraFlag >> name;
778 if (templatefile == "self") {
779 name = name.substr(0, name.length()-1); //rip off last /
780 name = name.substr(0, name.find_last_of('/'));
783 for (int i = 0; i < 15; i++) { in >> chimeraFlag; }
786 if (chimeraFlag == "Y") { out << name << endl; }
794 catch(exception& e) {
795 m->errorOut(e, "ChimeraUchimeCommand", "driver");
799 /**************************************************************************************************/
801 int ChimeraUchimeCommand::createProcesses(string outputFileName, string filename, string accnos, string alns) {
807 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
808 //break up file into multiple files
809 vector<string> files;
810 m->divideFile(filename, processors, files);
812 if (m->control_pressed) { return 0; }
815 int pid, numSeqsPerProcessor;
819 MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
820 MPI_Comm_size(MPI_COMM_WORLD, &processors);
822 if (pid == 0) { //you are the root process
823 num = driver(outputFileName, files[0], accnos, alns);
825 if (templatefile != "self") {
827 for(int j = 1; j < processors; j++) {
829 MPI_Recv(&temp, 1, MPI_INT, j, tag, MPI_COMM_WORLD, &status);
832 m->appendFiles((outputFileName + toString(j) + ".temp"), outputFileName);
833 m->mothurRemove((outputFileName + toString(j) + ".temp"));
835 m->appendFiles((accnos + toString(j) + ".temp"), accnos);
836 m->mothurRemove((accnos + toString(j) + ".temp"));
839 m->appendFiles((alns + toString(j) + ".temp"), alns);
840 m->mothurRemove((alns + toString(j) + ".temp"));
844 }else{ //you are a child process
845 if (templatefile != "self") { //if template=self we can only use 1 processor
846 num = driver(outputFileName+toString(pid) + ".temp", files[pid], accnos+toString(pid) + ".temp", alns+toString(pid) + ".temp");
848 //send numSeqs to parent
849 MPI_Send(&num, 1, MPI_INT, 0, tag, MPI_COMM_WORLD);
853 MPI_Barrier(MPI_COMM_WORLD); //make everyone wait - just in case
856 //loop through and create all the processes you want
857 while (process != processors) {
861 processIDS.push_back(pid); //create map from line number to pid so you can append files in correct order later
864 num = driver(outputFileName + toString(getpid()) + ".temp", files[process], accnos + toString(getpid()) + ".temp", alns + toString(getpid()) + ".temp");
866 //pass numSeqs to parent
868 string tempFile = outputFileName + toString(getpid()) + ".num.temp";
869 m->openOutputFile(tempFile, out);
875 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine();
876 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
882 num = driver(outputFileName, files[0], accnos, alns);
884 //force parent to wait until all the processes are done
885 for (int i=0;i<processIDS.size();i++) {
886 int temp = processIDS[i];
890 for (int i = 0; i < processIDS.size(); i++) {
892 string tempFile = outputFileName + toString(processIDS[i]) + ".num.temp";
893 m->openInputFile(tempFile, in);
894 if (!in.eof()) { int tempNum = 0; in >> tempNum; num += tempNum; }
895 in.close(); m->mothurRemove(tempFile);
899 //append output files
900 for(int i=0;i<processIDS[i];i++){
901 m->appendFiles((outputFileName + toString(processIDS[i]) + ".temp"), outputFileName);
902 m->mothurRemove((outputFileName + toString(processIDS[i]) + ".temp"));
904 m->appendFiles((accnos + toString(processIDS[i]) + ".temp"), accnos);
905 m->mothurRemove((accnos + toString(processIDS[i]) + ".temp"));
908 m->appendFiles((alns + toString(processIDS[i]) + ".temp"), alns);
909 m->mothurRemove((alns + toString(processIDS[i]) + ".temp"));
913 //get rid of the file pieces.
914 for (int i = 0; i < files.size(); i++) { m->mothurRemove(files[i]); }
918 catch(exception& e) {
919 m->errorOut(e, "ChimeraUchimeCommand", "createProcesses");
924 /**************************************************************************************************/