2 * chimerauchimecommand.cpp
5 * Created by westcott on 5/13/11.
6 * Copyright 2011 Schloss Lab. All rights reserved.
10 #include "chimerauchimecommand.h"
11 #include "deconvolutecommand.h"
13 #include "sequence.hpp"
14 #include "referencedb.h"
15 #include "systemcommand.h"
17 //**********************************************************************************************************************
18 vector<string> ChimeraUchimeCommand::setParameters(){
20 CommandParameter ptemplate("reference", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(ptemplate);
21 CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pfasta);
22 CommandParameter pname("name", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pname);
23 CommandParameter pgroup("group", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pgroup);
24 CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors);
25 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
26 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
27 CommandParameter pabskew("abskew", "Number", "", "1.9", "", "", "",false,false); parameters.push_back(pabskew);
28 CommandParameter pchimealns("chimealns", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pchimealns);
29 CommandParameter pminh("minh", "Number", "", "0.3", "", "", "",false,false); parameters.push_back(pminh);
30 CommandParameter pmindiv("mindiv", "Number", "", "0.5", "", "", "",false,false); parameters.push_back(pmindiv);
31 CommandParameter pxn("xn", "Number", "", "8.0", "", "", "",false,false); parameters.push_back(pxn);
32 CommandParameter pdn("dn", "Number", "", "1.4", "", "", "",false,false); parameters.push_back(pdn);
33 CommandParameter pxa("xa", "Number", "", "1", "", "", "",false,false); parameters.push_back(pxa);
34 CommandParameter pchunks("chunks", "Number", "", "4", "", "", "",false,false); parameters.push_back(pchunks);
35 CommandParameter pminchunk("minchunk", "Number", "", "64", "", "", "",false,false); parameters.push_back(pminchunk);
36 CommandParameter pidsmoothwindow("idsmoothwindow", "Number", "", "32", "", "", "",false,false); parameters.push_back(pidsmoothwindow);
37 //CommandParameter pminsmoothid("minsmoothid", "Number", "", "0.95", "", "", "",false,false); parameters.push_back(pminsmoothid);
38 CommandParameter pmaxp("maxp", "Number", "", "2", "", "", "",false,false); parameters.push_back(pmaxp);
39 CommandParameter pskipgaps("skipgaps", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(pskipgaps);
40 CommandParameter pskipgaps2("skipgaps2", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(pskipgaps2);
41 CommandParameter pminlen("minlen", "Number", "", "10", "", "", "",false,false); parameters.push_back(pminlen);
42 CommandParameter pmaxlen("maxlen", "Number", "", "10000", "", "", "",false,false); parameters.push_back(pmaxlen);
43 CommandParameter pucl("ucl", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pucl);
44 CommandParameter pqueryfract("queryfract", "Number", "", "0.5", "", "", "",false,false); parameters.push_back(pqueryfract);
46 vector<string> myArray;
47 for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); }
51 m->errorOut(e, "ChimeraUchimeCommand", "setParameters");
55 //**********************************************************************************************************************
56 string ChimeraUchimeCommand::getHelpString(){
58 string helpString = "";
59 helpString += "The chimera.uchime command reads a fastafile and referencefile and outputs potentially chimeric sequences.\n";
60 helpString += "This command is a wrapper for uchime written by Robert C. Edgar.\n";
61 helpString += "The chimera.uchime command parameters are fasta, name, reference, processors, abskew, chimealns, minh, mindiv, xn, dn, xa, chunks, minchunk, idsmoothwindow, minsmoothid, maxp, skipgaps, skipgaps2, minlen, maxlen, ucl and queryfact.\n";
62 helpString += "The fasta parameter allows you to enter the fasta file containing your potentially chimeric sequences, and is required, unless you have a valid current fasta file. \n";
63 helpString += "The name parameter allows you to provide a name file, if you are using template=self. \n";
64 helpString += "You may enter multiple fasta files by separating their names with dashes. ie. fasta=abrecovery.fasta-amazon.fasta \n";
65 helpString += "The group parameter allows you to provide a group file. The group file can be used with a namesfile and reference=self. When checking sequences, only sequences from the same group as the query sequence will be used as the reference. \n";
66 helpString += "The reference parameter allows you to enter a reference file containing known non-chimeric sequences, and is required. You may also set template=self, in this case the abundant sequences will be used as potential parents. \n";
67 helpString += "The processors parameter allows you to specify how many processors you would like to use. The default is 1. \n";
68 helpString += "The abskew parameter can only be used with template=self. Minimum abundance skew. Default 1.9. Abundance skew is: min [ abund(parent1), abund(parent2) ] / abund(query).\n";
69 helpString += "The chimealns parameter allows you to indicate you would like a file containing multiple alignments of query sequences to parents in human readable format. Alignments show columns with differences that support or contradict a chimeric model.\n";
70 helpString += "The minh parameter - mininum score to report chimera. Default 0.3. Values from 0.1 to 5 might be reasonable. Lower values increase sensitivity but may report more false positives. If you decrease xn you may need to increase minh, and vice versa.\n";
71 helpString += "The mindiv parameter - minimum divergence ratio, default 0.5. Div ratio is 100%% - %%identity between query sequence and the closest candidate for being a parent. If you don't care about very close chimeras, then you could increase mindiv to, say, 1.0 or 2.0, and also decrease minh, say to 0.1, to increase sensitivity. How well this works will depend on your data. Best is to tune parameters on a good benchmark.\n";
72 helpString += "The xn parameter - weight of a no vote. Default 8.0. Decreasing this weight to around 3 or 4 may give better performance on denoised data.\n";
73 helpString += "The dn parameter - pseudo-count prior on number of no votes. Default 1.4. Probably no good reason to change this unless you can retune to a good benchmark for your data. Reasonable values are probably in the range from 0.2 to 2.\n";
74 helpString += "The xa parameter - weight of an abstain vote. Default 1. So far, results do not seem to be very sensitive to this parameter, but if you have a good training set might be worth trying. Reasonable values might range from 0.1 to 2.\n";
75 helpString += "The chunks parameter is the number of chunks to extract from the query sequence when searching for parents. Default 4.\n";
76 helpString += "The minchunk parameter is the minimum length of a chunk. Default 64.\n";
77 helpString += "The idsmoothwindow parameter is the length of id smoothing window. Default 32.\n";
78 //helpString += "The minsmoothid parameter - minimum factional identity over smoothed window of candidate parent. Default 0.95.\n";
79 helpString += "The maxp parameter - maximum number of candidate parents to consider. Default 2. In tests so far, increasing maxp gives only a very small improvement in sensivity but tends to increase the error rate quite a bit.\n";
80 helpString += "The skipgaps parameter controls how gapped columns affect counting of diffs. If skipgaps is set to T, columns containing gaps do not found as diffs. Default = T.\n";
81 helpString += "The skipgaps2 parameter controls how gapped columns affect counting of diffs. If skipgaps2 is set to T, if column is immediately adjacent to a column containing a gap, it is not counted as a diff. Default = T.\n";
82 helpString += "The minlen parameter is the minimum unaligned sequence length. Defaults 10. Applies to both query and reference sequences.\n";
83 helpString += "The maxlen parameter is the maximum unaligned sequence length. Defaults 10000. Applies to both query and reference sequences.\n";
84 helpString += "The ucl parameter - use local-X alignments. Default is global-X or false. On tests so far, global-X is always better; this option is retained because it just might work well on some future type of data.\n";
85 helpString += "The queryfract parameter - minimum fraction of the query sequence that must be covered by a local-X alignment. Default 0.5. Applies only when ucl is true.\n";
87 helpString += "When using MPI, the processors parameter is set to the number of MPI processes running. \n";
89 helpString += "The chimera.uchime command should be in the following format: \n";
90 helpString += "chimera.uchime(fasta=yourFastaFile, reference=yourTemplate) \n";
91 helpString += "Example: chimera.uchime(fasta=AD.align, reference=silva.gold.align) \n";
92 helpString += "Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFastaFile).\n";
96 m->errorOut(e, "ChimeraUchimeCommand", "getHelpString");
100 //**********************************************************************************************************************
101 ChimeraUchimeCommand::ChimeraUchimeCommand(){
103 abort = true; calledHelp = true;
105 vector<string> tempOutNames;
106 outputTypes["chimera"] = tempOutNames;
107 outputTypes["accnos"] = tempOutNames;
108 outputTypes["alns"] = tempOutNames;
110 catch(exception& e) {
111 m->errorOut(e, "ChimeraUchimeCommand", "ChimeraUchimeCommand");
115 //***************************************************************************************************************
116 ChimeraUchimeCommand::ChimeraUchimeCommand(string option) {
118 abort = false; calledHelp = false;
119 ReferenceDB* rdb = ReferenceDB::getInstance();
121 //allow user to run help
122 if(option == "help") { help(); abort = true; calledHelp = true; }
123 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
126 vector<string> myArray = setParameters();
128 OptionParser parser(option);
129 map<string,string> parameters = parser.getParameters();
131 ValidParameters validParameter("chimera.uchime");
132 map<string,string>::iterator it;
134 //check to make sure all parameters are valid for command
135 for (it = parameters.begin(); it != parameters.end(); it++) {
136 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
139 vector<string> tempOutNames;
140 outputTypes["chimera"] = tempOutNames;
141 outputTypes["accnos"] = tempOutNames;
142 outputTypes["alns"] = tempOutNames;
144 //if the user changes the input directory command factory will send this info to us in the output parameter
145 string inputDir = validParameter.validFile(parameters, "inputdir", false);
146 if (inputDir == "not found"){ inputDir = ""; }
148 //check for required parameters
149 fastafile = validParameter.validFile(parameters, "fasta", false);
150 if (fastafile == "not found") {
151 //if there is a current fasta file, use it
152 string filename = m->getFastaFile();
153 if (filename != "") { fastaFileNames.push_back(filename); m->mothurOut("Using " + filename + " as input file for the fasta parameter."); m->mothurOutEndLine(); }
154 else { m->mothurOut("You have no current fastafile and the fasta parameter is required."); m->mothurOutEndLine(); abort = true; }
156 m->splitAtDash(fastafile, fastaFileNames);
158 //go through files and make sure they are good, if not, then disregard them
159 for (int i = 0; i < fastaFileNames.size(); i++) {
162 if (fastaFileNames[i] == "current") {
163 fastaFileNames[i] = m->getFastaFile();
164 if (fastaFileNames[i] != "") { m->mothurOut("Using " + fastaFileNames[i] + " as input file for the fasta parameter where you had given current."); m->mothurOutEndLine(); }
166 m->mothurOut("You have no current fastafile, ignoring current."); m->mothurOutEndLine(); ignore=true;
167 //erase from file list
168 fastaFileNames.erase(fastaFileNames.begin()+i);
175 if (inputDir != "") {
176 string path = m->hasPath(fastaFileNames[i]);
177 //if the user has not given a path then, add inputdir. else leave path alone.
178 if (path == "") { fastaFileNames[i] = inputDir + fastaFileNames[i]; }
184 ableToOpen = m->openInputFile(fastaFileNames[i], in, "noerror");
186 //if you can't open it, try default location
187 if (ableToOpen == 1) {
188 if (m->getDefaultPath() != "") { //default path is set
189 string tryPath = m->getDefaultPath() + m->getSimpleName(fastaFileNames[i]);
190 m->mothurOut("Unable to open " + fastaFileNames[i] + ". Trying default " + tryPath); m->mothurOutEndLine();
192 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
194 fastaFileNames[i] = tryPath;
198 if (ableToOpen == 1) {
199 if (m->getOutputDir() != "") { //default path is set
200 string tryPath = m->getOutputDir() + m->getSimpleName(fastaFileNames[i]);
201 m->mothurOut("Unable to open " + fastaFileNames[i] + ". Trying output directory " + tryPath); m->mothurOutEndLine();
203 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
205 fastaFileNames[i] = tryPath;
211 if (ableToOpen == 1) {
212 m->mothurOut("Unable to open " + fastaFileNames[i] + ". It will be disregarded."); m->mothurOutEndLine();
213 //erase from file list
214 fastaFileNames.erase(fastaFileNames.begin()+i);
217 m->setFastaFile(fastaFileNames[i]);
222 //make sure there is at least one valid file left
223 if (fastaFileNames.size() == 0) { m->mothurOut("[ERROR]: no valid files."); m->mothurOutEndLine(); abort = true; }
227 //check for required parameters
229 namefile = validParameter.validFile(parameters, "name", false);
230 if (namefile == "not found") { namefile = ""; hasName = false; }
232 m->splitAtDash(namefile, nameFileNames);
234 //go through files and make sure they are good, if not, then disregard them
235 for (int i = 0; i < nameFileNames.size(); i++) {
238 if (nameFileNames[i] == "current") {
239 nameFileNames[i] = m->getNameFile();
240 if (nameFileNames[i] != "") { m->mothurOut("Using " + nameFileNames[i] + " as input file for the name parameter where you had given current."); m->mothurOutEndLine(); }
242 m->mothurOut("You have no current namefile, ignoring current."); m->mothurOutEndLine(); ignore=true;
243 //erase from file list
244 nameFileNames.erase(nameFileNames.begin()+i);
251 if (inputDir != "") {
252 string path = m->hasPath(nameFileNames[i]);
253 //if the user has not given a path then, add inputdir. else leave path alone.
254 if (path == "") { nameFileNames[i] = inputDir + nameFileNames[i]; }
260 ableToOpen = m->openInputFile(nameFileNames[i], in, "noerror");
262 //if you can't open it, try default location
263 if (ableToOpen == 1) {
264 if (m->getDefaultPath() != "") { //default path is set
265 string tryPath = m->getDefaultPath() + m->getSimpleName(nameFileNames[i]);
266 m->mothurOut("Unable to open " + nameFileNames[i] + ". Trying default " + tryPath); m->mothurOutEndLine();
268 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
270 nameFileNames[i] = tryPath;
274 if (ableToOpen == 1) {
275 if (m->getOutputDir() != "") { //default path is set
276 string tryPath = m->getOutputDir() + m->getSimpleName(nameFileNames[i]);
277 m->mothurOut("Unable to open " + nameFileNames[i] + ". Trying output directory " + tryPath); m->mothurOutEndLine();
279 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
281 nameFileNames[i] = tryPath;
287 if (ableToOpen == 1) {
288 m->mothurOut("Unable to open " + nameFileNames[i] + ". It will be disregarded."); m->mothurOutEndLine();
289 //erase from file list
290 nameFileNames.erase(nameFileNames.begin()+i);
293 m->setNameFile(nameFileNames[i]);
298 //make sure there is at least one valid file left
299 if (nameFileNames.size() == 0) { m->mothurOut("[ERROR]: no valid name files."); m->mothurOutEndLine(); abort = true; }
302 if (hasName && (nameFileNames.size() != fastaFileNames.size())) { m->mothurOut("[ERROR]: The number of namefiles does not match the number of fastafiles, please correct."); m->mothurOutEndLine(); abort=true; }
304 bool hasGroup = true;
305 groupfile = validParameter.validFile(parameters, "group", false);
306 if (groupfile == "not found") { groupfile = ""; hasGroup = false; }
308 m->splitAtDash(groupfile, groupFileNames);
310 //go through files and make sure they are good, if not, then disregard them
311 for (int i = 0; i < groupFileNames.size(); i++) {
314 if (groupFileNames[i] == "current") {
315 groupFileNames[i] = m->getGroupFile();
316 if (groupFileNames[i] != "") { m->mothurOut("Using " + groupFileNames[i] + " as input file for the group parameter where you had given current."); m->mothurOutEndLine(); }
318 m->mothurOut("You have no current namefile, ignoring current."); m->mothurOutEndLine(); ignore=true;
319 //erase from file list
320 groupFileNames.erase(groupFileNames.begin()+i);
327 if (inputDir != "") {
328 string path = m->hasPath(groupFileNames[i]);
329 //if the user has not given a path then, add inputdir. else leave path alone.
330 if (path == "") { groupFileNames[i] = inputDir + groupFileNames[i]; }
336 ableToOpen = m->openInputFile(groupFileNames[i], in, "noerror");
338 //if you can't open it, try default location
339 if (ableToOpen == 1) {
340 if (m->getDefaultPath() != "") { //default path is set
341 string tryPath = m->getDefaultPath() + m->getSimpleName(groupFileNames[i]);
342 m->mothurOut("Unable to open " + groupFileNames[i] + ". Trying default " + tryPath); m->mothurOutEndLine();
344 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
346 groupFileNames[i] = tryPath;
350 if (ableToOpen == 1) {
351 if (m->getOutputDir() != "") { //default path is set
352 string tryPath = m->getOutputDir() + m->getSimpleName(groupFileNames[i]);
353 m->mothurOut("Unable to open " + groupFileNames[i] + ". Trying output directory " + tryPath); m->mothurOutEndLine();
355 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
357 groupFileNames[i] = tryPath;
363 if (ableToOpen == 1) {
364 m->mothurOut("Unable to open " + groupFileNames[i] + ". It will be disregarded."); m->mothurOutEndLine();
365 //erase from file list
366 groupFileNames.erase(groupFileNames.begin()+i);
369 m->setGroupFile(groupFileNames[i]);
374 //make sure there is at least one valid file left
375 if (groupFileNames.size() == 0) { m->mothurOut("[ERROR]: no valid group files."); m->mothurOutEndLine(); abort = true; }
378 if (hasGroup && (groupFileNames.size() != fastaFileNames.size())) { m->mothurOut("[ERROR]: The number of groupfiles does not match the number of fastafiles, please correct."); m->mothurOutEndLine(); abort=true; }
381 //if the user changes the output directory command factory will send this info to us in the output parameter
382 outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = ""; }
385 it = parameters.find("reference");
386 //user has given a template file
387 if(it != parameters.end()){
388 if (it->second == "self") { templatefile = "self"; }
390 path = m->hasPath(it->second);
391 //if the user has not given a path then, add inputdir. else leave path alone.
392 if (path == "") { parameters["reference"] = inputDir + it->second; }
394 templatefile = validParameter.validFile(parameters, "reference", true);
395 if (templatefile == "not open") { abort = true; }
396 else if (templatefile == "not found") { //check for saved reference sequences
397 if (rdb->getSavedReference() != "") {
398 templatefile = rdb->getSavedReference();
399 m->mothurOutEndLine(); m->mothurOut("Using sequences from " + rdb->getSavedReference() + "."); m->mothurOutEndLine();
401 m->mothurOut("[ERROR]: You don't have any saved reference sequences and the reference parameter is a required.");
402 m->mothurOutEndLine();
407 }else if (hasName) { templatefile = "self"; }
409 if (rdb->getSavedReference() != "") {
410 templatefile = rdb->getSavedReference();
411 m->mothurOutEndLine(); m->mothurOut("Using sequences from " + rdb->getSavedReference() + "."); m->mothurOutEndLine();
413 m->mothurOut("[ERROR]: You don't have any saved reference sequences and the reference parameter is a required.");
414 m->mothurOutEndLine();
415 templatefile = ""; abort = true;
419 string temp = validParameter.validFile(parameters, "processors", false); if (temp == "not found"){ temp = m->getProcessors(); }
420 m->setProcessors(temp);
421 m->mothurConvert(temp, processors);
423 abskew = validParameter.validFile(parameters, "abskew", false); if (abskew == "not found"){ useAbskew = false; abskew = "1.9"; }else{ useAbskew = true; }
424 if (useAbskew && templatefile != "self") { m->mothurOut("The abskew parameter is only valid with template=self, ignoring."); m->mothurOutEndLine(); useAbskew = false; }
426 temp = validParameter.validFile(parameters, "chimealns", false); if (temp == "not found") { temp = "f"; }
427 chimealns = m->isTrue(temp);
429 minh = validParameter.validFile(parameters, "minh", false); if (minh == "not found") { useMinH = false; minh = "0.3"; } else{ useMinH = true; }
430 mindiv = validParameter.validFile(parameters, "mindiv", false); if (mindiv == "not found") { useMindiv = false; mindiv = "0.5"; } else{ useMindiv = true; }
431 xn = validParameter.validFile(parameters, "xn", false); if (xn == "not found") { useXn = false; xn = "8.0"; } else{ useXn = true; }
432 dn = validParameter.validFile(parameters, "dn", false); if (dn == "not found") { useDn = false; dn = "1.4"; } else{ useDn = true; }
433 xa = validParameter.validFile(parameters, "xa", false); if (xa == "not found") { useXa = false; xa = "1"; } else{ useXa = true; }
434 chunks = validParameter.validFile(parameters, "chunks", false); if (chunks == "not found") { useChunks = false; chunks = "4"; } else{ useChunks = true; }
435 minchunk = validParameter.validFile(parameters, "minchunk", false); if (minchunk == "not found") { useMinchunk = false; minchunk = "64"; } else{ useMinchunk = true; }
436 idsmoothwindow = validParameter.validFile(parameters, "idsmoothwindow", false); if (idsmoothwindow == "not found") { useIdsmoothwindow = false; idsmoothwindow = "32"; } else{ useIdsmoothwindow = true; }
437 //minsmoothid = validParameter.validFile(parameters, "minsmoothid", false); if (minsmoothid == "not found") { useMinsmoothid = false; minsmoothid = "0.95"; } else{ useMinsmoothid = true; }
438 maxp = validParameter.validFile(parameters, "maxp", false); if (maxp == "not found") { useMaxp = false; maxp = "2"; } else{ useMaxp = true; }
439 minlen = validParameter.validFile(parameters, "minlen", false); if (minlen == "not found") { useMinlen = false; minlen = "10"; } else{ useMinlen = true; }
440 maxlen = validParameter.validFile(parameters, "maxlen", false); if (maxlen == "not found") { useMaxlen = false; maxlen = "10000"; } else{ useMaxlen = true; }
442 temp = validParameter.validFile(parameters, "ucl", false); if (temp == "not found") { temp = "f"; }
443 ucl = m->isTrue(temp);
445 queryfract = validParameter.validFile(parameters, "queryfract", false); if (queryfract == "not found") { useQueryfract = false; queryfract = "0.5"; } else{ useQueryfract = true; }
446 if (!ucl && useQueryfract) { m->mothurOut("queryfact may only be used when ucl=t, ignoring."); m->mothurOutEndLine(); useQueryfract = false; }
448 temp = validParameter.validFile(parameters, "skipgaps", false); if (temp == "not found") { temp = "t"; }
449 skipgaps = m->isTrue(temp);
451 temp = validParameter.validFile(parameters, "skipgaps2", false); if (temp == "not found") { temp = "t"; }
452 skipgaps2 = m->isTrue(temp);
454 if (hasName && (templatefile != "self")) { m->mothurOut("You have provided a namefile and the reference parameter is not set to self. I am not sure what reference you are trying to use, aborting."); m->mothurOutEndLine(); abort=true; }
455 if (hasGroup && (templatefile != "self")) { m->mothurOut("You have provided a group file and the reference parameter is not set to self. I am not sure what reference you are trying to use, aborting."); m->mothurOutEndLine(); abort=true; }
457 //look for uchime exe
459 string tempPath = path;
460 for (int i = 0; i < path.length(); i++) { tempPath[i] = tolower(path[i]); }
461 path = path.substr(0, (tempPath.find_last_of('m')));
463 string uchimeCommand;
464 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
465 uchimeCommand = path + "uchime"; // format the database, -o option gives us the ability
467 m->mothurOut("[DEBUG]: Uchime location using \"which uchime\" = ");
468 Command* newCommand = new SystemCommand("which uchime"); m->mothurOutEndLine();
469 newCommand->execute();
471 m->mothurOut("[DEBUG]: Mothur's location using \"which mothur\" = ");
472 newCommand = new SystemCommand("which mothur"); m->mothurOutEndLine();
473 newCommand->execute();
477 uchimeCommand = path + "uchime.exe";
480 //test to make sure uchime exists
482 uchimeCommand = m->getFullPathName(uchimeCommand);
483 int ableToOpen = m->openInputFile(uchimeCommand, in, "no error"); in.close();
484 if(ableToOpen == 1) {
485 m->mothurOut(uchimeCommand + " file does not exist. Checking path... \n");
486 //check to see if uchime is in the path??
488 string uLocation = m->findProgramPath("uchime");
492 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
493 ableToOpen = m->openInputFile(uLocation, in2, "no error"); in2.close();
495 ableToOpen = m->openInputFile((uLocation + ".exe"), in2, "no error"); in2.close();
498 if(ableToOpen == 1) { m->mothurOut("[ERROR]: " + uLocation + " file does not exist. mothur requires the uchime executable."); m->mothurOutEndLine(); abort = true; }
499 else { m->mothurOut("Found uchime in your path, using " + uLocation + "\n");uchimeLocation = uLocation; }
500 }else { uchimeLocation = uchimeCommand; }
502 uchimeLocation = m->getFullPathName(uchimeLocation);
505 catch(exception& e) {
506 m->errorOut(e, "ChimeraSlayerCommand", "ChimeraSlayerCommand");
510 //***************************************************************************************************************
512 int ChimeraUchimeCommand::execute(){
514 if (abort == true) { if (calledHelp) { return 0; } return 2; }
516 m->mothurOut("\nuchime by Robert C. Edgar\nhttp://drive5.com/uchime\nThis code is donated to the public domain.\n\n");
518 for (int s = 0; s < fastaFileNames.size(); s++) {
520 m->mothurOut("Checking sequences from " + fastaFileNames[s] + " ..." ); m->mothurOutEndLine();
522 int start = time(NULL);
523 string nameFile = "";
524 if (outputDir == "") { outputDir = m->hasPath(fastaFileNames[s]); }//if user entered a file with a path then preserve it
525 string outputFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + "uchime.chimera";
526 string accnosFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + "uchime.accnos";
527 string alnsFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + "uchime.alns";
528 string newFasta = m->getRootName(fastaFileNames[s]) + "temp";
530 //you provided a groupfile
531 string groupFile = "";
532 if (groupFileNames.size() != 0) { groupFile = groupFileNames[s]; }
534 if ((templatefile == "self") && (groupFile == "")) { //you want to run uchime with a reference template
536 if (processors != 1) { m->mothurOut("When using template=self, mothur can only use 1 processor, continuing."); m->mothurOutEndLine(); processors = 1; }
537 if (nameFileNames.size() != 0) { //you provided a namefile and we don't need to create one
538 nameFile = nameFileNames[s];
539 }else { nameFile = getNamesFile(fastaFileNames[s]); }
541 map<string, string> seqs;
542 readFasta(fastaFileNames[s], seqs); if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } return 0; }
545 vector<seqPriorityNode> nameMapCount;
546 int error = m->readNames(nameFile, nameMapCount, seqs); if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } return 0; }
547 if (error == 1) { for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } return 0; }
548 if (seqs.size() != nameMapCount.size()) { m->mothurOut( "The number of sequences in your fastafile does not match the number of sequences in your namefile, aborting."); m->mothurOutEndLine(); for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } return 0; }
550 printFile(nameMapCount, newFasta);
551 fastaFileNames[s] = newFasta;
554 if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } return 0; }
556 if (groupFile != "") {
557 if (nameFileNames.size() != 0) { //you provided a namefile and we don't need to create one
558 nameFile = nameFileNames[s];
559 }else { nameFile = getNamesFile(fastaFileNames[s]); }
561 //Parse sequences by group
562 SequenceParser parser(groupFile, fastaFileNames[s], nameFile);
563 vector<string> groups = parser.getNamesOfGroups();
565 if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } return 0; }
568 ofstream out, out1, out2;
569 m->openOutputFile(outputFileName, out); out.close();
570 m->openOutputFile(accnosFileName, out1); out1.close();
571 if (chimealns) { m->openOutputFile(alnsFileName, out2); out2.close(); }
574 if(processors == 1) { totalSeqs = driverGroups(parser, outputFileName, newFasta, accnosFileName, alnsFileName, 0, groups.size(), groups); }
575 else { totalSeqs = createProcessesGroups(parser, outputFileName, newFasta, accnosFileName, alnsFileName, groups, nameFile, groupFile, fastaFileNames[s]); }
577 if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } return 0; }
579 int totalChimeras = deconvoluteResults(parser, outputFileName, accnosFileName, alnsFileName);
581 m->mothurOutEndLine(); m->mothurOut("It took " + toString(time(NULL) - start) + " secs to check " + toString(totalSeqs) + " sequences. " + toString(totalChimeras) + " chimeras were found."); m->mothurOutEndLine();
582 m->mothurOut("The number of sequences checked may be larger than the number of unique sequences because some sequences are found in several samples."); m->mothurOutEndLine();
584 if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } return 0; }
587 if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } return 0; }
592 if(processors == 1){ numSeqs = driver(outputFileName, fastaFileNames[s], accnosFileName, alnsFileName, numChimeras); }
593 else{ numSeqs = createProcesses(outputFileName, fastaFileNames[s], accnosFileName, alnsFileName, numChimeras); }
597 m->openOutputFile(outputFileName+".temp", out);
598 out << "Score\tQuery\tParentA\tParentB\tIdQM\tIdQA\tIdQB\tIdAB\tIdQT\tLY\tLN\tLA\tRY\tRN\tRA\tDiv\tYN\n";
601 m->appendFiles(outputFileName, outputFileName+".temp");
602 m->mothurRemove(outputFileName); rename((outputFileName+".temp").c_str(), outputFileName.c_str());
604 if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } return 0; }
606 //remove file made for uchime
607 if (templatefile == "self") { m->mothurRemove(fastaFileNames[s]); }
609 m->mothurOutEndLine(); m->mothurOut("It took " + toString(time(NULL) - start) + " secs to check " + toString(numSeqs) + " sequences. " + toString(numChimeras) + " chimeras were found."); m->mothurOutEndLine();
612 outputNames.push_back(outputFileName); outputTypes["chimera"].push_back(outputFileName);
613 outputNames.push_back(accnosFileName); outputTypes["accnos"].push_back(accnosFileName);
614 if (chimealns) { outputNames.push_back(alnsFileName); outputTypes["alns"].push_back(alnsFileName); }
617 //set accnos file as new current accnosfile
619 itTypes = outputTypes.find("accnos");
620 if (itTypes != outputTypes.end()) {
621 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setAccnosFile(current); }
624 m->mothurOutEndLine();
625 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
626 for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); }
627 m->mothurOutEndLine();
632 catch(exception& e) {
633 m->errorOut(e, "ChimeraUchimeCommand", "execute");
637 //**********************************************************************************************************************
638 int ChimeraUchimeCommand::deconvoluteResults(SequenceParser& parser, string outputFileName, string accnosFileName, string alnsFileName){
640 map<string, string> uniqueNames = parser.getAllSeqsMap();
641 map<string, string>::iterator itUnique;
646 m->openInputFile(accnosFileName, in2);
649 m->openOutputFile(accnosFileName+".temp", out2);
652 set<string> namesInFile; //this is so if a sequence is found to be chimera in several samples we dont write it to the results file more than once
653 set<string>::iterator itNames;
654 set<string> chimerasInFile;
655 set<string>::iterator itChimeras;
659 if (m->control_pressed) { in2.close(); out2.close(); m->mothurRemove(outputFileName); m->mothurRemove((accnosFileName+".temp")); return 0; }
661 in2 >> name; m->gobble(in2);
664 itUnique = uniqueNames.find(name);
666 if (itUnique == uniqueNames.end()) { m->mothurOut("[ERROR]: trouble parsing accnos results. Cannot find "+ name + "."); m->mothurOutEndLine(); m->control_pressed = true; }
668 itChimeras = chimerasInFile.find((itUnique->second));
670 if (itChimeras == chimerasInFile.end()) {
671 out2 << itUnique->second << endl;
672 chimerasInFile.insert((itUnique->second));
680 m->mothurRemove(accnosFileName);
681 rename((accnosFileName+".temp").c_str(), accnosFileName.c_str());
687 m->openInputFile(outputFileName, in);
690 m->openOutputFile(outputFileName+".temp", out); out.setf(ios::fixed, ios::floatfield); out.setf(ios::showpoint);
691 out << "Score\tQuery\tParentA\tParentB\tIdQM\tIdQA\tIdQB\tIdAB\tIdQT\tLY\tLN\tLA\tRY\tRN\tRA\tDiv\tYN\n";
694 string parent1, parent2, temp2, temp3, temp4, temp5, temp6, temp7, temp8, temp9, temp10, temp11, temp12, temp13, flag;
697 //assumptions - in file each read will always look like - if uchime source is updated, revisit this code.
698 /* 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
699 0.000000 F11Fcsw_33372/ab=18/ * * * * * * * * * * * * * * N
700 0.018300 F11Fcsw_14980/ab=16/ F11Fcsw_1915/ab=35/ F11Fcsw_6032/ab=42/ 79.9 78.7 78.2 78.7 79.2 3 0 5 11 10 20 1.46 N
705 if (m->control_pressed) { in.close(); out.close(); m->mothurRemove((outputFileName+".temp")); return 0; }
708 in >> temp1; m->gobble(in);
709 in >> name; m->gobble(in);
710 in >> parent1; m->gobble(in);
711 in >> parent2; m->gobble(in);
712 in >> temp2 >> temp3 >> temp4 >> temp5 >> temp6 >> temp7 >> temp8 >> temp9 >> temp10 >> temp11 >> temp12 >> temp13 >> flag;
715 //parse name - name will look like U68590/ab=1/
716 string restOfName = "";
717 int pos = name.find_first_of('/');
718 if (pos != string::npos) {
719 restOfName = name.substr(pos);
720 name = name.substr(0, pos);
724 itUnique = uniqueNames.find(name);
726 if (itUnique == uniqueNames.end()) { m->mothurOut("[ERROR]: trouble parsing chimera results. Cannot find "+ name + "."); m->mothurOutEndLine(); m->control_pressed = true; }
728 name = itUnique->second;
729 //is this name already in the file
730 itNames = namesInFile.find((name));
732 if (itNames == namesInFile.end()) { //no not in file
733 if (flag == "N") { //are you really a no??
734 //is this sequence really not chimeric??
735 itChimeras = chimerasInFile.find(name);
737 //then you really are a no so print, otherwise skip
738 if (itChimeras == chimerasInFile.end()) { print = true; }
739 }else{ print = true; }
744 out << temp1 << '\t' << name << restOfName << '\t';
745 namesInFile.insert(name);
747 //parse parent1 names
748 if (parent1 != "*") {
750 pos = parent1.find_first_of('/');
751 if (pos != string::npos) {
752 restOfName = parent1.substr(pos);
753 parent1 = parent1.substr(0, pos);
756 itUnique = uniqueNames.find(parent1);
757 if (itUnique == uniqueNames.end()) { m->mothurOut("[ERROR]: trouble parsing chimera results. Cannot find parentA "+ parent1 + "."); m->mothurOutEndLine(); m->control_pressed = true; }
758 else { out << itUnique->second << restOfName << '\t'; }
759 }else { out << parent1 << '\t'; }
761 //parse parent2 names
762 if (parent2 != "*") {
764 pos = parent2.find_first_of('/');
765 if (pos != string::npos) {
766 restOfName = parent2.substr(pos);
767 parent2 = parent2.substr(0, pos);
770 itUnique = uniqueNames.find(parent2);
771 if (itUnique == uniqueNames.end()) { m->mothurOut("[ERROR]: trouble parsing chimera results. Cannot find parentB "+ parent2 + "."); m->mothurOutEndLine(); m->control_pressed = true; }
772 else { out << itUnique->second << restOfName << '\t'; }
773 }else { out << parent2 << '\t'; }
775 out << temp2 << '\t' << temp3 << '\t' << temp4 << '\t' << temp5 << '\t' << temp6 << '\t' << temp7 << '\t' << temp8 << '\t' << temp9 << '\t' << temp10 << '\t' << temp11 << '\t' << temp12 << temp13 << '\t' << flag << endl;
781 m->mothurRemove(outputFileName);
782 rename((outputFileName+".temp").c_str(), outputFileName.c_str());
786 //assumptions - in file each read will always look like - if uchime source is updated, revisit this code.
788 ------------------------------------------------------------------------
789 Query ( 179 nt) F21Fcsw_11639/ab=591/
790 ParentA ( 179 nt) F11Fcsw_6529/ab=1625/
791 ParentB ( 181 nt) F21Fcsw_12128/ab=1827/
793 A 1 AAGgAAGAtTAATACaagATGgCaTCatgAGtccgCATgTtcAcatGATTAAAG--gTaTtcCGGTagacGATGGGGATG 78
794 Q 1 AAGTAAGACTAATACCCAATGACGTCTCTAGAAGACATCTGAAAGAGATTAAAG--ATTTATCGGTGATGGATGGGGATG 78
795 B 1 AAGgAAGAtTAATcCaggATGggaTCatgAGttcACATgTccgcatGATTAAAGgtATTTtcCGGTagacGATGGGGATG 80
796 Diffs N N A N?N N N NNN N?NB N ?NaNNN B B NN NNNN
797 Votes 0 0 + 000 0 0 000 000+ 0 00!000 + 00 0000
798 Model AAAAAAAAAAAAAAAAAAAAAAxBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB
800 A 79 CGTtccATTAGaTaGTaGGCGGGGTAACGGCCCACCtAGtCttCGATggaTAGGGGTTCTGAGAGGAAGGTCCCCCACAT 158
801 Q 79 CGTCTGATTAGCTTGTTGGCGGGGTAACGGCCCACCAAGGCAACGATCAGTAGGGGTTCTGAGAGGAAGGTCCCCCACAT 158
802 B 81 CGTtccATTAGaTaGTaGGCGGGGTAACGGCCCACCtAGtCAACGATggaTAGGGGTTCTGAGAGGAAGGTCCCCCACAT 160
803 Diffs NNN N N N N N BB NNN
804 Votes 000 0 0 0 0 0 ++ 000
805 Model BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB
807 A 159 TGGAACTGAGACACGGTCCAA 179
808 Q 159 TGGAACTGAGACACGGTCCAA 179
809 B 161 TGGAACTGAGACACGGTCCAA 181
812 Model BBBBBBBBBBBBBBBBBBBBB
814 Ids. QA 76.6%, QB 77.7%, AB 93.7%, QModel 78.9%, Div. +1.5%
815 Diffs Left 7: N 0, A 6, Y 1 (14.3%); Right 35: N 1, A 30, Y 4 (11.4%), Score 0.0047
819 m->openInputFile(alnsFileName, in3);
822 m->openOutputFile(alnsFileName+".temp", out3); out3.setf(ios::fixed, ios::floatfield); out3.setf(ios::showpoint);
829 if (m->control_pressed) { in3.close(); out3.close(); m->mothurRemove(outputFileName); m->mothurRemove((accnosFileName)); m->mothurRemove((alnsFileName+".temp")); return 0; }
832 line = m->getline(in3);
836 istringstream iss(line);
839 //are you a name line
840 if ((temp == "Query") || (temp == "ParentA") || (temp == "ParentB")) {
842 for (int i = 0; i < line.length(); i++) {
844 if (line[i] == ')') { break; }
845 else { out3 << line[i]; }
848 if (spot == (line.length() - 1)) { m->mothurOut("[ERROR]: could not line sequence name in line " + line + "."); m->mothurOutEndLine(); m->control_pressed = true; }
849 else if ((spot+2) > (line.length() - 1)) { m->mothurOut("[ERROR]: could not line sequence name in line " + line + "."); m->mothurOutEndLine(); m->control_pressed = true; }
851 out << line[spot] << line[spot+1];
853 name = line.substr(spot+2);
855 //parse name - name will either look like U68590/ab=1/ or U68590
856 string restOfName = "";
857 int pos = name.find_first_of('/');
858 if (pos != string::npos) {
859 restOfName = name.substr(pos);
860 name = name.substr(0, pos);
864 itUnique = uniqueNames.find(name);
866 if (itUnique == uniqueNames.end()) { m->mothurOut("[ERROR]: trouble parsing alns results. Cannot find "+ name + "."); m->mothurOutEndLine();m->control_pressed = true; }
868 //only limit repeats on query names
869 if (temp == "Query") {
870 itNames = namesInFile.find((itUnique->second));
872 if (itNames == namesInFile.end()) {
873 out << itUnique->second << restOfName << endl;
874 namesInFile.insert((itUnique->second));
876 }else { out << itUnique->second << restOfName << endl; }
881 }else { //not need to alter line
882 out3 << line << endl;
884 }else { out3 << endl; }
889 m->mothurRemove(alnsFileName);
890 rename((alnsFileName+".temp").c_str(), alnsFileName.c_str());
895 catch(exception& e) {
896 m->errorOut(e, "ChimeraUchimeCommand", "deconvoluteResults");
900 //**********************************************************************************************************************
901 int ChimeraUchimeCommand::printFile(vector<seqPriorityNode>& nameMapCount, string filename){
904 sort(nameMapCount.begin(), nameMapCount.end(), compareSeqPriorityNodes);
907 m->openOutputFile(filename, out);
909 //print new file in order of
910 for (int i = 0; i < nameMapCount.size(); i++) {
911 out << ">" << nameMapCount[i].name << "/ab=" << nameMapCount[i].numIdentical << "/" << endl << nameMapCount[i].seq << endl;
917 catch(exception& e) {
918 m->errorOut(e, "ChimeraUchimeCommand", "printFile");
922 //**********************************************************************************************************************
923 int ChimeraUchimeCommand::readFasta(string filename, map<string, string>& seqs){
925 //create input file for uchime
926 //read through fastafile and store info
928 m->openInputFile(filename, in);
932 if (m->control_pressed) { in.close(); return 0; }
934 Sequence seq(in); m->gobble(in);
935 seqs[seq.getName()] = seq.getAligned();
941 catch(exception& e) {
942 m->errorOut(e, "ChimeraUchimeCommand", "readFasta");
946 //**********************************************************************************************************************
948 string ChimeraUchimeCommand::getNamesFile(string& inputFile){
950 string nameFile = "";
952 m->mothurOutEndLine(); m->mothurOut("No namesfile given, running unique.seqs command to generate one."); m->mothurOutEndLine(); m->mothurOutEndLine();
954 //use unique.seqs to create new name and fastafile
955 string inputString = "fasta=" + inputFile;
956 m->mothurOut("/******************************************/"); m->mothurOutEndLine();
957 m->mothurOut("Running command: unique.seqs(" + inputString + ")"); m->mothurOutEndLine();
958 m->mothurCalling = true;
960 Command* uniqueCommand = new DeconvoluteCommand(inputString);
961 uniqueCommand->execute();
963 map<string, vector<string> > filenames = uniqueCommand->getOutputFiles();
965 delete uniqueCommand;
966 m->mothurCalling = false;
967 m->mothurOut("/******************************************/"); m->mothurOutEndLine();
969 nameFile = filenames["name"][0];
970 inputFile = filenames["fasta"][0];
974 catch(exception& e) {
975 m->errorOut(e, "ChimeraUchimeCommand", "getNamesFile");
979 //**********************************************************************************************************************
980 int ChimeraUchimeCommand::driverGroups(SequenceParser& parser, string outputFName, string filename, string accnos, string alns, int start, int end, vector<string> groups){
986 for (int i = start; i < end; i++) {
987 int start = time(NULL); if (m->control_pressed) { return 0; }
989 int error = parser.getSeqs(groups[i], filename, true); if ((error == 1) || m->control_pressed) { return 0; }
991 int numSeqs = driver((outputFName + groups[i]), filename, (accnos+ groups[i]), (alns+ groups[i]), numChimeras);
992 totalSeqs += numSeqs;
994 if (m->control_pressed) { return 0; }
996 //remove file made for uchime
997 if (!m->debug) { m->mothurRemove(filename); }
998 else { m->mothurOut("[DEBUG]: saving file: " + filename + ".\n"); }
1001 m->appendFiles((outputFName+groups[i]), outputFName); m->mothurRemove((outputFName+groups[i]));
1002 m->appendFiles((accnos+groups[i]), accnos); m->mothurRemove((accnos+groups[i]));
1003 if (chimealns) { m->appendFiles((alns+groups[i]), alns); m->mothurRemove((alns+groups[i])); }
1005 m->mothurOutEndLine(); m->mothurOut("It took " + toString(time(NULL) - start) + " secs to check " + toString(numSeqs) + " sequences from group " + groups[i] + "."); m->mothurOutEndLine();
1011 catch(exception& e) {
1012 m->errorOut(e, "ChimeraUchimeCommand", "driverGroups");
1016 //**********************************************************************************************************************
1018 int ChimeraUchimeCommand::driver(string outputFName, string filename, string accnos, string alns, int& numChimeras){
1021 outputFName = m->getFullPathName(outputFName);
1022 filename = m->getFullPathName(filename);
1023 alns = m->getFullPathName(alns);
1025 //to allow for spaces in the path
1026 outputFName = "\"" + outputFName + "\"";
1027 filename = "\"" + filename + "\"";
1028 alns = "\"" + alns + "\"";
1030 vector<char*> cPara;
1032 string uchimeCommand = uchimeLocation;
1033 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
1034 uchimeCommand += " ";
1036 uchimeCommand = "\"" + uchimeCommand + "\"";
1040 tempUchime= new char[uchimeCommand.length()+1];
1042 strncat(tempUchime, uchimeCommand.c_str(), uchimeCommand.length());
1043 cPara.push_back(tempUchime);
1045 char* tempIn = new char[8];
1046 *tempIn = '\0'; strncat(tempIn, "--input", 7);
1047 //strcpy(tempIn, "--input");
1048 cPara.push_back(tempIn);
1049 char* temp = new char[filename.length()+1];
1050 *temp = '\0'; strncat(temp, filename.c_str(), filename.length());
1051 //strcpy(temp, filename.c_str());
1052 cPara.push_back(temp);
1054 //are you using a reference file
1055 if (templatefile != "self") {
1056 //add reference file
1057 char* tempRef = new char[5];
1058 //strcpy(tempRef, "--db");
1059 *tempRef = '\0'; strncat(tempRef, "--db", 4);
1060 cPara.push_back(tempRef);
1061 char* tempR = new char[templatefile.length()+1];
1062 //strcpy(tempR, templatefile.c_str());
1063 *tempR = '\0'; strncat(tempR, templatefile.c_str(), templatefile.length());
1064 cPara.push_back(tempR);
1067 char* tempO = new char[12];
1068 *tempO = '\0'; strncat(tempO, "--uchimeout", 11);
1069 //strcpy(tempO, "--uchimeout");
1070 cPara.push_back(tempO);
1071 char* tempout = new char[outputFName.length()+1];
1072 //strcpy(tempout, outputFName.c_str());
1073 *tempout = '\0'; strncat(tempout, outputFName.c_str(), outputFName.length());
1074 cPara.push_back(tempout);
1077 char* tempA = new char[13];
1078 *tempA = '\0'; strncat(tempA, "--uchimealns", 12);
1079 //strcpy(tempA, "--uchimealns");
1080 cPara.push_back(tempA);
1081 char* tempa = new char[alns.length()+1];
1082 //strcpy(tempa, alns.c_str());
1083 *tempa = '\0'; strncat(tempa, alns.c_str(), alns.length());
1084 cPara.push_back(tempa);
1088 char* tempskew = new char[9];
1089 *tempskew = '\0'; strncat(tempskew, "--abskew", 8);
1090 //strcpy(tempskew, "--abskew");
1091 cPara.push_back(tempskew);
1092 char* tempSkew = new char[abskew.length()+1];
1093 //strcpy(tempSkew, abskew.c_str());
1094 *tempSkew = '\0'; strncat(tempSkew, abskew.c_str(), abskew.length());
1095 cPara.push_back(tempSkew);
1099 char* tempminh = new char[7];
1100 *tempminh = '\0'; strncat(tempminh, "--minh", 6);
1101 //strcpy(tempminh, "--minh");
1102 cPara.push_back(tempminh);
1103 char* tempMinH = new char[minh.length()+1];
1104 *tempMinH = '\0'; strncat(tempMinH, minh.c_str(), minh.length());
1105 //strcpy(tempMinH, minh.c_str());
1106 cPara.push_back(tempMinH);
1110 char* tempmindiv = new char[9];
1111 *tempmindiv = '\0'; strncat(tempmindiv, "--mindiv", 8);
1112 //strcpy(tempmindiv, "--mindiv");
1113 cPara.push_back(tempmindiv);
1114 char* tempMindiv = new char[mindiv.length()+1];
1115 *tempMindiv = '\0'; strncat(tempMindiv, mindiv.c_str(), mindiv.length());
1116 //strcpy(tempMindiv, mindiv.c_str());
1117 cPara.push_back(tempMindiv);
1121 char* tempxn = new char[5];
1122 //strcpy(tempxn, "--xn");
1123 *tempxn = '\0'; strncat(tempxn, "--xn", 4);
1124 cPara.push_back(tempxn);
1125 char* tempXn = new char[xn.length()+1];
1126 //strcpy(tempXn, xn.c_str());
1127 *tempXn = '\0'; strncat(tempXn, xn.c_str(), xn.length());
1128 cPara.push_back(tempXn);
1132 char* tempdn = new char[5];
1133 //strcpy(tempdn, "--dn");
1134 *tempdn = '\0'; strncat(tempdn, "--dn", 4);
1135 cPara.push_back(tempdn);
1136 char* tempDn = new char[dn.length()+1];
1137 *tempDn = '\0'; strncat(tempDn, dn.c_str(), dn.length());
1138 //strcpy(tempDn, dn.c_str());
1139 cPara.push_back(tempDn);
1143 char* tempxa = new char[5];
1144 //strcpy(tempxa, "--xa");
1145 *tempxa = '\0'; strncat(tempxa, "--xa", 4);
1146 cPara.push_back(tempxa);
1147 char* tempXa = new char[xa.length()+1];
1148 *tempXa = '\0'; strncat(tempXa, xa.c_str(), xa.length());
1149 //strcpy(tempXa, xa.c_str());
1150 cPara.push_back(tempXa);
1154 char* tempchunks = new char[9];
1155 //strcpy(tempchunks, "--chunks");
1156 *tempchunks = '\0'; strncat(tempchunks, "--chunks", 8);
1157 cPara.push_back(tempchunks);
1158 char* tempChunks = new char[chunks.length()+1];
1159 *tempChunks = '\0'; strncat(tempChunks, chunks.c_str(), chunks.length());
1160 //strcpy(tempChunks, chunks.c_str());
1161 cPara.push_back(tempChunks);
1165 char* tempminchunk = new char[11];
1166 //strcpy(tempminchunk, "--minchunk");
1167 *tempminchunk = '\0'; strncat(tempminchunk, "--minchunk", 10);
1168 cPara.push_back(tempminchunk);
1169 char* tempMinchunk = new char[minchunk.length()+1];
1170 *tempMinchunk = '\0'; strncat(tempMinchunk, minchunk.c_str(), minchunk.length());
1171 //strcpy(tempMinchunk, minchunk.c_str());
1172 cPara.push_back(tempMinchunk);
1175 if (useIdsmoothwindow) {
1176 char* tempidsmoothwindow = new char[17];
1177 *tempidsmoothwindow = '\0'; strncat(tempidsmoothwindow, "--idsmoothwindow", 16);
1178 //strcpy(tempidsmoothwindow, "--idsmoothwindow");
1179 cPara.push_back(tempidsmoothwindow);
1180 char* tempIdsmoothwindow = new char[idsmoothwindow.length()+1];
1181 *tempIdsmoothwindow = '\0'; strncat(tempIdsmoothwindow, idsmoothwindow.c_str(), idsmoothwindow.length());
1182 //strcpy(tempIdsmoothwindow, idsmoothwindow.c_str());
1183 cPara.push_back(tempIdsmoothwindow);
1186 /*if (useMinsmoothid) {
1187 char* tempminsmoothid = new char[14];
1188 //strcpy(tempminsmoothid, "--minsmoothid");
1189 *tempminsmoothid = '\0'; strncat(tempminsmoothid, "--minsmoothid", 13);
1190 cPara.push_back(tempminsmoothid);
1191 char* tempMinsmoothid = new char[minsmoothid.length()+1];
1192 *tempMinsmoothid = '\0'; strncat(tempMinsmoothid, minsmoothid.c_str(), minsmoothid.length());
1193 //strcpy(tempMinsmoothid, minsmoothid.c_str());
1194 cPara.push_back(tempMinsmoothid);
1198 char* tempmaxp = new char[7];
1199 //strcpy(tempmaxp, "--maxp");
1200 *tempmaxp = '\0'; strncat(tempmaxp, "--maxp", 6);
1201 cPara.push_back(tempmaxp);
1202 char* tempMaxp = new char[maxp.length()+1];
1203 *tempMaxp = '\0'; strncat(tempMaxp, maxp.c_str(), maxp.length());
1204 //strcpy(tempMaxp, maxp.c_str());
1205 cPara.push_back(tempMaxp);
1209 char* tempskipgaps = new char[13];
1210 //strcpy(tempskipgaps, "--[no]skipgaps");
1211 *tempskipgaps = '\0'; strncat(tempskipgaps, "--noskipgaps", 12);
1212 cPara.push_back(tempskipgaps);
1216 char* tempskipgaps2 = new char[14];
1217 //strcpy(tempskipgaps2, "--[no]skipgaps2");
1218 *tempskipgaps2 = '\0'; strncat(tempskipgaps2, "--noskipgaps2", 13);
1219 cPara.push_back(tempskipgaps2);
1223 char* tempminlen = new char[9];
1224 *tempminlen = '\0'; strncat(tempminlen, "--minlen", 8);
1225 //strcpy(tempminlen, "--minlen");
1226 cPara.push_back(tempminlen);
1227 char* tempMinlen = new char[minlen.length()+1];
1228 //strcpy(tempMinlen, minlen.c_str());
1229 *tempMinlen = '\0'; strncat(tempMinlen, minlen.c_str(), minlen.length());
1230 cPara.push_back(tempMinlen);
1234 char* tempmaxlen = new char[9];
1235 //strcpy(tempmaxlen, "--maxlen");
1236 *tempmaxlen = '\0'; strncat(tempmaxlen, "--maxlen", 8);
1237 cPara.push_back(tempmaxlen);
1238 char* tempMaxlen = new char[maxlen.length()+1];
1239 *tempMaxlen = '\0'; strncat(tempMaxlen, maxlen.c_str(), maxlen.length());
1240 //strcpy(tempMaxlen, maxlen.c_str());
1241 cPara.push_back(tempMaxlen);
1245 char* tempucl = new char[5];
1246 strcpy(tempucl, "--ucl");
1247 cPara.push_back(tempucl);
1250 if (useQueryfract) {
1251 char* tempqueryfract = new char[13];
1252 *tempqueryfract = '\0'; strncat(tempqueryfract, "--queryfract", 12);
1253 //strcpy(tempqueryfract, "--queryfract");
1254 cPara.push_back(tempqueryfract);
1255 char* tempQueryfract = new char[queryfract.length()+1];
1256 *tempQueryfract = '\0'; strncat(tempQueryfract, queryfract.c_str(), queryfract.length());
1257 //strcpy(tempQueryfract, queryfract.c_str());
1258 cPara.push_back(tempQueryfract);
1262 char** uchimeParameters;
1263 uchimeParameters = new char*[cPara.size()];
1264 string commandString = "";
1265 for (int i = 0; i < cPara.size(); i++) { uchimeParameters[i] = cPara[i]; commandString += toString(cPara[i]) + " "; }
1266 //int numArgs = cPara.size();
1268 //uchime_main(numArgs, uchimeParameters);
1269 //cout << "commandString = " << commandString << endl;
1270 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
1272 commandString = "\"" + commandString + "\"";
1274 if (m->debug) { m->mothurOut("[DEBUG]: uchime command = " + commandString + ".\n"); }
1275 system(commandString.c_str());
1278 for(int i = 0; i < cPara.size(); i++) { delete cPara[i]; }
1279 delete[] uchimeParameters;
1281 //remove "" from filenames
1282 outputFName = outputFName.substr(1, outputFName.length()-2);
1283 filename = filename.substr(1, filename.length()-2);
1284 alns = alns.substr(1, alns.length()-2);
1286 if (m->control_pressed) { return 0; }
1288 //create accnos file from uchime results
1290 m->openInputFile(outputFName, in);
1293 m->openOutputFile(accnos, out);
1299 if (m->control_pressed) { break; }
1302 string chimeraFlag = "";
1303 in >> chimeraFlag >> name;
1305 //fix name if needed
1306 if (templatefile == "self") {
1307 name = name.substr(0, name.length()-1); //rip off last /
1308 name = name.substr(0, name.find_last_of('/'));
1311 for (int i = 0; i < 15; i++) { in >> chimeraFlag; }
1314 if (chimeraFlag == "Y") { out << name << endl; numChimeras++; }
1322 catch(exception& e) {
1323 m->errorOut(e, "ChimeraUchimeCommand", "driver");
1327 /**************************************************************************************************/
1329 int ChimeraUchimeCommand::createProcesses(string outputFileName, string filename, string accnos, string alns, int& numChimeras) {
1335 vector<string> files;
1337 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
1338 //break up file into multiple files
1339 m->divideFile(filename, processors, files);
1341 if (m->control_pressed) { return 0; }
1343 //loop through and create all the processes you want
1344 while (process != processors) {
1348 processIDS.push_back(pid); //create map from line number to pid so you can append files in correct order later
1350 }else if (pid == 0){
1351 num = driver(outputFileName + toString(getpid()) + ".temp", files[process], accnos + toString(getpid()) + ".temp", alns + toString(getpid()) + ".temp", numChimeras);
1353 //pass numSeqs to parent
1355 string tempFile = outputFileName + toString(getpid()) + ".num.temp";
1356 m->openOutputFile(tempFile, out);
1358 out << numChimeras << endl;
1363 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine();
1364 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
1370 num = driver(outputFileName, files[0], accnos, alns, numChimeras);
1372 //force parent to wait until all the processes are done
1373 for (int i=0;i<processIDS.size();i++) {
1374 int temp = processIDS[i];
1378 for (int i = 0; i < processIDS.size(); i++) {
1380 string tempFile = outputFileName + toString(processIDS[i]) + ".num.temp";
1381 m->openInputFile(tempFile, in);
1384 in >> tempNum; m->gobble(in);
1387 numChimeras += tempNum;
1389 in.close(); m->mothurRemove(tempFile);
1392 //////////////////////////////////////////////////////////////////////////////////////////////////////
1393 //Windows version shared memory, so be careful when passing variables through the preClusterData struct.
1394 //Above fork() will clone, so memory is separate, but that's not the case with windows,
1395 //////////////////////////////////////////////////////////////////////////////////////////////////////
1400 map<int, ofstream*> filehandles;
1401 map<int, ofstream*>::iterator it3;
1404 for (int i = 0; i < processors; i++) {
1405 temp = new ofstream;
1406 filehandles[i] = temp;
1407 m->openOutputFile(filename+toString(i)+".temp", *(temp));
1408 files.push_back(filename+toString(i)+".temp");
1412 m->openInputFile(filename, in);
1416 if (m->control_pressed) { in.close(); for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) { (*(it3->second)).close(); delete it3->second; } return 0; }
1418 Sequence tempSeq(in); m->gobble(in);
1420 if (tempSeq.getName() != "") {
1421 tempSeq.printSequence(*(filehandles[spot]));
1423 if (spot == processors) { spot = 0; }
1429 for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) {
1430 (*(it3->second)).close();
1434 //sanity check for number of processors
1435 if (count < processors) { processors = count; }
1437 vector<uchimeData*> pDataArray;
1438 DWORD dwThreadIdArray[processors-1];
1439 HANDLE hThreadArray[processors-1];
1440 vector<string> dummy; //used so that we can use the same struct for MyUchimeSeqsThreadFunction and MyUchimeThreadFunction
1442 //Create processor worker threads.
1443 for( int i=1; i<processors; i++ ){
1444 // Allocate memory for thread data.
1445 string extension = toString(i) + ".temp";
1447 uchimeData* tempUchime = new uchimeData(outputFileName+extension, uchimeLocation, templatefile, files[i], "", "", "", accnos+extension, alns+extension, dummy, m, 0, 0, i);
1448 tempUchime->setBooleans(useAbskew, chimealns, useMinH, useMindiv, useXn, useDn, useXa, useChunks, useMinchunk, useIdsmoothwindow, useMinsmoothid, useMaxp, skipgaps, skipgaps2, useMinlen, useMaxlen, ucl, useQueryfract);
1449 tempUchime->setVariables(abskew, minh, mindiv, xn, dn, xa, chunks, minchunk, idsmoothwindow, minsmoothid, maxp, minlen, maxlen, queryfract);
1451 pDataArray.push_back(tempUchime);
1452 processIDS.push_back(i);
1454 //MySeqSumThreadFunction is in header. It must be global or static to work with the threads.
1455 //default security attributes, thread function name, argument to thread function, use default creation flags, returns the thread identifier
1456 hThreadArray[i-1] = CreateThread(NULL, 0, MyUchimeSeqsThreadFunction, pDataArray[i-1], 0, &dwThreadIdArray[i-1]);
1460 //using the main process as a worker saves time and memory
1461 num = driver(outputFileName, files[0], accnos, alns, numChimeras);
1463 //Wait until all threads have terminated.
1464 WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE);
1466 //Close all thread handles and free memory allocations.
1467 for(int i=0; i < pDataArray.size(); i++){
1468 num += pDataArray[i]->count;
1469 numChimeras += pDataArray[i]->numChimeras;
1470 CloseHandle(hThreadArray[i]);
1471 delete pDataArray[i];
1475 //append output files
1476 for(int i=0;i<processIDS.size();i++){
1477 m->appendFiles((outputFileName + toString(processIDS[i]) + ".temp"), outputFileName);
1478 m->mothurRemove((outputFileName + toString(processIDS[i]) + ".temp"));
1480 m->appendFiles((accnos + toString(processIDS[i]) + ".temp"), accnos);
1481 m->mothurRemove((accnos + toString(processIDS[i]) + ".temp"));
1484 m->appendFiles((alns + toString(processIDS[i]) + ".temp"), alns);
1485 m->mothurRemove((alns + toString(processIDS[i]) + ".temp"));
1489 //get rid of the file pieces.
1490 for (int i = 0; i < files.size(); i++) { m->mothurRemove(files[i]); }
1493 catch(exception& e) {
1494 m->errorOut(e, "ChimeraUchimeCommand", "createProcesses");
1498 /**************************************************************************************************/
1500 int ChimeraUchimeCommand::createProcessesGroups(SequenceParser& parser, string outputFName, string filename, string accnos, string alns, vector<string> groups, string nameFile, string groupFile, string fastaFile) {
1508 if (groups.size() < processors) { processors = groups.size(); }
1510 //divide the groups between the processors
1511 vector<linePair> lines;
1512 int numGroupsPerProcessor = groups.size() / processors;
1513 for (int i = 0; i < processors; i++) {
1514 int startIndex = i * numGroupsPerProcessor;
1515 int endIndex = (i+1) * numGroupsPerProcessor;
1516 if(i == (processors - 1)){ endIndex = groups.size(); }
1517 lines.push_back(linePair(startIndex, endIndex));
1520 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
1522 //loop through and create all the processes you want
1523 while (process != processors) {
1527 processIDS.push_back(pid); //create map from line number to pid so you can append files in correct order later
1529 }else if (pid == 0){
1530 num = driverGroups(parser, outputFName + toString(getpid()) + ".temp", filename + toString(getpid()) + ".temp", accnos + toString(getpid()) + ".temp", alns + toString(getpid()) + ".temp", lines[process].start, lines[process].end, groups);
1532 //pass numSeqs to parent
1534 string tempFile = outputFName + toString(getpid()) + ".num.temp";
1535 m->openOutputFile(tempFile, out);
1541 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine();
1542 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
1548 num = driverGroups(parser, outputFName, filename, accnos, alns, lines[0].start, lines[0].end, groups);
1550 //force parent to wait until all the processes are done
1551 for (int i=0;i<processIDS.size();i++) {
1552 int temp = processIDS[i];
1556 for (int i = 0; i < processIDS.size(); i++) {
1558 string tempFile = outputFName + toString(processIDS[i]) + ".num.temp";
1559 m->openInputFile(tempFile, in);
1560 if (!in.eof()) { int tempNum = 0; in >> tempNum; num += tempNum; }
1561 in.close(); m->mothurRemove(tempFile);
1565 //////////////////////////////////////////////////////////////////////////////////////////////////////
1566 //Windows version shared memory, so be careful when passing variables through the uchimeData struct.
1567 //Above fork() will clone, so memory is separate, but that's not the case with windows,
1568 //////////////////////////////////////////////////////////////////////////////////////////////////////
1570 vector<uchimeData*> pDataArray;
1571 DWORD dwThreadIdArray[processors-1];
1572 HANDLE hThreadArray[processors-1];
1574 //Create processor worker threads.
1575 for( int i=1; i<processors; i++ ){
1576 // Allocate memory for thread data.
1577 string extension = toString(i) + ".temp";
1579 uchimeData* tempUchime = new uchimeData(outputFName+extension, uchimeLocation, templatefile, filename+extension, fastaFile, nameFile, groupFile, accnos+extension, alns+extension, groups, m, lines[i].start, lines[i].end, i);
1580 tempUchime->setBooleans(useAbskew, chimealns, useMinH, useMindiv, useXn, useDn, useXa, useChunks, useMinchunk, useIdsmoothwindow, useMinsmoothid, useMaxp, skipgaps, skipgaps2, useMinlen, useMaxlen, ucl, useQueryfract);
1581 tempUchime->setVariables(abskew, minh, mindiv, xn, dn, xa, chunks, minchunk, idsmoothwindow, minsmoothid, maxp, minlen, maxlen, queryfract);
1583 pDataArray.push_back(tempUchime);
1584 processIDS.push_back(i);
1586 //MyUchimeThreadFunction is in header. It must be global or static to work with the threads.
1587 //default security attributes, thread function name, argument to thread function, use default creation flags, returns the thread identifier
1588 hThreadArray[i-1] = CreateThread(NULL, 0, MyUchimeThreadFunction, pDataArray[i-1], 0, &dwThreadIdArray[i-1]);
1592 //using the main process as a worker saves time and memory
1593 num = driverGroups(parser, outputFName, filename, accnos, alns, lines[0].start, lines[0].end, groups);
1595 //Wait until all threads have terminated.
1596 WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE);
1598 //Close all thread handles and free memory allocations.
1599 for(int i=0; i < pDataArray.size(); i++){
1600 num += pDataArray[i]->count;
1601 CloseHandle(hThreadArray[i]);
1602 delete pDataArray[i];
1607 //append output files
1608 for(int i=0;i<processIDS.size();i++){
1609 m->appendFiles((outputFName + toString(processIDS[i]) + ".temp"), outputFName);
1610 m->mothurRemove((outputFName + toString(processIDS[i]) + ".temp"));
1612 m->appendFiles((accnos + toString(processIDS[i]) + ".temp"), accnos);
1613 m->mothurRemove((accnos + toString(processIDS[i]) + ".temp"));
1616 m->appendFiles((alns + toString(processIDS[i]) + ".temp"), alns);
1617 m->mothurRemove((alns + toString(processIDS[i]) + ".temp"));
1624 catch(exception& e) {
1625 m->errorOut(e, "ChimeraUchimeCommand", "createProcessesGroups");
1629 /**************************************************************************************************/