2 * chimerauchimecommand.cpp
5 * Created by westcott on 5/13/11.
6 * Copyright 2011 Schloss Lab. All rights reserved.
10 #include "chimerauchimecommand.h"
11 #include "deconvolutecommand.h"
13 #include "sequence.hpp"
14 #include "referencedb.h"
15 #include "systemcommand.h"
17 //**********************************************************************************************************************
18 vector<string> ChimeraUchimeCommand::setParameters(){
20 CommandParameter ptemplate("reference", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(ptemplate);
21 CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pfasta);
22 CommandParameter pname("name", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pname);
23 CommandParameter pgroup("group", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pgroup);
24 CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors);
25 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
26 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
27 CommandParameter pabskew("abskew", "Number", "", "1.9", "", "", "",false,false); parameters.push_back(pabskew);
28 CommandParameter pchimealns("chimealns", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pchimealns);
29 CommandParameter pminh("minh", "Number", "", "0.3", "", "", "",false,false); parameters.push_back(pminh);
30 CommandParameter pmindiv("mindiv", "Number", "", "0.5", "", "", "",false,false); parameters.push_back(pmindiv);
31 CommandParameter pxn("xn", "Number", "", "8.0", "", "", "",false,false); parameters.push_back(pxn);
32 CommandParameter pdn("dn", "Number", "", "1.4", "", "", "",false,false); parameters.push_back(pdn);
33 CommandParameter pxa("xa", "Number", "", "1", "", "", "",false,false); parameters.push_back(pxa);
34 CommandParameter pchunks("chunks", "Number", "", "4", "", "", "",false,false); parameters.push_back(pchunks);
35 CommandParameter pminchunk("minchunk", "Number", "", "64", "", "", "",false,false); parameters.push_back(pminchunk);
36 CommandParameter pidsmoothwindow("idsmoothwindow", "Number", "", "32", "", "", "",false,false); parameters.push_back(pidsmoothwindow);
37 //CommandParameter pminsmoothid("minsmoothid", "Number", "", "0.95", "", "", "",false,false); parameters.push_back(pminsmoothid);
38 CommandParameter pmaxp("maxp", "Number", "", "2", "", "", "",false,false); parameters.push_back(pmaxp);
39 CommandParameter pskipgaps("skipgaps", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(pskipgaps);
40 CommandParameter pskipgaps2("skipgaps2", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(pskipgaps2);
41 CommandParameter pminlen("minlen", "Number", "", "10", "", "", "",false,false); parameters.push_back(pminlen);
42 CommandParameter pmaxlen("maxlen", "Number", "", "10000", "", "", "",false,false); parameters.push_back(pmaxlen);
43 CommandParameter pucl("ucl", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pucl);
44 CommandParameter pqueryfract("queryfract", "Number", "", "0.5", "", "", "",false,false); parameters.push_back(pqueryfract);
46 vector<string> myArray;
47 for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); }
51 m->errorOut(e, "ChimeraUchimeCommand", "setParameters");
55 //**********************************************************************************************************************
56 string ChimeraUchimeCommand::getHelpString(){
58 string helpString = "";
59 helpString += "The chimera.uchime command reads a fastafile and referencefile and outputs potentially chimeric sequences.\n";
60 helpString += "This command is a wrapper for uchime written by Robert C. Edgar.\n";
61 helpString += "The chimera.uchime command parameters are fasta, name, reference, processors, abskew, chimealns, minh, mindiv, xn, dn, xa, chunks, minchunk, idsmoothwindow, minsmoothid, maxp, skipgaps, skipgaps2, minlen, maxlen, ucl and queryfact.\n";
62 helpString += "The fasta parameter allows you to enter the fasta file containing your potentially chimeric sequences, and is required, unless you have a valid current fasta file. \n";
63 helpString += "The name parameter allows you to provide a name file, if you are using template=self. \n";
64 helpString += "You may enter multiple fasta files by separating their names with dashes. ie. fasta=abrecovery.fasta-amazon.fasta \n";
65 helpString += "The group parameter allows you to provide a group file. The group file can be used with a namesfile and reference=self. When checking sequences, only sequences from the same group as the query sequence will be used as the reference. \n";
66 helpString += "The reference parameter allows you to enter a reference file containing known non-chimeric sequences, and is required. You may also set template=self, in this case the abundant sequences will be used as potential parents. \n";
67 helpString += "The processors parameter allows you to specify how many processors you would like to use. The default is 1. \n";
68 helpString += "The abskew parameter can only be used with template=self. Minimum abundance skew. Default 1.9. Abundance skew is: min [ abund(parent1), abund(parent2) ] / abund(query).\n";
69 helpString += "The chimealns parameter allows you to indicate you would like a file containing multiple alignments of query sequences to parents in human readable format. Alignments show columns with differences that support or contradict a chimeric model.\n";
70 helpString += "The minh parameter - mininum score to report chimera. Default 0.3. Values from 0.1 to 5 might be reasonable. Lower values increase sensitivity but may report more false positives. If you decrease xn you may need to increase minh, and vice versa.\n";
71 helpString += "The mindiv parameter - minimum divergence ratio, default 0.5. Div ratio is 100%% - %%identity between query sequence and the closest candidate for being a parent. If you don't care about very close chimeras, then you could increase mindiv to, say, 1.0 or 2.0, and also decrease minh, say to 0.1, to increase sensitivity. How well this works will depend on your data. Best is to tune parameters on a good benchmark.\n";
72 helpString += "The xn parameter - weight of a no vote. Default 8.0. Decreasing this weight to around 3 or 4 may give better performance on denoised data.\n";
73 helpString += "The dn parameter - pseudo-count prior on number of no votes. Default 1.4. Probably no good reason to change this unless you can retune to a good benchmark for your data. Reasonable values are probably in the range from 0.2 to 2.\n";
74 helpString += "The xa parameter - weight of an abstain vote. Default 1. So far, results do not seem to be very sensitive to this parameter, but if you have a good training set might be worth trying. Reasonable values might range from 0.1 to 2.\n";
75 helpString += "The chunks parameter is the number of chunks to extract from the query sequence when searching for parents. Default 4.\n";
76 helpString += "The minchunk parameter is the minimum length of a chunk. Default 64.\n";
77 helpString += "The idsmoothwindow parameter is the length of id smoothing window. Default 32.\n";
78 //helpString += "The minsmoothid parameter - minimum factional identity over smoothed window of candidate parent. Default 0.95.\n";
79 helpString += "The maxp parameter - maximum number of candidate parents to consider. Default 2. In tests so far, increasing maxp gives only a very small improvement in sensivity but tends to increase the error rate quite a bit.\n";
80 helpString += "The skipgaps parameter controls how gapped columns affect counting of diffs. If skipgaps is set to T, columns containing gaps do not found as diffs. Default = T.\n";
81 helpString += "The skipgaps2 parameter controls how gapped columns affect counting of diffs. If skipgaps2 is set to T, if column is immediately adjacent to a column containing a gap, it is not counted as a diff. Default = T.\n";
82 helpString += "The minlen parameter is the minimum unaligned sequence length. Defaults 10. Applies to both query and reference sequences.\n";
83 helpString += "The maxlen parameter is the maximum unaligned sequence length. Defaults 10000. Applies to both query and reference sequences.\n";
84 helpString += "The ucl parameter - use local-X alignments. Default is global-X or false. On tests so far, global-X is always better; this option is retained because it just might work well on some future type of data.\n";
85 helpString += "The queryfract parameter - minimum fraction of the query sequence that must be covered by a local-X alignment. Default 0.5. Applies only when ucl is true.\n";
87 helpString += "When using MPI, the processors parameter is set to the number of MPI processes running. \n";
89 helpString += "The chimera.uchime command should be in the following format: \n";
90 helpString += "chimera.uchime(fasta=yourFastaFile, reference=yourTemplate) \n";
91 helpString += "Example: chimera.uchime(fasta=AD.align, reference=silva.gold.align) \n";
92 helpString += "Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFastaFile).\n";
96 m->errorOut(e, "ChimeraUchimeCommand", "getHelpString");
100 //**********************************************************************************************************************
101 ChimeraUchimeCommand::ChimeraUchimeCommand(){
103 abort = true; calledHelp = true;
105 vector<string> tempOutNames;
106 outputTypes["chimera"] = tempOutNames;
107 outputTypes["accnos"] = tempOutNames;
108 outputTypes["alns"] = tempOutNames;
110 catch(exception& e) {
111 m->errorOut(e, "ChimeraUchimeCommand", "ChimeraUchimeCommand");
115 //***************************************************************************************************************
116 ChimeraUchimeCommand::ChimeraUchimeCommand(string option) {
118 abort = false; calledHelp = false;
119 ReferenceDB* rdb = ReferenceDB::getInstance();
121 //allow user to run help
122 if(option == "help") { help(); abort = true; calledHelp = true; }
123 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
126 vector<string> myArray = setParameters();
128 OptionParser parser(option);
129 map<string,string> parameters = parser.getParameters();
131 ValidParameters validParameter("chimera.uchime");
132 map<string,string>::iterator it;
134 //check to make sure all parameters are valid for command
135 for (it = parameters.begin(); it != parameters.end(); it++) {
136 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
139 vector<string> tempOutNames;
140 outputTypes["chimera"] = tempOutNames;
141 outputTypes["accnos"] = tempOutNames;
142 outputTypes["alns"] = tempOutNames;
144 //if the user changes the input directory command factory will send this info to us in the output parameter
145 string inputDir = validParameter.validFile(parameters, "inputdir", false);
146 if (inputDir == "not found"){ inputDir = ""; }
148 //check for required parameters
149 fastafile = validParameter.validFile(parameters, "fasta", false);
150 if (fastafile == "not found") {
151 //if there is a current fasta file, use it
152 string filename = m->getFastaFile();
153 if (filename != "") { fastaFileNames.push_back(filename); m->mothurOut("Using " + filename + " as input file for the fasta parameter."); m->mothurOutEndLine(); }
154 else { m->mothurOut("You have no current fastafile and the fasta parameter is required."); m->mothurOutEndLine(); abort = true; }
156 m->splitAtDash(fastafile, fastaFileNames);
158 //go through files and make sure they are good, if not, then disregard them
159 for (int i = 0; i < fastaFileNames.size(); i++) {
162 if (fastaFileNames[i] == "current") {
163 fastaFileNames[i] = m->getFastaFile();
164 if (fastaFileNames[i] != "") { m->mothurOut("Using " + fastaFileNames[i] + " as input file for the fasta parameter where you had given current."); m->mothurOutEndLine(); }
166 m->mothurOut("You have no current fastafile, ignoring current."); m->mothurOutEndLine(); ignore=true;
167 //erase from file list
168 fastaFileNames.erase(fastaFileNames.begin()+i);
175 if (inputDir != "") {
176 string path = m->hasPath(fastaFileNames[i]);
177 //if the user has not given a path then, add inputdir. else leave path alone.
178 if (path == "") { fastaFileNames[i] = inputDir + fastaFileNames[i]; }
184 ableToOpen = m->openInputFile(fastaFileNames[i], in, "noerror");
186 //if you can't open it, try default location
187 if (ableToOpen == 1) {
188 if (m->getDefaultPath() != "") { //default path is set
189 string tryPath = m->getDefaultPath() + m->getSimpleName(fastaFileNames[i]);
190 m->mothurOut("Unable to open " + fastaFileNames[i] + ". Trying default " + tryPath); m->mothurOutEndLine();
192 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
194 fastaFileNames[i] = tryPath;
198 if (ableToOpen == 1) {
199 if (m->getOutputDir() != "") { //default path is set
200 string tryPath = m->getOutputDir() + m->getSimpleName(fastaFileNames[i]);
201 m->mothurOut("Unable to open " + fastaFileNames[i] + ". Trying output directory " + tryPath); m->mothurOutEndLine();
203 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
205 fastaFileNames[i] = tryPath;
211 if (ableToOpen == 1) {
212 m->mothurOut("Unable to open " + fastaFileNames[i] + ". It will be disregarded."); m->mothurOutEndLine();
213 //erase from file list
214 fastaFileNames.erase(fastaFileNames.begin()+i);
217 m->setFastaFile(fastaFileNames[i]);
222 //make sure there is at least one valid file left
223 if (fastaFileNames.size() == 0) { m->mothurOut("[ERROR]: no valid files."); m->mothurOutEndLine(); abort = true; }
227 //check for required parameters
229 namefile = validParameter.validFile(parameters, "name", false);
230 if (namefile == "not found") { namefile = ""; hasName = false; }
232 m->splitAtDash(namefile, nameFileNames);
234 //go through files and make sure they are good, if not, then disregard them
235 for (int i = 0; i < nameFileNames.size(); i++) {
238 if (nameFileNames[i] == "current") {
239 nameFileNames[i] = m->getNameFile();
240 if (nameFileNames[i] != "") { m->mothurOut("Using " + nameFileNames[i] + " as input file for the name parameter where you had given current."); m->mothurOutEndLine(); }
242 m->mothurOut("You have no current namefile, ignoring current."); m->mothurOutEndLine(); ignore=true;
243 //erase from file list
244 nameFileNames.erase(nameFileNames.begin()+i);
251 if (inputDir != "") {
252 string path = m->hasPath(nameFileNames[i]);
253 //if the user has not given a path then, add inputdir. else leave path alone.
254 if (path == "") { nameFileNames[i] = inputDir + nameFileNames[i]; }
260 ableToOpen = m->openInputFile(nameFileNames[i], in, "noerror");
262 //if you can't open it, try default location
263 if (ableToOpen == 1) {
264 if (m->getDefaultPath() != "") { //default path is set
265 string tryPath = m->getDefaultPath() + m->getSimpleName(nameFileNames[i]);
266 m->mothurOut("Unable to open " + nameFileNames[i] + ". Trying default " + tryPath); m->mothurOutEndLine();
268 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
270 nameFileNames[i] = tryPath;
274 if (ableToOpen == 1) {
275 if (m->getOutputDir() != "") { //default path is set
276 string tryPath = m->getOutputDir() + m->getSimpleName(nameFileNames[i]);
277 m->mothurOut("Unable to open " + nameFileNames[i] + ". Trying output directory " + tryPath); m->mothurOutEndLine();
279 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
281 nameFileNames[i] = tryPath;
287 if (ableToOpen == 1) {
288 m->mothurOut("Unable to open " + nameFileNames[i] + ". It will be disregarded."); m->mothurOutEndLine();
289 //erase from file list
290 nameFileNames.erase(nameFileNames.begin()+i);
293 m->setNameFile(nameFileNames[i]);
298 //make sure there is at least one valid file left
299 if (nameFileNames.size() == 0) { m->mothurOut("[ERROR]: no valid name files."); m->mothurOutEndLine(); abort = true; }
302 if (hasName && (nameFileNames.size() != fastaFileNames.size())) { m->mothurOut("[ERROR]: The number of namefiles does not match the number of fastafiles, please correct."); m->mothurOutEndLine(); abort=true; }
304 bool hasGroup = true;
305 groupfile = validParameter.validFile(parameters, "group", false);
306 if (groupfile == "not found") { groupfile = ""; hasGroup = false; }
308 m->splitAtDash(groupfile, groupFileNames);
310 //go through files and make sure they are good, if not, then disregard them
311 for (int i = 0; i < groupFileNames.size(); i++) {
314 if (groupFileNames[i] == "current") {
315 groupFileNames[i] = m->getGroupFile();
316 if (groupFileNames[i] != "") { m->mothurOut("Using " + groupFileNames[i] + " as input file for the group parameter where you had given current."); m->mothurOutEndLine(); }
318 m->mothurOut("You have no current namefile, ignoring current."); m->mothurOutEndLine(); ignore=true;
319 //erase from file list
320 groupFileNames.erase(groupFileNames.begin()+i);
327 if (inputDir != "") {
328 string path = m->hasPath(groupFileNames[i]);
329 //if the user has not given a path then, add inputdir. else leave path alone.
330 if (path == "") { groupFileNames[i] = inputDir + groupFileNames[i]; }
336 ableToOpen = m->openInputFile(groupFileNames[i], in, "noerror");
338 //if you can't open it, try default location
339 if (ableToOpen == 1) {
340 if (m->getDefaultPath() != "") { //default path is set
341 string tryPath = m->getDefaultPath() + m->getSimpleName(groupFileNames[i]);
342 m->mothurOut("Unable to open " + groupFileNames[i] + ". Trying default " + tryPath); m->mothurOutEndLine();
344 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
346 groupFileNames[i] = tryPath;
350 if (ableToOpen == 1) {
351 if (m->getOutputDir() != "") { //default path is set
352 string tryPath = m->getOutputDir() + m->getSimpleName(groupFileNames[i]);
353 m->mothurOut("Unable to open " + groupFileNames[i] + ". Trying output directory " + tryPath); m->mothurOutEndLine();
355 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
357 groupFileNames[i] = tryPath;
363 if (ableToOpen == 1) {
364 m->mothurOut("Unable to open " + groupFileNames[i] + ". It will be disregarded."); m->mothurOutEndLine();
365 //erase from file list
366 groupFileNames.erase(groupFileNames.begin()+i);
369 m->setGroupFile(groupFileNames[i]);
374 //make sure there is at least one valid file left
375 if (groupFileNames.size() == 0) { m->mothurOut("[ERROR]: no valid group files."); m->mothurOutEndLine(); abort = true; }
378 if (hasGroup && (groupFileNames.size() != fastaFileNames.size())) { m->mothurOut("[ERROR]: The number of groupfiles does not match the number of fastafiles, please correct."); m->mothurOutEndLine(); abort=true; }
381 //if the user changes the output directory command factory will send this info to us in the output parameter
382 outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = ""; }
385 it = parameters.find("reference");
386 //user has given a template file
387 if(it != parameters.end()){
388 if (it->second == "self") { templatefile = "self"; }
390 path = m->hasPath(it->second);
391 //if the user has not given a path then, add inputdir. else leave path alone.
392 if (path == "") { parameters["reference"] = inputDir + it->second; }
394 templatefile = validParameter.validFile(parameters, "reference", true);
395 if (templatefile == "not open") { abort = true; }
396 else if (templatefile == "not found") { //check for saved reference sequences
397 if (rdb->getSavedReference() != "") {
398 templatefile = rdb->getSavedReference();
399 m->mothurOutEndLine(); m->mothurOut("Using sequences from " + rdb->getSavedReference() + "."); m->mothurOutEndLine();
401 m->mothurOut("[ERROR]: You don't have any saved reference sequences and the reference parameter is a required.");
402 m->mothurOutEndLine();
407 }else if (hasName) { templatefile = "self"; }
409 if (rdb->getSavedReference() != "") {
410 templatefile = rdb->getSavedReference();
411 m->mothurOutEndLine(); m->mothurOut("Using sequences from " + rdb->getSavedReference() + "."); m->mothurOutEndLine();
413 m->mothurOut("[ERROR]: You don't have any saved reference sequences and the reference parameter is a required.");
414 m->mothurOutEndLine();
415 templatefile = ""; abort = true;
419 string temp = validParameter.validFile(parameters, "processors", false); if (temp == "not found"){ temp = m->getProcessors(); }
420 m->setProcessors(temp);
421 m->mothurConvert(temp, processors);
423 abskew = validParameter.validFile(parameters, "abskew", false); if (abskew == "not found"){ useAbskew = false; abskew = "1.9"; }else{ useAbskew = true; }
424 if (useAbskew && templatefile != "self") { m->mothurOut("The abskew parameter is only valid with template=self, ignoring."); m->mothurOutEndLine(); useAbskew = false; }
426 temp = validParameter.validFile(parameters, "chimealns", false); if (temp == "not found") { temp = "f"; }
427 chimealns = m->isTrue(temp);
429 minh = validParameter.validFile(parameters, "minh", false); if (minh == "not found") { useMinH = false; minh = "0.3"; } else{ useMinH = true; }
430 mindiv = validParameter.validFile(parameters, "mindiv", false); if (mindiv == "not found") { useMindiv = false; mindiv = "0.5"; } else{ useMindiv = true; }
431 xn = validParameter.validFile(parameters, "xn", false); if (xn == "not found") { useXn = false; xn = "8.0"; } else{ useXn = true; }
432 dn = validParameter.validFile(parameters, "dn", false); if (dn == "not found") { useDn = false; dn = "1.4"; } else{ useDn = true; }
433 xa = validParameter.validFile(parameters, "xa", false); if (xa == "not found") { useXa = false; xa = "1"; } else{ useXa = true; }
434 chunks = validParameter.validFile(parameters, "chunks", false); if (chunks == "not found") { useChunks = false; chunks = "4"; } else{ useChunks = true; }
435 minchunk = validParameter.validFile(parameters, "minchunk", false); if (minchunk == "not found") { useMinchunk = false; minchunk = "64"; } else{ useMinchunk = true; }
436 idsmoothwindow = validParameter.validFile(parameters, "idsmoothwindow", false); if (idsmoothwindow == "not found") { useIdsmoothwindow = false; idsmoothwindow = "32"; } else{ useIdsmoothwindow = true; }
437 //minsmoothid = validParameter.validFile(parameters, "minsmoothid", false); if (minsmoothid == "not found") { useMinsmoothid = false; minsmoothid = "0.95"; } else{ useMinsmoothid = true; }
438 maxp = validParameter.validFile(parameters, "maxp", false); if (maxp == "not found") { useMaxp = false; maxp = "2"; } else{ useMaxp = true; }
439 minlen = validParameter.validFile(parameters, "minlen", false); if (minlen == "not found") { useMinlen = false; minlen = "10"; } else{ useMinlen = true; }
440 maxlen = validParameter.validFile(parameters, "maxlen", false); if (maxlen == "not found") { useMaxlen = false; maxlen = "10000"; } else{ useMaxlen = true; }
442 temp = validParameter.validFile(parameters, "ucl", false); if (temp == "not found") { temp = "f"; }
443 ucl = m->isTrue(temp);
445 queryfract = validParameter.validFile(parameters, "queryfract", false); if (queryfract == "not found") { useQueryfract = false; queryfract = "0.5"; } else{ useQueryfract = true; }
446 if (!ucl && useQueryfract) { m->mothurOut("queryfact may only be used when ucl=t, ignoring."); m->mothurOutEndLine(); useQueryfract = false; }
448 temp = validParameter.validFile(parameters, "skipgaps", false); if (temp == "not found") { temp = "t"; }
449 skipgaps = m->isTrue(temp);
451 temp = validParameter.validFile(parameters, "skipgaps2", false); if (temp == "not found") { temp = "t"; }
452 skipgaps2 = m->isTrue(temp);
454 if (hasName && (templatefile != "self")) { m->mothurOut("You have provided a namefile and the reference parameter is not set to self. I am not sure what reference you are trying to use, aborting."); m->mothurOutEndLine(); abort=true; }
455 if (hasGroup && (templatefile != "self")) { m->mothurOut("You have provided a group file and the reference parameter is not set to self. I am not sure what reference you are trying to use, aborting."); m->mothurOutEndLine(); abort=true; }
457 //look for uchime exe
459 string tempPath = path;
460 for (int i = 0; i < path.length(); i++) { tempPath[i] = tolower(path[i]); }
461 path = path.substr(0, (tempPath.find_last_of('m')));
463 string uchimeCommand;
464 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
465 uchimeCommand = path + "uchime"; // format the database, -o option gives us the ability
467 m->mothurOut("[DEBUG]: Uchime location using \"which uchime\" = ");
468 Command* newCommand = new SystemCommand("which uchime"); m->mothurOutEndLine();
469 newCommand->execute();
471 m->mothurOut("[DEBUG]: Mothur's location using \"which mothur\" = ");
472 newCommand = new SystemCommand("which mothur"); m->mothurOutEndLine();
473 newCommand->execute();
477 uchimeCommand = path + "uchime.exe";
480 //test to make sure uchime exists
482 uchimeCommand = m->getFullPathName(uchimeCommand);
483 int ableToOpen = m->openInputFile(uchimeCommand, in, "no error"); in.close();
484 if(ableToOpen == 1) { m->mothurOut("[ERROR]: " + uchimeCommand + " file does not exist. mothur requires the uchime executable."); m->mothurOutEndLine(); abort = true; }
487 catch(exception& e) {
488 m->errorOut(e, "ChimeraSlayerCommand", "ChimeraSlayerCommand");
492 //***************************************************************************************************************
494 int ChimeraUchimeCommand::execute(){
496 if (abort == true) { if (calledHelp) { return 0; } return 2; }
498 m->mothurOut("\nuchime by Robert C. Edgar\nhttp://drive5.com/uchime\nThis code is donated to the public domain.\n\n");
500 for (int s = 0; s < fastaFileNames.size(); s++) {
502 m->mothurOut("Checking sequences from " + fastaFileNames[s] + " ..." ); m->mothurOutEndLine();
504 int start = time(NULL);
505 string nameFile = "";
506 if (outputDir == "") { outputDir = m->hasPath(fastaFileNames[s]); }//if user entered a file with a path then preserve it
507 string outputFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + "uchime.chimera";
508 string accnosFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + "uchime.accnos";
509 string alnsFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + "uchime.alns";
510 string newFasta = m->getRootName(fastaFileNames[s]) + "temp";
512 //you provided a groupfile
513 string groupFile = "";
514 if (groupFileNames.size() != 0) { groupFile = groupFileNames[s]; }
516 if ((templatefile == "self") && (groupFile == "")) { //you want to run uchime with a reference template
518 if (processors != 1) { m->mothurOut("When using template=self, mothur can only use 1 processor, continuing."); m->mothurOutEndLine(); processors = 1; }
519 if (nameFileNames.size() != 0) { //you provided a namefile and we don't need to create one
520 nameFile = nameFileNames[s];
521 }else { nameFile = getNamesFile(fastaFileNames[s]); }
523 map<string, string> seqs;
524 readFasta(fastaFileNames[s], seqs); if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } return 0; }
527 vector<seqPriorityNode> nameMapCount;
528 int error = m->readNames(nameFile, nameMapCount, seqs); if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } return 0; }
529 if (error == 1) { for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } return 0; }
530 if (seqs.size() != nameMapCount.size()) { m->mothurOut( "The number of sequences in your fastafile does not match the number of sequences in your namefile, aborting."); m->mothurOutEndLine(); for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } return 0; }
532 printFile(nameMapCount, newFasta);
533 fastaFileNames[s] = newFasta;
536 if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } return 0; }
538 if (groupFile != "") {
539 if (nameFileNames.size() != 0) { //you provided a namefile and we don't need to create one
540 nameFile = nameFileNames[s];
541 }else { nameFile = getNamesFile(fastaFileNames[s]); }
543 //Parse sequences by group
544 SequenceParser parser(groupFile, fastaFileNames[s], nameFile);
545 vector<string> groups = parser.getNamesOfGroups();
547 if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } return 0; }
550 ofstream out, out1, out2;
551 m->openOutputFile(outputFileName, out); out.close();
552 m->openOutputFile(accnosFileName, out1); out1.close();
553 if (chimealns) { m->openOutputFile(alnsFileName, out2); out2.close(); }
556 if(processors == 1) { totalSeqs = driverGroups(parser, outputFileName, newFasta, accnosFileName, alnsFileName, 0, groups.size(), groups); }
557 else { totalSeqs = createProcessesGroups(parser, outputFileName, newFasta, accnosFileName, alnsFileName, groups, nameFile, groupFile, fastaFileNames[s]); }
559 if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } return 0; }
561 int totalChimeras = deconvoluteResults(parser, outputFileName, accnosFileName, alnsFileName);
563 m->mothurOutEndLine(); m->mothurOut("It took " + toString(time(NULL) - start) + " secs to check " + toString(totalSeqs) + " sequences. " + toString(totalChimeras) + " chimeras were found."); m->mothurOutEndLine();
564 m->mothurOut("The number of sequences checked may be larger than the number of unique sequences because some sequences are found in several samples."); m->mothurOutEndLine();
566 if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } return 0; }
569 if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } return 0; }
574 if(processors == 1){ numSeqs = driver(outputFileName, fastaFileNames[s], accnosFileName, alnsFileName, numChimeras); }
575 else{ numSeqs = createProcesses(outputFileName, fastaFileNames[s], accnosFileName, alnsFileName, numChimeras); }
579 m->openOutputFile(outputFileName+".temp", out);
580 out << "Score\tQuery\tParentA\tParentB\tIdQM\tIdQA\tIdQB\tIdAB\tIdQT\tLY\tLN\tLA\tRY\tRN\tRA\tDiv\tYN\n";
583 m->appendFiles(outputFileName, outputFileName+".temp");
584 m->mothurRemove(outputFileName); rename((outputFileName+".temp").c_str(), outputFileName.c_str());
586 if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } return 0; }
588 //remove file made for uchime
589 if (templatefile == "self") { m->mothurRemove(fastaFileNames[s]); }
591 m->mothurOutEndLine(); m->mothurOut("It took " + toString(time(NULL) - start) + " secs to check " + toString(numSeqs) + " sequences. " + toString(numChimeras) + " chimeras were found."); m->mothurOutEndLine();
594 outputNames.push_back(outputFileName); outputTypes["chimera"].push_back(outputFileName);
595 outputNames.push_back(accnosFileName); outputTypes["accnos"].push_back(accnosFileName);
596 if (chimealns) { outputNames.push_back(alnsFileName); outputTypes["alns"].push_back(alnsFileName); }
599 //set accnos file as new current accnosfile
601 itTypes = outputTypes.find("accnos");
602 if (itTypes != outputTypes.end()) {
603 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setAccnosFile(current); }
606 m->mothurOutEndLine();
607 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
608 for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); }
609 m->mothurOutEndLine();
614 catch(exception& e) {
615 m->errorOut(e, "ChimeraUchimeCommand", "execute");
619 //**********************************************************************************************************************
620 int ChimeraUchimeCommand::deconvoluteResults(SequenceParser& parser, string outputFileName, string accnosFileName, string alnsFileName){
622 map<string, string> uniqueNames = parser.getAllSeqsMap();
623 map<string, string>::iterator itUnique;
628 m->openInputFile(accnosFileName, in2);
631 m->openOutputFile(accnosFileName+".temp", out2);
634 set<string> namesInFile; //this is so if a sequence is found to be chimera in several samples we dont write it to the results file more than once
635 set<string>::iterator itNames;
636 set<string> chimerasInFile;
637 set<string>::iterator itChimeras;
641 if (m->control_pressed) { in2.close(); out2.close(); m->mothurRemove(outputFileName); m->mothurRemove((accnosFileName+".temp")); return 0; }
643 in2 >> name; m->gobble(in2);
646 itUnique = uniqueNames.find(name);
648 if (itUnique == uniqueNames.end()) { m->mothurOut("[ERROR]: trouble parsing accnos results. Cannot find "+ name + "."); m->mothurOutEndLine(); m->control_pressed = true; }
650 itChimeras = chimerasInFile.find((itUnique->second));
652 if (itChimeras == chimerasInFile.end()) {
653 out2 << itUnique->second << endl;
654 chimerasInFile.insert((itUnique->second));
662 m->mothurRemove(accnosFileName);
663 rename((accnosFileName+".temp").c_str(), accnosFileName.c_str());
669 m->openInputFile(outputFileName, in);
672 m->openOutputFile(outputFileName+".temp", out); out.setf(ios::fixed, ios::floatfield); out.setf(ios::showpoint);
673 out << "Score\tQuery\tParentA\tParentB\tIdQM\tIdQA\tIdQB\tIdAB\tIdQT\tLY\tLN\tLA\tRY\tRN\tRA\tDiv\tYN\n";
676 string parent1, parent2, temp2, temp3, temp4, temp5, temp6, temp7, temp8, temp9, temp10, temp11, temp12, temp13, flag;
679 //assumptions - in file each read will always look like - if uchime source is updated, revisit this code.
680 /* 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
681 0.000000 F11Fcsw_33372/ab=18/ * * * * * * * * * * * * * * N
682 0.018300 F11Fcsw_14980/ab=16/ F11Fcsw_1915/ab=35/ F11Fcsw_6032/ab=42/ 79.9 78.7 78.2 78.7 79.2 3 0 5 11 10 20 1.46 N
687 if (m->control_pressed) { in.close(); out.close(); m->mothurRemove((outputFileName+".temp")); return 0; }
690 in >> temp1; m->gobble(in);
691 in >> name; m->gobble(in);
692 in >> parent1; m->gobble(in);
693 in >> parent2; m->gobble(in);
694 in >> temp2 >> temp3 >> temp4 >> temp5 >> temp6 >> temp7 >> temp8 >> temp9 >> temp10 >> temp11 >> temp12 >> temp13 >> flag;
697 //parse name - name will look like U68590/ab=1/
698 string restOfName = "";
699 int pos = name.find_first_of('/');
700 if (pos != string::npos) {
701 restOfName = name.substr(pos);
702 name = name.substr(0, pos);
706 itUnique = uniqueNames.find(name);
708 if (itUnique == uniqueNames.end()) { m->mothurOut("[ERROR]: trouble parsing chimera results. Cannot find "+ name + "."); m->mothurOutEndLine(); m->control_pressed = true; }
710 name = itUnique->second;
711 //is this name already in the file
712 itNames = namesInFile.find((name));
714 if (itNames == namesInFile.end()) { //no not in file
715 if (flag == "N") { //are you really a no??
716 //is this sequence really not chimeric??
717 itChimeras = chimerasInFile.find(name);
719 //then you really are a no so print, otherwise skip
720 if (itChimeras == chimerasInFile.end()) { print = true; }
721 }else{ print = true; }
726 out << temp1 << '\t' << name << restOfName << '\t';
727 namesInFile.insert(name);
729 //parse parent1 names
730 if (parent1 != "*") {
732 pos = parent1.find_first_of('/');
733 if (pos != string::npos) {
734 restOfName = parent1.substr(pos);
735 parent1 = parent1.substr(0, pos);
738 itUnique = uniqueNames.find(parent1);
739 if (itUnique == uniqueNames.end()) { m->mothurOut("[ERROR]: trouble parsing chimera results. Cannot find parentA "+ parent1 + "."); m->mothurOutEndLine(); m->control_pressed = true; }
740 else { out << itUnique->second << restOfName << '\t'; }
741 }else { out << parent1 << '\t'; }
743 //parse parent2 names
744 if (parent2 != "*") {
746 pos = parent2.find_first_of('/');
747 if (pos != string::npos) {
748 restOfName = parent2.substr(pos);
749 parent2 = parent2.substr(0, pos);
752 itUnique = uniqueNames.find(parent2);
753 if (itUnique == uniqueNames.end()) { m->mothurOut("[ERROR]: trouble parsing chimera results. Cannot find parentB "+ parent2 + "."); m->mothurOutEndLine(); m->control_pressed = true; }
754 else { out << itUnique->second << restOfName << '\t'; }
755 }else { out << parent2 << '\t'; }
757 out << temp2 << '\t' << temp3 << '\t' << temp4 << '\t' << temp5 << '\t' << temp6 << '\t' << temp7 << '\t' << temp8 << '\t' << temp9 << '\t' << temp10 << '\t' << temp11 << '\t' << temp12 << temp13 << '\t' << flag << endl;
763 m->mothurRemove(outputFileName);
764 rename((outputFileName+".temp").c_str(), outputFileName.c_str());
768 //assumptions - in file each read will always look like - if uchime source is updated, revisit this code.
770 ------------------------------------------------------------------------
771 Query ( 179 nt) F21Fcsw_11639/ab=591/
772 ParentA ( 179 nt) F11Fcsw_6529/ab=1625/
773 ParentB ( 181 nt) F21Fcsw_12128/ab=1827/
775 A 1 AAGgAAGAtTAATACaagATGgCaTCatgAGtccgCATgTtcAcatGATTAAAG--gTaTtcCGGTagacGATGGGGATG 78
776 Q 1 AAGTAAGACTAATACCCAATGACGTCTCTAGAAGACATCTGAAAGAGATTAAAG--ATTTATCGGTGATGGATGGGGATG 78
777 B 1 AAGgAAGAtTAATcCaggATGggaTCatgAGttcACATgTccgcatGATTAAAGgtATTTtcCGGTagacGATGGGGATG 80
778 Diffs N N A N?N N N NNN N?NB N ?NaNNN B B NN NNNN
779 Votes 0 0 + 000 0 0 000 000+ 0 00!000 + 00 0000
780 Model AAAAAAAAAAAAAAAAAAAAAAxBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB
782 A 79 CGTtccATTAGaTaGTaGGCGGGGTAACGGCCCACCtAGtCttCGATggaTAGGGGTTCTGAGAGGAAGGTCCCCCACAT 158
783 Q 79 CGTCTGATTAGCTTGTTGGCGGGGTAACGGCCCACCAAGGCAACGATCAGTAGGGGTTCTGAGAGGAAGGTCCCCCACAT 158
784 B 81 CGTtccATTAGaTaGTaGGCGGGGTAACGGCCCACCtAGtCAACGATggaTAGGGGTTCTGAGAGGAAGGTCCCCCACAT 160
785 Diffs NNN N N N N N BB NNN
786 Votes 000 0 0 0 0 0 ++ 000
787 Model BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB
789 A 159 TGGAACTGAGACACGGTCCAA 179
790 Q 159 TGGAACTGAGACACGGTCCAA 179
791 B 161 TGGAACTGAGACACGGTCCAA 181
794 Model BBBBBBBBBBBBBBBBBBBBB
796 Ids. QA 76.6%, QB 77.7%, AB 93.7%, QModel 78.9%, Div. +1.5%
797 Diffs Left 7: N 0, A 6, Y 1 (14.3%); Right 35: N 1, A 30, Y 4 (11.4%), Score 0.0047
801 m->openInputFile(alnsFileName, in3);
804 m->openOutputFile(alnsFileName+".temp", out3); out3.setf(ios::fixed, ios::floatfield); out3.setf(ios::showpoint);
811 if (m->control_pressed) { in3.close(); out3.close(); m->mothurRemove(outputFileName); m->mothurRemove((accnosFileName)); m->mothurRemove((alnsFileName+".temp")); return 0; }
814 line = m->getline(in3);
818 istringstream iss(line);
821 //are you a name line
822 if ((temp == "Query") || (temp == "ParentA") || (temp == "ParentB")) {
824 for (int i = 0; i < line.length(); i++) {
826 if (line[i] == ')') { break; }
827 else { out3 << line[i]; }
830 if (spot == (line.length() - 1)) { m->mothurOut("[ERROR]: could not line sequence name in line " + line + "."); m->mothurOutEndLine(); m->control_pressed = true; }
831 else if ((spot+2) > (line.length() - 1)) { m->mothurOut("[ERROR]: could not line sequence name in line " + line + "."); m->mothurOutEndLine(); m->control_pressed = true; }
833 out << line[spot] << line[spot+1];
835 name = line.substr(spot+2);
837 //parse name - name will either look like U68590/ab=1/ or U68590
838 string restOfName = "";
839 int pos = name.find_first_of('/');
840 if (pos != string::npos) {
841 restOfName = name.substr(pos);
842 name = name.substr(0, pos);
846 itUnique = uniqueNames.find(name);
848 if (itUnique == uniqueNames.end()) { m->mothurOut("[ERROR]: trouble parsing alns results. Cannot find "+ name + "."); m->mothurOutEndLine();m->control_pressed = true; }
850 //only limit repeats on query names
851 if (temp == "Query") {
852 itNames = namesInFile.find((itUnique->second));
854 if (itNames == namesInFile.end()) {
855 out << itUnique->second << restOfName << endl;
856 namesInFile.insert((itUnique->second));
858 }else { out << itUnique->second << restOfName << endl; }
863 }else { //not need to alter line
864 out3 << line << endl;
866 }else { out3 << endl; }
871 m->mothurRemove(alnsFileName);
872 rename((alnsFileName+".temp").c_str(), alnsFileName.c_str());
877 catch(exception& e) {
878 m->errorOut(e, "ChimeraUchimeCommand", "deconvoluteResults");
882 //**********************************************************************************************************************
883 int ChimeraUchimeCommand::printFile(vector<seqPriorityNode>& nameMapCount, string filename){
886 sort(nameMapCount.begin(), nameMapCount.end(), compareSeqPriorityNodes);
889 m->openOutputFile(filename, out);
891 //print new file in order of
892 for (int i = 0; i < nameMapCount.size(); i++) {
893 out << ">" << nameMapCount[i].name << "/ab=" << nameMapCount[i].numIdentical << "/" << endl << nameMapCount[i].seq << endl;
899 catch(exception& e) {
900 m->errorOut(e, "ChimeraUchimeCommand", "printFile");
904 //**********************************************************************************************************************
905 int ChimeraUchimeCommand::readFasta(string filename, map<string, string>& seqs){
907 //create input file for uchime
908 //read through fastafile and store info
910 m->openInputFile(filename, in);
914 if (m->control_pressed) { in.close(); return 0; }
916 Sequence seq(in); m->gobble(in);
917 seqs[seq.getName()] = seq.getAligned();
923 catch(exception& e) {
924 m->errorOut(e, "ChimeraUchimeCommand", "readFasta");
928 //**********************************************************************************************************************
930 string ChimeraUchimeCommand::getNamesFile(string& inputFile){
932 string nameFile = "";
934 m->mothurOutEndLine(); m->mothurOut("No namesfile given, running unique.seqs command to generate one."); m->mothurOutEndLine(); m->mothurOutEndLine();
936 //use unique.seqs to create new name and fastafile
937 string inputString = "fasta=" + inputFile;
938 m->mothurOut("/******************************************/"); m->mothurOutEndLine();
939 m->mothurOut("Running command: unique.seqs(" + inputString + ")"); m->mothurOutEndLine();
940 m->mothurCalling = true;
942 Command* uniqueCommand = new DeconvoluteCommand(inputString);
943 uniqueCommand->execute();
945 map<string, vector<string> > filenames = uniqueCommand->getOutputFiles();
947 delete uniqueCommand;
948 m->mothurCalling = false;
949 m->mothurOut("/******************************************/"); m->mothurOutEndLine();
951 nameFile = filenames["name"][0];
952 inputFile = filenames["fasta"][0];
956 catch(exception& e) {
957 m->errorOut(e, "ChimeraUchimeCommand", "getNamesFile");
961 //**********************************************************************************************************************
962 int ChimeraUchimeCommand::driverGroups(SequenceParser& parser, string outputFName, string filename, string accnos, string alns, int start, int end, vector<string> groups){
968 for (int i = start; i < end; i++) {
969 int start = time(NULL); if (m->control_pressed) { return 0; }
971 int error = parser.getSeqs(groups[i], filename, true); if ((error == 1) || m->control_pressed) { return 0; }
973 int numSeqs = driver((outputFName + groups[i]), filename, (accnos+ groups[i]), (alns+ groups[i]), numChimeras);
974 totalSeqs += numSeqs;
976 if (m->control_pressed) { return 0; }
978 //remove file made for uchime
979 m->mothurRemove(filename);
982 m->appendFiles((outputFName+groups[i]), outputFName); m->mothurRemove((outputFName+groups[i]));
983 m->appendFiles((accnos+groups[i]), accnos); m->mothurRemove((accnos+groups[i]));
984 if (chimealns) { m->appendFiles((alns+groups[i]), alns); m->mothurRemove((alns+groups[i])); }
986 m->mothurOutEndLine(); m->mothurOut("It took " + toString(time(NULL) - start) + " secs to check " + toString(numSeqs) + " sequences from group " + groups[i] + "."); m->mothurOutEndLine();
992 catch(exception& e) {
993 m->errorOut(e, "ChimeraUchimeCommand", "driverGroups");
997 //**********************************************************************************************************************
999 int ChimeraUchimeCommand::driver(string outputFName, string filename, string accnos, string alns, int& numChimeras){
1002 outputFName = m->getFullPathName(outputFName);
1003 filename = m->getFullPathName(filename);
1004 alns = m->getFullPathName(alns);
1006 //to allow for spaces in the path
1007 outputFName = "\"" + outputFName + "\"";
1008 filename = "\"" + filename + "\"";
1009 alns = "\"" + alns + "\"";
1011 vector<char*> cPara;
1013 string path = m->argv;
1014 string tempPath = path;
1015 for (int i = 0; i < path.length(); i++) { tempPath[i] = tolower(path[i]); }
1016 path = path.substr(0, (tempPath.find_last_of('m')));
1018 string uchimeCommand = path;
1019 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
1020 uchimeCommand += "uchime ";
1022 uchimeCommand += "uchime";
1023 uchimeCommand = "\"" + uchimeCommand + "\"";
1027 tempUchime= new char[uchimeCommand.length()+1];
1029 strncat(tempUchime, uchimeCommand.c_str(), uchimeCommand.length());
1030 cPara.push_back(tempUchime);
1032 char* tempIn = new char[8];
1033 *tempIn = '\0'; strncat(tempIn, "--input", 7);
1034 //strcpy(tempIn, "--input");
1035 cPara.push_back(tempIn);
1036 char* temp = new char[filename.length()+1];
1037 *temp = '\0'; strncat(temp, filename.c_str(), filename.length());
1038 //strcpy(temp, filename.c_str());
1039 cPara.push_back(temp);
1041 //are you using a reference file
1042 if (templatefile != "self") {
1043 //add reference file
1044 char* tempRef = new char[5];
1045 //strcpy(tempRef, "--db");
1046 *tempRef = '\0'; strncat(tempRef, "--db", 4);
1047 cPara.push_back(tempRef);
1048 char* tempR = new char[templatefile.length()+1];
1049 //strcpy(tempR, templatefile.c_str());
1050 *tempR = '\0'; strncat(tempR, templatefile.c_str(), templatefile.length());
1051 cPara.push_back(tempR);
1054 char* tempO = new char[12];
1055 *tempO = '\0'; strncat(tempO, "--uchimeout", 11);
1056 //strcpy(tempO, "--uchimeout");
1057 cPara.push_back(tempO);
1058 char* tempout = new char[outputFName.length()+1];
1059 //strcpy(tempout, outputFName.c_str());
1060 *tempout = '\0'; strncat(tempout, outputFName.c_str(), outputFName.length());
1061 cPara.push_back(tempout);
1064 char* tempA = new char[13];
1065 *tempA = '\0'; strncat(tempA, "--uchimealns", 12);
1066 //strcpy(tempA, "--uchimealns");
1067 cPara.push_back(tempA);
1068 char* tempa = new char[alns.length()+1];
1069 //strcpy(tempa, alns.c_str());
1070 *tempa = '\0'; strncat(tempa, alns.c_str(), alns.length());
1071 cPara.push_back(tempa);
1075 char* tempskew = new char[9];
1076 *tempskew = '\0'; strncat(tempskew, "--abskew", 8);
1077 //strcpy(tempskew, "--abskew");
1078 cPara.push_back(tempskew);
1079 char* tempSkew = new char[abskew.length()+1];
1080 //strcpy(tempSkew, abskew.c_str());
1081 *tempSkew = '\0'; strncat(tempSkew, abskew.c_str(), abskew.length());
1082 cPara.push_back(tempSkew);
1086 char* tempminh = new char[7];
1087 *tempminh = '\0'; strncat(tempminh, "--minh", 6);
1088 //strcpy(tempminh, "--minh");
1089 cPara.push_back(tempminh);
1090 char* tempMinH = new char[minh.length()+1];
1091 *tempMinH = '\0'; strncat(tempMinH, minh.c_str(), minh.length());
1092 //strcpy(tempMinH, minh.c_str());
1093 cPara.push_back(tempMinH);
1097 char* tempmindiv = new char[9];
1098 *tempmindiv = '\0'; strncat(tempmindiv, "--mindiv", 8);
1099 //strcpy(tempmindiv, "--mindiv");
1100 cPara.push_back(tempmindiv);
1101 char* tempMindiv = new char[mindiv.length()+1];
1102 *tempMindiv = '\0'; strncat(tempMindiv, mindiv.c_str(), mindiv.length());
1103 //strcpy(tempMindiv, mindiv.c_str());
1104 cPara.push_back(tempMindiv);
1108 char* tempxn = new char[5];
1109 //strcpy(tempxn, "--xn");
1110 *tempxn = '\0'; strncat(tempxn, "--xn", 4);
1111 cPara.push_back(tempxn);
1112 char* tempXn = new char[xn.length()+1];
1113 //strcpy(tempXn, xn.c_str());
1114 *tempXn = '\0'; strncat(tempXn, xn.c_str(), xn.length());
1115 cPara.push_back(tempXn);
1119 char* tempdn = new char[5];
1120 //strcpy(tempdn, "--dn");
1121 *tempdn = '\0'; strncat(tempdn, "--dn", 4);
1122 cPara.push_back(tempdn);
1123 char* tempDn = new char[dn.length()+1];
1124 *tempDn = '\0'; strncat(tempDn, dn.c_str(), dn.length());
1125 //strcpy(tempDn, dn.c_str());
1126 cPara.push_back(tempDn);
1130 char* tempxa = new char[5];
1131 //strcpy(tempxa, "--xa");
1132 *tempxa = '\0'; strncat(tempxa, "--xa", 4);
1133 cPara.push_back(tempxa);
1134 char* tempXa = new char[xa.length()+1];
1135 *tempXa = '\0'; strncat(tempXa, xa.c_str(), xa.length());
1136 //strcpy(tempXa, xa.c_str());
1137 cPara.push_back(tempXa);
1141 char* tempchunks = new char[9];
1142 //strcpy(tempchunks, "--chunks");
1143 *tempchunks = '\0'; strncat(tempchunks, "--chunks", 8);
1144 cPara.push_back(tempchunks);
1145 char* tempChunks = new char[chunks.length()+1];
1146 *tempChunks = '\0'; strncat(tempChunks, chunks.c_str(), chunks.length());
1147 //strcpy(tempChunks, chunks.c_str());
1148 cPara.push_back(tempChunks);
1152 char* tempminchunk = new char[11];
1153 //strcpy(tempminchunk, "--minchunk");
1154 *tempminchunk = '\0'; strncat(tempminchunk, "--minchunk", 10);
1155 cPara.push_back(tempminchunk);
1156 char* tempMinchunk = new char[minchunk.length()+1];
1157 *tempMinchunk = '\0'; strncat(tempMinchunk, minchunk.c_str(), minchunk.length());
1158 //strcpy(tempMinchunk, minchunk.c_str());
1159 cPara.push_back(tempMinchunk);
1162 if (useIdsmoothwindow) {
1163 char* tempidsmoothwindow = new char[17];
1164 *tempidsmoothwindow = '\0'; strncat(tempidsmoothwindow, "--idsmoothwindow", 16);
1165 //strcpy(tempidsmoothwindow, "--idsmoothwindow");
1166 cPara.push_back(tempidsmoothwindow);
1167 char* tempIdsmoothwindow = new char[idsmoothwindow.length()+1];
1168 *tempIdsmoothwindow = '\0'; strncat(tempIdsmoothwindow, idsmoothwindow.c_str(), idsmoothwindow.length());
1169 //strcpy(tempIdsmoothwindow, idsmoothwindow.c_str());
1170 cPara.push_back(tempIdsmoothwindow);
1173 /*if (useMinsmoothid) {
1174 char* tempminsmoothid = new char[14];
1175 //strcpy(tempminsmoothid, "--minsmoothid");
1176 *tempminsmoothid = '\0'; strncat(tempminsmoothid, "--minsmoothid", 13);
1177 cPara.push_back(tempminsmoothid);
1178 char* tempMinsmoothid = new char[minsmoothid.length()+1];
1179 *tempMinsmoothid = '\0'; strncat(tempMinsmoothid, minsmoothid.c_str(), minsmoothid.length());
1180 //strcpy(tempMinsmoothid, minsmoothid.c_str());
1181 cPara.push_back(tempMinsmoothid);
1185 char* tempmaxp = new char[7];
1186 //strcpy(tempmaxp, "--maxp");
1187 *tempmaxp = '\0'; strncat(tempmaxp, "--maxp", 6);
1188 cPara.push_back(tempmaxp);
1189 char* tempMaxp = new char[maxp.length()+1];
1190 *tempMaxp = '\0'; strncat(tempMaxp, maxp.c_str(), maxp.length());
1191 //strcpy(tempMaxp, maxp.c_str());
1192 cPara.push_back(tempMaxp);
1196 char* tempskipgaps = new char[13];
1197 //strcpy(tempskipgaps, "--[no]skipgaps");
1198 *tempskipgaps = '\0'; strncat(tempskipgaps, "--noskipgaps", 12);
1199 cPara.push_back(tempskipgaps);
1203 char* tempskipgaps2 = new char[14];
1204 //strcpy(tempskipgaps2, "--[no]skipgaps2");
1205 *tempskipgaps2 = '\0'; strncat(tempskipgaps2, "--noskipgaps2", 13);
1206 cPara.push_back(tempskipgaps2);
1210 char* tempminlen = new char[9];
1211 *tempminlen = '\0'; strncat(tempminlen, "--minlen", 8);
1212 //strcpy(tempminlen, "--minlen");
1213 cPara.push_back(tempminlen);
1214 char* tempMinlen = new char[minlen.length()+1];
1215 //strcpy(tempMinlen, minlen.c_str());
1216 *tempMinlen = '\0'; strncat(tempMinlen, minlen.c_str(), minlen.length());
1217 cPara.push_back(tempMinlen);
1221 char* tempmaxlen = new char[9];
1222 //strcpy(tempmaxlen, "--maxlen");
1223 *tempmaxlen = '\0'; strncat(tempmaxlen, "--maxlen", 8);
1224 cPara.push_back(tempmaxlen);
1225 char* tempMaxlen = new char[maxlen.length()+1];
1226 *tempMaxlen = '\0'; strncat(tempMaxlen, maxlen.c_str(), maxlen.length());
1227 //strcpy(tempMaxlen, maxlen.c_str());
1228 cPara.push_back(tempMaxlen);
1232 char* tempucl = new char[5];
1233 strcpy(tempucl, "--ucl");
1234 cPara.push_back(tempucl);
1237 if (useQueryfract) {
1238 char* tempqueryfract = new char[13];
1239 *tempqueryfract = '\0'; strncat(tempqueryfract, "--queryfract", 12);
1240 //strcpy(tempqueryfract, "--queryfract");
1241 cPara.push_back(tempqueryfract);
1242 char* tempQueryfract = new char[queryfract.length()+1];
1243 *tempQueryfract = '\0'; strncat(tempQueryfract, queryfract.c_str(), queryfract.length());
1244 //strcpy(tempQueryfract, queryfract.c_str());
1245 cPara.push_back(tempQueryfract);
1249 char** uchimeParameters;
1250 uchimeParameters = new char*[cPara.size()];
1251 string commandString = "";
1252 for (int i = 0; i < cPara.size(); i++) { uchimeParameters[i] = cPara[i]; commandString += toString(cPara[i]) + " "; }
1253 //int numArgs = cPara.size();
1255 //uchime_main(numArgs, uchimeParameters);
1256 //cout << "commandString = " << commandString << endl;
1257 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
1259 commandString = "\"" + commandString + "\"";
1261 system(commandString.c_str());
1264 for(int i = 0; i < cPara.size(); i++) { delete cPara[i]; }
1265 delete[] uchimeParameters;
1267 //remove "" from filenames
1268 outputFName = outputFName.substr(1, outputFName.length()-2);
1269 filename = filename.substr(1, filename.length()-2);
1270 alns = alns.substr(1, alns.length()-2);
1272 if (m->control_pressed) { return 0; }
1274 //create accnos file from uchime results
1276 m->openInputFile(outputFName, in);
1279 m->openOutputFile(accnos, out);
1285 if (m->control_pressed) { break; }
1288 string chimeraFlag = "";
1289 in >> chimeraFlag >> name;
1291 //fix name if needed
1292 if (templatefile == "self") {
1293 name = name.substr(0, name.length()-1); //rip off last /
1294 name = name.substr(0, name.find_last_of('/'));
1297 for (int i = 0; i < 15; i++) { in >> chimeraFlag; }
1300 if (chimeraFlag == "Y") { out << name << endl; numChimeras++; }
1308 catch(exception& e) {
1309 m->errorOut(e, "ChimeraUchimeCommand", "driver");
1313 /**************************************************************************************************/
1315 int ChimeraUchimeCommand::createProcesses(string outputFileName, string filename, string accnos, string alns, int& numChimeras) {
1321 vector<string> files;
1323 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
1324 //break up file into multiple files
1325 m->divideFile(filename, processors, files);
1327 if (m->control_pressed) { return 0; }
1329 //loop through and create all the processes you want
1330 while (process != processors) {
1334 processIDS.push_back(pid); //create map from line number to pid so you can append files in correct order later
1336 }else if (pid == 0){
1337 num = driver(outputFileName + toString(getpid()) + ".temp", files[process], accnos + toString(getpid()) + ".temp", alns + toString(getpid()) + ".temp", numChimeras);
1339 //pass numSeqs to parent
1341 string tempFile = outputFileName + toString(getpid()) + ".num.temp";
1342 m->openOutputFile(tempFile, out);
1344 out << numChimeras << endl;
1349 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine();
1350 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
1356 num = driver(outputFileName, files[0], accnos, alns, numChimeras);
1358 //force parent to wait until all the processes are done
1359 for (int i=0;i<processIDS.size();i++) {
1360 int temp = processIDS[i];
1364 for (int i = 0; i < processIDS.size(); i++) {
1366 string tempFile = outputFileName + toString(processIDS[i]) + ".num.temp";
1367 m->openInputFile(tempFile, in);
1370 in >> tempNum; m->gobble(in);
1373 numChimeras += tempNum;
1375 in.close(); m->mothurRemove(tempFile);
1378 //////////////////////////////////////////////////////////////////////////////////////////////////////
1379 //Windows version shared memory, so be careful when passing variables through the preClusterData struct.
1380 //Above fork() will clone, so memory is separate, but that's not the case with windows,
1381 //////////////////////////////////////////////////////////////////////////////////////////////////////
1386 map<int, ofstream*> filehandles;
1387 map<int, ofstream*>::iterator it3;
1390 for (int i = 0; i < processors; i++) {
1391 temp = new ofstream;
1392 filehandles[i] = temp;
1393 m->openOutputFile(filename+toString(i)+".temp", *(temp));
1394 files.push_back(filename+toString(i)+".temp");
1398 m->openInputFile(filename, in);
1402 if (m->control_pressed) { in.close(); for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) { (*(it3->second)).close(); delete it3->second; } return 0; }
1404 Sequence tempSeq(in); m->gobble(in);
1406 if (tempSeq.getName() != "") {
1407 tempSeq.printSequence(*(filehandles[spot]));
1409 if (spot == processors) { spot = 0; }
1415 for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) {
1416 (*(it3->second)).close();
1420 //sanity check for number of processors
1421 if (count < processors) { processors = count; }
1423 vector<uchimeData*> pDataArray;
1424 DWORD dwThreadIdArray[processors-1];
1425 HANDLE hThreadArray[processors-1];
1426 vector<string> dummy; //used so that we can use the same struct for MyUchimeSeqsThreadFunction and MyUchimeThreadFunction
1428 //Create processor worker threads.
1429 for( int i=1; i<processors; i++ ){
1430 // Allocate memory for thread data.
1431 string extension = toString(i) + ".temp";
1433 uchimeData* tempUchime = new uchimeData(outputFileName+extension, templatefile, files[i], "", "", "", accnos+extension, alns+extension, dummy, m, 0, 0, i);
1434 tempUchime->setBooleans(useAbskew, chimealns, useMinH, useMindiv, useXn, useDn, useXa, useChunks, useMinchunk, useIdsmoothwindow, useMinsmoothid, useMaxp, skipgaps, skipgaps2, useMinlen, useMaxlen, ucl, useQueryfract);
1435 tempUchime->setVariables(abskew, minh, mindiv, xn, dn, xa, chunks, minchunk, idsmoothwindow, minsmoothid, maxp, minlen, maxlen, queryfract);
1437 pDataArray.push_back(tempUchime);
1438 processIDS.push_back(i);
1440 //MySeqSumThreadFunction is in header. It must be global or static to work with the threads.
1441 //default security attributes, thread function name, argument to thread function, use default creation flags, returns the thread identifier
1442 hThreadArray[i-1] = CreateThread(NULL, 0, MyUchimeSeqsThreadFunction, pDataArray[i-1], 0, &dwThreadIdArray[i-1]);
1446 //using the main process as a worker saves time and memory
1447 num = driver(outputFileName, files[0], accnos, alns, numChimeras);
1449 //Wait until all threads have terminated.
1450 WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE);
1452 //Close all thread handles and free memory allocations.
1453 for(int i=0; i < pDataArray.size(); i++){
1454 num += pDataArray[i]->count;
1455 numChimeras += pDataArray[i]->numChimeras;
1456 CloseHandle(hThreadArray[i]);
1457 delete pDataArray[i];
1461 //append output files
1462 for(int i=0;i<processIDS.size();i++){
1463 m->appendFiles((outputFileName + toString(processIDS[i]) + ".temp"), outputFileName);
1464 m->mothurRemove((outputFileName + toString(processIDS[i]) + ".temp"));
1466 m->appendFiles((accnos + toString(processIDS[i]) + ".temp"), accnos);
1467 m->mothurRemove((accnos + toString(processIDS[i]) + ".temp"));
1470 m->appendFiles((alns + toString(processIDS[i]) + ".temp"), alns);
1471 m->mothurRemove((alns + toString(processIDS[i]) + ".temp"));
1475 //get rid of the file pieces.
1476 for (int i = 0; i < files.size(); i++) { m->mothurRemove(files[i]); }
1479 catch(exception& e) {
1480 m->errorOut(e, "ChimeraUchimeCommand", "createProcesses");
1484 /**************************************************************************************************/
1486 int ChimeraUchimeCommand::createProcessesGroups(SequenceParser& parser, string outputFName, string filename, string accnos, string alns, vector<string> groups, string nameFile, string groupFile, string fastaFile) {
1494 if (groups.size() < processors) { processors = groups.size(); }
1496 //divide the groups between the processors
1497 vector<linePair> lines;
1498 int numGroupsPerProcessor = groups.size() / processors;
1499 for (int i = 0; i < processors; i++) {
1500 int startIndex = i * numGroupsPerProcessor;
1501 int endIndex = (i+1) * numGroupsPerProcessor;
1502 if(i == (processors - 1)){ endIndex = groups.size(); }
1503 lines.push_back(linePair(startIndex, endIndex));
1506 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
1508 //loop through and create all the processes you want
1509 while (process != processors) {
1513 processIDS.push_back(pid); //create map from line number to pid so you can append files in correct order later
1515 }else if (pid == 0){
1516 num = driverGroups(parser, outputFName + toString(getpid()) + ".temp", filename + toString(getpid()) + ".temp", accnos + toString(getpid()) + ".temp", alns + toString(getpid()) + ".temp", lines[process].start, lines[process].end, groups);
1518 //pass numSeqs to parent
1520 string tempFile = outputFName + toString(getpid()) + ".num.temp";
1521 m->openOutputFile(tempFile, out);
1527 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine();
1528 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
1534 num = driverGroups(parser, outputFName, filename, accnos, alns, lines[0].start, lines[0].end, groups);
1536 //force parent to wait until all the processes are done
1537 for (int i=0;i<processIDS.size();i++) {
1538 int temp = processIDS[i];
1542 for (int i = 0; i < processIDS.size(); i++) {
1544 string tempFile = outputFName + toString(processIDS[i]) + ".num.temp";
1545 m->openInputFile(tempFile, in);
1546 if (!in.eof()) { int tempNum = 0; in >> tempNum; num += tempNum; }
1547 in.close(); m->mothurRemove(tempFile);
1551 //////////////////////////////////////////////////////////////////////////////////////////////////////
1552 //Windows version shared memory, so be careful when passing variables through the uchimeData struct.
1553 //Above fork() will clone, so memory is separate, but that's not the case with windows,
1554 //////////////////////////////////////////////////////////////////////////////////////////////////////
1556 vector<uchimeData*> pDataArray;
1557 DWORD dwThreadIdArray[processors-1];
1558 HANDLE hThreadArray[processors-1];
1560 //Create processor worker threads.
1561 for( int i=1; i<processors; i++ ){
1562 // Allocate memory for thread data.
1563 string extension = toString(i) + ".temp";
1565 uchimeData* tempUchime = new uchimeData(outputFName+extension, templatefile, filename+extension, fastaFile, nameFile, groupFile, accnos+extension, alns+extension, groups, m, lines[i].start, lines[i].end, i);
1566 tempUchime->setBooleans(useAbskew, chimealns, useMinH, useMindiv, useXn, useDn, useXa, useChunks, useMinchunk, useIdsmoothwindow, useMinsmoothid, useMaxp, skipgaps, skipgaps2, useMinlen, useMaxlen, ucl, useQueryfract);
1567 tempUchime->setVariables(abskew, minh, mindiv, xn, dn, xa, chunks, minchunk, idsmoothwindow, minsmoothid, maxp, minlen, maxlen, queryfract);
1569 pDataArray.push_back(tempUchime);
1570 processIDS.push_back(i);
1572 //MyUchimeThreadFunction is in header. It must be global or static to work with the threads.
1573 //default security attributes, thread function name, argument to thread function, use default creation flags, returns the thread identifier
1574 hThreadArray[i-1] = CreateThread(NULL, 0, MyUchimeThreadFunction, pDataArray[i-1], 0, &dwThreadIdArray[i-1]);
1578 //using the main process as a worker saves time and memory
1579 num = driverGroups(parser, outputFName, filename, accnos, alns, lines[0].start, lines[0].end, groups);
1581 //Wait until all threads have terminated.
1582 WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE);
1584 //Close all thread handles and free memory allocations.
1585 for(int i=0; i < pDataArray.size(); i++){
1586 num += pDataArray[i]->count;
1587 CloseHandle(hThreadArray[i]);
1588 delete pDataArray[i];
1593 //append output files
1594 for(int i=0;i<processIDS.size();i++){
1595 m->appendFiles((outputFName + toString(processIDS[i]) + ".temp"), outputFName);
1596 m->mothurRemove((outputFName + toString(processIDS[i]) + ".temp"));
1598 m->appendFiles((accnos + toString(processIDS[i]) + ".temp"), accnos);
1599 m->mothurRemove((accnos + toString(processIDS[i]) + ".temp"));
1602 m->appendFiles((alns + toString(processIDS[i]) + ".temp"), alns);
1603 m->mothurRemove((alns + toString(processIDS[i]) + ".temp"));
1610 catch(exception& e) {
1611 m->errorOut(e, "ChimeraUchimeCommand", "createProcessesGroups");
1615 /**************************************************************************************************/