2 * chimerauchimecommand.cpp
5 * Created by westcott on 5/13/11.
6 * Copyright 2011 Schloss Lab. All rights reserved.
10 #include "chimerauchimecommand.h"
11 #include "deconvolutecommand.h"
13 #include "sequence.hpp"
14 #include "referencedb.h"
17 //**********************************************************************************************************************
18 vector<string> ChimeraUchimeCommand::setParameters(){
20 CommandParameter ptemplate("reference", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(ptemplate);
21 CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pfasta);
22 CommandParameter pname("name", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pname);
23 CommandParameter pgroup("group", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pgroup);
24 CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors);
25 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
26 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
27 CommandParameter pabskew("abskew", "Number", "", "1.9", "", "", "",false,false); parameters.push_back(pabskew);
28 CommandParameter pchimealns("chimealns", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pchimealns);
29 CommandParameter pminh("minh", "Number", "", "0.3", "", "", "",false,false); parameters.push_back(pminh);
30 CommandParameter pmindiv("mindiv", "Number", "", "0.5", "", "", "",false,false); parameters.push_back(pmindiv);
31 CommandParameter pxn("xn", "Number", "", "8.0", "", "", "",false,false); parameters.push_back(pxn);
32 CommandParameter pdn("dn", "Number", "", "1.4", "", "", "",false,false); parameters.push_back(pdn);
33 CommandParameter pxa("xa", "Number", "", "1", "", "", "",false,false); parameters.push_back(pxa);
34 CommandParameter pchunks("chunks", "Number", "", "4", "", "", "",false,false); parameters.push_back(pchunks);
35 CommandParameter pminchunk("minchunk", "Number", "", "64", "", "", "",false,false); parameters.push_back(pminchunk);
36 CommandParameter pidsmoothwindow("idsmoothwindow", "Number", "", "32", "", "", "",false,false); parameters.push_back(pidsmoothwindow);
37 //CommandParameter pminsmoothid("minsmoothid", "Number", "", "0.95", "", "", "",false,false); parameters.push_back(pminsmoothid);
38 CommandParameter pmaxp("maxp", "Number", "", "2", "", "", "",false,false); parameters.push_back(pmaxp);
39 CommandParameter pskipgaps("skipgaps", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(pskipgaps);
40 CommandParameter pskipgaps2("skipgaps2", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(pskipgaps2);
41 CommandParameter pminlen("minlen", "Number", "", "10", "", "", "",false,false); parameters.push_back(pminlen);
42 CommandParameter pmaxlen("maxlen", "Number", "", "10000", "", "", "",false,false); parameters.push_back(pmaxlen);
43 CommandParameter pucl("ucl", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pucl);
44 CommandParameter pqueryfract("queryfract", "Number", "", "0.5", "", "", "",false,false); parameters.push_back(pqueryfract);
46 vector<string> myArray;
47 for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); }
51 m->errorOut(e, "ChimeraUchimeCommand", "setParameters");
55 //**********************************************************************************************************************
56 string ChimeraUchimeCommand::getHelpString(){
58 string helpString = "";
59 helpString += "The chimera.uchime command reads a fastafile and referencefile and outputs potentially chimeric sequences.\n";
60 helpString += "This command is a wrapper for uchime written by Robert C. Edgar.\n";
61 helpString += "The chimera.uchime command parameters are fasta, name, reference, processors, abskew, chimealns, minh, mindiv, xn, dn, xa, chunks, minchunk, idsmoothwindow, minsmoothid, maxp, skipgaps, skipgaps2, minlen, maxlen, ucl and queryfact.\n";
62 helpString += "The fasta parameter allows you to enter the fasta file containing your potentially chimeric sequences, and is required, unless you have a valid current fasta file. \n";
63 helpString += "The name parameter allows you to provide a name file, if you are using template=self. \n";
64 helpString += "You may enter multiple fasta files by separating their names with dashes. ie. fasta=abrecovery.fasta-amazon.fasta \n";
65 helpString += "The group parameter allows you to provide a group file. The group file can be used with a namesfile and reference=self. When checking sequences, only sequences from the same group as the query sequence will be used as the reference. \n";
66 helpString += "The reference parameter allows you to enter a reference file containing known non-chimeric sequences, and is required. You may also set template=self, in this case the abundant sequences will be used as potential parents. \n";
67 helpString += "The processors parameter allows you to specify how many processors you would like to use. The default is 1. \n";
68 helpString += "The abskew parameter can only be used with template=self. Minimum abundance skew. Default 1.9. Abundance skew is: min [ abund(parent1), abund(parent2) ] / abund(query).\n";
69 helpString += "The chimealns parameter allows you to indicate you would like a file containing multiple alignments of query sequences to parents in human readable format. Alignments show columns with differences that support or contradict a chimeric model.\n";
70 helpString += "The minh parameter - mininum score to report chimera. Default 0.3. Values from 0.1 to 5 might be reasonable. Lower values increase sensitivity but may report more false positives. If you decrease xn you may need to increase minh, and vice versa.\n";
71 helpString += "The mindiv parameter - minimum divergence ratio, default 0.5. Div ratio is 100%% - %%identity between query sequence and the closest candidate for being a parent. If you don't care about very close chimeras, then you could increase mindiv to, say, 1.0 or 2.0, and also decrease minh, say to 0.1, to increase sensitivity. How well this works will depend on your data. Best is to tune parameters on a good benchmark.\n";
72 helpString += "The xn parameter - weight of a no vote. Default 8.0. Decreasing this weight to around 3 or 4 may give better performance on denoised data.\n";
73 helpString += "The dn parameter - pseudo-count prior on number of no votes. Default 1.4. Probably no good reason to change this unless you can retune to a good benchmark for your data. Reasonable values are probably in the range from 0.2 to 2.\n";
74 helpString += "The xa parameter - weight of an abstain vote. Default 1. So far, results do not seem to be very sensitive to this parameter, but if you have a good training set might be worth trying. Reasonable values might range from 0.1 to 2.\n";
75 helpString += "The chunks parameter is the number of chunks to extract from the query sequence when searching for parents. Default 4.\n";
76 helpString += "The minchunk parameter is the minimum length of a chunk. Default 64.\n";
77 helpString += "The idsmoothwindow parameter is the length of id smoothing window. Default 32.\n";
78 //helpString += "The minsmoothid parameter - minimum factional identity over smoothed window of candidate parent. Default 0.95.\n";
79 helpString += "The maxp parameter - maximum number of candidate parents to consider. Default 2. In tests so far, increasing maxp gives only a very small improvement in sensivity but tends to increase the error rate quite a bit.\n";
80 helpString += "The skipgaps parameter controls how gapped columns affect counting of diffs. If skipgaps is set to T, columns containing gaps do not found as diffs. Default = T.\n";
81 helpString += "The skipgaps2 parameter controls how gapped columns affect counting of diffs. If skipgaps2 is set to T, if column is immediately adjacent to a column containing a gap, it is not counted as a diff. Default = T.\n";
82 helpString += "The minlen parameter is the minimum unaligned sequence length. Defaults 10. Applies to both query and reference sequences.\n";
83 helpString += "The maxlen parameter is the maximum unaligned sequence length. Defaults 10000. Applies to both query and reference sequences.\n";
84 helpString += "The ucl parameter - use local-X alignments. Default is global-X or false. On tests so far, global-X is always better; this option is retained because it just might work well on some future type of data.\n";
85 helpString += "The queryfract parameter - minimum fraction of the query sequence that must be covered by a local-X alignment. Default 0.5. Applies only when ucl is true.\n";
87 helpString += "When using MPI, the processors parameter is set to the number of MPI processes running. \n";
89 helpString += "The chimera.uchime command should be in the following format: \n";
90 helpString += "chimera.uchime(fasta=yourFastaFile, reference=yourTemplate) \n";
91 helpString += "Example: chimera.uchime(fasta=AD.align, reference=silva.gold.align) \n";
92 helpString += "Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFastaFile).\n";
96 m->errorOut(e, "ChimeraUchimeCommand", "getHelpString");
100 //**********************************************************************************************************************
101 ChimeraUchimeCommand::ChimeraUchimeCommand(){
103 abort = true; calledHelp = true;
105 vector<string> tempOutNames;
106 outputTypes["chimera"] = tempOutNames;
107 outputTypes["accnos"] = tempOutNames;
108 outputTypes["alns"] = tempOutNames;
110 catch(exception& e) {
111 m->errorOut(e, "ChimeraUchimeCommand", "ChimeraUchimeCommand");
115 //***************************************************************************************************************
116 ChimeraUchimeCommand::ChimeraUchimeCommand(string option) {
118 abort = false; calledHelp = false;
119 ReferenceDB* rdb = ReferenceDB::getInstance();
121 //allow user to run help
122 if(option == "help") { help(); abort = true; calledHelp = true; }
123 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
126 vector<string> myArray = setParameters();
128 OptionParser parser(option);
129 map<string,string> parameters = parser.getParameters();
131 ValidParameters validParameter("chimera.uchime");
132 map<string,string>::iterator it;
134 //check to make sure all parameters are valid for command
135 for (it = parameters.begin(); it != parameters.end(); it++) {
136 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
139 vector<string> tempOutNames;
140 outputTypes["chimera"] = tempOutNames;
141 outputTypes["accnos"] = tempOutNames;
142 outputTypes["alns"] = tempOutNames;
144 //if the user changes the input directory command factory will send this info to us in the output parameter
145 string inputDir = validParameter.validFile(parameters, "inputdir", false);
146 if (inputDir == "not found"){ inputDir = ""; }
148 //check for required parameters
149 fastafile = validParameter.validFile(parameters, "fasta", false);
150 if (fastafile == "not found") {
151 //if there is a current fasta file, use it
152 string filename = m->getFastaFile();
153 if (filename != "") { fastaFileNames.push_back(filename); m->mothurOut("Using " + filename + " as input file for the fasta parameter."); m->mothurOutEndLine(); }
154 else { m->mothurOut("You have no current fastafile and the fasta parameter is required."); m->mothurOutEndLine(); abort = true; }
156 m->splitAtDash(fastafile, fastaFileNames);
158 //go through files and make sure they are good, if not, then disregard them
159 for (int i = 0; i < fastaFileNames.size(); i++) {
162 if (fastaFileNames[i] == "current") {
163 fastaFileNames[i] = m->getFastaFile();
164 if (fastaFileNames[i] != "") { m->mothurOut("Using " + fastaFileNames[i] + " as input file for the fasta parameter where you had given current."); m->mothurOutEndLine(); }
166 m->mothurOut("You have no current fastafile, ignoring current."); m->mothurOutEndLine(); ignore=true;
167 //erase from file list
168 fastaFileNames.erase(fastaFileNames.begin()+i);
175 if (inputDir != "") {
176 string path = m->hasPath(fastaFileNames[i]);
177 //if the user has not given a path then, add inputdir. else leave path alone.
178 if (path == "") { fastaFileNames[i] = inputDir + fastaFileNames[i]; }
184 ableToOpen = m->openInputFile(fastaFileNames[i], in, "noerror");
186 //if you can't open it, try default location
187 if (ableToOpen == 1) {
188 if (m->getDefaultPath() != "") { //default path is set
189 string tryPath = m->getDefaultPath() + m->getSimpleName(fastaFileNames[i]);
190 m->mothurOut("Unable to open " + fastaFileNames[i] + ". Trying default " + tryPath); m->mothurOutEndLine();
192 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
194 fastaFileNames[i] = tryPath;
198 if (ableToOpen == 1) {
199 if (m->getOutputDir() != "") { //default path is set
200 string tryPath = m->getOutputDir() + m->getSimpleName(fastaFileNames[i]);
201 m->mothurOut("Unable to open " + fastaFileNames[i] + ". Trying output directory " + tryPath); m->mothurOutEndLine();
203 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
205 fastaFileNames[i] = tryPath;
211 if (ableToOpen == 1) {
212 m->mothurOut("Unable to open " + fastaFileNames[i] + ". It will be disregarded."); m->mothurOutEndLine();
213 //erase from file list
214 fastaFileNames.erase(fastaFileNames.begin()+i);
217 m->setFastaFile(fastaFileNames[i]);
222 //make sure there is at least one valid file left
223 if (fastaFileNames.size() == 0) { m->mothurOut("[ERROR]: no valid files."); m->mothurOutEndLine(); abort = true; }
227 //check for required parameters
229 namefile = validParameter.validFile(parameters, "name", false);
230 if (namefile == "not found") { namefile = ""; hasName = false; }
232 m->splitAtDash(namefile, nameFileNames);
234 //go through files and make sure they are good, if not, then disregard them
235 for (int i = 0; i < nameFileNames.size(); i++) {
238 if (nameFileNames[i] == "current") {
239 nameFileNames[i] = m->getNameFile();
240 if (nameFileNames[i] != "") { m->mothurOut("Using " + nameFileNames[i] + " as input file for the name parameter where you had given current."); m->mothurOutEndLine(); }
242 m->mothurOut("You have no current namefile, ignoring current."); m->mothurOutEndLine(); ignore=true;
243 //erase from file list
244 nameFileNames.erase(nameFileNames.begin()+i);
251 if (inputDir != "") {
252 string path = m->hasPath(nameFileNames[i]);
253 //if the user has not given a path then, add inputdir. else leave path alone.
254 if (path == "") { nameFileNames[i] = inputDir + nameFileNames[i]; }
260 ableToOpen = m->openInputFile(nameFileNames[i], in, "noerror");
262 //if you can't open it, try default location
263 if (ableToOpen == 1) {
264 if (m->getDefaultPath() != "") { //default path is set
265 string tryPath = m->getDefaultPath() + m->getSimpleName(nameFileNames[i]);
266 m->mothurOut("Unable to open " + nameFileNames[i] + ". Trying default " + tryPath); m->mothurOutEndLine();
268 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
270 nameFileNames[i] = tryPath;
274 if (ableToOpen == 1) {
275 if (m->getOutputDir() != "") { //default path is set
276 string tryPath = m->getOutputDir() + m->getSimpleName(nameFileNames[i]);
277 m->mothurOut("Unable to open " + nameFileNames[i] + ". Trying output directory " + tryPath); m->mothurOutEndLine();
279 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
281 nameFileNames[i] = tryPath;
287 if (ableToOpen == 1) {
288 m->mothurOut("Unable to open " + nameFileNames[i] + ". It will be disregarded."); m->mothurOutEndLine();
289 //erase from file list
290 nameFileNames.erase(nameFileNames.begin()+i);
293 m->setNameFile(nameFileNames[i]);
298 //make sure there is at least one valid file left
299 if (nameFileNames.size() == 0) { m->mothurOut("[ERROR]: no valid name files."); m->mothurOutEndLine(); abort = true; }
302 if (hasName && (nameFileNames.size() != fastaFileNames.size())) { m->mothurOut("[ERROR]: The number of namefiles does not match the number of fastafiles, please correct."); m->mothurOutEndLine(); abort=true; }
304 bool hasGroup = true;
305 groupfile = validParameter.validFile(parameters, "group", false);
306 if (groupfile == "not found") { groupfile = ""; hasGroup = false; }
308 m->splitAtDash(groupfile, groupFileNames);
310 //go through files and make sure they are good, if not, then disregard them
311 for (int i = 0; i < groupFileNames.size(); i++) {
314 if (groupFileNames[i] == "current") {
315 groupFileNames[i] = m->getGroupFile();
316 if (groupFileNames[i] != "") { m->mothurOut("Using " + groupFileNames[i] + " as input file for the group parameter where you had given current."); m->mothurOutEndLine(); }
318 m->mothurOut("You have no current namefile, ignoring current."); m->mothurOutEndLine(); ignore=true;
319 //erase from file list
320 groupFileNames.erase(groupFileNames.begin()+i);
327 if (inputDir != "") {
328 string path = m->hasPath(groupFileNames[i]);
329 //if the user has not given a path then, add inputdir. else leave path alone.
330 if (path == "") { groupFileNames[i] = inputDir + groupFileNames[i]; }
336 ableToOpen = m->openInputFile(groupFileNames[i], in, "noerror");
338 //if you can't open it, try default location
339 if (ableToOpen == 1) {
340 if (m->getDefaultPath() != "") { //default path is set
341 string tryPath = m->getDefaultPath() + m->getSimpleName(groupFileNames[i]);
342 m->mothurOut("Unable to open " + groupFileNames[i] + ". Trying default " + tryPath); m->mothurOutEndLine();
344 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
346 groupFileNames[i] = tryPath;
350 if (ableToOpen == 1) {
351 if (m->getOutputDir() != "") { //default path is set
352 string tryPath = m->getOutputDir() + m->getSimpleName(groupFileNames[i]);
353 m->mothurOut("Unable to open " + groupFileNames[i] + ". Trying output directory " + tryPath); m->mothurOutEndLine();
355 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
357 groupFileNames[i] = tryPath;
363 if (ableToOpen == 1) {
364 m->mothurOut("Unable to open " + groupFileNames[i] + ". It will be disregarded."); m->mothurOutEndLine();
365 //erase from file list
366 groupFileNames.erase(groupFileNames.begin()+i);
369 m->setGroupFile(groupFileNames[i]);
374 //make sure there is at least one valid file left
375 if (groupFileNames.size() == 0) { m->mothurOut("[ERROR]: no valid group files."); m->mothurOutEndLine(); abort = true; }
378 if (hasGroup && (groupFileNames.size() != fastaFileNames.size())) { m->mothurOut("[ERROR]: The number of groupfiles does not match the number of fastafiles, please correct."); m->mothurOutEndLine(); abort=true; }
381 //if the user changes the output directory command factory will send this info to us in the output parameter
382 outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = ""; }
385 it = parameters.find("reference");
386 //user has given a template file
387 if(it != parameters.end()){
388 if (it->second == "self") { templatefile = "self"; }
390 path = m->hasPath(it->second);
391 //if the user has not given a path then, add inputdir. else leave path alone.
392 if (path == "") { parameters["reference"] = inputDir + it->second; }
394 templatefile = validParameter.validFile(parameters, "reference", true);
395 if (templatefile == "not open") { abort = true; }
396 else if (templatefile == "not found") { //check for saved reference sequences
397 if (rdb->getSavedReference() != "") {
398 templatefile = rdb->getSavedReference();
399 m->mothurOutEndLine(); m->mothurOut("Using sequences from " + rdb->getSavedReference() + "."); m->mothurOutEndLine();
401 m->mothurOut("[ERROR]: You don't have any saved reference sequences and the reference parameter is a required.");
402 m->mothurOutEndLine();
407 }else if (hasName) { templatefile = "self"; }
409 if (rdb->getSavedReference() != "") {
410 templatefile = rdb->getSavedReference();
411 m->mothurOutEndLine(); m->mothurOut("Using sequences from " + rdb->getSavedReference() + "."); m->mothurOutEndLine();
413 m->mothurOut("[ERROR]: You don't have any saved reference sequences and the reference parameter is a required.");
414 m->mothurOutEndLine();
415 templatefile = ""; abort = true;
419 string temp = validParameter.validFile(parameters, "processors", false); if (temp == "not found"){ temp = m->getProcessors(); }
420 m->setProcessors(temp);
421 convert(temp, processors);
423 abskew = validParameter.validFile(parameters, "abskew", false); if (abskew == "not found"){ useAbskew = false; abskew = "1.9"; }else{ useAbskew = true; }
424 if (useAbskew && templatefile != "self") { m->mothurOut("The abskew parameter is only valid with template=self, ignoring."); m->mothurOutEndLine(); useAbskew = false; }
426 temp = validParameter.validFile(parameters, "chimealns", false); if (temp == "not found") { temp = "f"; }
427 chimealns = m->isTrue(temp);
429 minh = validParameter.validFile(parameters, "minh", false); if (minh == "not found") { useMinH = false; minh = "0.3"; } else{ useMinH = true; }
430 mindiv = validParameter.validFile(parameters, "mindiv", false); if (mindiv == "not found") { useMindiv = false; mindiv = "0.5"; } else{ useMindiv = true; }
431 xn = validParameter.validFile(parameters, "xn", false); if (xn == "not found") { useXn = false; xn = "8.0"; } else{ useXn = true; }
432 dn = validParameter.validFile(parameters, "dn", false); if (dn == "not found") { useDn = false; dn = "1.4"; } else{ useDn = true; }
433 xa = validParameter.validFile(parameters, "xa", false); if (xa == "not found") { useXa = false; xa = "1"; } else{ useXa = true; }
434 chunks = validParameter.validFile(parameters, "chunks", false); if (chunks == "not found") { useChunks = false; chunks = "4"; } else{ useChunks = true; }
435 minchunk = validParameter.validFile(parameters, "minchunk", false); if (minchunk == "not found") { useMinchunk = false; minchunk = "64"; } else{ useMinchunk = true; }
436 idsmoothwindow = validParameter.validFile(parameters, "idsmoothwindow", false); if (idsmoothwindow == "not found") { useIdsmoothwindow = false; idsmoothwindow = "32"; } else{ useIdsmoothwindow = true; }
437 //minsmoothid = validParameter.validFile(parameters, "minsmoothid", false); if (minsmoothid == "not found") { useMinsmoothid = false; minsmoothid = "0.95"; } else{ useMinsmoothid = true; }
438 maxp = validParameter.validFile(parameters, "maxp", false); if (maxp == "not found") { useMaxp = false; maxp = "2"; } else{ useMaxp = true; }
439 minlen = validParameter.validFile(parameters, "minlen", false); if (minlen == "not found") { useMinlen = false; minlen = "10"; } else{ useMinlen = true; }
440 maxlen = validParameter.validFile(parameters, "maxlen", false); if (maxlen == "not found") { useMaxlen = false; maxlen = "10000"; } else{ useMaxlen = true; }
442 temp = validParameter.validFile(parameters, "ucl", false); if (temp == "not found") { temp = "f"; }
443 ucl = m->isTrue(temp);
445 queryfract = validParameter.validFile(parameters, "queryfract", false); if (queryfract == "not found") { useQueryfract = false; queryfract = "0.5"; } else{ useQueryfract = true; }
446 if (!ucl && useQueryfract) { m->mothurOut("queryfact may only be used when ucl=t, ignoring."); m->mothurOutEndLine(); useQueryfract = false; }
448 temp = validParameter.validFile(parameters, "skipgaps", false); if (temp == "not found") { temp = "t"; }
449 skipgaps = m->isTrue(temp);
451 temp = validParameter.validFile(parameters, "skipgaps2", false); if (temp == "not found") { temp = "t"; }
452 skipgaps2 = m->isTrue(temp);
454 if (hasName && (templatefile != "self")) { m->mothurOut("You have provided a namefile and the reference parameter is not set to self. I am not sure what reference you are trying to use, aborting."); m->mothurOutEndLine(); abort=true; }
455 if (hasGroup && (templatefile != "self")) { m->mothurOut("You have provided a group file and the reference parameter is not set to self. I am not sure what reference you are trying to use, aborting."); m->mothurOutEndLine(); abort=true; }
457 //look for uchime exe
459 string tempPath = path;
460 for (int i = 0; i < path.length(); i++) { tempPath[i] = tolower(path[i]); }
461 path = path.substr(0, (tempPath.find_last_of('m')));
463 string uchimeCommand;
464 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
465 uchimeCommand = path + "uchime"; // format the database, -o option gives us the ability
467 uchimeCommand = path + "uchime.exe";
470 //test to make sure uchime exists
472 uchimeCommand = m->getFullPathName(uchimeCommand);
473 int ableToOpen = m->openInputFile(uchimeCommand, in, "no error"); in.close();
474 if(ableToOpen == 1) { m->mothurOut("[ERROR]: " + uchimeCommand + " file does not exist. mothur requires the uchime executable."); m->mothurOutEndLine(); abort = true; }
477 catch(exception& e) {
478 m->errorOut(e, "ChimeraSlayerCommand", "ChimeraSlayerCommand");
482 //***************************************************************************************************************
484 int ChimeraUchimeCommand::execute(){
486 if (abort == true) { if (calledHelp) { return 0; } return 2; }
488 m->mothurOut("\nuchime by Robert C. Edgar\nhttp://drive5.com/uchime\nThis code is donated to the public domain.\n\n");
490 for (int s = 0; s < fastaFileNames.size(); s++) {
492 m->mothurOut("Checking sequences from " + fastaFileNames[s] + " ..." ); m->mothurOutEndLine();
494 int start = time(NULL);
495 string nameFile = "";
496 if (outputDir == "") { outputDir = m->hasPath(fastaFileNames[s]); }//if user entered a file with a path then preserve it
497 string outputFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + "uchime.chimera";
498 string accnosFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + "uchime.accnos";
499 string alnsFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + "uchime.alns";
500 string newFasta = m->getRootName(fastaFileNames[s]) + "temp";
502 //you provided a groupfile
503 string groupFile = "";
504 if (groupFileNames.size() != 0) { groupFile = groupFileNames[s]; }
506 if ((templatefile == "self") && (groupFile == "")) { //you want to run uchime with a reference template
508 if (processors != 1) { m->mothurOut("When using template=self, mothur can only use 1 processor, continuing."); m->mothurOutEndLine(); processors = 1; }
509 if (nameFileNames.size() != 0) { //you provided a namefile and we don't need to create one
510 nameFile = nameFileNames[s];
511 }else { nameFile = getNamesFile(fastaFileNames[s]); }
513 map<string, string> seqs;
514 readFasta(fastaFileNames[s], seqs); if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } return 0; }
517 vector<seqPriorityNode> nameMapCount;
518 int error = m->readNames(nameFile, nameMapCount, seqs); if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } return 0; }
519 if (error == 1) { for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } return 0; }
520 if (seqs.size() != nameMapCount.size()) { m->mothurOut( "The number of sequences in your fastafile does not match the number of sequences in your namefile, aborting."); m->mothurOutEndLine(); for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } return 0; }
522 printFile(nameMapCount, newFasta);
523 fastaFileNames[s] = newFasta;
526 if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } return 0; }
528 if (groupFile != "") {
529 if (nameFileNames.size() != 0) { //you provided a namefile and we don't need to create one
530 nameFile = nameFileNames[s];
531 }else { nameFile = getNamesFile(fastaFileNames[s]); }
533 //Parse sequences by group
534 SequenceParser parser(groupFile, fastaFileNames[s], nameFile);
535 vector<string> groups = parser.getNamesOfGroups();
537 if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } return 0; }
540 ofstream out, out1, out2;
541 m->openOutputFile(outputFileName, out); out.close();
542 m->openOutputFile(accnosFileName, out1); out1.close();
543 if (chimealns) { m->openOutputFile(alnsFileName, out2); out2.close(); }
546 if(processors == 1) { totalSeqs = driverGroups(parser, outputFileName, newFasta, accnosFileName, alnsFileName, 0, groups.size(), groups); }
547 else { totalSeqs = createProcessesGroups(parser, outputFileName, newFasta, accnosFileName, alnsFileName, groups); }
549 if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } return 0; }
551 int totalChimeras = deconvoluteResults(parser, outputFileName, accnosFileName, alnsFileName);
553 m->mothurOutEndLine(); m->mothurOut("It took " + toString(time(NULL) - start) + " secs to check " + toString(totalSeqs) + " sequences. " + toString(totalChimeras) + " chimeras were found."); m->mothurOutEndLine();
554 m->mothurOut("The number of sequences checked may be larger than the number of unique sequences because some sequences are found in several samples."); m->mothurOutEndLine();
556 if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } return 0; }
559 if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } return 0; }
564 if(processors == 1){ numSeqs = driver(outputFileName, fastaFileNames[s], accnosFileName, alnsFileName, numChimeras); }
565 else{ numSeqs = createProcesses(outputFileName, fastaFileNames[s], accnosFileName, alnsFileName, numChimeras); }
569 m->openOutputFile(outputFileName+".temp", out);
570 out << "Score\tQuery\tParentA\tParentB\tIdQM\tIdQA\tIdQB\tIdAB\tIdQT\tLY\tLN\tLA\tRY\tRN\tRA\tDiv\tYN\n";
573 m->appendFiles(outputFileName, outputFileName+".temp");
574 m->mothurRemove(outputFileName); rename((outputFileName+".temp").c_str(), outputFileName.c_str());
576 if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } return 0; }
578 //remove file made for uchime
579 if (templatefile == "self") { m->mothurRemove(fastaFileNames[s]); }
581 m->mothurOutEndLine(); m->mothurOut("It took " + toString(time(NULL) - start) + " secs to check " + toString(numSeqs) + " sequences. " + toString(numChimeras) + " chimeras were found."); m->mothurOutEndLine();
584 outputNames.push_back(outputFileName); outputTypes["chimera"].push_back(outputFileName);
585 outputNames.push_back(accnosFileName); outputTypes["accnos"].push_back(accnosFileName);
586 if (chimealns) { outputNames.push_back(alnsFileName); outputTypes["alns"].push_back(alnsFileName); }
589 //set accnos file as new current accnosfile
591 itTypes = outputTypes.find("accnos");
592 if (itTypes != outputTypes.end()) {
593 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setAccnosFile(current); }
596 m->mothurOutEndLine();
597 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
598 for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); }
599 m->mothurOutEndLine();
604 catch(exception& e) {
605 m->errorOut(e, "ChimeraUchimeCommand", "execute");
609 //**********************************************************************************************************************
610 int ChimeraUchimeCommand::deconvoluteResults(SequenceParser& parser, string outputFileName, string accnosFileName, string alnsFileName){
612 map<string, string> uniqueNames = parser.getAllSeqsMap();
613 map<string, string>::iterator itUnique;
618 m->openInputFile(accnosFileName, in2);
621 m->openOutputFile(accnosFileName+".temp", out2);
624 set<string> namesInFile; //this is so if a sequence is found to be chimera in several samples we dont write it to the results file more than once
625 set<string>::iterator itNames;
626 set<string> chimerasInFile;
627 set<string>::iterator itChimeras;
631 if (m->control_pressed) { in2.close(); out2.close(); m->mothurRemove(outputFileName); m->mothurRemove((accnosFileName+".temp")); return 0; }
633 in2 >> name; m->gobble(in2);
636 itUnique = uniqueNames.find(name);
638 if (itUnique == uniqueNames.end()) { m->mothurOut("[ERROR]: trouble parsing accnos results. Cannot find "+ name + "."); m->mothurOutEndLine(); m->control_pressed = true; }
640 itChimeras = chimerasInFile.find((itUnique->second));
642 if (itChimeras == chimerasInFile.end()) {
643 out2 << itUnique->second << endl;
644 chimerasInFile.insert((itUnique->second));
652 m->mothurRemove(accnosFileName);
653 rename((accnosFileName+".temp").c_str(), accnosFileName.c_str());
659 m->openInputFile(outputFileName, in);
662 m->openOutputFile(outputFileName+".temp", out); out.setf(ios::fixed, ios::floatfield); out.setf(ios::showpoint);
663 out << "Score\tQuery\tParentA\tParentB\tIdQM\tIdQA\tIdQB\tIdAB\tIdQT\tLY\tLN\tLA\tRY\tRN\tRA\tDiv\tYN\n";
666 string parent1, parent2, temp2, temp3, temp4, temp5, temp6, temp7, temp8, temp9, temp10, temp11, temp12, temp13, flag;
669 //assumptions - in file each read will always look like - if uchime source is updated, revisit this code.
670 /* 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
671 0.000000 F11Fcsw_33372/ab=18/ * * * * * * * * * * * * * * N
672 0.018300 F11Fcsw_14980/ab=16/ F11Fcsw_1915/ab=35/ F11Fcsw_6032/ab=42/ 79.9 78.7 78.2 78.7 79.2 3 0 5 11 10 20 1.46 N
677 if (m->control_pressed) { in.close(); out.close(); m->mothurRemove((outputFileName+".temp")); return 0; }
680 in >> temp1; m->gobble(in);
681 in >> name; m->gobble(in);
682 in >> parent1; m->gobble(in);
683 in >> parent2; m->gobble(in);
684 in >> temp2 >> temp3 >> temp4 >> temp5 >> temp6 >> temp7 >> temp8 >> temp9 >> temp10 >> temp11 >> temp12 >> temp13 >> flag;
687 //parse name - name will look like U68590/ab=1/
688 string restOfName = "";
689 int pos = name.find_first_of('/');
690 if (pos != string::npos) {
691 restOfName = name.substr(pos);
692 name = name.substr(0, pos);
696 itUnique = uniqueNames.find(name);
698 if (itUnique == uniqueNames.end()) { m->mothurOut("[ERROR]: trouble parsing chimera results. Cannot find "+ name + "."); m->mothurOutEndLine(); m->control_pressed = true; }
700 name = itUnique->second;
701 //is this name already in the file
702 itNames = namesInFile.find((name));
704 if (itNames == namesInFile.end()) { //no not in file
705 if (flag == "N") { //are you really a no??
706 //is this sequence really not chimeric??
707 itChimeras = chimerasInFile.find(name);
709 //then you really are a no so print, otherwise skip
710 if (itChimeras == chimerasInFile.end()) { print = true; }
711 }else{ print = true; }
716 out << temp1 << '\t' << name << restOfName << '\t';
717 namesInFile.insert(name);
719 //parse parent1 names
720 if (parent1 != "*") {
722 pos = parent1.find_first_of('/');
723 if (pos != string::npos) {
724 restOfName = parent1.substr(pos);
725 parent1 = parent1.substr(0, pos);
728 itUnique = uniqueNames.find(parent1);
729 if (itUnique == uniqueNames.end()) { m->mothurOut("[ERROR]: trouble parsing chimera results. Cannot find parentA "+ parent1 + "."); m->mothurOutEndLine(); m->control_pressed = true; }
730 else { out << itUnique->second << restOfName << '\t'; }
731 }else { out << parent1 << '\t'; }
733 //parse parent2 names
734 if (parent2 != "*") {
736 pos = parent2.find_first_of('/');
737 if (pos != string::npos) {
738 restOfName = parent2.substr(pos);
739 parent2 = parent2.substr(0, pos);
742 itUnique = uniqueNames.find(parent2);
743 if (itUnique == uniqueNames.end()) { m->mothurOut("[ERROR]: trouble parsing chimera results. Cannot find parentB "+ parent2 + "."); m->mothurOutEndLine(); m->control_pressed = true; }
744 else { out << itUnique->second << restOfName << '\t'; }
745 }else { out << parent2 << '\t'; }
747 out << temp2 << '\t' << temp3 << '\t' << temp4 << '\t' << temp5 << '\t' << temp6 << '\t' << temp7 << '\t' << temp8 << '\t' << temp9 << '\t' << temp10 << '\t' << temp11 << '\t' << temp12 << temp13 << '\t' << flag << endl;
753 m->mothurRemove(outputFileName);
754 rename((outputFileName+".temp").c_str(), outputFileName.c_str());
758 //assumptions - in file each read will always look like - if uchime source is updated, revisit this code.
760 ------------------------------------------------------------------------
761 Query ( 179 nt) F21Fcsw_11639/ab=591/
762 ParentA ( 179 nt) F11Fcsw_6529/ab=1625/
763 ParentB ( 181 nt) F21Fcsw_12128/ab=1827/
765 A 1 AAGgAAGAtTAATACaagATGgCaTCatgAGtccgCATgTtcAcatGATTAAAG--gTaTtcCGGTagacGATGGGGATG 78
766 Q 1 AAGTAAGACTAATACCCAATGACGTCTCTAGAAGACATCTGAAAGAGATTAAAG--ATTTATCGGTGATGGATGGGGATG 78
767 B 1 AAGgAAGAtTAATcCaggATGggaTCatgAGttcACATgTccgcatGATTAAAGgtATTTtcCGGTagacGATGGGGATG 80
768 Diffs N N A N?N N N NNN N?NB N ?NaNNN B B NN NNNN
769 Votes 0 0 + 000 0 0 000 000+ 0 00!000 + 00 0000
770 Model AAAAAAAAAAAAAAAAAAAAAAxBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB
772 A 79 CGTtccATTAGaTaGTaGGCGGGGTAACGGCCCACCtAGtCttCGATggaTAGGGGTTCTGAGAGGAAGGTCCCCCACAT 158
773 Q 79 CGTCTGATTAGCTTGTTGGCGGGGTAACGGCCCACCAAGGCAACGATCAGTAGGGGTTCTGAGAGGAAGGTCCCCCACAT 158
774 B 81 CGTtccATTAGaTaGTaGGCGGGGTAACGGCCCACCtAGtCAACGATggaTAGGGGTTCTGAGAGGAAGGTCCCCCACAT 160
775 Diffs NNN N N N N N BB NNN
776 Votes 000 0 0 0 0 0 ++ 000
777 Model BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB
779 A 159 TGGAACTGAGACACGGTCCAA 179
780 Q 159 TGGAACTGAGACACGGTCCAA 179
781 B 161 TGGAACTGAGACACGGTCCAA 181
784 Model BBBBBBBBBBBBBBBBBBBBB
786 Ids. QA 76.6%, QB 77.7%, AB 93.7%, QModel 78.9%, Div. +1.5%
787 Diffs Left 7: N 0, A 6, Y 1 (14.3%); Right 35: N 1, A 30, Y 4 (11.4%), Score 0.0047
791 m->openInputFile(alnsFileName, in3);
794 m->openOutputFile(alnsFileName+".temp", out3); out3.setf(ios::fixed, ios::floatfield); out3.setf(ios::showpoint);
801 if (m->control_pressed) { in3.close(); out3.close(); m->mothurRemove(outputFileName); m->mothurRemove((accnosFileName)); m->mothurRemove((alnsFileName+".temp")); return 0; }
804 line = m->getline(in3);
808 istringstream iss(line);
811 //are you a name line
812 if ((temp == "Query") || (temp == "ParentA") || (temp == "ParentB")) {
814 for (int i = 0; i < line.length(); i++) {
816 if (line[i] == ')') { break; }
817 else { out3 << line[i]; }
820 if (spot == (line.length() - 1)) { m->mothurOut("[ERROR]: could not line sequence name in line " + line + "."); m->mothurOutEndLine(); m->control_pressed = true; }
821 else if ((spot+2) > (line.length() - 1)) { m->mothurOut("[ERROR]: could not line sequence name in line " + line + "."); m->mothurOutEndLine(); m->control_pressed = true; }
823 out << line[spot] << line[spot+1];
825 name = line.substr(spot+2);
827 //parse name - name will either look like U68590/ab=1/ or U68590
828 string restOfName = "";
829 int pos = name.find_first_of('/');
830 if (pos != string::npos) {
831 restOfName = name.substr(pos);
832 name = name.substr(0, pos);
836 itUnique = uniqueNames.find(name);
838 if (itUnique == uniqueNames.end()) { m->mothurOut("[ERROR]: trouble parsing alns results. Cannot find "+ name + "."); m->mothurOutEndLine();m->control_pressed = true; }
840 //only limit repeats on query names
841 if (temp == "Query") {
842 itNames = namesInFile.find((itUnique->second));
844 if (itNames == namesInFile.end()) {
845 out << itUnique->second << restOfName << endl;
846 namesInFile.insert((itUnique->second));
848 }else { out << itUnique->second << restOfName << endl; }
853 }else { //not need to alter line
854 out3 << line << endl;
856 }else { out3 << endl; }
861 m->mothurRemove(alnsFileName);
862 rename((alnsFileName+".temp").c_str(), alnsFileName.c_str());
867 catch(exception& e) {
868 m->errorOut(e, "ChimeraUchimeCommand", "deconvoluteResults");
872 //**********************************************************************************************************************
873 int ChimeraUchimeCommand::printFile(vector<seqPriorityNode>& nameMapCount, string filename){
876 sort(nameMapCount.begin(), nameMapCount.end(), compareSeqPriorityNodes);
879 m->openOutputFile(filename, out);
881 //print new file in order of
882 for (int i = 0; i < nameMapCount.size(); i++) {
883 out << ">" << nameMapCount[i].name << "/ab=" << nameMapCount[i].numIdentical << "/" << endl << nameMapCount[i].seq << endl;
889 catch(exception& e) {
890 m->errorOut(e, "ChimeraUchimeCommand", "printFile");
894 //**********************************************************************************************************************
895 int ChimeraUchimeCommand::readFasta(string filename, map<string, string>& seqs){
897 //create input file for uchime
898 //read through fastafile and store info
900 m->openInputFile(filename, in);
904 if (m->control_pressed) { in.close(); return 0; }
906 Sequence seq(in); m->gobble(in);
907 seqs[seq.getName()] = seq.getAligned();
913 catch(exception& e) {
914 m->errorOut(e, "ChimeraUchimeCommand", "readFasta");
918 //**********************************************************************************************************************
920 string ChimeraUchimeCommand::getNamesFile(string& inputFile){
922 string nameFile = "";
924 m->mothurOutEndLine(); m->mothurOut("No namesfile given, running unique.seqs command to generate one."); m->mothurOutEndLine(); m->mothurOutEndLine();
926 //use unique.seqs to create new name and fastafile
927 string inputString = "fasta=" + inputFile;
928 m->mothurOut("/******************************************/"); m->mothurOutEndLine();
929 m->mothurOut("Running command: unique.seqs(" + inputString + ")"); m->mothurOutEndLine();
931 Command* uniqueCommand = new DeconvoluteCommand(inputString);
932 uniqueCommand->execute();
934 map<string, vector<string> > filenames = uniqueCommand->getOutputFiles();
936 delete uniqueCommand;
938 m->mothurOut("/******************************************/"); m->mothurOutEndLine();
940 nameFile = filenames["name"][0];
941 inputFile = filenames["fasta"][0];
945 catch(exception& e) {
946 m->errorOut(e, "ChimeraUchimeCommand", "getNamesFile");
950 //**********************************************************************************************************************
951 int ChimeraUchimeCommand::driverGroups(SequenceParser& parser, string outputFName, string filename, string accnos, string alns, int start, int end, vector<string> groups){
957 for (int i = start; i < end; i++) {
958 int start = time(NULL); if (m->control_pressed) { return 0; }
960 int error = parser.getSeqs(groups[i], filename, true); if ((error == 1) || m->control_pressed) { return 0; }
962 int numSeqs = driver((outputFName + groups[i]), filename, (accnos+ groups[i]), (alns+ groups[i]), numChimeras);
963 totalSeqs += numSeqs;
965 if (m->control_pressed) { return 0; }
967 //remove file made for uchime
968 m->mothurRemove(filename);
971 m->appendFiles((outputFName+groups[i]), outputFName); m->mothurRemove((outputFName+groups[i]));
972 m->appendFiles((accnos+groups[i]), accnos); m->mothurRemove((accnos+groups[i]));
973 if (chimealns) { m->appendFiles((alns+groups[i]), alns); m->mothurRemove((alns+groups[i])); }
975 m->mothurOutEndLine(); m->mothurOut("It took " + toString(time(NULL) - start) + " secs to check " + toString(numSeqs) + " sequences from group " + groups[i] + "."); m->mothurOutEndLine();
981 catch(exception& e) {
982 m->errorOut(e, "ChimeraUchimeCommand", "driverGroups");
986 //**********************************************************************************************************************
988 int ChimeraUchimeCommand::driver(string outputFName, string filename, string accnos, string alns, int& numChimeras){
990 //to allow for spaces in the path
991 outputFName = "\"" + outputFName + "\"";
992 filename = "\"" + filename + "\"";
993 alns = "\"" + alns + "\"";
997 string path = m->argv;
998 string tempPath = path;
999 for (int i = 0; i < path.length(); i++) { tempPath[i] = tolower(path[i]); }
1000 path = path.substr(0, (tempPath.find_last_of('m')));
1002 string uchimeCommand = path;
1003 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
1004 uchimeCommand += "uchime ";
1006 uchimeCommand += "uchime";
1007 uchimeCommand = "\"" + uchimeCommand + "\"";
1011 tempUchime= new char[uchimeCommand.length()+1];
1013 strncat(tempUchime, uchimeCommand.c_str(), uchimeCommand.length());
1014 cPara.push_back(tempUchime);
1016 char* tempIn = new char[8];
1017 *tempIn = '\0'; strncat(tempIn, "--input", 7);
1018 //strcpy(tempIn, "--input");
1019 cPara.push_back(tempIn);
1020 char* temp = new char[filename.length()+1];
1021 *temp = '\0'; strncat(temp, filename.c_str(), filename.length());
1022 //strcpy(temp, filename.c_str());
1023 cPara.push_back(temp);
1025 //are you using a reference file
1026 if (templatefile != "self") {
1027 //add reference file
1028 char* tempRef = new char[5];
1029 //strcpy(tempRef, "--db");
1030 *tempRef = '\0'; strncat(tempRef, "--db", 4);
1031 cPara.push_back(tempRef);
1032 char* tempR = new char[templatefile.length()+1];
1033 //strcpy(tempR, templatefile.c_str());
1034 *tempR = '\0'; strncat(tempR, templatefile.c_str(), templatefile.length());
1035 cPara.push_back(tempR);
1038 char* tempO = new char[12];
1039 *tempO = '\0'; strncat(tempO, "--uchimeout", 11);
1040 //strcpy(tempO, "--uchimeout");
1041 cPara.push_back(tempO);
1042 char* tempout = new char[outputFName.length()+1];
1043 //strcpy(tempout, outputFName.c_str());
1044 *tempout = '\0'; strncat(tempout, outputFName.c_str(), outputFName.length());
1045 cPara.push_back(tempout);
1048 char* tempA = new char[13];
1049 *tempA = '\0'; strncat(tempA, "--uchimealns", 12);
1050 //strcpy(tempA, "--uchimealns");
1051 cPara.push_back(tempA);
1052 char* tempa = new char[alns.length()+1];
1053 //strcpy(tempa, alns.c_str());
1054 *tempa = '\0'; strncat(tempa, alns.c_str(), alns.length());
1055 cPara.push_back(tempa);
1059 char* tempskew = new char[9];
1060 *tempskew = '\0'; strncat(tempskew, "--abskew", 8);
1061 //strcpy(tempskew, "--abskew");
1062 cPara.push_back(tempskew);
1063 char* tempSkew = new char[abskew.length()+1];
1064 //strcpy(tempSkew, abskew.c_str());
1065 *tempSkew = '\0'; strncat(tempSkew, abskew.c_str(), abskew.length());
1066 cPara.push_back(tempSkew);
1070 char* tempminh = new char[7];
1071 *tempminh = '\0'; strncat(tempminh, "--minh", 6);
1072 //strcpy(tempminh, "--minh");
1073 cPara.push_back(tempminh);
1074 char* tempMinH = new char[minh.length()+1];
1075 *tempMinH = '\0'; strncat(tempMinH, minh.c_str(), minh.length());
1076 //strcpy(tempMinH, minh.c_str());
1077 cPara.push_back(tempMinH);
1081 char* tempmindiv = new char[9];
1082 *tempmindiv = '\0'; strncat(tempmindiv, "--mindiv", 8);
1083 //strcpy(tempmindiv, "--mindiv");
1084 cPara.push_back(tempmindiv);
1085 char* tempMindiv = new char[mindiv.length()+1];
1086 *tempMindiv = '\0'; strncat(tempMindiv, mindiv.c_str(), mindiv.length());
1087 //strcpy(tempMindiv, mindiv.c_str());
1088 cPara.push_back(tempMindiv);
1092 char* tempxn = new char[5];
1093 //strcpy(tempxn, "--xn");
1094 *tempxn = '\0'; strncat(tempxn, "--xn", 4);
1095 cPara.push_back(tempxn);
1096 char* tempXn = new char[xn.length()+1];
1097 //strcpy(tempXn, xn.c_str());
1098 *tempXn = '\0'; strncat(tempXn, xn.c_str(), xn.length());
1099 cPara.push_back(tempXn);
1103 char* tempdn = new char[5];
1104 //strcpy(tempdn, "--dn");
1105 *tempdn = '\0'; strncat(tempdn, "--dn", 4);
1106 cPara.push_back(tempdn);
1107 char* tempDn = new char[dn.length()+1];
1108 *tempDn = '\0'; strncat(tempDn, dn.c_str(), dn.length());
1109 //strcpy(tempDn, dn.c_str());
1110 cPara.push_back(tempDn);
1114 char* tempxa = new char[5];
1115 //strcpy(tempxa, "--xa");
1116 *tempxa = '\0'; strncat(tempxa, "--xa", 4);
1117 cPara.push_back(tempxa);
1118 char* tempXa = new char[xa.length()+1];
1119 *tempXa = '\0'; strncat(tempXa, xa.c_str(), xa.length());
1120 //strcpy(tempXa, xa.c_str());
1121 cPara.push_back(tempXa);
1125 char* tempchunks = new char[9];
1126 //strcpy(tempchunks, "--chunks");
1127 *tempchunks = '\0'; strncat(tempchunks, "--chunks", 8);
1128 cPara.push_back(tempchunks);
1129 char* tempChunks = new char[chunks.length()+1];
1130 *tempChunks = '\0'; strncat(tempChunks, chunks.c_str(), chunks.length());
1131 //strcpy(tempChunks, chunks.c_str());
1132 cPara.push_back(tempChunks);
1136 char* tempminchunk = new char[11];
1137 //strcpy(tempminchunk, "--minchunk");
1138 *tempminchunk = '\0'; strncat(tempminchunk, "--minchunk", 10);
1139 cPara.push_back(tempminchunk);
1140 char* tempMinchunk = new char[minchunk.length()+1];
1141 *tempMinchunk = '\0'; strncat(tempMinchunk, minchunk.c_str(), minchunk.length());
1142 //strcpy(tempMinchunk, minchunk.c_str());
1143 cPara.push_back(tempMinchunk);
1146 if (useIdsmoothwindow) {
1147 char* tempidsmoothwindow = new char[17];
1148 *tempidsmoothwindow = '\0'; strncat(tempidsmoothwindow, "--idsmoothwindow", 16);
1149 //strcpy(tempidsmoothwindow, "--idsmoothwindow");
1150 cPara.push_back(tempidsmoothwindow);
1151 char* tempIdsmoothwindow = new char[idsmoothwindow.length()+1];
1152 *tempIdsmoothwindow = '\0'; strncat(tempIdsmoothwindow, idsmoothwindow.c_str(), idsmoothwindow.length());
1153 //strcpy(tempIdsmoothwindow, idsmoothwindow.c_str());
1154 cPara.push_back(tempIdsmoothwindow);
1157 /*if (useMinsmoothid) {
1158 char* tempminsmoothid = new char[14];
1159 //strcpy(tempminsmoothid, "--minsmoothid");
1160 *tempminsmoothid = '\0'; strncat(tempminsmoothid, "--minsmoothid", 13);
1161 cPara.push_back(tempminsmoothid);
1162 char* tempMinsmoothid = new char[minsmoothid.length()+1];
1163 *tempMinsmoothid = '\0'; strncat(tempMinsmoothid, minsmoothid.c_str(), minsmoothid.length());
1164 //strcpy(tempMinsmoothid, minsmoothid.c_str());
1165 cPara.push_back(tempMinsmoothid);
1169 char* tempmaxp = new char[7];
1170 //strcpy(tempmaxp, "--maxp");
1171 *tempmaxp = '\0'; strncat(tempmaxp, "--maxp", 6);
1172 cPara.push_back(tempmaxp);
1173 char* tempMaxp = new char[maxp.length()+1];
1174 *tempMaxp = '\0'; strncat(tempMaxp, maxp.c_str(), maxp.length());
1175 //strcpy(tempMaxp, maxp.c_str());
1176 cPara.push_back(tempMaxp);
1180 char* tempskipgaps = new char[13];
1181 //strcpy(tempskipgaps, "--[no]skipgaps");
1182 *tempskipgaps = '\0'; strncat(tempskipgaps, "--noskipgaps", 12);
1183 cPara.push_back(tempskipgaps);
1187 char* tempskipgaps2 = new char[14];
1188 //strcpy(tempskipgaps2, "--[no]skipgaps2");
1189 *tempskipgaps2 = '\0'; strncat(tempskipgaps2, "--noskipgaps2", 13);
1190 cPara.push_back(tempskipgaps2);
1194 char* tempminlen = new char[9];
1195 *tempminlen = '\0'; strncat(tempminlen, "--minlen", 8);
1196 //strcpy(tempminlen, "--minlen");
1197 cPara.push_back(tempminlen);
1198 char* tempMinlen = new char[minlen.length()+1];
1199 //strcpy(tempMinlen, minlen.c_str());
1200 *tempMinlen = '\0'; strncat(tempMinlen, minlen.c_str(), minlen.length());
1201 cPara.push_back(tempMinlen);
1205 char* tempmaxlen = new char[9];
1206 //strcpy(tempmaxlen, "--maxlen");
1207 *tempmaxlen = '\0'; strncat(tempmaxlen, "--maxlen", 8);
1208 cPara.push_back(tempmaxlen);
1209 char* tempMaxlen = new char[maxlen.length()+1];
1210 *tempMaxlen = '\0'; strncat(tempMaxlen, maxlen.c_str(), maxlen.length());
1211 //strcpy(tempMaxlen, maxlen.c_str());
1212 cPara.push_back(tempMaxlen);
1216 char* tempucl = new char[5];
1217 strcpy(tempucl, "--ucl");
1218 cPara.push_back(tempucl);
1221 if (useQueryfract) {
1222 char* tempqueryfract = new char[13];
1223 *tempqueryfract = '\0'; strncat(tempqueryfract, "--queryfract", 12);
1224 //strcpy(tempqueryfract, "--queryfract");
1225 cPara.push_back(tempqueryfract);
1226 char* tempQueryfract = new char[queryfract.length()+1];
1227 *tempQueryfract = '\0'; strncat(tempQueryfract, queryfract.c_str(), queryfract.length());
1228 //strcpy(tempQueryfract, queryfract.c_str());
1229 cPara.push_back(tempQueryfract);
1233 char** uchimeParameters;
1234 uchimeParameters = new char*[cPara.size()];
1235 string commandString = "";
1236 for (int i = 0; i < cPara.size(); i++) { uchimeParameters[i] = cPara[i]; commandString += toString(cPara[i]) + " "; }
1237 //int numArgs = cPara.size();
1239 //uchime_main(numArgs, uchimeParameters);
1240 //cout << "commandString = " << commandString << endl;
1241 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
1243 commandString = "\"" + commandString + "\"";
1245 system(commandString.c_str());
1248 for(int i = 0; i < cPara.size(); i++) { delete cPara[i]; }
1249 delete[] uchimeParameters;
1251 //remove "" from filenames
1252 outputFName = outputFName.substr(1, outputFName.length()-2);
1253 filename = filename.substr(1, filename.length()-2);
1254 alns = alns.substr(1, alns.length()-2);
1256 if (m->control_pressed) { return 0; }
1258 //create accnos file from uchime results
1260 m->openInputFile(outputFName, in);
1263 m->openOutputFile(accnos, out);
1269 if (m->control_pressed) { break; }
1272 string chimeraFlag = "";
1273 in >> chimeraFlag >> name;
1275 //fix name if needed
1276 if (templatefile == "self") {
1277 name = name.substr(0, name.length()-1); //rip off last /
1278 name = name.substr(0, name.find_last_of('/'));
1281 for (int i = 0; i < 15; i++) { in >> chimeraFlag; }
1284 if (chimeraFlag == "Y") { out << name << endl; numChimeras++; }
1292 catch(exception& e) {
1293 m->errorOut(e, "ChimeraUchimeCommand", "driver");
1297 /**************************************************************************************************/
1299 int ChimeraUchimeCommand::createProcesses(string outputFileName, string filename, string accnos, string alns, int& numChimeras) {
1305 vector<string> files;
1307 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
1308 //break up file into multiple files
1309 m->divideFile(filename, processors, files);
1311 if (m->control_pressed) { return 0; }
1313 //loop through and create all the processes you want
1314 while (process != processors) {
1318 processIDS.push_back(pid); //create map from line number to pid so you can append files in correct order later
1320 }else if (pid == 0){
1321 num = driver(outputFileName + toString(getpid()) + ".temp", files[process], accnos + toString(getpid()) + ".temp", alns + toString(getpid()) + ".temp", numChimeras);
1323 //pass numSeqs to parent
1325 string tempFile = outputFileName + toString(getpid()) + ".num.temp";
1326 m->openOutputFile(tempFile, out);
1328 out << numChimeras << endl;
1333 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine();
1334 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
1340 num = driver(outputFileName, files[0], accnos, alns, numChimeras);
1342 //force parent to wait until all the processes are done
1343 for (int i=0;i<processIDS.size();i++) {
1344 int temp = processIDS[i];
1348 for (int i = 0; i < processIDS.size(); i++) {
1350 string tempFile = outputFileName + toString(processIDS[i]) + ".num.temp";
1351 m->openInputFile(tempFile, in);
1354 in >> tempNum; m->gobble(in);
1357 numChimeras += tempNum;
1359 in.close(); m->mothurRemove(tempFile);
1362 //////////////////////////////////////////////////////////////////////////////////////////////////////
1363 //Windows version shared memory, so be careful when passing variables through the preClusterData struct.
1364 //Above fork() will clone, so memory is separate, but that's not the case with windows,
1365 //////////////////////////////////////////////////////////////////////////////////////////////////////
1370 map<int, ofstream*> filehandles;
1371 map<int, ofstream*>::iterator it3;
1374 for (int i = 0; i < processors; i++) {
1375 temp = new ofstream;
1376 filehandles[i] = temp;
1377 m->openOutputFile(filename+toString(i)+".temp", *(temp));
1378 files.push_back(filename+toString(i)+".temp");
1382 m->openInputFile(filename, in);
1386 if (m->control_pressed) { in.close(); for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) { (*(it3->second)).close(); delete it3->second; } return 0; }
1388 Sequence tempSeq(in); m->gobble(in);
1390 if (tempSeq.getName() != "") {
1391 tempSeq.printSequence(*(filehandles[spot]));
1393 if (spot == processors) { spot = 0; }
1399 for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) {
1400 (*(it3->second)).close();
1404 //sanity check for number of processors
1405 if (count < processors) { processors = count; }
1407 vector<uchimeData*> pDataArray;
1408 DWORD dwThreadIdArray[processors-1];
1409 HANDLE hThreadArray[processors-1];
1410 vector<string> dummy; //used so that we can use the same struct for MyUchimeSeqsThreadFunction and MyUchimeThreadFunction
1412 //Create processor worker threads.
1413 for( int i=1; i<processors; i++ ){
1414 // Allocate memory for thread data.
1415 string extension = toString(i) + ".temp";
1417 uchimeData* tempUchime = new uchimeData(outputFileName+extension, templatefile, files[i], "", "", "", accnos+extension, alns+extension, dummy, m, 0, 0, i);
1418 tempUchime->setBooleans(useAbskew, chimealns, useMinH, useMindiv, useXn, useDn, useXa, useChunks, useMinchunk, useIdsmoothwindow, useMinsmoothid, useMaxp, skipgaps, skipgaps2, useMinlen, useMaxlen, ucl, useQueryfract);
1419 tempUchime->setVariables(abskew, minh, mindiv, xn, dn, xa, chunks, minchunk, idsmoothwindow, minsmoothid, maxp, minlen, maxlen, queryfract);
1421 pDataArray.push_back(tempUchime);
1422 processIDS.push_back(i);
1424 //MySeqSumThreadFunction is in header. It must be global or static to work with the threads.
1425 //default security attributes, thread function name, argument to thread function, use default creation flags, returns the thread identifier
1426 hThreadArray[i-1] = CreateThread(NULL, 0, MyUchimeSeqsThreadFunction, pDataArray[i-1], 0, &dwThreadIdArray[i-1]);
1430 //using the main process as a worker saves time and memory
1431 num = driver(outputFileName, files[0], accnos, alns, numChimeras);
1433 //Wait until all threads have terminated.
1434 WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE);
1436 //Close all thread handles and free memory allocations.
1437 for(int i=0; i < pDataArray.size(); i++){
1438 num += pDataArray[i]->count;
1439 numChimeras += pDataArray[i]->numChimeras;
1440 CloseHandle(hThreadArray[i]);
1441 delete pDataArray[i];
1445 //append output files
1446 for(int i=0;i<processIDS.size();i++){
1447 m->appendFiles((outputFileName + toString(processIDS[i]) + ".temp"), outputFileName);
1448 m->mothurRemove((outputFileName + toString(processIDS[i]) + ".temp"));
1450 m->appendFiles((accnos + toString(processIDS[i]) + ".temp"), accnos);
1451 m->mothurRemove((accnos + toString(processIDS[i]) + ".temp"));
1454 m->appendFiles((alns + toString(processIDS[i]) + ".temp"), alns);
1455 m->mothurRemove((alns + toString(processIDS[i]) + ".temp"));
1459 //get rid of the file pieces.
1460 for (int i = 0; i < files.size(); i++) { m->mothurRemove(files[i]); }
1463 catch(exception& e) {
1464 m->errorOut(e, "ChimeraUchimeCommand", "createProcesses");
1468 /**************************************************************************************************/
1470 int ChimeraUchimeCommand::createProcessesGroups(SequenceParser& parser, string outputFName, string filename, string accnos, string alns, vector<string> groups) {
1478 if (groups.size() < processors) { processors = groups.size(); }
1480 //divide the groups between the processors
1481 vector<linePair> lines;
1482 int numGroupsPerProcessor = groups.size() / processors;
1483 for (int i = 0; i < processors; i++) {
1484 int startIndex = i * numGroupsPerProcessor;
1485 int endIndex = (i+1) * numGroupsPerProcessor;
1486 if(i == (processors - 1)){ endIndex = groups.size(); }
1487 lines.push_back(linePair(startIndex, endIndex));
1490 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
1492 //loop through and create all the processes you want
1493 while (process != processors) {
1497 processIDS.push_back(pid); //create map from line number to pid so you can append files in correct order later
1499 }else if (pid == 0){
1500 num = driverGroups(parser, outputFName + toString(getpid()) + ".temp", filename + toString(getpid()) + ".temp", accnos + toString(getpid()) + ".temp", alns + toString(getpid()) + ".temp", lines[process].start, lines[process].end, groups);
1502 //pass numSeqs to parent
1504 string tempFile = outputFName + toString(getpid()) + ".num.temp";
1505 m->openOutputFile(tempFile, out);
1511 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine();
1512 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
1518 num = driverGroups(parser, outputFName, filename, accnos, alns, lines[0].start, lines[0].end, groups);
1520 //force parent to wait until all the processes are done
1521 for (int i=0;i<processIDS.size();i++) {
1522 int temp = processIDS[i];
1526 for (int i = 0; i < processIDS.size(); i++) {
1528 string tempFile = outputFName + toString(processIDS[i]) + ".num.temp";
1529 m->openInputFile(tempFile, in);
1530 if (!in.eof()) { int tempNum = 0; in >> tempNum; num += tempNum; }
1531 in.close(); m->mothurRemove(tempFile);
1535 //////////////////////////////////////////////////////////////////////////////////////////////////////
1536 //Windows version shared memory, so be careful when passing variables through the uchimeData struct.
1537 //Above fork() will clone, so memory is separate, but that's not the case with windows,
1538 //////////////////////////////////////////////////////////////////////////////////////////////////////
1540 vector<uchimeData*> pDataArray;
1541 DWORD dwThreadIdArray[processors-1];
1542 HANDLE hThreadArray[processors-1];
1544 //Create processor worker threads.
1545 for( int i=1; i<processors; i++ ){
1546 // Allocate memory for thread data.
1547 string extension = toString(i) + ".temp";
1549 uchimeData* tempUchime = new uchimeData(outputFName+extension, templatefile, filename+extension, fastafile, namefile, groupfile, accnos+extension, alns+extension, groups, m, lines[i].start, lines[i].end, i);
1550 tempUchime->setBooleans(useAbskew, chimealns, useMinH, useMindiv, useXn, useDn, useXa, useChunks, useMinchunk, useIdsmoothwindow, useMinsmoothid, useMaxp, skipgaps, skipgaps2, useMinlen, useMaxlen, ucl, useQueryfract);
1551 tempUchime->setVariables(abskew, minh, mindiv, xn, dn, xa, chunks, minchunk, idsmoothwindow, minsmoothid, maxp, minlen, maxlen, queryfract);
1553 pDataArray.push_back(tempUchime);
1554 processIDS.push_back(i);
1556 //MyUchimeThreadFunction is in header. It must be global or static to work with the threads.
1557 //default security attributes, thread function name, argument to thread function, use default creation flags, returns the thread identifier
1558 hThreadArray[i-1] = CreateThread(NULL, 0, MyUchimeThreadFunction, pDataArray[i-1], 0, &dwThreadIdArray[i-1]);
1562 //using the main process as a worker saves time and memory
1563 num = driverGroups(parser, outputFName, filename, accnos, alns, lines[0].start, lines[0].end, groups);
1565 //Wait until all threads have terminated.
1566 WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE);
1568 //Close all thread handles and free memory allocations.
1569 for(int i=0; i < pDataArray.size(); i++){
1570 num += pDataArray[i]->count;
1571 CloseHandle(hThreadArray[i]);
1572 delete pDataArray[i];
1577 //append output files
1578 for(int i=0;i<processIDS.size();i++){
1579 m->appendFiles((outputFName + toString(processIDS[i]) + ".temp"), outputFName);
1580 m->mothurRemove((outputFName + toString(processIDS[i]) + ".temp"));
1582 m->appendFiles((accnos + toString(processIDS[i]) + ".temp"), accnos);
1583 m->mothurRemove((accnos + toString(processIDS[i]) + ".temp"));
1586 m->appendFiles((alns + toString(processIDS[i]) + ".temp"), alns);
1587 m->mothurRemove((alns + toString(processIDS[i]) + ".temp"));
1594 catch(exception& e) {
1595 m->errorOut(e, "ChimeraUchimeCommand", "createProcessesGroups");
1599 /**************************************************************************************************/