2 * chimerauchimecommand.cpp
5 * Created by westcott on 5/13/11.
6 * Copyright 2011 Schloss Lab. All rights reserved.
10 #include "chimerauchimecommand.h"
11 #include "deconvolutecommand.h"
13 #include "sequence.hpp"
14 #include "referencedb.h"
15 #include "systemcommand.h"
17 //**********************************************************************************************************************
18 vector<string> ChimeraUchimeCommand::setParameters(){
20 CommandParameter ptemplate("reference", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(ptemplate);
21 CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pfasta);
22 CommandParameter pname("name", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pname);
23 CommandParameter pgroup("group", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pgroup);
24 CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors);
25 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
26 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
27 CommandParameter pabskew("abskew", "Number", "", "1.9", "", "", "",false,false); parameters.push_back(pabskew);
28 CommandParameter pchimealns("chimealns", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pchimealns);
29 CommandParameter pminh("minh", "Number", "", "0.3", "", "", "",false,false); parameters.push_back(pminh);
30 CommandParameter pmindiv("mindiv", "Number", "", "0.5", "", "", "",false,false); parameters.push_back(pmindiv);
31 CommandParameter pxn("xn", "Number", "", "8.0", "", "", "",false,false); parameters.push_back(pxn);
32 CommandParameter pdn("dn", "Number", "", "1.4", "", "", "",false,false); parameters.push_back(pdn);
33 CommandParameter pxa("xa", "Number", "", "1", "", "", "",false,false); parameters.push_back(pxa);
34 CommandParameter pchunks("chunks", "Number", "", "4", "", "", "",false,false); parameters.push_back(pchunks);
35 CommandParameter pminchunk("minchunk", "Number", "", "64", "", "", "",false,false); parameters.push_back(pminchunk);
36 CommandParameter pidsmoothwindow("idsmoothwindow", "Number", "", "32", "", "", "",false,false); parameters.push_back(pidsmoothwindow);
37 //CommandParameter pminsmoothid("minsmoothid", "Number", "", "0.95", "", "", "",false,false); parameters.push_back(pminsmoothid);
38 CommandParameter pmaxp("maxp", "Number", "", "2", "", "", "",false,false); parameters.push_back(pmaxp);
39 CommandParameter pskipgaps("skipgaps", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(pskipgaps);
40 CommandParameter pskipgaps2("skipgaps2", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(pskipgaps2);
41 CommandParameter pminlen("minlen", "Number", "", "10", "", "", "",false,false); parameters.push_back(pminlen);
42 CommandParameter pmaxlen("maxlen", "Number", "", "10000", "", "", "",false,false); parameters.push_back(pmaxlen);
43 CommandParameter pucl("ucl", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pucl);
44 CommandParameter pqueryfract("queryfract", "Number", "", "0.5", "", "", "",false,false); parameters.push_back(pqueryfract);
46 vector<string> myArray;
47 for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); }
51 m->errorOut(e, "ChimeraUchimeCommand", "setParameters");
55 //**********************************************************************************************************************
56 string ChimeraUchimeCommand::getHelpString(){
58 string helpString = "";
59 helpString += "The chimera.uchime command reads a fastafile and referencefile and outputs potentially chimeric sequences.\n";
60 helpString += "This command is a wrapper for uchime written by Robert C. Edgar.\n";
61 helpString += "The chimera.uchime command parameters are fasta, name, reference, processors, abskew, chimealns, minh, mindiv, xn, dn, xa, chunks, minchunk, idsmoothwindow, minsmoothid, maxp, skipgaps, skipgaps2, minlen, maxlen, ucl and queryfact.\n";
62 helpString += "The fasta parameter allows you to enter the fasta file containing your potentially chimeric sequences, and is required, unless you have a valid current fasta file. \n";
63 helpString += "The name parameter allows you to provide a name file, if you are using template=self. \n";
64 helpString += "You may enter multiple fasta files by separating their names with dashes. ie. fasta=abrecovery.fasta-amazon.fasta \n";
65 helpString += "The group parameter allows you to provide a group file. The group file can be used with a namesfile and reference=self. When checking sequences, only sequences from the same group as the query sequence will be used as the reference. \n";
66 helpString += "The reference parameter allows you to enter a reference file containing known non-chimeric sequences, and is required. You may also set template=self, in this case the abundant sequences will be used as potential parents. \n";
67 helpString += "The processors parameter allows you to specify how many processors you would like to use. The default is 1. \n";
68 helpString += "The abskew parameter can only be used with template=self. Minimum abundance skew. Default 1.9. Abundance skew is: min [ abund(parent1), abund(parent2) ] / abund(query).\n";
69 helpString += "The chimealns parameter allows you to indicate you would like a file containing multiple alignments of query sequences to parents in human readable format. Alignments show columns with differences that support or contradict a chimeric model.\n";
70 helpString += "The minh parameter - mininum score to report chimera. Default 0.3. Values from 0.1 to 5 might be reasonable. Lower values increase sensitivity but may report more false positives. If you decrease xn you may need to increase minh, and vice versa.\n";
71 helpString += "The mindiv parameter - minimum divergence ratio, default 0.5. Div ratio is 100%% - %%identity between query sequence and the closest candidate for being a parent. If you don't care about very close chimeras, then you could increase mindiv to, say, 1.0 or 2.0, and also decrease minh, say to 0.1, to increase sensitivity. How well this works will depend on your data. Best is to tune parameters on a good benchmark.\n";
72 helpString += "The xn parameter - weight of a no vote. Default 8.0. Decreasing this weight to around 3 or 4 may give better performance on denoised data.\n";
73 helpString += "The dn parameter - pseudo-count prior on number of no votes. Default 1.4. Probably no good reason to change this unless you can retune to a good benchmark for your data. Reasonable values are probably in the range from 0.2 to 2.\n";
74 helpString += "The xa parameter - weight of an abstain vote. Default 1. So far, results do not seem to be very sensitive to this parameter, but if you have a good training set might be worth trying. Reasonable values might range from 0.1 to 2.\n";
75 helpString += "The chunks parameter is the number of chunks to extract from the query sequence when searching for parents. Default 4.\n";
76 helpString += "The minchunk parameter is the minimum length of a chunk. Default 64.\n";
77 helpString += "The idsmoothwindow parameter is the length of id smoothing window. Default 32.\n";
78 //helpString += "The minsmoothid parameter - minimum factional identity over smoothed window of candidate parent. Default 0.95.\n";
79 helpString += "The maxp parameter - maximum number of candidate parents to consider. Default 2. In tests so far, increasing maxp gives only a very small improvement in sensivity but tends to increase the error rate quite a bit.\n";
80 helpString += "The skipgaps parameter controls how gapped columns affect counting of diffs. If skipgaps is set to T, columns containing gaps do not found as diffs. Default = T.\n";
81 helpString += "The skipgaps2 parameter controls how gapped columns affect counting of diffs. If skipgaps2 is set to T, if column is immediately adjacent to a column containing a gap, it is not counted as a diff. Default = T.\n";
82 helpString += "The minlen parameter is the minimum unaligned sequence length. Defaults 10. Applies to both query and reference sequences.\n";
83 helpString += "The maxlen parameter is the maximum unaligned sequence length. Defaults 10000. Applies to both query and reference sequences.\n";
84 helpString += "The ucl parameter - use local-X alignments. Default is global-X or false. On tests so far, global-X is always better; this option is retained because it just might work well on some future type of data.\n";
85 helpString += "The queryfract parameter - minimum fraction of the query sequence that must be covered by a local-X alignment. Default 0.5. Applies only when ucl is true.\n";
87 helpString += "When using MPI, the processors parameter is set to the number of MPI processes running. \n";
89 helpString += "The chimera.uchime command should be in the following format: \n";
90 helpString += "chimera.uchime(fasta=yourFastaFile, reference=yourTemplate) \n";
91 helpString += "Example: chimera.uchime(fasta=AD.align, reference=silva.gold.align) \n";
92 helpString += "Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFastaFile).\n";
96 m->errorOut(e, "ChimeraUchimeCommand", "getHelpString");
100 //**********************************************************************************************************************
101 string ChimeraUchimeCommand::getOutputFileNameTag(string type, string inputName=""){
103 string outputFileName = "";
104 map<string, vector<string> >::iterator it;
106 //is this a type this command creates
107 it = outputTypes.find(type);
108 if (it == outputTypes.end()) { m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n"); }
110 if (type == "chimera") { outputFileName = "uchime.chimeras"; }
111 else if (type == "accnos") { outputFileName = "uchime.accnos"; }
112 else if (type == "alns") { outputFileName = "uchime.alns"; }
113 else { m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n"); m->control_pressed = true; }
115 return outputFileName;
117 catch(exception& e) {
118 m->errorOut(e, "ChimeraUchimeCommand", "getOutputFileNameTag");
122 //**********************************************************************************************************************
123 ChimeraUchimeCommand::ChimeraUchimeCommand(){
125 abort = true; calledHelp = true;
127 vector<string> tempOutNames;
128 outputTypes["chimera"] = tempOutNames;
129 outputTypes["accnos"] = tempOutNames;
130 outputTypes["alns"] = tempOutNames;
132 catch(exception& e) {
133 m->errorOut(e, "ChimeraUchimeCommand", "ChimeraUchimeCommand");
137 //***************************************************************************************************************
138 ChimeraUchimeCommand::ChimeraUchimeCommand(string option) {
140 abort = false; calledHelp = false;
141 ReferenceDB* rdb = ReferenceDB::getInstance();
143 //allow user to run help
144 if(option == "help") { help(); abort = true; calledHelp = true; }
145 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
148 vector<string> myArray = setParameters();
150 OptionParser parser(option);
151 map<string,string> parameters = parser.getParameters();
153 ValidParameters validParameter("chimera.uchime");
154 map<string,string>::iterator it;
156 //check to make sure all parameters are valid for command
157 for (it = parameters.begin(); it != parameters.end(); it++) {
158 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
161 vector<string> tempOutNames;
162 outputTypes["chimera"] = tempOutNames;
163 outputTypes["accnos"] = tempOutNames;
164 outputTypes["alns"] = tempOutNames;
166 //if the user changes the input directory command factory will send this info to us in the output parameter
167 string inputDir = validParameter.validFile(parameters, "inputdir", false);
168 if (inputDir == "not found"){ inputDir = ""; }
170 //check for required parameters
171 fastafile = validParameter.validFile(parameters, "fasta", false);
172 if (fastafile == "not found") {
173 //if there is a current fasta file, use it
174 string filename = m->getFastaFile();
175 if (filename != "") { fastaFileNames.push_back(filename); m->mothurOut("Using " + filename + " as input file for the fasta parameter."); m->mothurOutEndLine(); }
176 else { m->mothurOut("You have no current fastafile and the fasta parameter is required."); m->mothurOutEndLine(); abort = true; }
178 m->splitAtDash(fastafile, fastaFileNames);
180 //go through files and make sure they are good, if not, then disregard them
181 for (int i = 0; i < fastaFileNames.size(); i++) {
184 if (fastaFileNames[i] == "current") {
185 fastaFileNames[i] = m->getFastaFile();
186 if (fastaFileNames[i] != "") { m->mothurOut("Using " + fastaFileNames[i] + " as input file for the fasta parameter where you had given current."); m->mothurOutEndLine(); }
188 m->mothurOut("You have no current fastafile, ignoring current."); m->mothurOutEndLine(); ignore=true;
189 //erase from file list
190 fastaFileNames.erase(fastaFileNames.begin()+i);
197 if (inputDir != "") {
198 string path = m->hasPath(fastaFileNames[i]);
199 //if the user has not given a path then, add inputdir. else leave path alone.
200 if (path == "") { fastaFileNames[i] = inputDir + fastaFileNames[i]; }
206 ableToOpen = m->openInputFile(fastaFileNames[i], in, "noerror");
208 //if you can't open it, try default location
209 if (ableToOpen == 1) {
210 if (m->getDefaultPath() != "") { //default path is set
211 string tryPath = m->getDefaultPath() + m->getSimpleName(fastaFileNames[i]);
212 m->mothurOut("Unable to open " + fastaFileNames[i] + ". Trying default " + tryPath); m->mothurOutEndLine();
214 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
216 fastaFileNames[i] = tryPath;
220 if (ableToOpen == 1) {
221 if (m->getOutputDir() != "") { //default path is set
222 string tryPath = m->getOutputDir() + m->getSimpleName(fastaFileNames[i]);
223 m->mothurOut("Unable to open " + fastaFileNames[i] + ". Trying output directory " + tryPath); m->mothurOutEndLine();
225 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
227 fastaFileNames[i] = tryPath;
233 if (ableToOpen == 1) {
234 m->mothurOut("Unable to open " + fastaFileNames[i] + ". It will be disregarded."); m->mothurOutEndLine();
235 //erase from file list
236 fastaFileNames.erase(fastaFileNames.begin()+i);
239 m->setFastaFile(fastaFileNames[i]);
244 //make sure there is at least one valid file left
245 if (fastaFileNames.size() == 0) { m->mothurOut("[ERROR]: no valid files."); m->mothurOutEndLine(); abort = true; }
249 //check for required parameters
251 namefile = validParameter.validFile(parameters, "name", false);
252 if (namefile == "not found") { namefile = ""; hasName = false; }
254 m->splitAtDash(namefile, nameFileNames);
256 //go through files and make sure they are good, if not, then disregard them
257 for (int i = 0; i < nameFileNames.size(); i++) {
260 if (nameFileNames[i] == "current") {
261 nameFileNames[i] = m->getNameFile();
262 if (nameFileNames[i] != "") { m->mothurOut("Using " + nameFileNames[i] + " as input file for the name parameter where you had given current."); m->mothurOutEndLine(); }
264 m->mothurOut("You have no current namefile, ignoring current."); m->mothurOutEndLine(); ignore=true;
265 //erase from file list
266 nameFileNames.erase(nameFileNames.begin()+i);
273 if (inputDir != "") {
274 string path = m->hasPath(nameFileNames[i]);
275 //if the user has not given a path then, add inputdir. else leave path alone.
276 if (path == "") { nameFileNames[i] = inputDir + nameFileNames[i]; }
282 ableToOpen = m->openInputFile(nameFileNames[i], in, "noerror");
284 //if you can't open it, try default location
285 if (ableToOpen == 1) {
286 if (m->getDefaultPath() != "") { //default path is set
287 string tryPath = m->getDefaultPath() + m->getSimpleName(nameFileNames[i]);
288 m->mothurOut("Unable to open " + nameFileNames[i] + ". Trying default " + tryPath); m->mothurOutEndLine();
290 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
292 nameFileNames[i] = tryPath;
296 if (ableToOpen == 1) {
297 if (m->getOutputDir() != "") { //default path is set
298 string tryPath = m->getOutputDir() + m->getSimpleName(nameFileNames[i]);
299 m->mothurOut("Unable to open " + nameFileNames[i] + ". Trying output directory " + tryPath); m->mothurOutEndLine();
301 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
303 nameFileNames[i] = tryPath;
309 if (ableToOpen == 1) {
310 m->mothurOut("Unable to open " + nameFileNames[i] + ". It will be disregarded."); m->mothurOutEndLine();
311 //erase from file list
312 nameFileNames.erase(nameFileNames.begin()+i);
315 m->setNameFile(nameFileNames[i]);
320 //make sure there is at least one valid file left
321 if (nameFileNames.size() == 0) { m->mothurOut("[ERROR]: no valid name files."); m->mothurOutEndLine(); abort = true; }
324 if (hasName && (nameFileNames.size() != fastaFileNames.size())) { m->mothurOut("[ERROR]: The number of namefiles does not match the number of fastafiles, please correct."); m->mothurOutEndLine(); abort=true; }
326 bool hasGroup = true;
327 groupfile = validParameter.validFile(parameters, "group", false);
328 if (groupfile == "not found") { groupfile = ""; hasGroup = false; }
330 m->splitAtDash(groupfile, groupFileNames);
332 //go through files and make sure they are good, if not, then disregard them
333 for (int i = 0; i < groupFileNames.size(); i++) {
336 if (groupFileNames[i] == "current") {
337 groupFileNames[i] = m->getGroupFile();
338 if (groupFileNames[i] != "") { m->mothurOut("Using " + groupFileNames[i] + " as input file for the group parameter where you had given current."); m->mothurOutEndLine(); }
340 m->mothurOut("You have no current namefile, ignoring current."); m->mothurOutEndLine(); ignore=true;
341 //erase from file list
342 groupFileNames.erase(groupFileNames.begin()+i);
349 if (inputDir != "") {
350 string path = m->hasPath(groupFileNames[i]);
351 //if the user has not given a path then, add inputdir. else leave path alone.
352 if (path == "") { groupFileNames[i] = inputDir + groupFileNames[i]; }
358 ableToOpen = m->openInputFile(groupFileNames[i], in, "noerror");
360 //if you can't open it, try default location
361 if (ableToOpen == 1) {
362 if (m->getDefaultPath() != "") { //default path is set
363 string tryPath = m->getDefaultPath() + m->getSimpleName(groupFileNames[i]);
364 m->mothurOut("Unable to open " + groupFileNames[i] + ". Trying default " + tryPath); m->mothurOutEndLine();
366 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
368 groupFileNames[i] = tryPath;
372 if (ableToOpen == 1) {
373 if (m->getOutputDir() != "") { //default path is set
374 string tryPath = m->getOutputDir() + m->getSimpleName(groupFileNames[i]);
375 m->mothurOut("Unable to open " + groupFileNames[i] + ". Trying output directory " + tryPath); m->mothurOutEndLine();
377 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
379 groupFileNames[i] = tryPath;
385 if (ableToOpen == 1) {
386 m->mothurOut("Unable to open " + groupFileNames[i] + ". It will be disregarded."); m->mothurOutEndLine();
387 //erase from file list
388 groupFileNames.erase(groupFileNames.begin()+i);
391 m->setGroupFile(groupFileNames[i]);
396 //make sure there is at least one valid file left
397 if (groupFileNames.size() == 0) { m->mothurOut("[ERROR]: no valid group files."); m->mothurOutEndLine(); abort = true; }
400 if (hasGroup && (groupFileNames.size() != fastaFileNames.size())) { m->mothurOut("[ERROR]: The number of groupfiles does not match the number of fastafiles, please correct."); m->mothurOutEndLine(); abort=true; }
403 //if the user changes the output directory command factory will send this info to us in the output parameter
404 outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = ""; }
407 it = parameters.find("reference");
408 //user has given a template file
409 if(it != parameters.end()){
410 if (it->second == "self") { templatefile = "self"; }
412 path = m->hasPath(it->second);
413 //if the user has not given a path then, add inputdir. else leave path alone.
414 if (path == "") { parameters["reference"] = inputDir + it->second; }
416 templatefile = validParameter.validFile(parameters, "reference", true);
417 if (templatefile == "not open") { abort = true; }
418 else if (templatefile == "not found") { //check for saved reference sequences
419 if (rdb->getSavedReference() != "") {
420 templatefile = rdb->getSavedReference();
421 m->mothurOutEndLine(); m->mothurOut("Using sequences from " + rdb->getSavedReference() + "."); m->mothurOutEndLine();
423 m->mothurOut("[ERROR]: You don't have any saved reference sequences and the reference parameter is a required.");
424 m->mothurOutEndLine();
429 }else if (hasName) { templatefile = "self"; }
431 if (rdb->getSavedReference() != "") {
432 templatefile = rdb->getSavedReference();
433 m->mothurOutEndLine(); m->mothurOut("Using sequences from " + rdb->getSavedReference() + "."); m->mothurOutEndLine();
435 m->mothurOut("[ERROR]: You don't have any saved reference sequences and the reference parameter is a required.");
436 m->mothurOutEndLine();
437 templatefile = ""; abort = true;
441 string temp = validParameter.validFile(parameters, "processors", false); if (temp == "not found"){ temp = m->getProcessors(); }
442 m->setProcessors(temp);
443 m->mothurConvert(temp, processors);
445 abskew = validParameter.validFile(parameters, "abskew", false); if (abskew == "not found"){ useAbskew = false; abskew = "1.9"; }else{ useAbskew = true; }
446 if (useAbskew && templatefile != "self") { m->mothurOut("The abskew parameter is only valid with template=self, ignoring."); m->mothurOutEndLine(); useAbskew = false; }
448 temp = validParameter.validFile(parameters, "chimealns", false); if (temp == "not found") { temp = "f"; }
449 chimealns = m->isTrue(temp);
451 minh = validParameter.validFile(parameters, "minh", false); if (minh == "not found") { useMinH = false; minh = "0.3"; } else{ useMinH = true; }
452 mindiv = validParameter.validFile(parameters, "mindiv", false); if (mindiv == "not found") { useMindiv = false; mindiv = "0.5"; } else{ useMindiv = true; }
453 xn = validParameter.validFile(parameters, "xn", false); if (xn == "not found") { useXn = false; xn = "8.0"; } else{ useXn = true; }
454 dn = validParameter.validFile(parameters, "dn", false); if (dn == "not found") { useDn = false; dn = "1.4"; } else{ useDn = true; }
455 xa = validParameter.validFile(parameters, "xa", false); if (xa == "not found") { useXa = false; xa = "1"; } else{ useXa = true; }
456 chunks = validParameter.validFile(parameters, "chunks", false); if (chunks == "not found") { useChunks = false; chunks = "4"; } else{ useChunks = true; }
457 minchunk = validParameter.validFile(parameters, "minchunk", false); if (minchunk == "not found") { useMinchunk = false; minchunk = "64"; } else{ useMinchunk = true; }
458 idsmoothwindow = validParameter.validFile(parameters, "idsmoothwindow", false); if (idsmoothwindow == "not found") { useIdsmoothwindow = false; idsmoothwindow = "32"; } else{ useIdsmoothwindow = true; }
459 //minsmoothid = validParameter.validFile(parameters, "minsmoothid", false); if (minsmoothid == "not found") { useMinsmoothid = false; minsmoothid = "0.95"; } else{ useMinsmoothid = true; }
460 maxp = validParameter.validFile(parameters, "maxp", false); if (maxp == "not found") { useMaxp = false; maxp = "2"; } else{ useMaxp = true; }
461 minlen = validParameter.validFile(parameters, "minlen", false); if (minlen == "not found") { useMinlen = false; minlen = "10"; } else{ useMinlen = true; }
462 maxlen = validParameter.validFile(parameters, "maxlen", false); if (maxlen == "not found") { useMaxlen = false; maxlen = "10000"; } else{ useMaxlen = true; }
464 temp = validParameter.validFile(parameters, "ucl", false); if (temp == "not found") { temp = "f"; }
465 ucl = m->isTrue(temp);
467 queryfract = validParameter.validFile(parameters, "queryfract", false); if (queryfract == "not found") { useQueryfract = false; queryfract = "0.5"; } else{ useQueryfract = true; }
468 if (!ucl && useQueryfract) { m->mothurOut("queryfact may only be used when ucl=t, ignoring."); m->mothurOutEndLine(); useQueryfract = false; }
470 temp = validParameter.validFile(parameters, "skipgaps", false); if (temp == "not found") { temp = "t"; }
471 skipgaps = m->isTrue(temp);
473 temp = validParameter.validFile(parameters, "skipgaps2", false); if (temp == "not found") { temp = "t"; }
474 skipgaps2 = m->isTrue(temp);
476 if (hasName && (templatefile != "self")) { m->mothurOut("You have provided a namefile and the reference parameter is not set to self. I am not sure what reference you are trying to use, aborting."); m->mothurOutEndLine(); abort=true; }
477 if (hasGroup && (templatefile != "self")) { m->mothurOut("You have provided a group file and the reference parameter is not set to self. I am not sure what reference you are trying to use, aborting."); m->mothurOutEndLine(); abort=true; }
479 //look for uchime exe
481 string tempPath = path;
482 for (int i = 0; i < path.length(); i++) { tempPath[i] = tolower(path[i]); }
483 path = path.substr(0, (tempPath.find_last_of('m')));
485 string uchimeCommand;
486 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
487 uchimeCommand = path + "uchime"; // format the database, -o option gives us the ability
489 m->mothurOut("[DEBUG]: Uchime location using \"which uchime\" = ");
490 Command* newCommand = new SystemCommand("which uchime"); m->mothurOutEndLine();
491 newCommand->execute();
493 m->mothurOut("[DEBUG]: Mothur's location using \"which mothur\" = ");
494 newCommand = new SystemCommand("which mothur"); m->mothurOutEndLine();
495 newCommand->execute();
499 uchimeCommand = path + "uchime.exe";
502 //test to make sure uchime exists
504 uchimeCommand = m->getFullPathName(uchimeCommand);
505 int ableToOpen = m->openInputFile(uchimeCommand, in, "no error"); in.close();
506 if(ableToOpen == 1) {
507 m->mothurOut(uchimeCommand + " file does not exist. Checking path... \n");
508 //check to see if uchime is in the path??
510 string uLocation = m->findProgramPath("uchime");
514 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
515 ableToOpen = m->openInputFile(uLocation, in2, "no error"); in2.close();
517 ableToOpen = m->openInputFile((uLocation + ".exe"), in2, "no error"); in2.close();
520 if(ableToOpen == 1) { m->mothurOut("[ERROR]: " + uLocation + " file does not exist. mothur requires the uchime executable."); m->mothurOutEndLine(); abort = true; }
521 else { m->mothurOut("Found uchime in your path, using " + uLocation + "\n");uchimeLocation = uLocation; }
522 }else { uchimeLocation = uchimeCommand; }
524 uchimeLocation = m->getFullPathName(uchimeLocation);
527 catch(exception& e) {
528 m->errorOut(e, "ChimeraSlayerCommand", "ChimeraSlayerCommand");
532 //***************************************************************************************************************
534 int ChimeraUchimeCommand::execute(){
536 if (abort == true) { if (calledHelp) { return 0; } return 2; }
538 m->mothurOut("\nuchime by Robert C. Edgar\nhttp://drive5.com/uchime\nThis code is donated to the public domain.\n\n");
540 for (int s = 0; s < fastaFileNames.size(); s++) {
542 m->mothurOut("Checking sequences from " + fastaFileNames[s] + " ..." ); m->mothurOutEndLine();
544 int start = time(NULL);
545 string nameFile = "";
546 if (outputDir == "") { outputDir = m->hasPath(fastaFileNames[s]); }//if user entered a file with a path then preserve it
547 string outputFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + getOutputFileNameTag("chimera");
548 string accnosFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + getOutputFileNameTag("accnos");
549 string alnsFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + getOutputFileNameTag("alns");
550 string newFasta = m->getRootName(fastaFileNames[s]) + "temp";
552 //you provided a groupfile
553 string groupFile = "";
554 if (groupFileNames.size() != 0) { groupFile = groupFileNames[s]; }
556 if ((templatefile == "self") && (groupFile == "")) { //you want to run uchime with a reference template
558 if (processors != 1) { m->mothurOut("When using template=self, mothur can only use 1 processor, continuing."); m->mothurOutEndLine(); processors = 1; }
559 if (nameFileNames.size() != 0) { //you provided a namefile and we don't need to create one
560 nameFile = nameFileNames[s];
561 }else { nameFile = getNamesFile(fastaFileNames[s]); }
563 map<string, string> seqs;
564 readFasta(fastaFileNames[s], seqs); if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } return 0; }
567 vector<seqPriorityNode> nameMapCount;
568 int error = m->readNames(nameFile, nameMapCount, seqs); if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } return 0; }
569 if (error == 1) { for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } return 0; }
570 if (seqs.size() != nameMapCount.size()) { m->mothurOut( "The number of sequences in your fastafile does not match the number of sequences in your namefile, aborting."); m->mothurOutEndLine(); for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } return 0; }
572 printFile(nameMapCount, newFasta);
573 fastaFileNames[s] = newFasta;
576 if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } return 0; }
578 if (groupFile != "") {
579 if (nameFileNames.size() != 0) { //you provided a namefile and we don't need to create one
580 nameFile = nameFileNames[s];
581 }else { nameFile = getNamesFile(fastaFileNames[s]); }
583 //Parse sequences by group
584 SequenceParser parser(groupFile, fastaFileNames[s], nameFile);
585 vector<string> groups = parser.getNamesOfGroups();
587 if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } return 0; }
590 ofstream out, out1, out2;
591 m->openOutputFile(outputFileName, out); out.close();
592 m->openOutputFile(accnosFileName, out1); out1.close();
593 if (chimealns) { m->openOutputFile(alnsFileName, out2); out2.close(); }
596 if(processors == 1) { totalSeqs = driverGroups(parser, outputFileName, newFasta, accnosFileName, alnsFileName, 0, groups.size(), groups); }
597 else { totalSeqs = createProcessesGroups(parser, outputFileName, newFasta, accnosFileName, alnsFileName, groups, nameFile, groupFile, fastaFileNames[s]); }
599 if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } return 0; }
601 int totalChimeras = deconvoluteResults(parser, outputFileName, accnosFileName, alnsFileName);
603 m->mothurOutEndLine(); m->mothurOut("It took " + toString(time(NULL) - start) + " secs to check " + toString(totalSeqs) + " sequences. " + toString(totalChimeras) + " chimeras were found."); m->mothurOutEndLine();
604 m->mothurOut("The number of sequences checked may be larger than the number of unique sequences because some sequences are found in several samples."); m->mothurOutEndLine();
606 if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } return 0; }
609 if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } return 0; }
614 if(processors == 1){ numSeqs = driver(outputFileName, fastaFileNames[s], accnosFileName, alnsFileName, numChimeras); }
615 else{ numSeqs = createProcesses(outputFileName, fastaFileNames[s], accnosFileName, alnsFileName, numChimeras); }
619 m->openOutputFile(outputFileName+".temp", out);
620 out << "Score\tQuery\tParentA\tParentB\tIdQM\tIdQA\tIdQB\tIdAB\tIdQT\tLY\tLN\tLA\tRY\tRN\tRA\tDiv\tYN\n";
623 m->appendFiles(outputFileName, outputFileName+".temp");
624 m->mothurRemove(outputFileName); rename((outputFileName+".temp").c_str(), outputFileName.c_str());
626 if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } return 0; }
628 //remove file made for uchime
629 if (templatefile == "self") { m->mothurRemove(fastaFileNames[s]); }
631 m->mothurOutEndLine(); m->mothurOut("It took " + toString(time(NULL) - start) + " secs to check " + toString(numSeqs) + " sequences. " + toString(numChimeras) + " chimeras were found."); m->mothurOutEndLine();
634 outputNames.push_back(outputFileName); outputTypes["chimera"].push_back(outputFileName);
635 outputNames.push_back(accnosFileName); outputTypes["accnos"].push_back(accnosFileName);
636 if (chimealns) { outputNames.push_back(alnsFileName); outputTypes["alns"].push_back(alnsFileName); }
639 //set accnos file as new current accnosfile
641 itTypes = outputTypes.find("accnos");
642 if (itTypes != outputTypes.end()) {
643 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setAccnosFile(current); }
646 m->mothurOutEndLine();
647 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
648 for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); }
649 m->mothurOutEndLine();
654 catch(exception& e) {
655 m->errorOut(e, "ChimeraUchimeCommand", "execute");
659 //**********************************************************************************************************************
660 int ChimeraUchimeCommand::deconvoluteResults(SequenceParser& parser, string outputFileName, string accnosFileName, string alnsFileName){
662 map<string, string> uniqueNames = parser.getAllSeqsMap();
663 map<string, string>::iterator itUnique;
668 m->openInputFile(accnosFileName, in2);
671 m->openOutputFile(accnosFileName+".temp", out2);
674 set<string> namesInFile; //this is so if a sequence is found to be chimera in several samples we dont write it to the results file more than once
675 set<string>::iterator itNames;
676 set<string> chimerasInFile;
677 set<string>::iterator itChimeras;
681 if (m->control_pressed) { in2.close(); out2.close(); m->mothurRemove(outputFileName); m->mothurRemove((accnosFileName+".temp")); return 0; }
683 in2 >> name; m->gobble(in2);
686 itUnique = uniqueNames.find(name);
688 if (itUnique == uniqueNames.end()) { m->mothurOut("[ERROR]: trouble parsing accnos results. Cannot find "+ name + "."); m->mothurOutEndLine(); m->control_pressed = true; }
690 itChimeras = chimerasInFile.find((itUnique->second));
692 if (itChimeras == chimerasInFile.end()) {
693 out2 << itUnique->second << endl;
694 chimerasInFile.insert((itUnique->second));
702 m->mothurRemove(accnosFileName);
703 rename((accnosFileName+".temp").c_str(), accnosFileName.c_str());
709 m->openInputFile(outputFileName, in);
712 m->openOutputFile(outputFileName+".temp", out); out.setf(ios::fixed, ios::floatfield); out.setf(ios::showpoint);
713 out << "Score\tQuery\tParentA\tParentB\tIdQM\tIdQA\tIdQB\tIdAB\tIdQT\tLY\tLN\tLA\tRY\tRN\tRA\tDiv\tYN\n";
716 string parent1, parent2, temp2, temp3, temp4, temp5, temp6, temp7, temp8, temp9, temp10, temp11, temp12, temp13, flag;
719 //assumptions - in file each read will always look like - if uchime source is updated, revisit this code.
720 /* 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
721 0.000000 F11Fcsw_33372/ab=18/ * * * * * * * * * * * * * * N
722 0.018300 F11Fcsw_14980/ab=16/ F11Fcsw_1915/ab=35/ F11Fcsw_6032/ab=42/ 79.9 78.7 78.2 78.7 79.2 3 0 5 11 10 20 1.46 N
727 if (m->control_pressed) { in.close(); out.close(); m->mothurRemove((outputFileName+".temp")); return 0; }
730 in >> temp1; m->gobble(in);
731 in >> name; m->gobble(in);
732 in >> parent1; m->gobble(in);
733 in >> parent2; m->gobble(in);
734 in >> temp2 >> temp3 >> temp4 >> temp5 >> temp6 >> temp7 >> temp8 >> temp9 >> temp10 >> temp11 >> temp12 >> temp13 >> flag;
737 //parse name - name will look like U68590/ab=1/
738 string restOfName = "";
739 int pos = name.find_first_of('/');
740 if (pos != string::npos) {
741 restOfName = name.substr(pos);
742 name = name.substr(0, pos);
746 itUnique = uniqueNames.find(name);
748 if (itUnique == uniqueNames.end()) { m->mothurOut("[ERROR]: trouble parsing chimera results. Cannot find "+ name + "."); m->mothurOutEndLine(); m->control_pressed = true; }
750 name = itUnique->second;
751 //is this name already in the file
752 itNames = namesInFile.find((name));
754 if (itNames == namesInFile.end()) { //no not in file
755 if (flag == "N") { //are you really a no??
756 //is this sequence really not chimeric??
757 itChimeras = chimerasInFile.find(name);
759 //then you really are a no so print, otherwise skip
760 if (itChimeras == chimerasInFile.end()) { print = true; }
761 }else{ print = true; }
766 out << temp1 << '\t' << name << restOfName << '\t';
767 namesInFile.insert(name);
769 //parse parent1 names
770 if (parent1 != "*") {
772 pos = parent1.find_first_of('/');
773 if (pos != string::npos) {
774 restOfName = parent1.substr(pos);
775 parent1 = parent1.substr(0, pos);
778 itUnique = uniqueNames.find(parent1);
779 if (itUnique == uniqueNames.end()) { m->mothurOut("[ERROR]: trouble parsing chimera results. Cannot find parentA "+ parent1 + "."); m->mothurOutEndLine(); m->control_pressed = true; }
780 else { out << itUnique->second << restOfName << '\t'; }
781 }else { out << parent1 << '\t'; }
783 //parse parent2 names
784 if (parent2 != "*") {
786 pos = parent2.find_first_of('/');
787 if (pos != string::npos) {
788 restOfName = parent2.substr(pos);
789 parent2 = parent2.substr(0, pos);
792 itUnique = uniqueNames.find(parent2);
793 if (itUnique == uniqueNames.end()) { m->mothurOut("[ERROR]: trouble parsing chimera results. Cannot find parentB "+ parent2 + "."); m->mothurOutEndLine(); m->control_pressed = true; }
794 else { out << itUnique->second << restOfName << '\t'; }
795 }else { out << parent2 << '\t'; }
797 out << temp2 << '\t' << temp3 << '\t' << temp4 << '\t' << temp5 << '\t' << temp6 << '\t' << temp7 << '\t' << temp8 << '\t' << temp9 << '\t' << temp10 << '\t' << temp11 << '\t' << temp12 << temp13 << '\t' << flag << endl;
803 m->mothurRemove(outputFileName);
804 rename((outputFileName+".temp").c_str(), outputFileName.c_str());
808 //assumptions - in file each read will always look like - if uchime source is updated, revisit this code.
810 ------------------------------------------------------------------------
811 Query ( 179 nt) F21Fcsw_11639/ab=591/
812 ParentA ( 179 nt) F11Fcsw_6529/ab=1625/
813 ParentB ( 181 nt) F21Fcsw_12128/ab=1827/
815 A 1 AAGgAAGAtTAATACaagATGgCaTCatgAGtccgCATgTtcAcatGATTAAAG--gTaTtcCGGTagacGATGGGGATG 78
816 Q 1 AAGTAAGACTAATACCCAATGACGTCTCTAGAAGACATCTGAAAGAGATTAAAG--ATTTATCGGTGATGGATGGGGATG 78
817 B 1 AAGgAAGAtTAATcCaggATGggaTCatgAGttcACATgTccgcatGATTAAAGgtATTTtcCGGTagacGATGGGGATG 80
818 Diffs N N A N?N N N NNN N?NB N ?NaNNN B B NN NNNN
819 Votes 0 0 + 000 0 0 000 000+ 0 00!000 + 00 0000
820 Model AAAAAAAAAAAAAAAAAAAAAAxBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB
822 A 79 CGTtccATTAGaTaGTaGGCGGGGTAACGGCCCACCtAGtCttCGATggaTAGGGGTTCTGAGAGGAAGGTCCCCCACAT 158
823 Q 79 CGTCTGATTAGCTTGTTGGCGGGGTAACGGCCCACCAAGGCAACGATCAGTAGGGGTTCTGAGAGGAAGGTCCCCCACAT 158
824 B 81 CGTtccATTAGaTaGTaGGCGGGGTAACGGCCCACCtAGtCAACGATggaTAGGGGTTCTGAGAGGAAGGTCCCCCACAT 160
825 Diffs NNN N N N N N BB NNN
826 Votes 000 0 0 0 0 0 ++ 000
827 Model BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB
829 A 159 TGGAACTGAGACACGGTCCAA 179
830 Q 159 TGGAACTGAGACACGGTCCAA 179
831 B 161 TGGAACTGAGACACGGTCCAA 181
834 Model BBBBBBBBBBBBBBBBBBBBB
836 Ids. QA 76.6%, QB 77.7%, AB 93.7%, QModel 78.9%, Div. +1.5%
837 Diffs Left 7: N 0, A 6, Y 1 (14.3%); Right 35: N 1, A 30, Y 4 (11.4%), Score 0.0047
841 m->openInputFile(alnsFileName, in3);
844 m->openOutputFile(alnsFileName+".temp", out3); out3.setf(ios::fixed, ios::floatfield); out3.setf(ios::showpoint);
851 if (m->control_pressed) { in3.close(); out3.close(); m->mothurRemove(outputFileName); m->mothurRemove((accnosFileName)); m->mothurRemove((alnsFileName+".temp")); return 0; }
854 line = m->getline(in3);
858 istringstream iss(line);
861 //are you a name line
862 if ((temp == "Query") || (temp == "ParentA") || (temp == "ParentB")) {
864 for (int i = 0; i < line.length(); i++) {
866 if (line[i] == ')') { break; }
867 else { out3 << line[i]; }
870 if (spot == (line.length() - 1)) { m->mothurOut("[ERROR]: could not line sequence name in line " + line + "."); m->mothurOutEndLine(); m->control_pressed = true; }
871 else if ((spot+2) > (line.length() - 1)) { m->mothurOut("[ERROR]: could not line sequence name in line " + line + "."); m->mothurOutEndLine(); m->control_pressed = true; }
873 out << line[spot] << line[spot+1];
875 name = line.substr(spot+2);
877 //parse name - name will either look like U68590/ab=1/ or U68590
878 string restOfName = "";
879 int pos = name.find_first_of('/');
880 if (pos != string::npos) {
881 restOfName = name.substr(pos);
882 name = name.substr(0, pos);
886 itUnique = uniqueNames.find(name);
888 if (itUnique == uniqueNames.end()) { m->mothurOut("[ERROR]: trouble parsing alns results. Cannot find "+ name + "."); m->mothurOutEndLine();m->control_pressed = true; }
890 //only limit repeats on query names
891 if (temp == "Query") {
892 itNames = namesInFile.find((itUnique->second));
894 if (itNames == namesInFile.end()) {
895 out << itUnique->second << restOfName << endl;
896 namesInFile.insert((itUnique->second));
898 }else { out << itUnique->second << restOfName << endl; }
903 }else { //not need to alter line
904 out3 << line << endl;
906 }else { out3 << endl; }
911 m->mothurRemove(alnsFileName);
912 rename((alnsFileName+".temp").c_str(), alnsFileName.c_str());
917 catch(exception& e) {
918 m->errorOut(e, "ChimeraUchimeCommand", "deconvoluteResults");
922 //**********************************************************************************************************************
923 int ChimeraUchimeCommand::printFile(vector<seqPriorityNode>& nameMapCount, string filename){
926 sort(nameMapCount.begin(), nameMapCount.end(), compareSeqPriorityNodes);
929 m->openOutputFile(filename, out);
931 //print new file in order of
932 for (int i = 0; i < nameMapCount.size(); i++) {
933 out << ">" << nameMapCount[i].name << "/ab=" << nameMapCount[i].numIdentical << "/" << endl << nameMapCount[i].seq << endl;
939 catch(exception& e) {
940 m->errorOut(e, "ChimeraUchimeCommand", "printFile");
944 //**********************************************************************************************************************
945 int ChimeraUchimeCommand::readFasta(string filename, map<string, string>& seqs){
947 //create input file for uchime
948 //read through fastafile and store info
950 m->openInputFile(filename, in);
954 if (m->control_pressed) { in.close(); return 0; }
956 Sequence seq(in); m->gobble(in);
957 seqs[seq.getName()] = seq.getAligned();
963 catch(exception& e) {
964 m->errorOut(e, "ChimeraUchimeCommand", "readFasta");
968 //**********************************************************************************************************************
970 string ChimeraUchimeCommand::getNamesFile(string& inputFile){
972 string nameFile = "";
974 m->mothurOutEndLine(); m->mothurOut("No namesfile given, running unique.seqs command to generate one."); m->mothurOutEndLine(); m->mothurOutEndLine();
976 //use unique.seqs to create new name and fastafile
977 string inputString = "fasta=" + inputFile;
978 m->mothurOut("/******************************************/"); m->mothurOutEndLine();
979 m->mothurOut("Running command: unique.seqs(" + inputString + ")"); m->mothurOutEndLine();
980 m->mothurCalling = true;
982 Command* uniqueCommand = new DeconvoluteCommand(inputString);
983 uniqueCommand->execute();
985 map<string, vector<string> > filenames = uniqueCommand->getOutputFiles();
987 delete uniqueCommand;
988 m->mothurCalling = false;
989 m->mothurOut("/******************************************/"); m->mothurOutEndLine();
991 nameFile = filenames["name"][0];
992 inputFile = filenames["fasta"][0];
996 catch(exception& e) {
997 m->errorOut(e, "ChimeraUchimeCommand", "getNamesFile");
1001 //**********************************************************************************************************************
1002 int ChimeraUchimeCommand::driverGroups(SequenceParser& parser, string outputFName, string filename, string accnos, string alns, int start, int end, vector<string> groups){
1006 int numChimeras = 0;
1008 for (int i = start; i < end; i++) {
1009 int start = time(NULL); if (m->control_pressed) { return 0; }
1011 int error = parser.getSeqs(groups[i], filename, true); if ((error == 1) || m->control_pressed) { return 0; }
1013 int numSeqs = driver((outputFName + groups[i]), filename, (accnos+ groups[i]), (alns+ groups[i]), numChimeras);
1014 totalSeqs += numSeqs;
1016 if (m->control_pressed) { return 0; }
1018 //remove file made for uchime
1019 if (!m->debug) { m->mothurRemove(filename); }
1020 else { m->mothurOut("[DEBUG]: saving file: " + filename + ".\n"); }
1023 m->appendFiles((outputFName+groups[i]), outputFName); m->mothurRemove((outputFName+groups[i]));
1024 m->appendFiles((accnos+groups[i]), accnos); m->mothurRemove((accnos+groups[i]));
1025 if (chimealns) { m->appendFiles((alns+groups[i]), alns); m->mothurRemove((alns+groups[i])); }
1027 m->mothurOutEndLine(); m->mothurOut("It took " + toString(time(NULL) - start) + " secs to check " + toString(numSeqs) + " sequences from group " + groups[i] + "."); m->mothurOutEndLine();
1033 catch(exception& e) {
1034 m->errorOut(e, "ChimeraUchimeCommand", "driverGroups");
1038 //**********************************************************************************************************************
1040 int ChimeraUchimeCommand::driver(string outputFName, string filename, string accnos, string alns, int& numChimeras){
1043 outputFName = m->getFullPathName(outputFName);
1044 filename = m->getFullPathName(filename);
1045 alns = m->getFullPathName(alns);
1047 //to allow for spaces in the path
1048 outputFName = "\"" + outputFName + "\"";
1049 filename = "\"" + filename + "\"";
1050 alns = "\"" + alns + "\"";
1052 vector<char*> cPara;
1054 string uchimeCommand = uchimeLocation;
1055 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
1056 uchimeCommand += " ";
1058 uchimeCommand = "\"" + uchimeCommand + "\"";
1062 tempUchime= new char[uchimeCommand.length()+1];
1064 strncat(tempUchime, uchimeCommand.c_str(), uchimeCommand.length());
1065 cPara.push_back(tempUchime);
1067 char* tempIn = new char[8];
1068 *tempIn = '\0'; strncat(tempIn, "--input", 7);
1069 //strcpy(tempIn, "--input");
1070 cPara.push_back(tempIn);
1071 char* temp = new char[filename.length()+1];
1072 *temp = '\0'; strncat(temp, filename.c_str(), filename.length());
1073 //strcpy(temp, filename.c_str());
1074 cPara.push_back(temp);
1076 //are you using a reference file
1077 if (templatefile != "self") {
1078 //add reference file
1079 char* tempRef = new char[5];
1080 //strcpy(tempRef, "--db");
1081 *tempRef = '\0'; strncat(tempRef, "--db", 4);
1082 cPara.push_back(tempRef);
1083 char* tempR = new char[templatefile.length()+1];
1084 //strcpy(tempR, templatefile.c_str());
1085 *tempR = '\0'; strncat(tempR, templatefile.c_str(), templatefile.length());
1086 cPara.push_back(tempR);
1089 char* tempO = new char[12];
1090 *tempO = '\0'; strncat(tempO, "--uchimeout", 11);
1091 //strcpy(tempO, "--uchimeout");
1092 cPara.push_back(tempO);
1093 char* tempout = new char[outputFName.length()+1];
1094 //strcpy(tempout, outputFName.c_str());
1095 *tempout = '\0'; strncat(tempout, outputFName.c_str(), outputFName.length());
1096 cPara.push_back(tempout);
1099 char* tempA = new char[13];
1100 *tempA = '\0'; strncat(tempA, "--uchimealns", 12);
1101 //strcpy(tempA, "--uchimealns");
1102 cPara.push_back(tempA);
1103 char* tempa = new char[alns.length()+1];
1104 //strcpy(tempa, alns.c_str());
1105 *tempa = '\0'; strncat(tempa, alns.c_str(), alns.length());
1106 cPara.push_back(tempa);
1110 char* tempskew = new char[9];
1111 *tempskew = '\0'; strncat(tempskew, "--abskew", 8);
1112 //strcpy(tempskew, "--abskew");
1113 cPara.push_back(tempskew);
1114 char* tempSkew = new char[abskew.length()+1];
1115 //strcpy(tempSkew, abskew.c_str());
1116 *tempSkew = '\0'; strncat(tempSkew, abskew.c_str(), abskew.length());
1117 cPara.push_back(tempSkew);
1121 char* tempminh = new char[7];
1122 *tempminh = '\0'; strncat(tempminh, "--minh", 6);
1123 //strcpy(tempminh, "--minh");
1124 cPara.push_back(tempminh);
1125 char* tempMinH = new char[minh.length()+1];
1126 *tempMinH = '\0'; strncat(tempMinH, minh.c_str(), minh.length());
1127 //strcpy(tempMinH, minh.c_str());
1128 cPara.push_back(tempMinH);
1132 char* tempmindiv = new char[9];
1133 *tempmindiv = '\0'; strncat(tempmindiv, "--mindiv", 8);
1134 //strcpy(tempmindiv, "--mindiv");
1135 cPara.push_back(tempmindiv);
1136 char* tempMindiv = new char[mindiv.length()+1];
1137 *tempMindiv = '\0'; strncat(tempMindiv, mindiv.c_str(), mindiv.length());
1138 //strcpy(tempMindiv, mindiv.c_str());
1139 cPara.push_back(tempMindiv);
1143 char* tempxn = new char[5];
1144 //strcpy(tempxn, "--xn");
1145 *tempxn = '\0'; strncat(tempxn, "--xn", 4);
1146 cPara.push_back(tempxn);
1147 char* tempXn = new char[xn.length()+1];
1148 //strcpy(tempXn, xn.c_str());
1149 *tempXn = '\0'; strncat(tempXn, xn.c_str(), xn.length());
1150 cPara.push_back(tempXn);
1154 char* tempdn = new char[5];
1155 //strcpy(tempdn, "--dn");
1156 *tempdn = '\0'; strncat(tempdn, "--dn", 4);
1157 cPara.push_back(tempdn);
1158 char* tempDn = new char[dn.length()+1];
1159 *tempDn = '\0'; strncat(tempDn, dn.c_str(), dn.length());
1160 //strcpy(tempDn, dn.c_str());
1161 cPara.push_back(tempDn);
1165 char* tempxa = new char[5];
1166 //strcpy(tempxa, "--xa");
1167 *tempxa = '\0'; strncat(tempxa, "--xa", 4);
1168 cPara.push_back(tempxa);
1169 char* tempXa = new char[xa.length()+1];
1170 *tempXa = '\0'; strncat(tempXa, xa.c_str(), xa.length());
1171 //strcpy(tempXa, xa.c_str());
1172 cPara.push_back(tempXa);
1176 char* tempchunks = new char[9];
1177 //strcpy(tempchunks, "--chunks");
1178 *tempchunks = '\0'; strncat(tempchunks, "--chunks", 8);
1179 cPara.push_back(tempchunks);
1180 char* tempChunks = new char[chunks.length()+1];
1181 *tempChunks = '\0'; strncat(tempChunks, chunks.c_str(), chunks.length());
1182 //strcpy(tempChunks, chunks.c_str());
1183 cPara.push_back(tempChunks);
1187 char* tempminchunk = new char[11];
1188 //strcpy(tempminchunk, "--minchunk");
1189 *tempminchunk = '\0'; strncat(tempminchunk, "--minchunk", 10);
1190 cPara.push_back(tempminchunk);
1191 char* tempMinchunk = new char[minchunk.length()+1];
1192 *tempMinchunk = '\0'; strncat(tempMinchunk, minchunk.c_str(), minchunk.length());
1193 //strcpy(tempMinchunk, minchunk.c_str());
1194 cPara.push_back(tempMinchunk);
1197 if (useIdsmoothwindow) {
1198 char* tempidsmoothwindow = new char[17];
1199 *tempidsmoothwindow = '\0'; strncat(tempidsmoothwindow, "--idsmoothwindow", 16);
1200 //strcpy(tempidsmoothwindow, "--idsmoothwindow");
1201 cPara.push_back(tempidsmoothwindow);
1202 char* tempIdsmoothwindow = new char[idsmoothwindow.length()+1];
1203 *tempIdsmoothwindow = '\0'; strncat(tempIdsmoothwindow, idsmoothwindow.c_str(), idsmoothwindow.length());
1204 //strcpy(tempIdsmoothwindow, idsmoothwindow.c_str());
1205 cPara.push_back(tempIdsmoothwindow);
1208 /*if (useMinsmoothid) {
1209 char* tempminsmoothid = new char[14];
1210 //strcpy(tempminsmoothid, "--minsmoothid");
1211 *tempminsmoothid = '\0'; strncat(tempminsmoothid, "--minsmoothid", 13);
1212 cPara.push_back(tempminsmoothid);
1213 char* tempMinsmoothid = new char[minsmoothid.length()+1];
1214 *tempMinsmoothid = '\0'; strncat(tempMinsmoothid, minsmoothid.c_str(), minsmoothid.length());
1215 //strcpy(tempMinsmoothid, minsmoothid.c_str());
1216 cPara.push_back(tempMinsmoothid);
1220 char* tempmaxp = new char[7];
1221 //strcpy(tempmaxp, "--maxp");
1222 *tempmaxp = '\0'; strncat(tempmaxp, "--maxp", 6);
1223 cPara.push_back(tempmaxp);
1224 char* tempMaxp = new char[maxp.length()+1];
1225 *tempMaxp = '\0'; strncat(tempMaxp, maxp.c_str(), maxp.length());
1226 //strcpy(tempMaxp, maxp.c_str());
1227 cPara.push_back(tempMaxp);
1231 char* tempskipgaps = new char[13];
1232 //strcpy(tempskipgaps, "--[no]skipgaps");
1233 *tempskipgaps = '\0'; strncat(tempskipgaps, "--noskipgaps", 12);
1234 cPara.push_back(tempskipgaps);
1238 char* tempskipgaps2 = new char[14];
1239 //strcpy(tempskipgaps2, "--[no]skipgaps2");
1240 *tempskipgaps2 = '\0'; strncat(tempskipgaps2, "--noskipgaps2", 13);
1241 cPara.push_back(tempskipgaps2);
1245 char* tempminlen = new char[9];
1246 *tempminlen = '\0'; strncat(tempminlen, "--minlen", 8);
1247 //strcpy(tempminlen, "--minlen");
1248 cPara.push_back(tempminlen);
1249 char* tempMinlen = new char[minlen.length()+1];
1250 //strcpy(tempMinlen, minlen.c_str());
1251 *tempMinlen = '\0'; strncat(tempMinlen, minlen.c_str(), minlen.length());
1252 cPara.push_back(tempMinlen);
1256 char* tempmaxlen = new char[9];
1257 //strcpy(tempmaxlen, "--maxlen");
1258 *tempmaxlen = '\0'; strncat(tempmaxlen, "--maxlen", 8);
1259 cPara.push_back(tempmaxlen);
1260 char* tempMaxlen = new char[maxlen.length()+1];
1261 *tempMaxlen = '\0'; strncat(tempMaxlen, maxlen.c_str(), maxlen.length());
1262 //strcpy(tempMaxlen, maxlen.c_str());
1263 cPara.push_back(tempMaxlen);
1267 char* tempucl = new char[5];
1268 strcpy(tempucl, "--ucl");
1269 cPara.push_back(tempucl);
1272 if (useQueryfract) {
1273 char* tempqueryfract = new char[13];
1274 *tempqueryfract = '\0'; strncat(tempqueryfract, "--queryfract", 12);
1275 //strcpy(tempqueryfract, "--queryfract");
1276 cPara.push_back(tempqueryfract);
1277 char* tempQueryfract = new char[queryfract.length()+1];
1278 *tempQueryfract = '\0'; strncat(tempQueryfract, queryfract.c_str(), queryfract.length());
1279 //strcpy(tempQueryfract, queryfract.c_str());
1280 cPara.push_back(tempQueryfract);
1284 char** uchimeParameters;
1285 uchimeParameters = new char*[cPara.size()];
1286 string commandString = "";
1287 for (int i = 0; i < cPara.size(); i++) { uchimeParameters[i] = cPara[i]; commandString += toString(cPara[i]) + " "; }
1288 //int numArgs = cPara.size();
1290 //uchime_main(numArgs, uchimeParameters);
1291 //cout << "commandString = " << commandString << endl;
1292 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
1294 commandString = "\"" + commandString + "\"";
1296 if (m->debug) { m->mothurOut("[DEBUG]: uchime command = " + commandString + ".\n"); }
1297 system(commandString.c_str());
1300 for(int i = 0; i < cPara.size(); i++) { delete cPara[i]; }
1301 delete[] uchimeParameters;
1303 //remove "" from filenames
1304 outputFName = outputFName.substr(1, outputFName.length()-2);
1305 filename = filename.substr(1, filename.length()-2);
1306 alns = alns.substr(1, alns.length()-2);
1308 if (m->control_pressed) { return 0; }
1310 //create accnos file from uchime results
1312 m->openInputFile(outputFName, in);
1315 m->openOutputFile(accnos, out);
1321 if (m->control_pressed) { break; }
1324 string chimeraFlag = "";
1325 in >> chimeraFlag >> name;
1327 //fix name if needed
1328 if (templatefile == "self") {
1329 name = name.substr(0, name.length()-1); //rip off last /
1330 name = name.substr(0, name.find_last_of('/'));
1333 for (int i = 0; i < 15; i++) { in >> chimeraFlag; }
1336 if (chimeraFlag == "Y") { out << name << endl; numChimeras++; }
1344 catch(exception& e) {
1345 m->errorOut(e, "ChimeraUchimeCommand", "driver");
1349 /**************************************************************************************************/
1351 int ChimeraUchimeCommand::createProcesses(string outputFileName, string filename, string accnos, string alns, int& numChimeras) {
1357 vector<string> files;
1359 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
1360 //break up file into multiple files
1361 m->divideFile(filename, processors, files);
1363 if (m->control_pressed) { return 0; }
1365 //loop through and create all the processes you want
1366 while (process != processors) {
1370 processIDS.push_back(pid); //create map from line number to pid so you can append files in correct order later
1372 }else if (pid == 0){
1373 num = driver(outputFileName + toString(getpid()) + ".temp", files[process], accnos + toString(getpid()) + ".temp", alns + toString(getpid()) + ".temp", numChimeras);
1375 //pass numSeqs to parent
1377 string tempFile = outputFileName + toString(getpid()) + ".num.temp";
1378 m->openOutputFile(tempFile, out);
1380 out << numChimeras << endl;
1385 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine();
1386 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
1392 num = driver(outputFileName, files[0], accnos, alns, numChimeras);
1394 //force parent to wait until all the processes are done
1395 for (int i=0;i<processIDS.size();i++) {
1396 int temp = processIDS[i];
1400 for (int i = 0; i < processIDS.size(); i++) {
1402 string tempFile = outputFileName + toString(processIDS[i]) + ".num.temp";
1403 m->openInputFile(tempFile, in);
1406 in >> tempNum; m->gobble(in);
1409 numChimeras += tempNum;
1411 in.close(); m->mothurRemove(tempFile);
1414 //////////////////////////////////////////////////////////////////////////////////////////////////////
1415 //Windows version shared memory, so be careful when passing variables through the preClusterData struct.
1416 //Above fork() will clone, so memory is separate, but that's not the case with windows,
1417 //////////////////////////////////////////////////////////////////////////////////////////////////////
1422 map<int, ofstream*> filehandles;
1423 map<int, ofstream*>::iterator it3;
1426 for (int i = 0; i < processors; i++) {
1427 temp = new ofstream;
1428 filehandles[i] = temp;
1429 m->openOutputFile(filename+toString(i)+".temp", *(temp));
1430 files.push_back(filename+toString(i)+".temp");
1434 m->openInputFile(filename, in);
1438 if (m->control_pressed) { in.close(); for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) { (*(it3->second)).close(); delete it3->second; } return 0; }
1440 Sequence tempSeq(in); m->gobble(in);
1442 if (tempSeq.getName() != "") {
1443 tempSeq.printSequence(*(filehandles[spot]));
1445 if (spot == processors) { spot = 0; }
1451 for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) {
1452 (*(it3->second)).close();
1456 //sanity check for number of processors
1457 if (count < processors) { processors = count; }
1459 vector<uchimeData*> pDataArray;
1460 DWORD dwThreadIdArray[processors-1];
1461 HANDLE hThreadArray[processors-1];
1462 vector<string> dummy; //used so that we can use the same struct for MyUchimeSeqsThreadFunction and MyUchimeThreadFunction
1464 //Create processor worker threads.
1465 for( int i=1; i<processors; i++ ){
1466 // Allocate memory for thread data.
1467 string extension = toString(i) + ".temp";
1469 uchimeData* tempUchime = new uchimeData(outputFileName+extension, uchimeLocation, templatefile, files[i], "", "", "", accnos+extension, alns+extension, dummy, m, 0, 0, i);
1470 tempUchime->setBooleans(useAbskew, chimealns, useMinH, useMindiv, useXn, useDn, useXa, useChunks, useMinchunk, useIdsmoothwindow, useMinsmoothid, useMaxp, skipgaps, skipgaps2, useMinlen, useMaxlen, ucl, useQueryfract);
1471 tempUchime->setVariables(abskew, minh, mindiv, xn, dn, xa, chunks, minchunk, idsmoothwindow, minsmoothid, maxp, minlen, maxlen, queryfract);
1473 pDataArray.push_back(tempUchime);
1474 processIDS.push_back(i);
1476 //MySeqSumThreadFunction is in header. It must be global or static to work with the threads.
1477 //default security attributes, thread function name, argument to thread function, use default creation flags, returns the thread identifier
1478 hThreadArray[i-1] = CreateThread(NULL, 0, MyUchimeSeqsThreadFunction, pDataArray[i-1], 0, &dwThreadIdArray[i-1]);
1482 //using the main process as a worker saves time and memory
1483 num = driver(outputFileName, files[0], accnos, alns, numChimeras);
1485 //Wait until all threads have terminated.
1486 WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE);
1488 //Close all thread handles and free memory allocations.
1489 for(int i=0; i < pDataArray.size(); i++){
1490 num += pDataArray[i]->count;
1491 numChimeras += pDataArray[i]->numChimeras;
1492 CloseHandle(hThreadArray[i]);
1493 delete pDataArray[i];
1497 //append output files
1498 for(int i=0;i<processIDS.size();i++){
1499 m->appendFiles((outputFileName + toString(processIDS[i]) + ".temp"), outputFileName);
1500 m->mothurRemove((outputFileName + toString(processIDS[i]) + ".temp"));
1502 m->appendFiles((accnos + toString(processIDS[i]) + ".temp"), accnos);
1503 m->mothurRemove((accnos + toString(processIDS[i]) + ".temp"));
1506 m->appendFiles((alns + toString(processIDS[i]) + ".temp"), alns);
1507 m->mothurRemove((alns + toString(processIDS[i]) + ".temp"));
1511 //get rid of the file pieces.
1512 for (int i = 0; i < files.size(); i++) { m->mothurRemove(files[i]); }
1515 catch(exception& e) {
1516 m->errorOut(e, "ChimeraUchimeCommand", "createProcesses");
1520 /**************************************************************************************************/
1522 int ChimeraUchimeCommand::createProcessesGroups(SequenceParser& parser, string outputFName, string filename, string accnos, string alns, vector<string> groups, string nameFile, string groupFile, string fastaFile) {
1530 if (groups.size() < processors) { processors = groups.size(); }
1532 //divide the groups between the processors
1533 vector<linePair> lines;
1534 int numGroupsPerProcessor = groups.size() / processors;
1535 for (int i = 0; i < processors; i++) {
1536 int startIndex = i * numGroupsPerProcessor;
1537 int endIndex = (i+1) * numGroupsPerProcessor;
1538 if(i == (processors - 1)){ endIndex = groups.size(); }
1539 lines.push_back(linePair(startIndex, endIndex));
1542 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
1544 //loop through and create all the processes you want
1545 while (process != processors) {
1549 processIDS.push_back(pid); //create map from line number to pid so you can append files in correct order later
1551 }else if (pid == 0){
1552 num = driverGroups(parser, outputFName + toString(getpid()) + ".temp", filename + toString(getpid()) + ".temp", accnos + toString(getpid()) + ".temp", alns + toString(getpid()) + ".temp", lines[process].start, lines[process].end, groups);
1554 //pass numSeqs to parent
1556 string tempFile = outputFName + toString(getpid()) + ".num.temp";
1557 m->openOutputFile(tempFile, out);
1563 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine();
1564 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
1570 num = driverGroups(parser, outputFName, filename, accnos, alns, lines[0].start, lines[0].end, groups);
1572 //force parent to wait until all the processes are done
1573 for (int i=0;i<processIDS.size();i++) {
1574 int temp = processIDS[i];
1578 for (int i = 0; i < processIDS.size(); i++) {
1580 string tempFile = outputFName + toString(processIDS[i]) + ".num.temp";
1581 m->openInputFile(tempFile, in);
1582 if (!in.eof()) { int tempNum = 0; in >> tempNum; num += tempNum; }
1583 in.close(); m->mothurRemove(tempFile);
1587 //////////////////////////////////////////////////////////////////////////////////////////////////////
1588 //Windows version shared memory, so be careful when passing variables through the uchimeData struct.
1589 //Above fork() will clone, so memory is separate, but that's not the case with windows,
1590 //////////////////////////////////////////////////////////////////////////////////////////////////////
1592 vector<uchimeData*> pDataArray;
1593 DWORD dwThreadIdArray[processors-1];
1594 HANDLE hThreadArray[processors-1];
1596 //Create processor worker threads.
1597 for( int i=1; i<processors; i++ ){
1598 // Allocate memory for thread data.
1599 string extension = toString(i) + ".temp";
1601 uchimeData* tempUchime = new uchimeData(outputFName+extension, uchimeLocation, templatefile, filename+extension, fastaFile, nameFile, groupFile, accnos+extension, alns+extension, groups, m, lines[i].start, lines[i].end, i);
1602 tempUchime->setBooleans(useAbskew, chimealns, useMinH, useMindiv, useXn, useDn, useXa, useChunks, useMinchunk, useIdsmoothwindow, useMinsmoothid, useMaxp, skipgaps, skipgaps2, useMinlen, useMaxlen, ucl, useQueryfract);
1603 tempUchime->setVariables(abskew, minh, mindiv, xn, dn, xa, chunks, minchunk, idsmoothwindow, minsmoothid, maxp, minlen, maxlen, queryfract);
1605 pDataArray.push_back(tempUchime);
1606 processIDS.push_back(i);
1608 //MyUchimeThreadFunction is in header. It must be global or static to work with the threads.
1609 //default security attributes, thread function name, argument to thread function, use default creation flags, returns the thread identifier
1610 hThreadArray[i-1] = CreateThread(NULL, 0, MyUchimeThreadFunction, pDataArray[i-1], 0, &dwThreadIdArray[i-1]);
1614 //using the main process as a worker saves time and memory
1615 num = driverGroups(parser, outputFName, filename, accnos, alns, lines[0].start, lines[0].end, groups);
1617 //Wait until all threads have terminated.
1618 WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE);
1620 //Close all thread handles and free memory allocations.
1621 for(int i=0; i < pDataArray.size(); i++){
1622 num += pDataArray[i]->count;
1623 CloseHandle(hThreadArray[i]);
1624 delete pDataArray[i];
1629 //append output files
1630 for(int i=0;i<processIDS.size();i++){
1631 m->appendFiles((outputFName + toString(processIDS[i]) + ".temp"), outputFName);
1632 m->mothurRemove((outputFName + toString(processIDS[i]) + ".temp"));
1634 m->appendFiles((accnos + toString(processIDS[i]) + ".temp"), accnos);
1635 m->mothurRemove((accnos + toString(processIDS[i]) + ".temp"));
1638 m->appendFiles((alns + toString(processIDS[i]) + ".temp"), alns);
1639 m->mothurRemove((alns + toString(processIDS[i]) + ".temp"));
1646 catch(exception& e) {
1647 m->errorOut(e, "ChimeraUchimeCommand", "createProcessesGroups");
1651 /**************************************************************************************************/