2 * chimerauchimecommand.cpp
5 * Created by westcott on 5/13/11.
6 * Copyright 2011 Schloss Lab. All rights reserved.
10 #include "chimerauchimecommand.h"
11 #include "deconvolutecommand.h"
13 #include "sequence.hpp"
14 #include "referencedb.h"
17 //**********************************************************************************************************************
18 vector<string> ChimeraUchimeCommand::setParameters(){
20 CommandParameter ptemplate("reference", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(ptemplate);
21 CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pfasta);
22 CommandParameter pname("name", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pname);
23 CommandParameter pgroup("group", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pgroup);
24 CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors);
25 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
26 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
27 CommandParameter pabskew("abskew", "Number", "", "1.9", "", "", "",false,false); parameters.push_back(pabskew);
28 CommandParameter pchimealns("chimealns", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pchimealns);
29 CommandParameter pminh("minh", "Number", "", "0.3", "", "", "",false,false); parameters.push_back(pminh);
30 CommandParameter pmindiv("mindiv", "Number", "", "0.5", "", "", "",false,false); parameters.push_back(pmindiv);
31 CommandParameter pxn("xn", "Number", "", "8.0", "", "", "",false,false); parameters.push_back(pxn);
32 CommandParameter pdn("dn", "Number", "", "1.4", "", "", "",false,false); parameters.push_back(pdn);
33 CommandParameter pxa("xa", "Number", "", "1", "", "", "",false,false); parameters.push_back(pxa);
34 CommandParameter pchunks("chunks", "Number", "", "4", "", "", "",false,false); parameters.push_back(pchunks);
35 CommandParameter pminchunk("minchunk", "Number", "", "64", "", "", "",false,false); parameters.push_back(pminchunk);
36 CommandParameter pidsmoothwindow("idsmoothwindow", "Number", "", "32", "", "", "",false,false); parameters.push_back(pidsmoothwindow);
37 //CommandParameter pminsmoothid("minsmoothid", "Number", "", "0.95", "", "", "",false,false); parameters.push_back(pminsmoothid);
38 CommandParameter pmaxp("maxp", "Number", "", "2", "", "", "",false,false); parameters.push_back(pmaxp);
39 CommandParameter pskipgaps("skipgaps", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(pskipgaps);
40 CommandParameter pskipgaps2("skipgaps2", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(pskipgaps2);
41 CommandParameter pminlen("minlen", "Number", "", "10", "", "", "",false,false); parameters.push_back(pminlen);
42 CommandParameter pmaxlen("maxlen", "Number", "", "10000", "", "", "",false,false); parameters.push_back(pmaxlen);
43 CommandParameter pucl("ucl", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pucl);
44 CommandParameter pqueryfract("queryfract", "Number", "", "0.5", "", "", "",false,false); parameters.push_back(pqueryfract);
46 vector<string> myArray;
47 for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); }
51 m->errorOut(e, "ChimeraUchimeCommand", "setParameters");
55 //**********************************************************************************************************************
56 string ChimeraUchimeCommand::getHelpString(){
58 string helpString = "";
59 helpString += "The chimera.uchime command reads a fastafile and referencefile and outputs potentially chimeric sequences.\n";
60 helpString += "This command is a wrapper for uchime written by Robert C. Edgar.\n";
61 helpString += "The chimera.uchime command parameters are fasta, name, reference, processors, abskew, chimealns, minh, mindiv, xn, dn, xa, chunks, minchunk, idsmoothwindow, minsmoothid, maxp, skipgaps, skipgaps2, minlen, maxlen, ucl and queryfact.\n";
62 helpString += "The fasta parameter allows you to enter the fasta file containing your potentially chimeric sequences, and is required, unless you have a valid current fasta file. \n";
63 helpString += "The name parameter allows you to provide a name file, if you are using template=self. \n";
64 helpString += "You may enter multiple fasta files by separating their names with dashes. ie. fasta=abrecovery.fasta-amazon.fasta \n";
65 helpString += "The group parameter allows you to provide a group file. The group file can be used with a namesfile and reference=self. When checking sequences, only sequences from the same group as the query sequence will be used as the reference. \n";
66 helpString += "The reference parameter allows you to enter a reference file containing known non-chimeric sequences, and is required. You may also set template=self, in this case the abundant sequences will be used as potential parents. \n";
67 helpString += "The processors parameter allows you to specify how many processors you would like to use. The default is 1. \n";
68 helpString += "The abskew parameter can only be used with template=self. Minimum abundance skew. Default 1.9. Abundance skew is: min [ abund(parent1), abund(parent2) ] / abund(query).\n";
69 helpString += "The chimealns parameter allows you to indicate you would like a file containing multiple alignments of query sequences to parents in human readable format. Alignments show columns with differences that support or contradict a chimeric model.\n";
70 helpString += "The minh parameter - mininum score to report chimera. Default 0.3. Values from 0.1 to 5 might be reasonable. Lower values increase sensitivity but may report more false positives. If you decrease xn you may need to increase minh, and vice versa.\n";
71 helpString += "The mindiv parameter - minimum divergence ratio, default 0.5. Div ratio is 100%% - %%identity between query sequence and the closest candidate for being a parent. If you don't care about very close chimeras, then you could increase mindiv to, say, 1.0 or 2.0, and also decrease minh, say to 0.1, to increase sensitivity. How well this works will depend on your data. Best is to tune parameters on a good benchmark.\n";
72 helpString += "The xn parameter - weight of a no vote. Default 8.0. Decreasing this weight to around 3 or 4 may give better performance on denoised data.\n";
73 helpString += "The dn parameter - pseudo-count prior on number of no votes. Default 1.4. Probably no good reason to change this unless you can retune to a good benchmark for your data. Reasonable values are probably in the range from 0.2 to 2.\n";
74 helpString += "The xa parameter - weight of an abstain vote. Default 1. So far, results do not seem to be very sensitive to this parameter, but if you have a good training set might be worth trying. Reasonable values might range from 0.1 to 2.\n";
75 helpString += "The chunks parameter is the number of chunks to extract from the query sequence when searching for parents. Default 4.\n";
76 helpString += "The minchunk parameter is the minimum length of a chunk. Default 64.\n";
77 helpString += "The idsmoothwindow parameter is the length of id smoothing window. Default 32.\n";
78 //helpString += "The minsmoothid parameter - minimum factional identity over smoothed window of candidate parent. Default 0.95.\n";
79 helpString += "The maxp parameter - maximum number of candidate parents to consider. Default 2. In tests so far, increasing maxp gives only a very small improvement in sensivity but tends to increase the error rate quite a bit.\n";
80 helpString += "The skipgaps parameter controls how gapped columns affect counting of diffs. If skipgaps is set to T, columns containing gaps do not found as diffs. Default = T.\n";
81 helpString += "The skipgaps2 parameter controls how gapped columns affect counting of diffs. If skipgaps2 is set to T, if column is immediately adjacent to a column containing a gap, it is not counted as a diff. Default = T.\n";
82 helpString += "The minlen parameter is the minimum unaligned sequence length. Defaults 10. Applies to both query and reference sequences.\n";
83 helpString += "The maxlen parameter is the maximum unaligned sequence length. Defaults 10000. Applies to both query and reference sequences.\n";
84 helpString += "The ucl parameter - use local-X alignments. Default is global-X or false. On tests so far, global-X is always better; this option is retained because it just might work well on some future type of data.\n";
85 helpString += "The queryfract parameter - minimum fraction of the query sequence that must be covered by a local-X alignment. Default 0.5. Applies only when ucl is true.\n";
87 helpString += "When using MPI, the processors parameter is set to the number of MPI processes running. \n";
89 helpString += "The chimera.uchime command should be in the following format: \n";
90 helpString += "chimera.uchime(fasta=yourFastaFile, reference=yourTemplate) \n";
91 helpString += "Example: chimera.uchime(fasta=AD.align, reference=silva.gold.align) \n";
92 helpString += "Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFastaFile).\n";
96 m->errorOut(e, "ChimeraUchimeCommand", "getHelpString");
100 //**********************************************************************************************************************
101 ChimeraUchimeCommand::ChimeraUchimeCommand(){
103 abort = true; calledHelp = true;
105 vector<string> tempOutNames;
106 outputTypes["chimera"] = tempOutNames;
107 outputTypes["accnos"] = tempOutNames;
108 outputTypes["alns"] = tempOutNames;
110 catch(exception& e) {
111 m->errorOut(e, "ChimeraUchimeCommand", "ChimeraUchimeCommand");
115 //***************************************************************************************************************
116 ChimeraUchimeCommand::ChimeraUchimeCommand(string option) {
118 abort = false; calledHelp = false;
119 ReferenceDB* rdb = ReferenceDB::getInstance();
121 //allow user to run help
122 if(option == "help") { help(); abort = true; calledHelp = true; }
123 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
126 vector<string> myArray = setParameters();
128 OptionParser parser(option);
129 map<string,string> parameters = parser.getParameters();
131 ValidParameters validParameter("chimera.uchime");
132 map<string,string>::iterator it;
134 //check to make sure all parameters are valid for command
135 for (it = parameters.begin(); it != parameters.end(); it++) {
136 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
139 vector<string> tempOutNames;
140 outputTypes["chimera"] = tempOutNames;
141 outputTypes["accnos"] = tempOutNames;
142 outputTypes["alns"] = tempOutNames;
144 //if the user changes the input directory command factory will send this info to us in the output parameter
145 string inputDir = validParameter.validFile(parameters, "inputdir", false);
146 if (inputDir == "not found"){ inputDir = ""; }
148 //check for required parameters
149 fastafile = validParameter.validFile(parameters, "fasta", false);
150 if (fastafile == "not found") {
151 //if there is a current fasta file, use it
152 string filename = m->getFastaFile();
153 if (filename != "") { fastaFileNames.push_back(filename); m->mothurOut("Using " + filename + " as input file for the fasta parameter."); m->mothurOutEndLine(); }
154 else { m->mothurOut("You have no current fastafile and the fasta parameter is required."); m->mothurOutEndLine(); abort = true; }
156 m->splitAtDash(fastafile, fastaFileNames);
158 //go through files and make sure they are good, if not, then disregard them
159 for (int i = 0; i < fastaFileNames.size(); i++) {
162 if (fastaFileNames[i] == "current") {
163 fastaFileNames[i] = m->getFastaFile();
164 if (fastaFileNames[i] != "") { m->mothurOut("Using " + fastaFileNames[i] + " as input file for the fasta parameter where you had given current."); m->mothurOutEndLine(); }
166 m->mothurOut("You have no current fastafile, ignoring current."); m->mothurOutEndLine(); ignore=true;
167 //erase from file list
168 fastaFileNames.erase(fastaFileNames.begin()+i);
175 if (inputDir != "") {
176 string path = m->hasPath(fastaFileNames[i]);
177 //if the user has not given a path then, add inputdir. else leave path alone.
178 if (path == "") { fastaFileNames[i] = inputDir + fastaFileNames[i]; }
184 ableToOpen = m->openInputFile(fastaFileNames[i], in, "noerror");
186 //if you can't open it, try default location
187 if (ableToOpen == 1) {
188 if (m->getDefaultPath() != "") { //default path is set
189 string tryPath = m->getDefaultPath() + m->getSimpleName(fastaFileNames[i]);
190 m->mothurOut("Unable to open " + fastaFileNames[i] + ". Trying default " + tryPath); m->mothurOutEndLine();
192 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
194 fastaFileNames[i] = tryPath;
198 if (ableToOpen == 1) {
199 if (m->getOutputDir() != "") { //default path is set
200 string tryPath = m->getOutputDir() + m->getSimpleName(fastaFileNames[i]);
201 m->mothurOut("Unable to open " + fastaFileNames[i] + ". Trying output directory " + tryPath); m->mothurOutEndLine();
203 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
205 fastaFileNames[i] = tryPath;
211 if (ableToOpen == 1) {
212 m->mothurOut("Unable to open " + fastaFileNames[i] + ". It will be disregarded."); m->mothurOutEndLine();
213 //erase from file list
214 fastaFileNames.erase(fastaFileNames.begin()+i);
217 m->setFastaFile(fastaFileNames[i]);
222 //make sure there is at least one valid file left
223 if (fastaFileNames.size() == 0) { m->mothurOut("[ERROR]: no valid files."); m->mothurOutEndLine(); abort = true; }
227 //check for required parameters
229 namefile = validParameter.validFile(parameters, "name", false);
230 if (namefile == "not found") { namefile = ""; hasName = false; }
232 m->splitAtDash(namefile, nameFileNames);
234 //go through files and make sure they are good, if not, then disregard them
235 for (int i = 0; i < nameFileNames.size(); i++) {
238 if (nameFileNames[i] == "current") {
239 nameFileNames[i] = m->getNameFile();
240 if (nameFileNames[i] != "") { m->mothurOut("Using " + nameFileNames[i] + " as input file for the name parameter where you had given current."); m->mothurOutEndLine(); }
242 m->mothurOut("You have no current namefile, ignoring current."); m->mothurOutEndLine(); ignore=true;
243 //erase from file list
244 nameFileNames.erase(nameFileNames.begin()+i);
251 if (inputDir != "") {
252 string path = m->hasPath(nameFileNames[i]);
253 //if the user has not given a path then, add inputdir. else leave path alone.
254 if (path == "") { nameFileNames[i] = inputDir + nameFileNames[i]; }
260 ableToOpen = m->openInputFile(nameFileNames[i], in, "noerror");
262 //if you can't open it, try default location
263 if (ableToOpen == 1) {
264 if (m->getDefaultPath() != "") { //default path is set
265 string tryPath = m->getDefaultPath() + m->getSimpleName(nameFileNames[i]);
266 m->mothurOut("Unable to open " + nameFileNames[i] + ". Trying default " + tryPath); m->mothurOutEndLine();
268 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
270 nameFileNames[i] = tryPath;
274 if (ableToOpen == 1) {
275 if (m->getOutputDir() != "") { //default path is set
276 string tryPath = m->getOutputDir() + m->getSimpleName(nameFileNames[i]);
277 m->mothurOut("Unable to open " + nameFileNames[i] + ". Trying output directory " + tryPath); m->mothurOutEndLine();
279 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
281 nameFileNames[i] = tryPath;
287 if (ableToOpen == 1) {
288 m->mothurOut("Unable to open " + nameFileNames[i] + ". It will be disregarded."); m->mothurOutEndLine();
289 //erase from file list
290 nameFileNames.erase(nameFileNames.begin()+i);
293 m->setNameFile(nameFileNames[i]);
298 //make sure there is at least one valid file left
299 if (nameFileNames.size() == 0) { m->mothurOut("[ERROR]: no valid name files."); m->mothurOutEndLine(); abort = true; }
302 if (hasName && (nameFileNames.size() != fastaFileNames.size())) { m->mothurOut("[ERROR]: The number of namefiles does not match the number of fastafiles, please correct."); m->mothurOutEndLine(); abort=true; }
304 bool hasGroup = true;
305 groupfile = validParameter.validFile(parameters, "group", false);
306 if (groupfile == "not found") { groupfile = ""; hasGroup = false; }
308 m->splitAtDash(groupfile, groupFileNames);
310 //go through files and make sure they are good, if not, then disregard them
311 for (int i = 0; i < groupFileNames.size(); i++) {
314 if (groupFileNames[i] == "current") {
315 groupFileNames[i] = m->getGroupFile();
316 if (groupFileNames[i] != "") { m->mothurOut("Using " + groupFileNames[i] + " as input file for the group parameter where you had given current."); m->mothurOutEndLine(); }
318 m->mothurOut("You have no current namefile, ignoring current."); m->mothurOutEndLine(); ignore=true;
319 //erase from file list
320 groupFileNames.erase(groupFileNames.begin()+i);
327 if (inputDir != "") {
328 string path = m->hasPath(groupFileNames[i]);
329 //if the user has not given a path then, add inputdir. else leave path alone.
330 if (path == "") { groupFileNames[i] = inputDir + groupFileNames[i]; }
336 ableToOpen = m->openInputFile(groupFileNames[i], in, "noerror");
338 //if you can't open it, try default location
339 if (ableToOpen == 1) {
340 if (m->getDefaultPath() != "") { //default path is set
341 string tryPath = m->getDefaultPath() + m->getSimpleName(groupFileNames[i]);
342 m->mothurOut("Unable to open " + groupFileNames[i] + ". Trying default " + tryPath); m->mothurOutEndLine();
344 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
346 groupFileNames[i] = tryPath;
350 if (ableToOpen == 1) {
351 if (m->getOutputDir() != "") { //default path is set
352 string tryPath = m->getOutputDir() + m->getSimpleName(groupFileNames[i]);
353 m->mothurOut("Unable to open " + groupFileNames[i] + ". Trying output directory " + tryPath); m->mothurOutEndLine();
355 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
357 groupFileNames[i] = tryPath;
363 if (ableToOpen == 1) {
364 m->mothurOut("Unable to open " + groupFileNames[i] + ". It will be disregarded."); m->mothurOutEndLine();
365 //erase from file list
366 groupFileNames.erase(groupFileNames.begin()+i);
369 m->setGroupFile(groupFileNames[i]);
374 //make sure there is at least one valid file left
375 if (groupFileNames.size() == 0) { m->mothurOut("[ERROR]: no valid group files."); m->mothurOutEndLine(); abort = true; }
378 if (hasGroup && (groupFileNames.size() != fastaFileNames.size())) { m->mothurOut("[ERROR]: The number of groupfiles does not match the number of fastafiles, please correct."); m->mothurOutEndLine(); abort=true; }
381 //if the user changes the output directory command factory will send this info to us in the output parameter
382 outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = ""; }
385 it = parameters.find("reference");
386 //user has given a template file
387 if(it != parameters.end()){
388 if (it->second == "self") { templatefile = "self"; }
390 path = m->hasPath(it->second);
391 //if the user has not given a path then, add inputdir. else leave path alone.
392 if (path == "") { parameters["reference"] = inputDir + it->second; }
394 templatefile = validParameter.validFile(parameters, "reference", true);
395 if (templatefile == "not open") { abort = true; }
396 else if (templatefile == "not found") { //check for saved reference sequences
397 if (rdb->getSavedReference() != "") {
398 templatefile = rdb->getSavedReference();
399 m->mothurOutEndLine(); m->mothurOut("Using sequences from " + rdb->getSavedReference() + "."); m->mothurOutEndLine();
401 m->mothurOut("[ERROR]: You don't have any saved reference sequences and the reference parameter is a required.");
402 m->mothurOutEndLine();
407 }else if (hasName) { templatefile = "self"; }
409 if (rdb->getSavedReference() != "") {
410 templatefile = rdb->getSavedReference();
411 m->mothurOutEndLine(); m->mothurOut("Using sequences from " + rdb->getSavedReference() + "."); m->mothurOutEndLine();
413 m->mothurOut("[ERROR]: You don't have any saved reference sequences and the reference parameter is a required.");
414 m->mothurOutEndLine();
415 templatefile = ""; abort = true;
419 string temp = validParameter.validFile(parameters, "processors", false); if (temp == "not found"){ temp = m->getProcessors(); }
420 m->setProcessors(temp);
421 convert(temp, processors);
423 abskew = validParameter.validFile(parameters, "abskew", false); if (abskew == "not found"){ useAbskew = false; abskew = "1.9"; }else{ useAbskew = true; }
424 if (useAbskew && templatefile != "self") { m->mothurOut("The abskew parameter is only valid with template=self, ignoring."); m->mothurOutEndLine(); useAbskew = false; }
426 temp = validParameter.validFile(parameters, "chimealns", false); if (temp == "not found") { temp = "f"; }
427 chimealns = m->isTrue(temp);
429 minh = validParameter.validFile(parameters, "minh", false); if (minh == "not found") { useMinH = false; minh = "0.3"; } else{ useMinH = true; }
430 mindiv = validParameter.validFile(parameters, "mindiv", false); if (mindiv == "not found") { useMindiv = false; mindiv = "0.5"; } else{ useMindiv = true; }
431 xn = validParameter.validFile(parameters, "xn", false); if (xn == "not found") { useXn = false; xn = "8.0"; } else{ useXn = true; }
432 dn = validParameter.validFile(parameters, "dn", false); if (dn == "not found") { useDn = false; dn = "1.4"; } else{ useDn = true; }
433 xa = validParameter.validFile(parameters, "xa", false); if (xa == "not found") { useXa = false; xa = "1"; } else{ useXa = true; }
434 chunks = validParameter.validFile(parameters, "chunks", false); if (chunks == "not found") { useChunks = false; chunks = "4"; } else{ useChunks = true; }
435 minchunk = validParameter.validFile(parameters, "minchunk", false); if (minchunk == "not found") { useMinchunk = false; minchunk = "64"; } else{ useMinchunk = true; }
436 idsmoothwindow = validParameter.validFile(parameters, "idsmoothwindow", false); if (idsmoothwindow == "not found") { useIdsmoothwindow = false; idsmoothwindow = "32"; } else{ useIdsmoothwindow = true; }
437 //minsmoothid = validParameter.validFile(parameters, "minsmoothid", false); if (minsmoothid == "not found") { useMinsmoothid = false; minsmoothid = "0.95"; } else{ useMinsmoothid = true; }
438 maxp = validParameter.validFile(parameters, "maxp", false); if (maxp == "not found") { useMaxp = false; maxp = "2"; } else{ useMaxp = true; }
439 minlen = validParameter.validFile(parameters, "minlen", false); if (minlen == "not found") { useMinlen = false; minlen = "10"; } else{ useMinlen = true; }
440 maxlen = validParameter.validFile(parameters, "maxlen", false); if (maxlen == "not found") { useMaxlen = false; maxlen = "10000"; } else{ useMaxlen = true; }
442 temp = validParameter.validFile(parameters, "ucl", false); if (temp == "not found") { temp = "f"; }
443 ucl = m->isTrue(temp);
445 queryfract = validParameter.validFile(parameters, "queryfract", false); if (queryfract == "not found") { useQueryfract = false; queryfract = "0.5"; } else{ useQueryfract = true; }
446 if (!ucl && useQueryfract) { m->mothurOut("queryfact may only be used when ucl=t, ignoring."); m->mothurOutEndLine(); useQueryfract = false; }
448 temp = validParameter.validFile(parameters, "skipgaps", false); if (temp == "not found") { temp = "t"; }
449 skipgaps = m->isTrue(temp);
451 temp = validParameter.validFile(parameters, "skipgaps2", false); if (temp == "not found") { temp = "t"; }
452 skipgaps2 = m->isTrue(temp);
454 if (hasName && (templatefile != "self")) { m->mothurOut("You have provided a namefile and the reference parameter is not set to self. I am not sure what reference you are trying to use, aborting."); m->mothurOutEndLine(); abort=true; }
455 if (hasGroup && (templatefile != "self")) { m->mothurOut("You have provided a group file and the reference parameter is not set to self. I am not sure what reference you are trying to use, aborting."); m->mothurOutEndLine(); abort=true; }
459 catch(exception& e) {
460 m->errorOut(e, "ChimeraSlayerCommand", "ChimeraSlayerCommand");
464 //***************************************************************************************************************
466 int ChimeraUchimeCommand::execute(){
468 if (abort == true) { if (calledHelp) { return 0; } return 2; }
470 m->mothurOut("\nuchime by Robert C. Edgar\nhttp://drive5.com/uchime\nThis code is donated to the public domain.\n\n");
472 for (int s = 0; s < fastaFileNames.size(); s++) {
474 m->mothurOut("Checking sequences from " + fastaFileNames[s] + " ..." ); m->mothurOutEndLine();
476 int start = time(NULL);
477 string nameFile = "";
478 if (outputDir == "") { outputDir = m->hasPath(fastaFileNames[s]); }//if user entered a file with a path then preserve it
479 string outputFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + "uchime.chimera";
480 string accnosFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + "uchime.accnos";
481 string alnsFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + "uchime.alns";
482 string newFasta = m->getRootName(fastaFileNames[s]) + "temp";
484 //you provided a groupfile
485 string groupFile = "";
486 if (groupFileNames.size() != 0) { groupFile = groupFileNames[s]; }
488 if ((templatefile == "self") && (groupFile == "")) { //you want to run uchime with a reference template
490 if (processors != 1) { m->mothurOut("When using template=self, mothur can only use 1 processor, continuing."); m->mothurOutEndLine(); processors = 1; }
491 if (nameFileNames.size() != 0) { //you provided a namefile and we don't need to create one
492 nameFile = nameFileNames[s];
493 }else { nameFile = getNamesFile(fastaFileNames[s]); }
495 map<string, string> seqs;
496 readFasta(fastaFileNames[s], seqs); if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } return 0; }
499 vector<seqPriorityNode> nameMapCount;
500 int error = m->readNames(nameFile, nameMapCount, seqs); if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } return 0; }
501 if (error == 1) { for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } return 0; }
502 if (seqs.size() != nameMapCount.size()) { m->mothurOut( "The number of sequences in your fastafile does not match the number of sequences in your namefile, aborting."); m->mothurOutEndLine(); for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } return 0; }
504 printFile(nameMapCount, newFasta);
505 fastaFileNames[s] = newFasta;
508 if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } return 0; }
510 if (groupFile != "") {
511 if (nameFileNames.size() != 0) { //you provided a namefile and we don't need to create one
512 nameFile = nameFileNames[s];
513 }else { nameFile = getNamesFile(fastaFileNames[s]); }
515 //Parse sequences by group
516 SequenceParser parser(groupFile, fastaFileNames[s], nameFile);
517 vector<string> groups = parser.getNamesOfGroups();
519 if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } return 0; }
522 ofstream out, out1, out2;
523 m->openOutputFile(outputFileName, out); out.close();
524 m->openOutputFile(accnosFileName, out1); out1.close();
525 if (chimealns) { m->openOutputFile(alnsFileName, out2); out2.close(); }
528 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
529 if(processors == 1) { totalSeqs = driverGroups(parser, outputFileName, newFasta, accnosFileName, alnsFileName, 0, groups.size(), groups); }
530 else { totalSeqs = createProcessesGroups(parser, outputFileName, newFasta, accnosFileName, alnsFileName, groups); }
532 totalSeqs = driverGroups(parser, outputFileName, newFasta, accnosFileName, alnsFileName, 0, groups.size(), groups);
534 if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } return 0; }
536 int totalChimeras = deconvoluteResults(parser, outputFileName, accnosFileName, alnsFileName);
538 m->mothurOutEndLine(); m->mothurOut("It took " + toString(time(NULL) - start) + " secs to check " + toString(totalSeqs) + " sequences. " + toString(totalChimeras) + " chimeras were found."); m->mothurOutEndLine();
539 m->mothurOut("The number of sequences checked may be larger than the number of unique sequences because some sequences are found in several samples."); m->mothurOutEndLine();
541 if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } return 0; }
544 if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } return 0; }
548 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
549 if(processors == 1){ numSeqs = driver(outputFileName, fastaFileNames[s], accnosFileName, alnsFileName, numChimeras); }
550 else{ numSeqs = createProcesses(outputFileName, fastaFileNames[s], accnosFileName, alnsFileName, numChimeras); }
552 numSeqs = driver(outputFileName, fastaFileNames[s], accnosFileName, alnsFileName, numChimeras);
554 if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } return 0; }
556 //remove file made for uchime
557 if (templatefile == "self") { m->mothurRemove(fastaFileNames[s]); }
559 m->mothurOutEndLine(); m->mothurOut("It took " + toString(time(NULL) - start) + " secs to check " + toString(numSeqs) + " sequences. " + toString(numChimeras) + " chimeras were found."); m->mothurOutEndLine();
562 outputNames.push_back(outputFileName); outputTypes["chimera"].push_back(outputFileName);
563 outputNames.push_back(accnosFileName); outputTypes["accnos"].push_back(accnosFileName);
564 if (chimealns) { outputNames.push_back(alnsFileName); outputTypes["alns"].push_back(alnsFileName); }
567 //set accnos file as new current accnosfile
569 itTypes = outputTypes.find("accnos");
570 if (itTypes != outputTypes.end()) {
571 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setAccnosFile(current); }
574 m->mothurOutEndLine();
575 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
576 for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); }
577 m->mothurOutEndLine();
582 catch(exception& e) {
583 m->errorOut(e, "ChimeraUchimeCommand", "execute");
587 //**********************************************************************************************************************
588 int ChimeraUchimeCommand::deconvoluteResults(SequenceParser& parser, string outputFileName, string accnosFileName, string alnsFileName){
590 map<string, string> uniqueNames = parser.getAllSeqsMap();
591 map<string, string>::iterator itUnique;
596 m->openInputFile(accnosFileName, in2);
599 m->openOutputFile(accnosFileName+".temp", out2);
602 set<string> namesInFile; //this is so if a sequence is found to be chimera in several samples we dont write it to the results file more than once
603 set<string>::iterator itNames;
604 set<string> chimerasInFile;
605 set<string>::iterator itChimeras;
609 if (m->control_pressed) { in2.close(); out2.close(); m->mothurRemove(outputFileName); m->mothurRemove((accnosFileName+".temp")); return 0; }
611 in2 >> name; m->gobble(in2);
614 itUnique = uniqueNames.find(name);
616 if (itUnique == uniqueNames.end()) { m->mothurOut("[ERROR]: trouble parsing accnos results. Cannot find "+ name + "."); m->mothurOutEndLine(); m->control_pressed = true; }
618 itChimeras = chimerasInFile.find((itUnique->second));
620 if (itChimeras == chimerasInFile.end()) {
621 out2 << itUnique->second << endl;
622 chimerasInFile.insert((itUnique->second));
630 m->mothurRemove(accnosFileName);
631 rename((accnosFileName+".temp").c_str(), accnosFileName.c_str());
637 m->openInputFile(outputFileName, in);
640 m->openOutputFile(outputFileName+".temp", out); out.setf(ios::fixed, ios::floatfield); out.setf(ios::showpoint);
643 string parent1, parent2, temp2, temp3, temp4, temp5, temp6, temp7, temp8, temp9, temp10, temp11, temp12, temp13, flag;
646 //assumptions - in file each read will always look like - if uchime source is updated, revisit this code.
647 /* 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
648 0.000000 F11Fcsw_33372/ab=18/ * * * * * * * * * * * * * * N
649 0.018300 F11Fcsw_14980/ab=16/ F11Fcsw_1915/ab=35/ F11Fcsw_6032/ab=42/ 79.9 78.7 78.2 78.7 79.2 3 0 5 11 10 20 1.46 N
654 if (m->control_pressed) { in.close(); out.close(); m->mothurRemove((outputFileName+".temp")); return 0; }
657 in >> temp1; m->gobble(in);
658 in >> name; m->gobble(in);
659 in >> parent1; m->gobble(in);
660 in >> parent2; m->gobble(in);
661 in >> temp2 >> temp3 >> temp4 >> temp5 >> temp6 >> temp7 >> temp8 >> temp9 >> temp10 >> temp11 >> temp12 >> temp13 >> flag;
664 //parse name - name will look like U68590/ab=1/
665 string restOfName = "";
666 int pos = name.find_first_of('/');
667 if (pos != string::npos) {
668 restOfName = name.substr(pos);
669 name = name.substr(0, pos);
673 itUnique = uniqueNames.find(name);
675 if (itUnique == uniqueNames.end()) { m->mothurOut("[ERROR]: trouble parsing chimera results. Cannot find "+ name + "."); m->mothurOutEndLine(); m->control_pressed = true; }
677 name = itUnique->second;
678 //is this name already in the file
679 itNames = namesInFile.find((name));
681 if (itNames == namesInFile.end()) { //no not in file
682 if (flag == "N") { //are you really a no??
683 //is this sequence really not chimeric??
684 itChimeras = chimerasInFile.find(name);
686 //then you really are a no so print, otherwise skip
687 if (itChimeras == chimerasInFile.end()) { print = true; }
688 }else{ print = true; }
693 out << temp1 << '\t' << name << restOfName << '\t';
694 namesInFile.insert(name);
696 //parse parent1 names
697 if (parent1 != "*") {
699 pos = parent1.find_first_of('/');
700 if (pos != string::npos) {
701 restOfName = parent1.substr(pos);
702 parent1 = parent1.substr(0, pos);
705 itUnique = uniqueNames.find(parent1);
706 if (itUnique == uniqueNames.end()) { m->mothurOut("[ERROR]: trouble parsing chimera results. Cannot find parentA "+ parent1 + "."); m->mothurOutEndLine(); m->control_pressed = true; }
707 else { out << itUnique->second << restOfName << '\t'; }
708 }else { out << parent1 << '\t'; }
710 //parse parent2 names
711 if (parent2 != "*") {
713 pos = parent2.find_first_of('/');
714 if (pos != string::npos) {
715 restOfName = parent2.substr(pos);
716 parent2 = parent2.substr(0, pos);
719 itUnique = uniqueNames.find(parent2);
720 if (itUnique == uniqueNames.end()) { m->mothurOut("[ERROR]: trouble parsing chimera results. Cannot find parentB "+ parent2 + "."); m->mothurOutEndLine(); m->control_pressed = true; }
721 else { out << itUnique->second << restOfName << '\t'; }
722 }else { out << parent2 << '\t'; }
724 out << temp2 << '\t' << temp3 << '\t' << temp4 << '\t' << temp5 << '\t' << temp6 << '\t' << temp7 << '\t' << temp8 << '\t' << temp9 << '\t' << temp10 << '\t' << temp11 << '\t' << temp12 << temp13 << '\t' << flag << endl;
730 m->mothurRemove(outputFileName);
731 rename((outputFileName+".temp").c_str(), outputFileName.c_str());
735 //assumptions - in file each read will always look like - if uchime source is updated, revisit this code.
737 ------------------------------------------------------------------------
738 Query ( 179 nt) F21Fcsw_11639/ab=591/
739 ParentA ( 179 nt) F11Fcsw_6529/ab=1625/
740 ParentB ( 181 nt) F21Fcsw_12128/ab=1827/
742 A 1 AAGgAAGAtTAATACaagATGgCaTCatgAGtccgCATgTtcAcatGATTAAAG--gTaTtcCGGTagacGATGGGGATG 78
743 Q 1 AAGTAAGACTAATACCCAATGACGTCTCTAGAAGACATCTGAAAGAGATTAAAG--ATTTATCGGTGATGGATGGGGATG 78
744 B 1 AAGgAAGAtTAATcCaggATGggaTCatgAGttcACATgTccgcatGATTAAAGgtATTTtcCGGTagacGATGGGGATG 80
745 Diffs N N A N?N N N NNN N?NB N ?NaNNN B B NN NNNN
746 Votes 0 0 + 000 0 0 000 000+ 0 00!000 + 00 0000
747 Model AAAAAAAAAAAAAAAAAAAAAAxBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB
749 A 79 CGTtccATTAGaTaGTaGGCGGGGTAACGGCCCACCtAGtCttCGATggaTAGGGGTTCTGAGAGGAAGGTCCCCCACAT 158
750 Q 79 CGTCTGATTAGCTTGTTGGCGGGGTAACGGCCCACCAAGGCAACGATCAGTAGGGGTTCTGAGAGGAAGGTCCCCCACAT 158
751 B 81 CGTtccATTAGaTaGTaGGCGGGGTAACGGCCCACCtAGtCAACGATggaTAGGGGTTCTGAGAGGAAGGTCCCCCACAT 160
752 Diffs NNN N N N N N BB NNN
753 Votes 000 0 0 0 0 0 ++ 000
754 Model BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB
756 A 159 TGGAACTGAGACACGGTCCAA 179
757 Q 159 TGGAACTGAGACACGGTCCAA 179
758 B 161 TGGAACTGAGACACGGTCCAA 181
761 Model BBBBBBBBBBBBBBBBBBBBB
763 Ids. QA 76.6%, QB 77.7%, AB 93.7%, QModel 78.9%, Div. +1.5%
764 Diffs Left 7: N 0, A 6, Y 1 (14.3%); Right 35: N 1, A 30, Y 4 (11.4%), Score 0.0047
768 m->openInputFile(alnsFileName, in3);
771 m->openOutputFile(alnsFileName+".temp", out3); out3.setf(ios::fixed, ios::floatfield); out3.setf(ios::showpoint);
778 if (m->control_pressed) { in3.close(); out3.close(); m->mothurRemove(outputFileName); m->mothurRemove((accnosFileName)); m->mothurRemove((alnsFileName+".temp")); return 0; }
781 line = m->getline(in3);
785 istringstream iss(line);
788 //are you a name line
789 if ((temp == "Query") || (temp == "ParentA") || (temp == "ParentB")) {
791 for (int i = 0; i < line.length(); i++) {
793 if (line[i] == ')') { break; }
794 else { out3 << line[i]; }
797 if (spot == (line.length() - 1)) { m->mothurOut("[ERROR]: could not line sequence name in line " + line + "."); m->mothurOutEndLine(); m->control_pressed = true; }
798 else if ((spot+2) > (line.length() - 1)) { m->mothurOut("[ERROR]: could not line sequence name in line " + line + "."); m->mothurOutEndLine(); m->control_pressed = true; }
800 out << line[spot] << line[spot+1];
802 name = line.substr(spot+2);
804 //parse name - name will either look like U68590/ab=1/ or U68590
805 string restOfName = "";
806 int pos = name.find_first_of('/');
807 if (pos != string::npos) {
808 restOfName = name.substr(pos);
809 name = name.substr(0, pos);
813 itUnique = uniqueNames.find(name);
815 if (itUnique == uniqueNames.end()) { m->mothurOut("[ERROR]: trouble parsing alns results. Cannot find "+ name + "."); m->mothurOutEndLine();m->control_pressed = true; }
817 //only limit repeats on query names
818 if (temp == "Query") {
819 itNames = namesInFile.find((itUnique->second));
821 if (itNames == namesInFile.end()) {
822 out << itUnique->second << restOfName << endl;
823 namesInFile.insert((itUnique->second));
825 }else { out << itUnique->second << restOfName << endl; }
830 }else { //not need to alter line
831 out3 << line << endl;
833 }else { out3 << endl; }
838 m->mothurRemove(alnsFileName);
839 rename((alnsFileName+".temp").c_str(), alnsFileName.c_str());
844 catch(exception& e) {
845 m->errorOut(e, "ChimeraUchimeCommand", "deconvoluteResults");
849 //**********************************************************************************************************************
850 int ChimeraUchimeCommand::printFile(vector<seqPriorityNode>& nameMapCount, string filename){
853 sort(nameMapCount.begin(), nameMapCount.end(), compareSeqPriorityNodes);
856 m->openOutputFile(filename, out);
858 //print new file in order of
859 for (int i = 0; i < nameMapCount.size(); i++) {
860 out << ">" << nameMapCount[i].name << "/ab=" << nameMapCount[i].numIdentical << "/" << endl << nameMapCount[i].seq << endl;
866 catch(exception& e) {
867 m->errorOut(e, "ChimeraUchimeCommand", "printFile");
871 //**********************************************************************************************************************
872 int ChimeraUchimeCommand::readFasta(string filename, map<string, string>& seqs){
874 //create input file for uchime
875 //read through fastafile and store info
877 m->openInputFile(filename, in);
881 if (m->control_pressed) { in.close(); return 0; }
883 Sequence seq(in); m->gobble(in);
884 seqs[seq.getName()] = seq.getAligned();
890 catch(exception& e) {
891 m->errorOut(e, "ChimeraUchimeCommand", "readFasta");
895 //**********************************************************************************************************************
897 string ChimeraUchimeCommand::getNamesFile(string& inputFile){
899 string nameFile = "";
901 m->mothurOutEndLine(); m->mothurOut("No namesfile given, running unique.seqs command to generate one."); m->mothurOutEndLine(); m->mothurOutEndLine();
903 //use unique.seqs to create new name and fastafile
904 string inputString = "fasta=" + inputFile;
905 m->mothurOut("/******************************************/"); m->mothurOutEndLine();
906 m->mothurOut("Running command: unique.seqs(" + inputString + ")"); m->mothurOutEndLine();
908 Command* uniqueCommand = new DeconvoluteCommand(inputString);
909 uniqueCommand->execute();
911 map<string, vector<string> > filenames = uniqueCommand->getOutputFiles();
913 delete uniqueCommand;
915 m->mothurOut("/******************************************/"); m->mothurOutEndLine();
917 nameFile = filenames["name"][0];
918 inputFile = filenames["fasta"][0];
922 catch(exception& e) {
923 m->errorOut(e, "ChimeraUchimeCommand", "getNamesFile");
927 //**********************************************************************************************************************
928 int ChimeraUchimeCommand::driverGroups(SequenceParser& parser, string outputFName, string filename, string accnos, string alns, int start, int end, vector<string> groups){
934 for (int i = start; i < end; i++) {
935 int start = time(NULL); if (m->control_pressed) { return 0; }
937 int error = parser.getSeqs(groups[i], filename, true); if ((error == 1) || m->control_pressed) { return 0; }
939 int numSeqs = driver((outputFName + groups[i]), filename, (accnos+ groups[i]), (alns+ groups[i]), numChimeras);
940 totalSeqs += numSeqs;
942 if (m->control_pressed) { return 0; }
944 //remove file made for uchime
945 m->mothurRemove(filename);
948 m->appendFiles((outputFName+groups[i]), outputFName); m->mothurRemove((outputFName+groups[i]));
949 m->appendFiles((accnos+groups[i]), accnos); m->mothurRemove((accnos+groups[i]));
950 if (chimealns) { m->appendFiles((alns+groups[i]), alns); m->mothurRemove((alns+groups[i])); }
952 m->mothurOutEndLine(); m->mothurOut("It took " + toString(time(NULL) - start) + " secs to check " + toString(numSeqs) + " sequences from group " + groups[i] + "."); m->mothurOutEndLine();
958 catch(exception& e) {
959 m->errorOut(e, "ChimeraUchimeCommand", "driverGroups");
963 //**********************************************************************************************************************
965 int ChimeraUchimeCommand::driver(string outputFName, string filename, string accnos, string alns, int& numChimeras){
970 char* tempUchime = new char[8];
971 strcpy(tempUchime, "./uchime ");
972 cPara.push_back(tempUchime);
974 char* tempIn = new char[8];
975 *tempIn = '\0'; strncat(tempIn, "--input", 7);
976 //strcpy(tempIn, "--input");
977 cPara.push_back(tempIn);
978 char* temp = new char[filename.length()+1];
979 *temp = '\0'; strncat(temp, filename.c_str(), filename.length());
980 //strcpy(temp, filename.c_str());
981 cPara.push_back(temp);
983 //are you using a reference file
984 if (templatefile != "self") {
986 char* tempRef = new char[5];
987 //strcpy(tempRef, "--db");
988 *tempRef = '\0'; strncat(tempRef, "--db", 4);
989 cPara.push_back(tempRef);
990 char* tempR = new char[templatefile.length()+1];
991 //strcpy(tempR, templatefile.c_str());
992 *tempR = '\0'; strncat(tempR, templatefile.c_str(), templatefile.length());
993 cPara.push_back(tempR);
996 char* tempO = new char[12];
997 *tempO = '\0'; strncat(tempO, "--uchimeout", 11);
998 //strcpy(tempO, "--uchimeout");
999 cPara.push_back(tempO);
1000 char* tempout = new char[outputFName.length()+1];
1001 //strcpy(tempout, outputFName.c_str());
1002 *tempout = '\0'; strncat(tempout, outputFName.c_str(), outputFName.length());
1003 cPara.push_back(tempout);
1006 char* tempA = new char[13];
1007 *tempA = '\0'; strncat(tempA, "--uchimealns", 12);
1008 //strcpy(tempA, "--uchimealns");
1009 cPara.push_back(tempA);
1010 char* tempa = new char[alns.length()+1];
1011 //strcpy(tempa, alns.c_str());
1012 *tempa = '\0'; strncat(tempa, alns.c_str(), alns.length());
1013 cPara.push_back(tempa);
1017 char* tempskew = new char[9];
1018 *tempskew = '\0'; strncat(tempskew, "--abskew", 8);
1019 //strcpy(tempskew, "--abskew");
1020 cPara.push_back(tempskew);
1021 char* tempSkew = new char[abskew.length()+1];
1022 //strcpy(tempSkew, abskew.c_str());
1023 *tempSkew = '\0'; strncat(tempSkew, abskew.c_str(), abskew.length());
1024 cPara.push_back(tempSkew);
1028 char* tempminh = new char[7];
1029 *tempminh = '\0'; strncat(tempminh, "--minh", 6);
1030 //strcpy(tempminh, "--minh");
1031 cPara.push_back(tempminh);
1032 char* tempMinH = new char[minh.length()+1];
1033 *tempMinH = '\0'; strncat(tempMinH, minh.c_str(), minh.length());
1034 //strcpy(tempMinH, minh.c_str());
1035 cPara.push_back(tempMinH);
1039 char* tempmindiv = new char[9];
1040 *tempmindiv = '\0'; strncat(tempmindiv, "--mindiv", 8);
1041 //strcpy(tempmindiv, "--mindiv");
1042 cPara.push_back(tempmindiv);
1043 char* tempMindiv = new char[mindiv.length()+1];
1044 *tempMindiv = '\0'; strncat(tempMindiv, mindiv.c_str(), mindiv.length());
1045 //strcpy(tempMindiv, mindiv.c_str());
1046 cPara.push_back(tempMindiv);
1050 char* tempxn = new char[5];
1051 //strcpy(tempxn, "--xn");
1052 *tempxn = '\0'; strncat(tempxn, "--xn", 4);
1053 cPara.push_back(tempxn);
1054 char* tempXn = new char[xn.length()+1];
1055 //strcpy(tempXn, xn.c_str());
1056 *tempXn = '\0'; strncat(tempXn, xn.c_str(), xn.length());
1057 cPara.push_back(tempXn);
1061 char* tempdn = new char[5];
1062 //strcpy(tempdn, "--dn");
1063 *tempdn = '\0'; strncat(tempdn, "--dn", 4);
1064 cPara.push_back(tempdn);
1065 char* tempDn = new char[dn.length()+1];
1066 *tempDn = '\0'; strncat(tempDn, dn.c_str(), dn.length());
1067 //strcpy(tempDn, dn.c_str());
1068 cPara.push_back(tempDn);
1072 char* tempxa = new char[5];
1073 //strcpy(tempxa, "--xa");
1074 *tempxa = '\0'; strncat(tempxa, "--xa", 4);
1075 cPara.push_back(tempxa);
1076 char* tempXa = new char[xa.length()+1];
1077 *tempXa = '\0'; strncat(tempXa, xa.c_str(), xa.length());
1078 //strcpy(tempXa, xa.c_str());
1079 cPara.push_back(tempXa);
1083 char* tempchunks = new char[9];
1084 //strcpy(tempchunks, "--chunks");
1085 *tempchunks = '\0'; strncat(tempchunks, "--chunks", 8);
1086 cPara.push_back(tempchunks);
1087 char* tempChunks = new char[chunks.length()+1];
1088 *tempChunks = '\0'; strncat(tempChunks, chunks.c_str(), chunks.length());
1089 //strcpy(tempChunks, chunks.c_str());
1090 cPara.push_back(tempChunks);
1094 char* tempminchunk = new char[11];
1095 //strcpy(tempminchunk, "--minchunk");
1096 *tempminchunk = '\0'; strncat(tempminchunk, "--minchunk", 10);
1097 cPara.push_back(tempminchunk);
1098 char* tempMinchunk = new char[minchunk.length()+1];
1099 *tempMinchunk = '\0'; strncat(tempMinchunk, minchunk.c_str(), minchunk.length());
1100 //strcpy(tempMinchunk, minchunk.c_str());
1101 cPara.push_back(tempMinchunk);
1104 if (useIdsmoothwindow) {
1105 char* tempidsmoothwindow = new char[17];
1106 *tempidsmoothwindow = '\0'; strncat(tempidsmoothwindow, "--idsmoothwindow", 16);
1107 //strcpy(tempidsmoothwindow, "--idsmoothwindow");
1108 cPara.push_back(tempidsmoothwindow);
1109 char* tempIdsmoothwindow = new char[idsmoothwindow.length()+1];
1110 *tempIdsmoothwindow = '\0'; strncat(tempIdsmoothwindow, idsmoothwindow.c_str(), idsmoothwindow.length());
1111 //strcpy(tempIdsmoothwindow, idsmoothwindow.c_str());
1112 cPara.push_back(tempIdsmoothwindow);
1115 /*if (useMinsmoothid) {
1116 char* tempminsmoothid = new char[14];
1117 //strcpy(tempminsmoothid, "--minsmoothid");
1118 *tempminsmoothid = '\0'; strncat(tempminsmoothid, "--minsmoothid", 13);
1119 cPara.push_back(tempminsmoothid);
1120 char* tempMinsmoothid = new char[minsmoothid.length()+1];
1121 *tempMinsmoothid = '\0'; strncat(tempMinsmoothid, minsmoothid.c_str(), minsmoothid.length());
1122 //strcpy(tempMinsmoothid, minsmoothid.c_str());
1123 cPara.push_back(tempMinsmoothid);
1127 char* tempmaxp = new char[7];
1128 //strcpy(tempmaxp, "--maxp");
1129 *tempmaxp = '\0'; strncat(tempmaxp, "--maxp", 6);
1130 cPara.push_back(tempmaxp);
1131 char* tempMaxp = new char[maxp.length()+1];
1132 *tempMaxp = '\0'; strncat(tempMaxp, maxp.c_str(), maxp.length());
1133 //strcpy(tempMaxp, maxp.c_str());
1134 cPara.push_back(tempMaxp);
1138 char* tempskipgaps = new char[13];
1139 //strcpy(tempskipgaps, "--[no]skipgaps");
1140 *tempskipgaps = '\0'; strncat(tempskipgaps, "--noskipgaps", 12);
1141 cPara.push_back(tempskipgaps);
1145 char* tempskipgaps2 = new char[14];
1146 //strcpy(tempskipgaps2, "--[no]skipgaps2");
1147 *tempskipgaps2 = '\0'; strncat(tempskipgaps2, "--noskipgaps2", 13);
1148 cPara.push_back(tempskipgaps2);
1152 char* tempminlen = new char[9];
1153 *tempminlen = '\0'; strncat(tempminlen, "--minlen", 8);
1154 //strcpy(tempminlen, "--minlen");
1155 cPara.push_back(tempminlen);
1156 char* tempMinlen = new char[minlen.length()+1];
1157 //strcpy(tempMinlen, minlen.c_str());
1158 *tempMinlen = '\0'; strncat(tempMinlen, minlen.c_str(), minlen.length());
1159 cPara.push_back(tempMinlen);
1163 char* tempmaxlen = new char[9];
1164 //strcpy(tempmaxlen, "--maxlen");
1165 *tempmaxlen = '\0'; strncat(tempmaxlen, "--maxlen", 8);
1166 cPara.push_back(tempmaxlen);
1167 char* tempMaxlen = new char[maxlen.length()+1];
1168 *tempMaxlen = '\0'; strncat(tempMaxlen, maxlen.c_str(), maxlen.length());
1169 //strcpy(tempMaxlen, maxlen.c_str());
1170 cPara.push_back(tempMaxlen);
1174 char* tempucl = new char[5];
1175 strcpy(tempucl, "--ucl");
1176 cPara.push_back(tempucl);
1179 if (useQueryfract) {
1180 char* tempqueryfract = new char[13];
1181 *tempqueryfract = '\0'; strncat(tempqueryfract, "--queryfract", 12);
1182 //strcpy(tempqueryfract, "--queryfract");
1183 cPara.push_back(tempqueryfract);
1184 char* tempQueryfract = new char[queryfract.length()+1];
1185 *tempQueryfract = '\0'; strncat(tempQueryfract, queryfract.c_str(), queryfract.length());
1186 //strcpy(tempQueryfract, queryfract.c_str());
1187 cPara.push_back(tempQueryfract);
1191 char** uchimeParameters;
1192 uchimeParameters = new char*[cPara.size()];
1193 for (int i = 0; i < cPara.size(); i++) { uchimeParameters[i] = cPara[i]; }
1194 int numArgs = cPara.size();
1196 uchime_main(numArgs, uchimeParameters);
1199 for(int i = 0; i < cPara.size(); i++) { delete[] cPara[i]; }
1200 delete[] uchimeParameters;
1202 if (m->control_pressed) { return 0; }
1204 //create accnos file from uchime results
1206 m->openInputFile(outputFName, in);
1209 m->openOutputFile(accnos, out);
1215 if (m->control_pressed) { break; }
1218 string chimeraFlag = "";
1219 in >> chimeraFlag >> name;
1221 //fix name if needed
1222 if (templatefile == "self") {
1223 name = name.substr(0, name.length()-1); //rip off last /
1224 name = name.substr(0, name.find_last_of('/'));
1227 for (int i = 0; i < 15; i++) { in >> chimeraFlag; }
1230 if (chimeraFlag == "Y") { out << name << endl; numChimeras++; }
1238 catch(exception& e) {
1239 m->errorOut(e, "ChimeraUchimeCommand", "driver");
1243 /**************************************************************************************************/
1245 int ChimeraUchimeCommand::createProcesses(string outputFileName, string filename, string accnos, string alns, int& numChimeras) {
1251 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
1252 //break up file into multiple files
1253 vector<string> files;
1254 m->divideFile(filename, processors, files);
1256 if (m->control_pressed) { return 0; }
1258 //loop through and create all the processes you want
1259 while (process != processors) {
1263 processIDS.push_back(pid); //create map from line number to pid so you can append files in correct order later
1265 }else if (pid == 0){
1266 num = driver(outputFileName + toString(getpid()) + ".temp", files[process], accnos + toString(getpid()) + ".temp", alns + toString(getpid()) + ".temp", numChimeras);
1268 //pass numSeqs to parent
1270 string tempFile = outputFileName + toString(getpid()) + ".num.temp";
1271 m->openOutputFile(tempFile, out);
1273 out << numChimeras << endl;
1278 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine();
1279 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
1285 num = driver(outputFileName, files[0], accnos, alns, numChimeras);
1287 //force parent to wait until all the processes are done
1288 for (int i=0;i<processIDS.size();i++) {
1289 int temp = processIDS[i];
1293 for (int i = 0; i < processIDS.size(); i++) {
1295 string tempFile = outputFileName + toString(processIDS[i]) + ".num.temp";
1296 m->openInputFile(tempFile, in);
1299 in >> tempNum; m->gobble(in);
1302 numChimeras += tempNum;
1304 in.close(); m->mothurRemove(tempFile);
1308 //append output files
1309 for(int i=0;i<processIDS[i];i++){
1310 m->appendFiles((outputFileName + toString(processIDS[i]) + ".temp"), outputFileName);
1311 m->mothurRemove((outputFileName + toString(processIDS[i]) + ".temp"));
1313 m->appendFiles((accnos + toString(processIDS[i]) + ".temp"), accnos);
1314 m->mothurRemove((accnos + toString(processIDS[i]) + ".temp"));
1317 m->appendFiles((alns + toString(processIDS[i]) + ".temp"), alns);
1318 m->mothurRemove((alns + toString(processIDS[i]) + ".temp"));
1322 //get rid of the file pieces.
1323 for (int i = 0; i < files.size(); i++) { m->mothurRemove(files[i]); }
1327 catch(exception& e) {
1328 m->errorOut(e, "ChimeraUchimeCommand", "createProcesses");
1332 /**************************************************************************************************/
1334 int ChimeraUchimeCommand::createProcessesGroups(SequenceParser& parser, string outputFName, string filename, string accnos, string alns, vector<string> groups) {
1342 if (groups.size() < processors) { processors = groups.size(); }
1344 //divide the groups between the processors
1345 vector<linePair> lines;
1346 int numGroupsPerProcessor = groups.size() / processors;
1347 for (int i = 0; i < processors; i++) {
1348 int startIndex = i * numGroupsPerProcessor;
1349 int endIndex = (i+1) * numGroupsPerProcessor;
1350 if(i == (processors - 1)){ endIndex = groups.size(); }
1351 lines.push_back(linePair(startIndex, endIndex));
1354 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
1356 //loop through and create all the processes you want
1357 while (process != processors) {
1361 processIDS.push_back(pid); //create map from line number to pid so you can append files in correct order later
1363 }else if (pid == 0){
1364 num = driverGroups(parser, outputFName + toString(getpid()) + ".temp", filename + toString(getpid()) + ".temp", accnos + toString(getpid()) + ".temp", alns + toString(getpid()) + ".temp", lines[process].start, lines[process].end, groups);
1366 //pass numSeqs to parent
1368 string tempFile = outputFName + toString(getpid()) + ".num.temp";
1369 m->openOutputFile(tempFile, out);
1375 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine();
1376 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
1382 num = driverGroups(parser, outputFName, filename, accnos, alns, lines[0].start, lines[0].end, groups);
1384 //force parent to wait until all the processes are done
1385 for (int i=0;i<processIDS.size();i++) {
1386 int temp = processIDS[i];
1391 for (int i = 0; i < processIDS.size(); i++) {
1393 string tempFile = outputFName + toString(processIDS[i]) + ".num.temp";
1394 m->openInputFile(tempFile, in);
1395 if (!in.eof()) { int tempNum = 0; in >> tempNum; num += tempNum; }
1396 in.close(); m->mothurRemove(tempFile);
1400 //append output files
1401 for(int i=0;i<processIDS[i];i++){
1402 m->appendFiles((outputFName + toString(processIDS[i]) + ".temp"), outputFName);
1403 m->mothurRemove((outputFName + toString(processIDS[i]) + ".temp"));
1405 m->appendFiles((accnos + toString(processIDS[i]) + ".temp"), accnos);
1406 m->mothurRemove((accnos + toString(processIDS[i]) + ".temp"));
1409 m->appendFiles((alns + toString(processIDS[i]) + ".temp"), alns);
1410 m->mothurRemove((alns + toString(processIDS[i]) + ".temp"));
1417 catch(exception& e) {
1418 m->errorOut(e, "ChimeraUchimeCommand", "createProcessesGroups");
1422 /**************************************************************************************************/