2 * chimerauchimecommand.cpp
5 * Created by westcott on 5/13/11.
6 * Copyright 2011 Schloss Lab. All rights reserved.
10 #include "chimerauchimecommand.h"
11 #include "deconvolutecommand.h"
13 #include "sequence.hpp"
14 #include "referencedb.h"
15 #include "systemcommand.h"
17 //**********************************************************************************************************************
18 vector<string> ChimeraUchimeCommand::setParameters(){
20 CommandParameter ptemplate("reference", "InputTypes", "", "", "none", "none", "none","",false,true,true); parameters.push_back(ptemplate);
21 CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none","chimera-accnos",false,true,true); parameters.push_back(pfasta);
22 CommandParameter pname("name", "InputTypes", "", "", "NameCount", "none", "none","",false,false,true); parameters.push_back(pname);
23 CommandParameter pcount("count", "InputTypes", "", "", "NameCount-CountGroup", "none", "none","",false,false,true); parameters.push_back(pcount);
24 CommandParameter pgroup("group", "InputTypes", "", "", "CountGroup", "none", "none","",false,false,true); parameters.push_back(pgroup);
25 CommandParameter pprocessors("processors", "Number", "", "1", "", "", "","",false,false,true); parameters.push_back(pprocessors);
26 CommandParameter pstrand("strand", "String", "", "", "", "", "","",false,false); parameters.push_back(pstrand);
27 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
28 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
29 CommandParameter pabskew("abskew", "Number", "", "1.9", "", "", "","",false,false); parameters.push_back(pabskew);
30 CommandParameter pchimealns("chimealns", "Boolean", "", "F", "", "", "","alns",false,false); parameters.push_back(pchimealns);
31 CommandParameter pminh("minh", "Number", "", "0.3", "", "", "","",false,false); parameters.push_back(pminh);
32 CommandParameter pmindiv("mindiv", "Number", "", "0.5", "", "", "","",false,false); parameters.push_back(pmindiv);
33 CommandParameter pxn("xn", "Number", "", "8.0", "", "", "","",false,false); parameters.push_back(pxn);
34 CommandParameter pdn("dn", "Number", "", "1.4", "", "", "","",false,false); parameters.push_back(pdn);
35 CommandParameter pxa("xa", "Number", "", "1", "", "", "","",false,false); parameters.push_back(pxa);
36 CommandParameter pchunks("chunks", "Number", "", "4", "", "", "","",false,false); parameters.push_back(pchunks);
37 CommandParameter pminchunk("minchunk", "Number", "", "64", "", "", "","",false,false); parameters.push_back(pminchunk);
38 CommandParameter pidsmoothwindow("idsmoothwindow", "Number", "", "32", "", "", "","",false,false); parameters.push_back(pidsmoothwindow);
39 CommandParameter pdups("dereplicate", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(pdups);
41 //CommandParameter pminsmoothid("minsmoothid", "Number", "", "0.95", "", "", "",false,false); parameters.push_back(pminsmoothid);
42 CommandParameter pmaxp("maxp", "Number", "", "2", "", "", "","",false,false); parameters.push_back(pmaxp);
43 CommandParameter pskipgaps("skipgaps", "Boolean", "", "T", "", "", "","",false,false); parameters.push_back(pskipgaps);
44 CommandParameter pskipgaps2("skipgaps2", "Boolean", "", "T", "", "", "","",false,false); parameters.push_back(pskipgaps2);
45 CommandParameter pminlen("minlen", "Number", "", "10", "", "", "","",false,false); parameters.push_back(pminlen);
46 CommandParameter pmaxlen("maxlen", "Number", "", "10000", "", "", "","",false,false); parameters.push_back(pmaxlen);
47 CommandParameter pucl("ucl", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(pucl);
48 CommandParameter pqueryfract("queryfract", "Number", "", "0.5", "", "", "","",false,false); parameters.push_back(pqueryfract);
50 vector<string> myArray;
51 for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); }
55 m->errorOut(e, "ChimeraUchimeCommand", "setParameters");
59 //**********************************************************************************************************************
60 string ChimeraUchimeCommand::getHelpString(){
62 string helpString = "";
63 helpString += "The chimera.uchime command reads a fastafile and referencefile and outputs potentially chimeric sequences.\n";
64 helpString += "This command is a wrapper for uchime written by Robert C. Edgar.\n";
65 helpString += "The chimera.uchime command parameters are fasta, name, count, reference, processors, dereplicate, abskew, chimealns, minh, mindiv, xn, dn, xa, chunks, minchunk, idsmoothwindow, minsmoothid, maxp, skipgaps, skipgaps2, minlen, maxlen, ucl, strand and queryfact.\n";
66 helpString += "The fasta parameter allows you to enter the fasta file containing your potentially chimeric sequences, and is required, unless you have a valid current fasta file. \n";
67 helpString += "The name parameter allows you to provide a name file, if you are using template=self. \n";
68 helpString += "The count parameter allows you to provide a count file, if you are using template=self. When you use a count file with group info and dereplicate=T, mothur will create a *.pick.count_table file containing seqeunces after chimeras are removed. \n";
69 helpString += "You may enter multiple fasta files by separating their names with dashes. ie. fasta=abrecovery.fasta-amazon.fasta \n";
70 helpString += "The group parameter allows you to provide a group file. The group file can be used with a namesfile and reference=self. When checking sequences, only sequences from the same group as the query sequence will be used as the reference. \n";
71 helpString += "If the dereplicate parameter is false, then if one group finds the seqeunce to be chimeric, then all groups find it to be chimeric, default=f.\n";
72 helpString += "The reference parameter allows you to enter a reference file containing known non-chimeric sequences, and is required. You may also set template=self, in this case the abundant sequences will be used as potential parents. \n";
73 helpString += "The processors parameter allows you to specify how many processors you would like to use. The default is 1. \n";
74 helpString += "The abskew parameter can only be used with template=self. Minimum abundance skew. Default 1.9. Abundance skew is: min [ abund(parent1), abund(parent2) ] / abund(query).\n";
75 helpString += "The chimealns parameter allows you to indicate you would like a file containing multiple alignments of query sequences to parents in human readable format. Alignments show columns with differences that support or contradict a chimeric model.\n";
76 helpString += "The minh parameter - mininum score to report chimera. Default 0.3. Values from 0.1 to 5 might be reasonable. Lower values increase sensitivity but may report more false positives. If you decrease xn you may need to increase minh, and vice versa.\n";
77 helpString += "The mindiv parameter - minimum divergence ratio, default 0.5. Div ratio is 100%% - %%identity between query sequence and the closest candidate for being a parent. If you don't care about very close chimeras, then you could increase mindiv to, say, 1.0 or 2.0, and also decrease minh, say to 0.1, to increase sensitivity. How well this works will depend on your data. Best is to tune parameters on a good benchmark.\n";
78 helpString += "The xn parameter - weight of a no vote. Default 8.0. Decreasing this weight to around 3 or 4 may give better performance on denoised data.\n";
79 helpString += "The dn parameter - pseudo-count prior on number of no votes. Default 1.4. Probably no good reason to change this unless you can retune to a good benchmark for your data. Reasonable values are probably in the range from 0.2 to 2.\n";
80 helpString += "The xa parameter - weight of an abstain vote. Default 1. So far, results do not seem to be very sensitive to this parameter, but if you have a good training set might be worth trying. Reasonable values might range from 0.1 to 2.\n";
81 helpString += "The chunks parameter is the number of chunks to extract from the query sequence when searching for parents. Default 4.\n";
82 helpString += "The minchunk parameter is the minimum length of a chunk. Default 64.\n";
83 helpString += "The idsmoothwindow parameter is the length of id smoothing window. Default 32.\n";
84 //helpString += "The minsmoothid parameter - minimum factional identity over smoothed window of candidate parent. Default 0.95.\n";
85 helpString += "The maxp parameter - maximum number of candidate parents to consider. Default 2. In tests so far, increasing maxp gives only a very small improvement in sensivity but tends to increase the error rate quite a bit.\n";
86 helpString += "The skipgaps parameter controls how gapped columns affect counting of diffs. If skipgaps is set to T, columns containing gaps do not found as diffs. Default = T.\n";
87 helpString += "The skipgaps2 parameter controls how gapped columns affect counting of diffs. If skipgaps2 is set to T, if column is immediately adjacent to a column containing a gap, it is not counted as a diff. Default = T.\n";
88 helpString += "The minlen parameter is the minimum unaligned sequence length. Defaults 10. Applies to both query and reference sequences.\n";
89 helpString += "The maxlen parameter is the maximum unaligned sequence length. Defaults 10000. Applies to both query and reference sequences.\n";
90 helpString += "The ucl parameter - use local-X alignments. Default is global-X or false. On tests so far, global-X is always better; this option is retained because it just might work well on some future type of data.\n";
91 helpString += "The queryfract parameter - minimum fraction of the query sequence that must be covered by a local-X alignment. Default 0.5. Applies only when ucl is true.\n";
93 helpString += "When using MPI, the processors parameter is set to the number of MPI processes running. \n";
95 helpString += "The chimera.uchime command should be in the following format: \n";
96 helpString += "chimera.uchime(fasta=yourFastaFile, reference=yourTemplate) \n";
97 helpString += "Example: chimera.uchime(fasta=AD.align, reference=silva.gold.align) \n";
98 helpString += "Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFastaFile).\n";
101 catch(exception& e) {
102 m->errorOut(e, "ChimeraUchimeCommand", "getHelpString");
106 //**********************************************************************************************************************
107 string ChimeraUchimeCommand::getOutputPattern(string type) {
111 if (type == "chimera") { pattern = "[filename],uchime.chimeras"; }
112 else if (type == "accnos") { pattern = "[filename],uchime.accnos"; }
113 else if (type == "alns") { pattern = "[filename],uchime.alns"; }
114 else if (type == "count") { pattern = "[filename],uchime.pick.count_table"; }
115 else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true; }
119 catch(exception& e) {
120 m->errorOut(e, "ChimeraUchimeCommand", "getOutputPattern");
124 //**********************************************************************************************************************
125 ChimeraUchimeCommand::ChimeraUchimeCommand(){
127 abort = true; calledHelp = true;
129 vector<string> tempOutNames;
130 outputTypes["chimera"] = tempOutNames;
131 outputTypes["accnos"] = tempOutNames;
132 outputTypes["alns"] = tempOutNames;
133 outputTypes["count"] = tempOutNames;
135 catch(exception& e) {
136 m->errorOut(e, "ChimeraUchimeCommand", "ChimeraUchimeCommand");
140 //***************************************************************************************************************
141 ChimeraUchimeCommand::ChimeraUchimeCommand(string option) {
143 abort = false; calledHelp = false; hasName=false; hasCount=false;
144 ReferenceDB* rdb = ReferenceDB::getInstance();
146 //allow user to run help
147 if(option == "help") { help(); abort = true; calledHelp = true; }
148 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
151 vector<string> myArray = setParameters();
153 OptionParser parser(option);
154 map<string,string> parameters = parser.getParameters();
156 ValidParameters validParameter("chimera.uchime");
157 map<string,string>::iterator it;
159 //check to make sure all parameters are valid for command
160 for (it = parameters.begin(); it != parameters.end(); it++) {
161 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
164 vector<string> tempOutNames;
165 outputTypes["chimera"] = tempOutNames;
166 outputTypes["accnos"] = tempOutNames;
167 outputTypes["alns"] = tempOutNames;
168 outputTypes["count"] = tempOutNames;
170 //if the user changes the input directory command factory will send this info to us in the output parameter
171 string inputDir = validParameter.validFile(parameters, "inputdir", false);
172 if (inputDir == "not found"){ inputDir = ""; }
174 //check for required parameters
175 fastafile = validParameter.validFile(parameters, "fasta", false);
176 if (fastafile == "not found") {
177 //if there is a current fasta file, use it
178 string filename = m->getFastaFile();
179 if (filename != "") { fastaFileNames.push_back(filename); m->mothurOut("Using " + filename + " as input file for the fasta parameter."); m->mothurOutEndLine(); }
180 else { m->mothurOut("You have no current fastafile and the fasta parameter is required."); m->mothurOutEndLine(); abort = true; }
182 m->splitAtDash(fastafile, fastaFileNames);
184 //go through files and make sure they are good, if not, then disregard them
185 for (int i = 0; i < fastaFileNames.size(); i++) {
188 if (fastaFileNames[i] == "current") {
189 fastaFileNames[i] = m->getFastaFile();
190 if (fastaFileNames[i] != "") { m->mothurOut("Using " + fastaFileNames[i] + " as input file for the fasta parameter where you had given current."); m->mothurOutEndLine(); }
192 m->mothurOut("You have no current fastafile, ignoring current."); m->mothurOutEndLine(); ignore=true;
193 //erase from file list
194 fastaFileNames.erase(fastaFileNames.begin()+i);
201 if (inputDir != "") {
202 string path = m->hasPath(fastaFileNames[i]);
203 //if the user has not given a path then, add inputdir. else leave path alone.
204 if (path == "") { fastaFileNames[i] = inputDir + fastaFileNames[i]; }
210 ableToOpen = m->openInputFile(fastaFileNames[i], in, "noerror");
212 //if you can't open it, try default location
213 if (ableToOpen == 1) {
214 if (m->getDefaultPath() != "") { //default path is set
215 string tryPath = m->getDefaultPath() + m->getSimpleName(fastaFileNames[i]);
216 m->mothurOut("Unable to open " + fastaFileNames[i] + ". Trying default " + tryPath); m->mothurOutEndLine();
218 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
220 fastaFileNames[i] = tryPath;
224 if (ableToOpen == 1) {
225 if (m->getOutputDir() != "") { //default path is set
226 string tryPath = m->getOutputDir() + m->getSimpleName(fastaFileNames[i]);
227 m->mothurOut("Unable to open " + fastaFileNames[i] + ". Trying output directory " + tryPath); m->mothurOutEndLine();
229 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
231 fastaFileNames[i] = tryPath;
237 if (ableToOpen == 1) {
238 m->mothurOut("Unable to open " + fastaFileNames[i] + ". It will be disregarded."); m->mothurOutEndLine();
239 //erase from file list
240 fastaFileNames.erase(fastaFileNames.begin()+i);
243 m->setFastaFile(fastaFileNames[i]);
248 //make sure there is at least one valid file left
249 if (fastaFileNames.size() == 0) { m->mothurOut("[ERROR]: no valid files."); m->mothurOutEndLine(); abort = true; }
253 //check for required parameters
254 namefile = validParameter.validFile(parameters, "name", false);
255 if (namefile == "not found") { namefile = ""; }
257 m->splitAtDash(namefile, nameFileNames);
259 //go through files and make sure they are good, if not, then disregard them
260 for (int i = 0; i < nameFileNames.size(); i++) {
263 if (nameFileNames[i] == "current") {
264 nameFileNames[i] = m->getNameFile();
265 if (nameFileNames[i] != "") { m->mothurOut("Using " + nameFileNames[i] + " as input file for the name parameter where you had given current."); m->mothurOutEndLine(); }
267 m->mothurOut("You have no current namefile, ignoring current."); m->mothurOutEndLine(); ignore=true;
268 //erase from file list
269 nameFileNames.erase(nameFileNames.begin()+i);
276 if (inputDir != "") {
277 string path = m->hasPath(nameFileNames[i]);
278 //if the user has not given a path then, add inputdir. else leave path alone.
279 if (path == "") { nameFileNames[i] = inputDir + nameFileNames[i]; }
285 ableToOpen = m->openInputFile(nameFileNames[i], in, "noerror");
287 //if you can't open it, try default location
288 if (ableToOpen == 1) {
289 if (m->getDefaultPath() != "") { //default path is set
290 string tryPath = m->getDefaultPath() + m->getSimpleName(nameFileNames[i]);
291 m->mothurOut("Unable to open " + nameFileNames[i] + ". Trying default " + tryPath); m->mothurOutEndLine();
293 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
295 nameFileNames[i] = tryPath;
299 if (ableToOpen == 1) {
300 if (m->getOutputDir() != "") { //default path is set
301 string tryPath = m->getOutputDir() + m->getSimpleName(nameFileNames[i]);
302 m->mothurOut("Unable to open " + nameFileNames[i] + ". Trying output directory " + tryPath); m->mothurOutEndLine();
304 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
306 nameFileNames[i] = tryPath;
312 if (ableToOpen == 1) {
313 m->mothurOut("Unable to open " + nameFileNames[i] + ". It will be disregarded."); m->mothurOutEndLine();
314 //erase from file list
315 nameFileNames.erase(nameFileNames.begin()+i);
318 m->setNameFile(nameFileNames[i]);
324 if (nameFileNames.size() != 0) { hasName = true; }
326 //check for required parameters
327 vector<string> countfileNames;
328 countfile = validParameter.validFile(parameters, "count", false);
329 if (countfile == "not found") {
332 m->splitAtDash(countfile, countfileNames);
334 //go through files and make sure they are good, if not, then disregard them
335 for (int i = 0; i < countfileNames.size(); i++) {
338 if (countfileNames[i] == "current") {
339 countfileNames[i] = m->getCountTableFile();
340 if (nameFileNames[i] != "") { m->mothurOut("Using " + countfileNames[i] + " as input file for the count parameter where you had given current."); m->mothurOutEndLine(); }
342 m->mothurOut("You have no current count file, ignoring current."); m->mothurOutEndLine(); ignore=true;
343 //erase from file list
344 countfileNames.erase(countfileNames.begin()+i);
351 if (inputDir != "") {
352 string path = m->hasPath(countfileNames[i]);
353 //if the user has not given a path then, add inputdir. else leave path alone.
354 if (path == "") { countfileNames[i] = inputDir + countfileNames[i]; }
360 ableToOpen = m->openInputFile(countfileNames[i], in, "noerror");
362 //if you can't open it, try default location
363 if (ableToOpen == 1) {
364 if (m->getDefaultPath() != "") { //default path is set
365 string tryPath = m->getDefaultPath() + m->getSimpleName(countfileNames[i]);
366 m->mothurOut("Unable to open " + countfileNames[i] + ". Trying default " + tryPath); m->mothurOutEndLine();
368 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
370 countfileNames[i] = tryPath;
374 if (ableToOpen == 1) {
375 if (m->getOutputDir() != "") { //default path is set
376 string tryPath = m->getOutputDir() + m->getSimpleName(countfileNames[i]);
377 m->mothurOut("Unable to open " + countfileNames[i] + ". Trying output directory " + tryPath); m->mothurOutEndLine();
379 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
381 countfileNames[i] = tryPath;
387 if (ableToOpen == 1) {
388 m->mothurOut("Unable to open " + countfileNames[i] + ". It will be disregarded."); m->mothurOutEndLine();
389 //erase from file list
390 countfileNames.erase(countfileNames.begin()+i);
393 m->setCountTableFile(countfileNames[i]);
399 if (countfileNames.size() != 0) { hasCount = true; }
401 //make sure there is at least one valid file left
402 if (hasName && hasCount) { m->mothurOut("[ERROR]: You must enter ONLY ONE of the following: count or name."); m->mothurOutEndLine(); abort = true; }
404 if (!hasName && hasCount) { nameFileNames = countfileNames; }
406 if ((hasCount || hasName) && (nameFileNames.size() != fastaFileNames.size())) { m->mothurOut("[ERROR]: The number of name or count files does not match the number of fastafiles, please correct."); m->mothurOutEndLine(); abort=true; }
408 bool hasGroup = true;
409 groupfile = validParameter.validFile(parameters, "group", false);
410 if (groupfile == "not found") { groupfile = ""; hasGroup = false; }
412 m->splitAtDash(groupfile, groupFileNames);
414 //go through files and make sure they are good, if not, then disregard them
415 for (int i = 0; i < groupFileNames.size(); i++) {
418 if (groupFileNames[i] == "current") {
419 groupFileNames[i] = m->getGroupFile();
420 if (groupFileNames[i] != "") { m->mothurOut("Using " + groupFileNames[i] + " as input file for the group parameter where you had given current."); m->mothurOutEndLine(); }
422 m->mothurOut("You have no current namefile, ignoring current."); m->mothurOutEndLine(); ignore=true;
423 //erase from file list
424 groupFileNames.erase(groupFileNames.begin()+i);
431 if (inputDir != "") {
432 string path = m->hasPath(groupFileNames[i]);
433 //if the user has not given a path then, add inputdir. else leave path alone.
434 if (path == "") { groupFileNames[i] = inputDir + groupFileNames[i]; }
440 ableToOpen = m->openInputFile(groupFileNames[i], in, "noerror");
442 //if you can't open it, try default location
443 if (ableToOpen == 1) {
444 if (m->getDefaultPath() != "") { //default path is set
445 string tryPath = m->getDefaultPath() + m->getSimpleName(groupFileNames[i]);
446 m->mothurOut("Unable to open " + groupFileNames[i] + ". Trying default " + tryPath); m->mothurOutEndLine();
448 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
450 groupFileNames[i] = tryPath;
454 if (ableToOpen == 1) {
455 if (m->getOutputDir() != "") { //default path is set
456 string tryPath = m->getOutputDir() + m->getSimpleName(groupFileNames[i]);
457 m->mothurOut("Unable to open " + groupFileNames[i] + ". Trying output directory " + tryPath); m->mothurOutEndLine();
459 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
461 groupFileNames[i] = tryPath;
467 if (ableToOpen == 1) {
468 m->mothurOut("Unable to open " + groupFileNames[i] + ". It will be disregarded."); m->mothurOutEndLine();
469 //erase from file list
470 groupFileNames.erase(groupFileNames.begin()+i);
473 m->setGroupFile(groupFileNames[i]);
478 //make sure there is at least one valid file left
479 if (groupFileNames.size() == 0) { m->mothurOut("[ERROR]: no valid group files."); m->mothurOutEndLine(); abort = true; }
482 if (hasGroup && (groupFileNames.size() != fastaFileNames.size())) { m->mothurOut("[ERROR]: The number of groupfiles does not match the number of fastafiles, please correct."); m->mothurOutEndLine(); abort=true; }
484 if (hasGroup && hasCount) { m->mothurOut("[ERROR]: You must enter ONLY ONE of the following: count or group."); m->mothurOutEndLine(); abort = true; }
485 //if the user changes the output directory command factory will send this info to us in the output parameter
486 outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = ""; }
489 //if the user changes the output directory command factory will send this info to us in the output parameter
490 outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = ""; }
493 it = parameters.find("reference");
494 //user has given a template file
495 if(it != parameters.end()){
496 if (it->second == "self") { templatefile = "self"; }
498 path = m->hasPath(it->second);
499 //if the user has not given a path then, add inputdir. else leave path alone.
500 if (path == "") { parameters["reference"] = inputDir + it->second; }
502 templatefile = validParameter.validFile(parameters, "reference", true);
503 if (templatefile == "not open") { abort = true; }
504 else if (templatefile == "not found") { //check for saved reference sequences
505 if (rdb->getSavedReference() != "") {
506 templatefile = rdb->getSavedReference();
507 m->mothurOutEndLine(); m->mothurOut("Using sequences from " + rdb->getSavedReference() + "."); m->mothurOutEndLine();
509 m->mothurOut("[ERROR]: You don't have any saved reference sequences and the reference parameter is a required.");
510 m->mothurOutEndLine();
515 }else if (hasName) { templatefile = "self"; }
516 else if (hasCount) { templatefile = "self"; }
518 if (rdb->getSavedReference() != "") {
519 templatefile = rdb->getSavedReference();
520 m->mothurOutEndLine(); m->mothurOut("Using sequences from " + rdb->getSavedReference() + "."); m->mothurOutEndLine();
522 m->mothurOut("[ERROR]: You don't have any saved reference sequences and the reference parameter is a required.");
523 m->mothurOutEndLine();
524 templatefile = ""; abort = true;
528 string temp = validParameter.validFile(parameters, "processors", false); if (temp == "not found"){ temp = m->getProcessors(); }
529 m->setProcessors(temp);
530 m->mothurConvert(temp, processors);
532 abskew = validParameter.validFile(parameters, "abskew", false); if (abskew == "not found"){ useAbskew = false; abskew = "1.9"; }else{ useAbskew = true; }
533 if (useAbskew && templatefile != "self") { m->mothurOut("The abskew parameter is only valid with template=self, ignoring."); m->mothurOutEndLine(); useAbskew = false; }
535 temp = validParameter.validFile(parameters, "chimealns", false); if (temp == "not found") { temp = "f"; }
536 chimealns = m->isTrue(temp);
538 minh = validParameter.validFile(parameters, "minh", false); if (minh == "not found") { useMinH = false; minh = "0.3"; } else{ useMinH = true; }
539 mindiv = validParameter.validFile(parameters, "mindiv", false); if (mindiv == "not found") { useMindiv = false; mindiv = "0.5"; } else{ useMindiv = true; }
540 xn = validParameter.validFile(parameters, "xn", false); if (xn == "not found") { useXn = false; xn = "8.0"; } else{ useXn = true; }
541 dn = validParameter.validFile(parameters, "dn", false); if (dn == "not found") { useDn = false; dn = "1.4"; } else{ useDn = true; }
542 xa = validParameter.validFile(parameters, "xa", false); if (xa == "not found") { useXa = false; xa = "1"; } else{ useXa = true; }
543 chunks = validParameter.validFile(parameters, "chunks", false); if (chunks == "not found") { useChunks = false; chunks = "4"; } else{ useChunks = true; }
544 minchunk = validParameter.validFile(parameters, "minchunk", false); if (minchunk == "not found") { useMinchunk = false; minchunk = "64"; } else{ useMinchunk = true; }
545 idsmoothwindow = validParameter.validFile(parameters, "idsmoothwindow", false); if (idsmoothwindow == "not found") { useIdsmoothwindow = false; idsmoothwindow = "32"; } else{ useIdsmoothwindow = true; }
546 //minsmoothid = validParameter.validFile(parameters, "minsmoothid", false); if (minsmoothid == "not found") { useMinsmoothid = false; minsmoothid = "0.95"; } else{ useMinsmoothid = true; }
547 maxp = validParameter.validFile(parameters, "maxp", false); if (maxp == "not found") { useMaxp = false; maxp = "2"; } else{ useMaxp = true; }
548 minlen = validParameter.validFile(parameters, "minlen", false); if (minlen == "not found") { useMinlen = false; minlen = "10"; } else{ useMinlen = true; }
549 maxlen = validParameter.validFile(parameters, "maxlen", false); if (maxlen == "not found") { useMaxlen = false; maxlen = "10000"; } else{ useMaxlen = true; }
551 strand = validParameter.validFile(parameters, "strand", false); if (strand == "not found") { strand = ""; }
553 temp = validParameter.validFile(parameters, "ucl", false); if (temp == "not found") { temp = "f"; }
554 ucl = m->isTrue(temp);
556 queryfract = validParameter.validFile(parameters, "queryfract", false); if (queryfract == "not found") { useQueryfract = false; queryfract = "0.5"; } else{ useQueryfract = true; }
557 if (!ucl && useQueryfract) { m->mothurOut("queryfact may only be used when ucl=t, ignoring."); m->mothurOutEndLine(); useQueryfract = false; }
559 temp = validParameter.validFile(parameters, "skipgaps", false); if (temp == "not found") { temp = "t"; }
560 skipgaps = m->isTrue(temp);
562 temp = validParameter.validFile(parameters, "skipgaps2", false); if (temp == "not found") { temp = "t"; }
563 skipgaps2 = m->isTrue(temp);
566 temp = validParameter.validFile(parameters, "dereplicate", false);
567 if (temp == "not found") {
568 if (groupfile != "") { temp = "false"; }
569 else { temp = "true"; }
571 dups = m->isTrue(temp);
574 if (hasName && (templatefile != "self")) { m->mothurOut("You have provided a namefile and the reference parameter is not set to self. I am not sure what reference you are trying to use, aborting."); m->mothurOutEndLine(); abort=true; }
575 if (hasGroup && (templatefile != "self")) { m->mothurOut("You have provided a group file and the reference parameter is not set to self. I am not sure what reference you are trying to use, aborting."); m->mothurOutEndLine(); abort=true; }
577 //look for uchime exe
579 string tempPath = path;
580 for (int i = 0; i < path.length(); i++) { tempPath[i] = tolower(path[i]); }
581 path = path.substr(0, (tempPath.find_last_of('m')));
583 string uchimeCommand;
584 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
585 uchimeCommand = path + "uchime"; // format the database, -o option gives us the ability
587 m->mothurOut("[DEBUG]: Uchime location using \"which uchime\" = ");
588 Command* newCommand = new SystemCommand("which uchime"); m->mothurOutEndLine();
589 newCommand->execute();
591 m->mothurOut("[DEBUG]: Mothur's location using \"which mothur\" = ");
592 newCommand = new SystemCommand("which mothur"); m->mothurOutEndLine();
593 newCommand->execute();
597 uchimeCommand = path + "uchime.exe";
600 //test to make sure uchime exists
602 uchimeCommand = m->getFullPathName(uchimeCommand);
603 int ableToOpen = m->openInputFile(uchimeCommand, in, "no error"); in.close();
604 if(ableToOpen == 1) {
605 m->mothurOut(uchimeCommand + " file does not exist. Checking path... \n");
606 //check to see if uchime is in the path??
608 string uLocation = m->findProgramPath("uchime");
612 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
613 ableToOpen = m->openInputFile(uLocation, in2, "no error"); in2.close();
615 ableToOpen = m->openInputFile((uLocation + ".exe"), in2, "no error"); in2.close();
618 if(ableToOpen == 1) { m->mothurOut("[ERROR]: " + uLocation + " file does not exist. mothur requires the uchime executable."); m->mothurOutEndLine(); abort = true; }
619 else { m->mothurOut("Found uchime in your path, using " + uLocation + "\n");uchimeLocation = uLocation; }
620 }else { uchimeLocation = uchimeCommand; }
622 uchimeLocation = m->getFullPathName(uchimeLocation);
625 catch(exception& e) {
626 m->errorOut(e, "ChimeraSlayerCommand", "ChimeraSlayerCommand");
630 //***************************************************************************************************************
632 int ChimeraUchimeCommand::execute(){
635 if (abort == true) { if (calledHelp) { return 0; } return 2; }
637 m->mothurOut("\nuchime by Robert C. Edgar\nhttp://drive5.com/uchime\nThis code is donated to the public domain.\n\n");
639 for (int s = 0; s < fastaFileNames.size(); s++) {
641 m->mothurOut("Checking sequences from " + fastaFileNames[s] + " ..." ); m->mothurOutEndLine();
643 int start = time(NULL);
644 string nameFile = "";
645 if (outputDir == "") { outputDir = m->hasPath(fastaFileNames[s]); }//if user entered a file with a path then preserve it
646 map<string, string> variables;
647 variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s]));
648 string outputFileName = getOutputFileName("chimera", variables);
649 string accnosFileName = getOutputFileName("accnos", variables);
650 string alnsFileName = getOutputFileName("alns", variables);
651 string newFasta = m->getRootName(fastaFileNames[s]) + "temp";
652 string newCountFile = "";
654 //you provided a groupfile
655 string groupFile = "";
656 bool hasGroup = false;
657 if (groupFileNames.size() != 0) { groupFile = groupFileNames[s]; hasGroup = true; }
660 if (ct.testGroups(nameFileNames[s])) { hasGroup = true; }
661 variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(nameFileNames[s]));
662 newCountFile = getOutputFileName("count", variables);
665 if ((templatefile == "self") && (!hasGroup)) { //you want to run uchime with a template=self and no groups
667 if (processors != 1) { m->mothurOut("When using template=self, mothur can only use 1 processor, continuing."); m->mothurOutEndLine(); processors = 1; }
668 if (nameFileNames.size() != 0) { //you provided a namefile and we don't need to create one
669 nameFile = nameFileNames[s];
670 }else { nameFile = getNamesFile(fastaFileNames[s]); }
672 map<string, string> seqs;
673 readFasta(fastaFileNames[s], seqs); if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } return 0; }
676 vector<seqPriorityNode> nameMapCount;
680 ct.readTable(nameFile);
681 for(map<string, string>::iterator it = seqs.begin(); it != seqs.end(); it++) {
682 int num = ct.getNumSeqs(it->first);
683 if (num == 0) { error = 1; }
685 seqPriorityNode temp(num, it->second, it->first);
686 nameMapCount.push_back(temp);
690 error = m->readNames(nameFile, nameMapCount, seqs); if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } return 0; }
692 if (error == 1) { for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } return 0; }
693 if (seqs.size() != nameMapCount.size()) { m->mothurOut( "The number of sequences in your fastafile does not match the number of sequences in your namefile, aborting."); m->mothurOutEndLine(); for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } return 0; }
695 printFile(nameMapCount, newFasta);
696 fastaFileNames[s] = newFasta;
699 if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } return 0; }
702 if (nameFileNames.size() != 0) { //you provided a namefile and we don't need to create one
703 nameFile = nameFileNames[s];
704 }else { nameFile = getNamesFile(fastaFileNames[s]); }
706 //Parse sequences by group
707 vector<string> groups;
708 map<string, string> uniqueNames;
710 cparser = new SequenceCountParser(nameFile, fastaFileNames[s]);
711 groups = cparser->getNamesOfGroups();
712 uniqueNames = cparser->getAllSeqsMap();
714 sparser = new SequenceParser(groupFile, fastaFileNames[s], nameFile);
715 groups = sparser->getNamesOfGroups();
716 uniqueNames = sparser->getAllSeqsMap();
719 if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } return 0; }
722 ofstream out, out1, out2;
723 m->openOutputFile(outputFileName, out); out.close();
724 m->openOutputFile(accnosFileName, out1); out1.close();
725 if (chimealns) { m->openOutputFile(alnsFileName, out2); out2.close(); }
728 if(processors == 1) { totalSeqs = driverGroups(outputFileName, newFasta, accnosFileName, alnsFileName, newCountFile, 0, groups.size(), groups);
730 if (hasCount && dups) {
731 CountTable c; c.readTable(nameFile);
732 if (!m->isBlank(newCountFile)) {
734 m->openInputFile(newCountFile, in2);
738 in2 >> name >> group; m->gobble(in2);
739 c.setAbund(name, group, 0);
743 m->mothurRemove(newCountFile);
744 c.printTable(newCountFile);
747 }else { totalSeqs = createProcessesGroups(outputFileName, newFasta, accnosFileName, alnsFileName, newCountFile, groups, nameFile, groupFile, fastaFileNames[s]); }
749 if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } return 0; }
753 int totalChimeras = deconvoluteResults(uniqueNames, outputFileName, accnosFileName, alnsFileName);
755 m->mothurOutEndLine(); m->mothurOut("It took " + toString(time(NULL) - start) + " secs to check " + toString(totalSeqs) + " sequences. " + toString(totalChimeras) + " chimeras were found."); m->mothurOutEndLine();
756 m->mothurOut("The number of sequences checked may be larger than the number of unique sequences because some sequences are found in several samples."); m->mothurOutEndLine();
760 set<string> doNotRemove;
761 CountTable c; c.readTable(newCountFile);
762 vector<string> namesInTable = c.getNamesOfSeqs();
763 for (int i = 0; i < namesInTable.size(); i++) {
764 int temp = c.getNumSeqs(namesInTable[i]);
765 if (temp == 0) { c.remove(namesInTable[i]); }
766 else { doNotRemove.insert((namesInTable[i])); }
768 //remove names we want to keep from accnos file.
769 set<string> accnosNames = m->readAccnos(accnosFileName);
771 m->openOutputFile(accnosFileName, out2);
772 for (set<string>::iterator it = accnosNames.begin(); it != accnosNames.end(); it++) {
773 if (doNotRemove.count(*it) == 0) { out2 << (*it) << endl; }
776 c.printTable(newCountFile);
777 outputNames.push_back(newCountFile); outputTypes["count"].push_back(newCountFile);
781 if (hasCount) { delete cparser; }
782 else { delete sparser; }
784 if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } return 0; }
787 if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } return 0; }
792 if(processors == 1){ numSeqs = driver(outputFileName, fastaFileNames[s], accnosFileName, alnsFileName, numChimeras); }
793 else{ numSeqs = createProcesses(outputFileName, fastaFileNames[s], accnosFileName, alnsFileName, numChimeras); }
797 m->openOutputFile(outputFileName+".temp", out);
798 out << "Score\tQuery\tParentA\tParentB\tIdQM\tIdQA\tIdQB\tIdAB\tIdQT\tLY\tLN\tLA\tRY\tRN\tRA\tDiv\tYN\n";
801 m->appendFiles(outputFileName, outputFileName+".temp");
802 m->mothurRemove(outputFileName); rename((outputFileName+".temp").c_str(), outputFileName.c_str());
804 if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } return 0; }
806 //remove file made for uchime
807 if (templatefile == "self") { m->mothurRemove(fastaFileNames[s]); }
809 m->mothurOutEndLine(); m->mothurOut("It took " + toString(time(NULL) - start) + " secs to check " + toString(numSeqs) + " sequences. " + toString(numChimeras) + " chimeras were found."); m->mothurOutEndLine();
812 outputNames.push_back(outputFileName); outputTypes["chimera"].push_back(outputFileName);
813 outputNames.push_back(accnosFileName); outputTypes["accnos"].push_back(accnosFileName);
814 if (chimealns) { outputNames.push_back(alnsFileName); outputTypes["alns"].push_back(alnsFileName); }
817 //set accnos file as new current accnosfile
819 itTypes = outputTypes.find("accnos");
820 if (itTypes != outputTypes.end()) {
821 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setAccnosFile(current); }
824 m->mothurOutEndLine();
825 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
826 for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); }
827 m->mothurOutEndLine();
832 catch(exception& e) {
833 m->errorOut(e, "ChimeraUchimeCommand", "execute");
837 //**********************************************************************************************************************
838 int ChimeraUchimeCommand::deconvoluteResults(map<string, string>& uniqueNames, string outputFileName, string accnosFileName, string alnsFileName){
840 map<string, string>::iterator itUnique;
844 m->openOutputFile(accnosFileName+".temp", out2);
847 set<string> namesInFile; //this is so if a sequence is found to be chimera in several samples we dont write it to the results file more than once
848 set<string>::iterator itNames;
849 set<string> chimerasInFile;
850 set<string>::iterator itChimeras;
852 if (!m->isBlank(accnosFileName)) {
855 m->openInputFile(accnosFileName, in2);
858 if (m->control_pressed) { in2.close(); out2.close(); m->mothurRemove(outputFileName); m->mothurRemove((accnosFileName+".temp")); return 0; }
860 in2 >> name; m->gobble(in2);
863 itUnique = uniqueNames.find(name);
865 if (itUnique == uniqueNames.end()) { m->mothurOut("[ERROR]: trouble parsing accnos results. Cannot find " + name + "."); m->mothurOutEndLine(); m->control_pressed = true; }
867 itChimeras = chimerasInFile.find((itUnique->second));
869 if (itChimeras == chimerasInFile.end()) {
870 out2 << itUnique->second << endl;
871 chimerasInFile.insert((itUnique->second));
880 m->mothurRemove(accnosFileName);
881 rename((accnosFileName+".temp").c_str(), accnosFileName.c_str());
887 m->openInputFile(outputFileName, in);
890 m->openOutputFile(outputFileName+".temp", out); out.setf(ios::fixed, ios::floatfield); out.setf(ios::showpoint);
891 out << "Score\tQuery\tParentA\tParentB\tIdQM\tIdQA\tIdQB\tIdAB\tIdQT\tLY\tLN\tLA\tRY\tRN\tRA\tDiv\tYN\n";
894 string parent1, parent2, temp2, temp3, temp4, temp5, temp6, temp7, temp8, temp9, temp10, temp11, temp12, temp13, flag;
897 //assumptions - in file each read will always look like - if uchime source is updated, revisit this code.
898 /* 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
899 0.000000 F11Fcsw_33372/ab=18/ * * * * * * * * * * * * * * N
900 0.018300 F11Fcsw_14980/ab=16/ F11Fcsw_1915/ab=35/ F11Fcsw_6032/ab=42/ 79.9 78.7 78.2 78.7 79.2 3 0 5 11 10 20 1.46 N
905 if (m->control_pressed) { in.close(); out.close(); m->mothurRemove((outputFileName+".temp")); return 0; }
908 in >> temp1; m->gobble(in);
909 in >> name; m->gobble(in);
910 in >> parent1; m->gobble(in);
911 in >> parent2; m->gobble(in);
912 in >> temp2 >> temp3 >> temp4 >> temp5 >> temp6 >> temp7 >> temp8 >> temp9 >> temp10 >> temp11 >> temp12 >> temp13 >> flag;
915 //parse name - name will look like U68590/ab=1/
916 string restOfName = "";
917 int pos = name.find_first_of('/');
918 if (pos != string::npos) {
919 restOfName = name.substr(pos);
920 name = name.substr(0, pos);
924 itUnique = uniqueNames.find(name);
926 if (itUnique == uniqueNames.end()) { m->mothurOut("[ERROR]: trouble parsing chimera results. Cannot find "+ name + "."); m->mothurOutEndLine(); m->control_pressed = true; }
928 name = itUnique->second;
929 //is this name already in the file
930 itNames = namesInFile.find((name));
932 if (itNames == namesInFile.end()) { //no not in file
933 if (flag == "N") { //are you really a no??
934 //is this sequence really not chimeric??
935 itChimeras = chimerasInFile.find(name);
937 //then you really are a no so print, otherwise skip
938 if (itChimeras == chimerasInFile.end()) { print = true; }
939 }else{ print = true; }
944 out << temp1 << '\t' << name << restOfName << '\t';
945 namesInFile.insert(name);
947 //parse parent1 names
948 if (parent1 != "*") {
950 pos = parent1.find_first_of('/');
951 if (pos != string::npos) {
952 restOfName = parent1.substr(pos);
953 parent1 = parent1.substr(0, pos);
956 itUnique = uniqueNames.find(parent1);
957 if (itUnique == uniqueNames.end()) { m->mothurOut("[ERROR]: trouble parsing chimera results. Cannot find parentA "+ parent1 + "."); m->mothurOutEndLine(); m->control_pressed = true; }
958 else { out << itUnique->second << restOfName << '\t'; }
959 }else { out << parent1 << '\t'; }
961 //parse parent2 names
962 if (parent2 != "*") {
964 pos = parent2.find_first_of('/');
965 if (pos != string::npos) {
966 restOfName = parent2.substr(pos);
967 parent2 = parent2.substr(0, pos);
970 itUnique = uniqueNames.find(parent2);
971 if (itUnique == uniqueNames.end()) { m->mothurOut("[ERROR]: trouble parsing chimera results. Cannot find parentB "+ parent2 + "."); m->mothurOutEndLine(); m->control_pressed = true; }
972 else { out << itUnique->second << restOfName << '\t'; }
973 }else { out << parent2 << '\t'; }
975 out << temp2 << '\t' << temp3 << '\t' << temp4 << '\t' << temp5 << '\t' << temp6 << '\t' << temp7 << '\t' << temp8 << '\t' << temp9 << '\t' << temp10 << '\t' << temp11 << '\t' << temp12 << temp13 << '\t' << flag << endl;
981 m->mothurRemove(outputFileName);
982 rename((outputFileName+".temp").c_str(), outputFileName.c_str());
986 //assumptions - in file each read will always look like - if uchime source is updated, revisit this code.
988 ------------------------------------------------------------------------
989 Query ( 179 nt) F21Fcsw_11639/ab=591/
990 ParentA ( 179 nt) F11Fcsw_6529/ab=1625/
991 ParentB ( 181 nt) F21Fcsw_12128/ab=1827/
993 A 1 AAGgAAGAtTAATACaagATGgCaTCatgAGtccgCATgTtcAcatGATTAAAG--gTaTtcCGGTagacGATGGGGATG 78
994 Q 1 AAGTAAGACTAATACCCAATGACGTCTCTAGAAGACATCTGAAAGAGATTAAAG--ATTTATCGGTGATGGATGGGGATG 78
995 B 1 AAGgAAGAtTAATcCaggATGggaTCatgAGttcACATgTccgcatGATTAAAGgtATTTtcCGGTagacGATGGGGATG 80
996 Diffs N N A N?N N N NNN N?NB N ?NaNNN B B NN NNNN
997 Votes 0 0 + 000 0 0 000 000+ 0 00!000 + 00 0000
998 Model AAAAAAAAAAAAAAAAAAAAAAxBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB
1000 A 79 CGTtccATTAGaTaGTaGGCGGGGTAACGGCCCACCtAGtCttCGATggaTAGGGGTTCTGAGAGGAAGGTCCCCCACAT 158
1001 Q 79 CGTCTGATTAGCTTGTTGGCGGGGTAACGGCCCACCAAGGCAACGATCAGTAGGGGTTCTGAGAGGAAGGTCCCCCACAT 158
1002 B 81 CGTtccATTAGaTaGTaGGCGGGGTAACGGCCCACCtAGtCAACGATggaTAGGGGTTCTGAGAGGAAGGTCCCCCACAT 160
1003 Diffs NNN N N N N N BB NNN
1004 Votes 000 0 0 0 0 0 ++ 000
1005 Model BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB
1007 A 159 TGGAACTGAGACACGGTCCAA 179
1008 Q 159 TGGAACTGAGACACGGTCCAA 179
1009 B 161 TGGAACTGAGACACGGTCCAA 181
1012 Model BBBBBBBBBBBBBBBBBBBBB
1014 Ids. QA 76.6%, QB 77.7%, AB 93.7%, QModel 78.9%, Div. +1.5%
1015 Diffs Left 7: N 0, A 6, Y 1 (14.3%); Right 35: N 1, A 30, Y 4 (11.4%), Score 0.0047
1019 m->openInputFile(alnsFileName, in3);
1022 m->openOutputFile(alnsFileName+".temp", out3); out3.setf(ios::fixed, ios::floatfield); out3.setf(ios::showpoint);
1025 namesInFile.clear();
1028 while (!in3.eof()) {
1029 if (m->control_pressed) { in3.close(); out3.close(); m->mothurRemove(outputFileName); m->mothurRemove((accnosFileName)); m->mothurRemove((alnsFileName+".temp")); return 0; }
1032 line = m->getline(in3);
1036 istringstream iss(line);
1039 //are you a name line
1040 if ((temp == "Query") || (temp == "ParentA") || (temp == "ParentB")) {
1042 for (int i = 0; i < line.length(); i++) {
1044 if (line[i] == ')') { break; }
1045 else { out3 << line[i]; }
1048 if (spot == (line.length() - 1)) { m->mothurOut("[ERROR]: could not line sequence name in line " + line + "."); m->mothurOutEndLine(); m->control_pressed = true; }
1049 else if ((spot+2) > (line.length() - 1)) { m->mothurOut("[ERROR]: could not line sequence name in line " + line + "."); m->mothurOutEndLine(); m->control_pressed = true; }
1051 out << line[spot] << line[spot+1];
1053 name = line.substr(spot+2);
1055 //parse name - name will either look like U68590/ab=1/ or U68590
1056 string restOfName = "";
1057 int pos = name.find_first_of('/');
1058 if (pos != string::npos) {
1059 restOfName = name.substr(pos);
1060 name = name.substr(0, pos);
1064 itUnique = uniqueNames.find(name);
1066 if (itUnique == uniqueNames.end()) { m->mothurOut("[ERROR]: trouble parsing alns results. Cannot find "+ name + "."); m->mothurOutEndLine();m->control_pressed = true; }
1068 //only limit repeats on query names
1069 if (temp == "Query") {
1070 itNames = namesInFile.find((itUnique->second));
1072 if (itNames == namesInFile.end()) {
1073 out << itUnique->second << restOfName << endl;
1074 namesInFile.insert((itUnique->second));
1076 }else { out << itUnique->second << restOfName << endl; }
1081 }else { //not need to alter line
1082 out3 << line << endl;
1084 }else { out3 << endl; }
1089 m->mothurRemove(alnsFileName);
1090 rename((alnsFileName+".temp").c_str(), alnsFileName.c_str());
1095 catch(exception& e) {
1096 m->errorOut(e, "ChimeraUchimeCommand", "deconvoluteResults");
1100 //**********************************************************************************************************************
1101 int ChimeraUchimeCommand::printFile(vector<seqPriorityNode>& nameMapCount, string filename){
1104 sort(nameMapCount.begin(), nameMapCount.end(), compareSeqPriorityNodes);
1107 m->openOutputFile(filename, out);
1109 //print new file in order of
1110 for (int i = 0; i < nameMapCount.size(); i++) {
1111 out << ">" << nameMapCount[i].name << "/ab=" << nameMapCount[i].numIdentical << "/" << endl << nameMapCount[i].seq << endl;
1117 catch(exception& e) {
1118 m->errorOut(e, "ChimeraUchimeCommand", "printFile");
1122 //**********************************************************************************************************************
1123 int ChimeraUchimeCommand::readFasta(string filename, map<string, string>& seqs){
1125 //create input file for uchime
1126 //read through fastafile and store info
1128 m->openInputFile(filename, in);
1132 if (m->control_pressed) { in.close(); return 0; }
1134 Sequence seq(in); m->gobble(in);
1135 seqs[seq.getName()] = seq.getAligned();
1141 catch(exception& e) {
1142 m->errorOut(e, "ChimeraUchimeCommand", "readFasta");
1146 //**********************************************************************************************************************
1148 string ChimeraUchimeCommand::getNamesFile(string& inputFile){
1150 string nameFile = "";
1152 m->mothurOutEndLine(); m->mothurOut("No namesfile given, running unique.seqs command to generate one."); m->mothurOutEndLine(); m->mothurOutEndLine();
1154 //use unique.seqs to create new name and fastafile
1155 string inputString = "fasta=" + inputFile;
1156 m->mothurOut("/******************************************/"); m->mothurOutEndLine();
1157 m->mothurOut("Running command: unique.seqs(" + inputString + ")"); m->mothurOutEndLine();
1158 m->mothurCalling = true;
1160 Command* uniqueCommand = new DeconvoluteCommand(inputString);
1161 uniqueCommand->execute();
1163 map<string, vector<string> > filenames = uniqueCommand->getOutputFiles();
1165 delete uniqueCommand;
1166 m->mothurCalling = false;
1167 m->mothurOut("/******************************************/"); m->mothurOutEndLine();
1169 nameFile = filenames["name"][0];
1170 inputFile = filenames["fasta"][0];
1174 catch(exception& e) {
1175 m->errorOut(e, "ChimeraUchimeCommand", "getNamesFile");
1179 //**********************************************************************************************************************
1180 int ChimeraUchimeCommand::driverGroups(string outputFName, string filename, string accnos, string alns, string countlist, int start, int end, vector<string> groups){
1184 int numChimeras = 0;
1187 ofstream outCountList;
1188 if (hasCount && dups) { m->openOutputFile(countlist, outCountList); }
1190 for (int i = start; i < end; i++) {
1191 int start = time(NULL); if (m->control_pressed) { outCountList.close(); m->mothurRemove(countlist); return 0; }
1194 if (hasCount) { error = cparser->getSeqs(groups[i], filename, true); if ((error == 1) || m->control_pressed) { return 0; } }
1195 else { error = sparser->getSeqs(groups[i], filename, true); if ((error == 1) || m->control_pressed) { return 0; } }
1197 int numSeqs = driver((outputFName + groups[i]), filename, (accnos+groups[i]), (alns+ groups[i]), numChimeras);
1198 totalSeqs += numSeqs;
1200 if (m->control_pressed) { return 0; }
1202 //remove file made for uchime
1203 if (!m->debug) { m->mothurRemove(filename); }
1204 else { m->mothurOut("[DEBUG]: saving file: " + filename + ".\n"); }
1206 //if we provided a count file with group info and set dereplicate=t, then we want to create a *.pick.count_table
1207 //This table will zero out group counts for seqs determined to be chimeric by that group.
1209 if (!m->isBlank(accnos+groups[i])) {
1211 m->openInputFile(accnos+groups[i], in);
1215 in >> name; m->gobble(in);
1216 outCountList << name << '\t' << groups[i] << endl;
1220 map<string, string> thisnamemap = sparser->getNameMap(groups[i]);
1221 map<string, string>::iterator itN;
1223 m->openOutputFile(accnos+groups[i]+".temp", out);
1225 in >> name; m->gobble(in);
1226 itN = thisnamemap.find(name);
1227 if (itN != thisnamemap.end()) {
1228 vector<string> tempNames; m->splitAtComma(itN->second, tempNames);
1229 for (int j = 0; j < tempNames.size(); j++) { out << tempNames[j] << endl; }
1231 }else { m->mothurOut("[ERROR]: parsing cannot find " + name + ".\n"); m->control_pressed = true; }
1235 m->renameFile(accnos+groups[i]+".temp", accnos+groups[i]);
1242 m->appendFiles((outputFName+groups[i]), outputFName); m->mothurRemove((outputFName+groups[i]));
1243 m->appendFiles((accnos+groups[i]), accnos); m->mothurRemove((accnos+groups[i]));
1244 if (chimealns) { m->appendFiles((alns+groups[i]), alns); m->mothurRemove((alns+groups[i])); }
1246 m->mothurOutEndLine(); m->mothurOut("It took " + toString(time(NULL) - start) + " secs to check " + toString(numSeqs) + " sequences from group " + groups[i] + "."); m->mothurOutEndLine();
1249 if (hasCount && dups) { outCountList.close(); }
1254 catch(exception& e) {
1255 m->errorOut(e, "ChimeraUchimeCommand", "driverGroups");
1259 //**********************************************************************************************************************
1261 int ChimeraUchimeCommand::driver(string outputFName, string filename, string accnos, string alns, int& numChimeras){
1264 outputFName = m->getFullPathName(outputFName);
1265 filename = m->getFullPathName(filename);
1266 alns = m->getFullPathName(alns);
1268 //to allow for spaces in the path
1269 outputFName = "\"" + outputFName + "\"";
1270 filename = "\"" + filename + "\"";
1271 alns = "\"" + alns + "\"";
1273 vector<char*> cPara;
1275 string uchimeCommand = uchimeLocation;
1276 uchimeCommand = "\"" + uchimeCommand + "\" ";
1279 tempUchime= new char[uchimeCommand.length()+1];
1281 strncat(tempUchime, uchimeCommand.c_str(), uchimeCommand.length());
1282 cPara.push_back(tempUchime);
1284 //are you using a reference file
1285 if (templatefile != "self") {
1286 string outputFileName = filename.substr(1, filename.length()-2) + ".uchime_formatted";
1287 prepFile(filename.substr(1, filename.length()-2), outputFileName);
1288 filename = outputFileName;
1289 filename = "\"" + filename + "\"";
1290 //add reference file
1291 char* tempRef = new char[5];
1292 //strcpy(tempRef, "--db");
1293 *tempRef = '\0'; strncat(tempRef, "--db", 4);
1294 cPara.push_back(tempRef);
1295 char* tempR = new char[templatefile.length()+1];
1296 //strcpy(tempR, templatefile.c_str());
1297 *tempR = '\0'; strncat(tempR, templatefile.c_str(), templatefile.length());
1298 cPara.push_back(tempR);
1301 char* tempIn = new char[8];
1302 *tempIn = '\0'; strncat(tempIn, "--input", 7);
1303 //strcpy(tempIn, "--input");
1304 cPara.push_back(tempIn);
1305 char* temp = new char[filename.length()+1];
1306 *temp = '\0'; strncat(temp, filename.c_str(), filename.length());
1307 //strcpy(temp, filename.c_str());
1308 cPara.push_back(temp);
1310 char* tempO = new char[12];
1311 *tempO = '\0'; strncat(tempO, "--uchimeout", 11);
1312 //strcpy(tempO, "--uchimeout");
1313 cPara.push_back(tempO);
1314 char* tempout = new char[outputFName.length()+1];
1315 //strcpy(tempout, outputFName.c_str());
1316 *tempout = '\0'; strncat(tempout, outputFName.c_str(), outputFName.length());
1317 cPara.push_back(tempout);
1320 char* tempA = new char[13];
1321 *tempA = '\0'; strncat(tempA, "--uchimealns", 12);
1322 //strcpy(tempA, "--uchimealns");
1323 cPara.push_back(tempA);
1324 char* tempa = new char[alns.length()+1];
1325 //strcpy(tempa, alns.c_str());
1326 *tempa = '\0'; strncat(tempa, alns.c_str(), alns.length());
1327 cPara.push_back(tempa);
1331 char* tempA = new char[9];
1332 *tempA = '\0'; strncat(tempA, "--strand", 8);
1333 cPara.push_back(tempA);
1334 char* tempa = new char[strand.length()+1];
1335 *tempa = '\0'; strncat(tempa, strand.c_str(), strand.length());
1336 cPara.push_back(tempa);
1340 char* tempskew = new char[9];
1341 *tempskew = '\0'; strncat(tempskew, "--abskew", 8);
1342 //strcpy(tempskew, "--abskew");
1343 cPara.push_back(tempskew);
1344 char* tempSkew = new char[abskew.length()+1];
1345 //strcpy(tempSkew, abskew.c_str());
1346 *tempSkew = '\0'; strncat(tempSkew, abskew.c_str(), abskew.length());
1347 cPara.push_back(tempSkew);
1351 char* tempminh = new char[7];
1352 *tempminh = '\0'; strncat(tempminh, "--minh", 6);
1353 //strcpy(tempminh, "--minh");
1354 cPara.push_back(tempminh);
1355 char* tempMinH = new char[minh.length()+1];
1356 *tempMinH = '\0'; strncat(tempMinH, minh.c_str(), minh.length());
1357 //strcpy(tempMinH, minh.c_str());
1358 cPara.push_back(tempMinH);
1362 char* tempmindiv = new char[9];
1363 *tempmindiv = '\0'; strncat(tempmindiv, "--mindiv", 8);
1364 //strcpy(tempmindiv, "--mindiv");
1365 cPara.push_back(tempmindiv);
1366 char* tempMindiv = new char[mindiv.length()+1];
1367 *tempMindiv = '\0'; strncat(tempMindiv, mindiv.c_str(), mindiv.length());
1368 //strcpy(tempMindiv, mindiv.c_str());
1369 cPara.push_back(tempMindiv);
1373 char* tempxn = new char[5];
1374 //strcpy(tempxn, "--xn");
1375 *tempxn = '\0'; strncat(tempxn, "--xn", 4);
1376 cPara.push_back(tempxn);
1377 char* tempXn = new char[xn.length()+1];
1378 //strcpy(tempXn, xn.c_str());
1379 *tempXn = '\0'; strncat(tempXn, xn.c_str(), xn.length());
1380 cPara.push_back(tempXn);
1384 char* tempdn = new char[5];
1385 //strcpy(tempdn, "--dn");
1386 *tempdn = '\0'; strncat(tempdn, "--dn", 4);
1387 cPara.push_back(tempdn);
1388 char* tempDn = new char[dn.length()+1];
1389 *tempDn = '\0'; strncat(tempDn, dn.c_str(), dn.length());
1390 //strcpy(tempDn, dn.c_str());
1391 cPara.push_back(tempDn);
1395 char* tempxa = new char[5];
1396 //strcpy(tempxa, "--xa");
1397 *tempxa = '\0'; strncat(tempxa, "--xa", 4);
1398 cPara.push_back(tempxa);
1399 char* tempXa = new char[xa.length()+1];
1400 *tempXa = '\0'; strncat(tempXa, xa.c_str(), xa.length());
1401 //strcpy(tempXa, xa.c_str());
1402 cPara.push_back(tempXa);
1406 char* tempchunks = new char[9];
1407 //strcpy(tempchunks, "--chunks");
1408 *tempchunks = '\0'; strncat(tempchunks, "--chunks", 8);
1409 cPara.push_back(tempchunks);
1410 char* tempChunks = new char[chunks.length()+1];
1411 *tempChunks = '\0'; strncat(tempChunks, chunks.c_str(), chunks.length());
1412 //strcpy(tempChunks, chunks.c_str());
1413 cPara.push_back(tempChunks);
1417 char* tempminchunk = new char[11];
1418 //strcpy(tempminchunk, "--minchunk");
1419 *tempminchunk = '\0'; strncat(tempminchunk, "--minchunk", 10);
1420 cPara.push_back(tempminchunk);
1421 char* tempMinchunk = new char[minchunk.length()+1];
1422 *tempMinchunk = '\0'; strncat(tempMinchunk, minchunk.c_str(), minchunk.length());
1423 //strcpy(tempMinchunk, minchunk.c_str());
1424 cPara.push_back(tempMinchunk);
1427 if (useIdsmoothwindow) {
1428 char* tempidsmoothwindow = new char[17];
1429 *tempidsmoothwindow = '\0'; strncat(tempidsmoothwindow, "--idsmoothwindow", 16);
1430 //strcpy(tempidsmoothwindow, "--idsmoothwindow");
1431 cPara.push_back(tempidsmoothwindow);
1432 char* tempIdsmoothwindow = new char[idsmoothwindow.length()+1];
1433 *tempIdsmoothwindow = '\0'; strncat(tempIdsmoothwindow, idsmoothwindow.c_str(), idsmoothwindow.length());
1434 //strcpy(tempIdsmoothwindow, idsmoothwindow.c_str());
1435 cPara.push_back(tempIdsmoothwindow);
1438 /*if (useMinsmoothid) {
1439 char* tempminsmoothid = new char[14];
1440 //strcpy(tempminsmoothid, "--minsmoothid");
1441 *tempminsmoothid = '\0'; strncat(tempminsmoothid, "--minsmoothid", 13);
1442 cPara.push_back(tempminsmoothid);
1443 char* tempMinsmoothid = new char[minsmoothid.length()+1];
1444 *tempMinsmoothid = '\0'; strncat(tempMinsmoothid, minsmoothid.c_str(), minsmoothid.length());
1445 //strcpy(tempMinsmoothid, minsmoothid.c_str());
1446 cPara.push_back(tempMinsmoothid);
1450 char* tempmaxp = new char[7];
1451 //strcpy(tempmaxp, "--maxp");
1452 *tempmaxp = '\0'; strncat(tempmaxp, "--maxp", 6);
1453 cPara.push_back(tempmaxp);
1454 char* tempMaxp = new char[maxp.length()+1];
1455 *tempMaxp = '\0'; strncat(tempMaxp, maxp.c_str(), maxp.length());
1456 //strcpy(tempMaxp, maxp.c_str());
1457 cPara.push_back(tempMaxp);
1461 char* tempskipgaps = new char[13];
1462 //strcpy(tempskipgaps, "--[no]skipgaps");
1463 *tempskipgaps = '\0'; strncat(tempskipgaps, "--noskipgaps", 12);
1464 cPara.push_back(tempskipgaps);
1468 char* tempskipgaps2 = new char[14];
1469 //strcpy(tempskipgaps2, "--[no]skipgaps2");
1470 *tempskipgaps2 = '\0'; strncat(tempskipgaps2, "--noskipgaps2", 13);
1471 cPara.push_back(tempskipgaps2);
1475 char* tempminlen = new char[9];
1476 *tempminlen = '\0'; strncat(tempminlen, "--minlen", 8);
1477 //strcpy(tempminlen, "--minlen");
1478 cPara.push_back(tempminlen);
1479 char* tempMinlen = new char[minlen.length()+1];
1480 //strcpy(tempMinlen, minlen.c_str());
1481 *tempMinlen = '\0'; strncat(tempMinlen, minlen.c_str(), minlen.length());
1482 cPara.push_back(tempMinlen);
1486 char* tempmaxlen = new char[9];
1487 //strcpy(tempmaxlen, "--maxlen");
1488 *tempmaxlen = '\0'; strncat(tempmaxlen, "--maxlen", 8);
1489 cPara.push_back(tempmaxlen);
1490 char* tempMaxlen = new char[maxlen.length()+1];
1491 *tempMaxlen = '\0'; strncat(tempMaxlen, maxlen.c_str(), maxlen.length());
1492 //strcpy(tempMaxlen, maxlen.c_str());
1493 cPara.push_back(tempMaxlen);
1497 char* tempucl = new char[5];
1498 strcpy(tempucl, "--ucl");
1499 cPara.push_back(tempucl);
1502 if (useQueryfract) {
1503 char* tempqueryfract = new char[13];
1504 *tempqueryfract = '\0'; strncat(tempqueryfract, "--queryfract", 12);
1505 //strcpy(tempqueryfract, "--queryfract");
1506 cPara.push_back(tempqueryfract);
1507 char* tempQueryfract = new char[queryfract.length()+1];
1508 *tempQueryfract = '\0'; strncat(tempQueryfract, queryfract.c_str(), queryfract.length());
1509 //strcpy(tempQueryfract, queryfract.c_str());
1510 cPara.push_back(tempQueryfract);
1514 char** uchimeParameters;
1515 uchimeParameters = new char*[cPara.size()];
1516 string commandString = "";
1517 for (int i = 0; i < cPara.size(); i++) { uchimeParameters[i] = cPara[i]; commandString += toString(cPara[i]) + " "; }
1518 //int numArgs = cPara.size();
1520 //uchime_main(numArgs, uchimeParameters);
1521 //cout << "commandString = " << commandString << endl;
1522 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
1524 commandString = "\"" + commandString + "\"";
1526 if (m->debug) { m->mothurOut("[DEBUG]: uchime command = " + commandString + ".\n"); }
1527 system(commandString.c_str());
1530 for(int i = 0; i < cPara.size(); i++) { delete cPara[i]; }
1531 delete[] uchimeParameters;
1533 //remove "" from filenames
1534 outputFName = outputFName.substr(1, outputFName.length()-2);
1535 filename = filename.substr(1, filename.length()-2);
1536 alns = alns.substr(1, alns.length()-2);
1538 if (m->control_pressed) { return 0; }
1540 //create accnos file from uchime results
1542 m->openInputFile(outputFName, in);
1545 m->openOutputFile(accnos, out);
1551 if (m->control_pressed) { break; }
1554 string chimeraFlag = "";
1555 //in >> chimeraFlag >> name;
1557 string line = m->getline(in);
1558 vector<string> pieces = m->splitWhiteSpace(line);
1559 if (pieces.size() > 2) {
1561 //fix name if needed
1562 if (templatefile == "self") {
1563 name = name.substr(0, name.length()-1); //rip off last /
1564 name = name.substr(0, name.find_last_of('/'));
1567 chimeraFlag = pieces[pieces.size()-1];
1569 //for (int i = 0; i < 15; i++) { in >> chimeraFlag; }
1572 if (chimeraFlag == "Y") { out << name << endl; numChimeras++; }
1578 //if (templatefile != "self") { m->mothurRemove(filename); }
1582 catch(exception& e) {
1583 m->errorOut(e, "ChimeraUchimeCommand", "driver");
1587 /**************************************************************************************************/
1588 //uchime can't handle some of the things allowed in mothurs fasta files. This functions "cleans up" the file.
1589 int ChimeraUchimeCommand::prepFile(string filename, string output) {
1593 m->openInputFile(filename, in);
1596 m->openOutputFile(output, out);
1599 if (m->control_pressed) { break; }
1601 Sequence seq(in); m->gobble(in);
1603 if (seq.getName() != "") { seq.printSequence(out); }
1610 catch(exception& e) {
1611 m->errorOut(e, "ChimeraUchimeCommand", "prepFile");
1615 /**************************************************************************************************/
1617 int ChimeraUchimeCommand::createProcesses(string outputFileName, string filename, string accnos, string alns, int& numChimeras) {
1623 vector<string> files;
1625 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
1626 //break up file into multiple files
1627 m->divideFile(filename, processors, files);
1629 if (m->control_pressed) { return 0; }
1631 //loop through and create all the processes you want
1632 while (process != processors) {
1636 processIDS.push_back(pid); //create map from line number to pid so you can append files in correct order later
1638 }else if (pid == 0){
1639 num = driver(outputFileName + toString(getpid()) + ".temp", files[process], accnos + toString(getpid()) + ".temp", alns + toString(getpid()) + ".temp", numChimeras);
1641 //pass numSeqs to parent
1643 string tempFile = outputFileName + toString(getpid()) + ".num.temp";
1644 m->openOutputFile(tempFile, out);
1646 out << numChimeras << endl;
1651 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine();
1652 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
1658 num = driver(outputFileName, files[0], accnos, alns, numChimeras);
1660 //force parent to wait until all the processes are done
1661 for (int i=0;i<processIDS.size();i++) {
1662 int temp = processIDS[i];
1666 for (int i = 0; i < processIDS.size(); i++) {
1668 string tempFile = outputFileName + toString(processIDS[i]) + ".num.temp";
1669 m->openInputFile(tempFile, in);
1672 in >> tempNum; m->gobble(in);
1675 numChimeras += tempNum;
1677 in.close(); m->mothurRemove(tempFile);
1680 //////////////////////////////////////////////////////////////////////////////////////////////////////
1681 //Windows version shared memory, so be careful when passing variables through the preClusterData struct.
1682 //Above fork() will clone, so memory is separate, but that's not the case with windows,
1683 //////////////////////////////////////////////////////////////////////////////////////////////////////
1688 map<int, ofstream*> filehandles;
1689 map<int, ofstream*>::iterator it3;
1692 for (int i = 0; i < processors; i++) {
1693 temp = new ofstream;
1694 filehandles[i] = temp;
1695 m->openOutputFile(filename+toString(i)+".temp", *(temp));
1696 files.push_back(filename+toString(i)+".temp");
1700 m->openInputFile(filename, in);
1704 if (m->control_pressed) { in.close(); for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) { (*(it3->second)).close(); delete it3->second; } return 0; }
1706 Sequence tempSeq(in); m->gobble(in);
1708 if (tempSeq.getName() != "") {
1709 tempSeq.printSequence(*(filehandles[spot]));
1711 if (spot == processors) { spot = 0; }
1717 for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) {
1718 (*(it3->second)).close();
1722 //sanity check for number of processors
1723 if (count < processors) { processors = count; }
1725 vector<uchimeData*> pDataArray;
1726 DWORD dwThreadIdArray[processors-1];
1727 HANDLE hThreadArray[processors-1];
1728 vector<string> dummy; //used so that we can use the same struct for MyUchimeSeqsThreadFunction and MyUchimeThreadFunction
1730 //Create processor worker threads.
1731 for( int i=1; i<processors; i++ ){
1732 // Allocate memory for thread data.
1733 string extension = toString(i) + ".temp";
1735 uchimeData* tempUchime = new uchimeData(outputFileName+extension, uchimeLocation, templatefile, files[i], "", "", "", accnos+extension, alns+extension, "", dummy, m, 0, 0, i);
1736 tempUchime->setBooleans(dups, useAbskew, chimealns, useMinH, useMindiv, useXn, useDn, useXa, useChunks, useMinchunk, useIdsmoothwindow, useMinsmoothid, useMaxp, skipgaps, skipgaps2, useMinlen, useMaxlen, ucl, useQueryfract, hasCount);
1737 tempUchime->setVariables(abskew, minh, mindiv, xn, dn, xa, chunks, minchunk, idsmoothwindow, minsmoothid, maxp, minlen, maxlen, queryfract, strand);
1739 pDataArray.push_back(tempUchime);
1740 processIDS.push_back(i);
1742 //MySeqSumThreadFunction is in header. It must be global or static to work with the threads.
1743 //default security attributes, thread function name, argument to thread function, use default creation flags, returns the thread identifier
1744 hThreadArray[i-1] = CreateThread(NULL, 0, MyUchimeSeqsThreadFunction, pDataArray[i-1], 0, &dwThreadIdArray[i-1]);
1748 //using the main process as a worker saves time and memory
1749 num = driver(outputFileName, files[0], accnos, alns, numChimeras);
1751 //Wait until all threads have terminated.
1752 WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE);
1754 //Close all thread handles and free memory allocations.
1755 for(int i=0; i < pDataArray.size(); i++){
1756 num += pDataArray[i]->count;
1757 numChimeras += pDataArray[i]->numChimeras;
1758 CloseHandle(hThreadArray[i]);
1759 delete pDataArray[i];
1763 //append output files
1764 for(int i=0;i<processIDS.size();i++){
1765 m->appendFiles((outputFileName + toString(processIDS[i]) + ".temp"), outputFileName);
1766 m->mothurRemove((outputFileName + toString(processIDS[i]) + ".temp"));
1768 m->appendFiles((accnos + toString(processIDS[i]) + ".temp"), accnos);
1769 m->mothurRemove((accnos + toString(processIDS[i]) + ".temp"));
1772 m->appendFiles((alns + toString(processIDS[i]) + ".temp"), alns);
1773 m->mothurRemove((alns + toString(processIDS[i]) + ".temp"));
1777 //get rid of the file pieces.
1778 for (int i = 0; i < files.size(); i++) { m->mothurRemove(files[i]); }
1781 catch(exception& e) {
1782 m->errorOut(e, "ChimeraUchimeCommand", "createProcesses");
1786 /**************************************************************************************************/
1788 int ChimeraUchimeCommand::createProcessesGroups(string outputFName, string filename, string accnos, string alns, string newCountFile, vector<string> groups, string nameFile, string groupFile, string fastaFile) {
1795 CountTable newCount;
1796 if (hasCount && dups) { newCount.readTable(nameFile); }
1799 if (groups.size() < processors) { processors = groups.size(); }
1801 //divide the groups between the processors
1802 vector<linePair> lines;
1803 int numGroupsPerProcessor = groups.size() / processors;
1804 for (int i = 0; i < processors; i++) {
1805 int startIndex = i * numGroupsPerProcessor;
1806 int endIndex = (i+1) * numGroupsPerProcessor;
1807 if(i == (processors - 1)){ endIndex = groups.size(); }
1808 lines.push_back(linePair(startIndex, endIndex));
1811 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
1813 //loop through and create all the processes you want
1814 while (process != processors) {
1818 processIDS.push_back(pid); //create map from line number to pid so you can append files in correct order later
1820 }else if (pid == 0){
1821 num = driverGroups(outputFName + toString(getpid()) + ".temp", filename + toString(getpid()) + ".temp", accnos + toString(getpid()) + ".temp", alns + toString(getpid()) + ".temp", accnos + ".byCount." + toString(getpid()) + ".temp", lines[process].start, lines[process].end, groups);
1823 //pass numSeqs to parent
1825 string tempFile = outputFName + toString(getpid()) + ".num.temp";
1826 m->openOutputFile(tempFile, out);
1832 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine();
1833 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
1839 num = driverGroups(outputFName, filename, accnos, alns, accnos + ".byCount", lines[0].start, lines[0].end, groups);
1841 //force parent to wait until all the processes are done
1842 for (int i=0;i<processIDS.size();i++) {
1843 int temp = processIDS[i];
1847 for (int i = 0; i < processIDS.size(); i++) {
1849 string tempFile = outputFName + toString(processIDS[i]) + ".num.temp";
1850 m->openInputFile(tempFile, in);
1851 if (!in.eof()) { int tempNum = 0; in >> tempNum; num += tempNum; }
1852 in.close(); m->mothurRemove(tempFile);
1856 //////////////////////////////////////////////////////////////////////////////////////////////////////
1857 //Windows version shared memory, so be careful when passing variables through the uchimeData struct.
1858 //Above fork() will clone, so memory is separate, but that's not the case with windows,
1859 //////////////////////////////////////////////////////////////////////////////////////////////////////
1861 vector<uchimeData*> pDataArray;
1862 DWORD dwThreadIdArray[processors-1];
1863 HANDLE hThreadArray[processors-1];
1865 //Create processor worker threads.
1866 for( int i=1; i<processors; i++ ){
1867 // Allocate memory for thread data.
1868 string extension = toString(i) + ".temp";
1870 uchimeData* tempUchime = new uchimeData(outputFName+extension, uchimeLocation, templatefile, filename+extension, fastaFile, nameFile, groupFile, accnos+extension, alns+extension, accnos+".byCount."+extension, groups, m, lines[i].start, lines[i].end, i);
1871 tempUchime->setBooleans(dups, useAbskew, chimealns, useMinH, useMindiv, useXn, useDn, useXa, useChunks, useMinchunk, useIdsmoothwindow, useMinsmoothid, useMaxp, skipgaps, skipgaps2, useMinlen, useMaxlen, ucl, useQueryfract, hasCount);
1872 tempUchime->setVariables(abskew, minh, mindiv, xn, dn, xa, chunks, minchunk, idsmoothwindow, minsmoothid, maxp, minlen, maxlen, queryfract, strand);
1874 pDataArray.push_back(tempUchime);
1875 processIDS.push_back(i);
1877 //MyUchimeThreadFunction is in header. It must be global or static to work with the threads.
1878 //default security attributes, thread function name, argument to thread function, use default creation flags, returns the thread identifier
1879 hThreadArray[i-1] = CreateThread(NULL, 0, MyUchimeThreadFunction, pDataArray[i-1], 0, &dwThreadIdArray[i-1]);
1883 //using the main process as a worker saves time and memory
1884 num = driverGroups(outputFName, filename, accnos, alns, accnos + ".byCount", lines[0].start, lines[0].end, groups);
1886 //Wait until all threads have terminated.
1887 WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE);
1889 //Close all thread handles and free memory allocations.
1890 for(int i=0; i < pDataArray.size(); i++){
1891 num += pDataArray[i]->count;
1892 CloseHandle(hThreadArray[i]);
1893 delete pDataArray[i];
1900 if (hasCount && dups) {
1901 if (!m->isBlank(accnos + ".byCount")) {
1903 m->openInputFile(accnos + ".byCount", in2);
1906 while (!in2.eof()) {
1907 in2 >> name >> group; m->gobble(in2);
1908 newCount.setAbund(name, group, 0);
1912 m->mothurRemove(accnos + ".byCount");
1915 //append output files
1916 for(int i=0;i<processIDS.size();i++){
1917 m->appendFiles((outputFName + toString(processIDS[i]) + ".temp"), outputFName);
1918 m->mothurRemove((outputFName + toString(processIDS[i]) + ".temp"));
1920 m->appendFiles((accnos + toString(processIDS[i]) + ".temp"), accnos);
1921 m->mothurRemove((accnos + toString(processIDS[i]) + ".temp"));
1924 m->appendFiles((alns + toString(processIDS[i]) + ".temp"), alns);
1925 m->mothurRemove((alns + toString(processIDS[i]) + ".temp"));
1928 if (hasCount && dups) {
1929 if (!m->isBlank(accnos + ".byCount." + toString(processIDS[i]) + ".temp")) {
1931 m->openInputFile(accnos + ".byCount." + toString(processIDS[i]) + ".temp", in2);
1934 while (!in2.eof()) {
1935 in2 >> name >> group; m->gobble(in2);
1936 newCount.setAbund(name, group, 0);
1940 m->mothurRemove(accnos + ".byCount." + toString(processIDS[i]) + ".temp");
1945 //print new *.pick.count_table
1946 if (hasCount && dups) { newCount.printTable(newCountFile); }
1951 catch(exception& e) {
1952 m->errorOut(e, "ChimeraUchimeCommand", "createProcessesGroups");
1956 /**************************************************************************************************/