2 * chimerauchimecommand.cpp
5 * Created by westcott on 5/13/11.
6 * Copyright 2011 Schloss Lab. All rights reserved.
10 #include "chimerauchimecommand.h"
11 #include "deconvolutecommand.h"
13 #include "sequence.hpp"
14 #include "referencedb.h"
15 #include "systemcommand.h"
17 //**********************************************************************************************************************
18 vector<string> ChimeraUchimeCommand::setParameters(){
20 CommandParameter ptemplate("reference", "InputTypes", "", "", "none", "none", "none","",false,true,true); parameters.push_back(ptemplate);
21 CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none","chimera-accnos",false,true,true); parameters.push_back(pfasta);
22 CommandParameter pname("name", "InputTypes", "", "", "NameCount", "none", "none","",false,false,true); parameters.push_back(pname);
23 CommandParameter pcount("count", "InputTypes", "", "", "NameCount-CountGroup", "none", "none","",false,false,true); parameters.push_back(pcount);
24 CommandParameter pgroup("group", "InputTypes", "", "", "CountGroup", "none", "none","",false,false,true); parameters.push_back(pgroup);
25 CommandParameter pprocessors("processors", "Number", "", "1", "", "", "","",false,false,true); parameters.push_back(pprocessors);
26 CommandParameter pstrand("strand", "String", "", "", "", "", "","",false,false); parameters.push_back(pstrand);
27 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
28 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
29 CommandParameter pabskew("abskew", "Number", "", "1.9", "", "", "","",false,false); parameters.push_back(pabskew);
30 CommandParameter pchimealns("chimealns", "Boolean", "", "F", "", "", "","alns",false,false); parameters.push_back(pchimealns);
31 CommandParameter pminh("minh", "Number", "", "0.3", "", "", "","",false,false); parameters.push_back(pminh);
32 CommandParameter pmindiv("mindiv", "Number", "", "0.5", "", "", "","",false,false); parameters.push_back(pmindiv);
33 CommandParameter pxn("xn", "Number", "", "8.0", "", "", "","",false,false); parameters.push_back(pxn);
34 CommandParameter pdn("dn", "Number", "", "1.4", "", "", "","",false,false); parameters.push_back(pdn);
35 CommandParameter pxa("xa", "Number", "", "1", "", "", "","",false,false); parameters.push_back(pxa);
36 CommandParameter pchunks("chunks", "Number", "", "4", "", "", "","",false,false); parameters.push_back(pchunks);
37 CommandParameter pminchunk("minchunk", "Number", "", "64", "", "", "","",false,false); parameters.push_back(pminchunk);
38 CommandParameter pidsmoothwindow("idsmoothwindow", "Number", "", "32", "", "", "","",false,false); parameters.push_back(pidsmoothwindow);
39 CommandParameter pdups("dereplicate", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(pdups);
41 //CommandParameter pminsmoothid("minsmoothid", "Number", "", "0.95", "", "", "",false,false); parameters.push_back(pminsmoothid);
42 CommandParameter pmaxp("maxp", "Number", "", "2", "", "", "","",false,false); parameters.push_back(pmaxp);
43 CommandParameter pskipgaps("skipgaps", "Boolean", "", "T", "", "", "","",false,false); parameters.push_back(pskipgaps);
44 CommandParameter pskipgaps2("skipgaps2", "Boolean", "", "T", "", "", "","",false,false); parameters.push_back(pskipgaps2);
45 CommandParameter pminlen("minlen", "Number", "", "10", "", "", "","",false,false); parameters.push_back(pminlen);
46 CommandParameter pmaxlen("maxlen", "Number", "", "10000", "", "", "","",false,false); parameters.push_back(pmaxlen);
47 CommandParameter pucl("ucl", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(pucl);
48 CommandParameter pqueryfract("queryfract", "Number", "", "0.5", "", "", "","",false,false); parameters.push_back(pqueryfract);
50 vector<string> myArray;
51 for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); }
55 m->errorOut(e, "ChimeraUchimeCommand", "setParameters");
59 //**********************************************************************************************************************
60 string ChimeraUchimeCommand::getHelpString(){
62 string helpString = "";
63 helpString += "The chimera.uchime command reads a fastafile and referencefile and outputs potentially chimeric sequences.\n";
64 helpString += "This command is a wrapper for uchime written by Robert C. Edgar.\n";
65 helpString += "The chimera.uchime command parameters are fasta, name, count, reference, processors, dereplicate, abskew, chimealns, minh, mindiv, xn, dn, xa, chunks, minchunk, idsmoothwindow, minsmoothid, maxp, skipgaps, skipgaps2, minlen, maxlen, ucl, strand and queryfact.\n";
66 helpString += "The fasta parameter allows you to enter the fasta file containing your potentially chimeric sequences, and is required, unless you have a valid current fasta file. \n";
67 helpString += "The name parameter allows you to provide a name file, if you are using template=self. \n";
68 helpString += "The count parameter allows you to provide a count file, if you are using template=self. When you use a count file with group info and dereplicate=T, mothur will create a *.pick.count_table file containing seqeunces after chimeras are removed. \n";
69 helpString += "You may enter multiple fasta files by separating their names with dashes. ie. fasta=abrecovery.fasta-amazon.fasta \n";
70 helpString += "The group parameter allows you to provide a group file. The group file can be used with a namesfile and reference=self. When checking sequences, only sequences from the same group as the query sequence will be used as the reference. \n";
71 helpString += "If the dereplicate parameter is false, then if one group finds the seqeunce to be chimeric, then all groups find it to be chimeric, default=f.\n";
72 helpString += "The reference parameter allows you to enter a reference file containing known non-chimeric sequences, and is required. You may also set template=self, in this case the abundant sequences will be used as potential parents. \n";
73 helpString += "The processors parameter allows you to specify how many processors you would like to use. The default is 1. \n";
74 helpString += "The abskew parameter can only be used with template=self. Minimum abundance skew. Default 1.9. Abundance skew is: min [ abund(parent1), abund(parent2) ] / abund(query).\n";
75 helpString += "The chimealns parameter allows you to indicate you would like a file containing multiple alignments of query sequences to parents in human readable format. Alignments show columns with differences that support or contradict a chimeric model.\n";
76 helpString += "The minh parameter - mininum score to report chimera. Default 0.3. Values from 0.1 to 5 might be reasonable. Lower values increase sensitivity but may report more false positives. If you decrease xn you may need to increase minh, and vice versa.\n";
77 helpString += "The mindiv parameter - minimum divergence ratio, default 0.5. Div ratio is 100%% - %%identity between query sequence and the closest candidate for being a parent. If you don't care about very close chimeras, then you could increase mindiv to, say, 1.0 or 2.0, and also decrease minh, say to 0.1, to increase sensitivity. How well this works will depend on your data. Best is to tune parameters on a good benchmark.\n";
78 helpString += "The xn parameter - weight of a no vote. Default 8.0. Decreasing this weight to around 3 or 4 may give better performance on denoised data.\n";
79 helpString += "The dn parameter - pseudo-count prior on number of no votes. Default 1.4. Probably no good reason to change this unless you can retune to a good benchmark for your data. Reasonable values are probably in the range from 0.2 to 2.\n";
80 helpString += "The xa parameter - weight of an abstain vote. Default 1. So far, results do not seem to be very sensitive to this parameter, but if you have a good training set might be worth trying. Reasonable values might range from 0.1 to 2.\n";
81 helpString += "The chunks parameter is the number of chunks to extract from the query sequence when searching for parents. Default 4.\n";
82 helpString += "The minchunk parameter is the minimum length of a chunk. Default 64.\n";
83 helpString += "The idsmoothwindow parameter is the length of id smoothing window. Default 32.\n";
84 //helpString += "The minsmoothid parameter - minimum factional identity over smoothed window of candidate parent. Default 0.95.\n";
85 helpString += "The maxp parameter - maximum number of candidate parents to consider. Default 2. In tests so far, increasing maxp gives only a very small improvement in sensivity but tends to increase the error rate quite a bit.\n";
86 helpString += "The skipgaps parameter controls how gapped columns affect counting of diffs. If skipgaps is set to T, columns containing gaps do not found as diffs. Default = T.\n";
87 helpString += "The skipgaps2 parameter controls how gapped columns affect counting of diffs. If skipgaps2 is set to T, if column is immediately adjacent to a column containing a gap, it is not counted as a diff. Default = T.\n";
88 helpString += "The minlen parameter is the minimum unaligned sequence length. Defaults 10. Applies to both query and reference sequences.\n";
89 helpString += "The maxlen parameter is the maximum unaligned sequence length. Defaults 10000. Applies to both query and reference sequences.\n";
90 helpString += "The ucl parameter - use local-X alignments. Default is global-X or false. On tests so far, global-X is always better; this option is retained because it just might work well on some future type of data.\n";
91 helpString += "The queryfract parameter - minimum fraction of the query sequence that must be covered by a local-X alignment. Default 0.5. Applies only when ucl is true.\n";
93 helpString += "When using MPI, the processors parameter is set to the number of MPI processes running. \n";
95 helpString += "The chimera.uchime command should be in the following format: \n";
96 helpString += "chimera.uchime(fasta=yourFastaFile, reference=yourTemplate) \n";
97 helpString += "Example: chimera.uchime(fasta=AD.align, reference=silva.gold.align) \n";
98 helpString += "Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFastaFile).\n";
101 catch(exception& e) {
102 m->errorOut(e, "ChimeraUchimeCommand", "getHelpString");
106 //**********************************************************************************************************************
107 string ChimeraUchimeCommand::getOutputPattern(string type) {
111 if (type == "chimera") { pattern = "[filename],uchime.chimeras"; }
112 else if (type == "accnos") { pattern = "[filename],uchime.accnos"; }
113 else if (type == "alns") { pattern = "[filename],uchime.alns"; }
114 else if (type == "count") { pattern = "[filename],uchime.pick.count_table"; }
115 else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true; }
119 catch(exception& e) {
120 m->errorOut(e, "ChimeraUchimeCommand", "getOutputPattern");
124 //**********************************************************************************************************************
125 ChimeraUchimeCommand::ChimeraUchimeCommand(){
127 abort = true; calledHelp = true;
129 vector<string> tempOutNames;
130 outputTypes["chimera"] = tempOutNames;
131 outputTypes["accnos"] = tempOutNames;
132 outputTypes["alns"] = tempOutNames;
133 outputTypes["count"] = tempOutNames;
135 catch(exception& e) {
136 m->errorOut(e, "ChimeraUchimeCommand", "ChimeraUchimeCommand");
140 //***************************************************************************************************************
141 ChimeraUchimeCommand::ChimeraUchimeCommand(string option) {
143 abort = false; calledHelp = false; hasName=false; hasCount=false;
144 ReferenceDB* rdb = ReferenceDB::getInstance();
146 //allow user to run help
147 if(option == "help") { help(); abort = true; calledHelp = true; }
148 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
151 vector<string> myArray = setParameters();
153 OptionParser parser(option);
154 map<string,string> parameters = parser.getParameters();
156 ValidParameters validParameter("chimera.uchime");
157 map<string,string>::iterator it;
159 //check to make sure all parameters are valid for command
160 for (it = parameters.begin(); it != parameters.end(); it++) {
161 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
164 vector<string> tempOutNames;
165 outputTypes["chimera"] = tempOutNames;
166 outputTypes["accnos"] = tempOutNames;
167 outputTypes["alns"] = tempOutNames;
168 outputTypes["count"] = tempOutNames;
170 //if the user changes the input directory command factory will send this info to us in the output parameter
171 string inputDir = validParameter.validFile(parameters, "inputdir", false);
172 if (inputDir == "not found"){ inputDir = ""; }
174 //check for required parameters
175 fastafile = validParameter.validFile(parameters, "fasta", false);
176 if (fastafile == "not found") {
177 //if there is a current fasta file, use it
178 string filename = m->getFastaFile();
179 if (filename != "") { fastaFileNames.push_back(filename); m->mothurOut("Using " + filename + " as input file for the fasta parameter."); m->mothurOutEndLine(); }
180 else { m->mothurOut("You have no current fastafile and the fasta parameter is required."); m->mothurOutEndLine(); abort = true; }
182 m->splitAtDash(fastafile, fastaFileNames);
184 //go through files and make sure they are good, if not, then disregard them
185 for (int i = 0; i < fastaFileNames.size(); i++) {
188 if (fastaFileNames[i] == "current") {
189 fastaFileNames[i] = m->getFastaFile();
190 if (fastaFileNames[i] != "") { m->mothurOut("Using " + fastaFileNames[i] + " as input file for the fasta parameter where you had given current."); m->mothurOutEndLine(); }
192 m->mothurOut("You have no current fastafile, ignoring current."); m->mothurOutEndLine(); ignore=true;
193 //erase from file list
194 fastaFileNames.erase(fastaFileNames.begin()+i);
201 if (inputDir != "") {
202 string path = m->hasPath(fastaFileNames[i]);
203 //if the user has not given a path then, add inputdir. else leave path alone.
204 if (path == "") { fastaFileNames[i] = inputDir + fastaFileNames[i]; }
210 ableToOpen = m->openInputFile(fastaFileNames[i], in, "noerror");
212 //if you can't open it, try default location
213 if (ableToOpen == 1) {
214 if (m->getDefaultPath() != "") { //default path is set
215 string tryPath = m->getDefaultPath() + m->getSimpleName(fastaFileNames[i]);
216 m->mothurOut("Unable to open " + fastaFileNames[i] + ". Trying default " + tryPath); m->mothurOutEndLine();
218 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
220 fastaFileNames[i] = tryPath;
224 if (ableToOpen == 1) {
225 if (m->getOutputDir() != "") { //default path is set
226 string tryPath = m->getOutputDir() + m->getSimpleName(fastaFileNames[i]);
227 m->mothurOut("Unable to open " + fastaFileNames[i] + ". Trying output directory " + tryPath); m->mothurOutEndLine();
229 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
231 fastaFileNames[i] = tryPath;
237 if (ableToOpen == 1) {
238 m->mothurOut("Unable to open " + fastaFileNames[i] + ". It will be disregarded."); m->mothurOutEndLine();
239 //erase from file list
240 fastaFileNames.erase(fastaFileNames.begin()+i);
243 m->setFastaFile(fastaFileNames[i]);
248 //make sure there is at least one valid file left
249 if (fastaFileNames.size() == 0) { m->mothurOut("[ERROR]: no valid files."); m->mothurOutEndLine(); abort = true; }
253 //check for required parameters
254 namefile = validParameter.validFile(parameters, "name", false);
255 if (namefile == "not found") { namefile = ""; }
257 m->splitAtDash(namefile, nameFileNames);
259 //go through files and make sure they are good, if not, then disregard them
260 for (int i = 0; i < nameFileNames.size(); i++) {
263 if (nameFileNames[i] == "current") {
264 nameFileNames[i] = m->getNameFile();
265 if (nameFileNames[i] != "") { m->mothurOut("Using " + nameFileNames[i] + " as input file for the name parameter where you had given current."); m->mothurOutEndLine(); }
267 m->mothurOut("You have no current namefile, ignoring current."); m->mothurOutEndLine(); ignore=true;
268 //erase from file list
269 nameFileNames.erase(nameFileNames.begin()+i);
276 if (inputDir != "") {
277 string path = m->hasPath(nameFileNames[i]);
278 //if the user has not given a path then, add inputdir. else leave path alone.
279 if (path == "") { nameFileNames[i] = inputDir + nameFileNames[i]; }
285 ableToOpen = m->openInputFile(nameFileNames[i], in, "noerror");
287 //if you can't open it, try default location
288 if (ableToOpen == 1) {
289 if (m->getDefaultPath() != "") { //default path is set
290 string tryPath = m->getDefaultPath() + m->getSimpleName(nameFileNames[i]);
291 m->mothurOut("Unable to open " + nameFileNames[i] + ". Trying default " + tryPath); m->mothurOutEndLine();
293 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
295 nameFileNames[i] = tryPath;
299 if (ableToOpen == 1) {
300 if (m->getOutputDir() != "") { //default path is set
301 string tryPath = m->getOutputDir() + m->getSimpleName(nameFileNames[i]);
302 m->mothurOut("Unable to open " + nameFileNames[i] + ". Trying output directory " + tryPath); m->mothurOutEndLine();
304 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
306 nameFileNames[i] = tryPath;
312 if (ableToOpen == 1) {
313 m->mothurOut("Unable to open " + nameFileNames[i] + ". It will be disregarded."); m->mothurOutEndLine();
314 //erase from file list
315 nameFileNames.erase(nameFileNames.begin()+i);
318 m->setNameFile(nameFileNames[i]);
324 if (nameFileNames.size() != 0) { hasName = true; }
326 //check for required parameters
327 vector<string> countfileNames;
328 countfile = validParameter.validFile(parameters, "count", false);
329 if (countfile == "not found") {
332 m->splitAtDash(countfile, countfileNames);
334 //go through files and make sure they are good, if not, then disregard them
335 for (int i = 0; i < countfileNames.size(); i++) {
338 if (countfileNames[i] == "current") {
339 countfileNames[i] = m->getCountTableFile();
340 if (nameFileNames[i] != "") { m->mothurOut("Using " + countfileNames[i] + " as input file for the count parameter where you had given current."); m->mothurOutEndLine(); }
342 m->mothurOut("You have no current count file, ignoring current."); m->mothurOutEndLine(); ignore=true;
343 //erase from file list
344 countfileNames.erase(countfileNames.begin()+i);
351 if (inputDir != "") {
352 string path = m->hasPath(countfileNames[i]);
353 //if the user has not given a path then, add inputdir. else leave path alone.
354 if (path == "") { countfileNames[i] = inputDir + countfileNames[i]; }
360 ableToOpen = m->openInputFile(countfileNames[i], in, "noerror");
362 //if you can't open it, try default location
363 if (ableToOpen == 1) {
364 if (m->getDefaultPath() != "") { //default path is set
365 string tryPath = m->getDefaultPath() + m->getSimpleName(countfileNames[i]);
366 m->mothurOut("Unable to open " + countfileNames[i] + ". Trying default " + tryPath); m->mothurOutEndLine();
368 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
370 countfileNames[i] = tryPath;
374 if (ableToOpen == 1) {
375 if (m->getOutputDir() != "") { //default path is set
376 string tryPath = m->getOutputDir() + m->getSimpleName(countfileNames[i]);
377 m->mothurOut("Unable to open " + countfileNames[i] + ". Trying output directory " + tryPath); m->mothurOutEndLine();
379 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
381 countfileNames[i] = tryPath;
387 if (ableToOpen == 1) {
388 m->mothurOut("Unable to open " + countfileNames[i] + ". It will be disregarded."); m->mothurOutEndLine();
389 //erase from file list
390 countfileNames.erase(countfileNames.begin()+i);
393 m->setCountTableFile(countfileNames[i]);
399 if (countfileNames.size() != 0) { hasCount = true; }
401 //make sure there is at least one valid file left
402 if (hasName && hasCount) { m->mothurOut("[ERROR]: You must enter ONLY ONE of the following: count or name."); m->mothurOutEndLine(); abort = true; }
404 if (!hasName && hasCount) { nameFileNames = countfileNames; }
406 if ((hasCount || hasName) && (nameFileNames.size() != fastaFileNames.size())) { m->mothurOut("[ERROR]: The number of name or count files does not match the number of fastafiles, please correct."); m->mothurOutEndLine(); abort=true; }
408 bool hasGroup = true;
409 groupfile = validParameter.validFile(parameters, "group", false);
410 if (groupfile == "not found") { groupfile = ""; hasGroup = false; }
412 m->splitAtDash(groupfile, groupFileNames);
414 //go through files and make sure they are good, if not, then disregard them
415 for (int i = 0; i < groupFileNames.size(); i++) {
418 if (groupFileNames[i] == "current") {
419 groupFileNames[i] = m->getGroupFile();
420 if (groupFileNames[i] != "") { m->mothurOut("Using " + groupFileNames[i] + " as input file for the group parameter where you had given current."); m->mothurOutEndLine(); }
422 m->mothurOut("You have no current namefile, ignoring current."); m->mothurOutEndLine(); ignore=true;
423 //erase from file list
424 groupFileNames.erase(groupFileNames.begin()+i);
431 if (inputDir != "") {
432 string path = m->hasPath(groupFileNames[i]);
433 //if the user has not given a path then, add inputdir. else leave path alone.
434 if (path == "") { groupFileNames[i] = inputDir + groupFileNames[i]; }
440 ableToOpen = m->openInputFile(groupFileNames[i], in, "noerror");
442 //if you can't open it, try default location
443 if (ableToOpen == 1) {
444 if (m->getDefaultPath() != "") { //default path is set
445 string tryPath = m->getDefaultPath() + m->getSimpleName(groupFileNames[i]);
446 m->mothurOut("Unable to open " + groupFileNames[i] + ". Trying default " + tryPath); m->mothurOutEndLine();
448 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
450 groupFileNames[i] = tryPath;
454 if (ableToOpen == 1) {
455 if (m->getOutputDir() != "") { //default path is set
456 string tryPath = m->getOutputDir() + m->getSimpleName(groupFileNames[i]);
457 m->mothurOut("Unable to open " + groupFileNames[i] + ". Trying output directory " + tryPath); m->mothurOutEndLine();
459 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
461 groupFileNames[i] = tryPath;
467 if (ableToOpen == 1) {
468 m->mothurOut("Unable to open " + groupFileNames[i] + ". It will be disregarded."); m->mothurOutEndLine();
469 //erase from file list
470 groupFileNames.erase(groupFileNames.begin()+i);
473 m->setGroupFile(groupFileNames[i]);
478 //make sure there is at least one valid file left
479 if (groupFileNames.size() == 0) { m->mothurOut("[ERROR]: no valid group files."); m->mothurOutEndLine(); abort = true; }
482 if (hasGroup && (groupFileNames.size() != fastaFileNames.size())) { m->mothurOut("[ERROR]: The number of groupfiles does not match the number of fastafiles, please correct."); m->mothurOutEndLine(); abort=true; }
484 if (hasGroup && hasCount) { m->mothurOut("[ERROR]: You must enter ONLY ONE of the following: count or group."); m->mothurOutEndLine(); abort = true; }
485 //if the user changes the output directory command factory will send this info to us in the output parameter
486 outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = ""; }
489 //if the user changes the output directory command factory will send this info to us in the output parameter
490 outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = ""; }
493 it = parameters.find("reference");
494 //user has given a template file
495 if(it != parameters.end()){
496 if (it->second == "self") { templatefile = "self"; }
498 path = m->hasPath(it->second);
499 //if the user has not given a path then, add inputdir. else leave path alone.
500 if (path == "") { parameters["reference"] = inputDir + it->second; }
502 templatefile = validParameter.validFile(parameters, "reference", true);
503 if (templatefile == "not open") { abort = true; }
504 else if (templatefile == "not found") { //check for saved reference sequences
505 if (rdb->getSavedReference() != "") {
506 templatefile = rdb->getSavedReference();
507 m->mothurOutEndLine(); m->mothurOut("Using sequences from " + rdb->getSavedReference() + "."); m->mothurOutEndLine();
509 m->mothurOut("[ERROR]: You don't have any saved reference sequences and the reference parameter is a required.");
510 m->mothurOutEndLine();
515 }else if (hasName) { templatefile = "self"; }
516 else if (hasCount) { templatefile = "self"; }
518 if (rdb->getSavedReference() != "") {
519 templatefile = rdb->getSavedReference();
520 m->mothurOutEndLine(); m->mothurOut("Using sequences from " + rdb->getSavedReference() + "."); m->mothurOutEndLine();
522 m->mothurOut("[ERROR]: You don't have any saved reference sequences and the reference parameter is a required.");
523 m->mothurOutEndLine();
524 templatefile = ""; abort = true;
528 string temp = validParameter.validFile(parameters, "processors", false); if (temp == "not found"){ temp = m->getProcessors(); }
529 m->setProcessors(temp);
530 m->mothurConvert(temp, processors);
532 abskew = validParameter.validFile(parameters, "abskew", false); if (abskew == "not found"){ useAbskew = false; abskew = "1.9"; }else{ useAbskew = true; }
533 if (useAbskew && templatefile != "self") { m->mothurOut("The abskew parameter is only valid with template=self, ignoring."); m->mothurOutEndLine(); useAbskew = false; }
535 temp = validParameter.validFile(parameters, "chimealns", false); if (temp == "not found") { temp = "f"; }
536 chimealns = m->isTrue(temp);
538 minh = validParameter.validFile(parameters, "minh", false); if (minh == "not found") { useMinH = false; minh = "0.3"; } else{ useMinH = true; }
539 mindiv = validParameter.validFile(parameters, "mindiv", false); if (mindiv == "not found") { useMindiv = false; mindiv = "0.5"; } else{ useMindiv = true; }
540 xn = validParameter.validFile(parameters, "xn", false); if (xn == "not found") { useXn = false; xn = "8.0"; } else{ useXn = true; }
541 dn = validParameter.validFile(parameters, "dn", false); if (dn == "not found") { useDn = false; dn = "1.4"; } else{ useDn = true; }
542 xa = validParameter.validFile(parameters, "xa", false); if (xa == "not found") { useXa = false; xa = "1"; } else{ useXa = true; }
543 chunks = validParameter.validFile(parameters, "chunks", false); if (chunks == "not found") { useChunks = false; chunks = "4"; } else{ useChunks = true; }
544 minchunk = validParameter.validFile(parameters, "minchunk", false); if (minchunk == "not found") { useMinchunk = false; minchunk = "64"; } else{ useMinchunk = true; }
545 idsmoothwindow = validParameter.validFile(parameters, "idsmoothwindow", false); if (idsmoothwindow == "not found") { useIdsmoothwindow = false; idsmoothwindow = "32"; } else{ useIdsmoothwindow = true; }
546 //minsmoothid = validParameter.validFile(parameters, "minsmoothid", false); if (minsmoothid == "not found") { useMinsmoothid = false; minsmoothid = "0.95"; } else{ useMinsmoothid = true; }
547 maxp = validParameter.validFile(parameters, "maxp", false); if (maxp == "not found") { useMaxp = false; maxp = "2"; } else{ useMaxp = true; }
548 minlen = validParameter.validFile(parameters, "minlen", false); if (minlen == "not found") { useMinlen = false; minlen = "10"; } else{ useMinlen = true; }
549 maxlen = validParameter.validFile(parameters, "maxlen", false); if (maxlen == "not found") { useMaxlen = false; maxlen = "10000"; } else{ useMaxlen = true; }
551 strand = validParameter.validFile(parameters, "strand", false); if (strand == "not found") { strand = ""; }
553 temp = validParameter.validFile(parameters, "ucl", false); if (temp == "not found") { temp = "f"; }
554 ucl = m->isTrue(temp);
556 queryfract = validParameter.validFile(parameters, "queryfract", false); if (queryfract == "not found") { useQueryfract = false; queryfract = "0.5"; } else{ useQueryfract = true; }
557 if (!ucl && useQueryfract) { m->mothurOut("queryfact may only be used when ucl=t, ignoring."); m->mothurOutEndLine(); useQueryfract = false; }
559 temp = validParameter.validFile(parameters, "skipgaps", false); if (temp == "not found") { temp = "t"; }
560 skipgaps = m->isTrue(temp);
562 temp = validParameter.validFile(parameters, "skipgaps2", false); if (temp == "not found") { temp = "t"; }
563 skipgaps2 = m->isTrue(temp);
566 temp = validParameter.validFile(parameters, "dereplicate", false);
567 if (temp == "not found") {
568 if (groupfile != "") { temp = "false"; }
569 else { temp = "true"; }
571 dups = m->isTrue(temp);
574 if (hasName && (templatefile != "self")) { m->mothurOut("You have provided a namefile and the reference parameter is not set to self. I am not sure what reference you are trying to use, aborting."); m->mothurOutEndLine(); abort=true; }
575 if (hasGroup && (templatefile != "self")) { m->mothurOut("You have provided a group file and the reference parameter is not set to self. I am not sure what reference you are trying to use, aborting."); m->mothurOutEndLine(); abort=true; }
577 //look for uchime exe
579 string tempPath = path;
580 for (int i = 0; i < path.length(); i++) { tempPath[i] = tolower(path[i]); }
581 path = path.substr(0, (tempPath.find_last_of('m')));
583 string uchimeCommand;
584 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
585 uchimeCommand = path + "uchime"; // format the database, -o option gives us the ability
587 m->mothurOut("[DEBUG]: Uchime location using \"which uchime\" = ");
588 Command* newCommand = new SystemCommand("which uchime"); m->mothurOutEndLine();
589 newCommand->execute();
591 m->mothurOut("[DEBUG]: Mothur's location using \"which mothur\" = ");
592 newCommand = new SystemCommand("which mothur"); m->mothurOutEndLine();
593 newCommand->execute();
597 uchimeCommand = path + "uchime.exe";
600 //test to make sure uchime exists
602 uchimeCommand = m->getFullPathName(uchimeCommand);
603 int ableToOpen = m->openInputFile(uchimeCommand, in, "no error"); in.close();
604 if(ableToOpen == 1) {
605 m->mothurOut(uchimeCommand + " file does not exist. Checking path... \n");
606 //check to see if uchime is in the path??
608 string uLocation = m->findProgramPath("uchime");
612 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
613 ableToOpen = m->openInputFile(uLocation, in2, "no error"); in2.close();
615 ableToOpen = m->openInputFile((uLocation + ".exe"), in2, "no error"); in2.close();
618 if(ableToOpen == 1) { m->mothurOut("[ERROR]: " + uLocation + " file does not exist. mothur requires the uchime executable."); m->mothurOutEndLine(); abort = true; }
619 else { m->mothurOut("Found uchime in your path, using " + uLocation + "\n");uchimeLocation = uLocation; }
620 }else { uchimeLocation = uchimeCommand; }
622 uchimeLocation = m->getFullPathName(uchimeLocation);
625 catch(exception& e) {
626 m->errorOut(e, "ChimeraSlayerCommand", "ChimeraSlayerCommand");
630 //***************************************************************************************************************
632 int ChimeraUchimeCommand::execute(){
635 if (abort == true) { if (calledHelp) { return 0; } return 2; }
637 m->mothurOut("\nuchime by Robert C. Edgar\nhttp://drive5.com/uchime\nThis code is donated to the public domain.\n\n");
639 for (int s = 0; s < fastaFileNames.size(); s++) {
641 m->mothurOut("Checking sequences from " + fastaFileNames[s] + " ..." ); m->mothurOutEndLine();
643 int start = time(NULL);
644 string nameFile = "";
645 if (outputDir == "") { outputDir = m->hasPath(fastaFileNames[s]); }//if user entered a file with a path then preserve it
646 map<string, string> variables;
647 variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s]));
648 string outputFileName = getOutputFileName("chimera", variables);
649 string accnosFileName = getOutputFileName("accnos", variables);
650 string alnsFileName = getOutputFileName("alns", variables);
651 string newFasta = m->getRootName(fastaFileNames[s]) + "temp";
652 string newCountFile = "";
654 //you provided a groupfile
655 string groupFile = "";
656 bool hasGroup = false;
657 if (groupFileNames.size() != 0) { groupFile = groupFileNames[s]; hasGroup = true; }
660 if (ct.testGroups(nameFileNames[s])) { hasGroup = true; }
661 variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(nameFileNames[s]));
662 newCountFile = getOutputFileName("count", variables);
665 if ((templatefile == "self") && (!hasGroup)) { //you want to run uchime with a template=self and no groups
667 if (processors != 1) { m->mothurOut("When using template=self, mothur can only use 1 processor, continuing."); m->mothurOutEndLine(); processors = 1; }
668 if (nameFileNames.size() != 0) { //you provided a namefile and we don't need to create one
669 nameFile = nameFileNames[s];
670 }else { nameFile = getNamesFile(fastaFileNames[s]); }
672 map<string, string> seqs;
673 readFasta(fastaFileNames[s], seqs); if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } return 0; }
676 vector<seqPriorityNode> nameMapCount;
680 ct.readTable(nameFile);
681 for(map<string, string>::iterator it = seqs.begin(); it != seqs.end(); it++) {
682 int num = ct.getNumSeqs(it->first);
683 if (num == 0) { error = 1; }
685 seqPriorityNode temp(num, it->second, it->first);
686 nameMapCount.push_back(temp);
690 error = m->readNames(nameFile, nameMapCount, seqs); if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } return 0; }
692 if (error == 1) { for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } return 0; }
693 if (seqs.size() != nameMapCount.size()) { m->mothurOut( "The number of sequences in your fastafile does not match the number of sequences in your namefile, aborting."); m->mothurOutEndLine(); for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } return 0; }
695 printFile(nameMapCount, newFasta);
696 fastaFileNames[s] = newFasta;
699 if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } return 0; }
702 if (nameFileNames.size() != 0) { //you provided a namefile and we don't need to create one
703 nameFile = nameFileNames[s];
704 }else { nameFile = getNamesFile(fastaFileNames[s]); }
706 //Parse sequences by group
707 vector<string> groups;
708 map<string, string> uniqueNames;
710 cparser = new SequenceCountParser(nameFile, fastaFileNames[s]);
711 groups = cparser->getNamesOfGroups();
712 uniqueNames = cparser->getAllSeqsMap();
714 sparser = new SequenceParser(groupFile, fastaFileNames[s], nameFile);
715 groups = sparser->getNamesOfGroups();
716 uniqueNames = sparser->getAllSeqsMap();
719 if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } return 0; }
722 ofstream out, out1, out2;
723 m->openOutputFile(outputFileName, out); out.close();
724 m->openOutputFile(accnosFileName, out1); out1.close();
725 if (chimealns) { m->openOutputFile(alnsFileName, out2); out2.close(); }
728 if(processors == 1) { totalSeqs = driverGroups(outputFileName, newFasta, accnosFileName, alnsFileName, newCountFile, 0, groups.size(), groups);
730 if (hasCount && dups) {
731 CountTable c; c.readTable(nameFile);
732 if (!m->isBlank(newCountFile)) {
734 m->openInputFile(newCountFile, in2);
738 in2 >> name >> group; m->gobble(in2);
739 c.setAbund(name, group, 0);
743 m->mothurRemove(newCountFile);
744 c.printTable(newCountFile);
747 }else { totalSeqs = createProcessesGroups(outputFileName, newFasta, accnosFileName, alnsFileName, newCountFile, groups, nameFile, groupFile, fastaFileNames[s]); }
749 if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } return 0; }
753 int totalChimeras = deconvoluteResults(uniqueNames, outputFileName, accnosFileName, alnsFileName);
755 m->mothurOutEndLine(); m->mothurOut("It took " + toString(time(NULL) - start) + " secs to check " + toString(totalSeqs) + " sequences. " + toString(totalChimeras) + " chimeras were found."); m->mothurOutEndLine();
756 m->mothurOut("The number of sequences checked may be larger than the number of unique sequences because some sequences are found in several samples."); m->mothurOutEndLine();
760 set<string> doNotRemove;
761 CountTable c; c.readTable(newCountFile);
762 vector<string> namesInTable = c.getNamesOfSeqs();
763 for (int i = 0; i < namesInTable.size(); i++) {
764 int temp = c.getNumSeqs(namesInTable[i]);
765 if (temp == 0) { c.remove(namesInTable[i]); }
766 else { doNotRemove.insert((namesInTable[i])); }
768 //remove names we want to keep from accnos file.
769 set<string> accnosNames = m->readAccnos(accnosFileName);
771 m->openOutputFile(accnosFileName, out2);
772 for (set<string>::iterator it = accnosNames.begin(); it != accnosNames.end(); it++) {
773 if (doNotRemove.count(*it) == 0) { out2 << (*it) << endl; }
776 c.printTable(newCountFile);
777 outputNames.push_back(newCountFile); outputTypes["count"].push_back(newCountFile);
781 if (hasCount) { delete cparser; }
782 else { delete sparser; }
784 if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } return 0; }
787 if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } return 0; }
792 if(processors == 1){ numSeqs = driver(outputFileName, fastaFileNames[s], accnosFileName, alnsFileName, numChimeras); }
793 else{ numSeqs = createProcesses(outputFileName, fastaFileNames[s], accnosFileName, alnsFileName, numChimeras); }
797 m->openOutputFile(outputFileName+".temp", out);
798 out << "Score\tQuery\tParentA\tParentB\tIdQM\tIdQA\tIdQB\tIdAB\tIdQT\tLY\tLN\tLA\tRY\tRN\tRA\tDiv\tYN\n";
801 m->appendFiles(outputFileName, outputFileName+".temp");
802 m->mothurRemove(outputFileName); rename((outputFileName+".temp").c_str(), outputFileName.c_str());
804 if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } return 0; }
806 //remove file made for uchime
807 if (templatefile == "self") { m->mothurRemove(fastaFileNames[s]); }
809 m->mothurOutEndLine(); m->mothurOut("It took " + toString(time(NULL) - start) + " secs to check " + toString(numSeqs) + " sequences. " + toString(numChimeras) + " chimeras were found."); m->mothurOutEndLine();
812 outputNames.push_back(outputFileName); outputTypes["chimera"].push_back(outputFileName);
813 outputNames.push_back(accnosFileName); outputTypes["accnos"].push_back(accnosFileName);
814 if (chimealns) { outputNames.push_back(alnsFileName); outputTypes["alns"].push_back(alnsFileName); }
817 //set accnos file as new current accnosfile
819 itTypes = outputTypes.find("accnos");
820 if (itTypes != outputTypes.end()) {
821 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setAccnosFile(current); }
824 itTypes = outputTypes.find("count");
825 if (itTypes != outputTypes.end()) {
826 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setCountTableFile(current); }
829 m->mothurOutEndLine();
830 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
831 for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); }
832 m->mothurOutEndLine();
837 catch(exception& e) {
838 m->errorOut(e, "ChimeraUchimeCommand", "execute");
842 //**********************************************************************************************************************
843 int ChimeraUchimeCommand::deconvoluteResults(map<string, string>& uniqueNames, string outputFileName, string accnosFileName, string alnsFileName){
845 map<string, string>::iterator itUnique;
849 m->openOutputFile(accnosFileName+".temp", out2);
852 set<string> namesInFile; //this is so if a sequence is found to be chimera in several samples we dont write it to the results file more than once
853 set<string>::iterator itNames;
854 set<string> chimerasInFile;
855 set<string>::iterator itChimeras;
857 if (!m->isBlank(accnosFileName)) {
860 m->openInputFile(accnosFileName, in2);
863 if (m->control_pressed) { in2.close(); out2.close(); m->mothurRemove(outputFileName); m->mothurRemove((accnosFileName+".temp")); return 0; }
865 in2 >> name; m->gobble(in2);
868 itUnique = uniqueNames.find(name);
870 if (itUnique == uniqueNames.end()) { m->mothurOut("[ERROR]: trouble parsing accnos results. Cannot find " + name + "."); m->mothurOutEndLine(); m->control_pressed = true; }
872 itChimeras = chimerasInFile.find((itUnique->second));
874 if (itChimeras == chimerasInFile.end()) {
875 out2 << itUnique->second << endl;
876 chimerasInFile.insert((itUnique->second));
885 m->mothurRemove(accnosFileName);
886 rename((accnosFileName+".temp").c_str(), accnosFileName.c_str());
892 m->openInputFile(outputFileName, in);
895 m->openOutputFile(outputFileName+".temp", out); out.setf(ios::fixed, ios::floatfield); out.setf(ios::showpoint);
896 out << "Score\tQuery\tParentA\tParentB\tIdQM\tIdQA\tIdQB\tIdAB\tIdQT\tLY\tLN\tLA\tRY\tRN\tRA\tDiv\tYN\n";
899 string parent1, parent2, temp2, temp3, temp4, temp5, temp6, temp7, temp8, temp9, temp10, temp11, temp12, temp13, flag;
902 //assumptions - in file each read will always look like - if uchime source is updated, revisit this code.
903 /* 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
904 0.000000 F11Fcsw_33372/ab=18/ * * * * * * * * * * * * * * N
905 0.018300 F11Fcsw_14980/ab=16/ F11Fcsw_1915/ab=35/ F11Fcsw_6032/ab=42/ 79.9 78.7 78.2 78.7 79.2 3 0 5 11 10 20 1.46 N
910 if (m->control_pressed) { in.close(); out.close(); m->mothurRemove((outputFileName+".temp")); return 0; }
913 in >> temp1; m->gobble(in);
914 in >> name; m->gobble(in);
915 in >> parent1; m->gobble(in);
916 in >> parent2; m->gobble(in);
917 in >> temp2 >> temp3 >> temp4 >> temp5 >> temp6 >> temp7 >> temp8 >> temp9 >> temp10 >> temp11 >> temp12 >> temp13 >> flag;
920 //parse name - name will look like U68590/ab=1/
921 string restOfName = "";
922 int pos = name.find_first_of('/');
923 if (pos != string::npos) {
924 restOfName = name.substr(pos);
925 name = name.substr(0, pos);
929 itUnique = uniqueNames.find(name);
931 if (itUnique == uniqueNames.end()) { m->mothurOut("[ERROR]: trouble parsing chimera results. Cannot find "+ name + "."); m->mothurOutEndLine(); m->control_pressed = true; }
933 name = itUnique->second;
934 //is this name already in the file
935 itNames = namesInFile.find((name));
937 if (itNames == namesInFile.end()) { //no not in file
938 if (flag == "N") { //are you really a no??
939 //is this sequence really not chimeric??
940 itChimeras = chimerasInFile.find(name);
942 //then you really are a no so print, otherwise skip
943 if (itChimeras == chimerasInFile.end()) { print = true; }
944 }else{ print = true; }
949 out << temp1 << '\t' << name << restOfName << '\t';
950 namesInFile.insert(name);
952 //parse parent1 names
953 if (parent1 != "*") {
955 pos = parent1.find_first_of('/');
956 if (pos != string::npos) {
957 restOfName = parent1.substr(pos);
958 parent1 = parent1.substr(0, pos);
961 itUnique = uniqueNames.find(parent1);
962 if (itUnique == uniqueNames.end()) { m->mothurOut("[ERROR]: trouble parsing chimera results. Cannot find parentA "+ parent1 + "."); m->mothurOutEndLine(); m->control_pressed = true; }
963 else { out << itUnique->second << restOfName << '\t'; }
964 }else { out << parent1 << '\t'; }
966 //parse parent2 names
967 if (parent2 != "*") {
969 pos = parent2.find_first_of('/');
970 if (pos != string::npos) {
971 restOfName = parent2.substr(pos);
972 parent2 = parent2.substr(0, pos);
975 itUnique = uniqueNames.find(parent2);
976 if (itUnique == uniqueNames.end()) { m->mothurOut("[ERROR]: trouble parsing chimera results. Cannot find parentB "+ parent2 + "."); m->mothurOutEndLine(); m->control_pressed = true; }
977 else { out << itUnique->second << restOfName << '\t'; }
978 }else { out << parent2 << '\t'; }
980 out << temp2 << '\t' << temp3 << '\t' << temp4 << '\t' << temp5 << '\t' << temp6 << '\t' << temp7 << '\t' << temp8 << '\t' << temp9 << '\t' << temp10 << '\t' << temp11 << '\t' << temp12 << temp13 << '\t' << flag << endl;
986 m->mothurRemove(outputFileName);
987 rename((outputFileName+".temp").c_str(), outputFileName.c_str());
991 //assumptions - in file each read will always look like - if uchime source is updated, revisit this code.
993 ------------------------------------------------------------------------
994 Query ( 179 nt) F21Fcsw_11639/ab=591/
995 ParentA ( 179 nt) F11Fcsw_6529/ab=1625/
996 ParentB ( 181 nt) F21Fcsw_12128/ab=1827/
998 A 1 AAGgAAGAtTAATACaagATGgCaTCatgAGtccgCATgTtcAcatGATTAAAG--gTaTtcCGGTagacGATGGGGATG 78
999 Q 1 AAGTAAGACTAATACCCAATGACGTCTCTAGAAGACATCTGAAAGAGATTAAAG--ATTTATCGGTGATGGATGGGGATG 78
1000 B 1 AAGgAAGAtTAATcCaggATGggaTCatgAGttcACATgTccgcatGATTAAAGgtATTTtcCGGTagacGATGGGGATG 80
1001 Diffs N N A N?N N N NNN N?NB N ?NaNNN B B NN NNNN
1002 Votes 0 0 + 000 0 0 000 000+ 0 00!000 + 00 0000
1003 Model AAAAAAAAAAAAAAAAAAAAAAxBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB
1005 A 79 CGTtccATTAGaTaGTaGGCGGGGTAACGGCCCACCtAGtCttCGATggaTAGGGGTTCTGAGAGGAAGGTCCCCCACAT 158
1006 Q 79 CGTCTGATTAGCTTGTTGGCGGGGTAACGGCCCACCAAGGCAACGATCAGTAGGGGTTCTGAGAGGAAGGTCCCCCACAT 158
1007 B 81 CGTtccATTAGaTaGTaGGCGGGGTAACGGCCCACCtAGtCAACGATggaTAGGGGTTCTGAGAGGAAGGTCCCCCACAT 160
1008 Diffs NNN N N N N N BB NNN
1009 Votes 000 0 0 0 0 0 ++ 000
1010 Model BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB
1012 A 159 TGGAACTGAGACACGGTCCAA 179
1013 Q 159 TGGAACTGAGACACGGTCCAA 179
1014 B 161 TGGAACTGAGACACGGTCCAA 181
1017 Model BBBBBBBBBBBBBBBBBBBBB
1019 Ids. QA 76.6%, QB 77.7%, AB 93.7%, QModel 78.9%, Div. +1.5%
1020 Diffs Left 7: N 0, A 6, Y 1 (14.3%); Right 35: N 1, A 30, Y 4 (11.4%), Score 0.0047
1024 m->openInputFile(alnsFileName, in3);
1027 m->openOutputFile(alnsFileName+".temp", out3); out3.setf(ios::fixed, ios::floatfield); out3.setf(ios::showpoint);
1030 namesInFile.clear();
1033 while (!in3.eof()) {
1034 if (m->control_pressed) { in3.close(); out3.close(); m->mothurRemove(outputFileName); m->mothurRemove((accnosFileName)); m->mothurRemove((alnsFileName+".temp")); return 0; }
1037 line = m->getline(in3);
1041 istringstream iss(line);
1044 //are you a name line
1045 if ((temp == "Query") || (temp == "ParentA") || (temp == "ParentB")) {
1047 for (int i = 0; i < line.length(); i++) {
1049 if (line[i] == ')') { break; }
1050 else { out3 << line[i]; }
1053 if (spot == (line.length() - 1)) { m->mothurOut("[ERROR]: could not line sequence name in line " + line + "."); m->mothurOutEndLine(); m->control_pressed = true; }
1054 else if ((spot+2) > (line.length() - 1)) { m->mothurOut("[ERROR]: could not line sequence name in line " + line + "."); m->mothurOutEndLine(); m->control_pressed = true; }
1056 out << line[spot] << line[spot+1];
1058 name = line.substr(spot+2);
1060 //parse name - name will either look like U68590/ab=1/ or U68590
1061 string restOfName = "";
1062 int pos = name.find_first_of('/');
1063 if (pos != string::npos) {
1064 restOfName = name.substr(pos);
1065 name = name.substr(0, pos);
1069 itUnique = uniqueNames.find(name);
1071 if (itUnique == uniqueNames.end()) { m->mothurOut("[ERROR]: trouble parsing alns results. Cannot find "+ name + "."); m->mothurOutEndLine();m->control_pressed = true; }
1073 //only limit repeats on query names
1074 if (temp == "Query") {
1075 itNames = namesInFile.find((itUnique->second));
1077 if (itNames == namesInFile.end()) {
1078 out << itUnique->second << restOfName << endl;
1079 namesInFile.insert((itUnique->second));
1081 }else { out << itUnique->second << restOfName << endl; }
1086 }else { //not need to alter line
1087 out3 << line << endl;
1089 }else { out3 << endl; }
1094 m->mothurRemove(alnsFileName);
1095 rename((alnsFileName+".temp").c_str(), alnsFileName.c_str());
1100 catch(exception& e) {
1101 m->errorOut(e, "ChimeraUchimeCommand", "deconvoluteResults");
1105 //**********************************************************************************************************************
1106 int ChimeraUchimeCommand::printFile(vector<seqPriorityNode>& nameMapCount, string filename){
1109 sort(nameMapCount.begin(), nameMapCount.end(), compareSeqPriorityNodes);
1112 m->openOutputFile(filename, out);
1114 //print new file in order of
1115 for (int i = 0; i < nameMapCount.size(); i++) {
1116 out << ">" << nameMapCount[i].name << "/ab=" << nameMapCount[i].numIdentical << "/" << endl << nameMapCount[i].seq << endl;
1122 catch(exception& e) {
1123 m->errorOut(e, "ChimeraUchimeCommand", "printFile");
1127 //**********************************************************************************************************************
1128 int ChimeraUchimeCommand::readFasta(string filename, map<string, string>& seqs){
1130 //create input file for uchime
1131 //read through fastafile and store info
1133 m->openInputFile(filename, in);
1137 if (m->control_pressed) { in.close(); return 0; }
1139 Sequence seq(in); m->gobble(in);
1140 seqs[seq.getName()] = seq.getAligned();
1146 catch(exception& e) {
1147 m->errorOut(e, "ChimeraUchimeCommand", "readFasta");
1151 //**********************************************************************************************************************
1153 string ChimeraUchimeCommand::getNamesFile(string& inputFile){
1155 string nameFile = "";
1157 m->mothurOutEndLine(); m->mothurOut("No namesfile given, running unique.seqs command to generate one."); m->mothurOutEndLine(); m->mothurOutEndLine();
1159 //use unique.seqs to create new name and fastafile
1160 string inputString = "fasta=" + inputFile;
1161 m->mothurOut("/******************************************/"); m->mothurOutEndLine();
1162 m->mothurOut("Running command: unique.seqs(" + inputString + ")"); m->mothurOutEndLine();
1163 m->mothurCalling = true;
1165 Command* uniqueCommand = new DeconvoluteCommand(inputString);
1166 uniqueCommand->execute();
1168 map<string, vector<string> > filenames = uniqueCommand->getOutputFiles();
1170 delete uniqueCommand;
1171 m->mothurCalling = false;
1172 m->mothurOut("/******************************************/"); m->mothurOutEndLine();
1174 nameFile = filenames["name"][0];
1175 inputFile = filenames["fasta"][0];
1179 catch(exception& e) {
1180 m->errorOut(e, "ChimeraUchimeCommand", "getNamesFile");
1184 //**********************************************************************************************************************
1185 int ChimeraUchimeCommand::driverGroups(string outputFName, string filename, string accnos, string alns, string countlist, int start, int end, vector<string> groups){
1189 int numChimeras = 0;
1192 ofstream outCountList;
1193 if (hasCount && dups) { m->openOutputFile(countlist, outCountList); }
1195 for (int i = start; i < end; i++) {
1196 int start = time(NULL); if (m->control_pressed) { outCountList.close(); m->mothurRemove(countlist); return 0; }
1199 if (hasCount) { error = cparser->getSeqs(groups[i], filename, true); if ((error == 1) || m->control_pressed) { return 0; } }
1200 else { error = sparser->getSeqs(groups[i], filename, true); if ((error == 1) || m->control_pressed) { return 0; } }
1202 int numSeqs = driver((outputFName + groups[i]), filename, (accnos+groups[i]), (alns+ groups[i]), numChimeras);
1203 totalSeqs += numSeqs;
1205 if (m->control_pressed) { return 0; }
1207 //remove file made for uchime
1208 if (!m->debug) { m->mothurRemove(filename); }
1209 else { m->mothurOut("[DEBUG]: saving file: " + filename + ".\n"); }
1211 //if we provided a count file with group info and set dereplicate=t, then we want to create a *.pick.count_table
1212 //This table will zero out group counts for seqs determined to be chimeric by that group.
1214 if (!m->isBlank(accnos+groups[i])) {
1216 m->openInputFile(accnos+groups[i], in);
1220 in >> name; m->gobble(in);
1221 outCountList << name << '\t' << groups[i] << endl;
1225 map<string, string> thisnamemap = sparser->getNameMap(groups[i]);
1226 map<string, string>::iterator itN;
1228 m->openOutputFile(accnos+groups[i]+".temp", out);
1230 in >> name; m->gobble(in);
1231 itN = thisnamemap.find(name);
1232 if (itN != thisnamemap.end()) {
1233 vector<string> tempNames; m->splitAtComma(itN->second, tempNames);
1234 for (int j = 0; j < tempNames.size(); j++) { out << tempNames[j] << endl; }
1236 }else { m->mothurOut("[ERROR]: parsing cannot find " + name + ".\n"); m->control_pressed = true; }
1240 m->renameFile(accnos+groups[i]+".temp", accnos+groups[i]);
1247 m->appendFiles((outputFName+groups[i]), outputFName); m->mothurRemove((outputFName+groups[i]));
1248 m->appendFiles((accnos+groups[i]), accnos); m->mothurRemove((accnos+groups[i]));
1249 if (chimealns) { m->appendFiles((alns+groups[i]), alns); m->mothurRemove((alns+groups[i])); }
1251 m->mothurOutEndLine(); m->mothurOut("It took " + toString(time(NULL) - start) + " secs to check " + toString(numSeqs) + " sequences from group " + groups[i] + "."); m->mothurOutEndLine();
1254 if (hasCount && dups) { outCountList.close(); }
1259 catch(exception& e) {
1260 m->errorOut(e, "ChimeraUchimeCommand", "driverGroups");
1264 //**********************************************************************************************************************
1266 int ChimeraUchimeCommand::driver(string outputFName, string filename, string accnos, string alns, int& numChimeras){
1269 outputFName = m->getFullPathName(outputFName);
1270 filename = m->getFullPathName(filename);
1271 alns = m->getFullPathName(alns);
1273 //to allow for spaces in the path
1274 outputFName = "\"" + outputFName + "\"";
1275 filename = "\"" + filename + "\"";
1276 alns = "\"" + alns + "\"";
1278 vector<char*> cPara;
1280 string uchimeCommand = uchimeLocation;
1281 uchimeCommand = "\"" + uchimeCommand + "\" ";
1284 tempUchime= new char[uchimeCommand.length()+1];
1286 strncat(tempUchime, uchimeCommand.c_str(), uchimeCommand.length());
1287 cPara.push_back(tempUchime);
1289 //are you using a reference file
1290 if (templatefile != "self") {
1291 string outputFileName = filename.substr(1, filename.length()-2) + ".uchime_formatted";
1292 prepFile(filename.substr(1, filename.length()-2), outputFileName);
1293 filename = outputFileName;
1294 filename = "\"" + filename + "\"";
1295 //add reference file
1296 char* tempRef = new char[5];
1297 //strcpy(tempRef, "--db");
1298 *tempRef = '\0'; strncat(tempRef, "--db", 4);
1299 cPara.push_back(tempRef);
1300 char* tempR = new char[templatefile.length()+1];
1301 //strcpy(tempR, templatefile.c_str());
1302 *tempR = '\0'; strncat(tempR, templatefile.c_str(), templatefile.length());
1303 cPara.push_back(tempR);
1306 char* tempIn = new char[8];
1307 *tempIn = '\0'; strncat(tempIn, "--input", 7);
1308 //strcpy(tempIn, "--input");
1309 cPara.push_back(tempIn);
1310 char* temp = new char[filename.length()+1];
1311 *temp = '\0'; strncat(temp, filename.c_str(), filename.length());
1312 //strcpy(temp, filename.c_str());
1313 cPara.push_back(temp);
1315 char* tempO = new char[12];
1316 *tempO = '\0'; strncat(tempO, "--uchimeout", 11);
1317 //strcpy(tempO, "--uchimeout");
1318 cPara.push_back(tempO);
1319 char* tempout = new char[outputFName.length()+1];
1320 //strcpy(tempout, outputFName.c_str());
1321 *tempout = '\0'; strncat(tempout, outputFName.c_str(), outputFName.length());
1322 cPara.push_back(tempout);
1325 char* tempA = new char[13];
1326 *tempA = '\0'; strncat(tempA, "--uchimealns", 12);
1327 //strcpy(tempA, "--uchimealns");
1328 cPara.push_back(tempA);
1329 char* tempa = new char[alns.length()+1];
1330 //strcpy(tempa, alns.c_str());
1331 *tempa = '\0'; strncat(tempa, alns.c_str(), alns.length());
1332 cPara.push_back(tempa);
1336 char* tempA = new char[9];
1337 *tempA = '\0'; strncat(tempA, "--strand", 8);
1338 cPara.push_back(tempA);
1339 char* tempa = new char[strand.length()+1];
1340 *tempa = '\0'; strncat(tempa, strand.c_str(), strand.length());
1341 cPara.push_back(tempa);
1345 char* tempskew = new char[9];
1346 *tempskew = '\0'; strncat(tempskew, "--abskew", 8);
1347 //strcpy(tempskew, "--abskew");
1348 cPara.push_back(tempskew);
1349 char* tempSkew = new char[abskew.length()+1];
1350 //strcpy(tempSkew, abskew.c_str());
1351 *tempSkew = '\0'; strncat(tempSkew, abskew.c_str(), abskew.length());
1352 cPara.push_back(tempSkew);
1356 char* tempminh = new char[7];
1357 *tempminh = '\0'; strncat(tempminh, "--minh", 6);
1358 //strcpy(tempminh, "--minh");
1359 cPara.push_back(tempminh);
1360 char* tempMinH = new char[minh.length()+1];
1361 *tempMinH = '\0'; strncat(tempMinH, minh.c_str(), minh.length());
1362 //strcpy(tempMinH, minh.c_str());
1363 cPara.push_back(tempMinH);
1367 char* tempmindiv = new char[9];
1368 *tempmindiv = '\0'; strncat(tempmindiv, "--mindiv", 8);
1369 //strcpy(tempmindiv, "--mindiv");
1370 cPara.push_back(tempmindiv);
1371 char* tempMindiv = new char[mindiv.length()+1];
1372 *tempMindiv = '\0'; strncat(tempMindiv, mindiv.c_str(), mindiv.length());
1373 //strcpy(tempMindiv, mindiv.c_str());
1374 cPara.push_back(tempMindiv);
1378 char* tempxn = new char[5];
1379 //strcpy(tempxn, "--xn");
1380 *tempxn = '\0'; strncat(tempxn, "--xn", 4);
1381 cPara.push_back(tempxn);
1382 char* tempXn = new char[xn.length()+1];
1383 //strcpy(tempXn, xn.c_str());
1384 *tempXn = '\0'; strncat(tempXn, xn.c_str(), xn.length());
1385 cPara.push_back(tempXn);
1389 char* tempdn = new char[5];
1390 //strcpy(tempdn, "--dn");
1391 *tempdn = '\0'; strncat(tempdn, "--dn", 4);
1392 cPara.push_back(tempdn);
1393 char* tempDn = new char[dn.length()+1];
1394 *tempDn = '\0'; strncat(tempDn, dn.c_str(), dn.length());
1395 //strcpy(tempDn, dn.c_str());
1396 cPara.push_back(tempDn);
1400 char* tempxa = new char[5];
1401 //strcpy(tempxa, "--xa");
1402 *tempxa = '\0'; strncat(tempxa, "--xa", 4);
1403 cPara.push_back(tempxa);
1404 char* tempXa = new char[xa.length()+1];
1405 *tempXa = '\0'; strncat(tempXa, xa.c_str(), xa.length());
1406 //strcpy(tempXa, xa.c_str());
1407 cPara.push_back(tempXa);
1411 char* tempchunks = new char[9];
1412 //strcpy(tempchunks, "--chunks");
1413 *tempchunks = '\0'; strncat(tempchunks, "--chunks", 8);
1414 cPara.push_back(tempchunks);
1415 char* tempChunks = new char[chunks.length()+1];
1416 *tempChunks = '\0'; strncat(tempChunks, chunks.c_str(), chunks.length());
1417 //strcpy(tempChunks, chunks.c_str());
1418 cPara.push_back(tempChunks);
1422 char* tempminchunk = new char[11];
1423 //strcpy(tempminchunk, "--minchunk");
1424 *tempminchunk = '\0'; strncat(tempminchunk, "--minchunk", 10);
1425 cPara.push_back(tempminchunk);
1426 char* tempMinchunk = new char[minchunk.length()+1];
1427 *tempMinchunk = '\0'; strncat(tempMinchunk, minchunk.c_str(), minchunk.length());
1428 //strcpy(tempMinchunk, minchunk.c_str());
1429 cPara.push_back(tempMinchunk);
1432 if (useIdsmoothwindow) {
1433 char* tempidsmoothwindow = new char[17];
1434 *tempidsmoothwindow = '\0'; strncat(tempidsmoothwindow, "--idsmoothwindow", 16);
1435 //strcpy(tempidsmoothwindow, "--idsmoothwindow");
1436 cPara.push_back(tempidsmoothwindow);
1437 char* tempIdsmoothwindow = new char[idsmoothwindow.length()+1];
1438 *tempIdsmoothwindow = '\0'; strncat(tempIdsmoothwindow, idsmoothwindow.c_str(), idsmoothwindow.length());
1439 //strcpy(tempIdsmoothwindow, idsmoothwindow.c_str());
1440 cPara.push_back(tempIdsmoothwindow);
1443 /*if (useMinsmoothid) {
1444 char* tempminsmoothid = new char[14];
1445 //strcpy(tempminsmoothid, "--minsmoothid");
1446 *tempminsmoothid = '\0'; strncat(tempminsmoothid, "--minsmoothid", 13);
1447 cPara.push_back(tempminsmoothid);
1448 char* tempMinsmoothid = new char[minsmoothid.length()+1];
1449 *tempMinsmoothid = '\0'; strncat(tempMinsmoothid, minsmoothid.c_str(), minsmoothid.length());
1450 //strcpy(tempMinsmoothid, minsmoothid.c_str());
1451 cPara.push_back(tempMinsmoothid);
1455 char* tempmaxp = new char[7];
1456 //strcpy(tempmaxp, "--maxp");
1457 *tempmaxp = '\0'; strncat(tempmaxp, "--maxp", 6);
1458 cPara.push_back(tempmaxp);
1459 char* tempMaxp = new char[maxp.length()+1];
1460 *tempMaxp = '\0'; strncat(tempMaxp, maxp.c_str(), maxp.length());
1461 //strcpy(tempMaxp, maxp.c_str());
1462 cPara.push_back(tempMaxp);
1466 char* tempskipgaps = new char[13];
1467 //strcpy(tempskipgaps, "--[no]skipgaps");
1468 *tempskipgaps = '\0'; strncat(tempskipgaps, "--noskipgaps", 12);
1469 cPara.push_back(tempskipgaps);
1473 char* tempskipgaps2 = new char[14];
1474 //strcpy(tempskipgaps2, "--[no]skipgaps2");
1475 *tempskipgaps2 = '\0'; strncat(tempskipgaps2, "--noskipgaps2", 13);
1476 cPara.push_back(tempskipgaps2);
1480 char* tempminlen = new char[9];
1481 *tempminlen = '\0'; strncat(tempminlen, "--minlen", 8);
1482 //strcpy(tempminlen, "--minlen");
1483 cPara.push_back(tempminlen);
1484 char* tempMinlen = new char[minlen.length()+1];
1485 //strcpy(tempMinlen, minlen.c_str());
1486 *tempMinlen = '\0'; strncat(tempMinlen, minlen.c_str(), minlen.length());
1487 cPara.push_back(tempMinlen);
1491 char* tempmaxlen = new char[9];
1492 //strcpy(tempmaxlen, "--maxlen");
1493 *tempmaxlen = '\0'; strncat(tempmaxlen, "--maxlen", 8);
1494 cPara.push_back(tempmaxlen);
1495 char* tempMaxlen = new char[maxlen.length()+1];
1496 *tempMaxlen = '\0'; strncat(tempMaxlen, maxlen.c_str(), maxlen.length());
1497 //strcpy(tempMaxlen, maxlen.c_str());
1498 cPara.push_back(tempMaxlen);
1502 char* tempucl = new char[5];
1503 strcpy(tempucl, "--ucl");
1504 cPara.push_back(tempucl);
1507 if (useQueryfract) {
1508 char* tempqueryfract = new char[13];
1509 *tempqueryfract = '\0'; strncat(tempqueryfract, "--queryfract", 12);
1510 //strcpy(tempqueryfract, "--queryfract");
1511 cPara.push_back(tempqueryfract);
1512 char* tempQueryfract = new char[queryfract.length()+1];
1513 *tempQueryfract = '\0'; strncat(tempQueryfract, queryfract.c_str(), queryfract.length());
1514 //strcpy(tempQueryfract, queryfract.c_str());
1515 cPara.push_back(tempQueryfract);
1519 char** uchimeParameters;
1520 uchimeParameters = new char*[cPara.size()];
1521 string commandString = "";
1522 for (int i = 0; i < cPara.size(); i++) { uchimeParameters[i] = cPara[i]; commandString += toString(cPara[i]) + " "; }
1523 //int numArgs = cPara.size();
1525 //uchime_main(numArgs, uchimeParameters);
1526 //cout << "commandString = " << commandString << endl;
1527 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
1529 commandString = "\"" + commandString + "\"";
1531 if (m->debug) { m->mothurOut("[DEBUG]: uchime command = " + commandString + ".\n"); }
1532 system(commandString.c_str());
1535 for(int i = 0; i < cPara.size(); i++) { delete cPara[i]; }
1536 delete[] uchimeParameters;
1538 //remove "" from filenames
1539 outputFName = outputFName.substr(1, outputFName.length()-2);
1540 filename = filename.substr(1, filename.length()-2);
1541 alns = alns.substr(1, alns.length()-2);
1543 if (m->control_pressed) { return 0; }
1545 //create accnos file from uchime results
1547 m->openInputFile(outputFName, in);
1550 m->openOutputFile(accnos, out);
1556 if (m->control_pressed) { break; }
1559 string chimeraFlag = "";
1560 //in >> chimeraFlag >> name;
1562 string line = m->getline(in);
1563 vector<string> pieces = m->splitWhiteSpace(line);
1564 if (pieces.size() > 2) {
1566 //fix name if needed
1567 if (templatefile == "self") {
1568 name = name.substr(0, name.length()-1); //rip off last /
1569 name = name.substr(0, name.find_last_of('/'));
1572 chimeraFlag = pieces[pieces.size()-1];
1574 //for (int i = 0; i < 15; i++) { in >> chimeraFlag; }
1577 if (chimeraFlag == "Y") { out << name << endl; numChimeras++; }
1583 //if (templatefile != "self") { m->mothurRemove(filename); }
1587 catch(exception& e) {
1588 m->errorOut(e, "ChimeraUchimeCommand", "driver");
1592 /**************************************************************************************************/
1593 //uchime can't handle some of the things allowed in mothurs fasta files. This functions "cleans up" the file.
1594 int ChimeraUchimeCommand::prepFile(string filename, string output) {
1598 m->openInputFile(filename, in);
1601 m->openOutputFile(output, out);
1604 if (m->control_pressed) { break; }
1606 Sequence seq(in); m->gobble(in);
1608 if (seq.getName() != "") { seq.printSequence(out); }
1615 catch(exception& e) {
1616 m->errorOut(e, "ChimeraUchimeCommand", "prepFile");
1620 /**************************************************************************************************/
1622 int ChimeraUchimeCommand::createProcesses(string outputFileName, string filename, string accnos, string alns, int& numChimeras) {
1628 vector<string> files;
1630 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
1631 //break up file into multiple files
1632 m->divideFile(filename, processors, files);
1634 if (m->control_pressed) { return 0; }
1636 //loop through and create all the processes you want
1637 while (process != processors) {
1641 processIDS.push_back(pid); //create map from line number to pid so you can append files in correct order later
1643 }else if (pid == 0){
1644 num = driver(outputFileName + toString(getpid()) + ".temp", files[process], accnos + toString(getpid()) + ".temp", alns + toString(getpid()) + ".temp", numChimeras);
1646 //pass numSeqs to parent
1648 string tempFile = outputFileName + toString(getpid()) + ".num.temp";
1649 m->openOutputFile(tempFile, out);
1651 out << numChimeras << endl;
1656 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine();
1657 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
1663 num = driver(outputFileName, files[0], accnos, alns, numChimeras);
1665 //force parent to wait until all the processes are done
1666 for (int i=0;i<processIDS.size();i++) {
1667 int temp = processIDS[i];
1671 for (int i = 0; i < processIDS.size(); i++) {
1673 string tempFile = outputFileName + toString(processIDS[i]) + ".num.temp";
1674 m->openInputFile(tempFile, in);
1677 in >> tempNum; m->gobble(in);
1680 numChimeras += tempNum;
1682 in.close(); m->mothurRemove(tempFile);
1685 //////////////////////////////////////////////////////////////////////////////////////////////////////
1686 //Windows version shared memory, so be careful when passing variables through the preClusterData struct.
1687 //Above fork() will clone, so memory is separate, but that's not the case with windows,
1688 //////////////////////////////////////////////////////////////////////////////////////////////////////
1693 map<int, ofstream*> filehandles;
1694 map<int, ofstream*>::iterator it3;
1697 for (int i = 0; i < processors; i++) {
1698 temp = new ofstream;
1699 filehandles[i] = temp;
1700 m->openOutputFile(filename+toString(i)+".temp", *(temp));
1701 files.push_back(filename+toString(i)+".temp");
1705 m->openInputFile(filename, in);
1709 if (m->control_pressed) { in.close(); for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) { (*(it3->second)).close(); delete it3->second; } return 0; }
1711 Sequence tempSeq(in); m->gobble(in);
1713 if (tempSeq.getName() != "") {
1714 tempSeq.printSequence(*(filehandles[spot]));
1716 if (spot == processors) { spot = 0; }
1722 for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) {
1723 (*(it3->second)).close();
1727 //sanity check for number of processors
1728 if (count < processors) { processors = count; }
1730 vector<uchimeData*> pDataArray;
1731 DWORD dwThreadIdArray[processors-1];
1732 HANDLE hThreadArray[processors-1];
1733 vector<string> dummy; //used so that we can use the same struct for MyUchimeSeqsThreadFunction and MyUchimeThreadFunction
1735 //Create processor worker threads.
1736 for( int i=1; i<processors; i++ ){
1737 // Allocate memory for thread data.
1738 string extension = toString(i) + ".temp";
1740 uchimeData* tempUchime = new uchimeData(outputFileName+extension, uchimeLocation, templatefile, files[i], "", "", "", accnos+extension, alns+extension, "", dummy, m, 0, 0, i);
1741 tempUchime->setBooleans(dups, useAbskew, chimealns, useMinH, useMindiv, useXn, useDn, useXa, useChunks, useMinchunk, useIdsmoothwindow, useMinsmoothid, useMaxp, skipgaps, skipgaps2, useMinlen, useMaxlen, ucl, useQueryfract, hasCount);
1742 tempUchime->setVariables(abskew, minh, mindiv, xn, dn, xa, chunks, minchunk, idsmoothwindow, minsmoothid, maxp, minlen, maxlen, queryfract, strand);
1744 pDataArray.push_back(tempUchime);
1745 processIDS.push_back(i);
1747 //MySeqSumThreadFunction is in header. It must be global or static to work with the threads.
1748 //default security attributes, thread function name, argument to thread function, use default creation flags, returns the thread identifier
1749 hThreadArray[i-1] = CreateThread(NULL, 0, MyUchimeSeqsThreadFunction, pDataArray[i-1], 0, &dwThreadIdArray[i-1]);
1753 //using the main process as a worker saves time and memory
1754 num = driver(outputFileName, files[0], accnos, alns, numChimeras);
1756 //Wait until all threads have terminated.
1757 WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE);
1759 //Close all thread handles and free memory allocations.
1760 for(int i=0; i < pDataArray.size(); i++){
1761 num += pDataArray[i]->count;
1762 numChimeras += pDataArray[i]->numChimeras;
1763 CloseHandle(hThreadArray[i]);
1764 delete pDataArray[i];
1768 //append output files
1769 for(int i=0;i<processIDS.size();i++){
1770 m->appendFiles((outputFileName + toString(processIDS[i]) + ".temp"), outputFileName);
1771 m->mothurRemove((outputFileName + toString(processIDS[i]) + ".temp"));
1773 m->appendFiles((accnos + toString(processIDS[i]) + ".temp"), accnos);
1774 m->mothurRemove((accnos + toString(processIDS[i]) + ".temp"));
1777 m->appendFiles((alns + toString(processIDS[i]) + ".temp"), alns);
1778 m->mothurRemove((alns + toString(processIDS[i]) + ".temp"));
1782 //get rid of the file pieces.
1783 for (int i = 0; i < files.size(); i++) { m->mothurRemove(files[i]); }
1786 catch(exception& e) {
1787 m->errorOut(e, "ChimeraUchimeCommand", "createProcesses");
1791 /**************************************************************************************************/
1793 int ChimeraUchimeCommand::createProcessesGroups(string outputFName, string filename, string accnos, string alns, string newCountFile, vector<string> groups, string nameFile, string groupFile, string fastaFile) {
1800 CountTable newCount;
1801 if (hasCount && dups) { newCount.readTable(nameFile); }
1804 if (groups.size() < processors) { processors = groups.size(); }
1806 //divide the groups between the processors
1807 vector<linePair> lines;
1808 int numGroupsPerProcessor = groups.size() / processors;
1809 for (int i = 0; i < processors; i++) {
1810 int startIndex = i * numGroupsPerProcessor;
1811 int endIndex = (i+1) * numGroupsPerProcessor;
1812 if(i == (processors - 1)){ endIndex = groups.size(); }
1813 lines.push_back(linePair(startIndex, endIndex));
1816 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
1818 //loop through and create all the processes you want
1819 while (process != processors) {
1823 processIDS.push_back(pid); //create map from line number to pid so you can append files in correct order later
1825 }else if (pid == 0){
1826 num = driverGroups(outputFName + toString(getpid()) + ".temp", filename + toString(getpid()) + ".temp", accnos + toString(getpid()) + ".temp", alns + toString(getpid()) + ".temp", accnos + ".byCount." + toString(getpid()) + ".temp", lines[process].start, lines[process].end, groups);
1828 //pass numSeqs to parent
1830 string tempFile = outputFName + toString(getpid()) + ".num.temp";
1831 m->openOutputFile(tempFile, out);
1837 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine();
1838 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
1844 num = driverGroups(outputFName, filename, accnos, alns, accnos + ".byCount", lines[0].start, lines[0].end, groups);
1846 //force parent to wait until all the processes are done
1847 for (int i=0;i<processIDS.size();i++) {
1848 int temp = processIDS[i];
1852 for (int i = 0; i < processIDS.size(); i++) {
1854 string tempFile = outputFName + toString(processIDS[i]) + ".num.temp";
1855 m->openInputFile(tempFile, in);
1856 if (!in.eof()) { int tempNum = 0; in >> tempNum; num += tempNum; }
1857 in.close(); m->mothurRemove(tempFile);
1861 //////////////////////////////////////////////////////////////////////////////////////////////////////
1862 //Windows version shared memory, so be careful when passing variables through the uchimeData struct.
1863 //Above fork() will clone, so memory is separate, but that's not the case with windows,
1864 //////////////////////////////////////////////////////////////////////////////////////////////////////
1866 vector<uchimeData*> pDataArray;
1867 DWORD dwThreadIdArray[processors-1];
1868 HANDLE hThreadArray[processors-1];
1870 //Create processor worker threads.
1871 for( int i=1; i<processors; i++ ){
1872 // Allocate memory for thread data.
1873 string extension = toString(i) + ".temp";
1875 uchimeData* tempUchime = new uchimeData(outputFName+extension, uchimeLocation, templatefile, filename+extension, fastaFile, nameFile, groupFile, accnos+extension, alns+extension, accnos+".byCount."+extension, groups, m, lines[i].start, lines[i].end, i);
1876 tempUchime->setBooleans(dups, useAbskew, chimealns, useMinH, useMindiv, useXn, useDn, useXa, useChunks, useMinchunk, useIdsmoothwindow, useMinsmoothid, useMaxp, skipgaps, skipgaps2, useMinlen, useMaxlen, ucl, useQueryfract, hasCount);
1877 tempUchime->setVariables(abskew, minh, mindiv, xn, dn, xa, chunks, minchunk, idsmoothwindow, minsmoothid, maxp, minlen, maxlen, queryfract, strand);
1879 pDataArray.push_back(tempUchime);
1880 processIDS.push_back(i);
1882 //MyUchimeThreadFunction is in header. It must be global or static to work with the threads.
1883 //default security attributes, thread function name, argument to thread function, use default creation flags, returns the thread identifier
1884 hThreadArray[i-1] = CreateThread(NULL, 0, MyUchimeThreadFunction, pDataArray[i-1], 0, &dwThreadIdArray[i-1]);
1888 //using the main process as a worker saves time and memory
1889 num = driverGroups(outputFName, filename, accnos, alns, accnos + ".byCount", lines[0].start, lines[0].end, groups);
1891 //Wait until all threads have terminated.
1892 WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE);
1894 //Close all thread handles and free memory allocations.
1895 for(int i=0; i < pDataArray.size(); i++){
1896 num += pDataArray[i]->count;
1897 CloseHandle(hThreadArray[i]);
1898 delete pDataArray[i];
1905 if (hasCount && dups) {
1906 if (!m->isBlank(accnos + ".byCount")) {
1908 m->openInputFile(accnos + ".byCount", in2);
1911 while (!in2.eof()) {
1912 in2 >> name >> group; m->gobble(in2);
1913 newCount.setAbund(name, group, 0);
1917 m->mothurRemove(accnos + ".byCount");
1920 //append output files
1921 for(int i=0;i<processIDS.size();i++){
1922 m->appendFiles((outputFName + toString(processIDS[i]) + ".temp"), outputFName);
1923 m->mothurRemove((outputFName + toString(processIDS[i]) + ".temp"));
1925 m->appendFiles((accnos + toString(processIDS[i]) + ".temp"), accnos);
1926 m->mothurRemove((accnos + toString(processIDS[i]) + ".temp"));
1929 m->appendFiles((alns + toString(processIDS[i]) + ".temp"), alns);
1930 m->mothurRemove((alns + toString(processIDS[i]) + ".temp"));
1933 if (hasCount && dups) {
1934 if (!m->isBlank(accnos + ".byCount." + toString(processIDS[i]) + ".temp")) {
1936 m->openInputFile(accnos + ".byCount." + toString(processIDS[i]) + ".temp", in2);
1939 while (!in2.eof()) {
1940 in2 >> name >> group; m->gobble(in2);
1941 newCount.setAbund(name, group, 0);
1945 m->mothurRemove(accnos + ".byCount." + toString(processIDS[i]) + ".temp");
1950 //print new *.pick.count_table
1951 if (hasCount && dups) { newCount.printTable(newCountFile); }
1956 catch(exception& e) {
1957 m->errorOut(e, "ChimeraUchimeCommand", "createProcessesGroups");
1961 /**************************************************************************************************/