2 * chimerauchimecommand.cpp
5 * Created by westcott on 5/13/11.
6 * Copyright 2011 Schloss Lab. All rights reserved.
10 #include "chimerauchimecommand.h"
11 #include "deconvolutecommand.h"
13 #include "sequence.hpp"
14 #include "referencedb.h"
15 #include "systemcommand.h"
17 //**********************************************************************************************************************
18 vector<string> ChimeraUchimeCommand::setParameters(){
20 CommandParameter ptemplate("reference", "InputTypes", "", "", "none", "none", "none","",false,true,true); parameters.push_back(ptemplate);
21 CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none","chimera-accnos",false,true,true); parameters.push_back(pfasta);
22 CommandParameter pname("name", "InputTypes", "", "", "NameCount", "none", "none","",false,false,true); parameters.push_back(pname);
23 CommandParameter pcount("count", "InputTypes", "", "", "NameCount-CountGroup", "none", "none","",false,false,true); parameters.push_back(pcount);
24 CommandParameter pgroup("group", "InputTypes", "", "", "CountGroup", "none", "none","",false,false,true); parameters.push_back(pgroup);
25 CommandParameter pprocessors("processors", "Number", "", "1", "", "", "","",false,false,true); parameters.push_back(pprocessors);
26 CommandParameter pstrand("strand", "String", "", "", "", "", "","",false,false); parameters.push_back(pstrand);
27 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
28 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
29 CommandParameter pabskew("abskew", "Number", "", "1.9", "", "", "","",false,false); parameters.push_back(pabskew);
30 CommandParameter pchimealns("chimealns", "Boolean", "", "F", "", "", "","alns",false,false); parameters.push_back(pchimealns);
31 CommandParameter pminh("minh", "Number", "", "0.3", "", "", "","",false,false); parameters.push_back(pminh);
32 CommandParameter pmindiv("mindiv", "Number", "", "0.5", "", "", "","",false,false); parameters.push_back(pmindiv);
33 CommandParameter pxn("xn", "Number", "", "8.0", "", "", "","",false,false); parameters.push_back(pxn);
34 CommandParameter pdn("dn", "Number", "", "1.4", "", "", "","",false,false); parameters.push_back(pdn);
35 CommandParameter pxa("xa", "Number", "", "1", "", "", "","",false,false); parameters.push_back(pxa);
36 CommandParameter pchunks("chunks", "Number", "", "4", "", "", "","",false,false); parameters.push_back(pchunks);
37 CommandParameter pminchunk("minchunk", "Number", "", "64", "", "", "","",false,false); parameters.push_back(pminchunk);
38 CommandParameter pidsmoothwindow("idsmoothwindow", "Number", "", "32", "", "", "","",false,false); parameters.push_back(pidsmoothwindow);
39 CommandParameter pdups("dereplicate", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(pdups);
41 //CommandParameter pminsmoothid("minsmoothid", "Number", "", "0.95", "", "", "",false,false); parameters.push_back(pminsmoothid);
42 CommandParameter pmaxp("maxp", "Number", "", "2", "", "", "","",false,false); parameters.push_back(pmaxp);
43 CommandParameter pskipgaps("skipgaps", "Boolean", "", "T", "", "", "","",false,false); parameters.push_back(pskipgaps);
44 CommandParameter pskipgaps2("skipgaps2", "Boolean", "", "T", "", "", "","",false,false); parameters.push_back(pskipgaps2);
45 CommandParameter pminlen("minlen", "Number", "", "10", "", "", "","",false,false); parameters.push_back(pminlen);
46 CommandParameter pmaxlen("maxlen", "Number", "", "10000", "", "", "","",false,false); parameters.push_back(pmaxlen);
47 CommandParameter pucl("ucl", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(pucl);
48 CommandParameter pqueryfract("queryfract", "Number", "", "0.5", "", "", "","",false,false); parameters.push_back(pqueryfract);
50 vector<string> myArray;
51 for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); }
55 m->errorOut(e, "ChimeraUchimeCommand", "setParameters");
59 //**********************************************************************************************************************
60 string ChimeraUchimeCommand::getHelpString(){
62 string helpString = "";
63 helpString += "The chimera.uchime command reads a fastafile and referencefile and outputs potentially chimeric sequences.\n";
64 helpString += "This command is a wrapper for uchime written by Robert C. Edgar.\n";
65 helpString += "The chimera.uchime command parameters are fasta, name, count, reference, processors, dereplicate, abskew, chimealns, minh, mindiv, xn, dn, xa, chunks, minchunk, idsmoothwindow, minsmoothid, maxp, skipgaps, skipgaps2, minlen, maxlen, ucl, strand and queryfact.\n";
66 helpString += "The fasta parameter allows you to enter the fasta file containing your potentially chimeric sequences, and is required, unless you have a valid current fasta file. \n";
67 helpString += "The name parameter allows you to provide a name file, if you are using template=self. \n";
68 helpString += "The count parameter allows you to provide a count file, if you are using template=self. When you use a count file with group info and dereplicate=T, mothur will create a *.pick.count_table file containing seqeunces after chimeras are removed. \n";
69 helpString += "You may enter multiple fasta files by separating their names with dashes. ie. fasta=abrecovery.fasta-amazon.fasta \n";
70 helpString += "The group parameter allows you to provide a group file. The group file can be used with a namesfile and reference=self. When checking sequences, only sequences from the same group as the query sequence will be used as the reference. \n";
71 helpString += "If the dereplicate parameter is false, then if one group finds the seqeunce to be chimeric, then all groups find it to be chimeric, default=f.\n";
72 helpString += "The reference parameter allows you to enter a reference file containing known non-chimeric sequences, and is required. You may also set template=self, in this case the abundant sequences will be used as potential parents. \n";
73 helpString += "The processors parameter allows you to specify how many processors you would like to use. The default is 1. \n";
74 helpString += "The abskew parameter can only be used with template=self. Minimum abundance skew. Default 1.9. Abundance skew is: min [ abund(parent1), abund(parent2) ] / abund(query).\n";
75 helpString += "The chimealns parameter allows you to indicate you would like a file containing multiple alignments of query sequences to parents in human readable format. Alignments show columns with differences that support or contradict a chimeric model.\n";
76 helpString += "The minh parameter - mininum score to report chimera. Default 0.3. Values from 0.1 to 5 might be reasonable. Lower values increase sensitivity but may report more false positives. If you decrease xn you may need to increase minh, and vice versa.\n";
77 helpString += "The mindiv parameter - minimum divergence ratio, default 0.5. Div ratio is 100%% - %%identity between query sequence and the closest candidate for being a parent. If you don't care about very close chimeras, then you could increase mindiv to, say, 1.0 or 2.0, and also decrease minh, say to 0.1, to increase sensitivity. How well this works will depend on your data. Best is to tune parameters on a good benchmark.\n";
78 helpString += "The xn parameter - weight of a no vote. Default 8.0. Decreasing this weight to around 3 or 4 may give better performance on denoised data.\n";
79 helpString += "The dn parameter - pseudo-count prior on number of no votes. Default 1.4. Probably no good reason to change this unless you can retune to a good benchmark for your data. Reasonable values are probably in the range from 0.2 to 2.\n";
80 helpString += "The xa parameter - weight of an abstain vote. Default 1. So far, results do not seem to be very sensitive to this parameter, but if you have a good training set might be worth trying. Reasonable values might range from 0.1 to 2.\n";
81 helpString += "The chunks parameter is the number of chunks to extract from the query sequence when searching for parents. Default 4.\n";
82 helpString += "The minchunk parameter is the minimum length of a chunk. Default 64.\n";
83 helpString += "The idsmoothwindow parameter is the length of id smoothing window. Default 32.\n";
84 //helpString += "The minsmoothid parameter - minimum factional identity over smoothed window of candidate parent. Default 0.95.\n";
85 helpString += "The maxp parameter - maximum number of candidate parents to consider. Default 2. In tests so far, increasing maxp gives only a very small improvement in sensivity but tends to increase the error rate quite a bit.\n";
86 helpString += "The skipgaps parameter controls how gapped columns affect counting of diffs. If skipgaps is set to T, columns containing gaps do not found as diffs. Default = T.\n";
87 helpString += "The skipgaps2 parameter controls how gapped columns affect counting of diffs. If skipgaps2 is set to T, if column is immediately adjacent to a column containing a gap, it is not counted as a diff. Default = T.\n";
88 helpString += "The minlen parameter is the minimum unaligned sequence length. Defaults 10. Applies to both query and reference sequences.\n";
89 helpString += "The maxlen parameter is the maximum unaligned sequence length. Defaults 10000. Applies to both query and reference sequences.\n";
90 helpString += "The ucl parameter - use local-X alignments. Default is global-X or false. On tests so far, global-X is always better; this option is retained because it just might work well on some future type of data.\n";
91 helpString += "The queryfract parameter - minimum fraction of the query sequence that must be covered by a local-X alignment. Default 0.5. Applies only when ucl is true.\n";
93 helpString += "When using MPI, the processors parameter is set to the number of MPI processes running. \n";
95 helpString += "The chimera.uchime command should be in the following format: \n";
96 helpString += "chimera.uchime(fasta=yourFastaFile, reference=yourTemplate) \n";
97 helpString += "Example: chimera.uchime(fasta=AD.align, reference=silva.gold.align) \n";
98 helpString += "Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFastaFile).\n";
101 catch(exception& e) {
102 m->errorOut(e, "ChimeraUchimeCommand", "getHelpString");
106 //**********************************************************************************************************************
107 string ChimeraUchimeCommand::getOutputPattern(string type) {
111 if (type == "chimera") { pattern = "[filename],uchime.chimeras"; }
112 else if (type == "accnos") { pattern = "[filename],uchime.accnos"; }
113 else if (type == "alns") { pattern = "[filename],uchime.alns"; }
114 else if (type == "count") { pattern = "[filename],uchime.pick.count_table"; }
115 else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true; }
119 catch(exception& e) {
120 m->errorOut(e, "ChimeraUchimeCommand", "getOutputPattern");
124 //**********************************************************************************************************************
125 ChimeraUchimeCommand::ChimeraUchimeCommand(){
127 abort = true; calledHelp = true;
129 vector<string> tempOutNames;
130 outputTypes["chimera"] = tempOutNames;
131 outputTypes["accnos"] = tempOutNames;
132 outputTypes["alns"] = tempOutNames;
133 outputTypes["count"] = tempOutNames;
135 catch(exception& e) {
136 m->errorOut(e, "ChimeraUchimeCommand", "ChimeraUchimeCommand");
140 //***************************************************************************************************************
141 ChimeraUchimeCommand::ChimeraUchimeCommand(string option) {
143 abort = false; calledHelp = false; hasName=false; hasCount=false;
144 ReferenceDB* rdb = ReferenceDB::getInstance();
146 //allow user to run help
147 if(option == "help") { help(); abort = true; calledHelp = true; }
148 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
151 vector<string> myArray = setParameters();
153 OptionParser parser(option);
154 map<string,string> parameters = parser.getParameters();
156 ValidParameters validParameter("chimera.uchime");
157 map<string,string>::iterator it;
159 //check to make sure all parameters are valid for command
160 for (it = parameters.begin(); it != parameters.end(); it++) {
161 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
164 vector<string> tempOutNames;
165 outputTypes["chimera"] = tempOutNames;
166 outputTypes["accnos"] = tempOutNames;
167 outputTypes["alns"] = tempOutNames;
168 outputTypes["count"] = tempOutNames;
170 //if the user changes the input directory command factory will send this info to us in the output parameter
171 string inputDir = validParameter.validFile(parameters, "inputdir", false);
172 if (inputDir == "not found"){ inputDir = ""; }
174 //check for required parameters
175 fastafile = validParameter.validFile(parameters, "fasta", false);
176 if (fastafile == "not found") {
177 //if there is a current fasta file, use it
178 string filename = m->getFastaFile();
179 if (filename != "") { fastaFileNames.push_back(filename); m->mothurOut("Using " + filename + " as input file for the fasta parameter."); m->mothurOutEndLine(); }
180 else { m->mothurOut("You have no current fastafile and the fasta parameter is required."); m->mothurOutEndLine(); abort = true; }
182 m->splitAtDash(fastafile, fastaFileNames);
184 //go through files and make sure they are good, if not, then disregard them
185 for (int i = 0; i < fastaFileNames.size(); i++) {
188 if (fastaFileNames[i] == "current") {
189 fastaFileNames[i] = m->getFastaFile();
190 if (fastaFileNames[i] != "") { m->mothurOut("Using " + fastaFileNames[i] + " as input file for the fasta parameter where you had given current."); m->mothurOutEndLine(); }
192 m->mothurOut("You have no current fastafile, ignoring current."); m->mothurOutEndLine(); ignore=true;
193 //erase from file list
194 fastaFileNames.erase(fastaFileNames.begin()+i);
201 if (inputDir != "") {
202 string path = m->hasPath(fastaFileNames[i]);
203 //if the user has not given a path then, add inputdir. else leave path alone.
204 if (path == "") { fastaFileNames[i] = inputDir + fastaFileNames[i]; }
210 ableToOpen = m->openInputFile(fastaFileNames[i], in, "noerror");
212 //if you can't open it, try default location
213 if (ableToOpen == 1) {
214 if (m->getDefaultPath() != "") { //default path is set
215 string tryPath = m->getDefaultPath() + m->getSimpleName(fastaFileNames[i]);
216 m->mothurOut("Unable to open " + fastaFileNames[i] + ". Trying default " + tryPath); m->mothurOutEndLine();
218 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
220 fastaFileNames[i] = tryPath;
224 if (ableToOpen == 1) {
225 if (m->getOutputDir() != "") { //default path is set
226 string tryPath = m->getOutputDir() + m->getSimpleName(fastaFileNames[i]);
227 m->mothurOut("Unable to open " + fastaFileNames[i] + ". Trying output directory " + tryPath); m->mothurOutEndLine();
229 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
231 fastaFileNames[i] = tryPath;
237 if (ableToOpen == 1) {
238 m->mothurOut("Unable to open " + fastaFileNames[i] + ". It will be disregarded."); m->mothurOutEndLine();
239 //erase from file list
240 fastaFileNames.erase(fastaFileNames.begin()+i);
243 m->setFastaFile(fastaFileNames[i]);
248 //make sure there is at least one valid file left
249 if (fastaFileNames.size() == 0) { m->mothurOut("[ERROR]: no valid files."); m->mothurOutEndLine(); abort = true; }
253 //check for required parameters
254 namefile = validParameter.validFile(parameters, "name", false);
255 if (namefile == "not found") { namefile = ""; }
257 m->splitAtDash(namefile, nameFileNames);
259 //go through files and make sure they are good, if not, then disregard them
260 for (int i = 0; i < nameFileNames.size(); i++) {
263 if (nameFileNames[i] == "current") {
264 nameFileNames[i] = m->getNameFile();
265 if (nameFileNames[i] != "") { m->mothurOut("Using " + nameFileNames[i] + " as input file for the name parameter where you had given current."); m->mothurOutEndLine(); }
267 m->mothurOut("You have no current namefile, ignoring current."); m->mothurOutEndLine(); ignore=true;
268 //erase from file list
269 nameFileNames.erase(nameFileNames.begin()+i);
276 if (inputDir != "") {
277 string path = m->hasPath(nameFileNames[i]);
278 //if the user has not given a path then, add inputdir. else leave path alone.
279 if (path == "") { nameFileNames[i] = inputDir + nameFileNames[i]; }
285 ableToOpen = m->openInputFile(nameFileNames[i], in, "noerror");
287 //if you can't open it, try default location
288 if (ableToOpen == 1) {
289 if (m->getDefaultPath() != "") { //default path is set
290 string tryPath = m->getDefaultPath() + m->getSimpleName(nameFileNames[i]);
291 m->mothurOut("Unable to open " + nameFileNames[i] + ". Trying default " + tryPath); m->mothurOutEndLine();
293 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
295 nameFileNames[i] = tryPath;
299 if (ableToOpen == 1) {
300 if (m->getOutputDir() != "") { //default path is set
301 string tryPath = m->getOutputDir() + m->getSimpleName(nameFileNames[i]);
302 m->mothurOut("Unable to open " + nameFileNames[i] + ". Trying output directory " + tryPath); m->mothurOutEndLine();
304 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
306 nameFileNames[i] = tryPath;
312 if (ableToOpen == 1) {
313 m->mothurOut("Unable to open " + nameFileNames[i] + ". It will be disregarded."); m->mothurOutEndLine();
314 //erase from file list
315 nameFileNames.erase(nameFileNames.begin()+i);
318 m->setNameFile(nameFileNames[i]);
324 if (nameFileNames.size() != 0) { hasName = true; }
326 //check for required parameters
327 vector<string> countfileNames;
328 countfile = validParameter.validFile(parameters, "count", false);
329 if (countfile == "not found") {
332 m->splitAtDash(countfile, countfileNames);
334 //go through files and make sure they are good, if not, then disregard them
335 for (int i = 0; i < countfileNames.size(); i++) {
338 if (countfileNames[i] == "current") {
339 countfileNames[i] = m->getCountTableFile();
340 if (nameFileNames[i] != "") { m->mothurOut("Using " + countfileNames[i] + " as input file for the count parameter where you had given current."); m->mothurOutEndLine(); }
342 m->mothurOut("You have no current count file, ignoring current."); m->mothurOutEndLine(); ignore=true;
343 //erase from file list
344 countfileNames.erase(countfileNames.begin()+i);
351 if (inputDir != "") {
352 string path = m->hasPath(countfileNames[i]);
353 //if the user has not given a path then, add inputdir. else leave path alone.
354 if (path == "") { countfileNames[i] = inputDir + countfileNames[i]; }
360 ableToOpen = m->openInputFile(countfileNames[i], in, "noerror");
362 //if you can't open it, try default location
363 if (ableToOpen == 1) {
364 if (m->getDefaultPath() != "") { //default path is set
365 string tryPath = m->getDefaultPath() + m->getSimpleName(countfileNames[i]);
366 m->mothurOut("Unable to open " + countfileNames[i] + ". Trying default " + tryPath); m->mothurOutEndLine();
368 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
370 countfileNames[i] = tryPath;
374 if (ableToOpen == 1) {
375 if (m->getOutputDir() != "") { //default path is set
376 string tryPath = m->getOutputDir() + m->getSimpleName(countfileNames[i]);
377 m->mothurOut("Unable to open " + countfileNames[i] + ". Trying output directory " + tryPath); m->mothurOutEndLine();
379 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
381 countfileNames[i] = tryPath;
387 if (ableToOpen == 1) {
388 m->mothurOut("Unable to open " + countfileNames[i] + ". It will be disregarded."); m->mothurOutEndLine();
389 //erase from file list
390 countfileNames.erase(countfileNames.begin()+i);
393 m->setCountTableFile(countfileNames[i]);
399 if (countfileNames.size() != 0) { hasCount = true; }
401 //make sure there is at least one valid file left
402 if (hasName && hasCount) { m->mothurOut("[ERROR]: You must enter ONLY ONE of the following: count or name."); m->mothurOutEndLine(); abort = true; }
404 if (!hasName && hasCount) { nameFileNames = countfileNames; }
406 if ((hasCount || hasName) && (nameFileNames.size() != fastaFileNames.size())) { m->mothurOut("[ERROR]: The number of name or count files does not match the number of fastafiles, please correct."); m->mothurOutEndLine(); abort=true; }
408 bool hasGroup = true;
409 groupfile = validParameter.validFile(parameters, "group", false);
410 if (groupfile == "not found") { groupfile = ""; hasGroup = false; }
412 m->splitAtDash(groupfile, groupFileNames);
414 //go through files and make sure they are good, if not, then disregard them
415 for (int i = 0; i < groupFileNames.size(); i++) {
418 if (groupFileNames[i] == "current") {
419 groupFileNames[i] = m->getGroupFile();
420 if (groupFileNames[i] != "") { m->mothurOut("Using " + groupFileNames[i] + " as input file for the group parameter where you had given current."); m->mothurOutEndLine(); }
422 m->mothurOut("You have no current namefile, ignoring current."); m->mothurOutEndLine(); ignore=true;
423 //erase from file list
424 groupFileNames.erase(groupFileNames.begin()+i);
431 if (inputDir != "") {
432 string path = m->hasPath(groupFileNames[i]);
433 //if the user has not given a path then, add inputdir. else leave path alone.
434 if (path == "") { groupFileNames[i] = inputDir + groupFileNames[i]; }
440 ableToOpen = m->openInputFile(groupFileNames[i], in, "noerror");
442 //if you can't open it, try default location
443 if (ableToOpen == 1) {
444 if (m->getDefaultPath() != "") { //default path is set
445 string tryPath = m->getDefaultPath() + m->getSimpleName(groupFileNames[i]);
446 m->mothurOut("Unable to open " + groupFileNames[i] + ". Trying default " + tryPath); m->mothurOutEndLine();
448 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
450 groupFileNames[i] = tryPath;
454 if (ableToOpen == 1) {
455 if (m->getOutputDir() != "") { //default path is set
456 string tryPath = m->getOutputDir() + m->getSimpleName(groupFileNames[i]);
457 m->mothurOut("Unable to open " + groupFileNames[i] + ". Trying output directory " + tryPath); m->mothurOutEndLine();
459 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
461 groupFileNames[i] = tryPath;
467 if (ableToOpen == 1) {
468 m->mothurOut("Unable to open " + groupFileNames[i] + ". It will be disregarded."); m->mothurOutEndLine();
469 //erase from file list
470 groupFileNames.erase(groupFileNames.begin()+i);
473 m->setGroupFile(groupFileNames[i]);
478 //make sure there is at least one valid file left
479 if (groupFileNames.size() == 0) { m->mothurOut("[ERROR]: no valid group files."); m->mothurOutEndLine(); abort = true; }
482 if (hasGroup && (groupFileNames.size() != fastaFileNames.size())) { m->mothurOut("[ERROR]: The number of groupfiles does not match the number of fastafiles, please correct."); m->mothurOutEndLine(); abort=true; }
484 if (hasGroup && hasCount) { m->mothurOut("[ERROR]: You must enter ONLY ONE of the following: count or group."); m->mothurOutEndLine(); abort = true; }
485 //if the user changes the output directory command factory will send this info to us in the output parameter
486 outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = ""; }
489 //if the user changes the output directory command factory will send this info to us in the output parameter
490 outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = ""; }
493 it = parameters.find("reference");
494 //user has given a template file
495 if(it != parameters.end()){
496 if (it->second == "self") { templatefile = "self"; }
498 path = m->hasPath(it->second);
499 //if the user has not given a path then, add inputdir. else leave path alone.
500 if (path == "") { parameters["reference"] = inputDir + it->second; }
502 templatefile = validParameter.validFile(parameters, "reference", true);
503 if (templatefile == "not open") { abort = true; }
504 else if (templatefile == "not found") { //check for saved reference sequences
505 if (rdb->getSavedReference() != "") {
506 templatefile = rdb->getSavedReference();
507 m->mothurOutEndLine(); m->mothurOut("Using sequences from " + rdb->getSavedReference() + "."); m->mothurOutEndLine();
509 m->mothurOut("[ERROR]: You don't have any saved reference sequences and the reference parameter is a required.");
510 m->mothurOutEndLine();
515 }else if (hasName) { templatefile = "self"; }
516 else if (hasCount) { templatefile = "self"; }
518 if (rdb->getSavedReference() != "") {
519 templatefile = rdb->getSavedReference();
520 m->mothurOutEndLine(); m->mothurOut("Using sequences from " + rdb->getSavedReference() + "."); m->mothurOutEndLine();
522 m->mothurOut("[ERROR]: You don't have any saved reference sequences and the reference parameter is a required.");
523 m->mothurOutEndLine();
524 templatefile = ""; abort = true;
528 string temp = validParameter.validFile(parameters, "processors", false); if (temp == "not found"){ temp = m->getProcessors(); }
529 m->setProcessors(temp);
530 m->mothurConvert(temp, processors);
532 abskew = validParameter.validFile(parameters, "abskew", false); if (abskew == "not found"){ useAbskew = false; abskew = "1.9"; }else{ useAbskew = true; }
533 if (useAbskew && templatefile != "self") { m->mothurOut("The abskew parameter is only valid with template=self, ignoring."); m->mothurOutEndLine(); useAbskew = false; }
535 temp = validParameter.validFile(parameters, "chimealns", false); if (temp == "not found") { temp = "f"; }
536 chimealns = m->isTrue(temp);
538 minh = validParameter.validFile(parameters, "minh", false); if (minh == "not found") { useMinH = false; minh = "0.3"; } else{ useMinH = true; }
539 mindiv = validParameter.validFile(parameters, "mindiv", false); if (mindiv == "not found") { useMindiv = false; mindiv = "0.5"; } else{ useMindiv = true; }
540 xn = validParameter.validFile(parameters, "xn", false); if (xn == "not found") { useXn = false; xn = "8.0"; } else{ useXn = true; }
541 dn = validParameter.validFile(parameters, "dn", false); if (dn == "not found") { useDn = false; dn = "1.4"; } else{ useDn = true; }
542 xa = validParameter.validFile(parameters, "xa", false); if (xa == "not found") { useXa = false; xa = "1"; } else{ useXa = true; }
543 chunks = validParameter.validFile(parameters, "chunks", false); if (chunks == "not found") { useChunks = false; chunks = "4"; } else{ useChunks = true; }
544 minchunk = validParameter.validFile(parameters, "minchunk", false); if (minchunk == "not found") { useMinchunk = false; minchunk = "64"; } else{ useMinchunk = true; }
545 idsmoothwindow = validParameter.validFile(parameters, "idsmoothwindow", false); if (idsmoothwindow == "not found") { useIdsmoothwindow = false; idsmoothwindow = "32"; } else{ useIdsmoothwindow = true; }
546 //minsmoothid = validParameter.validFile(parameters, "minsmoothid", false); if (minsmoothid == "not found") { useMinsmoothid = false; minsmoothid = "0.95"; } else{ useMinsmoothid = true; }
547 maxp = validParameter.validFile(parameters, "maxp", false); if (maxp == "not found") { useMaxp = false; maxp = "2"; } else{ useMaxp = true; }
548 minlen = validParameter.validFile(parameters, "minlen", false); if (minlen == "not found") { useMinlen = false; minlen = "10"; } else{ useMinlen = true; }
549 maxlen = validParameter.validFile(parameters, "maxlen", false); if (maxlen == "not found") { useMaxlen = false; maxlen = "10000"; } else{ useMaxlen = true; }
551 strand = validParameter.validFile(parameters, "strand", false); if (strand == "not found") { strand = ""; }
553 temp = validParameter.validFile(parameters, "ucl", false); if (temp == "not found") { temp = "f"; }
554 ucl = m->isTrue(temp);
556 queryfract = validParameter.validFile(parameters, "queryfract", false); if (queryfract == "not found") { useQueryfract = false; queryfract = "0.5"; } else{ useQueryfract = true; }
557 if (!ucl && useQueryfract) { m->mothurOut("queryfact may only be used when ucl=t, ignoring."); m->mothurOutEndLine(); useQueryfract = false; }
559 temp = validParameter.validFile(parameters, "skipgaps", false); if (temp == "not found") { temp = "t"; }
560 skipgaps = m->isTrue(temp);
562 temp = validParameter.validFile(parameters, "skipgaps2", false); if (temp == "not found") { temp = "t"; }
563 skipgaps2 = m->isTrue(temp);
566 temp = validParameter.validFile(parameters, "dereplicate", false);
567 if (temp == "not found") { temp = "false"; }
568 dups = m->isTrue(temp);
571 if (hasName && (templatefile != "self")) { m->mothurOut("You have provided a namefile and the reference parameter is not set to self. I am not sure what reference you are trying to use, aborting."); m->mothurOutEndLine(); abort=true; }
572 if (hasGroup && (templatefile != "self")) { m->mothurOut("You have provided a group file and the reference parameter is not set to self. I am not sure what reference you are trying to use, aborting."); m->mothurOutEndLine(); abort=true; }
574 //look for uchime exe
576 string tempPath = path;
577 for (int i = 0; i < path.length(); i++) { tempPath[i] = tolower(path[i]); }
578 path = path.substr(0, (tempPath.find_last_of('m')));
580 string uchimeCommand;
581 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
582 uchimeCommand = path + "uchime"; // format the database, -o option gives us the ability
584 m->mothurOut("[DEBUG]: Uchime location using \"which uchime\" = ");
585 Command* newCommand = new SystemCommand("which uchime"); m->mothurOutEndLine();
586 newCommand->execute();
588 m->mothurOut("[DEBUG]: Mothur's location using \"which mothur\" = ");
589 newCommand = new SystemCommand("which mothur"); m->mothurOutEndLine();
590 newCommand->execute();
594 uchimeCommand = path + "uchime.exe";
597 //test to make sure uchime exists
599 uchimeCommand = m->getFullPathName(uchimeCommand);
600 int ableToOpen = m->openInputFile(uchimeCommand, in, "no error"); in.close();
601 if(ableToOpen == 1) {
602 m->mothurOut(uchimeCommand + " file does not exist. Checking path... \n");
603 //check to see if uchime is in the path??
605 string uLocation = m->findProgramPath("uchime");
609 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
610 ableToOpen = m->openInputFile(uLocation, in2, "no error"); in2.close();
612 ableToOpen = m->openInputFile((uLocation + ".exe"), in2, "no error"); in2.close();
615 if(ableToOpen == 1) { m->mothurOut("[ERROR]: " + uLocation + " file does not exist. mothur requires the uchime executable."); m->mothurOutEndLine(); abort = true; }
616 else { m->mothurOut("Found uchime in your path, using " + uLocation + "\n");uchimeLocation = uLocation; }
617 }else { uchimeLocation = uchimeCommand; }
619 uchimeLocation = m->getFullPathName(uchimeLocation);
622 catch(exception& e) {
623 m->errorOut(e, "ChimeraSlayerCommand", "ChimeraSlayerCommand");
627 //***************************************************************************************************************
629 int ChimeraUchimeCommand::execute(){
632 if (abort == true) { if (calledHelp) { return 0; } return 2; }
634 m->mothurOut("\nuchime by Robert C. Edgar\nhttp://drive5.com/uchime\nThis code is donated to the public domain.\n\n");
636 for (int s = 0; s < fastaFileNames.size(); s++) {
638 m->mothurOut("Checking sequences from " + fastaFileNames[s] + " ..." ); m->mothurOutEndLine();
640 int start = time(NULL);
641 string nameFile = "";
642 if (outputDir == "") { outputDir = m->hasPath(fastaFileNames[s]); }//if user entered a file with a path then preserve it
643 map<string, string> variables;
644 variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s]));
645 string outputFileName = getOutputFileName("chimera", variables);
646 string accnosFileName = getOutputFileName("accnos", variables);
647 string alnsFileName = getOutputFileName("alns", variables);
648 string newFasta = m->getRootName(fastaFileNames[s]) + "temp";
649 string newCountFile = "";
651 //you provided a groupfile
652 string groupFile = "";
653 bool hasGroup = false;
654 if (groupFileNames.size() != 0) { groupFile = groupFileNames[s]; hasGroup = true; }
657 if (ct.testGroups(nameFileNames[s])) { hasGroup = true; }
658 variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(nameFileNames[s]));
659 newCountFile = getOutputFileName("count", variables);
662 if ((templatefile == "self") && (!hasGroup)) { //you want to run uchime with a template=self and no groups
664 if (processors != 1) { m->mothurOut("When using template=self, mothur can only use 1 processor, continuing."); m->mothurOutEndLine(); processors = 1; }
665 if (nameFileNames.size() != 0) { //you provided a namefile and we don't need to create one
666 nameFile = nameFileNames[s];
667 }else { nameFile = getNamesFile(fastaFileNames[s]); }
669 map<string, string> seqs;
670 readFasta(fastaFileNames[s], seqs); if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } return 0; }
673 vector<seqPriorityNode> nameMapCount;
677 ct.readTable(nameFile, true);
678 for(map<string, string>::iterator it = seqs.begin(); it != seqs.end(); it++) {
679 int num = ct.getNumSeqs(it->first);
680 if (num == 0) { error = 1; }
682 seqPriorityNode temp(num, it->second, it->first);
683 nameMapCount.push_back(temp);
687 error = m->readNames(nameFile, nameMapCount, seqs); if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } return 0; }
689 if (error == 1) { for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } return 0; }
690 if (seqs.size() != nameMapCount.size()) { m->mothurOut( "The number of sequences in your fastafile does not match the number of sequences in your namefile, aborting."); m->mothurOutEndLine(); for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } return 0; }
692 printFile(nameMapCount, newFasta);
693 fastaFileNames[s] = newFasta;
696 if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } return 0; }
699 if (nameFileNames.size() != 0) { //you provided a namefile and we don't need to create one
700 nameFile = nameFileNames[s];
701 }else { nameFile = getNamesFile(fastaFileNames[s]); }
703 //Parse sequences by group
704 vector<string> groups;
705 map<string, string> uniqueNames;
707 cparser = new SequenceCountParser(nameFile, fastaFileNames[s]);
708 groups = cparser->getNamesOfGroups();
709 uniqueNames = cparser->getAllSeqsMap();
711 sparser = new SequenceParser(groupFile, fastaFileNames[s], nameFile);
712 groups = sparser->getNamesOfGroups();
713 uniqueNames = sparser->getAllSeqsMap();
716 if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } return 0; }
719 ofstream out, out1, out2;
720 m->openOutputFile(outputFileName, out); out.close();
721 m->openOutputFile(accnosFileName, out1); out1.close();
722 if (chimealns) { m->openOutputFile(alnsFileName, out2); out2.close(); }
725 if(processors == 1) { totalSeqs = driverGroups(outputFileName, newFasta, accnosFileName, alnsFileName, newCountFile, 0, groups.size(), groups);
727 if (hasCount && dups) {
728 CountTable c; c.readTable(nameFile, true);
729 if (!m->isBlank(newCountFile)) {
731 m->openInputFile(newCountFile, in2);
735 in2 >> name >> group; m->gobble(in2);
736 c.setAbund(name, group, 0);
740 m->mothurRemove(newCountFile);
741 c.printTable(newCountFile);
744 }else { totalSeqs = createProcessesGroups(outputFileName, newFasta, accnosFileName, alnsFileName, newCountFile, groups, nameFile, groupFile, fastaFileNames[s]); }
746 if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } return 0; }
750 int totalChimeras = deconvoluteResults(uniqueNames, outputFileName, accnosFileName, alnsFileName);
752 m->mothurOutEndLine(); m->mothurOut("It took " + toString(time(NULL) - start) + " secs to check " + toString(totalSeqs) + " sequences. " + toString(totalChimeras) + " chimeras were found."); m->mothurOutEndLine();
753 m->mothurOut("The number of sequences checked may be larger than the number of unique sequences because some sequences are found in several samples."); m->mothurOutEndLine();
757 set<string> doNotRemove;
758 CountTable c; c.readTable(newCountFile, true);
759 vector<string> namesInTable = c.getNamesOfSeqs();
760 for (int i = 0; i < namesInTable.size(); i++) {
761 int temp = c.getNumSeqs(namesInTable[i]);
762 if (temp == 0) { c.remove(namesInTable[i]); }
763 else { doNotRemove.insert((namesInTable[i])); }
765 //remove names we want to keep from accnos file.
766 set<string> accnosNames = m->readAccnos(accnosFileName);
768 m->openOutputFile(accnosFileName, out2);
769 for (set<string>::iterator it = accnosNames.begin(); it != accnosNames.end(); it++) {
770 if (doNotRemove.count(*it) == 0) { out2 << (*it) << endl; }
773 c.printTable(newCountFile);
774 outputNames.push_back(newCountFile); outputTypes["count"].push_back(newCountFile);
778 if (hasCount) { delete cparser; }
779 else { delete sparser; }
781 if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } return 0; }
784 if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } return 0; }
789 if(processors == 1){ numSeqs = driver(outputFileName, fastaFileNames[s], accnosFileName, alnsFileName, numChimeras); }
790 else{ numSeqs = createProcesses(outputFileName, fastaFileNames[s], accnosFileName, alnsFileName, numChimeras); }
794 m->openOutputFile(outputFileName+".temp", out);
795 out << "Score\tQuery\tParentA\tParentB\tIdQM\tIdQA\tIdQB\tIdAB\tIdQT\tLY\tLN\tLA\tRY\tRN\tRA\tDiv\tYN\n";
798 m->appendFiles(outputFileName, outputFileName+".temp");
799 m->mothurRemove(outputFileName); rename((outputFileName+".temp").c_str(), outputFileName.c_str());
801 if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } return 0; }
803 //remove file made for uchime
804 if (templatefile == "self") { m->mothurRemove(fastaFileNames[s]); }
806 m->mothurOutEndLine(); m->mothurOut("It took " + toString(time(NULL) - start) + " secs to check " + toString(numSeqs) + " sequences. " + toString(numChimeras) + " chimeras were found."); m->mothurOutEndLine();
809 outputNames.push_back(outputFileName); outputTypes["chimera"].push_back(outputFileName);
810 outputNames.push_back(accnosFileName); outputTypes["accnos"].push_back(accnosFileName);
811 if (chimealns) { outputNames.push_back(alnsFileName); outputTypes["alns"].push_back(alnsFileName); }
814 //set accnos file as new current accnosfile
816 itTypes = outputTypes.find("accnos");
817 if (itTypes != outputTypes.end()) {
818 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setAccnosFile(current); }
821 itTypes = outputTypes.find("count");
822 if (itTypes != outputTypes.end()) {
823 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setCountTableFile(current); }
826 m->mothurOutEndLine();
827 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
828 for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); }
829 m->mothurOutEndLine();
834 catch(exception& e) {
835 m->errorOut(e, "ChimeraUchimeCommand", "execute");
839 //**********************************************************************************************************************
840 int ChimeraUchimeCommand::deconvoluteResults(map<string, string>& uniqueNames, string outputFileName, string accnosFileName, string alnsFileName){
842 map<string, string>::iterator itUnique;
846 m->openOutputFile(accnosFileName+".temp", out2);
849 set<string> namesInFile; //this is so if a sequence is found to be chimera in several samples we dont write it to the results file more than once
850 set<string>::iterator itNames;
851 set<string> chimerasInFile;
852 set<string>::iterator itChimeras;
854 if (!m->isBlank(accnosFileName)) {
857 m->openInputFile(accnosFileName, in2);
860 if (m->control_pressed) { in2.close(); out2.close(); m->mothurRemove(outputFileName); m->mothurRemove((accnosFileName+".temp")); return 0; }
862 in2 >> name; m->gobble(in2);
865 itUnique = uniqueNames.find(name);
867 if (itUnique == uniqueNames.end()) { m->mothurOut("[ERROR]: trouble parsing accnos results. Cannot find " + name + "."); m->mothurOutEndLine(); m->control_pressed = true; }
869 itChimeras = chimerasInFile.find((itUnique->second));
871 if (itChimeras == chimerasInFile.end()) {
872 out2 << itUnique->second << endl;
873 chimerasInFile.insert((itUnique->second));
882 m->mothurRemove(accnosFileName);
883 rename((accnosFileName+".temp").c_str(), accnosFileName.c_str());
889 m->openInputFile(outputFileName, in);
892 m->openOutputFile(outputFileName+".temp", out); out.setf(ios::fixed, ios::floatfield); out.setf(ios::showpoint);
893 out << "Score\tQuery\tParentA\tParentB\tIdQM\tIdQA\tIdQB\tIdAB\tIdQT\tLY\tLN\tLA\tRY\tRN\tRA\tDiv\tYN\n";
896 string parent1, parent2, temp2, temp3, temp4, temp5, temp6, temp7, temp8, temp9, temp10, temp11, temp12, temp13, flag;
899 //assumptions - in file each read will always look like - if uchime source is updated, revisit this code.
900 /* 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
901 0.000000 F11Fcsw_33372/ab=18/ * * * * * * * * * * * * * * N
902 0.018300 F11Fcsw_14980/ab=16/ F11Fcsw_1915/ab=35/ F11Fcsw_6032/ab=42/ 79.9 78.7 78.2 78.7 79.2 3 0 5 11 10 20 1.46 N
907 if (m->control_pressed) { in.close(); out.close(); m->mothurRemove((outputFileName+".temp")); return 0; }
910 in >> temp1; m->gobble(in);
911 in >> name; m->gobble(in);
912 in >> parent1; m->gobble(in);
913 in >> parent2; m->gobble(in);
914 in >> temp2 >> temp3 >> temp4 >> temp5 >> temp6 >> temp7 >> temp8 >> temp9 >> temp10 >> temp11 >> temp12 >> temp13 >> flag;
917 //parse name - name will look like U68590/ab=1/
918 string restOfName = "";
919 int pos = name.find_first_of('/');
920 if (pos != string::npos) {
921 restOfName = name.substr(pos);
922 name = name.substr(0, pos);
926 itUnique = uniqueNames.find(name);
928 if (itUnique == uniqueNames.end()) { m->mothurOut("[ERROR]: trouble parsing chimera results. Cannot find "+ name + "."); m->mothurOutEndLine(); m->control_pressed = true; }
930 name = itUnique->second;
931 //is this name already in the file
932 itNames = namesInFile.find((name));
934 if (itNames == namesInFile.end()) { //no not in file
935 if (flag == "N") { //are you really a no??
936 //is this sequence really not chimeric??
937 itChimeras = chimerasInFile.find(name);
939 //then you really are a no so print, otherwise skip
940 if (itChimeras == chimerasInFile.end()) { print = true; }
941 }else{ print = true; }
946 out << temp1 << '\t' << name << restOfName << '\t';
947 namesInFile.insert(name);
949 //parse parent1 names
950 if (parent1 != "*") {
952 pos = parent1.find_first_of('/');
953 if (pos != string::npos) {
954 restOfName = parent1.substr(pos);
955 parent1 = parent1.substr(0, pos);
958 itUnique = uniqueNames.find(parent1);
959 if (itUnique == uniqueNames.end()) { m->mothurOut("[ERROR]: trouble parsing chimera results. Cannot find parentA "+ parent1 + "."); m->mothurOutEndLine(); m->control_pressed = true; }
960 else { out << itUnique->second << restOfName << '\t'; }
961 }else { out << parent1 << '\t'; }
963 //parse parent2 names
964 if (parent2 != "*") {
966 pos = parent2.find_first_of('/');
967 if (pos != string::npos) {
968 restOfName = parent2.substr(pos);
969 parent2 = parent2.substr(0, pos);
972 itUnique = uniqueNames.find(parent2);
973 if (itUnique == uniqueNames.end()) { m->mothurOut("[ERROR]: trouble parsing chimera results. Cannot find parentB "+ parent2 + "."); m->mothurOutEndLine(); m->control_pressed = true; }
974 else { out << itUnique->second << restOfName << '\t'; }
975 }else { out << parent2 << '\t'; }
977 out << temp2 << '\t' << temp3 << '\t' << temp4 << '\t' << temp5 << '\t' << temp6 << '\t' << temp7 << '\t' << temp8 << '\t' << temp9 << '\t' << temp10 << '\t' << temp11 << '\t' << temp12 << temp13 << '\t' << flag << endl;
983 m->mothurRemove(outputFileName);
984 rename((outputFileName+".temp").c_str(), outputFileName.c_str());
988 //assumptions - in file each read will always look like - if uchime source is updated, revisit this code.
990 ------------------------------------------------------------------------
991 Query ( 179 nt) F21Fcsw_11639/ab=591/
992 ParentA ( 179 nt) F11Fcsw_6529/ab=1625/
993 ParentB ( 181 nt) F21Fcsw_12128/ab=1827/
995 A 1 AAGgAAGAtTAATACaagATGgCaTCatgAGtccgCATgTtcAcatGATTAAAG--gTaTtcCGGTagacGATGGGGATG 78
996 Q 1 AAGTAAGACTAATACCCAATGACGTCTCTAGAAGACATCTGAAAGAGATTAAAG--ATTTATCGGTGATGGATGGGGATG 78
997 B 1 AAGgAAGAtTAATcCaggATGggaTCatgAGttcACATgTccgcatGATTAAAGgtATTTtcCGGTagacGATGGGGATG 80
998 Diffs N N A N?N N N NNN N?NB N ?NaNNN B B NN NNNN
999 Votes 0 0 + 000 0 0 000 000+ 0 00!000 + 00 0000
1000 Model AAAAAAAAAAAAAAAAAAAAAAxBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB
1002 A 79 CGTtccATTAGaTaGTaGGCGGGGTAACGGCCCACCtAGtCttCGATggaTAGGGGTTCTGAGAGGAAGGTCCCCCACAT 158
1003 Q 79 CGTCTGATTAGCTTGTTGGCGGGGTAACGGCCCACCAAGGCAACGATCAGTAGGGGTTCTGAGAGGAAGGTCCCCCACAT 158
1004 B 81 CGTtccATTAGaTaGTaGGCGGGGTAACGGCCCACCtAGtCAACGATggaTAGGGGTTCTGAGAGGAAGGTCCCCCACAT 160
1005 Diffs NNN N N N N N BB NNN
1006 Votes 000 0 0 0 0 0 ++ 000
1007 Model BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB
1009 A 159 TGGAACTGAGACACGGTCCAA 179
1010 Q 159 TGGAACTGAGACACGGTCCAA 179
1011 B 161 TGGAACTGAGACACGGTCCAA 181
1014 Model BBBBBBBBBBBBBBBBBBBBB
1016 Ids. QA 76.6%, QB 77.7%, AB 93.7%, QModel 78.9%, Div. +1.5%
1017 Diffs Left 7: N 0, A 6, Y 1 (14.3%); Right 35: N 1, A 30, Y 4 (11.4%), Score 0.0047
1021 m->openInputFile(alnsFileName, in3);
1024 m->openOutputFile(alnsFileName+".temp", out3); out3.setf(ios::fixed, ios::floatfield); out3.setf(ios::showpoint);
1027 namesInFile.clear();
1030 while (!in3.eof()) {
1031 if (m->control_pressed) { in3.close(); out3.close(); m->mothurRemove(outputFileName); m->mothurRemove((accnosFileName)); m->mothurRemove((alnsFileName+".temp")); return 0; }
1034 line = m->getline(in3);
1038 istringstream iss(line);
1041 //are you a name line
1042 if ((temp == "Query") || (temp == "ParentA") || (temp == "ParentB")) {
1044 for (int i = 0; i < line.length(); i++) {
1046 if (line[i] == ')') { break; }
1047 else { out3 << line[i]; }
1050 if (spot == (line.length() - 1)) { m->mothurOut("[ERROR]: could not line sequence name in line " + line + "."); m->mothurOutEndLine(); m->control_pressed = true; }
1051 else if ((spot+2) > (line.length() - 1)) { m->mothurOut("[ERROR]: could not line sequence name in line " + line + "."); m->mothurOutEndLine(); m->control_pressed = true; }
1053 out << line[spot] << line[spot+1];
1055 name = line.substr(spot+2);
1057 //parse name - name will either look like U68590/ab=1/ or U68590
1058 string restOfName = "";
1059 int pos = name.find_first_of('/');
1060 if (pos != string::npos) {
1061 restOfName = name.substr(pos);
1062 name = name.substr(0, pos);
1066 itUnique = uniqueNames.find(name);
1068 if (itUnique == uniqueNames.end()) { m->mothurOut("[ERROR]: trouble parsing alns results. Cannot find "+ name + "."); m->mothurOutEndLine();m->control_pressed = true; }
1070 //only limit repeats on query names
1071 if (temp == "Query") {
1072 itNames = namesInFile.find((itUnique->second));
1074 if (itNames == namesInFile.end()) {
1075 out << itUnique->second << restOfName << endl;
1076 namesInFile.insert((itUnique->second));
1078 }else { out << itUnique->second << restOfName << endl; }
1083 }else { //not need to alter line
1084 out3 << line << endl;
1086 }else { out3 << endl; }
1091 m->mothurRemove(alnsFileName);
1092 rename((alnsFileName+".temp").c_str(), alnsFileName.c_str());
1097 catch(exception& e) {
1098 m->errorOut(e, "ChimeraUchimeCommand", "deconvoluteResults");
1102 //**********************************************************************************************************************
1103 int ChimeraUchimeCommand::printFile(vector<seqPriorityNode>& nameMapCount, string filename){
1106 sort(nameMapCount.begin(), nameMapCount.end(), compareSeqPriorityNodes);
1109 m->openOutputFile(filename, out);
1111 //print new file in order of
1112 for (int i = 0; i < nameMapCount.size(); i++) {
1113 out << ">" << nameMapCount[i].name << "/ab=" << nameMapCount[i].numIdentical << "/" << endl << nameMapCount[i].seq << endl;
1119 catch(exception& e) {
1120 m->errorOut(e, "ChimeraUchimeCommand", "printFile");
1124 //**********************************************************************************************************************
1125 int ChimeraUchimeCommand::readFasta(string filename, map<string, string>& seqs){
1127 //create input file for uchime
1128 //read through fastafile and store info
1130 m->openInputFile(filename, in);
1134 if (m->control_pressed) { in.close(); return 0; }
1136 Sequence seq(in); m->gobble(in);
1137 seqs[seq.getName()] = seq.getAligned();
1143 catch(exception& e) {
1144 m->errorOut(e, "ChimeraUchimeCommand", "readFasta");
1148 //**********************************************************************************************************************
1150 string ChimeraUchimeCommand::getNamesFile(string& inputFile){
1152 string nameFile = "";
1154 m->mothurOutEndLine(); m->mothurOut("No namesfile given, running unique.seqs command to generate one."); m->mothurOutEndLine(); m->mothurOutEndLine();
1156 //use unique.seqs to create new name and fastafile
1157 string inputString = "fasta=" + inputFile;
1158 m->mothurOut("/******************************************/"); m->mothurOutEndLine();
1159 m->mothurOut("Running command: unique.seqs(" + inputString + ")"); m->mothurOutEndLine();
1160 m->mothurCalling = true;
1162 Command* uniqueCommand = new DeconvoluteCommand(inputString);
1163 uniqueCommand->execute();
1165 map<string, vector<string> > filenames = uniqueCommand->getOutputFiles();
1167 delete uniqueCommand;
1168 m->mothurCalling = false;
1169 m->mothurOut("/******************************************/"); m->mothurOutEndLine();
1171 nameFile = filenames["name"][0];
1172 inputFile = filenames["fasta"][0];
1176 catch(exception& e) {
1177 m->errorOut(e, "ChimeraUchimeCommand", "getNamesFile");
1181 //**********************************************************************************************************************
1182 int ChimeraUchimeCommand::driverGroups(string outputFName, string filename, string accnos, string alns, string countlist, int start, int end, vector<string> groups){
1186 int numChimeras = 0;
1189 ofstream outCountList;
1190 if (hasCount && dups) { m->openOutputFile(countlist, outCountList); }
1192 for (int i = start; i < end; i++) {
1193 int start = time(NULL); if (m->control_pressed) { outCountList.close(); m->mothurRemove(countlist); return 0; }
1196 if (hasCount) { error = cparser->getSeqs(groups[i], filename, true); if ((error == 1) || m->control_pressed) { return 0; } }
1197 else { error = sparser->getSeqs(groups[i], filename, true); if ((error == 1) || m->control_pressed) { return 0; } }
1199 int numSeqs = driver((outputFName + groups[i]), filename, (accnos+groups[i]), (alns+ groups[i]), numChimeras);
1200 totalSeqs += numSeqs;
1202 if (m->control_pressed) { return 0; }
1204 //remove file made for uchime
1205 if (!m->debug) { m->mothurRemove(filename); }
1206 else { m->mothurOut("[DEBUG]: saving file: " + filename + ".\n"); }
1208 //if we provided a count file with group info and set dereplicate=t, then we want to create a *.pick.count_table
1209 //This table will zero out group counts for seqs determined to be chimeric by that group.
1211 if (!m->isBlank(accnos+groups[i])) {
1213 m->openInputFile(accnos+groups[i], in);
1217 in >> name; m->gobble(in);
1218 outCountList << name << '\t' << groups[i] << endl;
1222 map<string, string> thisnamemap = sparser->getNameMap(groups[i]);
1223 map<string, string>::iterator itN;
1225 m->openOutputFile(accnos+groups[i]+".temp", out);
1227 in >> name; m->gobble(in);
1228 itN = thisnamemap.find(name);
1229 if (itN != thisnamemap.end()) {
1230 vector<string> tempNames; m->splitAtComma(itN->second, tempNames);
1231 for (int j = 0; j < tempNames.size(); j++) { out << tempNames[j] << endl; }
1233 }else { m->mothurOut("[ERROR]: parsing cannot find " + name + ".\n"); m->control_pressed = true; }
1237 m->renameFile(accnos+groups[i]+".temp", accnos+groups[i]);
1244 m->appendFiles((outputFName+groups[i]), outputFName); m->mothurRemove((outputFName+groups[i]));
1245 m->appendFiles((accnos+groups[i]), accnos); m->mothurRemove((accnos+groups[i]));
1246 if (chimealns) { m->appendFiles((alns+groups[i]), alns); m->mothurRemove((alns+groups[i])); }
1248 m->mothurOutEndLine(); m->mothurOut("It took " + toString(time(NULL) - start) + " secs to check " + toString(numSeqs) + " sequences from group " + groups[i] + "."); m->mothurOutEndLine();
1251 if (hasCount && dups) { outCountList.close(); }
1256 catch(exception& e) {
1257 m->errorOut(e, "ChimeraUchimeCommand", "driverGroups");
1261 //**********************************************************************************************************************
1263 int ChimeraUchimeCommand::driver(string outputFName, string filename, string accnos, string alns, int& numChimeras){
1266 outputFName = m->getFullPathName(outputFName);
1267 filename = m->getFullPathName(filename);
1268 alns = m->getFullPathName(alns);
1270 //to allow for spaces in the path
1271 outputFName = "\"" + outputFName + "\"";
1272 filename = "\"" + filename + "\"";
1273 alns = "\"" + alns + "\"";
1275 vector<char*> cPara;
1277 string uchimeCommand = uchimeLocation;
1278 uchimeCommand = "\"" + uchimeCommand + "\" ";
1281 tempUchime= new char[uchimeCommand.length()+1];
1283 strncat(tempUchime, uchimeCommand.c_str(), uchimeCommand.length());
1284 cPara.push_back(tempUchime);
1286 //are you using a reference file
1287 if (templatefile != "self") {
1288 string outputFileName = filename.substr(1, filename.length()-2) + ".uchime_formatted";
1289 prepFile(filename.substr(1, filename.length()-2), outputFileName);
1290 filename = outputFileName;
1291 filename = "\"" + filename + "\"";
1292 //add reference file
1293 char* tempRef = new char[5];
1294 //strcpy(tempRef, "--db");
1295 *tempRef = '\0'; strncat(tempRef, "--db", 4);
1296 cPara.push_back(tempRef);
1297 char* tempR = new char[templatefile.length()+1];
1298 //strcpy(tempR, templatefile.c_str());
1299 *tempR = '\0'; strncat(tempR, templatefile.c_str(), templatefile.length());
1300 cPara.push_back(tempR);
1303 char* tempIn = new char[8];
1304 *tempIn = '\0'; strncat(tempIn, "--input", 7);
1305 //strcpy(tempIn, "--input");
1306 cPara.push_back(tempIn);
1307 char* temp = new char[filename.length()+1];
1308 *temp = '\0'; strncat(temp, filename.c_str(), filename.length());
1309 //strcpy(temp, filename.c_str());
1310 cPara.push_back(temp);
1312 char* tempO = new char[12];
1313 *tempO = '\0'; strncat(tempO, "--uchimeout", 11);
1314 //strcpy(tempO, "--uchimeout");
1315 cPara.push_back(tempO);
1316 char* tempout = new char[outputFName.length()+1];
1317 //strcpy(tempout, outputFName.c_str());
1318 *tempout = '\0'; strncat(tempout, outputFName.c_str(), outputFName.length());
1319 cPara.push_back(tempout);
1322 char* tempA = new char[13];
1323 *tempA = '\0'; strncat(tempA, "--uchimealns", 12);
1324 //strcpy(tempA, "--uchimealns");
1325 cPara.push_back(tempA);
1326 char* tempa = new char[alns.length()+1];
1327 //strcpy(tempa, alns.c_str());
1328 *tempa = '\0'; strncat(tempa, alns.c_str(), alns.length());
1329 cPara.push_back(tempa);
1333 char* tempA = new char[9];
1334 *tempA = '\0'; strncat(tempA, "--strand", 8);
1335 cPara.push_back(tempA);
1336 char* tempa = new char[strand.length()+1];
1337 *tempa = '\0'; strncat(tempa, strand.c_str(), strand.length());
1338 cPara.push_back(tempa);
1342 char* tempskew = new char[9];
1343 *tempskew = '\0'; strncat(tempskew, "--abskew", 8);
1344 //strcpy(tempskew, "--abskew");
1345 cPara.push_back(tempskew);
1346 char* tempSkew = new char[abskew.length()+1];
1347 //strcpy(tempSkew, abskew.c_str());
1348 *tempSkew = '\0'; strncat(tempSkew, abskew.c_str(), abskew.length());
1349 cPara.push_back(tempSkew);
1353 char* tempminh = new char[7];
1354 *tempminh = '\0'; strncat(tempminh, "--minh", 6);
1355 //strcpy(tempminh, "--minh");
1356 cPara.push_back(tempminh);
1357 char* tempMinH = new char[minh.length()+1];
1358 *tempMinH = '\0'; strncat(tempMinH, minh.c_str(), minh.length());
1359 //strcpy(tempMinH, minh.c_str());
1360 cPara.push_back(tempMinH);
1364 char* tempmindiv = new char[9];
1365 *tempmindiv = '\0'; strncat(tempmindiv, "--mindiv", 8);
1366 //strcpy(tempmindiv, "--mindiv");
1367 cPara.push_back(tempmindiv);
1368 char* tempMindiv = new char[mindiv.length()+1];
1369 *tempMindiv = '\0'; strncat(tempMindiv, mindiv.c_str(), mindiv.length());
1370 //strcpy(tempMindiv, mindiv.c_str());
1371 cPara.push_back(tempMindiv);
1375 char* tempxn = new char[5];
1376 //strcpy(tempxn, "--xn");
1377 *tempxn = '\0'; strncat(tempxn, "--xn", 4);
1378 cPara.push_back(tempxn);
1379 char* tempXn = new char[xn.length()+1];
1380 //strcpy(tempXn, xn.c_str());
1381 *tempXn = '\0'; strncat(tempXn, xn.c_str(), xn.length());
1382 cPara.push_back(tempXn);
1386 char* tempdn = new char[5];
1387 //strcpy(tempdn, "--dn");
1388 *tempdn = '\0'; strncat(tempdn, "--dn", 4);
1389 cPara.push_back(tempdn);
1390 char* tempDn = new char[dn.length()+1];
1391 *tempDn = '\0'; strncat(tempDn, dn.c_str(), dn.length());
1392 //strcpy(tempDn, dn.c_str());
1393 cPara.push_back(tempDn);
1397 char* tempxa = new char[5];
1398 //strcpy(tempxa, "--xa");
1399 *tempxa = '\0'; strncat(tempxa, "--xa", 4);
1400 cPara.push_back(tempxa);
1401 char* tempXa = new char[xa.length()+1];
1402 *tempXa = '\0'; strncat(tempXa, xa.c_str(), xa.length());
1403 //strcpy(tempXa, xa.c_str());
1404 cPara.push_back(tempXa);
1408 char* tempchunks = new char[9];
1409 //strcpy(tempchunks, "--chunks");
1410 *tempchunks = '\0'; strncat(tempchunks, "--chunks", 8);
1411 cPara.push_back(tempchunks);
1412 char* tempChunks = new char[chunks.length()+1];
1413 *tempChunks = '\0'; strncat(tempChunks, chunks.c_str(), chunks.length());
1414 //strcpy(tempChunks, chunks.c_str());
1415 cPara.push_back(tempChunks);
1419 char* tempminchunk = new char[11];
1420 //strcpy(tempminchunk, "--minchunk");
1421 *tempminchunk = '\0'; strncat(tempminchunk, "--minchunk", 10);
1422 cPara.push_back(tempminchunk);
1423 char* tempMinchunk = new char[minchunk.length()+1];
1424 *tempMinchunk = '\0'; strncat(tempMinchunk, minchunk.c_str(), minchunk.length());
1425 //strcpy(tempMinchunk, minchunk.c_str());
1426 cPara.push_back(tempMinchunk);
1429 if (useIdsmoothwindow) {
1430 char* tempidsmoothwindow = new char[17];
1431 *tempidsmoothwindow = '\0'; strncat(tempidsmoothwindow, "--idsmoothwindow", 16);
1432 //strcpy(tempidsmoothwindow, "--idsmoothwindow");
1433 cPara.push_back(tempidsmoothwindow);
1434 char* tempIdsmoothwindow = new char[idsmoothwindow.length()+1];
1435 *tempIdsmoothwindow = '\0'; strncat(tempIdsmoothwindow, idsmoothwindow.c_str(), idsmoothwindow.length());
1436 //strcpy(tempIdsmoothwindow, idsmoothwindow.c_str());
1437 cPara.push_back(tempIdsmoothwindow);
1440 /*if (useMinsmoothid) {
1441 char* tempminsmoothid = new char[14];
1442 //strcpy(tempminsmoothid, "--minsmoothid");
1443 *tempminsmoothid = '\0'; strncat(tempminsmoothid, "--minsmoothid", 13);
1444 cPara.push_back(tempminsmoothid);
1445 char* tempMinsmoothid = new char[minsmoothid.length()+1];
1446 *tempMinsmoothid = '\0'; strncat(tempMinsmoothid, minsmoothid.c_str(), minsmoothid.length());
1447 //strcpy(tempMinsmoothid, minsmoothid.c_str());
1448 cPara.push_back(tempMinsmoothid);
1452 char* tempmaxp = new char[7];
1453 //strcpy(tempmaxp, "--maxp");
1454 *tempmaxp = '\0'; strncat(tempmaxp, "--maxp", 6);
1455 cPara.push_back(tempmaxp);
1456 char* tempMaxp = new char[maxp.length()+1];
1457 *tempMaxp = '\0'; strncat(tempMaxp, maxp.c_str(), maxp.length());
1458 //strcpy(tempMaxp, maxp.c_str());
1459 cPara.push_back(tempMaxp);
1463 char* tempskipgaps = new char[13];
1464 //strcpy(tempskipgaps, "--[no]skipgaps");
1465 *tempskipgaps = '\0'; strncat(tempskipgaps, "--noskipgaps", 12);
1466 cPara.push_back(tempskipgaps);
1470 char* tempskipgaps2 = new char[14];
1471 //strcpy(tempskipgaps2, "--[no]skipgaps2");
1472 *tempskipgaps2 = '\0'; strncat(tempskipgaps2, "--noskipgaps2", 13);
1473 cPara.push_back(tempskipgaps2);
1477 char* tempminlen = new char[9];
1478 *tempminlen = '\0'; strncat(tempminlen, "--minlen", 8);
1479 //strcpy(tempminlen, "--minlen");
1480 cPara.push_back(tempminlen);
1481 char* tempMinlen = new char[minlen.length()+1];
1482 //strcpy(tempMinlen, minlen.c_str());
1483 *tempMinlen = '\0'; strncat(tempMinlen, minlen.c_str(), minlen.length());
1484 cPara.push_back(tempMinlen);
1488 char* tempmaxlen = new char[9];
1489 //strcpy(tempmaxlen, "--maxlen");
1490 *tempmaxlen = '\0'; strncat(tempmaxlen, "--maxlen", 8);
1491 cPara.push_back(tempmaxlen);
1492 char* tempMaxlen = new char[maxlen.length()+1];
1493 *tempMaxlen = '\0'; strncat(tempMaxlen, maxlen.c_str(), maxlen.length());
1494 //strcpy(tempMaxlen, maxlen.c_str());
1495 cPara.push_back(tempMaxlen);
1499 char* tempucl = new char[5];
1500 strcpy(tempucl, "--ucl");
1501 cPara.push_back(tempucl);
1504 if (useQueryfract) {
1505 char* tempqueryfract = new char[13];
1506 *tempqueryfract = '\0'; strncat(tempqueryfract, "--queryfract", 12);
1507 //strcpy(tempqueryfract, "--queryfract");
1508 cPara.push_back(tempqueryfract);
1509 char* tempQueryfract = new char[queryfract.length()+1];
1510 *tempQueryfract = '\0'; strncat(tempQueryfract, queryfract.c_str(), queryfract.length());
1511 //strcpy(tempQueryfract, queryfract.c_str());
1512 cPara.push_back(tempQueryfract);
1516 char** uchimeParameters;
1517 uchimeParameters = new char*[cPara.size()];
1518 string commandString = "";
1519 for (int i = 0; i < cPara.size(); i++) { uchimeParameters[i] = cPara[i]; commandString += toString(cPara[i]) + " "; }
1520 //int numArgs = cPara.size();
1522 //uchime_main(numArgs, uchimeParameters);
1523 //cout << "commandString = " << commandString << endl;
1524 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
1526 commandString = "\"" + commandString + "\"";
1528 if (m->debug) { m->mothurOut("[DEBUG]: uchime command = " + commandString + ".\n"); }
1529 system(commandString.c_str());
1532 for(int i = 0; i < cPara.size(); i++) { delete cPara[i]; }
1533 delete[] uchimeParameters;
1535 //remove "" from filenames
1536 outputFName = outputFName.substr(1, outputFName.length()-2);
1537 filename = filename.substr(1, filename.length()-2);
1538 alns = alns.substr(1, alns.length()-2);
1540 if (m->control_pressed) { return 0; }
1542 //create accnos file from uchime results
1544 m->openInputFile(outputFName, in);
1547 m->openOutputFile(accnos, out);
1553 if (m->control_pressed) { break; }
1556 string chimeraFlag = "";
1557 //in >> chimeraFlag >> name;
1559 string line = m->getline(in);
1560 vector<string> pieces = m->splitWhiteSpace(line);
1561 if (pieces.size() > 2) {
1563 //fix name if needed
1564 if (templatefile == "self") {
1565 name = name.substr(0, name.length()-1); //rip off last /
1566 name = name.substr(0, name.find_last_of('/'));
1569 chimeraFlag = pieces[pieces.size()-1];
1571 //for (int i = 0; i < 15; i++) { in >> chimeraFlag; }
1574 if (chimeraFlag == "Y") { out << name << endl; numChimeras++; }
1580 //if (templatefile != "self") { m->mothurRemove(filename); }
1584 catch(exception& e) {
1585 m->errorOut(e, "ChimeraUchimeCommand", "driver");
1589 /**************************************************************************************************/
1590 //uchime can't handle some of the things allowed in mothurs fasta files. This functions "cleans up" the file.
1591 int ChimeraUchimeCommand::prepFile(string filename, string output) {
1595 m->openInputFile(filename, in);
1598 m->openOutputFile(output, out);
1601 if (m->control_pressed) { break; }
1603 Sequence seq(in); m->gobble(in);
1605 if (seq.getName() != "") { seq.printSequence(out); }
1612 catch(exception& e) {
1613 m->errorOut(e, "ChimeraUchimeCommand", "prepFile");
1617 /**************************************************************************************************/
1619 int ChimeraUchimeCommand::createProcesses(string outputFileName, string filename, string accnos, string alns, int& numChimeras) {
1625 vector<string> files;
1627 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
1628 //break up file into multiple files
1629 m->divideFile(filename, processors, files);
1631 if (m->control_pressed) { return 0; }
1633 //loop through and create all the processes you want
1634 while (process != processors) {
1638 processIDS.push_back(pid); //create map from line number to pid so you can append files in correct order later
1640 }else if (pid == 0){
1641 num = driver(outputFileName + toString(getpid()) + ".temp", files[process], accnos + toString(getpid()) + ".temp", alns + toString(getpid()) + ".temp", numChimeras);
1643 //pass numSeqs to parent
1645 string tempFile = outputFileName + toString(getpid()) + ".num.temp";
1646 m->openOutputFile(tempFile, out);
1648 out << numChimeras << endl;
1653 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine();
1654 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
1660 num = driver(outputFileName, files[0], accnos, alns, numChimeras);
1662 //force parent to wait until all the processes are done
1663 for (int i=0;i<processIDS.size();i++) {
1664 int temp = processIDS[i];
1668 for (int i = 0; i < processIDS.size(); i++) {
1670 string tempFile = outputFileName + toString(processIDS[i]) + ".num.temp";
1671 m->openInputFile(tempFile, in);
1674 in >> tempNum; m->gobble(in);
1677 numChimeras += tempNum;
1679 in.close(); m->mothurRemove(tempFile);
1682 //////////////////////////////////////////////////////////////////////////////////////////////////////
1683 //Windows version shared memory, so be careful when passing variables through the preClusterData struct.
1684 //Above fork() will clone, so memory is separate, but that's not the case with windows,
1685 //////////////////////////////////////////////////////////////////////////////////////////////////////
1690 map<int, ofstream*> filehandles;
1691 map<int, ofstream*>::iterator it3;
1694 for (int i = 0; i < processors; i++) {
1695 temp = new ofstream;
1696 filehandles[i] = temp;
1697 m->openOutputFile(filename+toString(i)+".temp", *(temp));
1698 files.push_back(filename+toString(i)+".temp");
1702 m->openInputFile(filename, in);
1706 if (m->control_pressed) { in.close(); for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) { (*(it3->second)).close(); delete it3->second; } return 0; }
1708 Sequence tempSeq(in); m->gobble(in);
1710 if (tempSeq.getName() != "") {
1711 tempSeq.printSequence(*(filehandles[spot]));
1713 if (spot == processors) { spot = 0; }
1719 for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) {
1720 (*(it3->second)).close();
1724 //sanity check for number of processors
1725 if (count < processors) { processors = count; }
1727 vector<uchimeData*> pDataArray;
1728 DWORD dwThreadIdArray[processors-1];
1729 HANDLE hThreadArray[processors-1];
1730 vector<string> dummy; //used so that we can use the same struct for MyUchimeSeqsThreadFunction and MyUchimeThreadFunction
1732 //Create processor worker threads.
1733 for( int i=1; i<processors; i++ ){
1734 // Allocate memory for thread data.
1735 string extension = toString(i) + ".temp";
1737 uchimeData* tempUchime = new uchimeData(outputFileName+extension, uchimeLocation, templatefile, files[i], "", "", "", accnos+extension, alns+extension, "", dummy, m, 0, 0, i);
1738 tempUchime->setBooleans(dups, useAbskew, chimealns, useMinH, useMindiv, useXn, useDn, useXa, useChunks, useMinchunk, useIdsmoothwindow, useMinsmoothid, useMaxp, skipgaps, skipgaps2, useMinlen, useMaxlen, ucl, useQueryfract, hasCount);
1739 tempUchime->setVariables(abskew, minh, mindiv, xn, dn, xa, chunks, minchunk, idsmoothwindow, minsmoothid, maxp, minlen, maxlen, queryfract, strand);
1741 pDataArray.push_back(tempUchime);
1742 processIDS.push_back(i);
1744 //MySeqSumThreadFunction is in header. It must be global or static to work with the threads.
1745 //default security attributes, thread function name, argument to thread function, use default creation flags, returns the thread identifier
1746 hThreadArray[i-1] = CreateThread(NULL, 0, MyUchimeSeqsThreadFunction, pDataArray[i-1], 0, &dwThreadIdArray[i-1]);
1750 //using the main process as a worker saves time and memory
1751 num = driver(outputFileName, files[0], accnos, alns, numChimeras);
1753 //Wait until all threads have terminated.
1754 WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE);
1756 //Close all thread handles and free memory allocations.
1757 for(int i=0; i < pDataArray.size(); i++){
1758 num += pDataArray[i]->count;
1759 numChimeras += pDataArray[i]->numChimeras;
1760 CloseHandle(hThreadArray[i]);
1761 delete pDataArray[i];
1765 //append output files
1766 for(int i=0;i<processIDS.size();i++){
1767 m->appendFiles((outputFileName + toString(processIDS[i]) + ".temp"), outputFileName);
1768 m->mothurRemove((outputFileName + toString(processIDS[i]) + ".temp"));
1770 m->appendFiles((accnos + toString(processIDS[i]) + ".temp"), accnos);
1771 m->mothurRemove((accnos + toString(processIDS[i]) + ".temp"));
1774 m->appendFiles((alns + toString(processIDS[i]) + ".temp"), alns);
1775 m->mothurRemove((alns + toString(processIDS[i]) + ".temp"));
1779 //get rid of the file pieces.
1780 for (int i = 0; i < files.size(); i++) { m->mothurRemove(files[i]); }
1783 catch(exception& e) {
1784 m->errorOut(e, "ChimeraUchimeCommand", "createProcesses");
1788 /**************************************************************************************************/
1790 int ChimeraUchimeCommand::createProcessesGroups(string outputFName, string filename, string accnos, string alns, string newCountFile, vector<string> groups, string nameFile, string groupFile, string fastaFile) {
1797 CountTable newCount;
1798 if (hasCount && dups) { newCount.readTable(nameFile, true); }
1801 if (groups.size() < processors) { processors = groups.size(); }
1803 //divide the groups between the processors
1804 vector<linePair> lines;
1805 int numGroupsPerProcessor = groups.size() / processors;
1806 for (int i = 0; i < processors; i++) {
1807 int startIndex = i * numGroupsPerProcessor;
1808 int endIndex = (i+1) * numGroupsPerProcessor;
1809 if(i == (processors - 1)){ endIndex = groups.size(); }
1810 lines.push_back(linePair(startIndex, endIndex));
1813 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
1815 //loop through and create all the processes you want
1816 while (process != processors) {
1820 processIDS.push_back(pid); //create map from line number to pid so you can append files in correct order later
1822 }else if (pid == 0){
1823 num = driverGroups(outputFName + toString(getpid()) + ".temp", filename + toString(getpid()) + ".temp", accnos + toString(getpid()) + ".temp", alns + toString(getpid()) + ".temp", accnos + ".byCount." + toString(getpid()) + ".temp", lines[process].start, lines[process].end, groups);
1825 //pass numSeqs to parent
1827 string tempFile = outputFName + toString(getpid()) + ".num.temp";
1828 m->openOutputFile(tempFile, out);
1834 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine();
1835 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
1841 num = driverGroups(outputFName, filename, accnos, alns, accnos + ".byCount", lines[0].start, lines[0].end, groups);
1843 //force parent to wait until all the processes are done
1844 for (int i=0;i<processIDS.size();i++) {
1845 int temp = processIDS[i];
1849 for (int i = 0; i < processIDS.size(); i++) {
1851 string tempFile = outputFName + toString(processIDS[i]) + ".num.temp";
1852 m->openInputFile(tempFile, in);
1853 if (!in.eof()) { int tempNum = 0; in >> tempNum; num += tempNum; }
1854 in.close(); m->mothurRemove(tempFile);
1858 //////////////////////////////////////////////////////////////////////////////////////////////////////
1859 //Windows version shared memory, so be careful when passing variables through the uchimeData struct.
1860 //Above fork() will clone, so memory is separate, but that's not the case with windows,
1861 //////////////////////////////////////////////////////////////////////////////////////////////////////
1863 vector<uchimeData*> pDataArray;
1864 DWORD dwThreadIdArray[processors-1];
1865 HANDLE hThreadArray[processors-1];
1867 //Create processor worker threads.
1868 for( int i=1; i<processors; i++ ){
1869 // Allocate memory for thread data.
1870 string extension = toString(i) + ".temp";
1872 uchimeData* tempUchime = new uchimeData(outputFName+extension, uchimeLocation, templatefile, filename+extension, fastaFile, nameFile, groupFile, accnos+extension, alns+extension, accnos+".byCount."+extension, groups, m, lines[i].start, lines[i].end, i);
1873 tempUchime->setBooleans(dups, useAbskew, chimealns, useMinH, useMindiv, useXn, useDn, useXa, useChunks, useMinchunk, useIdsmoothwindow, useMinsmoothid, useMaxp, skipgaps, skipgaps2, useMinlen, useMaxlen, ucl, useQueryfract, hasCount);
1874 tempUchime->setVariables(abskew, minh, mindiv, xn, dn, xa, chunks, minchunk, idsmoothwindow, minsmoothid, maxp, minlen, maxlen, queryfract, strand);
1876 pDataArray.push_back(tempUchime);
1877 processIDS.push_back(i);
1879 //MyUchimeThreadFunction is in header. It must be global or static to work with the threads.
1880 //default security attributes, thread function name, argument to thread function, use default creation flags, returns the thread identifier
1881 hThreadArray[i-1] = CreateThread(NULL, 0, MyUchimeThreadFunction, pDataArray[i-1], 0, &dwThreadIdArray[i-1]);
1885 //using the main process as a worker saves time and memory
1886 num = driverGroups(outputFName, filename, accnos, alns, accnos + ".byCount", lines[0].start, lines[0].end, groups);
1888 //Wait until all threads have terminated.
1889 WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE);
1891 //Close all thread handles and free memory allocations.
1892 for(int i=0; i < pDataArray.size(); i++){
1893 num += pDataArray[i]->count;
1894 CloseHandle(hThreadArray[i]);
1895 delete pDataArray[i];
1902 if (hasCount && dups) {
1903 if (!m->isBlank(accnos + ".byCount")) {
1905 m->openInputFile(accnos + ".byCount", in2);
1908 while (!in2.eof()) {
1909 in2 >> name >> group; m->gobble(in2);
1910 newCount.setAbund(name, group, 0);
1914 m->mothurRemove(accnos + ".byCount");
1917 //append output files
1918 for(int i=0;i<processIDS.size();i++){
1919 m->appendFiles((outputFName + toString(processIDS[i]) + ".temp"), outputFName);
1920 m->mothurRemove((outputFName + toString(processIDS[i]) + ".temp"));
1922 m->appendFiles((accnos + toString(processIDS[i]) + ".temp"), accnos);
1923 m->mothurRemove((accnos + toString(processIDS[i]) + ".temp"));
1926 m->appendFiles((alns + toString(processIDS[i]) + ".temp"), alns);
1927 m->mothurRemove((alns + toString(processIDS[i]) + ".temp"));
1930 if (hasCount && dups) {
1931 if (!m->isBlank(accnos + ".byCount." + toString(processIDS[i]) + ".temp")) {
1933 m->openInputFile(accnos + ".byCount." + toString(processIDS[i]) + ".temp", in2);
1936 while (!in2.eof()) {
1937 in2 >> name >> group; m->gobble(in2);
1938 newCount.setAbund(name, group, 0);
1942 m->mothurRemove(accnos + ".byCount." + toString(processIDS[i]) + ".temp");
1947 //print new *.pick.count_table
1948 if (hasCount && dups) { newCount.printTable(newCountFile); }
1953 catch(exception& e) {
1954 m->errorOut(e, "ChimeraUchimeCommand", "createProcessesGroups");
1958 /**************************************************************************************************/