2 * chimerauchimecommand.cpp
5 * Created by westcott on 5/13/11.
6 * Copyright 2011 Schloss Lab. All rights reserved.
10 #include "chimerauchimecommand.h"
11 #include "deconvolutecommand.h"
13 #include "sequence.hpp"
14 #include "referencedb.h"
15 #include "systemcommand.h"
17 //**********************************************************************************************************************
18 vector<string> ChimeraUchimeCommand::setParameters(){
20 CommandParameter ptemplate("reference", "InputTypes", "", "", "none", "none", "none","",false,true,true); parameters.push_back(ptemplate);
21 CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none","chimera-accnos",false,true,true); parameters.push_back(pfasta);
22 CommandParameter pname("name", "InputTypes", "", "", "NameCount", "none", "none","",false,false,true); parameters.push_back(pname);
23 CommandParameter pcount("count", "InputTypes", "", "", "NameCount-CountGroup", "none", "none","",false,false,true); parameters.push_back(pcount);
24 CommandParameter pgroup("group", "InputTypes", "", "", "CountGroup", "none", "none","",false,false,true); parameters.push_back(pgroup);
25 CommandParameter pprocessors("processors", "Number", "", "1", "", "", "","",false,false,true); parameters.push_back(pprocessors);
26 CommandParameter pstrand("strand", "String", "", "", "", "", "","",false,false); parameters.push_back(pstrand);
27 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
28 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
29 CommandParameter pabskew("abskew", "Number", "", "1.9", "", "", "","",false,false); parameters.push_back(pabskew);
30 CommandParameter pchimealns("chimealns", "Boolean", "", "F", "", "", "","alns",false,false); parameters.push_back(pchimealns);
31 CommandParameter pminh("minh", "Number", "", "0.3", "", "", "","",false,false); parameters.push_back(pminh);
32 CommandParameter pmindiv("mindiv", "Number", "", "0.5", "", "", "","",false,false); parameters.push_back(pmindiv);
33 CommandParameter pxn("xn", "Number", "", "8.0", "", "", "","",false,false); parameters.push_back(pxn);
34 CommandParameter pdn("dn", "Number", "", "1.4", "", "", "","",false,false); parameters.push_back(pdn);
35 CommandParameter pxa("xa", "Number", "", "1", "", "", "","",false,false); parameters.push_back(pxa);
36 CommandParameter pchunks("chunks", "Number", "", "4", "", "", "","",false,false); parameters.push_back(pchunks);
37 CommandParameter pminchunk("minchunk", "Number", "", "64", "", "", "","",false,false); parameters.push_back(pminchunk);
38 CommandParameter pidsmoothwindow("idsmoothwindow", "Number", "", "32", "", "", "","",false,false); parameters.push_back(pidsmoothwindow);
39 CommandParameter pdups("dereplicate", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(pdups);
41 //CommandParameter pminsmoothid("minsmoothid", "Number", "", "0.95", "", "", "",false,false); parameters.push_back(pminsmoothid);
42 CommandParameter pmaxp("maxp", "Number", "", "2", "", "", "","",false,false); parameters.push_back(pmaxp);
43 CommandParameter pskipgaps("skipgaps", "Boolean", "", "T", "", "", "","",false,false); parameters.push_back(pskipgaps);
44 CommandParameter pskipgaps2("skipgaps2", "Boolean", "", "T", "", "", "","",false,false); parameters.push_back(pskipgaps2);
45 CommandParameter pminlen("minlen", "Number", "", "10", "", "", "","",false,false); parameters.push_back(pminlen);
46 CommandParameter pmaxlen("maxlen", "Number", "", "10000", "", "", "","",false,false); parameters.push_back(pmaxlen);
47 CommandParameter pucl("ucl", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(pucl);
48 CommandParameter pqueryfract("queryfract", "Number", "", "0.5", "", "", "","",false,false); parameters.push_back(pqueryfract);
50 vector<string> myArray;
51 for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); }
55 m->errorOut(e, "ChimeraUchimeCommand", "setParameters");
59 //**********************************************************************************************************************
60 string ChimeraUchimeCommand::getHelpString(){
62 string helpString = "";
63 helpString += "The chimera.uchime command reads a fastafile and referencefile and outputs potentially chimeric sequences.\n";
64 helpString += "This command is a wrapper for uchime written by Robert C. Edgar.\n";
65 helpString += "The chimera.uchime command parameters are fasta, name, count, reference, processors, dereplicate, abskew, chimealns, minh, mindiv, xn, dn, xa, chunks, minchunk, idsmoothwindow, minsmoothid, maxp, skipgaps, skipgaps2, minlen, maxlen, ucl, strand and queryfact.\n";
66 helpString += "The fasta parameter allows you to enter the fasta file containing your potentially chimeric sequences, and is required, unless you have a valid current fasta file. \n";
67 helpString += "The name parameter allows you to provide a name file, if you are using template=self. \n";
68 helpString += "The count parameter allows you to provide a count file, if you are using template=self. When you use a count file with group info and dereplicate=T, mothur will create a *.pick.count_table file containing seqeunces after chimeras are removed. \n";
69 helpString += "You may enter multiple fasta files by separating their names with dashes. ie. fasta=abrecovery.fasta-amazon.fasta \n";
70 helpString += "The group parameter allows you to provide a group file. The group file can be used with a namesfile and reference=self. When checking sequences, only sequences from the same group as the query sequence will be used as the reference. \n";
71 helpString += "If the dereplicate parameter is false, then if one group finds the seqeunce to be chimeric, then all groups find it to be chimeric, default=f.\n";
72 helpString += "The reference parameter allows you to enter a reference file containing known non-chimeric sequences, and is required. You may also set template=self, in this case the abundant sequences will be used as potential parents. \n";
73 helpString += "The processors parameter allows you to specify how many processors you would like to use. The default is 1. \n";
74 helpString += "The abskew parameter can only be used with template=self. Minimum abundance skew. Default 1.9. Abundance skew is: min [ abund(parent1), abund(parent2) ] / abund(query).\n";
75 helpString += "The chimealns parameter allows you to indicate you would like a file containing multiple alignments of query sequences to parents in human readable format. Alignments show columns with differences that support or contradict a chimeric model.\n";
76 helpString += "The minh parameter - mininum score to report chimera. Default 0.3. Values from 0.1 to 5 might be reasonable. Lower values increase sensitivity but may report more false positives. If you decrease xn you may need to increase minh, and vice versa.\n";
77 helpString += "The mindiv parameter - minimum divergence ratio, default 0.5. Div ratio is 100%% - %%identity between query sequence and the closest candidate for being a parent. If you don't care about very close chimeras, then you could increase mindiv to, say, 1.0 or 2.0, and also decrease minh, say to 0.1, to increase sensitivity. How well this works will depend on your data. Best is to tune parameters on a good benchmark.\n";
78 helpString += "The xn parameter - weight of a no vote. Default 8.0. Decreasing this weight to around 3 or 4 may give better performance on denoised data.\n";
79 helpString += "The dn parameter - pseudo-count prior on number of no votes. Default 1.4. Probably no good reason to change this unless you can retune to a good benchmark for your data. Reasonable values are probably in the range from 0.2 to 2.\n";
80 helpString += "The xa parameter - weight of an abstain vote. Default 1. So far, results do not seem to be very sensitive to this parameter, but if you have a good training set might be worth trying. Reasonable values might range from 0.1 to 2.\n";
81 helpString += "The chunks parameter is the number of chunks to extract from the query sequence when searching for parents. Default 4.\n";
82 helpString += "The minchunk parameter is the minimum length of a chunk. Default 64.\n";
83 helpString += "The idsmoothwindow parameter is the length of id smoothing window. Default 32.\n";
84 //helpString += "The minsmoothid parameter - minimum factional identity over smoothed window of candidate parent. Default 0.95.\n";
85 helpString += "The maxp parameter - maximum number of candidate parents to consider. Default 2. In tests so far, increasing maxp gives only a very small improvement in sensivity but tends to increase the error rate quite a bit.\n";
86 helpString += "The skipgaps parameter controls how gapped columns affect counting of diffs. If skipgaps is set to T, columns containing gaps do not found as diffs. Default = T.\n";
87 helpString += "The skipgaps2 parameter controls how gapped columns affect counting of diffs. If skipgaps2 is set to T, if column is immediately adjacent to a column containing a gap, it is not counted as a diff. Default = T.\n";
88 helpString += "The minlen parameter is the minimum unaligned sequence length. Defaults 10. Applies to both query and reference sequences.\n";
89 helpString += "The maxlen parameter is the maximum unaligned sequence length. Defaults 10000. Applies to both query and reference sequences.\n";
90 helpString += "The ucl parameter - use local-X alignments. Default is global-X or false. On tests so far, global-X is always better; this option is retained because it just might work well on some future type of data.\n";
91 helpString += "The queryfract parameter - minimum fraction of the query sequence that must be covered by a local-X alignment. Default 0.5. Applies only when ucl is true.\n";
93 helpString += "When using MPI, the processors parameter is set to the number of MPI processes running. \n";
95 helpString += "The chimera.uchime command should be in the following format: \n";
96 helpString += "chimera.uchime(fasta=yourFastaFile, reference=yourTemplate) \n";
97 helpString += "Example: chimera.uchime(fasta=AD.align, reference=silva.gold.align) \n";
98 helpString += "Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFastaFile).\n";
101 catch(exception& e) {
102 m->errorOut(e, "ChimeraUchimeCommand", "getHelpString");
106 //**********************************************************************************************************************
107 string ChimeraUchimeCommand::getOutputPattern(string type) {
111 if (type == "chimera") { pattern = "[filename],uchime.chimeras"; }
112 else if (type == "accnos") { pattern = "[filename],uchime.accnos"; }
113 else if (type == "alns") { pattern = "[filename],uchime.alns"; }
114 else if (type == "count") { pattern = "[filename],uchime.pick.count_table"; }
115 else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true; }
119 catch(exception& e) {
120 m->errorOut(e, "ChimeraUchimeCommand", "getOutputPattern");
124 //**********************************************************************************************************************
125 ChimeraUchimeCommand::ChimeraUchimeCommand(){
127 abort = true; calledHelp = true;
129 vector<string> tempOutNames;
130 outputTypes["chimera"] = tempOutNames;
131 outputTypes["accnos"] = tempOutNames;
132 outputTypes["alns"] = tempOutNames;
133 outputTypes["count"] = tempOutNames;
135 catch(exception& e) {
136 m->errorOut(e, "ChimeraUchimeCommand", "ChimeraUchimeCommand");
140 //***************************************************************************************************************
141 ChimeraUchimeCommand::ChimeraUchimeCommand(string option) {
143 abort = false; calledHelp = false; hasName=false; hasCount=false;
144 ReferenceDB* rdb = ReferenceDB::getInstance();
146 //allow user to run help
147 if(option == "help") { help(); abort = true; calledHelp = true; }
148 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
151 vector<string> myArray = setParameters();
153 OptionParser parser(option);
154 map<string,string> parameters = parser.getParameters();
156 ValidParameters validParameter("chimera.uchime");
157 map<string,string>::iterator it;
159 //check to make sure all parameters are valid for command
160 for (it = parameters.begin(); it != parameters.end(); it++) {
161 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
164 vector<string> tempOutNames;
165 outputTypes["chimera"] = tempOutNames;
166 outputTypes["accnos"] = tempOutNames;
167 outputTypes["alns"] = tempOutNames;
168 outputTypes["count"] = tempOutNames;
170 //if the user changes the input directory command factory will send this info to us in the output parameter
171 string inputDir = validParameter.validFile(parameters, "inputdir", false);
172 if (inputDir == "not found"){ inputDir = ""; }
174 //check for required parameters
175 fastafile = validParameter.validFile(parameters, "fasta", false);
176 if (fastafile == "not found") {
177 //if there is a current fasta file, use it
178 string filename = m->getFastaFile();
179 if (filename != "") { fastaFileNames.push_back(filename); m->mothurOut("Using " + filename + " as input file for the fasta parameter."); m->mothurOutEndLine(); }
180 else { m->mothurOut("You have no current fastafile and the fasta parameter is required."); m->mothurOutEndLine(); abort = true; }
182 m->splitAtDash(fastafile, fastaFileNames);
184 //go through files and make sure they are good, if not, then disregard them
185 for (int i = 0; i < fastaFileNames.size(); i++) {
188 if (fastaFileNames[i] == "current") {
189 fastaFileNames[i] = m->getFastaFile();
190 if (fastaFileNames[i] != "") { m->mothurOut("Using " + fastaFileNames[i] + " as input file for the fasta parameter where you had given current."); m->mothurOutEndLine(); }
192 m->mothurOut("You have no current fastafile, ignoring current."); m->mothurOutEndLine(); ignore=true;
193 //erase from file list
194 fastaFileNames.erase(fastaFileNames.begin()+i);
201 if (inputDir != "") {
202 string path = m->hasPath(fastaFileNames[i]);
203 //if the user has not given a path then, add inputdir. else leave path alone.
204 if (path == "") { fastaFileNames[i] = inputDir + fastaFileNames[i]; }
210 ableToOpen = m->openInputFile(fastaFileNames[i], in, "noerror");
212 //if you can't open it, try default location
213 if (ableToOpen == 1) {
214 if (m->getDefaultPath() != "") { //default path is set
215 string tryPath = m->getDefaultPath() + m->getSimpleName(fastaFileNames[i]);
216 m->mothurOut("Unable to open " + fastaFileNames[i] + ". Trying default " + tryPath); m->mothurOutEndLine();
218 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
220 fastaFileNames[i] = tryPath;
224 if (ableToOpen == 1) {
225 if (m->getOutputDir() != "") { //default path is set
226 string tryPath = m->getOutputDir() + m->getSimpleName(fastaFileNames[i]);
227 m->mothurOut("Unable to open " + fastaFileNames[i] + ". Trying output directory " + tryPath); m->mothurOutEndLine();
229 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
231 fastaFileNames[i] = tryPath;
237 if (ableToOpen == 1) {
238 m->mothurOut("Unable to open " + fastaFileNames[i] + ". It will be disregarded."); m->mothurOutEndLine();
239 //erase from file list
240 fastaFileNames.erase(fastaFileNames.begin()+i);
243 m->setFastaFile(fastaFileNames[i]);
248 //make sure there is at least one valid file left
249 if (fastaFileNames.size() == 0) { m->mothurOut("[ERROR]: no valid files."); m->mothurOutEndLine(); abort = true; }
253 //check for required parameters
254 namefile = validParameter.validFile(parameters, "name", false);
255 if (namefile == "not found") { namefile = ""; }
257 m->splitAtDash(namefile, nameFileNames);
259 //go through files and make sure they are good, if not, then disregard them
260 for (int i = 0; i < nameFileNames.size(); i++) {
263 if (nameFileNames[i] == "current") {
264 nameFileNames[i] = m->getNameFile();
265 if (nameFileNames[i] != "") { m->mothurOut("Using " + nameFileNames[i] + " as input file for the name parameter where you had given current."); m->mothurOutEndLine(); }
267 m->mothurOut("You have no current namefile, ignoring current."); m->mothurOutEndLine(); ignore=true;
268 //erase from file list
269 nameFileNames.erase(nameFileNames.begin()+i);
276 if (inputDir != "") {
277 string path = m->hasPath(nameFileNames[i]);
278 //if the user has not given a path then, add inputdir. else leave path alone.
279 if (path == "") { nameFileNames[i] = inputDir + nameFileNames[i]; }
285 ableToOpen = m->openInputFile(nameFileNames[i], in, "noerror");
287 //if you can't open it, try default location
288 if (ableToOpen == 1) {
289 if (m->getDefaultPath() != "") { //default path is set
290 string tryPath = m->getDefaultPath() + m->getSimpleName(nameFileNames[i]);
291 m->mothurOut("Unable to open " + nameFileNames[i] + ". Trying default " + tryPath); m->mothurOutEndLine();
293 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
295 nameFileNames[i] = tryPath;
299 if (ableToOpen == 1) {
300 if (m->getOutputDir() != "") { //default path is set
301 string tryPath = m->getOutputDir() + m->getSimpleName(nameFileNames[i]);
302 m->mothurOut("Unable to open " + nameFileNames[i] + ". Trying output directory " + tryPath); m->mothurOutEndLine();
304 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
306 nameFileNames[i] = tryPath;
312 if (ableToOpen == 1) {
313 m->mothurOut("Unable to open " + nameFileNames[i] + ". It will be disregarded."); m->mothurOutEndLine();
314 //erase from file list
315 nameFileNames.erase(nameFileNames.begin()+i);
318 m->setNameFile(nameFileNames[i]);
324 if (nameFileNames.size() != 0) { hasName = true; }
326 //check for required parameters
327 vector<string> countfileNames;
328 countfile = validParameter.validFile(parameters, "count", false);
329 if (countfile == "not found") {
332 m->splitAtDash(countfile, countfileNames);
334 //go through files and make sure they are good, if not, then disregard them
335 for (int i = 0; i < countfileNames.size(); i++) {
338 if (countfileNames[i] == "current") {
339 countfileNames[i] = m->getCountTableFile();
340 if (nameFileNames[i] != "") { m->mothurOut("Using " + countfileNames[i] + " as input file for the count parameter where you had given current."); m->mothurOutEndLine(); }
342 m->mothurOut("You have no current count file, ignoring current."); m->mothurOutEndLine(); ignore=true;
343 //erase from file list
344 countfileNames.erase(countfileNames.begin()+i);
351 if (inputDir != "") {
352 string path = m->hasPath(countfileNames[i]);
353 //if the user has not given a path then, add inputdir. else leave path alone.
354 if (path == "") { countfileNames[i] = inputDir + countfileNames[i]; }
360 ableToOpen = m->openInputFile(countfileNames[i], in, "noerror");
362 //if you can't open it, try default location
363 if (ableToOpen == 1) {
364 if (m->getDefaultPath() != "") { //default path is set
365 string tryPath = m->getDefaultPath() + m->getSimpleName(countfileNames[i]);
366 m->mothurOut("Unable to open " + countfileNames[i] + ". Trying default " + tryPath); m->mothurOutEndLine();
368 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
370 countfileNames[i] = tryPath;
374 if (ableToOpen == 1) {
375 if (m->getOutputDir() != "") { //default path is set
376 string tryPath = m->getOutputDir() + m->getSimpleName(countfileNames[i]);
377 m->mothurOut("Unable to open " + countfileNames[i] + ". Trying output directory " + tryPath); m->mothurOutEndLine();
379 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
381 countfileNames[i] = tryPath;
387 if (ableToOpen == 1) {
388 m->mothurOut("Unable to open " + countfileNames[i] + ". It will be disregarded."); m->mothurOutEndLine();
389 //erase from file list
390 countfileNames.erase(countfileNames.begin()+i);
393 m->setCountTableFile(countfileNames[i]);
399 if (countfileNames.size() != 0) { hasCount = true; }
401 //make sure there is at least one valid file left
402 if (hasName && hasCount) { m->mothurOut("[ERROR]: You must enter ONLY ONE of the following: count or name."); m->mothurOutEndLine(); abort = true; }
404 if (!hasName && hasCount) { nameFileNames = countfileNames; }
406 if ((hasCount || hasName) && (nameFileNames.size() != fastaFileNames.size())) { m->mothurOut("[ERROR]: The number of name or count files does not match the number of fastafiles, please correct."); m->mothurOutEndLine(); abort=true; }
408 bool hasGroup = true;
409 groupfile = validParameter.validFile(parameters, "group", false);
410 if (groupfile == "not found") { groupfile = ""; hasGroup = false; }
412 m->splitAtDash(groupfile, groupFileNames);
414 //go through files and make sure they are good, if not, then disregard them
415 for (int i = 0; i < groupFileNames.size(); i++) {
418 if (groupFileNames[i] == "current") {
419 groupFileNames[i] = m->getGroupFile();
420 if (groupFileNames[i] != "") { m->mothurOut("Using " + groupFileNames[i] + " as input file for the group parameter where you had given current."); m->mothurOutEndLine(); }
422 m->mothurOut("You have no current namefile, ignoring current."); m->mothurOutEndLine(); ignore=true;
423 //erase from file list
424 groupFileNames.erase(groupFileNames.begin()+i);
431 if (inputDir != "") {
432 string path = m->hasPath(groupFileNames[i]);
433 //if the user has not given a path then, add inputdir. else leave path alone.
434 if (path == "") { groupFileNames[i] = inputDir + groupFileNames[i]; }
440 ableToOpen = m->openInputFile(groupFileNames[i], in, "noerror");
442 //if you can't open it, try default location
443 if (ableToOpen == 1) {
444 if (m->getDefaultPath() != "") { //default path is set
445 string tryPath = m->getDefaultPath() + m->getSimpleName(groupFileNames[i]);
446 m->mothurOut("Unable to open " + groupFileNames[i] + ". Trying default " + tryPath); m->mothurOutEndLine();
448 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
450 groupFileNames[i] = tryPath;
454 if (ableToOpen == 1) {
455 if (m->getOutputDir() != "") { //default path is set
456 string tryPath = m->getOutputDir() + m->getSimpleName(groupFileNames[i]);
457 m->mothurOut("Unable to open " + groupFileNames[i] + ". Trying output directory " + tryPath); m->mothurOutEndLine();
459 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
461 groupFileNames[i] = tryPath;
467 if (ableToOpen == 1) {
468 m->mothurOut("Unable to open " + groupFileNames[i] + ". It will be disregarded."); m->mothurOutEndLine();
469 //erase from file list
470 groupFileNames.erase(groupFileNames.begin()+i);
473 m->setGroupFile(groupFileNames[i]);
478 //make sure there is at least one valid file left
479 if (groupFileNames.size() == 0) { m->mothurOut("[ERROR]: no valid group files."); m->mothurOutEndLine(); abort = true; }
482 if (hasGroup && (groupFileNames.size() != fastaFileNames.size())) { m->mothurOut("[ERROR]: The number of groupfiles does not match the number of fastafiles, please correct."); m->mothurOutEndLine(); abort=true; }
484 if (hasGroup && hasCount) { m->mothurOut("[ERROR]: You must enter ONLY ONE of the following: count or group."); m->mothurOutEndLine(); abort = true; }
485 //if the user changes the output directory command factory will send this info to us in the output parameter
486 outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = ""; }
489 //if the user changes the output directory command factory will send this info to us in the output parameter
490 outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = ""; }
493 it = parameters.find("reference");
494 //user has given a template file
495 if(it != parameters.end()){
496 if (it->second == "self") { templatefile = "self"; }
498 path = m->hasPath(it->second);
499 //if the user has not given a path then, add inputdir. else leave path alone.
500 if (path == "") { parameters["reference"] = inputDir + it->second; }
502 templatefile = validParameter.validFile(parameters, "reference", true);
503 if (templatefile == "not open") { abort = true; }
504 else if (templatefile == "not found") { //check for saved reference sequences
505 if (rdb->getSavedReference() != "") {
506 templatefile = rdb->getSavedReference();
507 m->mothurOutEndLine(); m->mothurOut("Using sequences from " + rdb->getSavedReference() + "."); m->mothurOutEndLine();
509 m->mothurOut("[ERROR]: You don't have any saved reference sequences and the reference parameter is a required.");
510 m->mothurOutEndLine();
515 }else if (hasName) { templatefile = "self"; }
516 else if (hasCount) { templatefile = "self"; }
518 if (rdb->getSavedReference() != "") {
519 templatefile = rdb->getSavedReference();
520 m->mothurOutEndLine(); m->mothurOut("Using sequences from " + rdb->getSavedReference() + "."); m->mothurOutEndLine();
522 m->mothurOut("[ERROR]: You don't have any saved reference sequences and the reference parameter is a required.");
523 m->mothurOutEndLine();
524 templatefile = ""; abort = true;
528 string temp = validParameter.validFile(parameters, "processors", false); if (temp == "not found"){ temp = m->getProcessors(); }
529 m->setProcessors(temp);
530 m->mothurConvert(temp, processors);
532 abskew = validParameter.validFile(parameters, "abskew", false); if (abskew == "not found"){ useAbskew = false; abskew = "1.9"; }else{ useAbskew = true; }
533 if (useAbskew && templatefile != "self") { m->mothurOut("The abskew parameter is only valid with template=self, ignoring."); m->mothurOutEndLine(); useAbskew = false; }
535 temp = validParameter.validFile(parameters, "chimealns", false); if (temp == "not found") { temp = "f"; }
536 chimealns = m->isTrue(temp);
538 minh = validParameter.validFile(parameters, "minh", false); if (minh == "not found") { useMinH = false; minh = "0.3"; } else{ useMinH = true; }
539 mindiv = validParameter.validFile(parameters, "mindiv", false); if (mindiv == "not found") { useMindiv = false; mindiv = "0.5"; } else{ useMindiv = true; }
540 xn = validParameter.validFile(parameters, "xn", false); if (xn == "not found") { useXn = false; xn = "8.0"; } else{ useXn = true; }
541 dn = validParameter.validFile(parameters, "dn", false); if (dn == "not found") { useDn = false; dn = "1.4"; } else{ useDn = true; }
542 xa = validParameter.validFile(parameters, "xa", false); if (xa == "not found") { useXa = false; xa = "1"; } else{ useXa = true; }
543 chunks = validParameter.validFile(parameters, "chunks", false); if (chunks == "not found") { useChunks = false; chunks = "4"; } else{ useChunks = true; }
544 minchunk = validParameter.validFile(parameters, "minchunk", false); if (minchunk == "not found") { useMinchunk = false; minchunk = "64"; } else{ useMinchunk = true; }
545 idsmoothwindow = validParameter.validFile(parameters, "idsmoothwindow", false); if (idsmoothwindow == "not found") { useIdsmoothwindow = false; idsmoothwindow = "32"; } else{ useIdsmoothwindow = true; }
546 //minsmoothid = validParameter.validFile(parameters, "minsmoothid", false); if (minsmoothid == "not found") { useMinsmoothid = false; minsmoothid = "0.95"; } else{ useMinsmoothid = true; }
547 maxp = validParameter.validFile(parameters, "maxp", false); if (maxp == "not found") { useMaxp = false; maxp = "2"; } else{ useMaxp = true; }
548 minlen = validParameter.validFile(parameters, "minlen", false); if (minlen == "not found") { useMinlen = false; minlen = "10"; } else{ useMinlen = true; }
549 maxlen = validParameter.validFile(parameters, "maxlen", false); if (maxlen == "not found") { useMaxlen = false; maxlen = "10000"; } else{ useMaxlen = true; }
551 strand = validParameter.validFile(parameters, "strand", false); if (strand == "not found") { strand = ""; }
553 temp = validParameter.validFile(parameters, "ucl", false); if (temp == "not found") { temp = "f"; }
554 ucl = m->isTrue(temp);
556 queryfract = validParameter.validFile(parameters, "queryfract", false); if (queryfract == "not found") { useQueryfract = false; queryfract = "0.5"; } else{ useQueryfract = true; }
557 if (!ucl && useQueryfract) { m->mothurOut("queryfact may only be used when ucl=t, ignoring."); m->mothurOutEndLine(); useQueryfract = false; }
559 temp = validParameter.validFile(parameters, "skipgaps", false); if (temp == "not found") { temp = "t"; }
560 skipgaps = m->isTrue(temp);
562 temp = validParameter.validFile(parameters, "skipgaps2", false); if (temp == "not found") { temp = "t"; }
563 skipgaps2 = m->isTrue(temp);
566 temp = validParameter.validFile(parameters, "dereplicate", false);
567 if (temp == "not found") {
568 if (groupfile != "") { temp = "false"; }
569 else { temp = "true"; }
571 dups = m->isTrue(temp);
574 if (hasName && (templatefile != "self")) { m->mothurOut("You have provided a namefile and the reference parameter is not set to self. I am not sure what reference you are trying to use, aborting."); m->mothurOutEndLine(); abort=true; }
575 if (hasGroup && (templatefile != "self")) { m->mothurOut("You have provided a group file and the reference parameter is not set to self. I am not sure what reference you are trying to use, aborting."); m->mothurOutEndLine(); abort=true; }
577 //look for uchime exe
579 string tempPath = path;
580 for (int i = 0; i < path.length(); i++) { tempPath[i] = tolower(path[i]); }
581 path = path.substr(0, (tempPath.find_last_of('m')));
583 string uchimeCommand;
584 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
585 uchimeCommand = path + "uchime"; // format the database, -o option gives us the ability
587 m->mothurOut("[DEBUG]: Uchime location using \"which uchime\" = ");
588 Command* newCommand = new SystemCommand("which uchime"); m->mothurOutEndLine();
589 newCommand->execute();
591 m->mothurOut("[DEBUG]: Mothur's location using \"which mothur\" = ");
592 newCommand = new SystemCommand("which mothur"); m->mothurOutEndLine();
593 newCommand->execute();
597 uchimeCommand = path + "uchime.exe";
600 //test to make sure uchime exists
602 uchimeCommand = m->getFullPathName(uchimeCommand);
603 int ableToOpen = m->openInputFile(uchimeCommand, in, "no error"); in.close();
604 if(ableToOpen == 1) {
605 m->mothurOut(uchimeCommand + " file does not exist. Checking path... \n");
606 //check to see if uchime is in the path??
608 string uLocation = m->findProgramPath("uchime");
612 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
613 ableToOpen = m->openInputFile(uLocation, in2, "no error"); in2.close();
615 ableToOpen = m->openInputFile((uLocation + ".exe"), in2, "no error"); in2.close();
618 if(ableToOpen == 1) { m->mothurOut("[ERROR]: " + uLocation + " file does not exist. mothur requires the uchime executable."); m->mothurOutEndLine(); abort = true; }
619 else { m->mothurOut("Found uchime in your path, using " + uLocation + "\n");uchimeLocation = uLocation; }
620 }else { uchimeLocation = uchimeCommand; }
622 uchimeLocation = m->getFullPathName(uchimeLocation);
625 catch(exception& e) {
626 m->errorOut(e, "ChimeraSlayerCommand", "ChimeraSlayerCommand");
630 //***************************************************************************************************************
632 int ChimeraUchimeCommand::execute(){
635 if (abort == true) { if (calledHelp) { return 0; } return 2; }
637 m->mothurOut("\nuchime by Robert C. Edgar\nhttp://drive5.com/uchime\nThis code is donated to the public domain.\n\n");
639 for (int s = 0; s < fastaFileNames.size(); s++) {
641 m->mothurOut("Checking sequences from " + fastaFileNames[s] + " ..." ); m->mothurOutEndLine();
643 int start = time(NULL);
644 string nameFile = "";
645 if (outputDir == "") { outputDir = m->hasPath(fastaFileNames[s]); }//if user entered a file with a path then preserve it
646 map<string, string> variables;
647 variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s]));
648 string outputFileName = getOutputFileName("chimera", variables);
649 string accnosFileName = getOutputFileName("accnos", variables);
650 string alnsFileName = getOutputFileName("alns", variables);
651 string newFasta = m->getRootName(fastaFileNames[s]) + "temp";
652 string newCountFile = "";
654 //you provided a groupfile
655 string groupFile = "";
656 bool hasGroup = false;
657 if (groupFileNames.size() != 0) { groupFile = groupFileNames[s]; hasGroup = true; }
660 if (ct.testGroups(nameFileNames[s])) { hasGroup = true; }
661 variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(nameFileNames[s]));
662 newCountFile = getOutputFileName("count", variables);
665 if ((templatefile == "self") && (!hasGroup)) { //you want to run uchime with a template=self and no groups
667 if (processors != 1) { m->mothurOut("When using template=self, mothur can only use 1 processor, continuing."); m->mothurOutEndLine(); processors = 1; }
668 if (nameFileNames.size() != 0) { //you provided a namefile and we don't need to create one
669 nameFile = nameFileNames[s];
670 }else { nameFile = getNamesFile(fastaFileNames[s]); }
672 map<string, string> seqs;
673 readFasta(fastaFileNames[s], seqs); if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } return 0; }
676 vector<seqPriorityNode> nameMapCount;
680 ct.readTable(nameFile);
681 for(map<string, string>::iterator it = seqs.begin(); it != seqs.end(); it++) {
682 int num = ct.getNumSeqs(it->first);
683 if (num == 0) { error = 1; }
685 seqPriorityNode temp(num, it->second, it->first);
686 nameMapCount.push_back(temp);
690 error = m->readNames(nameFile, nameMapCount, seqs); if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } return 0; }
692 if (error == 1) { for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } return 0; }
693 if (seqs.size() != nameMapCount.size()) { m->mothurOut( "The number of sequences in your fastafile does not match the number of sequences in your namefile, aborting."); m->mothurOutEndLine(); for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } return 0; }
695 printFile(nameMapCount, newFasta);
696 fastaFileNames[s] = newFasta;
699 if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } return 0; }
702 if (nameFileNames.size() != 0) { //you provided a namefile and we don't need to create one
703 nameFile = nameFileNames[s];
704 }else { nameFile = getNamesFile(fastaFileNames[s]); }
706 //Parse sequences by group
707 vector<string> groups;
708 map<string, string> uniqueNames;
710 cparser = new SequenceCountParser(nameFile, fastaFileNames[s]);
711 groups = cparser->getNamesOfGroups();
712 uniqueNames = cparser->getAllSeqsMap();
714 sparser = new SequenceParser(groupFile, fastaFileNames[s], nameFile);
715 groups = sparser->getNamesOfGroups();
716 uniqueNames = sparser->getAllSeqsMap();
719 if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } return 0; }
722 ofstream out, out1, out2;
723 m->openOutputFile(outputFileName, out); out.close();
724 m->openOutputFile(accnosFileName, out1); out1.close();
725 if (chimealns) { m->openOutputFile(alnsFileName, out2); out2.close(); }
728 if(processors == 1) { totalSeqs = driverGroups(outputFileName, newFasta, accnosFileName, alnsFileName, newCountFile, 0, groups.size(), groups);
730 if (hasCount && dups) {
731 CountTable c; c.readTable(nameFile);
732 if (!m->isBlank(newCountFile)) {
734 m->openInputFile(newCountFile, in2);
738 in2 >> name >> group; m->gobble(in2);
739 c.setAbund(name, group, 0);
743 m->mothurRemove(newCountFile);
744 c.printTable(newCountFile);
747 }else { totalSeqs = createProcessesGroups(outputFileName, newFasta, accnosFileName, alnsFileName, newCountFile, groups, nameFile, groupFile, fastaFileNames[s]); }
749 if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } return 0; }
753 int totalChimeras = deconvoluteResults(uniqueNames, outputFileName, accnosFileName, alnsFileName);
755 m->mothurOutEndLine(); m->mothurOut("It took " + toString(time(NULL) - start) + " secs to check " + toString(totalSeqs) + " sequences. " + toString(totalChimeras) + " chimeras were found."); m->mothurOutEndLine();
756 m->mothurOut("The number of sequences checked may be larger than the number of unique sequences because some sequences are found in several samples."); m->mothurOutEndLine();
760 set<string> doNotRemove;
761 CountTable c; c.readTable(newCountFile);
762 vector<string> namesInTable = c.getNamesOfSeqs();
763 for (int i = 0; i < namesInTable.size(); i++) {
764 int temp = c.getNumSeqs(namesInTable[i]);
765 if (temp == 0) { c.remove(namesInTable[i]); }
766 else { doNotRemove.insert((namesInTable[i])); }
768 //remove names we want to keep from accnos file.
769 set<string> accnosNames = m->readAccnos(accnosFileName);
771 m->openOutputFile(accnosFileName, out2);
772 for (set<string>::iterator it = accnosNames.begin(); it != accnosNames.end(); it++) {
773 if (doNotRemove.count(*it) == 0) { out2 << (*it) << endl; }
776 c.printTable(newCountFile);
780 if (hasCount) { delete cparser; }
781 else { delete sparser; }
783 if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } return 0; }
786 if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } return 0; }
791 if(processors == 1){ numSeqs = driver(outputFileName, fastaFileNames[s], accnosFileName, alnsFileName, numChimeras); }
792 else{ numSeqs = createProcesses(outputFileName, fastaFileNames[s], accnosFileName, alnsFileName, numChimeras); }
796 m->openOutputFile(outputFileName+".temp", out);
797 out << "Score\tQuery\tParentA\tParentB\tIdQM\tIdQA\tIdQB\tIdAB\tIdQT\tLY\tLN\tLA\tRY\tRN\tRA\tDiv\tYN\n";
800 m->appendFiles(outputFileName, outputFileName+".temp");
801 m->mothurRemove(outputFileName); rename((outputFileName+".temp").c_str(), outputFileName.c_str());
803 if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } return 0; }
805 //remove file made for uchime
806 if (templatefile == "self") { m->mothurRemove(fastaFileNames[s]); }
808 m->mothurOutEndLine(); m->mothurOut("It took " + toString(time(NULL) - start) + " secs to check " + toString(numSeqs) + " sequences. " + toString(numChimeras) + " chimeras were found."); m->mothurOutEndLine();
811 outputNames.push_back(outputFileName); outputTypes["chimera"].push_back(outputFileName);
812 outputNames.push_back(accnosFileName); outputTypes["accnos"].push_back(accnosFileName);
813 if (chimealns) { outputNames.push_back(alnsFileName); outputTypes["alns"].push_back(alnsFileName); }
816 //set accnos file as new current accnosfile
818 itTypes = outputTypes.find("accnos");
819 if (itTypes != outputTypes.end()) {
820 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setAccnosFile(current); }
823 m->mothurOutEndLine();
824 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
825 for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); }
826 m->mothurOutEndLine();
831 catch(exception& e) {
832 m->errorOut(e, "ChimeraUchimeCommand", "execute");
836 //**********************************************************************************************************************
837 int ChimeraUchimeCommand::deconvoluteResults(map<string, string>& uniqueNames, string outputFileName, string accnosFileName, string alnsFileName){
839 map<string, string>::iterator itUnique;
844 m->openInputFile(accnosFileName, in2);
847 m->openOutputFile(accnosFileName+".temp", out2);
850 set<string> namesInFile; //this is so if a sequence is found to be chimera in several samples we dont write it to the results file more than once
851 set<string>::iterator itNames;
852 set<string> chimerasInFile;
853 set<string>::iterator itChimeras;
857 if (m->control_pressed) { in2.close(); out2.close(); m->mothurRemove(outputFileName); m->mothurRemove((accnosFileName+".temp")); return 0; }
859 in2 >> name; m->gobble(in2);
862 itUnique = uniqueNames.find(name);
864 if (itUnique == uniqueNames.end()) { m->mothurOut("[ERROR]: trouble parsing accnos results. Cannot find " + name + "."); m->mothurOutEndLine(); m->control_pressed = true; }
866 itChimeras = chimerasInFile.find((itUnique->second));
868 if (itChimeras == chimerasInFile.end()) {
869 out2 << itUnique->second << endl;
870 chimerasInFile.insert((itUnique->second));
878 m->mothurRemove(accnosFileName);
879 rename((accnosFileName+".temp").c_str(), accnosFileName.c_str());
885 m->openInputFile(outputFileName, in);
888 m->openOutputFile(outputFileName+".temp", out); out.setf(ios::fixed, ios::floatfield); out.setf(ios::showpoint);
889 out << "Score\tQuery\tParentA\tParentB\tIdQM\tIdQA\tIdQB\tIdAB\tIdQT\tLY\tLN\tLA\tRY\tRN\tRA\tDiv\tYN\n";
892 string parent1, parent2, temp2, temp3, temp4, temp5, temp6, temp7, temp8, temp9, temp10, temp11, temp12, temp13, flag;
895 //assumptions - in file each read will always look like - if uchime source is updated, revisit this code.
896 /* 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
897 0.000000 F11Fcsw_33372/ab=18/ * * * * * * * * * * * * * * N
898 0.018300 F11Fcsw_14980/ab=16/ F11Fcsw_1915/ab=35/ F11Fcsw_6032/ab=42/ 79.9 78.7 78.2 78.7 79.2 3 0 5 11 10 20 1.46 N
903 if (m->control_pressed) { in.close(); out.close(); m->mothurRemove((outputFileName+".temp")); return 0; }
906 in >> temp1; m->gobble(in);
907 in >> name; m->gobble(in);
908 in >> parent1; m->gobble(in);
909 in >> parent2; m->gobble(in);
910 in >> temp2 >> temp3 >> temp4 >> temp5 >> temp6 >> temp7 >> temp8 >> temp9 >> temp10 >> temp11 >> temp12 >> temp13 >> flag;
913 //parse name - name will look like U68590/ab=1/
914 string restOfName = "";
915 int pos = name.find_first_of('/');
916 if (pos != string::npos) {
917 restOfName = name.substr(pos);
918 name = name.substr(0, pos);
922 itUnique = uniqueNames.find(name);
924 if (itUnique == uniqueNames.end()) { m->mothurOut("[ERROR]: trouble parsing chimera results. Cannot find "+ name + "."); m->mothurOutEndLine(); m->control_pressed = true; }
926 name = itUnique->second;
927 //is this name already in the file
928 itNames = namesInFile.find((name));
930 if (itNames == namesInFile.end()) { //no not in file
931 if (flag == "N") { //are you really a no??
932 //is this sequence really not chimeric??
933 itChimeras = chimerasInFile.find(name);
935 //then you really are a no so print, otherwise skip
936 if (itChimeras == chimerasInFile.end()) { print = true; }
937 }else{ print = true; }
942 out << temp1 << '\t' << name << restOfName << '\t';
943 namesInFile.insert(name);
945 //parse parent1 names
946 if (parent1 != "*") {
948 pos = parent1.find_first_of('/');
949 if (pos != string::npos) {
950 restOfName = parent1.substr(pos);
951 parent1 = parent1.substr(0, pos);
954 itUnique = uniqueNames.find(parent1);
955 if (itUnique == uniqueNames.end()) { m->mothurOut("[ERROR]: trouble parsing chimera results. Cannot find parentA "+ parent1 + "."); m->mothurOutEndLine(); m->control_pressed = true; }
956 else { out << itUnique->second << restOfName << '\t'; }
957 }else { out << parent1 << '\t'; }
959 //parse parent2 names
960 if (parent2 != "*") {
962 pos = parent2.find_first_of('/');
963 if (pos != string::npos) {
964 restOfName = parent2.substr(pos);
965 parent2 = parent2.substr(0, pos);
968 itUnique = uniqueNames.find(parent2);
969 if (itUnique == uniqueNames.end()) { m->mothurOut("[ERROR]: trouble parsing chimera results. Cannot find parentB "+ parent2 + "."); m->mothurOutEndLine(); m->control_pressed = true; }
970 else { out << itUnique->second << restOfName << '\t'; }
971 }else { out << parent2 << '\t'; }
973 out << temp2 << '\t' << temp3 << '\t' << temp4 << '\t' << temp5 << '\t' << temp6 << '\t' << temp7 << '\t' << temp8 << '\t' << temp9 << '\t' << temp10 << '\t' << temp11 << '\t' << temp12 << temp13 << '\t' << flag << endl;
979 m->mothurRemove(outputFileName);
980 rename((outputFileName+".temp").c_str(), outputFileName.c_str());
984 //assumptions - in file each read will always look like - if uchime source is updated, revisit this code.
986 ------------------------------------------------------------------------
987 Query ( 179 nt) F21Fcsw_11639/ab=591/
988 ParentA ( 179 nt) F11Fcsw_6529/ab=1625/
989 ParentB ( 181 nt) F21Fcsw_12128/ab=1827/
991 A 1 AAGgAAGAtTAATACaagATGgCaTCatgAGtccgCATgTtcAcatGATTAAAG--gTaTtcCGGTagacGATGGGGATG 78
992 Q 1 AAGTAAGACTAATACCCAATGACGTCTCTAGAAGACATCTGAAAGAGATTAAAG--ATTTATCGGTGATGGATGGGGATG 78
993 B 1 AAGgAAGAtTAATcCaggATGggaTCatgAGttcACATgTccgcatGATTAAAGgtATTTtcCGGTagacGATGGGGATG 80
994 Diffs N N A N?N N N NNN N?NB N ?NaNNN B B NN NNNN
995 Votes 0 0 + 000 0 0 000 000+ 0 00!000 + 00 0000
996 Model AAAAAAAAAAAAAAAAAAAAAAxBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB
998 A 79 CGTtccATTAGaTaGTaGGCGGGGTAACGGCCCACCtAGtCttCGATggaTAGGGGTTCTGAGAGGAAGGTCCCCCACAT 158
999 Q 79 CGTCTGATTAGCTTGTTGGCGGGGTAACGGCCCACCAAGGCAACGATCAGTAGGGGTTCTGAGAGGAAGGTCCCCCACAT 158
1000 B 81 CGTtccATTAGaTaGTaGGCGGGGTAACGGCCCACCtAGtCAACGATggaTAGGGGTTCTGAGAGGAAGGTCCCCCACAT 160
1001 Diffs NNN N N N N N BB NNN
1002 Votes 000 0 0 0 0 0 ++ 000
1003 Model BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB
1005 A 159 TGGAACTGAGACACGGTCCAA 179
1006 Q 159 TGGAACTGAGACACGGTCCAA 179
1007 B 161 TGGAACTGAGACACGGTCCAA 181
1010 Model BBBBBBBBBBBBBBBBBBBBB
1012 Ids. QA 76.6%, QB 77.7%, AB 93.7%, QModel 78.9%, Div. +1.5%
1013 Diffs Left 7: N 0, A 6, Y 1 (14.3%); Right 35: N 1, A 30, Y 4 (11.4%), Score 0.0047
1017 m->openInputFile(alnsFileName, in3);
1020 m->openOutputFile(alnsFileName+".temp", out3); out3.setf(ios::fixed, ios::floatfield); out3.setf(ios::showpoint);
1023 namesInFile.clear();
1026 while (!in3.eof()) {
1027 if (m->control_pressed) { in3.close(); out3.close(); m->mothurRemove(outputFileName); m->mothurRemove((accnosFileName)); m->mothurRemove((alnsFileName+".temp")); return 0; }
1030 line = m->getline(in3);
1034 istringstream iss(line);
1037 //are you a name line
1038 if ((temp == "Query") || (temp == "ParentA") || (temp == "ParentB")) {
1040 for (int i = 0; i < line.length(); i++) {
1042 if (line[i] == ')') { break; }
1043 else { out3 << line[i]; }
1046 if (spot == (line.length() - 1)) { m->mothurOut("[ERROR]: could not line sequence name in line " + line + "."); m->mothurOutEndLine(); m->control_pressed = true; }
1047 else if ((spot+2) > (line.length() - 1)) { m->mothurOut("[ERROR]: could not line sequence name in line " + line + "."); m->mothurOutEndLine(); m->control_pressed = true; }
1049 out << line[spot] << line[spot+1];
1051 name = line.substr(spot+2);
1053 //parse name - name will either look like U68590/ab=1/ or U68590
1054 string restOfName = "";
1055 int pos = name.find_first_of('/');
1056 if (pos != string::npos) {
1057 restOfName = name.substr(pos);
1058 name = name.substr(0, pos);
1062 itUnique = uniqueNames.find(name);
1064 if (itUnique == uniqueNames.end()) { m->mothurOut("[ERROR]: trouble parsing alns results. Cannot find "+ name + "."); m->mothurOutEndLine();m->control_pressed = true; }
1066 //only limit repeats on query names
1067 if (temp == "Query") {
1068 itNames = namesInFile.find((itUnique->second));
1070 if (itNames == namesInFile.end()) {
1071 out << itUnique->second << restOfName << endl;
1072 namesInFile.insert((itUnique->second));
1074 }else { out << itUnique->second << restOfName << endl; }
1079 }else { //not need to alter line
1080 out3 << line << endl;
1082 }else { out3 << endl; }
1087 m->mothurRemove(alnsFileName);
1088 rename((alnsFileName+".temp").c_str(), alnsFileName.c_str());
1093 catch(exception& e) {
1094 m->errorOut(e, "ChimeraUchimeCommand", "deconvoluteResults");
1098 //**********************************************************************************************************************
1099 int ChimeraUchimeCommand::printFile(vector<seqPriorityNode>& nameMapCount, string filename){
1102 sort(nameMapCount.begin(), nameMapCount.end(), compareSeqPriorityNodes);
1105 m->openOutputFile(filename, out);
1107 //print new file in order of
1108 for (int i = 0; i < nameMapCount.size(); i++) {
1109 out << ">" << nameMapCount[i].name << "/ab=" << nameMapCount[i].numIdentical << "/" << endl << nameMapCount[i].seq << endl;
1115 catch(exception& e) {
1116 m->errorOut(e, "ChimeraUchimeCommand", "printFile");
1120 //**********************************************************************************************************************
1121 int ChimeraUchimeCommand::readFasta(string filename, map<string, string>& seqs){
1123 //create input file for uchime
1124 //read through fastafile and store info
1126 m->openInputFile(filename, in);
1130 if (m->control_pressed) { in.close(); return 0; }
1132 Sequence seq(in); m->gobble(in);
1133 seqs[seq.getName()] = seq.getAligned();
1139 catch(exception& e) {
1140 m->errorOut(e, "ChimeraUchimeCommand", "readFasta");
1144 //**********************************************************************************************************************
1146 string ChimeraUchimeCommand::getNamesFile(string& inputFile){
1148 string nameFile = "";
1150 m->mothurOutEndLine(); m->mothurOut("No namesfile given, running unique.seqs command to generate one."); m->mothurOutEndLine(); m->mothurOutEndLine();
1152 //use unique.seqs to create new name and fastafile
1153 string inputString = "fasta=" + inputFile;
1154 m->mothurOut("/******************************************/"); m->mothurOutEndLine();
1155 m->mothurOut("Running command: unique.seqs(" + inputString + ")"); m->mothurOutEndLine();
1156 m->mothurCalling = true;
1158 Command* uniqueCommand = new DeconvoluteCommand(inputString);
1159 uniqueCommand->execute();
1161 map<string, vector<string> > filenames = uniqueCommand->getOutputFiles();
1163 delete uniqueCommand;
1164 m->mothurCalling = false;
1165 m->mothurOut("/******************************************/"); m->mothurOutEndLine();
1167 nameFile = filenames["name"][0];
1168 inputFile = filenames["fasta"][0];
1172 catch(exception& e) {
1173 m->errorOut(e, "ChimeraUchimeCommand", "getNamesFile");
1177 //**********************************************************************************************************************
1178 int ChimeraUchimeCommand::driverGroups(string outputFName, string filename, string accnos, string alns, string countlist, int start, int end, vector<string> groups){
1182 int numChimeras = 0;
1184 ofstream outCountList;
1185 if (hasCount && dups) { m->openOutputFile(countlist, outCountList); }
1187 for (int i = start; i < end; i++) {
1188 int start = time(NULL); if (m->control_pressed) { outCountList.close(); m->mothurRemove(countlist); return 0; }
1191 if (hasCount) { error = cparser->getSeqs(groups[i], filename, true); if ((error == 1) || m->control_pressed) { return 0; } }
1192 else { error = sparser->getSeqs(groups[i], filename, true); if ((error == 1) || m->control_pressed) { return 0; } }
1194 int numSeqs = driver((outputFName + groups[i]), filename, (accnos+groups[i]), (alns+ groups[i]), numChimeras);
1195 totalSeqs += numSeqs;
1197 if (m->control_pressed) { return 0; }
1199 //remove file made for uchime
1200 if (!m->debug) { m->mothurRemove(filename); }
1201 else { m->mothurOut("[DEBUG]: saving file: " + filename + ".\n"); }
1203 //if we provided a count file with group info and set dereplicate=t, then we want to create a *.pick.count_table
1204 //This table will zero out group counts for seqs determined to be chimeric by that group.
1206 if (!m->isBlank(accnos+groups[i])) {
1208 m->openInputFile(accnos+groups[i], in);
1212 in >> name; m->gobble(in);
1213 outCountList << name << '\t' << groups[i] << endl;
1217 map<string, string> thisnamemap = sparser->getNameMap(groups[i]);
1218 map<string, string>::iterator itN;
1220 m->openOutputFile(accnos+groups[i]+".temp", out);
1222 in >> name; m->gobble(in);
1223 itN = thisnamemap.find(name);
1224 if (itN != thisnamemap.end()) {
1225 vector<string> tempNames; m->splitAtComma(itN->second, tempNames);
1226 for (int j = 0; j < tempNames.size(); j++) { out << tempNames[j] << endl; }
1228 }else { m->mothurOut("[ERROR]: parsing cannot find " + name + ".\n"); m->control_pressed = true; }
1232 m->renameFile(accnos+groups[i]+".temp", accnos+groups[i]);
1239 m->appendFiles((outputFName+groups[i]), outputFName); m->mothurRemove((outputFName+groups[i]));
1240 m->appendFiles((accnos+groups[i]), accnos); m->mothurRemove((accnos+groups[i]));
1241 if (chimealns) { m->appendFiles((alns+groups[i]), alns); m->mothurRemove((alns+groups[i])); }
1243 m->mothurOutEndLine(); m->mothurOut("It took " + toString(time(NULL) - start) + " secs to check " + toString(numSeqs) + " sequences from group " + groups[i] + "."); m->mothurOutEndLine();
1246 if (hasCount && dups) { outCountList.close(); }
1251 catch(exception& e) {
1252 m->errorOut(e, "ChimeraUchimeCommand", "driverGroups");
1256 //**********************************************************************************************************************
1258 int ChimeraUchimeCommand::driver(string outputFName, string filename, string accnos, string alns, int& numChimeras){
1261 outputFName = m->getFullPathName(outputFName);
1262 filename = m->getFullPathName(filename);
1263 alns = m->getFullPathName(alns);
1265 //to allow for spaces in the path
1266 outputFName = "\"" + outputFName + "\"";
1267 filename = "\"" + filename + "\"";
1268 alns = "\"" + alns + "\"";
1270 vector<char*> cPara;
1272 string uchimeCommand = uchimeLocation;
1273 uchimeCommand = "\"" + uchimeCommand + "\" ";
1276 tempUchime= new char[uchimeCommand.length()+1];
1278 strncat(tempUchime, uchimeCommand.c_str(), uchimeCommand.length());
1279 cPara.push_back(tempUchime);
1281 //are you using a reference file
1282 if (templatefile != "self") {
1283 string outputFileName = filename.substr(1, filename.length()-2) + ".uchime_formatted";
1284 prepFile(filename.substr(1, filename.length()-2), outputFileName);
1285 filename = outputFileName;
1286 filename = "\"" + filename + "\"";
1287 //add reference file
1288 char* tempRef = new char[5];
1289 //strcpy(tempRef, "--db");
1290 *tempRef = '\0'; strncat(tempRef, "--db", 4);
1291 cPara.push_back(tempRef);
1292 char* tempR = new char[templatefile.length()+1];
1293 //strcpy(tempR, templatefile.c_str());
1294 *tempR = '\0'; strncat(tempR, templatefile.c_str(), templatefile.length());
1295 cPara.push_back(tempR);
1298 char* tempIn = new char[8];
1299 *tempIn = '\0'; strncat(tempIn, "--input", 7);
1300 //strcpy(tempIn, "--input");
1301 cPara.push_back(tempIn);
1302 char* temp = new char[filename.length()+1];
1303 *temp = '\0'; strncat(temp, filename.c_str(), filename.length());
1304 //strcpy(temp, filename.c_str());
1305 cPara.push_back(temp);
1307 char* tempO = new char[12];
1308 *tempO = '\0'; strncat(tempO, "--uchimeout", 11);
1309 //strcpy(tempO, "--uchimeout");
1310 cPara.push_back(tempO);
1311 char* tempout = new char[outputFName.length()+1];
1312 //strcpy(tempout, outputFName.c_str());
1313 *tempout = '\0'; strncat(tempout, outputFName.c_str(), outputFName.length());
1314 cPara.push_back(tempout);
1317 char* tempA = new char[13];
1318 *tempA = '\0'; strncat(tempA, "--uchimealns", 12);
1319 //strcpy(tempA, "--uchimealns");
1320 cPara.push_back(tempA);
1321 char* tempa = new char[alns.length()+1];
1322 //strcpy(tempa, alns.c_str());
1323 *tempa = '\0'; strncat(tempa, alns.c_str(), alns.length());
1324 cPara.push_back(tempa);
1328 char* tempA = new char[9];
1329 *tempA = '\0'; strncat(tempA, "--strand", 8);
1330 cPara.push_back(tempA);
1331 char* tempa = new char[strand.length()+1];
1332 *tempa = '\0'; strncat(tempa, strand.c_str(), strand.length());
1333 cPara.push_back(tempa);
1337 char* tempskew = new char[9];
1338 *tempskew = '\0'; strncat(tempskew, "--abskew", 8);
1339 //strcpy(tempskew, "--abskew");
1340 cPara.push_back(tempskew);
1341 char* tempSkew = new char[abskew.length()+1];
1342 //strcpy(tempSkew, abskew.c_str());
1343 *tempSkew = '\0'; strncat(tempSkew, abskew.c_str(), abskew.length());
1344 cPara.push_back(tempSkew);
1348 char* tempminh = new char[7];
1349 *tempminh = '\0'; strncat(tempminh, "--minh", 6);
1350 //strcpy(tempminh, "--minh");
1351 cPara.push_back(tempminh);
1352 char* tempMinH = new char[minh.length()+1];
1353 *tempMinH = '\0'; strncat(tempMinH, minh.c_str(), minh.length());
1354 //strcpy(tempMinH, minh.c_str());
1355 cPara.push_back(tempMinH);
1359 char* tempmindiv = new char[9];
1360 *tempmindiv = '\0'; strncat(tempmindiv, "--mindiv", 8);
1361 //strcpy(tempmindiv, "--mindiv");
1362 cPara.push_back(tempmindiv);
1363 char* tempMindiv = new char[mindiv.length()+1];
1364 *tempMindiv = '\0'; strncat(tempMindiv, mindiv.c_str(), mindiv.length());
1365 //strcpy(tempMindiv, mindiv.c_str());
1366 cPara.push_back(tempMindiv);
1370 char* tempxn = new char[5];
1371 //strcpy(tempxn, "--xn");
1372 *tempxn = '\0'; strncat(tempxn, "--xn", 4);
1373 cPara.push_back(tempxn);
1374 char* tempXn = new char[xn.length()+1];
1375 //strcpy(tempXn, xn.c_str());
1376 *tempXn = '\0'; strncat(tempXn, xn.c_str(), xn.length());
1377 cPara.push_back(tempXn);
1381 char* tempdn = new char[5];
1382 //strcpy(tempdn, "--dn");
1383 *tempdn = '\0'; strncat(tempdn, "--dn", 4);
1384 cPara.push_back(tempdn);
1385 char* tempDn = new char[dn.length()+1];
1386 *tempDn = '\0'; strncat(tempDn, dn.c_str(), dn.length());
1387 //strcpy(tempDn, dn.c_str());
1388 cPara.push_back(tempDn);
1392 char* tempxa = new char[5];
1393 //strcpy(tempxa, "--xa");
1394 *tempxa = '\0'; strncat(tempxa, "--xa", 4);
1395 cPara.push_back(tempxa);
1396 char* tempXa = new char[xa.length()+1];
1397 *tempXa = '\0'; strncat(tempXa, xa.c_str(), xa.length());
1398 //strcpy(tempXa, xa.c_str());
1399 cPara.push_back(tempXa);
1403 char* tempchunks = new char[9];
1404 //strcpy(tempchunks, "--chunks");
1405 *tempchunks = '\0'; strncat(tempchunks, "--chunks", 8);
1406 cPara.push_back(tempchunks);
1407 char* tempChunks = new char[chunks.length()+1];
1408 *tempChunks = '\0'; strncat(tempChunks, chunks.c_str(), chunks.length());
1409 //strcpy(tempChunks, chunks.c_str());
1410 cPara.push_back(tempChunks);
1414 char* tempminchunk = new char[11];
1415 //strcpy(tempminchunk, "--minchunk");
1416 *tempminchunk = '\0'; strncat(tempminchunk, "--minchunk", 10);
1417 cPara.push_back(tempminchunk);
1418 char* tempMinchunk = new char[minchunk.length()+1];
1419 *tempMinchunk = '\0'; strncat(tempMinchunk, minchunk.c_str(), minchunk.length());
1420 //strcpy(tempMinchunk, minchunk.c_str());
1421 cPara.push_back(tempMinchunk);
1424 if (useIdsmoothwindow) {
1425 char* tempidsmoothwindow = new char[17];
1426 *tempidsmoothwindow = '\0'; strncat(tempidsmoothwindow, "--idsmoothwindow", 16);
1427 //strcpy(tempidsmoothwindow, "--idsmoothwindow");
1428 cPara.push_back(tempidsmoothwindow);
1429 char* tempIdsmoothwindow = new char[idsmoothwindow.length()+1];
1430 *tempIdsmoothwindow = '\0'; strncat(tempIdsmoothwindow, idsmoothwindow.c_str(), idsmoothwindow.length());
1431 //strcpy(tempIdsmoothwindow, idsmoothwindow.c_str());
1432 cPara.push_back(tempIdsmoothwindow);
1435 /*if (useMinsmoothid) {
1436 char* tempminsmoothid = new char[14];
1437 //strcpy(tempminsmoothid, "--minsmoothid");
1438 *tempminsmoothid = '\0'; strncat(tempminsmoothid, "--minsmoothid", 13);
1439 cPara.push_back(tempminsmoothid);
1440 char* tempMinsmoothid = new char[minsmoothid.length()+1];
1441 *tempMinsmoothid = '\0'; strncat(tempMinsmoothid, minsmoothid.c_str(), minsmoothid.length());
1442 //strcpy(tempMinsmoothid, minsmoothid.c_str());
1443 cPara.push_back(tempMinsmoothid);
1447 char* tempmaxp = new char[7];
1448 //strcpy(tempmaxp, "--maxp");
1449 *tempmaxp = '\0'; strncat(tempmaxp, "--maxp", 6);
1450 cPara.push_back(tempmaxp);
1451 char* tempMaxp = new char[maxp.length()+1];
1452 *tempMaxp = '\0'; strncat(tempMaxp, maxp.c_str(), maxp.length());
1453 //strcpy(tempMaxp, maxp.c_str());
1454 cPara.push_back(tempMaxp);
1458 char* tempskipgaps = new char[13];
1459 //strcpy(tempskipgaps, "--[no]skipgaps");
1460 *tempskipgaps = '\0'; strncat(tempskipgaps, "--noskipgaps", 12);
1461 cPara.push_back(tempskipgaps);
1465 char* tempskipgaps2 = new char[14];
1466 //strcpy(tempskipgaps2, "--[no]skipgaps2");
1467 *tempskipgaps2 = '\0'; strncat(tempskipgaps2, "--noskipgaps2", 13);
1468 cPara.push_back(tempskipgaps2);
1472 char* tempminlen = new char[9];
1473 *tempminlen = '\0'; strncat(tempminlen, "--minlen", 8);
1474 //strcpy(tempminlen, "--minlen");
1475 cPara.push_back(tempminlen);
1476 char* tempMinlen = new char[minlen.length()+1];
1477 //strcpy(tempMinlen, minlen.c_str());
1478 *tempMinlen = '\0'; strncat(tempMinlen, minlen.c_str(), minlen.length());
1479 cPara.push_back(tempMinlen);
1483 char* tempmaxlen = new char[9];
1484 //strcpy(tempmaxlen, "--maxlen");
1485 *tempmaxlen = '\0'; strncat(tempmaxlen, "--maxlen", 8);
1486 cPara.push_back(tempmaxlen);
1487 char* tempMaxlen = new char[maxlen.length()+1];
1488 *tempMaxlen = '\0'; strncat(tempMaxlen, maxlen.c_str(), maxlen.length());
1489 //strcpy(tempMaxlen, maxlen.c_str());
1490 cPara.push_back(tempMaxlen);
1494 char* tempucl = new char[5];
1495 strcpy(tempucl, "--ucl");
1496 cPara.push_back(tempucl);
1499 if (useQueryfract) {
1500 char* tempqueryfract = new char[13];
1501 *tempqueryfract = '\0'; strncat(tempqueryfract, "--queryfract", 12);
1502 //strcpy(tempqueryfract, "--queryfract");
1503 cPara.push_back(tempqueryfract);
1504 char* tempQueryfract = new char[queryfract.length()+1];
1505 *tempQueryfract = '\0'; strncat(tempQueryfract, queryfract.c_str(), queryfract.length());
1506 //strcpy(tempQueryfract, queryfract.c_str());
1507 cPara.push_back(tempQueryfract);
1511 char** uchimeParameters;
1512 uchimeParameters = new char*[cPara.size()];
1513 string commandString = "";
1514 for (int i = 0; i < cPara.size(); i++) { uchimeParameters[i] = cPara[i]; commandString += toString(cPara[i]) + " "; }
1515 //int numArgs = cPara.size();
1517 //uchime_main(numArgs, uchimeParameters);
1518 //cout << "commandString = " << commandString << endl;
1519 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
1521 commandString = "\"" + commandString + "\"";
1523 if (m->debug) { m->mothurOut("[DEBUG]: uchime command = " + commandString + ".\n"); }
1524 system(commandString.c_str());
1527 for(int i = 0; i < cPara.size(); i++) { delete cPara[i]; }
1528 delete[] uchimeParameters;
1530 //remove "" from filenames
1531 outputFName = outputFName.substr(1, outputFName.length()-2);
1532 filename = filename.substr(1, filename.length()-2);
1533 alns = alns.substr(1, alns.length()-2);
1535 if (m->control_pressed) { return 0; }
1537 //create accnos file from uchime results
1539 m->openInputFile(outputFName, in);
1542 m->openOutputFile(accnos, out);
1548 if (m->control_pressed) { break; }
1551 string chimeraFlag = "";
1552 //in >> chimeraFlag >> name;
1554 string line = m->getline(in);
1555 vector<string> pieces = m->splitWhiteSpace(line);
1556 if (pieces.size() > 2) {
1558 //fix name if needed
1559 if (templatefile == "self") {
1560 name = name.substr(0, name.length()-1); //rip off last /
1561 name = name.substr(0, name.find_last_of('/'));
1564 chimeraFlag = pieces[pieces.size()-1];
1566 //for (int i = 0; i < 15; i++) { in >> chimeraFlag; }
1569 if (chimeraFlag == "Y") { out << name << endl; numChimeras++; }
1575 //if (templatefile != "self") { m->mothurRemove(filename); }
1579 catch(exception& e) {
1580 m->errorOut(e, "ChimeraUchimeCommand", "driver");
1584 /**************************************************************************************************/
1585 //uchime can't handle some of the things allowed in mothurs fasta files. This functions "cleans up" the file.
1586 int ChimeraUchimeCommand::prepFile(string filename, string output) {
1590 m->openInputFile(filename, in);
1593 m->openOutputFile(output, out);
1596 if (m->control_pressed) { break; }
1598 Sequence seq(in); m->gobble(in);
1600 if (seq.getName() != "") { seq.printSequence(out); }
1607 catch(exception& e) {
1608 m->errorOut(e, "ChimeraUchimeCommand", "prepFile");
1612 /**************************************************************************************************/
1614 int ChimeraUchimeCommand::createProcesses(string outputFileName, string filename, string accnos, string alns, int& numChimeras) {
1620 vector<string> files;
1622 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
1623 //break up file into multiple files
1624 m->divideFile(filename, processors, files);
1626 if (m->control_pressed) { return 0; }
1628 //loop through and create all the processes you want
1629 while (process != processors) {
1633 processIDS.push_back(pid); //create map from line number to pid so you can append files in correct order later
1635 }else if (pid == 0){
1636 num = driver(outputFileName + toString(getpid()) + ".temp", files[process], accnos + toString(getpid()) + ".temp", alns + toString(getpid()) + ".temp", numChimeras);
1638 //pass numSeqs to parent
1640 string tempFile = outputFileName + toString(getpid()) + ".num.temp";
1641 m->openOutputFile(tempFile, out);
1643 out << numChimeras << endl;
1648 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine();
1649 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
1655 num = driver(outputFileName, files[0], accnos, alns, numChimeras);
1657 //force parent to wait until all the processes are done
1658 for (int i=0;i<processIDS.size();i++) {
1659 int temp = processIDS[i];
1663 for (int i = 0; i < processIDS.size(); i++) {
1665 string tempFile = outputFileName + toString(processIDS[i]) + ".num.temp";
1666 m->openInputFile(tempFile, in);
1669 in >> tempNum; m->gobble(in);
1672 numChimeras += tempNum;
1674 in.close(); m->mothurRemove(tempFile);
1677 //////////////////////////////////////////////////////////////////////////////////////////////////////
1678 //Windows version shared memory, so be careful when passing variables through the preClusterData struct.
1679 //Above fork() will clone, so memory is separate, but that's not the case with windows,
1680 //////////////////////////////////////////////////////////////////////////////////////////////////////
1685 map<int, ofstream*> filehandles;
1686 map<int, ofstream*>::iterator it3;
1689 for (int i = 0; i < processors; i++) {
1690 temp = new ofstream;
1691 filehandles[i] = temp;
1692 m->openOutputFile(filename+toString(i)+".temp", *(temp));
1693 files.push_back(filename+toString(i)+".temp");
1697 m->openInputFile(filename, in);
1701 if (m->control_pressed) { in.close(); for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) { (*(it3->second)).close(); delete it3->second; } return 0; }
1703 Sequence tempSeq(in); m->gobble(in);
1705 if (tempSeq.getName() != "") {
1706 tempSeq.printSequence(*(filehandles[spot]));
1708 if (spot == processors) { spot = 0; }
1714 for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) {
1715 (*(it3->second)).close();
1719 //sanity check for number of processors
1720 if (count < processors) { processors = count; }
1722 vector<uchimeData*> pDataArray;
1723 DWORD dwThreadIdArray[processors-1];
1724 HANDLE hThreadArray[processors-1];
1725 vector<string> dummy; //used so that we can use the same struct for MyUchimeSeqsThreadFunction and MyUchimeThreadFunction
1727 //Create processor worker threads.
1728 for( int i=1; i<processors; i++ ){
1729 // Allocate memory for thread data.
1730 string extension = toString(i) + ".temp";
1732 uchimeData* tempUchime = new uchimeData(outputFileName+extension, uchimeLocation, templatefile, files[i], "", "", "", accnos+extension, alns+extension, "", dummy, m, 0, 0, i);
1733 tempUchime->setBooleans(dups, useAbskew, chimealns, useMinH, useMindiv, useXn, useDn, useXa, useChunks, useMinchunk, useIdsmoothwindow, useMinsmoothid, useMaxp, skipgaps, skipgaps2, useMinlen, useMaxlen, ucl, useQueryfract, hasCount);
1734 tempUchime->setVariables(abskew, minh, mindiv, xn, dn, xa, chunks, minchunk, idsmoothwindow, minsmoothid, maxp, minlen, maxlen, queryfract, strand);
1736 pDataArray.push_back(tempUchime);
1737 processIDS.push_back(i);
1739 //MySeqSumThreadFunction is in header. It must be global or static to work with the threads.
1740 //default security attributes, thread function name, argument to thread function, use default creation flags, returns the thread identifier
1741 hThreadArray[i-1] = CreateThread(NULL, 0, MyUchimeSeqsThreadFunction, pDataArray[i-1], 0, &dwThreadIdArray[i-1]);
1745 //using the main process as a worker saves time and memory
1746 num = driver(outputFileName, files[0], accnos, alns, numChimeras);
1748 //Wait until all threads have terminated.
1749 WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE);
1751 //Close all thread handles and free memory allocations.
1752 for(int i=0; i < pDataArray.size(); i++){
1753 num += pDataArray[i]->count;
1754 numChimeras += pDataArray[i]->numChimeras;
1755 CloseHandle(hThreadArray[i]);
1756 delete pDataArray[i];
1760 //append output files
1761 for(int i=0;i<processIDS.size();i++){
1762 m->appendFiles((outputFileName + toString(processIDS[i]) + ".temp"), outputFileName);
1763 m->mothurRemove((outputFileName + toString(processIDS[i]) + ".temp"));
1765 m->appendFiles((accnos + toString(processIDS[i]) + ".temp"), accnos);
1766 m->mothurRemove((accnos + toString(processIDS[i]) + ".temp"));
1769 m->appendFiles((alns + toString(processIDS[i]) + ".temp"), alns);
1770 m->mothurRemove((alns + toString(processIDS[i]) + ".temp"));
1774 //get rid of the file pieces.
1775 for (int i = 0; i < files.size(); i++) { m->mothurRemove(files[i]); }
1778 catch(exception& e) {
1779 m->errorOut(e, "ChimeraUchimeCommand", "createProcesses");
1783 /**************************************************************************************************/
1785 int ChimeraUchimeCommand::createProcessesGroups(string outputFName, string filename, string accnos, string alns, string newCountFile, vector<string> groups, string nameFile, string groupFile, string fastaFile) {
1792 CountTable newCount;
1793 if (hasCount && dups) { newCount.readTable(nameFile); }
1796 if (groups.size() < processors) { processors = groups.size(); }
1798 //divide the groups between the processors
1799 vector<linePair> lines;
1800 int numGroupsPerProcessor = groups.size() / processors;
1801 for (int i = 0; i < processors; i++) {
1802 int startIndex = i * numGroupsPerProcessor;
1803 int endIndex = (i+1) * numGroupsPerProcessor;
1804 if(i == (processors - 1)){ endIndex = groups.size(); }
1805 lines.push_back(linePair(startIndex, endIndex));
1808 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
1810 //loop through and create all the processes you want
1811 while (process != processors) {
1815 processIDS.push_back(pid); //create map from line number to pid so you can append files in correct order later
1817 }else if (pid == 0){
1818 num = driverGroups(outputFName + toString(getpid()) + ".temp", filename + toString(getpid()) + ".temp", accnos + toString(getpid()) + ".temp", alns + toString(getpid()) + ".temp", accnos + ".byCount." + toString(getpid()) + ".temp", lines[process].start, lines[process].end, groups);
1820 //pass numSeqs to parent
1822 string tempFile = outputFName + toString(getpid()) + ".num.temp";
1823 m->openOutputFile(tempFile, out);
1829 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine();
1830 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
1836 num = driverGroups(outputFName, filename, accnos, alns, accnos + ".byCount", lines[0].start, lines[0].end, groups);
1838 //force parent to wait until all the processes are done
1839 for (int i=0;i<processIDS.size();i++) {
1840 int temp = processIDS[i];
1844 for (int i = 0; i < processIDS.size(); i++) {
1846 string tempFile = outputFName + toString(processIDS[i]) + ".num.temp";
1847 m->openInputFile(tempFile, in);
1848 if (!in.eof()) { int tempNum = 0; in >> tempNum; num += tempNum; }
1849 in.close(); m->mothurRemove(tempFile);
1853 //////////////////////////////////////////////////////////////////////////////////////////////////////
1854 //Windows version shared memory, so be careful when passing variables through the uchimeData struct.
1855 //Above fork() will clone, so memory is separate, but that's not the case with windows,
1856 //////////////////////////////////////////////////////////////////////////////////////////////////////
1858 vector<uchimeData*> pDataArray;
1859 DWORD dwThreadIdArray[processors-1];
1860 HANDLE hThreadArray[processors-1];
1862 //Create processor worker threads.
1863 for( int i=1; i<processors; i++ ){
1864 // Allocate memory for thread data.
1865 string extension = toString(i) + ".temp";
1867 uchimeData* tempUchime = new uchimeData(outputFName+extension, uchimeLocation, templatefile, filename+extension, fastaFile, nameFile, groupFile, accnos+extension, alns+extension, accnos+".byCount."+extension, groups, m, lines[i].start, lines[i].end, i);
1868 tempUchime->setBooleans(dups, useAbskew, chimealns, useMinH, useMindiv, useXn, useDn, useXa, useChunks, useMinchunk, useIdsmoothwindow, useMinsmoothid, useMaxp, skipgaps, skipgaps2, useMinlen, useMaxlen, ucl, useQueryfract, hasCount);
1869 tempUchime->setVariables(abskew, minh, mindiv, xn, dn, xa, chunks, minchunk, idsmoothwindow, minsmoothid, maxp, minlen, maxlen, queryfract, strand);
1871 pDataArray.push_back(tempUchime);
1872 processIDS.push_back(i);
1874 //MyUchimeThreadFunction is in header. It must be global or static to work with the threads.
1875 //default security attributes, thread function name, argument to thread function, use default creation flags, returns the thread identifier
1876 hThreadArray[i-1] = CreateThread(NULL, 0, MyUchimeThreadFunction, pDataArray[i-1], 0, &dwThreadIdArray[i-1]);
1880 //using the main process as a worker saves time and memory
1881 num = driverGroups(outputFName, filename, accnos, alns, accnos + ".byCount", lines[0].start, lines[0].end, groups);
1883 //Wait until all threads have terminated.
1884 WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE);
1886 //Close all thread handles and free memory allocations.
1887 for(int i=0; i < pDataArray.size(); i++){
1888 num += pDataArray[i]->count;
1889 CloseHandle(hThreadArray[i]);
1890 delete pDataArray[i];
1897 if (hasCount && dups) {
1898 if (!m->isBlank(accnos + ".byCount")) {
1900 m->openInputFile(accnos + ".byCount", in2);
1903 while (!in2.eof()) {
1904 in2 >> name >> group; m->gobble(in2);
1905 newCount.setAbund(name, group, 0);
1909 m->mothurRemove(accnos + ".byCount");
1912 //append output files
1913 for(int i=0;i<processIDS.size();i++){
1914 m->appendFiles((outputFName + toString(processIDS[i]) + ".temp"), outputFName);
1915 m->mothurRemove((outputFName + toString(processIDS[i]) + ".temp"));
1917 m->appendFiles((accnos + toString(processIDS[i]) + ".temp"), accnos);
1918 m->mothurRemove((accnos + toString(processIDS[i]) + ".temp"));
1921 m->appendFiles((alns + toString(processIDS[i]) + ".temp"), alns);
1922 m->mothurRemove((alns + toString(processIDS[i]) + ".temp"));
1925 if (hasCount && dups) {
1926 if (!m->isBlank(accnos + ".byCount." + toString(processIDS[i]) + ".temp")) {
1928 m->openInputFile(accnos + ".byCount." + toString(processIDS[i]) + ".temp", in2);
1931 while (!in2.eof()) {
1932 in2 >> name >> group; m->gobble(in2);
1933 newCount.setAbund(name, group, 0);
1937 m->mothurRemove(accnos + ".byCount." + toString(processIDS[i]) + ".temp");
1942 //print new *.pick.count_table
1943 if (hasCount && dups) { newCount.printTable(newCountFile); }
1948 catch(exception& e) {
1949 m->errorOut(e, "ChimeraUchimeCommand", "createProcessesGroups");
1953 /**************************************************************************************************/