2 * chimerauchimecommand.cpp
5 * Created by westcott on 5/13/11.
6 * Copyright 2011 Schloss Lab. All rights reserved.
10 #include "chimerauchimecommand.h"
11 #include "deconvolutecommand.h"
13 #include "sequence.hpp"
14 #include "referencedb.h"
15 #include "systemcommand.h"
17 //**********************************************************************************************************************
18 vector<string> ChimeraUchimeCommand::setParameters(){
20 CommandParameter ptemplate("reference", "InputTypes", "", "", "none", "none", "none","",false,true,true); parameters.push_back(ptemplate);
21 CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none","chimera-accnos",false,true,true); parameters.push_back(pfasta);
22 CommandParameter pname("name", "InputTypes", "", "", "NameCount", "none", "none","",false,false,true); parameters.push_back(pname);
23 CommandParameter pcount("count", "InputTypes", "", "", "NameCount-CountGroup", "none", "none","",false,false,true); parameters.push_back(pcount);
24 CommandParameter pgroup("group", "InputTypes", "", "", "CountGroup", "none", "none","",false,false,true); parameters.push_back(pgroup);
25 CommandParameter pprocessors("processors", "Number", "", "1", "", "", "","",false,false,true); parameters.push_back(pprocessors);
26 CommandParameter pstrand("strand", "String", "", "", "", "", "","",false,false); parameters.push_back(pstrand);
27 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
28 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
29 CommandParameter pabskew("abskew", "Number", "", "1.9", "", "", "","",false,false); parameters.push_back(pabskew);
30 CommandParameter pchimealns("chimealns", "Boolean", "", "F", "", "", "","alns",false,false); parameters.push_back(pchimealns);
31 CommandParameter pminh("minh", "Number", "", "0.3", "", "", "","",false,false); parameters.push_back(pminh);
32 CommandParameter pmindiv("mindiv", "Number", "", "0.5", "", "", "","",false,false); parameters.push_back(pmindiv);
33 CommandParameter pxn("xn", "Number", "", "8.0", "", "", "","",false,false); parameters.push_back(pxn);
34 CommandParameter pdn("dn", "Number", "", "1.4", "", "", "","",false,false); parameters.push_back(pdn);
35 CommandParameter pxa("xa", "Number", "", "1", "", "", "","",false,false); parameters.push_back(pxa);
36 CommandParameter pchunks("chunks", "Number", "", "4", "", "", "","",false,false); parameters.push_back(pchunks);
37 CommandParameter pminchunk("minchunk", "Number", "", "64", "", "", "","",false,false); parameters.push_back(pminchunk);
38 CommandParameter pidsmoothwindow("idsmoothwindow", "Number", "", "32", "", "", "","",false,false); parameters.push_back(pidsmoothwindow);
39 CommandParameter pdups("dereplicate", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(pdups);
41 //CommandParameter pminsmoothid("minsmoothid", "Number", "", "0.95", "", "", "",false,false); parameters.push_back(pminsmoothid);
42 CommandParameter pmaxp("maxp", "Number", "", "2", "", "", "","",false,false); parameters.push_back(pmaxp);
43 CommandParameter pskipgaps("skipgaps", "Boolean", "", "T", "", "", "","",false,false); parameters.push_back(pskipgaps);
44 CommandParameter pskipgaps2("skipgaps2", "Boolean", "", "T", "", "", "","",false,false); parameters.push_back(pskipgaps2);
45 CommandParameter pminlen("minlen", "Number", "", "10", "", "", "","",false,false); parameters.push_back(pminlen);
46 CommandParameter pmaxlen("maxlen", "Number", "", "10000", "", "", "","",false,false); parameters.push_back(pmaxlen);
47 CommandParameter pucl("ucl", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(pucl);
48 CommandParameter pqueryfract("queryfract", "Number", "", "0.5", "", "", "","",false,false); parameters.push_back(pqueryfract);
50 vector<string> myArray;
51 for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); }
55 m->errorOut(e, "ChimeraUchimeCommand", "setParameters");
59 //**********************************************************************************************************************
60 string ChimeraUchimeCommand::getHelpString(){
62 string helpString = "";
63 helpString += "The chimera.uchime command reads a fastafile and referencefile and outputs potentially chimeric sequences.\n";
64 helpString += "This command is a wrapper for uchime written by Robert C. Edgar.\n";
65 helpString += "The chimera.uchime command parameters are fasta, name, count, reference, processors, dereplicate, abskew, chimealns, minh, mindiv, xn, dn, xa, chunks, minchunk, idsmoothwindow, minsmoothid, maxp, skipgaps, skipgaps2, minlen, maxlen, ucl, strand and queryfact.\n";
66 helpString += "The fasta parameter allows you to enter the fasta file containing your potentially chimeric sequences, and is required, unless you have a valid current fasta file. \n";
67 helpString += "The name parameter allows you to provide a name file, if you are using template=self. \n";
68 helpString += "The count parameter allows you to provide a count file, if you are using template=self. When you use a count file with group info and dereplicate=T, mothur will create a *.pick.count_table file containing seqeunces after chimeras are removed. \n";
69 helpString += "You may enter multiple fasta files by separating their names with dashes. ie. fasta=abrecovery.fasta-amazon.fasta \n";
70 helpString += "The group parameter allows you to provide a group file. The group file can be used with a namesfile and reference=self. When checking sequences, only sequences from the same group as the query sequence will be used as the reference. \n";
71 helpString += "If the dereplicate parameter is false, then if one group finds the seqeunce to be chimeric, then all groups find it to be chimeric, default=f.\n";
72 helpString += "The reference parameter allows you to enter a reference file containing known non-chimeric sequences, and is required. You may also set template=self, in this case the abundant sequences will be used as potential parents. \n";
73 helpString += "The processors parameter allows you to specify how many processors you would like to use. The default is 1. \n";
74 helpString += "The abskew parameter can only be used with template=self. Minimum abundance skew. Default 1.9. Abundance skew is: min [ abund(parent1), abund(parent2) ] / abund(query).\n";
75 helpString += "The chimealns parameter allows you to indicate you would like a file containing multiple alignments of query sequences to parents in human readable format. Alignments show columns with differences that support or contradict a chimeric model.\n";
76 helpString += "The minh parameter - mininum score to report chimera. Default 0.3. Values from 0.1 to 5 might be reasonable. Lower values increase sensitivity but may report more false positives. If you decrease xn you may need to increase minh, and vice versa.\n";
77 helpString += "The mindiv parameter - minimum divergence ratio, default 0.5. Div ratio is 100%% - %%identity between query sequence and the closest candidate for being a parent. If you don't care about very close chimeras, then you could increase mindiv to, say, 1.0 or 2.0, and also decrease minh, say to 0.1, to increase sensitivity. How well this works will depend on your data. Best is to tune parameters on a good benchmark.\n";
78 helpString += "The xn parameter - weight of a no vote. Default 8.0. Decreasing this weight to around 3 or 4 may give better performance on denoised data.\n";
79 helpString += "The dn parameter - pseudo-count prior on number of no votes. Default 1.4. Probably no good reason to change this unless you can retune to a good benchmark for your data. Reasonable values are probably in the range from 0.2 to 2.\n";
80 helpString += "The xa parameter - weight of an abstain vote. Default 1. So far, results do not seem to be very sensitive to this parameter, but if you have a good training set might be worth trying. Reasonable values might range from 0.1 to 2.\n";
81 helpString += "The chunks parameter is the number of chunks to extract from the query sequence when searching for parents. Default 4.\n";
82 helpString += "The minchunk parameter is the minimum length of a chunk. Default 64.\n";
83 helpString += "The idsmoothwindow parameter is the length of id smoothing window. Default 32.\n";
84 //helpString += "The minsmoothid parameter - minimum factional identity over smoothed window of candidate parent. Default 0.95.\n";
85 helpString += "The maxp parameter - maximum number of candidate parents to consider. Default 2. In tests so far, increasing maxp gives only a very small improvement in sensivity but tends to increase the error rate quite a bit.\n";
86 helpString += "The skipgaps parameter controls how gapped columns affect counting of diffs. If skipgaps is set to T, columns containing gaps do not found as diffs. Default = T.\n";
87 helpString += "The skipgaps2 parameter controls how gapped columns affect counting of diffs. If skipgaps2 is set to T, if column is immediately adjacent to a column containing a gap, it is not counted as a diff. Default = T.\n";
88 helpString += "The minlen parameter is the minimum unaligned sequence length. Defaults 10. Applies to both query and reference sequences.\n";
89 helpString += "The maxlen parameter is the maximum unaligned sequence length. Defaults 10000. Applies to both query and reference sequences.\n";
90 helpString += "The ucl parameter - use local-X alignments. Default is global-X or false. On tests so far, global-X is always better; this option is retained because it just might work well on some future type of data.\n";
91 helpString += "The queryfract parameter - minimum fraction of the query sequence that must be covered by a local-X alignment. Default 0.5. Applies only when ucl is true.\n";
93 helpString += "When using MPI, the processors parameter is set to the number of MPI processes running. \n";
95 helpString += "The chimera.uchime command should be in the following format: \n";
96 helpString += "chimera.uchime(fasta=yourFastaFile, reference=yourTemplate) \n";
97 helpString += "Example: chimera.uchime(fasta=AD.align, reference=silva.gold.align) \n";
98 helpString += "Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFastaFile).\n";
101 catch(exception& e) {
102 m->errorOut(e, "ChimeraUchimeCommand", "getHelpString");
106 //**********************************************************************************************************************
107 string ChimeraUchimeCommand::getOutputPattern(string type) {
111 if (type == "chimera") { pattern = "[filename],uchime.chimeras"; }
112 else if (type == "accnos") { pattern = "[filename],uchime.accnos"; }
113 else if (type == "alns") { pattern = "[filename],uchime.alns"; }
114 else if (type == "count") { pattern = "[filename],uchime.pick.count_table"; }
115 else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true; }
119 catch(exception& e) {
120 m->errorOut(e, "ChimeraUchimeCommand", "getOutputPattern");
124 //**********************************************************************************************************************
125 ChimeraUchimeCommand::ChimeraUchimeCommand(){
127 abort = true; calledHelp = true;
129 vector<string> tempOutNames;
130 outputTypes["chimera"] = tempOutNames;
131 outputTypes["accnos"] = tempOutNames;
132 outputTypes["alns"] = tempOutNames;
133 outputTypes["count"] = tempOutNames;
135 catch(exception& e) {
136 m->errorOut(e, "ChimeraUchimeCommand", "ChimeraUchimeCommand");
140 //***************************************************************************************************************
141 ChimeraUchimeCommand::ChimeraUchimeCommand(string option) {
143 abort = false; calledHelp = false; hasName=false; hasCount=false;
144 ReferenceDB* rdb = ReferenceDB::getInstance();
146 //allow user to run help
147 if(option == "help") { help(); abort = true; calledHelp = true; }
148 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
151 vector<string> myArray = setParameters();
153 OptionParser parser(option);
154 map<string,string> parameters = parser.getParameters();
156 ValidParameters validParameter("chimera.uchime");
157 map<string,string>::iterator it;
159 //check to make sure all parameters are valid for command
160 for (it = parameters.begin(); it != parameters.end(); it++) {
161 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
164 vector<string> tempOutNames;
165 outputTypes["chimera"] = tempOutNames;
166 outputTypes["accnos"] = tempOutNames;
167 outputTypes["alns"] = tempOutNames;
168 outputTypes["count"] = tempOutNames;
170 //if the user changes the input directory command factory will send this info to us in the output parameter
171 string inputDir = validParameter.validFile(parameters, "inputdir", false);
172 if (inputDir == "not found"){ inputDir = ""; }
174 //check for required parameters
175 fastafile = validParameter.validFile(parameters, "fasta", false);
176 if (fastafile == "not found") {
177 //if there is a current fasta file, use it
178 string filename = m->getFastaFile();
179 if (filename != "") { fastaFileNames.push_back(filename); m->mothurOut("Using " + filename + " as input file for the fasta parameter."); m->mothurOutEndLine(); }
180 else { m->mothurOut("You have no current fastafile and the fasta parameter is required."); m->mothurOutEndLine(); abort = true; }
182 m->splitAtDash(fastafile, fastaFileNames);
184 //go through files and make sure they are good, if not, then disregard them
185 for (int i = 0; i < fastaFileNames.size(); i++) {
188 if (fastaFileNames[i] == "current") {
189 fastaFileNames[i] = m->getFastaFile();
190 if (fastaFileNames[i] != "") { m->mothurOut("Using " + fastaFileNames[i] + " as input file for the fasta parameter where you had given current."); m->mothurOutEndLine(); }
192 m->mothurOut("You have no current fastafile, ignoring current."); m->mothurOutEndLine(); ignore=true;
193 //erase from file list
194 fastaFileNames.erase(fastaFileNames.begin()+i);
201 if (inputDir != "") {
202 string path = m->hasPath(fastaFileNames[i]);
203 //if the user has not given a path then, add inputdir. else leave path alone.
204 if (path == "") { fastaFileNames[i] = inputDir + fastaFileNames[i]; }
210 ableToOpen = m->openInputFile(fastaFileNames[i], in, "noerror");
212 //if you can't open it, try default location
213 if (ableToOpen == 1) {
214 if (m->getDefaultPath() != "") { //default path is set
215 string tryPath = m->getDefaultPath() + m->getSimpleName(fastaFileNames[i]);
216 m->mothurOut("Unable to open " + fastaFileNames[i] + ". Trying default " + tryPath); m->mothurOutEndLine();
218 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
220 fastaFileNames[i] = tryPath;
224 if (ableToOpen == 1) {
225 if (m->getOutputDir() != "") { //default path is set
226 string tryPath = m->getOutputDir() + m->getSimpleName(fastaFileNames[i]);
227 m->mothurOut("Unable to open " + fastaFileNames[i] + ". Trying output directory " + tryPath); m->mothurOutEndLine();
229 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
231 fastaFileNames[i] = tryPath;
237 if (ableToOpen == 1) {
238 m->mothurOut("Unable to open " + fastaFileNames[i] + ". It will be disregarded."); m->mothurOutEndLine();
239 //erase from file list
240 fastaFileNames.erase(fastaFileNames.begin()+i);
243 m->setFastaFile(fastaFileNames[i]);
248 //make sure there is at least one valid file left
249 if (fastaFileNames.size() == 0) { m->mothurOut("[ERROR]: no valid files."); m->mothurOutEndLine(); abort = true; }
253 //check for required parameters
254 namefile = validParameter.validFile(parameters, "name", false);
255 if (namefile == "not found") { namefile = ""; }
257 m->splitAtDash(namefile, nameFileNames);
259 //go through files and make sure they are good, if not, then disregard them
260 for (int i = 0; i < nameFileNames.size(); i++) {
263 if (nameFileNames[i] == "current") {
264 nameFileNames[i] = m->getNameFile();
265 if (nameFileNames[i] != "") { m->mothurOut("Using " + nameFileNames[i] + " as input file for the name parameter where you had given current."); m->mothurOutEndLine(); }
267 m->mothurOut("You have no current namefile, ignoring current."); m->mothurOutEndLine(); ignore=true;
268 //erase from file list
269 nameFileNames.erase(nameFileNames.begin()+i);
276 if (inputDir != "") {
277 string path = m->hasPath(nameFileNames[i]);
278 //if the user has not given a path then, add inputdir. else leave path alone.
279 if (path == "") { nameFileNames[i] = inputDir + nameFileNames[i]; }
285 ableToOpen = m->openInputFile(nameFileNames[i], in, "noerror");
287 //if you can't open it, try default location
288 if (ableToOpen == 1) {
289 if (m->getDefaultPath() != "") { //default path is set
290 string tryPath = m->getDefaultPath() + m->getSimpleName(nameFileNames[i]);
291 m->mothurOut("Unable to open " + nameFileNames[i] + ". Trying default " + tryPath); m->mothurOutEndLine();
293 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
295 nameFileNames[i] = tryPath;
299 if (ableToOpen == 1) {
300 if (m->getOutputDir() != "") { //default path is set
301 string tryPath = m->getOutputDir() + m->getSimpleName(nameFileNames[i]);
302 m->mothurOut("Unable to open " + nameFileNames[i] + ". Trying output directory " + tryPath); m->mothurOutEndLine();
304 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
306 nameFileNames[i] = tryPath;
312 if (ableToOpen == 1) {
313 m->mothurOut("Unable to open " + nameFileNames[i] + ". It will be disregarded."); m->mothurOutEndLine();
314 //erase from file list
315 nameFileNames.erase(nameFileNames.begin()+i);
318 m->setNameFile(nameFileNames[i]);
324 if (nameFileNames.size() != 0) { hasName = true; }
326 //check for required parameters
327 vector<string> countfileNames;
328 countfile = validParameter.validFile(parameters, "count", false);
329 if (countfile == "not found") {
332 m->splitAtDash(countfile, countfileNames);
334 //go through files and make sure they are good, if not, then disregard them
335 for (int i = 0; i < countfileNames.size(); i++) {
338 if (countfileNames[i] == "current") {
339 countfileNames[i] = m->getCountTableFile();
340 if (nameFileNames[i] != "") { m->mothurOut("Using " + countfileNames[i] + " as input file for the count parameter where you had given current."); m->mothurOutEndLine(); }
342 m->mothurOut("You have no current count file, ignoring current."); m->mothurOutEndLine(); ignore=true;
343 //erase from file list
344 countfileNames.erase(countfileNames.begin()+i);
351 if (inputDir != "") {
352 string path = m->hasPath(countfileNames[i]);
353 //if the user has not given a path then, add inputdir. else leave path alone.
354 if (path == "") { countfileNames[i] = inputDir + countfileNames[i]; }
360 ableToOpen = m->openInputFile(countfileNames[i], in, "noerror");
362 //if you can't open it, try default location
363 if (ableToOpen == 1) {
364 if (m->getDefaultPath() != "") { //default path is set
365 string tryPath = m->getDefaultPath() + m->getSimpleName(countfileNames[i]);
366 m->mothurOut("Unable to open " + countfileNames[i] + ". Trying default " + tryPath); m->mothurOutEndLine();
368 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
370 countfileNames[i] = tryPath;
374 if (ableToOpen == 1) {
375 if (m->getOutputDir() != "") { //default path is set
376 string tryPath = m->getOutputDir() + m->getSimpleName(countfileNames[i]);
377 m->mothurOut("Unable to open " + countfileNames[i] + ". Trying output directory " + tryPath); m->mothurOutEndLine();
379 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
381 countfileNames[i] = tryPath;
387 if (ableToOpen == 1) {
388 m->mothurOut("Unable to open " + countfileNames[i] + ". It will be disregarded."); m->mothurOutEndLine();
389 //erase from file list
390 countfileNames.erase(countfileNames.begin()+i);
393 m->setCountTableFile(countfileNames[i]);
399 if (countfileNames.size() != 0) { hasCount = true; }
401 //make sure there is at least one valid file left
402 if (hasName && hasCount) { m->mothurOut("[ERROR]: You must enter ONLY ONE of the following: count or name."); m->mothurOutEndLine(); abort = true; }
404 if (!hasName && hasCount) { nameFileNames = countfileNames; }
406 if ((hasCount || hasName) && (nameFileNames.size() != fastaFileNames.size())) { m->mothurOut("[ERROR]: The number of name or count files does not match the number of fastafiles, please correct."); m->mothurOutEndLine(); abort=true; }
408 bool hasGroup = true;
409 groupfile = validParameter.validFile(parameters, "group", false);
410 if (groupfile == "not found") { groupfile = ""; hasGroup = false; }
412 m->splitAtDash(groupfile, groupFileNames);
414 //go through files and make sure they are good, if not, then disregard them
415 for (int i = 0; i < groupFileNames.size(); i++) {
418 if (groupFileNames[i] == "current") {
419 groupFileNames[i] = m->getGroupFile();
420 if (groupFileNames[i] != "") { m->mothurOut("Using " + groupFileNames[i] + " as input file for the group parameter where you had given current."); m->mothurOutEndLine(); }
422 m->mothurOut("You have no current namefile, ignoring current."); m->mothurOutEndLine(); ignore=true;
423 //erase from file list
424 groupFileNames.erase(groupFileNames.begin()+i);
431 if (inputDir != "") {
432 string path = m->hasPath(groupFileNames[i]);
433 //if the user has not given a path then, add inputdir. else leave path alone.
434 if (path == "") { groupFileNames[i] = inputDir + groupFileNames[i]; }
440 ableToOpen = m->openInputFile(groupFileNames[i], in, "noerror");
442 //if you can't open it, try default location
443 if (ableToOpen == 1) {
444 if (m->getDefaultPath() != "") { //default path is set
445 string tryPath = m->getDefaultPath() + m->getSimpleName(groupFileNames[i]);
446 m->mothurOut("Unable to open " + groupFileNames[i] + ". Trying default " + tryPath); m->mothurOutEndLine();
448 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
450 groupFileNames[i] = tryPath;
454 if (ableToOpen == 1) {
455 if (m->getOutputDir() != "") { //default path is set
456 string tryPath = m->getOutputDir() + m->getSimpleName(groupFileNames[i]);
457 m->mothurOut("Unable to open " + groupFileNames[i] + ". Trying output directory " + tryPath); m->mothurOutEndLine();
459 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
461 groupFileNames[i] = tryPath;
467 if (ableToOpen == 1) {
468 m->mothurOut("Unable to open " + groupFileNames[i] + ". It will be disregarded."); m->mothurOutEndLine();
469 //erase from file list
470 groupFileNames.erase(groupFileNames.begin()+i);
473 m->setGroupFile(groupFileNames[i]);
478 //make sure there is at least one valid file left
479 if (groupFileNames.size() == 0) { m->mothurOut("[ERROR]: no valid group files."); m->mothurOutEndLine(); abort = true; }
482 if (hasGroup && (groupFileNames.size() != fastaFileNames.size())) { m->mothurOut("[ERROR]: The number of groupfiles does not match the number of fastafiles, please correct."); m->mothurOutEndLine(); abort=true; }
484 if (hasGroup && hasCount) { m->mothurOut("[ERROR]: You must enter ONLY ONE of the following: count or group."); m->mothurOutEndLine(); abort = true; }
485 //if the user changes the output directory command factory will send this info to us in the output parameter
486 outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = ""; }
489 //if the user changes the output directory command factory will send this info to us in the output parameter
490 outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = ""; }
493 it = parameters.find("reference");
494 //user has given a template file
495 if(it != parameters.end()){
496 if (it->second == "self") { templatefile = "self"; }
498 path = m->hasPath(it->second);
499 //if the user has not given a path then, add inputdir. else leave path alone.
500 if (path == "") { parameters["reference"] = inputDir + it->second; }
502 templatefile = validParameter.validFile(parameters, "reference", true);
503 if (templatefile == "not open") { abort = true; }
504 else if (templatefile == "not found") { //check for saved reference sequences
505 if (rdb->getSavedReference() != "") {
506 templatefile = rdb->getSavedReference();
507 m->mothurOutEndLine(); m->mothurOut("Using sequences from " + rdb->getSavedReference() + "."); m->mothurOutEndLine();
509 m->mothurOut("[ERROR]: You don't have any saved reference sequences and the reference parameter is a required.");
510 m->mothurOutEndLine();
515 }else if (hasName) { templatefile = "self"; }
516 else if (hasCount) { templatefile = "self"; }
518 if (rdb->getSavedReference() != "") {
519 templatefile = rdb->getSavedReference();
520 m->mothurOutEndLine(); m->mothurOut("Using sequences from " + rdb->getSavedReference() + "."); m->mothurOutEndLine();
522 m->mothurOut("[ERROR]: You don't have any saved reference sequences and the reference parameter is a required.");
523 m->mothurOutEndLine();
524 templatefile = ""; abort = true;
528 string temp = validParameter.validFile(parameters, "processors", false); if (temp == "not found"){ temp = m->getProcessors(); }
529 m->setProcessors(temp);
530 m->mothurConvert(temp, processors);
532 abskew = validParameter.validFile(parameters, "abskew", false); if (abskew == "not found"){ useAbskew = false; abskew = "1.9"; }else{ useAbskew = true; }
533 if (useAbskew && templatefile != "self") { m->mothurOut("The abskew parameter is only valid with template=self, ignoring."); m->mothurOutEndLine(); useAbskew = false; }
535 temp = validParameter.validFile(parameters, "chimealns", false); if (temp == "not found") { temp = "f"; }
536 chimealns = m->isTrue(temp);
538 minh = validParameter.validFile(parameters, "minh", false); if (minh == "not found") { useMinH = false; minh = "0.3"; } else{ useMinH = true; }
539 mindiv = validParameter.validFile(parameters, "mindiv", false); if (mindiv == "not found") { useMindiv = false; mindiv = "0.5"; } else{ useMindiv = true; }
540 xn = validParameter.validFile(parameters, "xn", false); if (xn == "not found") { useXn = false; xn = "8.0"; } else{ useXn = true; }
541 dn = validParameter.validFile(parameters, "dn", false); if (dn == "not found") { useDn = false; dn = "1.4"; } else{ useDn = true; }
542 xa = validParameter.validFile(parameters, "xa", false); if (xa == "not found") { useXa = false; xa = "1"; } else{ useXa = true; }
543 chunks = validParameter.validFile(parameters, "chunks", false); if (chunks == "not found") { useChunks = false; chunks = "4"; } else{ useChunks = true; }
544 minchunk = validParameter.validFile(parameters, "minchunk", false); if (minchunk == "not found") { useMinchunk = false; minchunk = "64"; } else{ useMinchunk = true; }
545 idsmoothwindow = validParameter.validFile(parameters, "idsmoothwindow", false); if (idsmoothwindow == "not found") { useIdsmoothwindow = false; idsmoothwindow = "32"; } else{ useIdsmoothwindow = true; }
546 //minsmoothid = validParameter.validFile(parameters, "minsmoothid", false); if (minsmoothid == "not found") { useMinsmoothid = false; minsmoothid = "0.95"; } else{ useMinsmoothid = true; }
547 maxp = validParameter.validFile(parameters, "maxp", false); if (maxp == "not found") { useMaxp = false; maxp = "2"; } else{ useMaxp = true; }
548 minlen = validParameter.validFile(parameters, "minlen", false); if (minlen == "not found") { useMinlen = false; minlen = "10"; } else{ useMinlen = true; }
549 maxlen = validParameter.validFile(parameters, "maxlen", false); if (maxlen == "not found") { useMaxlen = false; maxlen = "10000"; } else{ useMaxlen = true; }
551 strand = validParameter.validFile(parameters, "strand", false); if (strand == "not found") { strand = ""; }
553 temp = validParameter.validFile(parameters, "ucl", false); if (temp == "not found") { temp = "f"; }
554 ucl = m->isTrue(temp);
556 queryfract = validParameter.validFile(parameters, "queryfract", false); if (queryfract == "not found") { useQueryfract = false; queryfract = "0.5"; } else{ useQueryfract = true; }
557 if (!ucl && useQueryfract) { m->mothurOut("queryfact may only be used when ucl=t, ignoring."); m->mothurOutEndLine(); useQueryfract = false; }
559 temp = validParameter.validFile(parameters, "skipgaps", false); if (temp == "not found") { temp = "t"; }
560 skipgaps = m->isTrue(temp);
562 temp = validParameter.validFile(parameters, "skipgaps2", false); if (temp == "not found") { temp = "t"; }
563 skipgaps2 = m->isTrue(temp);
566 temp = validParameter.validFile(parameters, "dereplicate", false);
567 if (temp == "not found") {
568 if (groupfile != "") { temp = "false"; }
569 else { temp = "true"; }
571 dups = m->isTrue(temp);
574 if (hasName && (templatefile != "self")) { m->mothurOut("You have provided a namefile and the reference parameter is not set to self. I am not sure what reference you are trying to use, aborting."); m->mothurOutEndLine(); abort=true; }
575 if (hasGroup && (templatefile != "self")) { m->mothurOut("You have provided a group file and the reference parameter is not set to self. I am not sure what reference you are trying to use, aborting."); m->mothurOutEndLine(); abort=true; }
577 //look for uchime exe
579 string tempPath = path;
580 for (int i = 0; i < path.length(); i++) { tempPath[i] = tolower(path[i]); }
581 path = path.substr(0, (tempPath.find_last_of('m')));
583 string uchimeCommand;
584 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
585 uchimeCommand = path + "uchime"; // format the database, -o option gives us the ability
587 m->mothurOut("[DEBUG]: Uchime location using \"which uchime\" = ");
588 Command* newCommand = new SystemCommand("which uchime"); m->mothurOutEndLine();
589 newCommand->execute();
591 m->mothurOut("[DEBUG]: Mothur's location using \"which mothur\" = ");
592 newCommand = new SystemCommand("which mothur"); m->mothurOutEndLine();
593 newCommand->execute();
597 uchimeCommand = path + "uchime.exe";
600 //test to make sure uchime exists
602 uchimeCommand = m->getFullPathName(uchimeCommand);
603 int ableToOpen = m->openInputFile(uchimeCommand, in, "no error"); in.close();
604 if(ableToOpen == 1) {
605 m->mothurOut(uchimeCommand + " file does not exist. Checking path... \n");
606 //check to see if uchime is in the path??
608 string uLocation = m->findProgramPath("uchime");
612 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
613 ableToOpen = m->openInputFile(uLocation, in2, "no error"); in2.close();
615 ableToOpen = m->openInputFile((uLocation + ".exe"), in2, "no error"); in2.close();
618 if(ableToOpen == 1) { m->mothurOut("[ERROR]: " + uLocation + " file does not exist. mothur requires the uchime executable."); m->mothurOutEndLine(); abort = true; }
619 else { m->mothurOut("Found uchime in your path, using " + uLocation + "\n");uchimeLocation = uLocation; }
620 }else { uchimeLocation = uchimeCommand; }
622 uchimeLocation = m->getFullPathName(uchimeLocation);
625 catch(exception& e) {
626 m->errorOut(e, "ChimeraSlayerCommand", "ChimeraSlayerCommand");
630 //***************************************************************************************************************
632 int ChimeraUchimeCommand::execute(){
635 if (abort == true) { if (calledHelp) { return 0; } return 2; }
637 m->mothurOut("\nuchime by Robert C. Edgar\nhttp://drive5.com/uchime\nThis code is donated to the public domain.\n\n");
639 for (int s = 0; s < fastaFileNames.size(); s++) {
641 m->mothurOut("Checking sequences from " + fastaFileNames[s] + " ..." ); m->mothurOutEndLine();
643 int start = time(NULL);
644 string nameFile = "";
645 if (outputDir == "") { outputDir = m->hasPath(fastaFileNames[s]); }//if user entered a file with a path then preserve it
646 map<string, string> variables;
647 variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s]));
648 string outputFileName = getOutputFileName("chimera", variables);
649 string accnosFileName = getOutputFileName("accnos", variables);
650 string alnsFileName = getOutputFileName("alns", variables);
651 string newFasta = m->getRootName(fastaFileNames[s]) + "temp";
652 string newCountFile = "";
654 //you provided a groupfile
655 string groupFile = "";
656 bool hasGroup = false;
657 if (groupFileNames.size() != 0) { groupFile = groupFileNames[s]; hasGroup = true; }
660 if (ct.testGroups(nameFileNames[s])) { hasGroup = true; }
661 variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(nameFileNames[s]));
662 newCountFile = getOutputFileName("count", variables);
665 if ((templatefile == "self") && (!hasGroup)) { //you want to run uchime with a template=self and no groups
667 if (processors != 1) { m->mothurOut("When using template=self, mothur can only use 1 processor, continuing."); m->mothurOutEndLine(); processors = 1; }
668 if (nameFileNames.size() != 0) { //you provided a namefile and we don't need to create one
669 nameFile = nameFileNames[s];
670 }else { nameFile = getNamesFile(fastaFileNames[s]); }
672 map<string, string> seqs;
673 readFasta(fastaFileNames[s], seqs); if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } return 0; }
676 vector<seqPriorityNode> nameMapCount;
680 ct.readTable(nameFile);
681 for(map<string, string>::iterator it = seqs.begin(); it != seqs.end(); it++) {
682 int num = ct.getNumSeqs(it->first);
683 if (num == 0) { error = 1; }
685 seqPriorityNode temp(num, it->second, it->first);
686 nameMapCount.push_back(temp);
690 error = m->readNames(nameFile, nameMapCount, seqs); if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } return 0; }
692 if (error == 1) { for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } return 0; }
693 if (seqs.size() != nameMapCount.size()) { m->mothurOut( "The number of sequences in your fastafile does not match the number of sequences in your namefile, aborting."); m->mothurOutEndLine(); for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } return 0; }
695 printFile(nameMapCount, newFasta);
696 fastaFileNames[s] = newFasta;
699 if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } return 0; }
702 if (nameFileNames.size() != 0) { //you provided a namefile and we don't need to create one
703 nameFile = nameFileNames[s];
704 }else { nameFile = getNamesFile(fastaFileNames[s]); }
706 //Parse sequences by group
707 vector<string> groups;
708 map<string, string> uniqueNames;
710 cparser = new SequenceCountParser(nameFile, fastaFileNames[s]);
711 groups = cparser->getNamesOfGroups();
712 uniqueNames = cparser->getAllSeqsMap();
714 sparser = new SequenceParser(groupFile, fastaFileNames[s], nameFile);
715 groups = sparser->getNamesOfGroups();
716 uniqueNames = sparser->getAllSeqsMap();
719 if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } return 0; }
722 ofstream out, out1, out2;
723 m->openOutputFile(outputFileName, out); out.close();
724 m->openOutputFile(accnosFileName, out1); out1.close();
725 if (chimealns) { m->openOutputFile(alnsFileName, out2); out2.close(); }
728 if(processors == 1) { totalSeqs = driverGroups(outputFileName, newFasta, accnosFileName, alnsFileName, newCountFile, 0, groups.size(), groups);
731 if (hasCount && !dups) {
732 CountTable newCount; newCount.readTable(nameFile);
734 if (!m->isBlank(newCountFile)) {
736 m->openInputFile(newCountFile, in2);
740 in2 >> name >> group; m->gobble(in2);
741 newCount.setAbund(name, group, 0);
745 m->mothurRemove(newCountFile);
746 newCount.printTable(newCountFile);
749 }else { totalSeqs = createProcessesGroups(outputFileName, newFasta, accnosFileName, alnsFileName, newCountFile, groups, nameFile, groupFile, fastaFileNames[s]); }
751 if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } return 0; }
755 int totalChimeras = deconvoluteResults(uniqueNames, outputFileName, accnosFileName, alnsFileName);
757 m->mothurOutEndLine(); m->mothurOut("It took " + toString(time(NULL) - start) + " secs to check " + toString(totalSeqs) + " sequences. " + toString(totalChimeras) + " chimeras were found."); m->mothurOutEndLine();
758 m->mothurOut("The number of sequences checked may be larger than the number of unique sequences because some sequences are found in several samples."); m->mothurOutEndLine();
760 /*if (hasCount) { //removed empty seqs
762 m->openOutputFile(accnosFileName, out2);
764 CountTable c; c.readTable(newCountFile);
765 vector<string> nseqs = c.getNamesOfSeqs();
766 vector<string> ngroups = c.getNamesOfGroups();
767 for (int l = 0; l < nseqs.size(); l++) {
768 if (c.getNumSeqs(nseqs[l]) == 0) {
770 out2 << nseqs[l] << endl;
773 for (int l = 0; l < ngroups.size(); l++) {
774 if (c.getGroupCount(ngroups[l]) == 0) { c.removeGroup(ngroups[l]); }
777 c.printTable(newCountFile);
781 if (hasCount) { delete cparser; }
782 else { delete sparser; }
784 if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } return 0; }
787 if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } return 0; }
792 if(processors == 1){ numSeqs = driver(outputFileName, fastaFileNames[s], accnosFileName, alnsFileName, numChimeras); }
793 else{ numSeqs = createProcesses(outputFileName, fastaFileNames[s], accnosFileName, alnsFileName, numChimeras); }
797 m->openOutputFile(outputFileName+".temp", out);
798 out << "Score\tQuery\tParentA\tParentB\tIdQM\tIdQA\tIdQB\tIdAB\tIdQT\tLY\tLN\tLA\tRY\tRN\tRA\tDiv\tYN\n";
801 m->appendFiles(outputFileName, outputFileName+".temp");
802 m->mothurRemove(outputFileName); rename((outputFileName+".temp").c_str(), outputFileName.c_str());
804 if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } return 0; }
806 //remove file made for uchime
807 if (templatefile == "self") { m->mothurRemove(fastaFileNames[s]); }
809 m->mothurOutEndLine(); m->mothurOut("It took " + toString(time(NULL) - start) + " secs to check " + toString(numSeqs) + " sequences. " + toString(numChimeras) + " chimeras were found."); m->mothurOutEndLine();
812 outputNames.push_back(outputFileName); outputTypes["chimera"].push_back(outputFileName);
813 outputNames.push_back(accnosFileName); outputTypes["accnos"].push_back(accnosFileName);
814 if (chimealns) { outputNames.push_back(alnsFileName); outputTypes["alns"].push_back(alnsFileName); }
817 //set accnos file as new current accnosfile
819 itTypes = outputTypes.find("accnos");
820 if (itTypes != outputTypes.end()) {
821 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setAccnosFile(current); }
824 m->mothurOutEndLine();
825 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
826 for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); }
827 m->mothurOutEndLine();
832 catch(exception& e) {
833 m->errorOut(e, "ChimeraUchimeCommand", "execute");
837 //**********************************************************************************************************************
838 int ChimeraUchimeCommand::deconvoluteResults(map<string, string>& uniqueNames, string outputFileName, string accnosFileName, string alnsFileName){
840 map<string, string>::iterator itUnique;
845 m->openInputFile(accnosFileName, in2);
848 m->openOutputFile(accnosFileName+".temp", out2);
851 set<string> namesInFile; //this is so if a sequence is found to be chimera in several samples we dont write it to the results file more than once
852 set<string>::iterator itNames;
853 set<string> chimerasInFile;
854 set<string>::iterator itChimeras;
858 if (m->control_pressed) { in2.close(); out2.close(); m->mothurRemove(outputFileName); m->mothurRemove((accnosFileName+".temp")); return 0; }
860 in2 >> name; m->gobble(in2);
863 itUnique = uniqueNames.find(name);
865 if (itUnique == uniqueNames.end()) { m->mothurOut("[ERROR]: trouble parsing accnos results. Cannot find " + name + "."); m->mothurOutEndLine(); m->control_pressed = true; }
867 itChimeras = chimerasInFile.find((itUnique->second));
869 if (itChimeras == chimerasInFile.end()) {
870 out2 << itUnique->second << endl;
871 chimerasInFile.insert((itUnique->second));
879 m->mothurRemove(accnosFileName);
880 rename((accnosFileName+".temp").c_str(), accnosFileName.c_str());
886 m->openInputFile(outputFileName, in);
889 m->openOutputFile(outputFileName+".temp", out); out.setf(ios::fixed, ios::floatfield); out.setf(ios::showpoint);
890 out << "Score\tQuery\tParentA\tParentB\tIdQM\tIdQA\tIdQB\tIdAB\tIdQT\tLY\tLN\tLA\tRY\tRN\tRA\tDiv\tYN\n";
893 string parent1, parent2, temp2, temp3, temp4, temp5, temp6, temp7, temp8, temp9, temp10, temp11, temp12, temp13, flag;
896 //assumptions - in file each read will always look like - if uchime source is updated, revisit this code.
897 /* 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
898 0.000000 F11Fcsw_33372/ab=18/ * * * * * * * * * * * * * * N
899 0.018300 F11Fcsw_14980/ab=16/ F11Fcsw_1915/ab=35/ F11Fcsw_6032/ab=42/ 79.9 78.7 78.2 78.7 79.2 3 0 5 11 10 20 1.46 N
904 if (m->control_pressed) { in.close(); out.close(); m->mothurRemove((outputFileName+".temp")); return 0; }
907 in >> temp1; m->gobble(in);
908 in >> name; m->gobble(in);
909 in >> parent1; m->gobble(in);
910 in >> parent2; m->gobble(in);
911 in >> temp2 >> temp3 >> temp4 >> temp5 >> temp6 >> temp7 >> temp8 >> temp9 >> temp10 >> temp11 >> temp12 >> temp13 >> flag;
914 //parse name - name will look like U68590/ab=1/
915 string restOfName = "";
916 int pos = name.find_first_of('/');
917 if (pos != string::npos) {
918 restOfName = name.substr(pos);
919 name = name.substr(0, pos);
923 itUnique = uniqueNames.find(name);
925 if (itUnique == uniqueNames.end()) { m->mothurOut("[ERROR]: trouble parsing chimera results. Cannot find "+ name + "."); m->mothurOutEndLine(); m->control_pressed = true; }
927 name = itUnique->second;
928 //is this name already in the file
929 itNames = namesInFile.find((name));
931 if (itNames == namesInFile.end()) { //no not in file
932 if (flag == "N") { //are you really a no??
933 //is this sequence really not chimeric??
934 itChimeras = chimerasInFile.find(name);
936 //then you really are a no so print, otherwise skip
937 if (itChimeras == chimerasInFile.end()) { print = true; }
938 }else{ print = true; }
943 out << temp1 << '\t' << name << restOfName << '\t';
944 namesInFile.insert(name);
946 //parse parent1 names
947 if (parent1 != "*") {
949 pos = parent1.find_first_of('/');
950 if (pos != string::npos) {
951 restOfName = parent1.substr(pos);
952 parent1 = parent1.substr(0, pos);
955 itUnique = uniqueNames.find(parent1);
956 if (itUnique == uniqueNames.end()) { m->mothurOut("[ERROR]: trouble parsing chimera results. Cannot find parentA "+ parent1 + "."); m->mothurOutEndLine(); m->control_pressed = true; }
957 else { out << itUnique->second << restOfName << '\t'; }
958 }else { out << parent1 << '\t'; }
960 //parse parent2 names
961 if (parent2 != "*") {
963 pos = parent2.find_first_of('/');
964 if (pos != string::npos) {
965 restOfName = parent2.substr(pos);
966 parent2 = parent2.substr(0, pos);
969 itUnique = uniqueNames.find(parent2);
970 if (itUnique == uniqueNames.end()) { m->mothurOut("[ERROR]: trouble parsing chimera results. Cannot find parentB "+ parent2 + "."); m->mothurOutEndLine(); m->control_pressed = true; }
971 else { out << itUnique->second << restOfName << '\t'; }
972 }else { out << parent2 << '\t'; }
974 out << temp2 << '\t' << temp3 << '\t' << temp4 << '\t' << temp5 << '\t' << temp6 << '\t' << temp7 << '\t' << temp8 << '\t' << temp9 << '\t' << temp10 << '\t' << temp11 << '\t' << temp12 << temp13 << '\t' << flag << endl;
980 m->mothurRemove(outputFileName);
981 rename((outputFileName+".temp").c_str(), outputFileName.c_str());
985 //assumptions - in file each read will always look like - if uchime source is updated, revisit this code.
987 ------------------------------------------------------------------------
988 Query ( 179 nt) F21Fcsw_11639/ab=591/
989 ParentA ( 179 nt) F11Fcsw_6529/ab=1625/
990 ParentB ( 181 nt) F21Fcsw_12128/ab=1827/
992 A 1 AAGgAAGAtTAATACaagATGgCaTCatgAGtccgCATgTtcAcatGATTAAAG--gTaTtcCGGTagacGATGGGGATG 78
993 Q 1 AAGTAAGACTAATACCCAATGACGTCTCTAGAAGACATCTGAAAGAGATTAAAG--ATTTATCGGTGATGGATGGGGATG 78
994 B 1 AAGgAAGAtTAATcCaggATGggaTCatgAGttcACATgTccgcatGATTAAAGgtATTTtcCGGTagacGATGGGGATG 80
995 Diffs N N A N?N N N NNN N?NB N ?NaNNN B B NN NNNN
996 Votes 0 0 + 000 0 0 000 000+ 0 00!000 + 00 0000
997 Model AAAAAAAAAAAAAAAAAAAAAAxBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB
999 A 79 CGTtccATTAGaTaGTaGGCGGGGTAACGGCCCACCtAGtCttCGATggaTAGGGGTTCTGAGAGGAAGGTCCCCCACAT 158
1000 Q 79 CGTCTGATTAGCTTGTTGGCGGGGTAACGGCCCACCAAGGCAACGATCAGTAGGGGTTCTGAGAGGAAGGTCCCCCACAT 158
1001 B 81 CGTtccATTAGaTaGTaGGCGGGGTAACGGCCCACCtAGtCAACGATggaTAGGGGTTCTGAGAGGAAGGTCCCCCACAT 160
1002 Diffs NNN N N N N N BB NNN
1003 Votes 000 0 0 0 0 0 ++ 000
1004 Model BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB
1006 A 159 TGGAACTGAGACACGGTCCAA 179
1007 Q 159 TGGAACTGAGACACGGTCCAA 179
1008 B 161 TGGAACTGAGACACGGTCCAA 181
1011 Model BBBBBBBBBBBBBBBBBBBBB
1013 Ids. QA 76.6%, QB 77.7%, AB 93.7%, QModel 78.9%, Div. +1.5%
1014 Diffs Left 7: N 0, A 6, Y 1 (14.3%); Right 35: N 1, A 30, Y 4 (11.4%), Score 0.0047
1018 m->openInputFile(alnsFileName, in3);
1021 m->openOutputFile(alnsFileName+".temp", out3); out3.setf(ios::fixed, ios::floatfield); out3.setf(ios::showpoint);
1024 namesInFile.clear();
1027 while (!in3.eof()) {
1028 if (m->control_pressed) { in3.close(); out3.close(); m->mothurRemove(outputFileName); m->mothurRemove((accnosFileName)); m->mothurRemove((alnsFileName+".temp")); return 0; }
1031 line = m->getline(in3);
1035 istringstream iss(line);
1038 //are you a name line
1039 if ((temp == "Query") || (temp == "ParentA") || (temp == "ParentB")) {
1041 for (int i = 0; i < line.length(); i++) {
1043 if (line[i] == ')') { break; }
1044 else { out3 << line[i]; }
1047 if (spot == (line.length() - 1)) { m->mothurOut("[ERROR]: could not line sequence name in line " + line + "."); m->mothurOutEndLine(); m->control_pressed = true; }
1048 else if ((spot+2) > (line.length() - 1)) { m->mothurOut("[ERROR]: could not line sequence name in line " + line + "."); m->mothurOutEndLine(); m->control_pressed = true; }
1050 out << line[spot] << line[spot+1];
1052 name = line.substr(spot+2);
1054 //parse name - name will either look like U68590/ab=1/ or U68590
1055 string restOfName = "";
1056 int pos = name.find_first_of('/');
1057 if (pos != string::npos) {
1058 restOfName = name.substr(pos);
1059 name = name.substr(0, pos);
1063 itUnique = uniqueNames.find(name);
1065 if (itUnique == uniqueNames.end()) { m->mothurOut("[ERROR]: trouble parsing alns results. Cannot find "+ name + "."); m->mothurOutEndLine();m->control_pressed = true; }
1067 //only limit repeats on query names
1068 if (temp == "Query") {
1069 itNames = namesInFile.find((itUnique->second));
1071 if (itNames == namesInFile.end()) {
1072 out << itUnique->second << restOfName << endl;
1073 namesInFile.insert((itUnique->second));
1075 }else { out << itUnique->second << restOfName << endl; }
1080 }else { //not need to alter line
1081 out3 << line << endl;
1083 }else { out3 << endl; }
1088 m->mothurRemove(alnsFileName);
1089 rename((alnsFileName+".temp").c_str(), alnsFileName.c_str());
1094 catch(exception& e) {
1095 m->errorOut(e, "ChimeraUchimeCommand", "deconvoluteResults");
1099 //**********************************************************************************************************************
1100 int ChimeraUchimeCommand::printFile(vector<seqPriorityNode>& nameMapCount, string filename){
1103 sort(nameMapCount.begin(), nameMapCount.end(), compareSeqPriorityNodes);
1106 m->openOutputFile(filename, out);
1108 //print new file in order of
1109 for (int i = 0; i < nameMapCount.size(); i++) {
1110 out << ">" << nameMapCount[i].name << "/ab=" << nameMapCount[i].numIdentical << "/" << endl << nameMapCount[i].seq << endl;
1116 catch(exception& e) {
1117 m->errorOut(e, "ChimeraUchimeCommand", "printFile");
1121 //**********************************************************************************************************************
1122 int ChimeraUchimeCommand::readFasta(string filename, map<string, string>& seqs){
1124 //create input file for uchime
1125 //read through fastafile and store info
1127 m->openInputFile(filename, in);
1131 if (m->control_pressed) { in.close(); return 0; }
1133 Sequence seq(in); m->gobble(in);
1134 seqs[seq.getName()] = seq.getAligned();
1140 catch(exception& e) {
1141 m->errorOut(e, "ChimeraUchimeCommand", "readFasta");
1145 //**********************************************************************************************************************
1147 string ChimeraUchimeCommand::getNamesFile(string& inputFile){
1149 string nameFile = "";
1151 m->mothurOutEndLine(); m->mothurOut("No namesfile given, running unique.seqs command to generate one."); m->mothurOutEndLine(); m->mothurOutEndLine();
1153 //use unique.seqs to create new name and fastafile
1154 string inputString = "fasta=" + inputFile;
1155 m->mothurOut("/******************************************/"); m->mothurOutEndLine();
1156 m->mothurOut("Running command: unique.seqs(" + inputString + ")"); m->mothurOutEndLine();
1157 m->mothurCalling = true;
1159 Command* uniqueCommand = new DeconvoluteCommand(inputString);
1160 uniqueCommand->execute();
1162 map<string, vector<string> > filenames = uniqueCommand->getOutputFiles();
1164 delete uniqueCommand;
1165 m->mothurCalling = false;
1166 m->mothurOut("/******************************************/"); m->mothurOutEndLine();
1168 nameFile = filenames["name"][0];
1169 inputFile = filenames["fasta"][0];
1173 catch(exception& e) {
1174 m->errorOut(e, "ChimeraUchimeCommand", "getNamesFile");
1178 //**********************************************************************************************************************
1179 int ChimeraUchimeCommand::driverGroups(string outputFName, string filename, string accnos, string alns, string countlist, int start, int end, vector<string> groups){
1183 int numChimeras = 0;
1185 ofstream outCountList;
1186 if (hasCount && dups) { m->openOutputFile(countlist, outCountList); }
1188 for (int i = start; i < end; i++) {
1189 int start = time(NULL); if (m->control_pressed) { outCountList.close(); m->mothurRemove(countlist); return 0; }
1192 if (hasCount) { error = cparser->getSeqs(groups[i], filename, true); if ((error == 1) || m->control_pressed) { return 0; } }
1193 else { error = sparser->getSeqs(groups[i], filename, true); if ((error == 1) || m->control_pressed) { return 0; } }
1195 int numSeqs = driver((outputFName + groups[i]), filename, (accnos+groups[i]), (alns+ groups[i]), numChimeras);
1196 totalSeqs += numSeqs;
1198 if (m->control_pressed) { return 0; }
1200 //remove file made for uchime
1201 if (!m->debug) { m->mothurRemove(filename); }
1202 else { m->mothurOut("[DEBUG]: saving file: " + filename + ".\n"); }
1204 //if we provided a count file with group info and set dereplicate=t, then we want to create a *.pick.count_table
1205 //This table will zero out group counts for seqs determined to be chimeric by that group.
1207 if (!m->isBlank(accnos+groups[i])) {
1209 m->openInputFile(accnos+groups[i], in);
1213 in >> name; m->gobble(in);
1214 outCountList << name << '\t' << groups[i] << endl;
1218 map<string, string> thisnamemap = sparser->getNameMap(groups[i]);
1219 map<string, string>::iterator itN;
1221 m->openOutputFile(accnos+groups[i]+".temp", out);
1223 in >> name; m->gobble(in);
1224 itN = thisnamemap.find(name);
1225 if (itN != thisnamemap.end()) {
1226 vector<string> tempNames; m->splitAtComma(itN->second, tempNames);
1227 for (int j = 0; j < tempNames.size(); j++) { out << tempNames[j] << endl; }
1229 }else { m->mothurOut("[ERROR]: parsing cannot find " + name + ".\n"); m->control_pressed = true; }
1233 m->renameFile(accnos+groups[i]+".temp", accnos+groups[i]);
1240 m->appendFiles((outputFName+groups[i]), outputFName); m->mothurRemove((outputFName+groups[i]));
1241 m->appendFiles((accnos+groups[i]), accnos); m->mothurRemove((accnos+groups[i]));
1242 if (chimealns) { m->appendFiles((alns+groups[i]), alns); m->mothurRemove((alns+groups[i])); }
1244 m->mothurOutEndLine(); m->mothurOut("It took " + toString(time(NULL) - start) + " secs to check " + toString(numSeqs) + " sequences from group " + groups[i] + "."); m->mothurOutEndLine();
1247 if (hasCount && dups) { outCountList.close(); }
1252 catch(exception& e) {
1253 m->errorOut(e, "ChimeraUchimeCommand", "driverGroups");
1257 //**********************************************************************************************************************
1259 int ChimeraUchimeCommand::driver(string outputFName, string filename, string accnos, string alns, int& numChimeras){
1262 outputFName = m->getFullPathName(outputFName);
1263 filename = m->getFullPathName(filename);
1264 alns = m->getFullPathName(alns);
1266 //to allow for spaces in the path
1267 outputFName = "\"" + outputFName + "\"";
1268 filename = "\"" + filename + "\"";
1269 alns = "\"" + alns + "\"";
1271 vector<char*> cPara;
1273 string uchimeCommand = uchimeLocation;
1274 uchimeCommand = "\"" + uchimeCommand + "\" ";
1277 tempUchime= new char[uchimeCommand.length()+1];
1279 strncat(tempUchime, uchimeCommand.c_str(), uchimeCommand.length());
1280 cPara.push_back(tempUchime);
1282 //are you using a reference file
1283 if (templatefile != "self") {
1284 string outputFileName = filename.substr(1, filename.length()-2) + ".uchime_formatted";
1285 prepFile(filename.substr(1, filename.length()-2), outputFileName);
1286 filename = outputFileName;
1287 filename = "\"" + filename + "\"";
1288 //add reference file
1289 char* tempRef = new char[5];
1290 //strcpy(tempRef, "--db");
1291 *tempRef = '\0'; strncat(tempRef, "--db", 4);
1292 cPara.push_back(tempRef);
1293 char* tempR = new char[templatefile.length()+1];
1294 //strcpy(tempR, templatefile.c_str());
1295 *tempR = '\0'; strncat(tempR, templatefile.c_str(), templatefile.length());
1296 cPara.push_back(tempR);
1299 char* tempIn = new char[8];
1300 *tempIn = '\0'; strncat(tempIn, "--input", 7);
1301 //strcpy(tempIn, "--input");
1302 cPara.push_back(tempIn);
1303 char* temp = new char[filename.length()+1];
1304 *temp = '\0'; strncat(temp, filename.c_str(), filename.length());
1305 //strcpy(temp, filename.c_str());
1306 cPara.push_back(temp);
1308 char* tempO = new char[12];
1309 *tempO = '\0'; strncat(tempO, "--uchimeout", 11);
1310 //strcpy(tempO, "--uchimeout");
1311 cPara.push_back(tempO);
1312 char* tempout = new char[outputFName.length()+1];
1313 //strcpy(tempout, outputFName.c_str());
1314 *tempout = '\0'; strncat(tempout, outputFName.c_str(), outputFName.length());
1315 cPara.push_back(tempout);
1318 char* tempA = new char[13];
1319 *tempA = '\0'; strncat(tempA, "--uchimealns", 12);
1320 //strcpy(tempA, "--uchimealns");
1321 cPara.push_back(tempA);
1322 char* tempa = new char[alns.length()+1];
1323 //strcpy(tempa, alns.c_str());
1324 *tempa = '\0'; strncat(tempa, alns.c_str(), alns.length());
1325 cPara.push_back(tempa);
1329 char* tempA = new char[9];
1330 *tempA = '\0'; strncat(tempA, "--strand", 8);
1331 cPara.push_back(tempA);
1332 char* tempa = new char[strand.length()+1];
1333 *tempa = '\0'; strncat(tempa, strand.c_str(), strand.length());
1334 cPara.push_back(tempa);
1338 char* tempskew = new char[9];
1339 *tempskew = '\0'; strncat(tempskew, "--abskew", 8);
1340 //strcpy(tempskew, "--abskew");
1341 cPara.push_back(tempskew);
1342 char* tempSkew = new char[abskew.length()+1];
1343 //strcpy(tempSkew, abskew.c_str());
1344 *tempSkew = '\0'; strncat(tempSkew, abskew.c_str(), abskew.length());
1345 cPara.push_back(tempSkew);
1349 char* tempminh = new char[7];
1350 *tempminh = '\0'; strncat(tempminh, "--minh", 6);
1351 //strcpy(tempminh, "--minh");
1352 cPara.push_back(tempminh);
1353 char* tempMinH = new char[minh.length()+1];
1354 *tempMinH = '\0'; strncat(tempMinH, minh.c_str(), minh.length());
1355 //strcpy(tempMinH, minh.c_str());
1356 cPara.push_back(tempMinH);
1360 char* tempmindiv = new char[9];
1361 *tempmindiv = '\0'; strncat(tempmindiv, "--mindiv", 8);
1362 //strcpy(tempmindiv, "--mindiv");
1363 cPara.push_back(tempmindiv);
1364 char* tempMindiv = new char[mindiv.length()+1];
1365 *tempMindiv = '\0'; strncat(tempMindiv, mindiv.c_str(), mindiv.length());
1366 //strcpy(tempMindiv, mindiv.c_str());
1367 cPara.push_back(tempMindiv);
1371 char* tempxn = new char[5];
1372 //strcpy(tempxn, "--xn");
1373 *tempxn = '\0'; strncat(tempxn, "--xn", 4);
1374 cPara.push_back(tempxn);
1375 char* tempXn = new char[xn.length()+1];
1376 //strcpy(tempXn, xn.c_str());
1377 *tempXn = '\0'; strncat(tempXn, xn.c_str(), xn.length());
1378 cPara.push_back(tempXn);
1382 char* tempdn = new char[5];
1383 //strcpy(tempdn, "--dn");
1384 *tempdn = '\0'; strncat(tempdn, "--dn", 4);
1385 cPara.push_back(tempdn);
1386 char* tempDn = new char[dn.length()+1];
1387 *tempDn = '\0'; strncat(tempDn, dn.c_str(), dn.length());
1388 //strcpy(tempDn, dn.c_str());
1389 cPara.push_back(tempDn);
1393 char* tempxa = new char[5];
1394 //strcpy(tempxa, "--xa");
1395 *tempxa = '\0'; strncat(tempxa, "--xa", 4);
1396 cPara.push_back(tempxa);
1397 char* tempXa = new char[xa.length()+1];
1398 *tempXa = '\0'; strncat(tempXa, xa.c_str(), xa.length());
1399 //strcpy(tempXa, xa.c_str());
1400 cPara.push_back(tempXa);
1404 char* tempchunks = new char[9];
1405 //strcpy(tempchunks, "--chunks");
1406 *tempchunks = '\0'; strncat(tempchunks, "--chunks", 8);
1407 cPara.push_back(tempchunks);
1408 char* tempChunks = new char[chunks.length()+1];
1409 *tempChunks = '\0'; strncat(tempChunks, chunks.c_str(), chunks.length());
1410 //strcpy(tempChunks, chunks.c_str());
1411 cPara.push_back(tempChunks);
1415 char* tempminchunk = new char[11];
1416 //strcpy(tempminchunk, "--minchunk");
1417 *tempminchunk = '\0'; strncat(tempminchunk, "--minchunk", 10);
1418 cPara.push_back(tempminchunk);
1419 char* tempMinchunk = new char[minchunk.length()+1];
1420 *tempMinchunk = '\0'; strncat(tempMinchunk, minchunk.c_str(), minchunk.length());
1421 //strcpy(tempMinchunk, minchunk.c_str());
1422 cPara.push_back(tempMinchunk);
1425 if (useIdsmoothwindow) {
1426 char* tempidsmoothwindow = new char[17];
1427 *tempidsmoothwindow = '\0'; strncat(tempidsmoothwindow, "--idsmoothwindow", 16);
1428 //strcpy(tempidsmoothwindow, "--idsmoothwindow");
1429 cPara.push_back(tempidsmoothwindow);
1430 char* tempIdsmoothwindow = new char[idsmoothwindow.length()+1];
1431 *tempIdsmoothwindow = '\0'; strncat(tempIdsmoothwindow, idsmoothwindow.c_str(), idsmoothwindow.length());
1432 //strcpy(tempIdsmoothwindow, idsmoothwindow.c_str());
1433 cPara.push_back(tempIdsmoothwindow);
1436 /*if (useMinsmoothid) {
1437 char* tempminsmoothid = new char[14];
1438 //strcpy(tempminsmoothid, "--minsmoothid");
1439 *tempminsmoothid = '\0'; strncat(tempminsmoothid, "--minsmoothid", 13);
1440 cPara.push_back(tempminsmoothid);
1441 char* tempMinsmoothid = new char[minsmoothid.length()+1];
1442 *tempMinsmoothid = '\0'; strncat(tempMinsmoothid, minsmoothid.c_str(), minsmoothid.length());
1443 //strcpy(tempMinsmoothid, minsmoothid.c_str());
1444 cPara.push_back(tempMinsmoothid);
1448 char* tempmaxp = new char[7];
1449 //strcpy(tempmaxp, "--maxp");
1450 *tempmaxp = '\0'; strncat(tempmaxp, "--maxp", 6);
1451 cPara.push_back(tempmaxp);
1452 char* tempMaxp = new char[maxp.length()+1];
1453 *tempMaxp = '\0'; strncat(tempMaxp, maxp.c_str(), maxp.length());
1454 //strcpy(tempMaxp, maxp.c_str());
1455 cPara.push_back(tempMaxp);
1459 char* tempskipgaps = new char[13];
1460 //strcpy(tempskipgaps, "--[no]skipgaps");
1461 *tempskipgaps = '\0'; strncat(tempskipgaps, "--noskipgaps", 12);
1462 cPara.push_back(tempskipgaps);
1466 char* tempskipgaps2 = new char[14];
1467 //strcpy(tempskipgaps2, "--[no]skipgaps2");
1468 *tempskipgaps2 = '\0'; strncat(tempskipgaps2, "--noskipgaps2", 13);
1469 cPara.push_back(tempskipgaps2);
1473 char* tempminlen = new char[9];
1474 *tempminlen = '\0'; strncat(tempminlen, "--minlen", 8);
1475 //strcpy(tempminlen, "--minlen");
1476 cPara.push_back(tempminlen);
1477 char* tempMinlen = new char[minlen.length()+1];
1478 //strcpy(tempMinlen, minlen.c_str());
1479 *tempMinlen = '\0'; strncat(tempMinlen, minlen.c_str(), minlen.length());
1480 cPara.push_back(tempMinlen);
1484 char* tempmaxlen = new char[9];
1485 //strcpy(tempmaxlen, "--maxlen");
1486 *tempmaxlen = '\0'; strncat(tempmaxlen, "--maxlen", 8);
1487 cPara.push_back(tempmaxlen);
1488 char* tempMaxlen = new char[maxlen.length()+1];
1489 *tempMaxlen = '\0'; strncat(tempMaxlen, maxlen.c_str(), maxlen.length());
1490 //strcpy(tempMaxlen, maxlen.c_str());
1491 cPara.push_back(tempMaxlen);
1495 char* tempucl = new char[5];
1496 strcpy(tempucl, "--ucl");
1497 cPara.push_back(tempucl);
1500 if (useQueryfract) {
1501 char* tempqueryfract = new char[13];
1502 *tempqueryfract = '\0'; strncat(tempqueryfract, "--queryfract", 12);
1503 //strcpy(tempqueryfract, "--queryfract");
1504 cPara.push_back(tempqueryfract);
1505 char* tempQueryfract = new char[queryfract.length()+1];
1506 *tempQueryfract = '\0'; strncat(tempQueryfract, queryfract.c_str(), queryfract.length());
1507 //strcpy(tempQueryfract, queryfract.c_str());
1508 cPara.push_back(tempQueryfract);
1512 char** uchimeParameters;
1513 uchimeParameters = new char*[cPara.size()];
1514 string commandString = "";
1515 for (int i = 0; i < cPara.size(); i++) { uchimeParameters[i] = cPara[i]; commandString += toString(cPara[i]) + " "; }
1516 //int numArgs = cPara.size();
1518 //uchime_main(numArgs, uchimeParameters);
1519 //cout << "commandString = " << commandString << endl;
1520 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
1522 commandString = "\"" + commandString + "\"";
1524 if (m->debug) { m->mothurOut("[DEBUG]: uchime command = " + commandString + ".\n"); }
1525 system(commandString.c_str());
1528 for(int i = 0; i < cPara.size(); i++) { delete cPara[i]; }
1529 delete[] uchimeParameters;
1531 //remove "" from filenames
1532 outputFName = outputFName.substr(1, outputFName.length()-2);
1533 filename = filename.substr(1, filename.length()-2);
1534 alns = alns.substr(1, alns.length()-2);
1536 if (m->control_pressed) { return 0; }
1538 //create accnos file from uchime results
1540 m->openInputFile(outputFName, in);
1543 m->openOutputFile(accnos, out);
1549 if (m->control_pressed) { break; }
1552 string chimeraFlag = "";
1553 //in >> chimeraFlag >> name;
1555 string line = m->getline(in);
1556 vector<string> pieces = m->splitWhiteSpace(line);
1557 if (pieces.size() > 2) {
1559 //fix name if needed
1560 if (templatefile == "self") {
1561 name = name.substr(0, name.length()-1); //rip off last /
1562 name = name.substr(0, name.find_last_of('/'));
1565 chimeraFlag = pieces[pieces.size()-1];
1567 //for (int i = 0; i < 15; i++) { in >> chimeraFlag; }
1570 if (chimeraFlag == "Y") { out << name << endl; numChimeras++; }
1576 //if (templatefile != "self") { m->mothurRemove(filename); }
1580 catch(exception& e) {
1581 m->errorOut(e, "ChimeraUchimeCommand", "driver");
1585 /**************************************************************************************************/
1586 //uchime can't handle some of the things allowed in mothurs fasta files. This functions "cleans up" the file.
1587 int ChimeraUchimeCommand::prepFile(string filename, string output) {
1591 m->openInputFile(filename, in);
1594 m->openOutputFile(output, out);
1597 if (m->control_pressed) { break; }
1599 Sequence seq(in); m->gobble(in);
1601 if (seq.getName() != "") { seq.printSequence(out); }
1608 catch(exception& e) {
1609 m->errorOut(e, "ChimeraUchimeCommand", "prepFile");
1613 /**************************************************************************************************/
1615 int ChimeraUchimeCommand::createProcesses(string outputFileName, string filename, string accnos, string alns, int& numChimeras) {
1621 vector<string> files;
1623 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
1624 //break up file into multiple files
1625 m->divideFile(filename, processors, files);
1627 if (m->control_pressed) { return 0; }
1629 //loop through and create all the processes you want
1630 while (process != processors) {
1634 processIDS.push_back(pid); //create map from line number to pid so you can append files in correct order later
1636 }else if (pid == 0){
1637 num = driver(outputFileName + toString(getpid()) + ".temp", files[process], accnos + toString(getpid()) + ".temp", alns + toString(getpid()) + ".temp", numChimeras);
1639 //pass numSeqs to parent
1641 string tempFile = outputFileName + toString(getpid()) + ".num.temp";
1642 m->openOutputFile(tempFile, out);
1644 out << numChimeras << endl;
1649 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine();
1650 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
1656 num = driver(outputFileName, files[0], accnos, alns, numChimeras);
1658 //force parent to wait until all the processes are done
1659 for (int i=0;i<processIDS.size();i++) {
1660 int temp = processIDS[i];
1664 for (int i = 0; i < processIDS.size(); i++) {
1666 string tempFile = outputFileName + toString(processIDS[i]) + ".num.temp";
1667 m->openInputFile(tempFile, in);
1670 in >> tempNum; m->gobble(in);
1673 numChimeras += tempNum;
1675 in.close(); m->mothurRemove(tempFile);
1678 //////////////////////////////////////////////////////////////////////////////////////////////////////
1679 //Windows version shared memory, so be careful when passing variables through the preClusterData struct.
1680 //Above fork() will clone, so memory is separate, but that's not the case with windows,
1681 //////////////////////////////////////////////////////////////////////////////////////////////////////
1686 map<int, ofstream*> filehandles;
1687 map<int, ofstream*>::iterator it3;
1690 for (int i = 0; i < processors; i++) {
1691 temp = new ofstream;
1692 filehandles[i] = temp;
1693 m->openOutputFile(filename+toString(i)+".temp", *(temp));
1694 files.push_back(filename+toString(i)+".temp");
1698 m->openInputFile(filename, in);
1702 if (m->control_pressed) { in.close(); for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) { (*(it3->second)).close(); delete it3->second; } return 0; }
1704 Sequence tempSeq(in); m->gobble(in);
1706 if (tempSeq.getName() != "") {
1707 tempSeq.printSequence(*(filehandles[spot]));
1709 if (spot == processors) { spot = 0; }
1715 for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) {
1716 (*(it3->second)).close();
1720 //sanity check for number of processors
1721 if (count < processors) { processors = count; }
1723 vector<uchimeData*> pDataArray;
1724 DWORD dwThreadIdArray[processors-1];
1725 HANDLE hThreadArray[processors-1];
1726 vector<string> dummy; //used so that we can use the same struct for MyUchimeSeqsThreadFunction and MyUchimeThreadFunction
1728 //Create processor worker threads.
1729 for( int i=1; i<processors; i++ ){
1730 // Allocate memory for thread data.
1731 string extension = toString(i) + ".temp";
1733 uchimeData* tempUchime = new uchimeData(outputFileName+extension, uchimeLocation, templatefile, files[i], "", "", "", accnos+extension, alns+extension, "", dummy, m, 0, 0, i);
1734 tempUchime->setBooleans(dups, useAbskew, chimealns, useMinH, useMindiv, useXn, useDn, useXa, useChunks, useMinchunk, useIdsmoothwindow, useMinsmoothid, useMaxp, skipgaps, skipgaps2, useMinlen, useMaxlen, ucl, useQueryfract, hasCount);
1735 tempUchime->setVariables(abskew, minh, mindiv, xn, dn, xa, chunks, minchunk, idsmoothwindow, minsmoothid, maxp, minlen, maxlen, queryfract, strand);
1737 pDataArray.push_back(tempUchime);
1738 processIDS.push_back(i);
1740 //MySeqSumThreadFunction is in header. It must be global or static to work with the threads.
1741 //default security attributes, thread function name, argument to thread function, use default creation flags, returns the thread identifier
1742 hThreadArray[i-1] = CreateThread(NULL, 0, MyUchimeSeqsThreadFunction, pDataArray[i-1], 0, &dwThreadIdArray[i-1]);
1746 //using the main process as a worker saves time and memory
1747 num = driver(outputFileName, files[0], accnos, alns, numChimeras);
1749 //Wait until all threads have terminated.
1750 WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE);
1752 //Close all thread handles and free memory allocations.
1753 for(int i=0; i < pDataArray.size(); i++){
1754 num += pDataArray[i]->count;
1755 numChimeras += pDataArray[i]->numChimeras;
1756 CloseHandle(hThreadArray[i]);
1757 delete pDataArray[i];
1761 //append output files
1762 for(int i=0;i<processIDS.size();i++){
1763 m->appendFiles((outputFileName + toString(processIDS[i]) + ".temp"), outputFileName);
1764 m->mothurRemove((outputFileName + toString(processIDS[i]) + ".temp"));
1766 m->appendFiles((accnos + toString(processIDS[i]) + ".temp"), accnos);
1767 m->mothurRemove((accnos + toString(processIDS[i]) + ".temp"));
1770 m->appendFiles((alns + toString(processIDS[i]) + ".temp"), alns);
1771 m->mothurRemove((alns + toString(processIDS[i]) + ".temp"));
1775 //get rid of the file pieces.
1776 for (int i = 0; i < files.size(); i++) { m->mothurRemove(files[i]); }
1779 catch(exception& e) {
1780 m->errorOut(e, "ChimeraUchimeCommand", "createProcesses");
1784 /**************************************************************************************************/
1786 int ChimeraUchimeCommand::createProcessesGroups(string outputFName, string filename, string accnos, string alns, string newCountFile, vector<string> groups, string nameFile, string groupFile, string fastaFile) {
1793 CountTable newCount;
1794 if (hasCount && dups) { newCount.readTable(nameFile); }
1797 if (groups.size() < processors) { processors = groups.size(); }
1799 //divide the groups between the processors
1800 vector<linePair> lines;
1801 int numGroupsPerProcessor = groups.size() / processors;
1802 for (int i = 0; i < processors; i++) {
1803 int startIndex = i * numGroupsPerProcessor;
1804 int endIndex = (i+1) * numGroupsPerProcessor;
1805 if(i == (processors - 1)){ endIndex = groups.size(); }
1806 lines.push_back(linePair(startIndex, endIndex));
1809 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
1811 //loop through and create all the processes you want
1812 while (process != processors) {
1816 processIDS.push_back(pid); //create map from line number to pid so you can append files in correct order later
1818 }else if (pid == 0){
1819 num = driverGroups(outputFName + toString(getpid()) + ".temp", filename + toString(getpid()) + ".temp", accnos + toString(getpid()) + ".temp", alns + toString(getpid()) + ".temp", accnos + ".byCount." + toString(getpid()) + ".temp", lines[process].start, lines[process].end, groups);
1821 //pass numSeqs to parent
1823 string tempFile = outputFName + toString(getpid()) + ".num.temp";
1824 m->openOutputFile(tempFile, out);
1830 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine();
1831 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
1837 num = driverGroups(outputFName, filename, accnos, alns, accnos + ".byCount", lines[0].start, lines[0].end, groups);
1839 //force parent to wait until all the processes are done
1840 for (int i=0;i<processIDS.size();i++) {
1841 int temp = processIDS[i];
1845 for (int i = 0; i < processIDS.size(); i++) {
1847 string tempFile = outputFName + toString(processIDS[i]) + ".num.temp";
1848 m->openInputFile(tempFile, in);
1849 if (!in.eof()) { int tempNum = 0; in >> tempNum; num += tempNum; }
1850 in.close(); m->mothurRemove(tempFile);
1854 //////////////////////////////////////////////////////////////////////////////////////////////////////
1855 //Windows version shared memory, so be careful when passing variables through the uchimeData struct.
1856 //Above fork() will clone, so memory is separate, but that's not the case with windows,
1857 //////////////////////////////////////////////////////////////////////////////////////////////////////
1859 vector<uchimeData*> pDataArray;
1860 DWORD dwThreadIdArray[processors-1];
1861 HANDLE hThreadArray[processors-1];
1863 //Create processor worker threads.
1864 for( int i=1; i<processors; i++ ){
1865 // Allocate memory for thread data.
1866 string extension = toString(i) + ".temp";
1868 uchimeData* tempUchime = new uchimeData(outputFName+extension, uchimeLocation, templatefile, filename+extension, fastaFile, nameFile, groupFile, accnos+extension, alns+extension, accnos+".byCount."+extension, groups, m, lines[i].start, lines[i].end, i);
1869 tempUchime->setBooleans(dups, useAbskew, chimealns, useMinH, useMindiv, useXn, useDn, useXa, useChunks, useMinchunk, useIdsmoothwindow, useMinsmoothid, useMaxp, skipgaps, skipgaps2, useMinlen, useMaxlen, ucl, useQueryfract, hasCount);
1870 tempUchime->setVariables(abskew, minh, mindiv, xn, dn, xa, chunks, minchunk, idsmoothwindow, minsmoothid, maxp, minlen, maxlen, queryfract, strand);
1872 pDataArray.push_back(tempUchime);
1873 processIDS.push_back(i);
1875 //MyUchimeThreadFunction is in header. It must be global or static to work with the threads.
1876 //default security attributes, thread function name, argument to thread function, use default creation flags, returns the thread identifier
1877 hThreadArray[i-1] = CreateThread(NULL, 0, MyUchimeThreadFunction, pDataArray[i-1], 0, &dwThreadIdArray[i-1]);
1881 //using the main process as a worker saves time and memory
1882 num = driverGroups(outputFName, filename, accnos, alns, accnos + ".byCount", lines[0].start, lines[0].end, groups);
1884 //Wait until all threads have terminated.
1885 WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE);
1887 //Close all thread handles and free memory allocations.
1888 for(int i=0; i < pDataArray.size(); i++){
1889 num += pDataArray[i]->count;
1890 CloseHandle(hThreadArray[i]);
1891 delete pDataArray[i];
1898 if (hasCount && dups) {
1899 if (!m->isBlank(accnos + ".byCount")) {
1901 m->openInputFile(accnos + ".byCount", in2);
1904 while (!in2.eof()) {
1905 in2 >> name >> group; m->gobble(in2);
1906 newCount.setAbund(name, group, 0);
1910 m->mothurRemove(accnos + ".byCount");
1913 //append output files
1914 for(int i=0;i<processIDS.size();i++){
1915 m->appendFiles((outputFName + toString(processIDS[i]) + ".temp"), outputFName);
1916 m->mothurRemove((outputFName + toString(processIDS[i]) + ".temp"));
1918 m->appendFiles((accnos + toString(processIDS[i]) + ".temp"), accnos);
1919 m->mothurRemove((accnos + toString(processIDS[i]) + ".temp"));
1922 m->appendFiles((alns + toString(processIDS[i]) + ".temp"), alns);
1923 m->mothurRemove((alns + toString(processIDS[i]) + ".temp"));
1926 if (hasCount && dups) {
1927 if (!m->isBlank(accnos + ".byCount." + toString(processIDS[i]) + ".temp")) {
1929 m->openInputFile(accnos + ".byCount." + toString(processIDS[i]) + ".temp", in2);
1932 while (!in2.eof()) {
1933 in2 >> name >> group; m->gobble(in2);
1934 newCount.setAbund(name, group, 0);
1938 m->mothurRemove(accnos + ".byCount." + toString(processIDS[i]) + ".temp");
1943 //print new *.pick.count_table
1944 if (hasCount && dups) { newCount.printTable(newCountFile); }
1949 catch(exception& e) {
1950 m->errorOut(e, "ChimeraUchimeCommand", "createProcessesGroups");
1954 /**************************************************************************************************/