2 * chimerauchimecommand.cpp
5 * Created by westcott on 5/13/11.
6 * Copyright 2011 Schloss Lab. All rights reserved.
10 #include "chimerauchimecommand.h"
11 #include "deconvolutecommand.h"
13 #include "sequence.hpp"
14 #include "referencedb.h"
15 #include "systemcommand.h"
17 //**********************************************************************************************************************
18 vector<string> ChimeraUchimeCommand::setParameters(){
20 CommandParameter ptemplate("reference", "InputTypes", "", "", "none", "none", "none","",false,true,true); parameters.push_back(ptemplate);
21 CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none","chimera-accnos",false,true,true); parameters.push_back(pfasta);
22 CommandParameter pname("name", "InputTypes", "", "", "NameCount", "none", "none","",false,false,true); parameters.push_back(pname);
23 CommandParameter pcount("count", "InputTypes", "", "", "NameCount-CountGroup", "none", "none","",false,false,true); parameters.push_back(pcount);
24 CommandParameter pgroup("group", "InputTypes", "", "", "CountGroup", "none", "none","",false,false,true); parameters.push_back(pgroup);
25 CommandParameter pprocessors("processors", "Number", "", "1", "", "", "","",false,false,true); parameters.push_back(pprocessors);
26 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
27 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
28 CommandParameter pabskew("abskew", "Number", "", "1.9", "", "", "","",false,false); parameters.push_back(pabskew);
29 CommandParameter pchimealns("chimealns", "Boolean", "", "F", "", "", "","alns",false,false); parameters.push_back(pchimealns);
30 CommandParameter pminh("minh", "Number", "", "0.3", "", "", "","",false,false); parameters.push_back(pminh);
31 CommandParameter pmindiv("mindiv", "Number", "", "0.5", "", "", "","",false,false); parameters.push_back(pmindiv);
32 CommandParameter pxn("xn", "Number", "", "8.0", "", "", "","",false,false); parameters.push_back(pxn);
33 CommandParameter pdn("dn", "Number", "", "1.4", "", "", "","",false,false); parameters.push_back(pdn);
34 CommandParameter pxa("xa", "Number", "", "1", "", "", "","",false,false); parameters.push_back(pxa);
35 CommandParameter pchunks("chunks", "Number", "", "4", "", "", "","",false,false); parameters.push_back(pchunks);
36 CommandParameter pminchunk("minchunk", "Number", "", "64", "", "", "","",false,false); parameters.push_back(pminchunk);
37 CommandParameter pidsmoothwindow("idsmoothwindow", "Number", "", "32", "", "", "","",false,false); parameters.push_back(pidsmoothwindow);
38 CommandParameter pdups("dereplicate", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(pdups);
40 //CommandParameter pminsmoothid("minsmoothid", "Number", "", "0.95", "", "", "",false,false); parameters.push_back(pminsmoothid);
41 CommandParameter pmaxp("maxp", "Number", "", "2", "", "", "","",false,false); parameters.push_back(pmaxp);
42 CommandParameter pskipgaps("skipgaps", "Boolean", "", "T", "", "", "","",false,false); parameters.push_back(pskipgaps);
43 CommandParameter pskipgaps2("skipgaps2", "Boolean", "", "T", "", "", "","",false,false); parameters.push_back(pskipgaps2);
44 CommandParameter pminlen("minlen", "Number", "", "10", "", "", "","",false,false); parameters.push_back(pminlen);
45 CommandParameter pmaxlen("maxlen", "Number", "", "10000", "", "", "","",false,false); parameters.push_back(pmaxlen);
46 CommandParameter pucl("ucl", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(pucl);
47 CommandParameter pqueryfract("queryfract", "Number", "", "0.5", "", "", "","",false,false); parameters.push_back(pqueryfract);
49 vector<string> myArray;
50 for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); }
54 m->errorOut(e, "ChimeraUchimeCommand", "setParameters");
58 //**********************************************************************************************************************
59 string ChimeraUchimeCommand::getHelpString(){
61 string helpString = "";
62 helpString += "The chimera.uchime command reads a fastafile and referencefile and outputs potentially chimeric sequences.\n";
63 helpString += "This command is a wrapper for uchime written by Robert C. Edgar.\n";
64 helpString += "The chimera.uchime command parameters are fasta, name, count, reference, processors, dereplicate, abskew, chimealns, minh, mindiv, xn, dn, xa, chunks, minchunk, idsmoothwindow, minsmoothid, maxp, skipgaps, skipgaps2, minlen, maxlen, ucl and queryfact.\n";
65 helpString += "The fasta parameter allows you to enter the fasta file containing your potentially chimeric sequences, and is required, unless you have a valid current fasta file. \n";
66 helpString += "The name parameter allows you to provide a name file, if you are using template=self. \n";
67 helpString += "The count parameter allows you to provide a count file, if you are using template=self. \n";
68 helpString += "You may enter multiple fasta files by separating their names with dashes. ie. fasta=abrecovery.fasta-amazon.fasta \n";
69 helpString += "The group parameter allows you to provide a group file. The group file can be used with a namesfile and reference=self. When checking sequences, only sequences from the same group as the query sequence will be used as the reference. \n";
70 helpString += "If the dereplicate parameter is false, then if one group finds the seqeunce to be chimeric, then all groups find it to be chimeric, default=f.\n";
71 helpString += "The reference parameter allows you to enter a reference file containing known non-chimeric sequences, and is required. You may also set template=self, in this case the abundant sequences will be used as potential parents. \n";
72 helpString += "The processors parameter allows you to specify how many processors you would like to use. The default is 1. \n";
73 helpString += "The abskew parameter can only be used with template=self. Minimum abundance skew. Default 1.9. Abundance skew is: min [ abund(parent1), abund(parent2) ] / abund(query).\n";
74 helpString += "The chimealns parameter allows you to indicate you would like a file containing multiple alignments of query sequences to parents in human readable format. Alignments show columns with differences that support or contradict a chimeric model.\n";
75 helpString += "The minh parameter - mininum score to report chimera. Default 0.3. Values from 0.1 to 5 might be reasonable. Lower values increase sensitivity but may report more false positives. If you decrease xn you may need to increase minh, and vice versa.\n";
76 helpString += "The mindiv parameter - minimum divergence ratio, default 0.5. Div ratio is 100%% - %%identity between query sequence and the closest candidate for being a parent. If you don't care about very close chimeras, then you could increase mindiv to, say, 1.0 or 2.0, and also decrease minh, say to 0.1, to increase sensitivity. How well this works will depend on your data. Best is to tune parameters on a good benchmark.\n";
77 helpString += "The xn parameter - weight of a no vote. Default 8.0. Decreasing this weight to around 3 or 4 may give better performance on denoised data.\n";
78 helpString += "The dn parameter - pseudo-count prior on number of no votes. Default 1.4. Probably no good reason to change this unless you can retune to a good benchmark for your data. Reasonable values are probably in the range from 0.2 to 2.\n";
79 helpString += "The xa parameter - weight of an abstain vote. Default 1. So far, results do not seem to be very sensitive to this parameter, but if you have a good training set might be worth trying. Reasonable values might range from 0.1 to 2.\n";
80 helpString += "The chunks parameter is the number of chunks to extract from the query sequence when searching for parents. Default 4.\n";
81 helpString += "The minchunk parameter is the minimum length of a chunk. Default 64.\n";
82 helpString += "The idsmoothwindow parameter is the length of id smoothing window. Default 32.\n";
83 //helpString += "The minsmoothid parameter - minimum factional identity over smoothed window of candidate parent. Default 0.95.\n";
84 helpString += "The maxp parameter - maximum number of candidate parents to consider. Default 2. In tests so far, increasing maxp gives only a very small improvement in sensivity but tends to increase the error rate quite a bit.\n";
85 helpString += "The skipgaps parameter controls how gapped columns affect counting of diffs. If skipgaps is set to T, columns containing gaps do not found as diffs. Default = T.\n";
86 helpString += "The skipgaps2 parameter controls how gapped columns affect counting of diffs. If skipgaps2 is set to T, if column is immediately adjacent to a column containing a gap, it is not counted as a diff. Default = T.\n";
87 helpString += "The minlen parameter is the minimum unaligned sequence length. Defaults 10. Applies to both query and reference sequences.\n";
88 helpString += "The maxlen parameter is the maximum unaligned sequence length. Defaults 10000. Applies to both query and reference sequences.\n";
89 helpString += "The ucl parameter - use local-X alignments. Default is global-X or false. On tests so far, global-X is always better; this option is retained because it just might work well on some future type of data.\n";
90 helpString += "The queryfract parameter - minimum fraction of the query sequence that must be covered by a local-X alignment. Default 0.5. Applies only when ucl is true.\n";
92 helpString += "When using MPI, the processors parameter is set to the number of MPI processes running. \n";
94 helpString += "The chimera.uchime command should be in the following format: \n";
95 helpString += "chimera.uchime(fasta=yourFastaFile, reference=yourTemplate) \n";
96 helpString += "Example: chimera.uchime(fasta=AD.align, reference=silva.gold.align) \n";
97 helpString += "Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFastaFile).\n";
100 catch(exception& e) {
101 m->errorOut(e, "ChimeraUchimeCommand", "getHelpString");
105 //**********************************************************************************************************************
106 string ChimeraUchimeCommand::getOutputPattern(string type) {
110 if (type == "chimera") { pattern = "[filename],uchime.chimeras"; }
111 else if (type == "accnos") { pattern = "[filename],uchime.accnos"; }
112 else if (type == "alns") { pattern = "[filename],uchime.alns"; }
113 else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true; }
117 catch(exception& e) {
118 m->errorOut(e, "ChimeraUchimeCommand", "getOutputPattern");
122 //**********************************************************************************************************************
123 ChimeraUchimeCommand::ChimeraUchimeCommand(){
125 abort = true; calledHelp = true;
127 vector<string> tempOutNames;
128 outputTypes["chimera"] = tempOutNames;
129 outputTypes["accnos"] = tempOutNames;
130 outputTypes["alns"] = tempOutNames;
132 catch(exception& e) {
133 m->errorOut(e, "ChimeraUchimeCommand", "ChimeraUchimeCommand");
137 //***************************************************************************************************************
138 ChimeraUchimeCommand::ChimeraUchimeCommand(string option) {
140 abort = false; calledHelp = false; hasName=false; hasCount=false;
141 ReferenceDB* rdb = ReferenceDB::getInstance();
143 //allow user to run help
144 if(option == "help") { help(); abort = true; calledHelp = true; }
145 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
148 vector<string> myArray = setParameters();
150 OptionParser parser(option);
151 map<string,string> parameters = parser.getParameters();
153 ValidParameters validParameter("chimera.uchime");
154 map<string,string>::iterator it;
156 //check to make sure all parameters are valid for command
157 for (it = parameters.begin(); it != parameters.end(); it++) {
158 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
161 vector<string> tempOutNames;
162 outputTypes["chimera"] = tempOutNames;
163 outputTypes["accnos"] = tempOutNames;
164 outputTypes["alns"] = tempOutNames;
166 //if the user changes the input directory command factory will send this info to us in the output parameter
167 string inputDir = validParameter.validFile(parameters, "inputdir", false);
168 if (inputDir == "not found"){ inputDir = ""; }
170 //check for required parameters
171 fastafile = validParameter.validFile(parameters, "fasta", false);
172 if (fastafile == "not found") {
173 //if there is a current fasta file, use it
174 string filename = m->getFastaFile();
175 if (filename != "") { fastaFileNames.push_back(filename); m->mothurOut("Using " + filename + " as input file for the fasta parameter."); m->mothurOutEndLine(); }
176 else { m->mothurOut("You have no current fastafile and the fasta parameter is required."); m->mothurOutEndLine(); abort = true; }
178 m->splitAtDash(fastafile, fastaFileNames);
180 //go through files and make sure they are good, if not, then disregard them
181 for (int i = 0; i < fastaFileNames.size(); i++) {
184 if (fastaFileNames[i] == "current") {
185 fastaFileNames[i] = m->getFastaFile();
186 if (fastaFileNames[i] != "") { m->mothurOut("Using " + fastaFileNames[i] + " as input file for the fasta parameter where you had given current."); m->mothurOutEndLine(); }
188 m->mothurOut("You have no current fastafile, ignoring current."); m->mothurOutEndLine(); ignore=true;
189 //erase from file list
190 fastaFileNames.erase(fastaFileNames.begin()+i);
197 if (inputDir != "") {
198 string path = m->hasPath(fastaFileNames[i]);
199 //if the user has not given a path then, add inputdir. else leave path alone.
200 if (path == "") { fastaFileNames[i] = inputDir + fastaFileNames[i]; }
206 ableToOpen = m->openInputFile(fastaFileNames[i], in, "noerror");
208 //if you can't open it, try default location
209 if (ableToOpen == 1) {
210 if (m->getDefaultPath() != "") { //default path is set
211 string tryPath = m->getDefaultPath() + m->getSimpleName(fastaFileNames[i]);
212 m->mothurOut("Unable to open " + fastaFileNames[i] + ". Trying default " + tryPath); m->mothurOutEndLine();
214 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
216 fastaFileNames[i] = tryPath;
220 if (ableToOpen == 1) {
221 if (m->getOutputDir() != "") { //default path is set
222 string tryPath = m->getOutputDir() + m->getSimpleName(fastaFileNames[i]);
223 m->mothurOut("Unable to open " + fastaFileNames[i] + ". Trying output directory " + tryPath); m->mothurOutEndLine();
225 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
227 fastaFileNames[i] = tryPath;
233 if (ableToOpen == 1) {
234 m->mothurOut("Unable to open " + fastaFileNames[i] + ". It will be disregarded."); m->mothurOutEndLine();
235 //erase from file list
236 fastaFileNames.erase(fastaFileNames.begin()+i);
239 m->setFastaFile(fastaFileNames[i]);
244 //make sure there is at least one valid file left
245 if (fastaFileNames.size() == 0) { m->mothurOut("[ERROR]: no valid files."); m->mothurOutEndLine(); abort = true; }
249 //check for required parameters
250 namefile = validParameter.validFile(parameters, "name", false);
251 if (namefile == "not found") { namefile = ""; }
253 m->splitAtDash(namefile, nameFileNames);
255 //go through files and make sure they are good, if not, then disregard them
256 for (int i = 0; i < nameFileNames.size(); i++) {
259 if (nameFileNames[i] == "current") {
260 nameFileNames[i] = m->getNameFile();
261 if (nameFileNames[i] != "") { m->mothurOut("Using " + nameFileNames[i] + " as input file for the name parameter where you had given current."); m->mothurOutEndLine(); }
263 m->mothurOut("You have no current namefile, ignoring current."); m->mothurOutEndLine(); ignore=true;
264 //erase from file list
265 nameFileNames.erase(nameFileNames.begin()+i);
272 if (inputDir != "") {
273 string path = m->hasPath(nameFileNames[i]);
274 //if the user has not given a path then, add inputdir. else leave path alone.
275 if (path == "") { nameFileNames[i] = inputDir + nameFileNames[i]; }
281 ableToOpen = m->openInputFile(nameFileNames[i], in, "noerror");
283 //if you can't open it, try default location
284 if (ableToOpen == 1) {
285 if (m->getDefaultPath() != "") { //default path is set
286 string tryPath = m->getDefaultPath() + m->getSimpleName(nameFileNames[i]);
287 m->mothurOut("Unable to open " + nameFileNames[i] + ". Trying default " + tryPath); m->mothurOutEndLine();
289 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
291 nameFileNames[i] = tryPath;
295 if (ableToOpen == 1) {
296 if (m->getOutputDir() != "") { //default path is set
297 string tryPath = m->getOutputDir() + m->getSimpleName(nameFileNames[i]);
298 m->mothurOut("Unable to open " + nameFileNames[i] + ". Trying output directory " + tryPath); m->mothurOutEndLine();
300 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
302 nameFileNames[i] = tryPath;
308 if (ableToOpen == 1) {
309 m->mothurOut("Unable to open " + nameFileNames[i] + ". It will be disregarded."); m->mothurOutEndLine();
310 //erase from file list
311 nameFileNames.erase(nameFileNames.begin()+i);
314 m->setNameFile(nameFileNames[i]);
320 if (nameFileNames.size() != 0) { hasName = true; }
322 //check for required parameters
323 vector<string> countfileNames;
324 countfile = validParameter.validFile(parameters, "count", false);
325 if (countfile == "not found") {
328 m->splitAtDash(countfile, countfileNames);
330 //go through files and make sure they are good, if not, then disregard them
331 for (int i = 0; i < countfileNames.size(); i++) {
334 if (countfileNames[i] == "current") {
335 countfileNames[i] = m->getCountTableFile();
336 if (nameFileNames[i] != "") { m->mothurOut("Using " + countfileNames[i] + " as input file for the count parameter where you had given current."); m->mothurOutEndLine(); }
338 m->mothurOut("You have no current count file, ignoring current."); m->mothurOutEndLine(); ignore=true;
339 //erase from file list
340 countfileNames.erase(countfileNames.begin()+i);
347 if (inputDir != "") {
348 string path = m->hasPath(countfileNames[i]);
349 //if the user has not given a path then, add inputdir. else leave path alone.
350 if (path == "") { countfileNames[i] = inputDir + countfileNames[i]; }
356 ableToOpen = m->openInputFile(countfileNames[i], in, "noerror");
358 //if you can't open it, try default location
359 if (ableToOpen == 1) {
360 if (m->getDefaultPath() != "") { //default path is set
361 string tryPath = m->getDefaultPath() + m->getSimpleName(countfileNames[i]);
362 m->mothurOut("Unable to open " + countfileNames[i] + ". Trying default " + tryPath); m->mothurOutEndLine();
364 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
366 countfileNames[i] = tryPath;
370 if (ableToOpen == 1) {
371 if (m->getOutputDir() != "") { //default path is set
372 string tryPath = m->getOutputDir() + m->getSimpleName(countfileNames[i]);
373 m->mothurOut("Unable to open " + countfileNames[i] + ". Trying output directory " + tryPath); m->mothurOutEndLine();
375 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
377 countfileNames[i] = tryPath;
383 if (ableToOpen == 1) {
384 m->mothurOut("Unable to open " + countfileNames[i] + ". It will be disregarded."); m->mothurOutEndLine();
385 //erase from file list
386 countfileNames.erase(countfileNames.begin()+i);
389 m->setCountTableFile(countfileNames[i]);
395 if (countfileNames.size() != 0) { hasCount = true; }
397 //make sure there is at least one valid file left
398 if (hasName && hasCount) { m->mothurOut("[ERROR]: You must enter ONLY ONE of the following: count or name."); m->mothurOutEndLine(); abort = true; }
400 if (!hasName && hasCount) { nameFileNames = countfileNames; }
402 if ((hasCount || hasName) && (nameFileNames.size() != fastaFileNames.size())) { m->mothurOut("[ERROR]: The number of name or count files does not match the number of fastafiles, please correct."); m->mothurOutEndLine(); abort=true; }
404 bool hasGroup = true;
405 groupfile = validParameter.validFile(parameters, "group", false);
406 if (groupfile == "not found") { groupfile = ""; hasGroup = false; }
408 m->splitAtDash(groupfile, groupFileNames);
410 //go through files and make sure they are good, if not, then disregard them
411 for (int i = 0; i < groupFileNames.size(); i++) {
414 if (groupFileNames[i] == "current") {
415 groupFileNames[i] = m->getGroupFile();
416 if (groupFileNames[i] != "") { m->mothurOut("Using " + groupFileNames[i] + " as input file for the group parameter where you had given current."); m->mothurOutEndLine(); }
418 m->mothurOut("You have no current namefile, ignoring current."); m->mothurOutEndLine(); ignore=true;
419 //erase from file list
420 groupFileNames.erase(groupFileNames.begin()+i);
427 if (inputDir != "") {
428 string path = m->hasPath(groupFileNames[i]);
429 //if the user has not given a path then, add inputdir. else leave path alone.
430 if (path == "") { groupFileNames[i] = inputDir + groupFileNames[i]; }
436 ableToOpen = m->openInputFile(groupFileNames[i], in, "noerror");
438 //if you can't open it, try default location
439 if (ableToOpen == 1) {
440 if (m->getDefaultPath() != "") { //default path is set
441 string tryPath = m->getDefaultPath() + m->getSimpleName(groupFileNames[i]);
442 m->mothurOut("Unable to open " + groupFileNames[i] + ". Trying default " + tryPath); m->mothurOutEndLine();
444 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
446 groupFileNames[i] = tryPath;
450 if (ableToOpen == 1) {
451 if (m->getOutputDir() != "") { //default path is set
452 string tryPath = m->getOutputDir() + m->getSimpleName(groupFileNames[i]);
453 m->mothurOut("Unable to open " + groupFileNames[i] + ". Trying output directory " + tryPath); m->mothurOutEndLine();
455 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
457 groupFileNames[i] = tryPath;
463 if (ableToOpen == 1) {
464 m->mothurOut("Unable to open " + groupFileNames[i] + ". It will be disregarded."); m->mothurOutEndLine();
465 //erase from file list
466 groupFileNames.erase(groupFileNames.begin()+i);
469 m->setGroupFile(groupFileNames[i]);
474 //make sure there is at least one valid file left
475 if (groupFileNames.size() == 0) { m->mothurOut("[ERROR]: no valid group files."); m->mothurOutEndLine(); abort = true; }
478 if (hasGroup && (groupFileNames.size() != fastaFileNames.size())) { m->mothurOut("[ERROR]: The number of groupfiles does not match the number of fastafiles, please correct."); m->mothurOutEndLine(); abort=true; }
480 if (hasGroup && hasCount) { m->mothurOut("[ERROR]: You must enter ONLY ONE of the following: count or group."); m->mothurOutEndLine(); abort = true; }
481 //if the user changes the output directory command factory will send this info to us in the output parameter
482 outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = ""; }
485 //if the user changes the output directory command factory will send this info to us in the output parameter
486 outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = ""; }
489 it = parameters.find("reference");
490 //user has given a template file
491 if(it != parameters.end()){
492 if (it->second == "self") { templatefile = "self"; }
494 path = m->hasPath(it->second);
495 //if the user has not given a path then, add inputdir. else leave path alone.
496 if (path == "") { parameters["reference"] = inputDir + it->second; }
498 templatefile = validParameter.validFile(parameters, "reference", true);
499 if (templatefile == "not open") { abort = true; }
500 else if (templatefile == "not found") { //check for saved reference sequences
501 if (rdb->getSavedReference() != "") {
502 templatefile = rdb->getSavedReference();
503 m->mothurOutEndLine(); m->mothurOut("Using sequences from " + rdb->getSavedReference() + "."); m->mothurOutEndLine();
505 m->mothurOut("[ERROR]: You don't have any saved reference sequences and the reference parameter is a required.");
506 m->mothurOutEndLine();
511 }else if (hasName) { templatefile = "self"; }
512 else if (hasCount) { templatefile = "self"; }
514 if (rdb->getSavedReference() != "") {
515 templatefile = rdb->getSavedReference();
516 m->mothurOutEndLine(); m->mothurOut("Using sequences from " + rdb->getSavedReference() + "."); m->mothurOutEndLine();
518 m->mothurOut("[ERROR]: You don't have any saved reference sequences and the reference parameter is a required.");
519 m->mothurOutEndLine();
520 templatefile = ""; abort = true;
524 string temp = validParameter.validFile(parameters, "processors", false); if (temp == "not found"){ temp = m->getProcessors(); }
525 m->setProcessors(temp);
526 m->mothurConvert(temp, processors);
528 abskew = validParameter.validFile(parameters, "abskew", false); if (abskew == "not found"){ useAbskew = false; abskew = "1.9"; }else{ useAbskew = true; }
529 if (useAbskew && templatefile != "self") { m->mothurOut("The abskew parameter is only valid with template=self, ignoring."); m->mothurOutEndLine(); useAbskew = false; }
531 temp = validParameter.validFile(parameters, "chimealns", false); if (temp == "not found") { temp = "f"; }
532 chimealns = m->isTrue(temp);
534 minh = validParameter.validFile(parameters, "minh", false); if (minh == "not found") { useMinH = false; minh = "0.3"; } else{ useMinH = true; }
535 mindiv = validParameter.validFile(parameters, "mindiv", false); if (mindiv == "not found") { useMindiv = false; mindiv = "0.5"; } else{ useMindiv = true; }
536 xn = validParameter.validFile(parameters, "xn", false); if (xn == "not found") { useXn = false; xn = "8.0"; } else{ useXn = true; }
537 dn = validParameter.validFile(parameters, "dn", false); if (dn == "not found") { useDn = false; dn = "1.4"; } else{ useDn = true; }
538 xa = validParameter.validFile(parameters, "xa", false); if (xa == "not found") { useXa = false; xa = "1"; } else{ useXa = true; }
539 chunks = validParameter.validFile(parameters, "chunks", false); if (chunks == "not found") { useChunks = false; chunks = "4"; } else{ useChunks = true; }
540 minchunk = validParameter.validFile(parameters, "minchunk", false); if (minchunk == "not found") { useMinchunk = false; minchunk = "64"; } else{ useMinchunk = true; }
541 idsmoothwindow = validParameter.validFile(parameters, "idsmoothwindow", false); if (idsmoothwindow == "not found") { useIdsmoothwindow = false; idsmoothwindow = "32"; } else{ useIdsmoothwindow = true; }
542 //minsmoothid = validParameter.validFile(parameters, "minsmoothid", false); if (minsmoothid == "not found") { useMinsmoothid = false; minsmoothid = "0.95"; } else{ useMinsmoothid = true; }
543 maxp = validParameter.validFile(parameters, "maxp", false); if (maxp == "not found") { useMaxp = false; maxp = "2"; } else{ useMaxp = true; }
544 minlen = validParameter.validFile(parameters, "minlen", false); if (minlen == "not found") { useMinlen = false; minlen = "10"; } else{ useMinlen = true; }
545 maxlen = validParameter.validFile(parameters, "maxlen", false); if (maxlen == "not found") { useMaxlen = false; maxlen = "10000"; } else{ useMaxlen = true; }
547 temp = validParameter.validFile(parameters, "ucl", false); if (temp == "not found") { temp = "f"; }
548 ucl = m->isTrue(temp);
550 queryfract = validParameter.validFile(parameters, "queryfract", false); if (queryfract == "not found") { useQueryfract = false; queryfract = "0.5"; } else{ useQueryfract = true; }
551 if (!ucl && useQueryfract) { m->mothurOut("queryfact may only be used when ucl=t, ignoring."); m->mothurOutEndLine(); useQueryfract = false; }
553 temp = validParameter.validFile(parameters, "skipgaps", false); if (temp == "not found") { temp = "t"; }
554 skipgaps = m->isTrue(temp);
556 temp = validParameter.validFile(parameters, "skipgaps2", false); if (temp == "not found") { temp = "t"; }
557 skipgaps2 = m->isTrue(temp);
559 string usedDups = "false";
560 temp = validParameter.validFile(parameters, "dereplicate", false);
561 if (temp == "not found") {
562 if (groupfile != "") { temp = "false"; }
563 else { temp = "true"; usedDups = ""; }
565 dups = m->isTrue(temp);
568 if (hasName && (templatefile != "self")) { m->mothurOut("You have provided a namefile and the reference parameter is not set to self. I am not sure what reference you are trying to use, aborting."); m->mothurOutEndLine(); abort=true; }
569 if (hasGroup && (templatefile != "self")) { m->mothurOut("You have provided a group file and the reference parameter is not set to self. I am not sure what reference you are trying to use, aborting."); m->mothurOutEndLine(); abort=true; }
571 //look for uchime exe
573 string tempPath = path;
574 for (int i = 0; i < path.length(); i++) { tempPath[i] = tolower(path[i]); }
575 path = path.substr(0, (tempPath.find_last_of('m')));
577 string uchimeCommand;
578 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
579 uchimeCommand = path + "uchime"; // format the database, -o option gives us the ability
581 m->mothurOut("[DEBUG]: Uchime location using \"which uchime\" = ");
582 Command* newCommand = new SystemCommand("which uchime"); m->mothurOutEndLine();
583 newCommand->execute();
585 m->mothurOut("[DEBUG]: Mothur's location using \"which mothur\" = ");
586 newCommand = new SystemCommand("which mothur"); m->mothurOutEndLine();
587 newCommand->execute();
591 uchimeCommand = path + "uchime.exe";
594 //test to make sure uchime exists
596 uchimeCommand = m->getFullPathName(uchimeCommand);
597 int ableToOpen = m->openInputFile(uchimeCommand, in, "no error"); in.close();
598 if(ableToOpen == 1) {
599 m->mothurOut(uchimeCommand + " file does not exist. Checking path... \n");
600 //check to see if uchime is in the path??
602 string uLocation = m->findProgramPath("uchime");
606 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
607 ableToOpen = m->openInputFile(uLocation, in2, "no error"); in2.close();
609 ableToOpen = m->openInputFile((uLocation + ".exe"), in2, "no error"); in2.close();
612 if(ableToOpen == 1) { m->mothurOut("[ERROR]: " + uLocation + " file does not exist. mothur requires the uchime executable."); m->mothurOutEndLine(); abort = true; }
613 else { m->mothurOut("Found uchime in your path, using " + uLocation + "\n");uchimeLocation = uLocation; }
614 }else { uchimeLocation = uchimeCommand; }
616 uchimeLocation = m->getFullPathName(uchimeLocation);
619 catch(exception& e) {
620 m->errorOut(e, "ChimeraSlayerCommand", "ChimeraSlayerCommand");
624 //***************************************************************************************************************
626 int ChimeraUchimeCommand::execute(){
629 if (abort == true) { if (calledHelp) { return 0; } return 2; }
631 m->mothurOut("\nuchime by Robert C. Edgar\nhttp://drive5.com/uchime\nThis code is donated to the public domain.\n\n");
633 for (int s = 0; s < fastaFileNames.size(); s++) {
635 m->mothurOut("Checking sequences from " + fastaFileNames[s] + " ..." ); m->mothurOutEndLine();
637 int start = time(NULL);
638 string nameFile = "";
639 if (outputDir == "") { outputDir = m->hasPath(fastaFileNames[s]); }//if user entered a file with a path then preserve it
640 map<string, string> variables;
641 variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s]));
642 string outputFileName = getOutputFileName("chimera", variables);
643 string accnosFileName = getOutputFileName("accnos", variables);
644 string alnsFileName = getOutputFileName("alns", variables);
645 string newFasta = m->getRootName(fastaFileNames[s]) + "temp";
647 //you provided a groupfile
648 string groupFile = "";
649 bool hasGroup = false;
650 if (groupFileNames.size() != 0) { groupFile = groupFileNames[s]; hasGroup = true; }
653 if (ct.testGroups(nameFileNames[s])) { hasGroup = true; }
656 if ((templatefile == "self") && (!hasGroup)) { //you want to run uchime with a template=self and no groups
658 if (processors != 1) { m->mothurOut("When using template=self, mothur can only use 1 processor, continuing."); m->mothurOutEndLine(); processors = 1; }
659 if (nameFileNames.size() != 0) { //you provided a namefile and we don't need to create one
660 nameFile = nameFileNames[s];
661 }else { nameFile = getNamesFile(fastaFileNames[s]); }
663 map<string, string> seqs;
664 readFasta(fastaFileNames[s], seqs); if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } return 0; }
667 vector<seqPriorityNode> nameMapCount;
671 ct.readTable(nameFile);
672 for(map<string, string>::iterator it = seqs.begin(); it != seqs.end(); it++) {
673 int num = ct.getNumSeqs(it->first);
674 if (num == 0) { error = 1; }
676 seqPriorityNode temp(num, it->second, it->first);
677 nameMapCount.push_back(temp);
681 error = m->readNames(nameFile, nameMapCount, seqs); if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } return 0; }
683 if (error == 1) { for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } return 0; }
684 if (seqs.size() != nameMapCount.size()) { m->mothurOut( "The number of sequences in your fastafile does not match the number of sequences in your namefile, aborting."); m->mothurOutEndLine(); for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } return 0; }
686 printFile(nameMapCount, newFasta);
687 fastaFileNames[s] = newFasta;
690 if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } return 0; }
693 if (nameFileNames.size() != 0) { //you provided a namefile and we don't need to create one
694 nameFile = nameFileNames[s];
695 }else { nameFile = getNamesFile(fastaFileNames[s]); }
697 //Parse sequences by group
698 vector<string> groups;
699 map<string, string> uniqueNames;
701 cparser = new SequenceCountParser(nameFile, fastaFileNames[s]);
702 groups = cparser->getNamesOfGroups();
703 uniqueNames = cparser->getAllSeqsMap();
705 sparser = new SequenceParser(groupFile, fastaFileNames[s], nameFile);
706 groups = sparser->getNamesOfGroups();
707 uniqueNames = sparser->getAllSeqsMap();
710 if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } return 0; }
713 ofstream out, out1, out2;
714 m->openOutputFile(outputFileName, out); out.close();
715 m->openOutputFile(accnosFileName, out1); out1.close();
716 if (chimealns) { m->openOutputFile(alnsFileName, out2); out2.close(); }
719 if(processors == 1) { totalSeqs = driverGroups(outputFileName, newFasta, accnosFileName, alnsFileName, 0, groups.size(), groups); }
720 else { totalSeqs = createProcessesGroups(outputFileName, newFasta, accnosFileName, alnsFileName, groups, nameFile, groupFile, fastaFileNames[s]); }
722 if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } return 0; }
723 if (hasCount) { delete cparser; }
724 else { delete sparser; }
727 int totalChimeras = deconvoluteResults(uniqueNames, outputFileName, accnosFileName, alnsFileName);
729 m->mothurOutEndLine(); m->mothurOut("It took " + toString(time(NULL) - start) + " secs to check " + toString(totalSeqs) + " sequences. " + toString(totalChimeras) + " chimeras were found."); m->mothurOutEndLine();
730 m->mothurOut("The number of sequences checked may be larger than the number of unique sequences because some sequences are found in several samples."); m->mothurOutEndLine();
733 if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } return 0; }
736 if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } return 0; }
741 if(processors == 1){ numSeqs = driver(outputFileName, fastaFileNames[s], accnosFileName, alnsFileName, numChimeras); }
742 else{ numSeqs = createProcesses(outputFileName, fastaFileNames[s], accnosFileName, alnsFileName, numChimeras); }
746 m->openOutputFile(outputFileName+".temp", out);
747 out << "Score\tQuery\tParentA\tParentB\tIdQM\tIdQA\tIdQB\tIdAB\tIdQT\tLY\tLN\tLA\tRY\tRN\tRA\tDiv\tYN\n";
750 m->appendFiles(outputFileName, outputFileName+".temp");
751 m->mothurRemove(outputFileName); rename((outputFileName+".temp").c_str(), outputFileName.c_str());
753 if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } return 0; }
755 //remove file made for uchime
756 if (templatefile == "self") { m->mothurRemove(fastaFileNames[s]); }
758 m->mothurOutEndLine(); m->mothurOut("It took " + toString(time(NULL) - start) + " secs to check " + toString(numSeqs) + " sequences. " + toString(numChimeras) + " chimeras were found."); m->mothurOutEndLine();
761 outputNames.push_back(outputFileName); outputTypes["chimera"].push_back(outputFileName);
762 outputNames.push_back(accnosFileName); outputTypes["accnos"].push_back(accnosFileName);
763 if (chimealns) { outputNames.push_back(alnsFileName); outputTypes["alns"].push_back(alnsFileName); }
766 //set accnos file as new current accnosfile
768 itTypes = outputTypes.find("accnos");
769 if (itTypes != outputTypes.end()) {
770 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setAccnosFile(current); }
773 m->mothurOutEndLine();
774 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
775 for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); }
776 m->mothurOutEndLine();
781 catch(exception& e) {
782 m->errorOut(e, "ChimeraUchimeCommand", "execute");
786 //**********************************************************************************************************************
787 int ChimeraUchimeCommand::deconvoluteResults(map<string, string>& uniqueNames, string outputFileName, string accnosFileName, string alnsFileName){
789 map<string, string>::iterator itUnique;
794 m->openInputFile(accnosFileName, in2);
797 m->openOutputFile(accnosFileName+".temp", out2);
800 set<string> namesInFile; //this is so if a sequence is found to be chimera in several samples we dont write it to the results file more than once
801 set<string>::iterator itNames;
802 set<string> chimerasInFile;
803 set<string>::iterator itChimeras;
807 if (m->control_pressed) { in2.close(); out2.close(); m->mothurRemove(outputFileName); m->mothurRemove((accnosFileName+".temp")); return 0; }
809 in2 >> name; m->gobble(in2);
812 itUnique = uniqueNames.find(name);
814 if (itUnique == uniqueNames.end()) { m->mothurOut("[ERROR]: trouble parsing accnos results. Cannot find " + name + "."); m->mothurOutEndLine(); m->control_pressed = true; }
816 itChimeras = chimerasInFile.find((itUnique->second));
818 if (itChimeras == chimerasInFile.end()) {
819 out2 << itUnique->second << endl;
820 chimerasInFile.insert((itUnique->second));
828 m->mothurRemove(accnosFileName);
829 rename((accnosFileName+".temp").c_str(), accnosFileName.c_str());
835 m->openInputFile(outputFileName, in);
838 m->openOutputFile(outputFileName+".temp", out); out.setf(ios::fixed, ios::floatfield); out.setf(ios::showpoint);
839 out << "Score\tQuery\tParentA\tParentB\tIdQM\tIdQA\tIdQB\tIdAB\tIdQT\tLY\tLN\tLA\tRY\tRN\tRA\tDiv\tYN\n";
842 string parent1, parent2, temp2, temp3, temp4, temp5, temp6, temp7, temp8, temp9, temp10, temp11, temp12, temp13, flag;
845 //assumptions - in file each read will always look like - if uchime source is updated, revisit this code.
846 /* 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
847 0.000000 F11Fcsw_33372/ab=18/ * * * * * * * * * * * * * * N
848 0.018300 F11Fcsw_14980/ab=16/ F11Fcsw_1915/ab=35/ F11Fcsw_6032/ab=42/ 79.9 78.7 78.2 78.7 79.2 3 0 5 11 10 20 1.46 N
853 if (m->control_pressed) { in.close(); out.close(); m->mothurRemove((outputFileName+".temp")); return 0; }
856 in >> temp1; m->gobble(in);
857 in >> name; m->gobble(in);
858 in >> parent1; m->gobble(in);
859 in >> parent2; m->gobble(in);
860 in >> temp2 >> temp3 >> temp4 >> temp5 >> temp6 >> temp7 >> temp8 >> temp9 >> temp10 >> temp11 >> temp12 >> temp13 >> flag;
863 //parse name - name will look like U68590/ab=1/
864 string restOfName = "";
865 int pos = name.find_first_of('/');
866 if (pos != string::npos) {
867 restOfName = name.substr(pos);
868 name = name.substr(0, pos);
872 itUnique = uniqueNames.find(name);
874 if (itUnique == uniqueNames.end()) { m->mothurOut("[ERROR]: trouble parsing chimera results. Cannot find "+ name + "."); m->mothurOutEndLine(); m->control_pressed = true; }
876 name = itUnique->second;
877 //is this name already in the file
878 itNames = namesInFile.find((name));
880 if (itNames == namesInFile.end()) { //no not in file
881 if (flag == "N") { //are you really a no??
882 //is this sequence really not chimeric??
883 itChimeras = chimerasInFile.find(name);
885 //then you really are a no so print, otherwise skip
886 if (itChimeras == chimerasInFile.end()) { print = true; }
887 }else{ print = true; }
892 out << temp1 << '\t' << name << restOfName << '\t';
893 namesInFile.insert(name);
895 //parse parent1 names
896 if (parent1 != "*") {
898 pos = parent1.find_first_of('/');
899 if (pos != string::npos) {
900 restOfName = parent1.substr(pos);
901 parent1 = parent1.substr(0, pos);
904 itUnique = uniqueNames.find(parent1);
905 if (itUnique == uniqueNames.end()) { m->mothurOut("[ERROR]: trouble parsing chimera results. Cannot find parentA "+ parent1 + "."); m->mothurOutEndLine(); m->control_pressed = true; }
906 else { out << itUnique->second << restOfName << '\t'; }
907 }else { out << parent1 << '\t'; }
909 //parse parent2 names
910 if (parent2 != "*") {
912 pos = parent2.find_first_of('/');
913 if (pos != string::npos) {
914 restOfName = parent2.substr(pos);
915 parent2 = parent2.substr(0, pos);
918 itUnique = uniqueNames.find(parent2);
919 if (itUnique == uniqueNames.end()) { m->mothurOut("[ERROR]: trouble parsing chimera results. Cannot find parentB "+ parent2 + "."); m->mothurOutEndLine(); m->control_pressed = true; }
920 else { out << itUnique->second << restOfName << '\t'; }
921 }else { out << parent2 << '\t'; }
923 out << temp2 << '\t' << temp3 << '\t' << temp4 << '\t' << temp5 << '\t' << temp6 << '\t' << temp7 << '\t' << temp8 << '\t' << temp9 << '\t' << temp10 << '\t' << temp11 << '\t' << temp12 << temp13 << '\t' << flag << endl;
929 m->mothurRemove(outputFileName);
930 rename((outputFileName+".temp").c_str(), outputFileName.c_str());
934 //assumptions - in file each read will always look like - if uchime source is updated, revisit this code.
936 ------------------------------------------------------------------------
937 Query ( 179 nt) F21Fcsw_11639/ab=591/
938 ParentA ( 179 nt) F11Fcsw_6529/ab=1625/
939 ParentB ( 181 nt) F21Fcsw_12128/ab=1827/
941 A 1 AAGgAAGAtTAATACaagATGgCaTCatgAGtccgCATgTtcAcatGATTAAAG--gTaTtcCGGTagacGATGGGGATG 78
942 Q 1 AAGTAAGACTAATACCCAATGACGTCTCTAGAAGACATCTGAAAGAGATTAAAG--ATTTATCGGTGATGGATGGGGATG 78
943 B 1 AAGgAAGAtTAATcCaggATGggaTCatgAGttcACATgTccgcatGATTAAAGgtATTTtcCGGTagacGATGGGGATG 80
944 Diffs N N A N?N N N NNN N?NB N ?NaNNN B B NN NNNN
945 Votes 0 0 + 000 0 0 000 000+ 0 00!000 + 00 0000
946 Model AAAAAAAAAAAAAAAAAAAAAAxBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB
948 A 79 CGTtccATTAGaTaGTaGGCGGGGTAACGGCCCACCtAGtCttCGATggaTAGGGGTTCTGAGAGGAAGGTCCCCCACAT 158
949 Q 79 CGTCTGATTAGCTTGTTGGCGGGGTAACGGCCCACCAAGGCAACGATCAGTAGGGGTTCTGAGAGGAAGGTCCCCCACAT 158
950 B 81 CGTtccATTAGaTaGTaGGCGGGGTAACGGCCCACCtAGtCAACGATggaTAGGGGTTCTGAGAGGAAGGTCCCCCACAT 160
951 Diffs NNN N N N N N BB NNN
952 Votes 000 0 0 0 0 0 ++ 000
953 Model BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB
955 A 159 TGGAACTGAGACACGGTCCAA 179
956 Q 159 TGGAACTGAGACACGGTCCAA 179
957 B 161 TGGAACTGAGACACGGTCCAA 181
960 Model BBBBBBBBBBBBBBBBBBBBB
962 Ids. QA 76.6%, QB 77.7%, AB 93.7%, QModel 78.9%, Div. +1.5%
963 Diffs Left 7: N 0, A 6, Y 1 (14.3%); Right 35: N 1, A 30, Y 4 (11.4%), Score 0.0047
967 m->openInputFile(alnsFileName, in3);
970 m->openOutputFile(alnsFileName+".temp", out3); out3.setf(ios::fixed, ios::floatfield); out3.setf(ios::showpoint);
977 if (m->control_pressed) { in3.close(); out3.close(); m->mothurRemove(outputFileName); m->mothurRemove((accnosFileName)); m->mothurRemove((alnsFileName+".temp")); return 0; }
980 line = m->getline(in3);
984 istringstream iss(line);
987 //are you a name line
988 if ((temp == "Query") || (temp == "ParentA") || (temp == "ParentB")) {
990 for (int i = 0; i < line.length(); i++) {
992 if (line[i] == ')') { break; }
993 else { out3 << line[i]; }
996 if (spot == (line.length() - 1)) { m->mothurOut("[ERROR]: could not line sequence name in line " + line + "."); m->mothurOutEndLine(); m->control_pressed = true; }
997 else if ((spot+2) > (line.length() - 1)) { m->mothurOut("[ERROR]: could not line sequence name in line " + line + "."); m->mothurOutEndLine(); m->control_pressed = true; }
999 out << line[spot] << line[spot+1];
1001 name = line.substr(spot+2);
1003 //parse name - name will either look like U68590/ab=1/ or U68590
1004 string restOfName = "";
1005 int pos = name.find_first_of('/');
1006 if (pos != string::npos) {
1007 restOfName = name.substr(pos);
1008 name = name.substr(0, pos);
1012 itUnique = uniqueNames.find(name);
1014 if (itUnique == uniqueNames.end()) { m->mothurOut("[ERROR]: trouble parsing alns results. Cannot find "+ name + "."); m->mothurOutEndLine();m->control_pressed = true; }
1016 //only limit repeats on query names
1017 if (temp == "Query") {
1018 itNames = namesInFile.find((itUnique->second));
1020 if (itNames == namesInFile.end()) {
1021 out << itUnique->second << restOfName << endl;
1022 namesInFile.insert((itUnique->second));
1024 }else { out << itUnique->second << restOfName << endl; }
1029 }else { //not need to alter line
1030 out3 << line << endl;
1032 }else { out3 << endl; }
1037 m->mothurRemove(alnsFileName);
1038 rename((alnsFileName+".temp").c_str(), alnsFileName.c_str());
1043 catch(exception& e) {
1044 m->errorOut(e, "ChimeraUchimeCommand", "deconvoluteResults");
1048 //**********************************************************************************************************************
1049 int ChimeraUchimeCommand::printFile(vector<seqPriorityNode>& nameMapCount, string filename){
1052 sort(nameMapCount.begin(), nameMapCount.end(), compareSeqPriorityNodes);
1055 m->openOutputFile(filename, out);
1057 //print new file in order of
1058 for (int i = 0; i < nameMapCount.size(); i++) {
1059 out << ">" << nameMapCount[i].name << "/ab=" << nameMapCount[i].numIdentical << "/" << endl << nameMapCount[i].seq << endl;
1065 catch(exception& e) {
1066 m->errorOut(e, "ChimeraUchimeCommand", "printFile");
1070 //**********************************************************************************************************************
1071 int ChimeraUchimeCommand::readFasta(string filename, map<string, string>& seqs){
1073 //create input file for uchime
1074 //read through fastafile and store info
1076 m->openInputFile(filename, in);
1080 if (m->control_pressed) { in.close(); return 0; }
1082 Sequence seq(in); m->gobble(in);
1083 seqs[seq.getName()] = seq.getAligned();
1089 catch(exception& e) {
1090 m->errorOut(e, "ChimeraUchimeCommand", "readFasta");
1094 //**********************************************************************************************************************
1096 string ChimeraUchimeCommand::getNamesFile(string& inputFile){
1098 string nameFile = "";
1100 m->mothurOutEndLine(); m->mothurOut("No namesfile given, running unique.seqs command to generate one."); m->mothurOutEndLine(); m->mothurOutEndLine();
1102 //use unique.seqs to create new name and fastafile
1103 string inputString = "fasta=" + inputFile;
1104 m->mothurOut("/******************************************/"); m->mothurOutEndLine();
1105 m->mothurOut("Running command: unique.seqs(" + inputString + ")"); m->mothurOutEndLine();
1106 m->mothurCalling = true;
1108 Command* uniqueCommand = new DeconvoluteCommand(inputString);
1109 uniqueCommand->execute();
1111 map<string, vector<string> > filenames = uniqueCommand->getOutputFiles();
1113 delete uniqueCommand;
1114 m->mothurCalling = false;
1115 m->mothurOut("/******************************************/"); m->mothurOutEndLine();
1117 nameFile = filenames["name"][0];
1118 inputFile = filenames["fasta"][0];
1122 catch(exception& e) {
1123 m->errorOut(e, "ChimeraUchimeCommand", "getNamesFile");
1127 //**********************************************************************************************************************
1128 int ChimeraUchimeCommand::driverGroups(string outputFName, string filename, string accnos, string alns, int start, int end, vector<string> groups){
1132 int numChimeras = 0;
1134 for (int i = start; i < end; i++) {
1135 int start = time(NULL); if (m->control_pressed) { return 0; }
1138 if (hasCount) { error = cparser->getSeqs(groups[i], filename, true); if ((error == 1) || m->control_pressed) { return 0; } }
1139 else { error = sparser->getSeqs(groups[i], filename, true); if ((error == 1) || m->control_pressed) { return 0; } }
1141 int numSeqs = driver((outputFName + groups[i]), filename, (accnos+ groups[i]), (alns+ groups[i]), numChimeras);
1142 totalSeqs += numSeqs;
1144 if (m->control_pressed) { return 0; }
1146 //remove file made for uchime
1147 if (!m->debug) { m->mothurRemove(filename); }
1148 else { m->mothurOut("[DEBUG]: saving file: " + filename + ".\n"); }
1151 m->appendFiles((outputFName+groups[i]), outputFName); m->mothurRemove((outputFName+groups[i]));
1152 m->appendFiles((accnos+groups[i]), accnos); m->mothurRemove((accnos+groups[i]));
1153 if (chimealns) { m->appendFiles((alns+groups[i]), alns); m->mothurRemove((alns+groups[i])); }
1155 m->mothurOutEndLine(); m->mothurOut("It took " + toString(time(NULL) - start) + " secs to check " + toString(numSeqs) + " sequences from group " + groups[i] + "."); m->mothurOutEndLine();
1160 catch(exception& e) {
1161 m->errorOut(e, "ChimeraUchimeCommand", "driverGroups");
1165 //**********************************************************************************************************************
1167 int ChimeraUchimeCommand::driver(string outputFName, string filename, string accnos, string alns, int& numChimeras){
1170 outputFName = m->getFullPathName(outputFName);
1171 filename = m->getFullPathName(filename);
1172 alns = m->getFullPathName(alns);
1174 //to allow for spaces in the path
1175 outputFName = "\"" + outputFName + "\"";
1176 filename = "\"" + filename + "\"";
1177 alns = "\"" + alns + "\"";
1179 vector<char*> cPara;
1181 string uchimeCommand = uchimeLocation;
1182 uchimeCommand = "\"" + uchimeCommand + "\" ";
1185 tempUchime= new char[uchimeCommand.length()+1];
1187 strncat(tempUchime, uchimeCommand.c_str(), uchimeCommand.length());
1188 cPara.push_back(tempUchime);
1190 //are you using a reference file
1191 if (templatefile != "self") {
1192 string outputFileName = filename.substr(1, filename.length()-2) + ".uchime_formatted";
1193 prepFile(filename.substr(1, filename.length()-2), outputFileName);
1194 filename = outputFileName;
1195 filename = "\"" + filename + "\"";
1196 //add reference file
1197 char* tempRef = new char[5];
1198 //strcpy(tempRef, "--db");
1199 *tempRef = '\0'; strncat(tempRef, "--db", 4);
1200 cPara.push_back(tempRef);
1201 char* tempR = new char[templatefile.length()+1];
1202 //strcpy(tempR, templatefile.c_str());
1203 *tempR = '\0'; strncat(tempR, templatefile.c_str(), templatefile.length());
1204 cPara.push_back(tempR);
1207 char* tempIn = new char[8];
1208 *tempIn = '\0'; strncat(tempIn, "--input", 7);
1209 //strcpy(tempIn, "--input");
1210 cPara.push_back(tempIn);
1211 char* temp = new char[filename.length()+1];
1212 *temp = '\0'; strncat(temp, filename.c_str(), filename.length());
1213 //strcpy(temp, filename.c_str());
1214 cPara.push_back(temp);
1216 char* tempO = new char[12];
1217 *tempO = '\0'; strncat(tempO, "--uchimeout", 11);
1218 //strcpy(tempO, "--uchimeout");
1219 cPara.push_back(tempO);
1220 char* tempout = new char[outputFName.length()+1];
1221 //strcpy(tempout, outputFName.c_str());
1222 *tempout = '\0'; strncat(tempout, outputFName.c_str(), outputFName.length());
1223 cPara.push_back(tempout);
1226 char* tempA = new char[13];
1227 *tempA = '\0'; strncat(tempA, "--uchimealns", 12);
1228 //strcpy(tempA, "--uchimealns");
1229 cPara.push_back(tempA);
1230 char* tempa = new char[alns.length()+1];
1231 //strcpy(tempa, alns.c_str());
1232 *tempa = '\0'; strncat(tempa, alns.c_str(), alns.length());
1233 cPara.push_back(tempa);
1237 char* tempskew = new char[9];
1238 *tempskew = '\0'; strncat(tempskew, "--abskew", 8);
1239 //strcpy(tempskew, "--abskew");
1240 cPara.push_back(tempskew);
1241 char* tempSkew = new char[abskew.length()+1];
1242 //strcpy(tempSkew, abskew.c_str());
1243 *tempSkew = '\0'; strncat(tempSkew, abskew.c_str(), abskew.length());
1244 cPara.push_back(tempSkew);
1248 char* tempminh = new char[7];
1249 *tempminh = '\0'; strncat(tempminh, "--minh", 6);
1250 //strcpy(tempminh, "--minh");
1251 cPara.push_back(tempminh);
1252 char* tempMinH = new char[minh.length()+1];
1253 *tempMinH = '\0'; strncat(tempMinH, minh.c_str(), minh.length());
1254 //strcpy(tempMinH, minh.c_str());
1255 cPara.push_back(tempMinH);
1259 char* tempmindiv = new char[9];
1260 *tempmindiv = '\0'; strncat(tempmindiv, "--mindiv", 8);
1261 //strcpy(tempmindiv, "--mindiv");
1262 cPara.push_back(tempmindiv);
1263 char* tempMindiv = new char[mindiv.length()+1];
1264 *tempMindiv = '\0'; strncat(tempMindiv, mindiv.c_str(), mindiv.length());
1265 //strcpy(tempMindiv, mindiv.c_str());
1266 cPara.push_back(tempMindiv);
1270 char* tempxn = new char[5];
1271 //strcpy(tempxn, "--xn");
1272 *tempxn = '\0'; strncat(tempxn, "--xn", 4);
1273 cPara.push_back(tempxn);
1274 char* tempXn = new char[xn.length()+1];
1275 //strcpy(tempXn, xn.c_str());
1276 *tempXn = '\0'; strncat(tempXn, xn.c_str(), xn.length());
1277 cPara.push_back(tempXn);
1281 char* tempdn = new char[5];
1282 //strcpy(tempdn, "--dn");
1283 *tempdn = '\0'; strncat(tempdn, "--dn", 4);
1284 cPara.push_back(tempdn);
1285 char* tempDn = new char[dn.length()+1];
1286 *tempDn = '\0'; strncat(tempDn, dn.c_str(), dn.length());
1287 //strcpy(tempDn, dn.c_str());
1288 cPara.push_back(tempDn);
1292 char* tempxa = new char[5];
1293 //strcpy(tempxa, "--xa");
1294 *tempxa = '\0'; strncat(tempxa, "--xa", 4);
1295 cPara.push_back(tempxa);
1296 char* tempXa = new char[xa.length()+1];
1297 *tempXa = '\0'; strncat(tempXa, xa.c_str(), xa.length());
1298 //strcpy(tempXa, xa.c_str());
1299 cPara.push_back(tempXa);
1303 char* tempchunks = new char[9];
1304 //strcpy(tempchunks, "--chunks");
1305 *tempchunks = '\0'; strncat(tempchunks, "--chunks", 8);
1306 cPara.push_back(tempchunks);
1307 char* tempChunks = new char[chunks.length()+1];
1308 *tempChunks = '\0'; strncat(tempChunks, chunks.c_str(), chunks.length());
1309 //strcpy(tempChunks, chunks.c_str());
1310 cPara.push_back(tempChunks);
1314 char* tempminchunk = new char[11];
1315 //strcpy(tempminchunk, "--minchunk");
1316 *tempminchunk = '\0'; strncat(tempminchunk, "--minchunk", 10);
1317 cPara.push_back(tempminchunk);
1318 char* tempMinchunk = new char[minchunk.length()+1];
1319 *tempMinchunk = '\0'; strncat(tempMinchunk, minchunk.c_str(), minchunk.length());
1320 //strcpy(tempMinchunk, minchunk.c_str());
1321 cPara.push_back(tempMinchunk);
1324 if (useIdsmoothwindow) {
1325 char* tempidsmoothwindow = new char[17];
1326 *tempidsmoothwindow = '\0'; strncat(tempidsmoothwindow, "--idsmoothwindow", 16);
1327 //strcpy(tempidsmoothwindow, "--idsmoothwindow");
1328 cPara.push_back(tempidsmoothwindow);
1329 char* tempIdsmoothwindow = new char[idsmoothwindow.length()+1];
1330 *tempIdsmoothwindow = '\0'; strncat(tempIdsmoothwindow, idsmoothwindow.c_str(), idsmoothwindow.length());
1331 //strcpy(tempIdsmoothwindow, idsmoothwindow.c_str());
1332 cPara.push_back(tempIdsmoothwindow);
1335 /*if (useMinsmoothid) {
1336 char* tempminsmoothid = new char[14];
1337 //strcpy(tempminsmoothid, "--minsmoothid");
1338 *tempminsmoothid = '\0'; strncat(tempminsmoothid, "--minsmoothid", 13);
1339 cPara.push_back(tempminsmoothid);
1340 char* tempMinsmoothid = new char[minsmoothid.length()+1];
1341 *tempMinsmoothid = '\0'; strncat(tempMinsmoothid, minsmoothid.c_str(), minsmoothid.length());
1342 //strcpy(tempMinsmoothid, minsmoothid.c_str());
1343 cPara.push_back(tempMinsmoothid);
1347 char* tempmaxp = new char[7];
1348 //strcpy(tempmaxp, "--maxp");
1349 *tempmaxp = '\0'; strncat(tempmaxp, "--maxp", 6);
1350 cPara.push_back(tempmaxp);
1351 char* tempMaxp = new char[maxp.length()+1];
1352 *tempMaxp = '\0'; strncat(tempMaxp, maxp.c_str(), maxp.length());
1353 //strcpy(tempMaxp, maxp.c_str());
1354 cPara.push_back(tempMaxp);
1358 char* tempskipgaps = new char[13];
1359 //strcpy(tempskipgaps, "--[no]skipgaps");
1360 *tempskipgaps = '\0'; strncat(tempskipgaps, "--noskipgaps", 12);
1361 cPara.push_back(tempskipgaps);
1365 char* tempskipgaps2 = new char[14];
1366 //strcpy(tempskipgaps2, "--[no]skipgaps2");
1367 *tempskipgaps2 = '\0'; strncat(tempskipgaps2, "--noskipgaps2", 13);
1368 cPara.push_back(tempskipgaps2);
1372 char* tempminlen = new char[9];
1373 *tempminlen = '\0'; strncat(tempminlen, "--minlen", 8);
1374 //strcpy(tempminlen, "--minlen");
1375 cPara.push_back(tempminlen);
1376 char* tempMinlen = new char[minlen.length()+1];
1377 //strcpy(tempMinlen, minlen.c_str());
1378 *tempMinlen = '\0'; strncat(tempMinlen, minlen.c_str(), minlen.length());
1379 cPara.push_back(tempMinlen);
1383 char* tempmaxlen = new char[9];
1384 //strcpy(tempmaxlen, "--maxlen");
1385 *tempmaxlen = '\0'; strncat(tempmaxlen, "--maxlen", 8);
1386 cPara.push_back(tempmaxlen);
1387 char* tempMaxlen = new char[maxlen.length()+1];
1388 *tempMaxlen = '\0'; strncat(tempMaxlen, maxlen.c_str(), maxlen.length());
1389 //strcpy(tempMaxlen, maxlen.c_str());
1390 cPara.push_back(tempMaxlen);
1394 char* tempucl = new char[5];
1395 strcpy(tempucl, "--ucl");
1396 cPara.push_back(tempucl);
1399 if (useQueryfract) {
1400 char* tempqueryfract = new char[13];
1401 *tempqueryfract = '\0'; strncat(tempqueryfract, "--queryfract", 12);
1402 //strcpy(tempqueryfract, "--queryfract");
1403 cPara.push_back(tempqueryfract);
1404 char* tempQueryfract = new char[queryfract.length()+1];
1405 *tempQueryfract = '\0'; strncat(tempQueryfract, queryfract.c_str(), queryfract.length());
1406 //strcpy(tempQueryfract, queryfract.c_str());
1407 cPara.push_back(tempQueryfract);
1411 char** uchimeParameters;
1412 uchimeParameters = new char*[cPara.size()];
1413 string commandString = "";
1414 for (int i = 0; i < cPara.size(); i++) { uchimeParameters[i] = cPara[i]; commandString += toString(cPara[i]) + " "; }
1415 //int numArgs = cPara.size();
1417 //uchime_main(numArgs, uchimeParameters);
1418 //cout << "commandString = " << commandString << endl;
1419 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
1421 commandString = "\"" + commandString + "\"";
1423 if (m->debug) { m->mothurOut("[DEBUG]: uchime command = " + commandString + ".\n"); }
1424 system(commandString.c_str());
1427 for(int i = 0; i < cPara.size(); i++) { delete cPara[i]; }
1428 delete[] uchimeParameters;
1430 //remove "" from filenames
1431 outputFName = outputFName.substr(1, outputFName.length()-2);
1432 filename = filename.substr(1, filename.length()-2);
1433 alns = alns.substr(1, alns.length()-2);
1435 if (m->control_pressed) { return 0; }
1437 //create accnos file from uchime results
1439 m->openInputFile(outputFName, in);
1442 m->openOutputFile(accnos, out);
1448 if (m->control_pressed) { break; }
1451 string chimeraFlag = "";
1452 //in >> chimeraFlag >> name;
1454 string line = m->getline(in);
1455 vector<string> pieces = m->splitWhiteSpace(line);
1456 if (pieces.size() > 2) {
1458 //fix name if needed
1459 if (templatefile == "self") {
1460 name = name.substr(0, name.length()-1); //rip off last /
1461 name = name.substr(0, name.find_last_of('/'));
1464 chimeraFlag = pieces[pieces.size()-1];
1466 //for (int i = 0; i < 15; i++) { in >> chimeraFlag; }
1469 if (chimeraFlag == "Y") { out << name << endl; numChimeras++; }
1475 //if (templatefile != "self") { m->mothurRemove(filename); }
1479 catch(exception& e) {
1480 m->errorOut(e, "ChimeraUchimeCommand", "driver");
1484 /**************************************************************************************************/
1485 //uchime can't handle some of the things allowed in mothurs fasta files. This functions "cleans up" the file.
1486 int ChimeraUchimeCommand::prepFile(string filename, string output) {
1490 m->openInputFile(filename, in);
1493 m->openOutputFile(output, out);
1496 if (m->control_pressed) { break; }
1498 Sequence seq(in); m->gobble(in);
1500 if (seq.getName() != "") { seq.printSequence(out); }
1507 catch(exception& e) {
1508 m->errorOut(e, "ChimeraUchimeCommand", "prepFile");
1512 /**************************************************************************************************/
1514 int ChimeraUchimeCommand::createProcesses(string outputFileName, string filename, string accnos, string alns, int& numChimeras) {
1520 vector<string> files;
1522 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
1523 //break up file into multiple files
1524 m->divideFile(filename, processors, files);
1526 if (m->control_pressed) { return 0; }
1528 //loop through and create all the processes you want
1529 while (process != processors) {
1533 processIDS.push_back(pid); //create map from line number to pid so you can append files in correct order later
1535 }else if (pid == 0){
1536 num = driver(outputFileName + toString(getpid()) + ".temp", files[process], accnos + toString(getpid()) + ".temp", alns + toString(getpid()) + ".temp", numChimeras);
1538 //pass numSeqs to parent
1540 string tempFile = outputFileName + toString(getpid()) + ".num.temp";
1541 m->openOutputFile(tempFile, out);
1543 out << numChimeras << endl;
1548 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine();
1549 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
1555 num = driver(outputFileName, files[0], accnos, alns, numChimeras);
1557 //force parent to wait until all the processes are done
1558 for (int i=0;i<processIDS.size();i++) {
1559 int temp = processIDS[i];
1563 for (int i = 0; i < processIDS.size(); i++) {
1565 string tempFile = outputFileName + toString(processIDS[i]) + ".num.temp";
1566 m->openInputFile(tempFile, in);
1569 in >> tempNum; m->gobble(in);
1572 numChimeras += tempNum;
1574 in.close(); m->mothurRemove(tempFile);
1577 //////////////////////////////////////////////////////////////////////////////////////////////////////
1578 //Windows version shared memory, so be careful when passing variables through the preClusterData struct.
1579 //Above fork() will clone, so memory is separate, but that's not the case with windows,
1580 //////////////////////////////////////////////////////////////////////////////////////////////////////
1585 map<int, ofstream*> filehandles;
1586 map<int, ofstream*>::iterator it3;
1589 for (int i = 0; i < processors; i++) {
1590 temp = new ofstream;
1591 filehandles[i] = temp;
1592 m->openOutputFile(filename+toString(i)+".temp", *(temp));
1593 files.push_back(filename+toString(i)+".temp");
1597 m->openInputFile(filename, in);
1601 if (m->control_pressed) { in.close(); for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) { (*(it3->second)).close(); delete it3->second; } return 0; }
1603 Sequence tempSeq(in); m->gobble(in);
1605 if (tempSeq.getName() != "") {
1606 tempSeq.printSequence(*(filehandles[spot]));
1608 if (spot == processors) { spot = 0; }
1614 for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) {
1615 (*(it3->second)).close();
1619 //sanity check for number of processors
1620 if (count < processors) { processors = count; }
1622 vector<uchimeData*> pDataArray;
1623 DWORD dwThreadIdArray[processors-1];
1624 HANDLE hThreadArray[processors-1];
1625 vector<string> dummy; //used so that we can use the same struct for MyUchimeSeqsThreadFunction and MyUchimeThreadFunction
1627 //Create processor worker threads.
1628 for( int i=1; i<processors; i++ ){
1629 // Allocate memory for thread data.
1630 string extension = toString(i) + ".temp";
1632 uchimeData* tempUchime = new uchimeData(outputFileName+extension, uchimeLocation, templatefile, files[i], "", "", "", accnos+extension, alns+extension, dummy, m, 0, 0, i);
1633 tempUchime->setBooleans(useAbskew, chimealns, useMinH, useMindiv, useXn, useDn, useXa, useChunks, useMinchunk, useIdsmoothwindow, useMinsmoothid, useMaxp, skipgaps, skipgaps2, useMinlen, useMaxlen, ucl, useQueryfract, hasCount);
1634 tempUchime->setVariables(abskew, minh, mindiv, xn, dn, xa, chunks, minchunk, idsmoothwindow, minsmoothid, maxp, minlen, maxlen, queryfract);
1636 pDataArray.push_back(tempUchime);
1637 processIDS.push_back(i);
1639 //MySeqSumThreadFunction is in header. It must be global or static to work with the threads.
1640 //default security attributes, thread function name, argument to thread function, use default creation flags, returns the thread identifier
1641 hThreadArray[i-1] = CreateThread(NULL, 0, MyUchimeSeqsThreadFunction, pDataArray[i-1], 0, &dwThreadIdArray[i-1]);
1645 //using the main process as a worker saves time and memory
1646 num = driver(outputFileName, files[0], accnos, alns, numChimeras);
1648 //Wait until all threads have terminated.
1649 WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE);
1651 //Close all thread handles and free memory allocations.
1652 for(int i=0; i < pDataArray.size(); i++){
1653 num += pDataArray[i]->count;
1654 numChimeras += pDataArray[i]->numChimeras;
1655 CloseHandle(hThreadArray[i]);
1656 delete pDataArray[i];
1660 //append output files
1661 for(int i=0;i<processIDS.size();i++){
1662 m->appendFiles((outputFileName + toString(processIDS[i]) + ".temp"), outputFileName);
1663 m->mothurRemove((outputFileName + toString(processIDS[i]) + ".temp"));
1665 m->appendFiles((accnos + toString(processIDS[i]) + ".temp"), accnos);
1666 m->mothurRemove((accnos + toString(processIDS[i]) + ".temp"));
1669 m->appendFiles((alns + toString(processIDS[i]) + ".temp"), alns);
1670 m->mothurRemove((alns + toString(processIDS[i]) + ".temp"));
1674 //get rid of the file pieces.
1675 for (int i = 0; i < files.size(); i++) { m->mothurRemove(files[i]); }
1678 catch(exception& e) {
1679 m->errorOut(e, "ChimeraUchimeCommand", "createProcesses");
1683 /**************************************************************************************************/
1685 int ChimeraUchimeCommand::createProcessesGroups(string outputFName, string filename, string accnos, string alns, vector<string> groups, string nameFile, string groupFile, string fastaFile) {
1693 if (groups.size() < processors) { processors = groups.size(); }
1695 //divide the groups between the processors
1696 vector<linePair> lines;
1697 int numGroupsPerProcessor = groups.size() / processors;
1698 for (int i = 0; i < processors; i++) {
1699 int startIndex = i * numGroupsPerProcessor;
1700 int endIndex = (i+1) * numGroupsPerProcessor;
1701 if(i == (processors - 1)){ endIndex = groups.size(); }
1702 lines.push_back(linePair(startIndex, endIndex));
1705 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
1707 //loop through and create all the processes you want
1708 while (process != processors) {
1712 processIDS.push_back(pid); //create map from line number to pid so you can append files in correct order later
1714 }else if (pid == 0){
1715 num = driverGroups(outputFName + toString(getpid()) + ".temp", filename + toString(getpid()) + ".temp", accnos + toString(getpid()) + ".temp", alns + toString(getpid()) + ".temp", lines[process].start, lines[process].end, groups);
1717 //pass numSeqs to parent
1719 string tempFile = outputFName + toString(getpid()) + ".num.temp";
1720 m->openOutputFile(tempFile, out);
1726 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine();
1727 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
1733 num = driverGroups(outputFName, filename, accnos, alns, lines[0].start, lines[0].end, groups);
1735 //force parent to wait until all the processes are done
1736 for (int i=0;i<processIDS.size();i++) {
1737 int temp = processIDS[i];
1741 for (int i = 0; i < processIDS.size(); i++) {
1743 string tempFile = outputFName + toString(processIDS[i]) + ".num.temp";
1744 m->openInputFile(tempFile, in);
1745 if (!in.eof()) { int tempNum = 0; in >> tempNum; num += tempNum; }
1746 in.close(); m->mothurRemove(tempFile);
1750 //////////////////////////////////////////////////////////////////////////////////////////////////////
1751 //Windows version shared memory, so be careful when passing variables through the uchimeData struct.
1752 //Above fork() will clone, so memory is separate, but that's not the case with windows,
1753 //////////////////////////////////////////////////////////////////////////////////////////////////////
1755 vector<uchimeData*> pDataArray;
1756 DWORD dwThreadIdArray[processors-1];
1757 HANDLE hThreadArray[processors-1];
1759 //Create processor worker threads.
1760 for( int i=1; i<processors; i++ ){
1761 // Allocate memory for thread data.
1762 string extension = toString(i) + ".temp";
1764 uchimeData* tempUchime = new uchimeData(outputFName+extension, uchimeLocation, templatefile, filename+extension, fastaFile, nameFile, groupFile, accnos+extension, alns+extension, groups, m, lines[i].start, lines[i].end, i);
1765 tempUchime->setBooleans(useAbskew, chimealns, useMinH, useMindiv, useXn, useDn, useXa, useChunks, useMinchunk, useIdsmoothwindow, useMinsmoothid, useMaxp, skipgaps, skipgaps2, useMinlen, useMaxlen, ucl, useQueryfract, hasCount);
1766 tempUchime->setVariables(abskew, minh, mindiv, xn, dn, xa, chunks, minchunk, idsmoothwindow, minsmoothid, maxp, minlen, maxlen, queryfract);
1768 pDataArray.push_back(tempUchime);
1769 processIDS.push_back(i);
1771 //MyUchimeThreadFunction is in header. It must be global or static to work with the threads.
1772 //default security attributes, thread function name, argument to thread function, use default creation flags, returns the thread identifier
1773 hThreadArray[i-1] = CreateThread(NULL, 0, MyUchimeThreadFunction, pDataArray[i-1], 0, &dwThreadIdArray[i-1]);
1777 //using the main process as a worker saves time and memory
1778 num = driverGroups(outputFName, filename, accnos, alns, lines[0].start, lines[0].end, groups);
1780 //Wait until all threads have terminated.
1781 WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE);
1783 //Close all thread handles and free memory allocations.
1784 for(int i=0; i < pDataArray.size(); i++){
1785 num += pDataArray[i]->count;
1786 CloseHandle(hThreadArray[i]);
1787 delete pDataArray[i];
1792 //append output files
1793 for(int i=0;i<processIDS.size();i++){
1794 m->appendFiles((outputFName + toString(processIDS[i]) + ".temp"), outputFName);
1795 m->mothurRemove((outputFName + toString(processIDS[i]) + ".temp"));
1797 m->appendFiles((accnos + toString(processIDS[i]) + ".temp"), accnos);
1798 m->mothurRemove((accnos + toString(processIDS[i]) + ".temp"));
1801 m->appendFiles((alns + toString(processIDS[i]) + ".temp"), alns);
1802 m->mothurRemove((alns + toString(processIDS[i]) + ".temp"));
1809 catch(exception& e) {
1810 m->errorOut(e, "ChimeraUchimeCommand", "createProcessesGroups");
1814 /**************************************************************************************************/