2 * chimerauchimecommand.cpp
5 * Created by westcott on 5/13/11.
6 * Copyright 2011 Schloss Lab. All rights reserved.
10 #include "chimerauchimecommand.h"
11 #include "deconvolutecommand.h"
13 #include "sequence.hpp"
14 #include "referencedb.h"
15 #include "systemcommand.h"
17 //**********************************************************************************************************************
18 vector<string> ChimeraUchimeCommand::setParameters(){
20 CommandParameter ptemplate("reference", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(ptemplate);
21 CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pfasta);
22 CommandParameter pname("name", "InputTypes", "", "", "NameCount", "none", "none",false,false); parameters.push_back(pname);
23 CommandParameter pcount("count", "InputTypes", "", "", "NameCount-CountGroup", "none", "none",false,false); parameters.push_back(pcount);
24 CommandParameter pgroup("group", "InputTypes", "", "", "CountGroup", "none", "none",false,false); parameters.push_back(pgroup);
25 CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors);
26 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
27 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
28 CommandParameter pabskew("abskew", "Number", "", "1.9", "", "", "",false,false); parameters.push_back(pabskew);
29 CommandParameter pchimealns("chimealns", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pchimealns);
30 CommandParameter pminh("minh", "Number", "", "0.3", "", "", "",false,false); parameters.push_back(pminh);
31 CommandParameter pmindiv("mindiv", "Number", "", "0.5", "", "", "",false,false); parameters.push_back(pmindiv);
32 CommandParameter pxn("xn", "Number", "", "8.0", "", "", "",false,false); parameters.push_back(pxn);
33 CommandParameter pdn("dn", "Number", "", "1.4", "", "", "",false,false); parameters.push_back(pdn);
34 CommandParameter pxa("xa", "Number", "", "1", "", "", "",false,false); parameters.push_back(pxa);
35 CommandParameter pchunks("chunks", "Number", "", "4", "", "", "",false,false); parameters.push_back(pchunks);
36 CommandParameter pminchunk("minchunk", "Number", "", "64", "", "", "",false,false); parameters.push_back(pminchunk);
37 CommandParameter pidsmoothwindow("idsmoothwindow", "Number", "", "32", "", "", "",false,false); parameters.push_back(pidsmoothwindow);
38 //CommandParameter pminsmoothid("minsmoothid", "Number", "", "0.95", "", "", "",false,false); parameters.push_back(pminsmoothid);
39 CommandParameter pmaxp("maxp", "Number", "", "2", "", "", "",false,false); parameters.push_back(pmaxp);
40 CommandParameter pskipgaps("skipgaps", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(pskipgaps);
41 CommandParameter pskipgaps2("skipgaps2", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(pskipgaps2);
42 CommandParameter pminlen("minlen", "Number", "", "10", "", "", "",false,false); parameters.push_back(pminlen);
43 CommandParameter pmaxlen("maxlen", "Number", "", "10000", "", "", "",false,false); parameters.push_back(pmaxlen);
44 CommandParameter pucl("ucl", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pucl);
45 CommandParameter pqueryfract("queryfract", "Number", "", "0.5", "", "", "",false,false); parameters.push_back(pqueryfract);
47 vector<string> myArray;
48 for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); }
52 m->errorOut(e, "ChimeraUchimeCommand", "setParameters");
56 //**********************************************************************************************************************
57 string ChimeraUchimeCommand::getHelpString(){
59 string helpString = "";
60 helpString += "The chimera.uchime command reads a fastafile and referencefile and outputs potentially chimeric sequences.\n";
61 helpString += "This command is a wrapper for uchime written by Robert C. Edgar.\n";
62 helpString += "The chimera.uchime command parameters are fasta, name, count, reference, processors, abskew, chimealns, minh, mindiv, xn, dn, xa, chunks, minchunk, idsmoothwindow, minsmoothid, maxp, skipgaps, skipgaps2, minlen, maxlen, ucl and queryfact.\n";
63 helpString += "The fasta parameter allows you to enter the fasta file containing your potentially chimeric sequences, and is required, unless you have a valid current fasta file. \n";
64 helpString += "The name parameter allows you to provide a name file, if you are using template=self. \n";
65 helpString += "The count parameter allows you to provide a count file, if you are using template=self. \n";
66 helpString += "You may enter multiple fasta files by separating their names with dashes. ie. fasta=abrecovery.fasta-amazon.fasta \n";
67 helpString += "The group parameter allows you to provide a group file. The group file can be used with a namesfile and reference=self. When checking sequences, only sequences from the same group as the query sequence will be used as the reference. \n";
68 helpString += "The reference parameter allows you to enter a reference file containing known non-chimeric sequences, and is required. You may also set template=self, in this case the abundant sequences will be used as potential parents. \n";
69 helpString += "The processors parameter allows you to specify how many processors you would like to use. The default is 1. \n";
70 helpString += "The abskew parameter can only be used with template=self. Minimum abundance skew. Default 1.9. Abundance skew is: min [ abund(parent1), abund(parent2) ] / abund(query).\n";
71 helpString += "The chimealns parameter allows you to indicate you would like a file containing multiple alignments of query sequences to parents in human readable format. Alignments show columns with differences that support or contradict a chimeric model.\n";
72 helpString += "The minh parameter - mininum score to report chimera. Default 0.3. Values from 0.1 to 5 might be reasonable. Lower values increase sensitivity but may report more false positives. If you decrease xn you may need to increase minh, and vice versa.\n";
73 helpString += "The mindiv parameter - minimum divergence ratio, default 0.5. Div ratio is 100%% - %%identity between query sequence and the closest candidate for being a parent. If you don't care about very close chimeras, then you could increase mindiv to, say, 1.0 or 2.0, and also decrease minh, say to 0.1, to increase sensitivity. How well this works will depend on your data. Best is to tune parameters on a good benchmark.\n";
74 helpString += "The xn parameter - weight of a no vote. Default 8.0. Decreasing this weight to around 3 or 4 may give better performance on denoised data.\n";
75 helpString += "The dn parameter - pseudo-count prior on number of no votes. Default 1.4. Probably no good reason to change this unless you can retune to a good benchmark for your data. Reasonable values are probably in the range from 0.2 to 2.\n";
76 helpString += "The xa parameter - weight of an abstain vote. Default 1. So far, results do not seem to be very sensitive to this parameter, but if you have a good training set might be worth trying. Reasonable values might range from 0.1 to 2.\n";
77 helpString += "The chunks parameter is the number of chunks to extract from the query sequence when searching for parents. Default 4.\n";
78 helpString += "The minchunk parameter is the minimum length of a chunk. Default 64.\n";
79 helpString += "The idsmoothwindow parameter is the length of id smoothing window. Default 32.\n";
80 //helpString += "The minsmoothid parameter - minimum factional identity over smoothed window of candidate parent. Default 0.95.\n";
81 helpString += "The maxp parameter - maximum number of candidate parents to consider. Default 2. In tests so far, increasing maxp gives only a very small improvement in sensivity but tends to increase the error rate quite a bit.\n";
82 helpString += "The skipgaps parameter controls how gapped columns affect counting of diffs. If skipgaps is set to T, columns containing gaps do not found as diffs. Default = T.\n";
83 helpString += "The skipgaps2 parameter controls how gapped columns affect counting of diffs. If skipgaps2 is set to T, if column is immediately adjacent to a column containing a gap, it is not counted as a diff. Default = T.\n";
84 helpString += "The minlen parameter is the minimum unaligned sequence length. Defaults 10. Applies to both query and reference sequences.\n";
85 helpString += "The maxlen parameter is the maximum unaligned sequence length. Defaults 10000. Applies to both query and reference sequences.\n";
86 helpString += "The ucl parameter - use local-X alignments. Default is global-X or false. On tests so far, global-X is always better; this option is retained because it just might work well on some future type of data.\n";
87 helpString += "The queryfract parameter - minimum fraction of the query sequence that must be covered by a local-X alignment. Default 0.5. Applies only when ucl is true.\n";
89 helpString += "When using MPI, the processors parameter is set to the number of MPI processes running. \n";
91 helpString += "The chimera.uchime command should be in the following format: \n";
92 helpString += "chimera.uchime(fasta=yourFastaFile, reference=yourTemplate) \n";
93 helpString += "Example: chimera.uchime(fasta=AD.align, reference=silva.gold.align) \n";
94 helpString += "Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFastaFile).\n";
98 m->errorOut(e, "ChimeraUchimeCommand", "getHelpString");
102 //**********************************************************************************************************************
103 string ChimeraUchimeCommand::getOutputFileNameTag(string type, string inputName=""){
105 string outputFileName = "";
106 map<string, vector<string> >::iterator it;
108 //is this a type this command creates
109 it = outputTypes.find(type);
110 if (it == outputTypes.end()) { m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n"); }
112 if (type == "chimera") { outputFileName = "uchime.chimeras"; }
113 else if (type == "accnos") { outputFileName = "uchime.accnos"; }
114 else if (type == "alns") { outputFileName = "uchime.alns"; }
115 else { m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n"); m->control_pressed = true; }
117 return outputFileName;
119 catch(exception& e) {
120 m->errorOut(e, "ChimeraUchimeCommand", "getOutputFileNameTag");
124 //**********************************************************************************************************************
125 ChimeraUchimeCommand::ChimeraUchimeCommand(){
127 abort = true; calledHelp = true;
129 vector<string> tempOutNames;
130 outputTypes["chimera"] = tempOutNames;
131 outputTypes["accnos"] = tempOutNames;
132 outputTypes["alns"] = tempOutNames;
134 catch(exception& e) {
135 m->errorOut(e, "ChimeraUchimeCommand", "ChimeraUchimeCommand");
139 //***************************************************************************************************************
140 ChimeraUchimeCommand::ChimeraUchimeCommand(string option) {
142 abort = false; calledHelp = false; hasName=false; hasCount=false;
143 ReferenceDB* rdb = ReferenceDB::getInstance();
145 //allow user to run help
146 if(option == "help") { help(); abort = true; calledHelp = true; }
147 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
150 vector<string> myArray = setParameters();
152 OptionParser parser(option);
153 map<string,string> parameters = parser.getParameters();
155 ValidParameters validParameter("chimera.uchime");
156 map<string,string>::iterator it;
158 //check to make sure all parameters are valid for command
159 for (it = parameters.begin(); it != parameters.end(); it++) {
160 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
163 vector<string> tempOutNames;
164 outputTypes["chimera"] = tempOutNames;
165 outputTypes["accnos"] = tempOutNames;
166 outputTypes["alns"] = tempOutNames;
168 //if the user changes the input directory command factory will send this info to us in the output parameter
169 string inputDir = validParameter.validFile(parameters, "inputdir", false);
170 if (inputDir == "not found"){ inputDir = ""; }
172 //check for required parameters
173 fastafile = validParameter.validFile(parameters, "fasta", false);
174 if (fastafile == "not found") {
175 //if there is a current fasta file, use it
176 string filename = m->getFastaFile();
177 if (filename != "") { fastaFileNames.push_back(filename); m->mothurOut("Using " + filename + " as input file for the fasta parameter."); m->mothurOutEndLine(); }
178 else { m->mothurOut("You have no current fastafile and the fasta parameter is required."); m->mothurOutEndLine(); abort = true; }
180 m->splitAtDash(fastafile, fastaFileNames);
182 //go through files and make sure they are good, if not, then disregard them
183 for (int i = 0; i < fastaFileNames.size(); i++) {
186 if (fastaFileNames[i] == "current") {
187 fastaFileNames[i] = m->getFastaFile();
188 if (fastaFileNames[i] != "") { m->mothurOut("Using " + fastaFileNames[i] + " as input file for the fasta parameter where you had given current."); m->mothurOutEndLine(); }
190 m->mothurOut("You have no current fastafile, ignoring current."); m->mothurOutEndLine(); ignore=true;
191 //erase from file list
192 fastaFileNames.erase(fastaFileNames.begin()+i);
199 if (inputDir != "") {
200 string path = m->hasPath(fastaFileNames[i]);
201 //if the user has not given a path then, add inputdir. else leave path alone.
202 if (path == "") { fastaFileNames[i] = inputDir + fastaFileNames[i]; }
208 ableToOpen = m->openInputFile(fastaFileNames[i], in, "noerror");
210 //if you can't open it, try default location
211 if (ableToOpen == 1) {
212 if (m->getDefaultPath() != "") { //default path is set
213 string tryPath = m->getDefaultPath() + m->getSimpleName(fastaFileNames[i]);
214 m->mothurOut("Unable to open " + fastaFileNames[i] + ". Trying default " + tryPath); m->mothurOutEndLine();
216 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
218 fastaFileNames[i] = tryPath;
222 if (ableToOpen == 1) {
223 if (m->getOutputDir() != "") { //default path is set
224 string tryPath = m->getOutputDir() + m->getSimpleName(fastaFileNames[i]);
225 m->mothurOut("Unable to open " + fastaFileNames[i] + ". Trying output directory " + tryPath); m->mothurOutEndLine();
227 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
229 fastaFileNames[i] = tryPath;
235 if (ableToOpen == 1) {
236 m->mothurOut("Unable to open " + fastaFileNames[i] + ". It will be disregarded."); m->mothurOutEndLine();
237 //erase from file list
238 fastaFileNames.erase(fastaFileNames.begin()+i);
241 m->setFastaFile(fastaFileNames[i]);
246 //make sure there is at least one valid file left
247 if (fastaFileNames.size() == 0) { m->mothurOut("[ERROR]: no valid files."); m->mothurOutEndLine(); abort = true; }
251 //check for required parameters
252 namefile = validParameter.validFile(parameters, "name", false);
253 if (namefile == "not found") { namefile = ""; }
255 m->splitAtDash(namefile, nameFileNames);
257 //go through files and make sure they are good, if not, then disregard them
258 for (int i = 0; i < nameFileNames.size(); i++) {
261 if (nameFileNames[i] == "current") {
262 nameFileNames[i] = m->getNameFile();
263 if (nameFileNames[i] != "") { m->mothurOut("Using " + nameFileNames[i] + " as input file for the name parameter where you had given current."); m->mothurOutEndLine(); }
265 m->mothurOut("You have no current namefile, ignoring current."); m->mothurOutEndLine(); ignore=true;
266 //erase from file list
267 nameFileNames.erase(nameFileNames.begin()+i);
274 if (inputDir != "") {
275 string path = m->hasPath(nameFileNames[i]);
276 //if the user has not given a path then, add inputdir. else leave path alone.
277 if (path == "") { nameFileNames[i] = inputDir + nameFileNames[i]; }
283 ableToOpen = m->openInputFile(nameFileNames[i], in, "noerror");
285 //if you can't open it, try default location
286 if (ableToOpen == 1) {
287 if (m->getDefaultPath() != "") { //default path is set
288 string tryPath = m->getDefaultPath() + m->getSimpleName(nameFileNames[i]);
289 m->mothurOut("Unable to open " + nameFileNames[i] + ". Trying default " + tryPath); m->mothurOutEndLine();
291 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
293 nameFileNames[i] = tryPath;
297 if (ableToOpen == 1) {
298 if (m->getOutputDir() != "") { //default path is set
299 string tryPath = m->getOutputDir() + m->getSimpleName(nameFileNames[i]);
300 m->mothurOut("Unable to open " + nameFileNames[i] + ". Trying output directory " + tryPath); m->mothurOutEndLine();
302 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
304 nameFileNames[i] = tryPath;
310 if (ableToOpen == 1) {
311 m->mothurOut("Unable to open " + nameFileNames[i] + ". It will be disregarded."); m->mothurOutEndLine();
312 //erase from file list
313 nameFileNames.erase(nameFileNames.begin()+i);
316 m->setNameFile(nameFileNames[i]);
322 if (nameFileNames.size() != 0) { hasName = true; }
324 //check for required parameters
325 vector<string> countfileNames;
326 countfile = validParameter.validFile(parameters, "count", false);
327 if (countfile == "not found") {
330 m->splitAtDash(countfile, countfileNames);
332 //go through files and make sure they are good, if not, then disregard them
333 for (int i = 0; i < countfileNames.size(); i++) {
336 if (countfileNames[i] == "current") {
337 countfileNames[i] = m->getCountTableFile();
338 if (nameFileNames[i] != "") { m->mothurOut("Using " + countfileNames[i] + " as input file for the count parameter where you had given current."); m->mothurOutEndLine(); }
340 m->mothurOut("You have no current count file, ignoring current."); m->mothurOutEndLine(); ignore=true;
341 //erase from file list
342 countfileNames.erase(countfileNames.begin()+i);
349 if (inputDir != "") {
350 string path = m->hasPath(countfileNames[i]);
351 //if the user has not given a path then, add inputdir. else leave path alone.
352 if (path == "") { countfileNames[i] = inputDir + countfileNames[i]; }
358 ableToOpen = m->openInputFile(countfileNames[i], in, "noerror");
360 //if you can't open it, try default location
361 if (ableToOpen == 1) {
362 if (m->getDefaultPath() != "") { //default path is set
363 string tryPath = m->getDefaultPath() + m->getSimpleName(countfileNames[i]);
364 m->mothurOut("Unable to open " + countfileNames[i] + ". Trying default " + tryPath); m->mothurOutEndLine();
366 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
368 countfileNames[i] = tryPath;
372 if (ableToOpen == 1) {
373 if (m->getOutputDir() != "") { //default path is set
374 string tryPath = m->getOutputDir() + m->getSimpleName(countfileNames[i]);
375 m->mothurOut("Unable to open " + countfileNames[i] + ". Trying output directory " + tryPath); m->mothurOutEndLine();
377 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
379 countfileNames[i] = tryPath;
385 if (ableToOpen == 1) {
386 m->mothurOut("Unable to open " + countfileNames[i] + ". It will be disregarded."); m->mothurOutEndLine();
387 //erase from file list
388 countfileNames.erase(countfileNames.begin()+i);
391 m->setCountTableFile(countfileNames[i]);
397 if (countfileNames.size() != 0) { hasCount = true; }
399 //make sure there is at least one valid file left
400 if (hasName && hasCount) { m->mothurOut("[ERROR]: You must enter ONLY ONE of the following: count or name."); m->mothurOutEndLine(); abort = true; }
402 if (!hasName && hasCount) { nameFileNames = countfileNames; }
404 if ((hasCount || hasName) && (nameFileNames.size() != fastaFileNames.size())) { m->mothurOut("[ERROR]: The number of name or count files does not match the number of fastafiles, please correct."); m->mothurOutEndLine(); abort=true; }
406 bool hasGroup = true;
407 groupfile = validParameter.validFile(parameters, "group", false);
408 if (groupfile == "not found") { groupfile = ""; hasGroup = false; }
410 m->splitAtDash(groupfile, groupFileNames);
412 //go through files and make sure they are good, if not, then disregard them
413 for (int i = 0; i < groupFileNames.size(); i++) {
416 if (groupFileNames[i] == "current") {
417 groupFileNames[i] = m->getGroupFile();
418 if (groupFileNames[i] != "") { m->mothurOut("Using " + groupFileNames[i] + " as input file for the group parameter where you had given current."); m->mothurOutEndLine(); }
420 m->mothurOut("You have no current namefile, ignoring current."); m->mothurOutEndLine(); ignore=true;
421 //erase from file list
422 groupFileNames.erase(groupFileNames.begin()+i);
429 if (inputDir != "") {
430 string path = m->hasPath(groupFileNames[i]);
431 //if the user has not given a path then, add inputdir. else leave path alone.
432 if (path == "") { groupFileNames[i] = inputDir + groupFileNames[i]; }
438 ableToOpen = m->openInputFile(groupFileNames[i], in, "noerror");
440 //if you can't open it, try default location
441 if (ableToOpen == 1) {
442 if (m->getDefaultPath() != "") { //default path is set
443 string tryPath = m->getDefaultPath() + m->getSimpleName(groupFileNames[i]);
444 m->mothurOut("Unable to open " + groupFileNames[i] + ". Trying default " + tryPath); m->mothurOutEndLine();
446 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
448 groupFileNames[i] = tryPath;
452 if (ableToOpen == 1) {
453 if (m->getOutputDir() != "") { //default path is set
454 string tryPath = m->getOutputDir() + m->getSimpleName(groupFileNames[i]);
455 m->mothurOut("Unable to open " + groupFileNames[i] + ". Trying output directory " + tryPath); m->mothurOutEndLine();
457 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
459 groupFileNames[i] = tryPath;
465 if (ableToOpen == 1) {
466 m->mothurOut("Unable to open " + groupFileNames[i] + ". It will be disregarded."); m->mothurOutEndLine();
467 //erase from file list
468 groupFileNames.erase(groupFileNames.begin()+i);
471 m->setGroupFile(groupFileNames[i]);
476 //make sure there is at least one valid file left
477 if (groupFileNames.size() == 0) { m->mothurOut("[ERROR]: no valid group files."); m->mothurOutEndLine(); abort = true; }
480 if (hasGroup && (groupFileNames.size() != fastaFileNames.size())) { m->mothurOut("[ERROR]: The number of groupfiles does not match the number of fastafiles, please correct."); m->mothurOutEndLine(); abort=true; }
482 if (hasGroup && hasCount) { m->mothurOut("[ERROR]: You must enter ONLY ONE of the following: count or group."); m->mothurOutEndLine(); abort = true; }
483 //if the user changes the output directory command factory will send this info to us in the output parameter
484 outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = ""; }
487 //if the user changes the output directory command factory will send this info to us in the output parameter
488 outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = ""; }
491 it = parameters.find("reference");
492 //user has given a template file
493 if(it != parameters.end()){
494 if (it->second == "self") { templatefile = "self"; }
496 path = m->hasPath(it->second);
497 //if the user has not given a path then, add inputdir. else leave path alone.
498 if (path == "") { parameters["reference"] = inputDir + it->second; }
500 templatefile = validParameter.validFile(parameters, "reference", true);
501 if (templatefile == "not open") { abort = true; }
502 else if (templatefile == "not found") { //check for saved reference sequences
503 if (rdb->getSavedReference() != "") {
504 templatefile = rdb->getSavedReference();
505 m->mothurOutEndLine(); m->mothurOut("Using sequences from " + rdb->getSavedReference() + "."); m->mothurOutEndLine();
507 m->mothurOut("[ERROR]: You don't have any saved reference sequences and the reference parameter is a required.");
508 m->mothurOutEndLine();
513 }else if (hasName) { templatefile = "self"; }
514 else if (hasCount) { templatefile = "self"; }
516 if (rdb->getSavedReference() != "") {
517 templatefile = rdb->getSavedReference();
518 m->mothurOutEndLine(); m->mothurOut("Using sequences from " + rdb->getSavedReference() + "."); m->mothurOutEndLine();
520 m->mothurOut("[ERROR]: You don't have any saved reference sequences and the reference parameter is a required.");
521 m->mothurOutEndLine();
522 templatefile = ""; abort = true;
526 string temp = validParameter.validFile(parameters, "processors", false); if (temp == "not found"){ temp = m->getProcessors(); }
527 m->setProcessors(temp);
528 m->mothurConvert(temp, processors);
530 abskew = validParameter.validFile(parameters, "abskew", false); if (abskew == "not found"){ useAbskew = false; abskew = "1.9"; }else{ useAbskew = true; }
531 if (useAbskew && templatefile != "self") { m->mothurOut("The abskew parameter is only valid with template=self, ignoring."); m->mothurOutEndLine(); useAbskew = false; }
533 temp = validParameter.validFile(parameters, "chimealns", false); if (temp == "not found") { temp = "f"; }
534 chimealns = m->isTrue(temp);
536 minh = validParameter.validFile(parameters, "minh", false); if (minh == "not found") { useMinH = false; minh = "0.3"; } else{ useMinH = true; }
537 mindiv = validParameter.validFile(parameters, "mindiv", false); if (mindiv == "not found") { useMindiv = false; mindiv = "0.5"; } else{ useMindiv = true; }
538 xn = validParameter.validFile(parameters, "xn", false); if (xn == "not found") { useXn = false; xn = "8.0"; } else{ useXn = true; }
539 dn = validParameter.validFile(parameters, "dn", false); if (dn == "not found") { useDn = false; dn = "1.4"; } else{ useDn = true; }
540 xa = validParameter.validFile(parameters, "xa", false); if (xa == "not found") { useXa = false; xa = "1"; } else{ useXa = true; }
541 chunks = validParameter.validFile(parameters, "chunks", false); if (chunks == "not found") { useChunks = false; chunks = "4"; } else{ useChunks = true; }
542 minchunk = validParameter.validFile(parameters, "minchunk", false); if (minchunk == "not found") { useMinchunk = false; minchunk = "64"; } else{ useMinchunk = true; }
543 idsmoothwindow = validParameter.validFile(parameters, "idsmoothwindow", false); if (idsmoothwindow == "not found") { useIdsmoothwindow = false; idsmoothwindow = "32"; } else{ useIdsmoothwindow = true; }
544 //minsmoothid = validParameter.validFile(parameters, "minsmoothid", false); if (minsmoothid == "not found") { useMinsmoothid = false; minsmoothid = "0.95"; } else{ useMinsmoothid = true; }
545 maxp = validParameter.validFile(parameters, "maxp", false); if (maxp == "not found") { useMaxp = false; maxp = "2"; } else{ useMaxp = true; }
546 minlen = validParameter.validFile(parameters, "minlen", false); if (minlen == "not found") { useMinlen = false; minlen = "10"; } else{ useMinlen = true; }
547 maxlen = validParameter.validFile(parameters, "maxlen", false); if (maxlen == "not found") { useMaxlen = false; maxlen = "10000"; } else{ useMaxlen = true; }
549 temp = validParameter.validFile(parameters, "ucl", false); if (temp == "not found") { temp = "f"; }
550 ucl = m->isTrue(temp);
552 queryfract = validParameter.validFile(parameters, "queryfract", false); if (queryfract == "not found") { useQueryfract = false; queryfract = "0.5"; } else{ useQueryfract = true; }
553 if (!ucl && useQueryfract) { m->mothurOut("queryfact may only be used when ucl=t, ignoring."); m->mothurOutEndLine(); useQueryfract = false; }
555 temp = validParameter.validFile(parameters, "skipgaps", false); if (temp == "not found") { temp = "t"; }
556 skipgaps = m->isTrue(temp);
558 temp = validParameter.validFile(parameters, "skipgaps2", false); if (temp == "not found") { temp = "t"; }
559 skipgaps2 = m->isTrue(temp);
561 if (hasName && (templatefile != "self")) { m->mothurOut("You have provided a namefile and the reference parameter is not set to self. I am not sure what reference you are trying to use, aborting."); m->mothurOutEndLine(); abort=true; }
562 if (hasGroup && (templatefile != "self")) { m->mothurOut("You have provided a group file and the reference parameter is not set to self. I am not sure what reference you are trying to use, aborting."); m->mothurOutEndLine(); abort=true; }
564 //look for uchime exe
566 string tempPath = path;
567 for (int i = 0; i < path.length(); i++) { tempPath[i] = tolower(path[i]); }
568 path = path.substr(0, (tempPath.find_last_of('m')));
570 string uchimeCommand;
571 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
572 uchimeCommand = path + "uchime"; // format the database, -o option gives us the ability
574 m->mothurOut("[DEBUG]: Uchime location using \"which uchime\" = ");
575 Command* newCommand = new SystemCommand("which uchime"); m->mothurOutEndLine();
576 newCommand->execute();
578 m->mothurOut("[DEBUG]: Mothur's location using \"which mothur\" = ");
579 newCommand = new SystemCommand("which mothur"); m->mothurOutEndLine();
580 newCommand->execute();
584 uchimeCommand = path + "uchime.exe";
587 //test to make sure uchime exists
589 uchimeCommand = m->getFullPathName(uchimeCommand);
590 int ableToOpen = m->openInputFile(uchimeCommand, in, "no error"); in.close();
591 if(ableToOpen == 1) {
592 m->mothurOut(uchimeCommand + " file does not exist. Checking path... \n");
593 //check to see if uchime is in the path??
595 string uLocation = m->findProgramPath("uchime");
599 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
600 ableToOpen = m->openInputFile(uLocation, in2, "no error"); in2.close();
602 ableToOpen = m->openInputFile((uLocation + ".exe"), in2, "no error"); in2.close();
605 if(ableToOpen == 1) { m->mothurOut("[ERROR]: " + uLocation + " file does not exist. mothur requires the uchime executable."); m->mothurOutEndLine(); abort = true; }
606 else { m->mothurOut("Found uchime in your path, using " + uLocation + "\n");uchimeLocation = uLocation; }
607 }else { uchimeLocation = uchimeCommand; }
609 uchimeLocation = m->getFullPathName(uchimeLocation);
612 catch(exception& e) {
613 m->errorOut(e, "ChimeraSlayerCommand", "ChimeraSlayerCommand");
617 //***************************************************************************************************************
619 int ChimeraUchimeCommand::execute(){
622 if (abort == true) { if (calledHelp) { return 0; } return 2; }
624 m->mothurOut("\nuchime by Robert C. Edgar\nhttp://drive5.com/uchime\nThis code is donated to the public domain.\n\n");
626 for (int s = 0; s < fastaFileNames.size(); s++) {
628 m->mothurOut("Checking sequences from " + fastaFileNames[s] + " ..." ); m->mothurOutEndLine();
630 int start = time(NULL);
631 string nameFile = "";
632 if (outputDir == "") { outputDir = m->hasPath(fastaFileNames[s]); }//if user entered a file with a path then preserve it
633 string outputFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + getOutputFileNameTag("chimera");
634 string accnosFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + getOutputFileNameTag("accnos");
635 string alnsFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + getOutputFileNameTag("alns");
636 string newFasta = m->getRootName(fastaFileNames[s]) + "temp";
638 //you provided a groupfile
639 string groupFile = "";
640 bool hasGroup = false;
641 if (groupFileNames.size() != 0) { groupFile = groupFileNames[s]; hasGroup = true; }
644 if (ct.testGroups(nameFileNames[s])) { hasGroup = true; }
647 if ((templatefile == "self") && (!hasGroup)) { //you want to run uchime with a template=self and no groups
649 if (processors != 1) { m->mothurOut("When using template=self, mothur can only use 1 processor, continuing."); m->mothurOutEndLine(); processors = 1; }
650 if (nameFileNames.size() != 0) { //you provided a namefile and we don't need to create one
651 nameFile = nameFileNames[s];
652 }else { nameFile = getNamesFile(fastaFileNames[s]); }
654 map<string, string> seqs;
655 readFasta(fastaFileNames[s], seqs); if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } return 0; }
658 vector<seqPriorityNode> nameMapCount;
662 ct.readTable(nameFile);
663 for(map<string, string>::iterator it = seqs.begin(); it != seqs.end(); it++) {
664 int num = ct.getNumSeqs(it->first);
665 if (num == 0) { error = 1; }
667 seqPriorityNode temp(num, it->second, it->first);
668 nameMapCount.push_back(temp);
672 error = m->readNames(nameFile, nameMapCount, seqs); if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } return 0; }
674 if (error == 1) { for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } return 0; }
675 if (seqs.size() != nameMapCount.size()) { m->mothurOut( "The number of sequences in your fastafile does not match the number of sequences in your namefile, aborting."); m->mothurOutEndLine(); for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } return 0; }
677 printFile(nameMapCount, newFasta);
678 fastaFileNames[s] = newFasta;
681 if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } return 0; }
684 if (nameFileNames.size() != 0) { //you provided a namefile and we don't need to create one
685 nameFile = nameFileNames[s];
686 }else { nameFile = getNamesFile(fastaFileNames[s]); }
688 //Parse sequences by group
689 vector<string> groups;
690 map<string, string> uniqueNames;
692 cparser = new SequenceCountParser(nameFile, fastaFileNames[s]);
693 groups = cparser->getNamesOfGroups();
694 uniqueNames = cparser->getAllSeqsMap();
696 sparser = new SequenceParser(groupFile, fastaFileNames[s], nameFile);
697 groups = sparser->getNamesOfGroups();
698 uniqueNames = sparser->getAllSeqsMap();
701 if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } return 0; }
704 ofstream out, out1, out2;
705 m->openOutputFile(outputFileName, out); out.close();
706 m->openOutputFile(accnosFileName, out1); out1.close();
707 if (chimealns) { m->openOutputFile(alnsFileName, out2); out2.close(); }
710 if(processors == 1) { totalSeqs = driverGroups(outputFileName, newFasta, accnosFileName, alnsFileName, 0, groups.size(), groups); }
711 else { totalSeqs = createProcessesGroups(outputFileName, newFasta, accnosFileName, alnsFileName, groups, nameFile, groupFile, fastaFileNames[s]); }
713 if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } return 0; }
714 if (hasCount) { delete cparser; }
715 else { delete sparser; }
717 int totalChimeras = deconvoluteResults(uniqueNames, outputFileName, accnosFileName, alnsFileName);
719 m->mothurOutEndLine(); m->mothurOut("It took " + toString(time(NULL) - start) + " secs to check " + toString(totalSeqs) + " sequences. " + toString(totalChimeras) + " chimeras were found."); m->mothurOutEndLine();
720 m->mothurOut("The number of sequences checked may be larger than the number of unique sequences because some sequences are found in several samples."); m->mothurOutEndLine();
722 if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } return 0; }
725 if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } return 0; }
730 if(processors == 1){ numSeqs = driver(outputFileName, fastaFileNames[s], accnosFileName, alnsFileName, numChimeras); }
731 else{ numSeqs = createProcesses(outputFileName, fastaFileNames[s], accnosFileName, alnsFileName, numChimeras); }
735 m->openOutputFile(outputFileName+".temp", out);
736 out << "Score\tQuery\tParentA\tParentB\tIdQM\tIdQA\tIdQB\tIdAB\tIdQT\tLY\tLN\tLA\tRY\tRN\tRA\tDiv\tYN\n";
739 m->appendFiles(outputFileName, outputFileName+".temp");
740 m->mothurRemove(outputFileName); rename((outputFileName+".temp").c_str(), outputFileName.c_str());
742 if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } return 0; }
744 //remove file made for uchime
745 if (templatefile == "self") { m->mothurRemove(fastaFileNames[s]); }
747 m->mothurOutEndLine(); m->mothurOut("It took " + toString(time(NULL) - start) + " secs to check " + toString(numSeqs) + " sequences. " + toString(numChimeras) + " chimeras were found."); m->mothurOutEndLine();
750 outputNames.push_back(outputFileName); outputTypes["chimera"].push_back(outputFileName);
751 outputNames.push_back(accnosFileName); outputTypes["accnos"].push_back(accnosFileName);
752 if (chimealns) { outputNames.push_back(alnsFileName); outputTypes["alns"].push_back(alnsFileName); }
755 //set accnos file as new current accnosfile
757 itTypes = outputTypes.find("accnos");
758 if (itTypes != outputTypes.end()) {
759 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setAccnosFile(current); }
762 m->mothurOutEndLine();
763 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
764 for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); }
765 m->mothurOutEndLine();
770 catch(exception& e) {
771 m->errorOut(e, "ChimeraUchimeCommand", "execute");
775 //**********************************************************************************************************************
776 int ChimeraUchimeCommand::deconvoluteResults(map<string, string>& uniqueNames, string outputFileName, string accnosFileName, string alnsFileName){
778 map<string, string>::iterator itUnique;
783 m->openInputFile(accnosFileName, in2);
786 m->openOutputFile(accnosFileName+".temp", out2);
789 set<string> namesInFile; //this is so if a sequence is found to be chimera in several samples we dont write it to the results file more than once
790 set<string>::iterator itNames;
791 set<string> chimerasInFile;
792 set<string>::iterator itChimeras;
796 if (m->control_pressed) { in2.close(); out2.close(); m->mothurRemove(outputFileName); m->mothurRemove((accnosFileName+".temp")); return 0; }
798 in2 >> name; m->gobble(in2);
801 itUnique = uniqueNames.find(name);
803 if (itUnique == uniqueNames.end()) { m->mothurOut("[ERROR]: trouble parsing accnos results. Cannot find "+ name + "."); m->mothurOutEndLine(); m->control_pressed = true; }
805 itChimeras = chimerasInFile.find((itUnique->second));
807 if (itChimeras == chimerasInFile.end()) {
808 out2 << itUnique->second << endl;
809 chimerasInFile.insert((itUnique->second));
817 m->mothurRemove(accnosFileName);
818 rename((accnosFileName+".temp").c_str(), accnosFileName.c_str());
824 m->openInputFile(outputFileName, in);
827 m->openOutputFile(outputFileName+".temp", out); out.setf(ios::fixed, ios::floatfield); out.setf(ios::showpoint);
828 out << "Score\tQuery\tParentA\tParentB\tIdQM\tIdQA\tIdQB\tIdAB\tIdQT\tLY\tLN\tLA\tRY\tRN\tRA\tDiv\tYN\n";
831 string parent1, parent2, temp2, temp3, temp4, temp5, temp6, temp7, temp8, temp9, temp10, temp11, temp12, temp13, flag;
834 //assumptions - in file each read will always look like - if uchime source is updated, revisit this code.
835 /* 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
836 0.000000 F11Fcsw_33372/ab=18/ * * * * * * * * * * * * * * N
837 0.018300 F11Fcsw_14980/ab=16/ F11Fcsw_1915/ab=35/ F11Fcsw_6032/ab=42/ 79.9 78.7 78.2 78.7 79.2 3 0 5 11 10 20 1.46 N
842 if (m->control_pressed) { in.close(); out.close(); m->mothurRemove((outputFileName+".temp")); return 0; }
845 in >> temp1; m->gobble(in);
846 in >> name; m->gobble(in);
847 in >> parent1; m->gobble(in);
848 in >> parent2; m->gobble(in);
849 in >> temp2 >> temp3 >> temp4 >> temp5 >> temp6 >> temp7 >> temp8 >> temp9 >> temp10 >> temp11 >> temp12 >> temp13 >> flag;
852 //parse name - name will look like U68590/ab=1/
853 string restOfName = "";
854 int pos = name.find_first_of('/');
855 if (pos != string::npos) {
856 restOfName = name.substr(pos);
857 name = name.substr(0, pos);
861 itUnique = uniqueNames.find(name);
863 if (itUnique == uniqueNames.end()) { m->mothurOut("[ERROR]: trouble parsing chimera results. Cannot find "+ name + "."); m->mothurOutEndLine(); m->control_pressed = true; }
865 name = itUnique->second;
866 //is this name already in the file
867 itNames = namesInFile.find((name));
869 if (itNames == namesInFile.end()) { //no not in file
870 if (flag == "N") { //are you really a no??
871 //is this sequence really not chimeric??
872 itChimeras = chimerasInFile.find(name);
874 //then you really are a no so print, otherwise skip
875 if (itChimeras == chimerasInFile.end()) { print = true; }
876 }else{ print = true; }
881 out << temp1 << '\t' << name << restOfName << '\t';
882 namesInFile.insert(name);
884 //parse parent1 names
885 if (parent1 != "*") {
887 pos = parent1.find_first_of('/');
888 if (pos != string::npos) {
889 restOfName = parent1.substr(pos);
890 parent1 = parent1.substr(0, pos);
893 itUnique = uniqueNames.find(parent1);
894 if (itUnique == uniqueNames.end()) { m->mothurOut("[ERROR]: trouble parsing chimera results. Cannot find parentA "+ parent1 + "."); m->mothurOutEndLine(); m->control_pressed = true; }
895 else { out << itUnique->second << restOfName << '\t'; }
896 }else { out << parent1 << '\t'; }
898 //parse parent2 names
899 if (parent2 != "*") {
901 pos = parent2.find_first_of('/');
902 if (pos != string::npos) {
903 restOfName = parent2.substr(pos);
904 parent2 = parent2.substr(0, pos);
907 itUnique = uniqueNames.find(parent2);
908 if (itUnique == uniqueNames.end()) { m->mothurOut("[ERROR]: trouble parsing chimera results. Cannot find parentB "+ parent2 + "."); m->mothurOutEndLine(); m->control_pressed = true; }
909 else { out << itUnique->second << restOfName << '\t'; }
910 }else { out << parent2 << '\t'; }
912 out << temp2 << '\t' << temp3 << '\t' << temp4 << '\t' << temp5 << '\t' << temp6 << '\t' << temp7 << '\t' << temp8 << '\t' << temp9 << '\t' << temp10 << '\t' << temp11 << '\t' << temp12 << temp13 << '\t' << flag << endl;
918 m->mothurRemove(outputFileName);
919 rename((outputFileName+".temp").c_str(), outputFileName.c_str());
923 //assumptions - in file each read will always look like - if uchime source is updated, revisit this code.
925 ------------------------------------------------------------------------
926 Query ( 179 nt) F21Fcsw_11639/ab=591/
927 ParentA ( 179 nt) F11Fcsw_6529/ab=1625/
928 ParentB ( 181 nt) F21Fcsw_12128/ab=1827/
930 A 1 AAGgAAGAtTAATACaagATGgCaTCatgAGtccgCATgTtcAcatGATTAAAG--gTaTtcCGGTagacGATGGGGATG 78
931 Q 1 AAGTAAGACTAATACCCAATGACGTCTCTAGAAGACATCTGAAAGAGATTAAAG--ATTTATCGGTGATGGATGGGGATG 78
932 B 1 AAGgAAGAtTAATcCaggATGggaTCatgAGttcACATgTccgcatGATTAAAGgtATTTtcCGGTagacGATGGGGATG 80
933 Diffs N N A N?N N N NNN N?NB N ?NaNNN B B NN NNNN
934 Votes 0 0 + 000 0 0 000 000+ 0 00!000 + 00 0000
935 Model AAAAAAAAAAAAAAAAAAAAAAxBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB
937 A 79 CGTtccATTAGaTaGTaGGCGGGGTAACGGCCCACCtAGtCttCGATggaTAGGGGTTCTGAGAGGAAGGTCCCCCACAT 158
938 Q 79 CGTCTGATTAGCTTGTTGGCGGGGTAACGGCCCACCAAGGCAACGATCAGTAGGGGTTCTGAGAGGAAGGTCCCCCACAT 158
939 B 81 CGTtccATTAGaTaGTaGGCGGGGTAACGGCCCACCtAGtCAACGATggaTAGGGGTTCTGAGAGGAAGGTCCCCCACAT 160
940 Diffs NNN N N N N N BB NNN
941 Votes 000 0 0 0 0 0 ++ 000
942 Model BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB
944 A 159 TGGAACTGAGACACGGTCCAA 179
945 Q 159 TGGAACTGAGACACGGTCCAA 179
946 B 161 TGGAACTGAGACACGGTCCAA 181
949 Model BBBBBBBBBBBBBBBBBBBBB
951 Ids. QA 76.6%, QB 77.7%, AB 93.7%, QModel 78.9%, Div. +1.5%
952 Diffs Left 7: N 0, A 6, Y 1 (14.3%); Right 35: N 1, A 30, Y 4 (11.4%), Score 0.0047
956 m->openInputFile(alnsFileName, in3);
959 m->openOutputFile(alnsFileName+".temp", out3); out3.setf(ios::fixed, ios::floatfield); out3.setf(ios::showpoint);
966 if (m->control_pressed) { in3.close(); out3.close(); m->mothurRemove(outputFileName); m->mothurRemove((accnosFileName)); m->mothurRemove((alnsFileName+".temp")); return 0; }
969 line = m->getline(in3);
973 istringstream iss(line);
976 //are you a name line
977 if ((temp == "Query") || (temp == "ParentA") || (temp == "ParentB")) {
979 for (int i = 0; i < line.length(); i++) {
981 if (line[i] == ')') { break; }
982 else { out3 << line[i]; }
985 if (spot == (line.length() - 1)) { m->mothurOut("[ERROR]: could not line sequence name in line " + line + "."); m->mothurOutEndLine(); m->control_pressed = true; }
986 else if ((spot+2) > (line.length() - 1)) { m->mothurOut("[ERROR]: could not line sequence name in line " + line + "."); m->mothurOutEndLine(); m->control_pressed = true; }
988 out << line[spot] << line[spot+1];
990 name = line.substr(spot+2);
992 //parse name - name will either look like U68590/ab=1/ or U68590
993 string restOfName = "";
994 int pos = name.find_first_of('/');
995 if (pos != string::npos) {
996 restOfName = name.substr(pos);
997 name = name.substr(0, pos);
1001 itUnique = uniqueNames.find(name);
1003 if (itUnique == uniqueNames.end()) { m->mothurOut("[ERROR]: trouble parsing alns results. Cannot find "+ name + "."); m->mothurOutEndLine();m->control_pressed = true; }
1005 //only limit repeats on query names
1006 if (temp == "Query") {
1007 itNames = namesInFile.find((itUnique->second));
1009 if (itNames == namesInFile.end()) {
1010 out << itUnique->second << restOfName << endl;
1011 namesInFile.insert((itUnique->second));
1013 }else { out << itUnique->second << restOfName << endl; }
1018 }else { //not need to alter line
1019 out3 << line << endl;
1021 }else { out3 << endl; }
1026 m->mothurRemove(alnsFileName);
1027 rename((alnsFileName+".temp").c_str(), alnsFileName.c_str());
1032 catch(exception& e) {
1033 m->errorOut(e, "ChimeraUchimeCommand", "deconvoluteResults");
1037 //**********************************************************************************************************************
1038 int ChimeraUchimeCommand::printFile(vector<seqPriorityNode>& nameMapCount, string filename){
1041 sort(nameMapCount.begin(), nameMapCount.end(), compareSeqPriorityNodes);
1044 m->openOutputFile(filename, out);
1046 //print new file in order of
1047 for (int i = 0; i < nameMapCount.size(); i++) {
1048 out << ">" << nameMapCount[i].name << "/ab=" << nameMapCount[i].numIdentical << "/" << endl << nameMapCount[i].seq << endl;
1054 catch(exception& e) {
1055 m->errorOut(e, "ChimeraUchimeCommand", "printFile");
1059 //**********************************************************************************************************************
1060 int ChimeraUchimeCommand::readFasta(string filename, map<string, string>& seqs){
1062 //create input file for uchime
1063 //read through fastafile and store info
1065 m->openInputFile(filename, in);
1069 if (m->control_pressed) { in.close(); return 0; }
1071 Sequence seq(in); m->gobble(in);
1072 seqs[seq.getName()] = seq.getAligned();
1078 catch(exception& e) {
1079 m->errorOut(e, "ChimeraUchimeCommand", "readFasta");
1083 //**********************************************************************************************************************
1085 string ChimeraUchimeCommand::getNamesFile(string& inputFile){
1087 string nameFile = "";
1089 m->mothurOutEndLine(); m->mothurOut("No namesfile given, running unique.seqs command to generate one."); m->mothurOutEndLine(); m->mothurOutEndLine();
1091 //use unique.seqs to create new name and fastafile
1092 string inputString = "fasta=" + inputFile;
1093 m->mothurOut("/******************************************/"); m->mothurOutEndLine();
1094 m->mothurOut("Running command: unique.seqs(" + inputString + ")"); m->mothurOutEndLine();
1095 m->mothurCalling = true;
1097 Command* uniqueCommand = new DeconvoluteCommand(inputString);
1098 uniqueCommand->execute();
1100 map<string, vector<string> > filenames = uniqueCommand->getOutputFiles();
1102 delete uniqueCommand;
1103 m->mothurCalling = false;
1104 m->mothurOut("/******************************************/"); m->mothurOutEndLine();
1106 nameFile = filenames["name"][0];
1107 inputFile = filenames["fasta"][0];
1111 catch(exception& e) {
1112 m->errorOut(e, "ChimeraUchimeCommand", "getNamesFile");
1116 //**********************************************************************************************************************
1117 int ChimeraUchimeCommand::driverGroups(string outputFName, string filename, string accnos, string alns, int start, int end, vector<string> groups){
1121 int numChimeras = 0;
1123 for (int i = start; i < end; i++) {
1124 int start = time(NULL); if (m->control_pressed) { return 0; }
1127 if (hasCount) { error = cparser->getSeqs(groups[i], filename, true); if ((error == 1) || m->control_pressed) { return 0; } }
1128 else { error = sparser->getSeqs(groups[i], filename, true); if ((error == 1) || m->control_pressed) { return 0; } }
1130 int numSeqs = driver((outputFName + groups[i]), filename, (accnos+ groups[i]), (alns+ groups[i]), numChimeras);
1131 totalSeqs += numSeqs;
1133 if (m->control_pressed) { return 0; }
1135 //remove file made for uchime
1136 if (!m->debug) { m->mothurRemove(filename); }
1137 else { m->mothurOut("[DEBUG]: saving file: " + filename + ".\n"); }
1140 m->appendFiles((outputFName+groups[i]), outputFName); m->mothurRemove((outputFName+groups[i]));
1141 m->appendFiles((accnos+groups[i]), accnos); m->mothurRemove((accnos+groups[i]));
1142 if (chimealns) { m->appendFiles((alns+groups[i]), alns); m->mothurRemove((alns+groups[i])); }
1144 m->mothurOutEndLine(); m->mothurOut("It took " + toString(time(NULL) - start) + " secs to check " + toString(numSeqs) + " sequences from group " + groups[i] + "."); m->mothurOutEndLine();
1149 catch(exception& e) {
1150 m->errorOut(e, "ChimeraUchimeCommand", "driverGroups");
1154 //**********************************************************************************************************************
1156 int ChimeraUchimeCommand::driver(string outputFName, string filename, string accnos, string alns, int& numChimeras){
1159 outputFName = m->getFullPathName(outputFName);
1160 filename = m->getFullPathName(filename);
1161 alns = m->getFullPathName(alns);
1163 //to allow for spaces in the path
1164 outputFName = "\"" + outputFName + "\"";
1165 filename = "\"" + filename + "\"";
1166 alns = "\"" + alns + "\"";
1168 vector<char*> cPara;
1170 string uchimeCommand = uchimeLocation;
1171 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
1172 uchimeCommand += " ";
1174 uchimeCommand = "\"" + uchimeCommand + "\"";
1177 tempUchime= new char[uchimeCommand.length()+1];
1179 strncat(tempUchime, uchimeCommand.c_str(), uchimeCommand.length());
1180 cPara.push_back(tempUchime);
1182 //are you using a reference file
1183 if (templatefile != "self") {
1184 string outputFileName = filename.substr(1, filename.length()-2) + ".uchime_formatted";
1185 prepFile(filename.substr(1, filename.length()-2), outputFileName);
1186 filename = outputFileName;
1187 filename = "\"" + filename + "\"";
1188 //add reference file
1189 char* tempRef = new char[5];
1190 //strcpy(tempRef, "--db");
1191 *tempRef = '\0'; strncat(tempRef, "--db", 4);
1192 cPara.push_back(tempRef);
1193 char* tempR = new char[templatefile.length()+1];
1194 //strcpy(tempR, templatefile.c_str());
1195 *tempR = '\0'; strncat(tempR, templatefile.c_str(), templatefile.length());
1196 cPara.push_back(tempR);
1199 char* tempIn = new char[8];
1200 *tempIn = '\0'; strncat(tempIn, "--input", 7);
1201 //strcpy(tempIn, "--input");
1202 cPara.push_back(tempIn);
1203 char* temp = new char[filename.length()+1];
1204 *temp = '\0'; strncat(temp, filename.c_str(), filename.length());
1205 //strcpy(temp, filename.c_str());
1206 cPara.push_back(temp);
1208 char* tempO = new char[12];
1209 *tempO = '\0'; strncat(tempO, "--uchimeout", 11);
1210 //strcpy(tempO, "--uchimeout");
1211 cPara.push_back(tempO);
1212 char* tempout = new char[outputFName.length()+1];
1213 //strcpy(tempout, outputFName.c_str());
1214 *tempout = '\0'; strncat(tempout, outputFName.c_str(), outputFName.length());
1215 cPara.push_back(tempout);
1218 char* tempA = new char[13];
1219 *tempA = '\0'; strncat(tempA, "--uchimealns", 12);
1220 //strcpy(tempA, "--uchimealns");
1221 cPara.push_back(tempA);
1222 char* tempa = new char[alns.length()+1];
1223 //strcpy(tempa, alns.c_str());
1224 *tempa = '\0'; strncat(tempa, alns.c_str(), alns.length());
1225 cPara.push_back(tempa);
1229 char* tempskew = new char[9];
1230 *tempskew = '\0'; strncat(tempskew, "--abskew", 8);
1231 //strcpy(tempskew, "--abskew");
1232 cPara.push_back(tempskew);
1233 char* tempSkew = new char[abskew.length()+1];
1234 //strcpy(tempSkew, abskew.c_str());
1235 *tempSkew = '\0'; strncat(tempSkew, abskew.c_str(), abskew.length());
1236 cPara.push_back(tempSkew);
1240 char* tempminh = new char[7];
1241 *tempminh = '\0'; strncat(tempminh, "--minh", 6);
1242 //strcpy(tempminh, "--minh");
1243 cPara.push_back(tempminh);
1244 char* tempMinH = new char[minh.length()+1];
1245 *tempMinH = '\0'; strncat(tempMinH, minh.c_str(), minh.length());
1246 //strcpy(tempMinH, minh.c_str());
1247 cPara.push_back(tempMinH);
1251 char* tempmindiv = new char[9];
1252 *tempmindiv = '\0'; strncat(tempmindiv, "--mindiv", 8);
1253 //strcpy(tempmindiv, "--mindiv");
1254 cPara.push_back(tempmindiv);
1255 char* tempMindiv = new char[mindiv.length()+1];
1256 *tempMindiv = '\0'; strncat(tempMindiv, mindiv.c_str(), mindiv.length());
1257 //strcpy(tempMindiv, mindiv.c_str());
1258 cPara.push_back(tempMindiv);
1262 char* tempxn = new char[5];
1263 //strcpy(tempxn, "--xn");
1264 *tempxn = '\0'; strncat(tempxn, "--xn", 4);
1265 cPara.push_back(tempxn);
1266 char* tempXn = new char[xn.length()+1];
1267 //strcpy(tempXn, xn.c_str());
1268 *tempXn = '\0'; strncat(tempXn, xn.c_str(), xn.length());
1269 cPara.push_back(tempXn);
1273 char* tempdn = new char[5];
1274 //strcpy(tempdn, "--dn");
1275 *tempdn = '\0'; strncat(tempdn, "--dn", 4);
1276 cPara.push_back(tempdn);
1277 char* tempDn = new char[dn.length()+1];
1278 *tempDn = '\0'; strncat(tempDn, dn.c_str(), dn.length());
1279 //strcpy(tempDn, dn.c_str());
1280 cPara.push_back(tempDn);
1284 char* tempxa = new char[5];
1285 //strcpy(tempxa, "--xa");
1286 *tempxa = '\0'; strncat(tempxa, "--xa", 4);
1287 cPara.push_back(tempxa);
1288 char* tempXa = new char[xa.length()+1];
1289 *tempXa = '\0'; strncat(tempXa, xa.c_str(), xa.length());
1290 //strcpy(tempXa, xa.c_str());
1291 cPara.push_back(tempXa);
1295 char* tempchunks = new char[9];
1296 //strcpy(tempchunks, "--chunks");
1297 *tempchunks = '\0'; strncat(tempchunks, "--chunks", 8);
1298 cPara.push_back(tempchunks);
1299 char* tempChunks = new char[chunks.length()+1];
1300 *tempChunks = '\0'; strncat(tempChunks, chunks.c_str(), chunks.length());
1301 //strcpy(tempChunks, chunks.c_str());
1302 cPara.push_back(tempChunks);
1306 char* tempminchunk = new char[11];
1307 //strcpy(tempminchunk, "--minchunk");
1308 *tempminchunk = '\0'; strncat(tempminchunk, "--minchunk", 10);
1309 cPara.push_back(tempminchunk);
1310 char* tempMinchunk = new char[minchunk.length()+1];
1311 *tempMinchunk = '\0'; strncat(tempMinchunk, minchunk.c_str(), minchunk.length());
1312 //strcpy(tempMinchunk, minchunk.c_str());
1313 cPara.push_back(tempMinchunk);
1316 if (useIdsmoothwindow) {
1317 char* tempidsmoothwindow = new char[17];
1318 *tempidsmoothwindow = '\0'; strncat(tempidsmoothwindow, "--idsmoothwindow", 16);
1319 //strcpy(tempidsmoothwindow, "--idsmoothwindow");
1320 cPara.push_back(tempidsmoothwindow);
1321 char* tempIdsmoothwindow = new char[idsmoothwindow.length()+1];
1322 *tempIdsmoothwindow = '\0'; strncat(tempIdsmoothwindow, idsmoothwindow.c_str(), idsmoothwindow.length());
1323 //strcpy(tempIdsmoothwindow, idsmoothwindow.c_str());
1324 cPara.push_back(tempIdsmoothwindow);
1327 /*if (useMinsmoothid) {
1328 char* tempminsmoothid = new char[14];
1329 //strcpy(tempminsmoothid, "--minsmoothid");
1330 *tempminsmoothid = '\0'; strncat(tempminsmoothid, "--minsmoothid", 13);
1331 cPara.push_back(tempminsmoothid);
1332 char* tempMinsmoothid = new char[minsmoothid.length()+1];
1333 *tempMinsmoothid = '\0'; strncat(tempMinsmoothid, minsmoothid.c_str(), minsmoothid.length());
1334 //strcpy(tempMinsmoothid, minsmoothid.c_str());
1335 cPara.push_back(tempMinsmoothid);
1339 char* tempmaxp = new char[7];
1340 //strcpy(tempmaxp, "--maxp");
1341 *tempmaxp = '\0'; strncat(tempmaxp, "--maxp", 6);
1342 cPara.push_back(tempmaxp);
1343 char* tempMaxp = new char[maxp.length()+1];
1344 *tempMaxp = '\0'; strncat(tempMaxp, maxp.c_str(), maxp.length());
1345 //strcpy(tempMaxp, maxp.c_str());
1346 cPara.push_back(tempMaxp);
1350 char* tempskipgaps = new char[13];
1351 //strcpy(tempskipgaps, "--[no]skipgaps");
1352 *tempskipgaps = '\0'; strncat(tempskipgaps, "--noskipgaps", 12);
1353 cPara.push_back(tempskipgaps);
1357 char* tempskipgaps2 = new char[14];
1358 //strcpy(tempskipgaps2, "--[no]skipgaps2");
1359 *tempskipgaps2 = '\0'; strncat(tempskipgaps2, "--noskipgaps2", 13);
1360 cPara.push_back(tempskipgaps2);
1364 char* tempminlen = new char[9];
1365 *tempminlen = '\0'; strncat(tempminlen, "--minlen", 8);
1366 //strcpy(tempminlen, "--minlen");
1367 cPara.push_back(tempminlen);
1368 char* tempMinlen = new char[minlen.length()+1];
1369 //strcpy(tempMinlen, minlen.c_str());
1370 *tempMinlen = '\0'; strncat(tempMinlen, minlen.c_str(), minlen.length());
1371 cPara.push_back(tempMinlen);
1375 char* tempmaxlen = new char[9];
1376 //strcpy(tempmaxlen, "--maxlen");
1377 *tempmaxlen = '\0'; strncat(tempmaxlen, "--maxlen", 8);
1378 cPara.push_back(tempmaxlen);
1379 char* tempMaxlen = new char[maxlen.length()+1];
1380 *tempMaxlen = '\0'; strncat(tempMaxlen, maxlen.c_str(), maxlen.length());
1381 //strcpy(tempMaxlen, maxlen.c_str());
1382 cPara.push_back(tempMaxlen);
1386 char* tempucl = new char[5];
1387 strcpy(tempucl, "--ucl");
1388 cPara.push_back(tempucl);
1391 if (useQueryfract) {
1392 char* tempqueryfract = new char[13];
1393 *tempqueryfract = '\0'; strncat(tempqueryfract, "--queryfract", 12);
1394 //strcpy(tempqueryfract, "--queryfract");
1395 cPara.push_back(tempqueryfract);
1396 char* tempQueryfract = new char[queryfract.length()+1];
1397 *tempQueryfract = '\0'; strncat(tempQueryfract, queryfract.c_str(), queryfract.length());
1398 //strcpy(tempQueryfract, queryfract.c_str());
1399 cPara.push_back(tempQueryfract);
1403 char** uchimeParameters;
1404 uchimeParameters = new char*[cPara.size()];
1405 string commandString = "";
1406 for (int i = 0; i < cPara.size(); i++) { uchimeParameters[i] = cPara[i]; commandString += toString(cPara[i]) + " "; }
1407 //int numArgs = cPara.size();
1409 //uchime_main(numArgs, uchimeParameters);
1410 //cout << "commandString = " << commandString << endl;
1411 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
1413 commandString = "\"" + commandString + "\"";
1415 if (m->debug) { m->mothurOut("[DEBUG]: uchime command = " + commandString + ".\n"); }
1416 system(commandString.c_str());
1419 for(int i = 0; i < cPara.size(); i++) { delete cPara[i]; }
1420 delete[] uchimeParameters;
1422 //remove "" from filenames
1423 outputFName = outputFName.substr(1, outputFName.length()-2);
1424 filename = filename.substr(1, filename.length()-2);
1425 alns = alns.substr(1, alns.length()-2);
1427 if (m->control_pressed) { return 0; }
1429 //create accnos file from uchime results
1431 m->openInputFile(outputFName, in);
1434 m->openOutputFile(accnos, out);
1440 if (m->control_pressed) { break; }
1443 string chimeraFlag = "";
1444 in >> chimeraFlag >> name;
1446 //fix name if needed
1447 if (templatefile == "self") {
1448 name = name.substr(0, name.length()-1); //rip off last /
1449 name = name.substr(0, name.find_last_of('/'));
1452 for (int i = 0; i < 15; i++) { in >> chimeraFlag; }
1455 if (chimeraFlag == "Y") { out << name << endl; numChimeras++; }
1461 //if (templatefile != "self") { m->mothurRemove(filename); }
1465 catch(exception& e) {
1466 m->errorOut(e, "ChimeraUchimeCommand", "driver");
1470 /**************************************************************************************************/
1471 //uchime can't handle some of the things allowed in mothurs fasta files. This functions "cleans up" the file.
1472 int ChimeraUchimeCommand::prepFile(string filename, string output) {
1476 m->openInputFile(filename, in);
1479 m->openOutputFile(output, out);
1482 if (m->control_pressed) { break; }
1484 Sequence seq(in); m->gobble(in);
1486 if (seq.getName() != "") { seq.printSequence(out); }
1493 catch(exception& e) {
1494 m->errorOut(e, "ChimeraUchimeCommand", "prepFile");
1498 /**************************************************************************************************/
1500 int ChimeraUchimeCommand::createProcesses(string outputFileName, string filename, string accnos, string alns, int& numChimeras) {
1506 vector<string> files;
1508 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
1509 //break up file into multiple files
1510 m->divideFile(filename, processors, files);
1512 if (m->control_pressed) { return 0; }
1514 //loop through and create all the processes you want
1515 while (process != processors) {
1519 processIDS.push_back(pid); //create map from line number to pid so you can append files in correct order later
1521 }else if (pid == 0){
1522 num = driver(outputFileName + toString(getpid()) + ".temp", files[process], accnos + toString(getpid()) + ".temp", alns + toString(getpid()) + ".temp", numChimeras);
1524 //pass numSeqs to parent
1526 string tempFile = outputFileName + toString(getpid()) + ".num.temp";
1527 m->openOutputFile(tempFile, out);
1529 out << numChimeras << endl;
1534 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine();
1535 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
1541 num = driver(outputFileName, files[0], accnos, alns, numChimeras);
1543 //force parent to wait until all the processes are done
1544 for (int i=0;i<processIDS.size();i++) {
1545 int temp = processIDS[i];
1549 for (int i = 0; i < processIDS.size(); i++) {
1551 string tempFile = outputFileName + toString(processIDS[i]) + ".num.temp";
1552 m->openInputFile(tempFile, in);
1555 in >> tempNum; m->gobble(in);
1558 numChimeras += tempNum;
1560 in.close(); m->mothurRemove(tempFile);
1563 //////////////////////////////////////////////////////////////////////////////////////////////////////
1564 //Windows version shared memory, so be careful when passing variables through the preClusterData struct.
1565 //Above fork() will clone, so memory is separate, but that's not the case with windows,
1566 //////////////////////////////////////////////////////////////////////////////////////////////////////
1571 map<int, ofstream*> filehandles;
1572 map<int, ofstream*>::iterator it3;
1575 for (int i = 0; i < processors; i++) {
1576 temp = new ofstream;
1577 filehandles[i] = temp;
1578 m->openOutputFile(filename+toString(i)+".temp", *(temp));
1579 files.push_back(filename+toString(i)+".temp");
1583 m->openInputFile(filename, in);
1587 if (m->control_pressed) { in.close(); for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) { (*(it3->second)).close(); delete it3->second; } return 0; }
1589 Sequence tempSeq(in); m->gobble(in);
1591 if (tempSeq.getName() != "") {
1592 tempSeq.printSequence(*(filehandles[spot]));
1594 if (spot == processors) { spot = 0; }
1600 for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) {
1601 (*(it3->second)).close();
1605 //sanity check for number of processors
1606 if (count < processors) { processors = count; }
1608 vector<uchimeData*> pDataArray;
1609 DWORD dwThreadIdArray[processors-1];
1610 HANDLE hThreadArray[processors-1];
1611 vector<string> dummy; //used so that we can use the same struct for MyUchimeSeqsThreadFunction and MyUchimeThreadFunction
1613 //Create processor worker threads.
1614 for( int i=1; i<processors; i++ ){
1615 // Allocate memory for thread data.
1616 string extension = toString(i) + ".temp";
1618 uchimeData* tempUchime = new uchimeData(outputFileName+extension, uchimeLocation, templatefile, files[i], "", "", "", accnos+extension, alns+extension, dummy, m, 0, 0, i);
1619 tempUchime->setBooleans(useAbskew, chimealns, useMinH, useMindiv, useXn, useDn, useXa, useChunks, useMinchunk, useIdsmoothwindow, useMinsmoothid, useMaxp, skipgaps, skipgaps2, useMinlen, useMaxlen, ucl, useQueryfract, hasCount);
1620 tempUchime->setVariables(abskew, minh, mindiv, xn, dn, xa, chunks, minchunk, idsmoothwindow, minsmoothid, maxp, minlen, maxlen, queryfract);
1622 pDataArray.push_back(tempUchime);
1623 processIDS.push_back(i);
1625 //MySeqSumThreadFunction is in header. It must be global or static to work with the threads.
1626 //default security attributes, thread function name, argument to thread function, use default creation flags, returns the thread identifier
1627 hThreadArray[i-1] = CreateThread(NULL, 0, MyUchimeSeqsThreadFunction, pDataArray[i-1], 0, &dwThreadIdArray[i-1]);
1631 //using the main process as a worker saves time and memory
1632 num = driver(outputFileName, files[0], accnos, alns, numChimeras);
1634 //Wait until all threads have terminated.
1635 WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE);
1637 //Close all thread handles and free memory allocations.
1638 for(int i=0; i < pDataArray.size(); i++){
1639 num += pDataArray[i]->count;
1640 numChimeras += pDataArray[i]->numChimeras;
1641 CloseHandle(hThreadArray[i]);
1642 delete pDataArray[i];
1646 //append output files
1647 for(int i=0;i<processIDS.size();i++){
1648 m->appendFiles((outputFileName + toString(processIDS[i]) + ".temp"), outputFileName);
1649 m->mothurRemove((outputFileName + toString(processIDS[i]) + ".temp"));
1651 m->appendFiles((accnos + toString(processIDS[i]) + ".temp"), accnos);
1652 m->mothurRemove((accnos + toString(processIDS[i]) + ".temp"));
1655 m->appendFiles((alns + toString(processIDS[i]) + ".temp"), alns);
1656 m->mothurRemove((alns + toString(processIDS[i]) + ".temp"));
1660 //get rid of the file pieces.
1661 for (int i = 0; i < files.size(); i++) { m->mothurRemove(files[i]); }
1664 catch(exception& e) {
1665 m->errorOut(e, "ChimeraUchimeCommand", "createProcesses");
1669 /**************************************************************************************************/
1671 int ChimeraUchimeCommand::createProcessesGroups(string outputFName, string filename, string accnos, string alns, vector<string> groups, string nameFile, string groupFile, string fastaFile) {
1679 if (groups.size() < processors) { processors = groups.size(); }
1681 //divide the groups between the processors
1682 vector<linePair> lines;
1683 int numGroupsPerProcessor = groups.size() / processors;
1684 for (int i = 0; i < processors; i++) {
1685 int startIndex = i * numGroupsPerProcessor;
1686 int endIndex = (i+1) * numGroupsPerProcessor;
1687 if(i == (processors - 1)){ endIndex = groups.size(); }
1688 lines.push_back(linePair(startIndex, endIndex));
1691 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
1693 //loop through and create all the processes you want
1694 while (process != processors) {
1698 processIDS.push_back(pid); //create map from line number to pid so you can append files in correct order later
1700 }else if (pid == 0){
1701 num = driverGroups(outputFName + toString(getpid()) + ".temp", filename + toString(getpid()) + ".temp", accnos + toString(getpid()) + ".temp", alns + toString(getpid()) + ".temp", lines[process].start, lines[process].end, groups);
1703 //pass numSeqs to parent
1705 string tempFile = outputFName + toString(getpid()) + ".num.temp";
1706 m->openOutputFile(tempFile, out);
1712 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine();
1713 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
1719 num = driverGroups(outputFName, filename, accnos, alns, lines[0].start, lines[0].end, groups);
1721 //force parent to wait until all the processes are done
1722 for (int i=0;i<processIDS.size();i++) {
1723 int temp = processIDS[i];
1727 for (int i = 0; i < processIDS.size(); i++) {
1729 string tempFile = outputFName + toString(processIDS[i]) + ".num.temp";
1730 m->openInputFile(tempFile, in);
1731 if (!in.eof()) { int tempNum = 0; in >> tempNum; num += tempNum; }
1732 in.close(); m->mothurRemove(tempFile);
1736 //////////////////////////////////////////////////////////////////////////////////////////////////////
1737 //Windows version shared memory, so be careful when passing variables through the uchimeData struct.
1738 //Above fork() will clone, so memory is separate, but that's not the case with windows,
1739 //////////////////////////////////////////////////////////////////////////////////////////////////////
1741 vector<uchimeData*> pDataArray;
1742 DWORD dwThreadIdArray[processors-1];
1743 HANDLE hThreadArray[processors-1];
1745 //Create processor worker threads.
1746 for( int i=1; i<processors; i++ ){
1747 // Allocate memory for thread data.
1748 string extension = toString(i) + ".temp";
1750 uchimeData* tempUchime = new uchimeData(outputFName+extension, uchimeLocation, templatefile, filename+extension, fastaFile, nameFile, groupFile, accnos+extension, alns+extension, groups, m, lines[i].start, lines[i].end, i);
1751 tempUchime->setBooleans(useAbskew, chimealns, useMinH, useMindiv, useXn, useDn, useXa, useChunks, useMinchunk, useIdsmoothwindow, useMinsmoothid, useMaxp, skipgaps, skipgaps2, useMinlen, useMaxlen, ucl, useQueryfract, hasCount);
1752 tempUchime->setVariables(abskew, minh, mindiv, xn, dn, xa, chunks, minchunk, idsmoothwindow, minsmoothid, maxp, minlen, maxlen, queryfract);
1754 pDataArray.push_back(tempUchime);
1755 processIDS.push_back(i);
1757 //MyUchimeThreadFunction is in header. It must be global or static to work with the threads.
1758 //default security attributes, thread function name, argument to thread function, use default creation flags, returns the thread identifier
1759 hThreadArray[i-1] = CreateThread(NULL, 0, MyUchimeThreadFunction, pDataArray[i-1], 0, &dwThreadIdArray[i-1]);
1763 //using the main process as a worker saves time and memory
1764 num = driverGroups(outputFName, filename, accnos, alns, lines[0].start, lines[0].end, groups);
1766 //Wait until all threads have terminated.
1767 WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE);
1769 //Close all thread handles and free memory allocations.
1770 for(int i=0; i < pDataArray.size(); i++){
1771 num += pDataArray[i]->count;
1772 CloseHandle(hThreadArray[i]);
1773 delete pDataArray[i];
1778 //append output files
1779 for(int i=0;i<processIDS.size();i++){
1780 m->appendFiles((outputFName + toString(processIDS[i]) + ".temp"), outputFName);
1781 m->mothurRemove((outputFName + toString(processIDS[i]) + ".temp"));
1783 m->appendFiles((accnos + toString(processIDS[i]) + ".temp"), accnos);
1784 m->mothurRemove((accnos + toString(processIDS[i]) + ".temp"));
1787 m->appendFiles((alns + toString(processIDS[i]) + ".temp"), alns);
1788 m->mothurRemove((alns + toString(processIDS[i]) + ".temp"));
1795 catch(exception& e) {
1796 m->errorOut(e, "ChimeraUchimeCommand", "createProcessesGroups");
1800 /**************************************************************************************************/