2 * chimerauchimecommand.cpp
5 * Created by westcott on 5/13/11.
6 * Copyright 2011 Schloss Lab. All rights reserved.
10 #include "chimerauchimecommand.h"
11 #include "deconvolutecommand.h"
13 #include "sequence.hpp"
14 #include "referencedb.h"
15 #include "systemcommand.h"
17 //**********************************************************************************************************************
18 vector<string> ChimeraUchimeCommand::setParameters(){
20 CommandParameter ptemplate("reference", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(ptemplate);
21 CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pfasta);
22 CommandParameter pname("name", "InputTypes", "", "", "NameCount", "none", "none",false,false); parameters.push_back(pname);
23 CommandParameter pcount("count", "InputTypes", "", "", "NameCount-CountGroup", "none", "none",false,false); parameters.push_back(pcount);
24 CommandParameter pgroup("group", "InputTypes", "", "", "CountGroup", "none", "none",false,false); parameters.push_back(pgroup);
25 CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors);
26 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
27 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
28 CommandParameter pabskew("abskew", "Number", "", "1.9", "", "", "",false,false); parameters.push_back(pabskew);
29 CommandParameter pchimealns("chimealns", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pchimealns);
30 CommandParameter pminh("minh", "Number", "", "0.3", "", "", "",false,false); parameters.push_back(pminh);
31 CommandParameter pmindiv("mindiv", "Number", "", "0.5", "", "", "",false,false); parameters.push_back(pmindiv);
32 CommandParameter pxn("xn", "Number", "", "8.0", "", "", "",false,false); parameters.push_back(pxn);
33 CommandParameter pdn("dn", "Number", "", "1.4", "", "", "",false,false); parameters.push_back(pdn);
34 CommandParameter pxa("xa", "Number", "", "1", "", "", "",false,false); parameters.push_back(pxa);
35 CommandParameter pchunks("chunks", "Number", "", "4", "", "", "",false,false); parameters.push_back(pchunks);
36 CommandParameter pminchunk("minchunk", "Number", "", "64", "", "", "",false,false); parameters.push_back(pminchunk);
37 CommandParameter pidsmoothwindow("idsmoothwindow", "Number", "", "32", "", "", "",false,false); parameters.push_back(pidsmoothwindow);
38 //CommandParameter pminsmoothid("minsmoothid", "Number", "", "0.95", "", "", "",false,false); parameters.push_back(pminsmoothid);
39 CommandParameter pmaxp("maxp", "Number", "", "2", "", "", "",false,false); parameters.push_back(pmaxp);
40 CommandParameter pskipgaps("skipgaps", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(pskipgaps);
41 CommandParameter pskipgaps2("skipgaps2", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(pskipgaps2);
42 CommandParameter pminlen("minlen", "Number", "", "10", "", "", "",false,false); parameters.push_back(pminlen);
43 CommandParameter pmaxlen("maxlen", "Number", "", "10000", "", "", "",false,false); parameters.push_back(pmaxlen);
44 CommandParameter pucl("ucl", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pucl);
45 CommandParameter pqueryfract("queryfract", "Number", "", "0.5", "", "", "",false,false); parameters.push_back(pqueryfract);
47 vector<string> myArray;
48 for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); }
52 m->errorOut(e, "ChimeraUchimeCommand", "setParameters");
56 //**********************************************************************************************************************
57 string ChimeraUchimeCommand::getHelpString(){
59 string helpString = "";
60 helpString += "The chimera.uchime command reads a fastafile and referencefile and outputs potentially chimeric sequences.\n";
61 helpString += "This command is a wrapper for uchime written by Robert C. Edgar.\n";
62 helpString += "The chimera.uchime command parameters are fasta, name, count, reference, processors, abskew, chimealns, minh, mindiv, xn, dn, xa, chunks, minchunk, idsmoothwindow, minsmoothid, maxp, skipgaps, skipgaps2, minlen, maxlen, ucl and queryfact.\n";
63 helpString += "The fasta parameter allows you to enter the fasta file containing your potentially chimeric sequences, and is required, unless you have a valid current fasta file. \n";
64 helpString += "The name parameter allows you to provide a name file, if you are using template=self. \n";
65 helpString += "The count parameter allows you to provide a count file, if you are using template=self. \n";
66 helpString += "You may enter multiple fasta files by separating their names with dashes. ie. fasta=abrecovery.fasta-amazon.fasta \n";
67 helpString += "The group parameter allows you to provide a group file. The group file can be used with a namesfile and reference=self. When checking sequences, only sequences from the same group as the query sequence will be used as the reference. \n";
68 helpString += "The reference parameter allows you to enter a reference file containing known non-chimeric sequences, and is required. You may also set template=self, in this case the abundant sequences will be used as potential parents. \n";
69 helpString += "The processors parameter allows you to specify how many processors you would like to use. The default is 1. \n";
70 helpString += "The abskew parameter can only be used with template=self. Minimum abundance skew. Default 1.9. Abundance skew is: min [ abund(parent1), abund(parent2) ] / abund(query).\n";
71 helpString += "The chimealns parameter allows you to indicate you would like a file containing multiple alignments of query sequences to parents in human readable format. Alignments show columns with differences that support or contradict a chimeric model.\n";
72 helpString += "The minh parameter - mininum score to report chimera. Default 0.3. Values from 0.1 to 5 might be reasonable. Lower values increase sensitivity but may report more false positives. If you decrease xn you may need to increase minh, and vice versa.\n";
73 helpString += "The mindiv parameter - minimum divergence ratio, default 0.5. Div ratio is 100%% - %%identity between query sequence and the closest candidate for being a parent. If you don't care about very close chimeras, then you could increase mindiv to, say, 1.0 or 2.0, and also decrease minh, say to 0.1, to increase sensitivity. How well this works will depend on your data. Best is to tune parameters on a good benchmark.\n";
74 helpString += "The xn parameter - weight of a no vote. Default 8.0. Decreasing this weight to around 3 or 4 may give better performance on denoised data.\n";
75 helpString += "The dn parameter - pseudo-count prior on number of no votes. Default 1.4. Probably no good reason to change this unless you can retune to a good benchmark for your data. Reasonable values are probably in the range from 0.2 to 2.\n";
76 helpString += "The xa parameter - weight of an abstain vote. Default 1. So far, results do not seem to be very sensitive to this parameter, but if you have a good training set might be worth trying. Reasonable values might range from 0.1 to 2.\n";
77 helpString += "The chunks parameter is the number of chunks to extract from the query sequence when searching for parents. Default 4.\n";
78 helpString += "The minchunk parameter is the minimum length of a chunk. Default 64.\n";
79 helpString += "The idsmoothwindow parameter is the length of id smoothing window. Default 32.\n";
80 //helpString += "The minsmoothid parameter - minimum factional identity over smoothed window of candidate parent. Default 0.95.\n";
81 helpString += "The maxp parameter - maximum number of candidate parents to consider. Default 2. In tests so far, increasing maxp gives only a very small improvement in sensivity but tends to increase the error rate quite a bit.\n";
82 helpString += "The skipgaps parameter controls how gapped columns affect counting of diffs. If skipgaps is set to T, columns containing gaps do not found as diffs. Default = T.\n";
83 helpString += "The skipgaps2 parameter controls how gapped columns affect counting of diffs. If skipgaps2 is set to T, if column is immediately adjacent to a column containing a gap, it is not counted as a diff. Default = T.\n";
84 helpString += "The minlen parameter is the minimum unaligned sequence length. Defaults 10. Applies to both query and reference sequences.\n";
85 helpString += "The maxlen parameter is the maximum unaligned sequence length. Defaults 10000. Applies to both query and reference sequences.\n";
86 helpString += "The ucl parameter - use local-X alignments. Default is global-X or false. On tests so far, global-X is always better; this option is retained because it just might work well on some future type of data.\n";
87 helpString += "The queryfract parameter - minimum fraction of the query sequence that must be covered by a local-X alignment. Default 0.5. Applies only when ucl is true.\n";
89 helpString += "When using MPI, the processors parameter is set to the number of MPI processes running. \n";
91 helpString += "The chimera.uchime command should be in the following format: \n";
92 helpString += "chimera.uchime(fasta=yourFastaFile, reference=yourTemplate) \n";
93 helpString += "Example: chimera.uchime(fasta=AD.align, reference=silva.gold.align) \n";
94 helpString += "Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFastaFile).\n";
98 m->errorOut(e, "ChimeraUchimeCommand", "getHelpString");
102 //**********************************************************************************************************************
103 string ChimeraUchimeCommand::getOutputFileNameTag(string type, string inputName=""){
105 string outputFileName = "";
106 map<string, vector<string> >::iterator it;
108 //is this a type this command creates
109 it = outputTypes.find(type);
110 if (it == outputTypes.end()) { m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n"); }
112 if (type == "chimera") { outputFileName = "uchime.chimeras"; }
113 else if (type == "accnos") { outputFileName = "uchime.accnos"; }
114 else if (type == "alns") { outputFileName = "uchime.alns"; }
115 else { m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n"); m->control_pressed = true; }
117 return outputFileName;
119 catch(exception& e) {
120 m->errorOut(e, "ChimeraUchimeCommand", "getOutputFileNameTag");
124 //**********************************************************************************************************************
125 ChimeraUchimeCommand::ChimeraUchimeCommand(){
127 abort = true; calledHelp = true;
129 vector<string> tempOutNames;
130 outputTypes["chimera"] = tempOutNames;
131 outputTypes["accnos"] = tempOutNames;
132 outputTypes["alns"] = tempOutNames;
134 catch(exception& e) {
135 m->errorOut(e, "ChimeraUchimeCommand", "ChimeraUchimeCommand");
139 //***************************************************************************************************************
140 ChimeraUchimeCommand::ChimeraUchimeCommand(string option) {
142 abort = false; calledHelp = false; hasName=false; hasCount=false;
143 ReferenceDB* rdb = ReferenceDB::getInstance();
145 //allow user to run help
146 if(option == "help") { help(); abort = true; calledHelp = true; }
147 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
150 vector<string> myArray = setParameters();
152 OptionParser parser(option);
153 map<string,string> parameters = parser.getParameters();
155 ValidParameters validParameter("chimera.uchime");
156 map<string,string>::iterator it;
158 //check to make sure all parameters are valid for command
159 for (it = parameters.begin(); it != parameters.end(); it++) {
160 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
163 vector<string> tempOutNames;
164 outputTypes["chimera"] = tempOutNames;
165 outputTypes["accnos"] = tempOutNames;
166 outputTypes["alns"] = tempOutNames;
168 //if the user changes the input directory command factory will send this info to us in the output parameter
169 string inputDir = validParameter.validFile(parameters, "inputdir", false);
170 if (inputDir == "not found"){ inputDir = ""; }
172 //check for required parameters
173 fastafile = validParameter.validFile(parameters, "fasta", false);
174 if (fastafile == "not found") {
175 //if there is a current fasta file, use it
176 string filename = m->getFastaFile();
177 if (filename != "") { fastaFileNames.push_back(filename); m->mothurOut("Using " + filename + " as input file for the fasta parameter."); m->mothurOutEndLine(); }
178 else { m->mothurOut("You have no current fastafile and the fasta parameter is required."); m->mothurOutEndLine(); abort = true; }
180 m->splitAtDash(fastafile, fastaFileNames);
182 //go through files and make sure they are good, if not, then disregard them
183 for (int i = 0; i < fastaFileNames.size(); i++) {
186 if (fastaFileNames[i] == "current") {
187 fastaFileNames[i] = m->getFastaFile();
188 if (fastaFileNames[i] != "") { m->mothurOut("Using " + fastaFileNames[i] + " as input file for the fasta parameter where you had given current."); m->mothurOutEndLine(); }
190 m->mothurOut("You have no current fastafile, ignoring current."); m->mothurOutEndLine(); ignore=true;
191 //erase from file list
192 fastaFileNames.erase(fastaFileNames.begin()+i);
199 if (inputDir != "") {
200 string path = m->hasPath(fastaFileNames[i]);
201 //if the user has not given a path then, add inputdir. else leave path alone.
202 if (path == "") { fastaFileNames[i] = inputDir + fastaFileNames[i]; }
208 ableToOpen = m->openInputFile(fastaFileNames[i], in, "noerror");
210 //if you can't open it, try default location
211 if (ableToOpen == 1) {
212 if (m->getDefaultPath() != "") { //default path is set
213 string tryPath = m->getDefaultPath() + m->getSimpleName(fastaFileNames[i]);
214 m->mothurOut("Unable to open " + fastaFileNames[i] + ". Trying default " + tryPath); m->mothurOutEndLine();
216 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
218 fastaFileNames[i] = tryPath;
222 if (ableToOpen == 1) {
223 if (m->getOutputDir() != "") { //default path is set
224 string tryPath = m->getOutputDir() + m->getSimpleName(fastaFileNames[i]);
225 m->mothurOut("Unable to open " + fastaFileNames[i] + ". Trying output directory " + tryPath); m->mothurOutEndLine();
227 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
229 fastaFileNames[i] = tryPath;
235 if (ableToOpen == 1) {
236 m->mothurOut("Unable to open " + fastaFileNames[i] + ". It will be disregarded."); m->mothurOutEndLine();
237 //erase from file list
238 fastaFileNames.erase(fastaFileNames.begin()+i);
241 m->setFastaFile(fastaFileNames[i]);
246 //make sure there is at least one valid file left
247 if (fastaFileNames.size() == 0) { m->mothurOut("[ERROR]: no valid files."); m->mothurOutEndLine(); abort = true; }
251 //check for required parameters
252 namefile = validParameter.validFile(parameters, "name", false);
253 if (namefile == "not found") { namefile = ""; }
255 m->splitAtDash(namefile, nameFileNames);
257 //go through files and make sure they are good, if not, then disregard them
258 for (int i = 0; i < nameFileNames.size(); i++) {
261 if (nameFileNames[i] == "current") {
262 nameFileNames[i] = m->getNameFile();
263 if (nameFileNames[i] != "") { m->mothurOut("Using " + nameFileNames[i] + " as input file for the name parameter where you had given current."); m->mothurOutEndLine(); }
265 m->mothurOut("You have no current namefile, ignoring current."); m->mothurOutEndLine(); ignore=true;
266 //erase from file list
267 nameFileNames.erase(nameFileNames.begin()+i);
274 if (inputDir != "") {
275 string path = m->hasPath(nameFileNames[i]);
276 //if the user has not given a path then, add inputdir. else leave path alone.
277 if (path == "") { nameFileNames[i] = inputDir + nameFileNames[i]; }
283 ableToOpen = m->openInputFile(nameFileNames[i], in, "noerror");
285 //if you can't open it, try default location
286 if (ableToOpen == 1) {
287 if (m->getDefaultPath() != "") { //default path is set
288 string tryPath = m->getDefaultPath() + m->getSimpleName(nameFileNames[i]);
289 m->mothurOut("Unable to open " + nameFileNames[i] + ". Trying default " + tryPath); m->mothurOutEndLine();
291 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
293 nameFileNames[i] = tryPath;
297 if (ableToOpen == 1) {
298 if (m->getOutputDir() != "") { //default path is set
299 string tryPath = m->getOutputDir() + m->getSimpleName(nameFileNames[i]);
300 m->mothurOut("Unable to open " + nameFileNames[i] + ". Trying output directory " + tryPath); m->mothurOutEndLine();
302 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
304 nameFileNames[i] = tryPath;
310 if (ableToOpen == 1) {
311 m->mothurOut("Unable to open " + nameFileNames[i] + ". It will be disregarded."); m->mothurOutEndLine();
312 //erase from file list
313 nameFileNames.erase(nameFileNames.begin()+i);
316 m->setNameFile(nameFileNames[i]);
322 if (nameFileNames.size() != 0) { hasName = true; }
324 //check for required parameters
325 vector<string> countfileNames;
326 countfile = validParameter.validFile(parameters, "count", false);
327 if (countfile == "not found") {
330 m->splitAtDash(countfile, countfileNames);
332 //go through files and make sure they are good, if not, then disregard them
333 for (int i = 0; i < countfileNames.size(); i++) {
336 if (countfileNames[i] == "current") {
337 countfileNames[i] = m->getCountTableFile();
338 if (nameFileNames[i] != "") { m->mothurOut("Using " + countfileNames[i] + " as input file for the count parameter where you had given current."); m->mothurOutEndLine(); }
340 m->mothurOut("You have no current count file, ignoring current."); m->mothurOutEndLine(); ignore=true;
341 //erase from file list
342 countfileNames.erase(countfileNames.begin()+i);
349 if (inputDir != "") {
350 string path = m->hasPath(countfileNames[i]);
351 //if the user has not given a path then, add inputdir. else leave path alone.
352 if (path == "") { countfileNames[i] = inputDir + countfileNames[i]; }
358 ableToOpen = m->openInputFile(countfileNames[i], in, "noerror");
360 //if you can't open it, try default location
361 if (ableToOpen == 1) {
362 if (m->getDefaultPath() != "") { //default path is set
363 string tryPath = m->getDefaultPath() + m->getSimpleName(countfileNames[i]);
364 m->mothurOut("Unable to open " + countfileNames[i] + ". Trying default " + tryPath); m->mothurOutEndLine();
366 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
368 countfileNames[i] = tryPath;
372 if (ableToOpen == 1) {
373 if (m->getOutputDir() != "") { //default path is set
374 string tryPath = m->getOutputDir() + m->getSimpleName(countfileNames[i]);
375 m->mothurOut("Unable to open " + countfileNames[i] + ". Trying output directory " + tryPath); m->mothurOutEndLine();
377 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
379 countfileNames[i] = tryPath;
385 if (ableToOpen == 1) {
386 m->mothurOut("Unable to open " + countfileNames[i] + ". It will be disregarded."); m->mothurOutEndLine();
387 //erase from file list
388 countfileNames.erase(countfileNames.begin()+i);
391 m->setCountTableFile(countfileNames[i]);
397 if (countfileNames.size() != 0) { hasCount = true; }
399 //make sure there is at least one valid file left
400 if (hasName && hasCount) { m->mothurOut("[ERROR]: You must enter ONLY ONE of the following: count or name."); m->mothurOutEndLine(); abort = true; }
402 if (!hasName && hasCount) { nameFileNames = countfileNames; }
404 if ((hasCount || hasName) && (nameFileNames.size() != fastaFileNames.size())) { m->mothurOut("[ERROR]: The number of name or count files does not match the number of fastafiles, please correct."); m->mothurOutEndLine(); abort=true; }
406 bool hasGroup = true;
407 groupfile = validParameter.validFile(parameters, "group", false);
408 if (groupfile == "not found") { groupfile = ""; hasGroup = false; }
410 m->splitAtDash(groupfile, groupFileNames);
412 //go through files and make sure they are good, if not, then disregard them
413 for (int i = 0; i < groupFileNames.size(); i++) {
416 if (groupFileNames[i] == "current") {
417 groupFileNames[i] = m->getGroupFile();
418 if (groupFileNames[i] != "") { m->mothurOut("Using " + groupFileNames[i] + " as input file for the group parameter where you had given current."); m->mothurOutEndLine(); }
420 m->mothurOut("You have no current namefile, ignoring current."); m->mothurOutEndLine(); ignore=true;
421 //erase from file list
422 groupFileNames.erase(groupFileNames.begin()+i);
429 if (inputDir != "") {
430 string path = m->hasPath(groupFileNames[i]);
431 //if the user has not given a path then, add inputdir. else leave path alone.
432 if (path == "") { groupFileNames[i] = inputDir + groupFileNames[i]; }
438 ableToOpen = m->openInputFile(groupFileNames[i], in, "noerror");
440 //if you can't open it, try default location
441 if (ableToOpen == 1) {
442 if (m->getDefaultPath() != "") { //default path is set
443 string tryPath = m->getDefaultPath() + m->getSimpleName(groupFileNames[i]);
444 m->mothurOut("Unable to open " + groupFileNames[i] + ". Trying default " + tryPath); m->mothurOutEndLine();
446 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
448 groupFileNames[i] = tryPath;
452 if (ableToOpen == 1) {
453 if (m->getOutputDir() != "") { //default path is set
454 string tryPath = m->getOutputDir() + m->getSimpleName(groupFileNames[i]);
455 m->mothurOut("Unable to open " + groupFileNames[i] + ". Trying output directory " + tryPath); m->mothurOutEndLine();
457 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
459 groupFileNames[i] = tryPath;
465 if (ableToOpen == 1) {
466 m->mothurOut("Unable to open " + groupFileNames[i] + ". It will be disregarded."); m->mothurOutEndLine();
467 //erase from file list
468 groupFileNames.erase(groupFileNames.begin()+i);
471 m->setGroupFile(groupFileNames[i]);
476 //make sure there is at least one valid file left
477 if (groupFileNames.size() == 0) { m->mothurOut("[ERROR]: no valid group files."); m->mothurOutEndLine(); abort = true; }
480 if (hasGroup && (groupFileNames.size() != fastaFileNames.size())) { m->mothurOut("[ERROR]: The number of groupfiles does not match the number of fastafiles, please correct."); m->mothurOutEndLine(); abort=true; }
482 if (hasGroup && hasCount) { m->mothurOut("[ERROR]: You must enter ONLY ONE of the following: count or group."); m->mothurOutEndLine(); abort = true; }
483 //if the user changes the output directory command factory will send this info to us in the output parameter
484 outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = ""; }
487 //if the user changes the output directory command factory will send this info to us in the output parameter
488 outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = ""; }
491 it = parameters.find("reference");
492 //user has given a template file
493 if(it != parameters.end()){
494 if (it->second == "self") { templatefile = "self"; }
496 path = m->hasPath(it->second);
497 //if the user has not given a path then, add inputdir. else leave path alone.
498 if (path == "") { parameters["reference"] = inputDir + it->second; }
500 templatefile = validParameter.validFile(parameters, "reference", true);
501 if (templatefile == "not open") { abort = true; }
502 else if (templatefile == "not found") { //check for saved reference sequences
503 if (rdb->getSavedReference() != "") {
504 templatefile = rdb->getSavedReference();
505 m->mothurOutEndLine(); m->mothurOut("Using sequences from " + rdb->getSavedReference() + "."); m->mothurOutEndLine();
507 m->mothurOut("[ERROR]: You don't have any saved reference sequences and the reference parameter is a required.");
508 m->mothurOutEndLine();
513 }else if (hasName) { templatefile = "self"; }
514 else if (hasCount) { templatefile = "self"; }
516 if (rdb->getSavedReference() != "") {
517 templatefile = rdb->getSavedReference();
518 m->mothurOutEndLine(); m->mothurOut("Using sequences from " + rdb->getSavedReference() + "."); m->mothurOutEndLine();
520 m->mothurOut("[ERROR]: You don't have any saved reference sequences and the reference parameter is a required.");
521 m->mothurOutEndLine();
522 templatefile = ""; abort = true;
526 string temp = validParameter.validFile(parameters, "processors", false); if (temp == "not found"){ temp = m->getProcessors(); }
527 m->setProcessors(temp);
528 m->mothurConvert(temp, processors);
530 abskew = validParameter.validFile(parameters, "abskew", false); if (abskew == "not found"){ useAbskew = false; abskew = "1.9"; }else{ useAbskew = true; }
531 if (useAbskew && templatefile != "self") { m->mothurOut("The abskew parameter is only valid with template=self, ignoring."); m->mothurOutEndLine(); useAbskew = false; }
533 temp = validParameter.validFile(parameters, "chimealns", false); if (temp == "not found") { temp = "f"; }
534 chimealns = m->isTrue(temp);
536 minh = validParameter.validFile(parameters, "minh", false); if (minh == "not found") { useMinH = false; minh = "0.3"; } else{ useMinH = true; }
537 mindiv = validParameter.validFile(parameters, "mindiv", false); if (mindiv == "not found") { useMindiv = false; mindiv = "0.5"; } else{ useMindiv = true; }
538 xn = validParameter.validFile(parameters, "xn", false); if (xn == "not found") { useXn = false; xn = "8.0"; } else{ useXn = true; }
539 dn = validParameter.validFile(parameters, "dn", false); if (dn == "not found") { useDn = false; dn = "1.4"; } else{ useDn = true; }
540 xa = validParameter.validFile(parameters, "xa", false); if (xa == "not found") { useXa = false; xa = "1"; } else{ useXa = true; }
541 chunks = validParameter.validFile(parameters, "chunks", false); if (chunks == "not found") { useChunks = false; chunks = "4"; } else{ useChunks = true; }
542 minchunk = validParameter.validFile(parameters, "minchunk", false); if (minchunk == "not found") { useMinchunk = false; minchunk = "64"; } else{ useMinchunk = true; }
543 idsmoothwindow = validParameter.validFile(parameters, "idsmoothwindow", false); if (idsmoothwindow == "not found") { useIdsmoothwindow = false; idsmoothwindow = "32"; } else{ useIdsmoothwindow = true; }
544 //minsmoothid = validParameter.validFile(parameters, "minsmoothid", false); if (minsmoothid == "not found") { useMinsmoothid = false; minsmoothid = "0.95"; } else{ useMinsmoothid = true; }
545 maxp = validParameter.validFile(parameters, "maxp", false); if (maxp == "not found") { useMaxp = false; maxp = "2"; } else{ useMaxp = true; }
546 minlen = validParameter.validFile(parameters, "minlen", false); if (minlen == "not found") { useMinlen = false; minlen = "10"; } else{ useMinlen = true; }
547 maxlen = validParameter.validFile(parameters, "maxlen", false); if (maxlen == "not found") { useMaxlen = false; maxlen = "10000"; } else{ useMaxlen = true; }
549 temp = validParameter.validFile(parameters, "ucl", false); if (temp == "not found") { temp = "f"; }
550 ucl = m->isTrue(temp);
552 queryfract = validParameter.validFile(parameters, "queryfract", false); if (queryfract == "not found") { useQueryfract = false; queryfract = "0.5"; } else{ useQueryfract = true; }
553 if (!ucl && useQueryfract) { m->mothurOut("queryfact may only be used when ucl=t, ignoring."); m->mothurOutEndLine(); useQueryfract = false; }
555 temp = validParameter.validFile(parameters, "skipgaps", false); if (temp == "not found") { temp = "t"; }
556 skipgaps = m->isTrue(temp);
558 temp = validParameter.validFile(parameters, "skipgaps2", false); if (temp == "not found") { temp = "t"; }
559 skipgaps2 = m->isTrue(temp);
561 if (hasName && (templatefile != "self")) { m->mothurOut("You have provided a namefile and the reference parameter is not set to self. I am not sure what reference you are trying to use, aborting."); m->mothurOutEndLine(); abort=true; }
562 if (hasGroup && (templatefile != "self")) { m->mothurOut("You have provided a group file and the reference parameter is not set to self. I am not sure what reference you are trying to use, aborting."); m->mothurOutEndLine(); abort=true; }
564 //look for uchime exe
566 string tempPath = path;
567 for (int i = 0; i < path.length(); i++) { tempPath[i] = tolower(path[i]); }
568 path = path.substr(0, (tempPath.find_last_of('m')));
570 string uchimeCommand;
571 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
572 uchimeCommand = path + "uchime"; // format the database, -o option gives us the ability
574 m->mothurOut("[DEBUG]: Uchime location using \"which uchime\" = ");
575 Command* newCommand = new SystemCommand("which uchime"); m->mothurOutEndLine();
576 newCommand->execute();
578 m->mothurOut("[DEBUG]: Mothur's location using \"which mothur\" = ");
579 newCommand = new SystemCommand("which mothur"); m->mothurOutEndLine();
580 newCommand->execute();
584 uchimeCommand = path + "uchime.exe";
587 //test to make sure uchime exists
589 uchimeCommand = m->getFullPathName(uchimeCommand);
590 int ableToOpen = m->openInputFile(uchimeCommand, in, "no error"); in.close();
591 if(ableToOpen == 1) {
592 m->mothurOut(uchimeCommand + " file does not exist. Checking path... \n");
593 //check to see if uchime is in the path??
595 string uLocation = m->findProgramPath("uchime");
599 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
600 ableToOpen = m->openInputFile(uLocation, in2, "no error"); in2.close();
602 ableToOpen = m->openInputFile((uLocation + ".exe"), in2, "no error"); in2.close();
605 if(ableToOpen == 1) { m->mothurOut("[ERROR]: " + uLocation + " file does not exist. mothur requires the uchime executable."); m->mothurOutEndLine(); abort = true; }
606 else { m->mothurOut("Found uchime in your path, using " + uLocation + "\n");uchimeLocation = uLocation; }
607 }else { uchimeLocation = uchimeCommand; }
609 uchimeLocation = m->getFullPathName(uchimeLocation);
612 catch(exception& e) {
613 m->errorOut(e, "ChimeraSlayerCommand", "ChimeraSlayerCommand");
617 //***************************************************************************************************************
619 int ChimeraUchimeCommand::execute(){
622 if (abort == true) { if (calledHelp) { return 0; } return 2; }
624 m->mothurOut("\nuchime by Robert C. Edgar\nhttp://drive5.com/uchime\nThis code is donated to the public domain.\n\n");
626 for (int s = 0; s < fastaFileNames.size(); s++) {
628 m->mothurOut("Checking sequences from " + fastaFileNames[s] + " ..." ); m->mothurOutEndLine();
630 int start = time(NULL);
631 string nameFile = "";
632 if (outputDir == "") { outputDir = m->hasPath(fastaFileNames[s]); }//if user entered a file with a path then preserve it
633 string outputFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + getOutputFileNameTag("chimera");
634 string accnosFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + getOutputFileNameTag("accnos");
635 string alnsFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + getOutputFileNameTag("alns");
636 string newFasta = m->getRootName(fastaFileNames[s]) + "temp";
638 //you provided a groupfile
639 string groupFile = "";
640 bool hasGroup = false;
641 if (groupFileNames.size() != 0) { groupFile = groupFileNames[s]; hasGroup = true; }
644 if (ct.testGroups(nameFileNames[s])) { hasGroup = true; }
647 if ((templatefile == "self") && (!hasGroup)) { //you want to run uchime with a template=self and no groups
649 if (processors != 1) { m->mothurOut("When using template=self, mothur can only use 1 processor, continuing."); m->mothurOutEndLine(); processors = 1; }
650 if (nameFileNames.size() != 0) { //you provided a namefile and we don't need to create one
651 nameFile = nameFileNames[s];
652 }else { nameFile = getNamesFile(fastaFileNames[s]); }
654 map<string, string> seqs;
655 readFasta(fastaFileNames[s], seqs); if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } return 0; }
658 vector<seqPriorityNode> nameMapCount;
662 ct.readTable(nameFile);
663 for(map<string, string>::iterator it = seqs.begin(); it != seqs.end(); it++) {
664 int num = ct.getNumSeqs(it->first);
665 if (num == 0) { error = 1; }
667 seqPriorityNode temp(num, it->second, it->first);
668 nameMapCount.push_back(temp);
672 error = m->readNames(nameFile, nameMapCount, seqs); if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } return 0; }
674 if (error == 1) { for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } return 0; }
675 if (seqs.size() != nameMapCount.size()) { m->mothurOut( "The number of sequences in your fastafile does not match the number of sequences in your namefile, aborting."); m->mothurOutEndLine(); for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } return 0; }
677 printFile(nameMapCount, newFasta);
678 fastaFileNames[s] = newFasta;
681 if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } return 0; }
684 if (nameFileNames.size() != 0) { //you provided a namefile and we don't need to create one
685 nameFile = nameFileNames[s];
686 }else { nameFile = getNamesFile(fastaFileNames[s]); }
688 //Parse sequences by group
689 vector<string> groups;
690 map<string, string> uniqueNames;
692 cparser = new SequenceCountParser(nameFile, fastaFileNames[s]);
693 groups = cparser->getNamesOfGroups();
694 uniqueNames = cparser->getAllSeqsMap();
696 sparser = new SequenceParser(groupFile, fastaFileNames[s], nameFile);
697 groups = sparser->getNamesOfGroups();
698 uniqueNames = sparser->getAllSeqsMap();
701 if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } return 0; }
704 ofstream out, out1, out2;
705 m->openOutputFile(outputFileName, out); out.close();
706 m->openOutputFile(accnosFileName, out1); out1.close();
707 if (chimealns) { m->openOutputFile(alnsFileName, out2); out2.close(); }
710 if(processors == 1) { totalSeqs = driverGroups(outputFileName, newFasta, accnosFileName, alnsFileName, 0, groups.size(), groups); }
711 else { totalSeqs = createProcessesGroups(outputFileName, newFasta, accnosFileName, alnsFileName, groups, nameFile, groupFile, fastaFileNames[s]); }
713 if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } return 0; }
714 if (hasCount) { delete cparser; }
715 else { delete sparser; }
717 int totalChimeras = deconvoluteResults(uniqueNames, outputFileName, accnosFileName, alnsFileName);
719 m->mothurOutEndLine(); m->mothurOut("It took " + toString(time(NULL) - start) + " secs to check " + toString(totalSeqs) + " sequences. " + toString(totalChimeras) + " chimeras were found."); m->mothurOutEndLine();
720 m->mothurOut("The number of sequences checked may be larger than the number of unique sequences because some sequences are found in several samples."); m->mothurOutEndLine();
722 if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } return 0; }
725 if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } return 0; }
730 if(processors == 1){ numSeqs = driver(outputFileName, fastaFileNames[s], accnosFileName, alnsFileName, numChimeras); }
731 else{ numSeqs = createProcesses(outputFileName, fastaFileNames[s], accnosFileName, alnsFileName, numChimeras); }
735 m->openOutputFile(outputFileName+".temp", out);
736 out << "Score\tQuery\tParentA\tParentB\tIdQM\tIdQA\tIdQB\tIdAB\tIdQT\tLY\tLN\tLA\tRY\tRN\tRA\tDiv\tYN\n";
739 m->appendFiles(outputFileName, outputFileName+".temp");
740 m->mothurRemove(outputFileName); rename((outputFileName+".temp").c_str(), outputFileName.c_str());
742 if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } return 0; }
744 //remove file made for uchime
745 if (templatefile == "self") { m->mothurRemove(fastaFileNames[s]); }
747 m->mothurOutEndLine(); m->mothurOut("It took " + toString(time(NULL) - start) + " secs to check " + toString(numSeqs) + " sequences. " + toString(numChimeras) + " chimeras were found."); m->mothurOutEndLine();
750 outputNames.push_back(outputFileName); outputTypes["chimera"].push_back(outputFileName);
751 outputNames.push_back(accnosFileName); outputTypes["accnos"].push_back(accnosFileName);
752 if (chimealns) { outputNames.push_back(alnsFileName); outputTypes["alns"].push_back(alnsFileName); }
755 //set accnos file as new current accnosfile
757 itTypes = outputTypes.find("accnos");
758 if (itTypes != outputTypes.end()) {
759 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setAccnosFile(current); }
762 m->mothurOutEndLine();
763 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
764 for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); }
765 m->mothurOutEndLine();
770 catch(exception& e) {
771 m->errorOut(e, "ChimeraUchimeCommand", "execute");
775 //**********************************************************************************************************************
776 int ChimeraUchimeCommand::deconvoluteResults(map<string, string>& uniqueNames, string outputFileName, string accnosFileName, string alnsFileName){
778 map<string, string>::iterator itUnique;
783 m->openInputFile(accnosFileName, in2);
786 m->openOutputFile(accnosFileName+".temp", out2);
789 set<string> namesInFile; //this is so if a sequence is found to be chimera in several samples we dont write it to the results file more than once
790 set<string>::iterator itNames;
791 set<string> chimerasInFile;
792 set<string>::iterator itChimeras;
796 if (m->control_pressed) { in2.close(); out2.close(); m->mothurRemove(outputFileName); m->mothurRemove((accnosFileName+".temp")); return 0; }
798 in2 >> name; m->gobble(in2);
801 itUnique = uniqueNames.find(name);
803 if (itUnique == uniqueNames.end()) { m->mothurOut("[ERROR]: trouble parsing accnos results. Cannot find "+ name + "."); m->mothurOutEndLine(); m->control_pressed = true; }
805 itChimeras = chimerasInFile.find((itUnique->second));
807 if (itChimeras == chimerasInFile.end()) {
808 out2 << itUnique->second << endl;
809 chimerasInFile.insert((itUnique->second));
817 m->mothurRemove(accnosFileName);
818 rename((accnosFileName+".temp").c_str(), accnosFileName.c_str());
824 m->openInputFile(outputFileName, in);
827 m->openOutputFile(outputFileName+".temp", out); out.setf(ios::fixed, ios::floatfield); out.setf(ios::showpoint);
828 out << "Score\tQuery\tParentA\tParentB\tIdQM\tIdQA\tIdQB\tIdAB\tIdQT\tLY\tLN\tLA\tRY\tRN\tRA\tDiv\tYN\n";
831 string parent1, parent2, temp2, temp3, temp4, temp5, temp6, temp7, temp8, temp9, temp10, temp11, temp12, temp13, flag;
834 //assumptions - in file each read will always look like - if uchime source is updated, revisit this code.
835 /* 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
836 0.000000 F11Fcsw_33372/ab=18/ * * * * * * * * * * * * * * N
837 0.018300 F11Fcsw_14980/ab=16/ F11Fcsw_1915/ab=35/ F11Fcsw_6032/ab=42/ 79.9 78.7 78.2 78.7 79.2 3 0 5 11 10 20 1.46 N
842 if (m->control_pressed) { in.close(); out.close(); m->mothurRemove((outputFileName+".temp")); return 0; }
845 in >> temp1; m->gobble(in);
846 in >> name; m->gobble(in);
847 in >> parent1; m->gobble(in);
848 in >> parent2; m->gobble(in);
849 in >> temp2 >> temp3 >> temp4 >> temp5 >> temp6 >> temp7 >> temp8 >> temp9 >> temp10 >> temp11 >> temp12 >> temp13 >> flag;
852 //parse name - name will look like U68590/ab=1/
853 string restOfName = "";
854 int pos = name.find_first_of('/');
855 if (pos != string::npos) {
856 restOfName = name.substr(pos);
857 name = name.substr(0, pos);
861 itUnique = uniqueNames.find(name);
863 if (itUnique == uniqueNames.end()) { m->mothurOut("[ERROR]: trouble parsing chimera results. Cannot find "+ name + "."); m->mothurOutEndLine(); m->control_pressed = true; }
865 name = itUnique->second;
866 //is this name already in the file
867 itNames = namesInFile.find((name));
869 if (itNames == namesInFile.end()) { //no not in file
870 if (flag == "N") { //are you really a no??
871 //is this sequence really not chimeric??
872 itChimeras = chimerasInFile.find(name);
874 //then you really are a no so print, otherwise skip
875 if (itChimeras == chimerasInFile.end()) { print = true; }
876 }else{ print = true; }
881 out << temp1 << '\t' << name << restOfName << '\t';
882 namesInFile.insert(name);
884 //parse parent1 names
885 if (parent1 != "*") {
887 pos = parent1.find_first_of('/');
888 if (pos != string::npos) {
889 restOfName = parent1.substr(pos);
890 parent1 = parent1.substr(0, pos);
893 itUnique = uniqueNames.find(parent1);
894 if (itUnique == uniqueNames.end()) { m->mothurOut("[ERROR]: trouble parsing chimera results. Cannot find parentA "+ parent1 + "."); m->mothurOutEndLine(); m->control_pressed = true; }
895 else { out << itUnique->second << restOfName << '\t'; }
896 }else { out << parent1 << '\t'; }
898 //parse parent2 names
899 if (parent2 != "*") {
901 pos = parent2.find_first_of('/');
902 if (pos != string::npos) {
903 restOfName = parent2.substr(pos);
904 parent2 = parent2.substr(0, pos);
907 itUnique = uniqueNames.find(parent2);
908 if (itUnique == uniqueNames.end()) { m->mothurOut("[ERROR]: trouble parsing chimera results. Cannot find parentB "+ parent2 + "."); m->mothurOutEndLine(); m->control_pressed = true; }
909 else { out << itUnique->second << restOfName << '\t'; }
910 }else { out << parent2 << '\t'; }
912 out << temp2 << '\t' << temp3 << '\t' << temp4 << '\t' << temp5 << '\t' << temp6 << '\t' << temp7 << '\t' << temp8 << '\t' << temp9 << '\t' << temp10 << '\t' << temp11 << '\t' << temp12 << temp13 << '\t' << flag << endl;
918 m->mothurRemove(outputFileName);
919 rename((outputFileName+".temp").c_str(), outputFileName.c_str());
923 //assumptions - in file each read will always look like - if uchime source is updated, revisit this code.
925 ------------------------------------------------------------------------
926 Query ( 179 nt) F21Fcsw_11639/ab=591/
927 ParentA ( 179 nt) F11Fcsw_6529/ab=1625/
928 ParentB ( 181 nt) F21Fcsw_12128/ab=1827/
930 A 1 AAGgAAGAtTAATACaagATGgCaTCatgAGtccgCATgTtcAcatGATTAAAG--gTaTtcCGGTagacGATGGGGATG 78
931 Q 1 AAGTAAGACTAATACCCAATGACGTCTCTAGAAGACATCTGAAAGAGATTAAAG--ATTTATCGGTGATGGATGGGGATG 78
932 B 1 AAGgAAGAtTAATcCaggATGggaTCatgAGttcACATgTccgcatGATTAAAGgtATTTtcCGGTagacGATGGGGATG 80
933 Diffs N N A N?N N N NNN N?NB N ?NaNNN B B NN NNNN
934 Votes 0 0 + 000 0 0 000 000+ 0 00!000 + 00 0000
935 Model AAAAAAAAAAAAAAAAAAAAAAxBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB
937 A 79 CGTtccATTAGaTaGTaGGCGGGGTAACGGCCCACCtAGtCttCGATggaTAGGGGTTCTGAGAGGAAGGTCCCCCACAT 158
938 Q 79 CGTCTGATTAGCTTGTTGGCGGGGTAACGGCCCACCAAGGCAACGATCAGTAGGGGTTCTGAGAGGAAGGTCCCCCACAT 158
939 B 81 CGTtccATTAGaTaGTaGGCGGGGTAACGGCCCACCtAGtCAACGATggaTAGGGGTTCTGAGAGGAAGGTCCCCCACAT 160
940 Diffs NNN N N N N N BB NNN
941 Votes 000 0 0 0 0 0 ++ 000
942 Model BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB
944 A 159 TGGAACTGAGACACGGTCCAA 179
945 Q 159 TGGAACTGAGACACGGTCCAA 179
946 B 161 TGGAACTGAGACACGGTCCAA 181
949 Model BBBBBBBBBBBBBBBBBBBBB
951 Ids. QA 76.6%, QB 77.7%, AB 93.7%, QModel 78.9%, Div. +1.5%
952 Diffs Left 7: N 0, A 6, Y 1 (14.3%); Right 35: N 1, A 30, Y 4 (11.4%), Score 0.0047
956 m->openInputFile(alnsFileName, in3);
959 m->openOutputFile(alnsFileName+".temp", out3); out3.setf(ios::fixed, ios::floatfield); out3.setf(ios::showpoint);
966 if (m->control_pressed) { in3.close(); out3.close(); m->mothurRemove(outputFileName); m->mothurRemove((accnosFileName)); m->mothurRemove((alnsFileName+".temp")); return 0; }
969 line = m->getline(in3);
973 istringstream iss(line);
976 //are you a name line
977 if ((temp == "Query") || (temp == "ParentA") || (temp == "ParentB")) {
979 for (int i = 0; i < line.length(); i++) {
981 if (line[i] == ')') { break; }
982 else { out3 << line[i]; }
985 if (spot == (line.length() - 1)) { m->mothurOut("[ERROR]: could not line sequence name in line " + line + "."); m->mothurOutEndLine(); m->control_pressed = true; }
986 else if ((spot+2) > (line.length() - 1)) { m->mothurOut("[ERROR]: could not line sequence name in line " + line + "."); m->mothurOutEndLine(); m->control_pressed = true; }
988 out << line[spot] << line[spot+1];
990 name = line.substr(spot+2);
992 //parse name - name will either look like U68590/ab=1/ or U68590
993 string restOfName = "";
994 int pos = name.find_first_of('/');
995 if (pos != string::npos) {
996 restOfName = name.substr(pos);
997 name = name.substr(0, pos);
1001 itUnique = uniqueNames.find(name);
1003 if (itUnique == uniqueNames.end()) { m->mothurOut("[ERROR]: trouble parsing alns results. Cannot find "+ name + "."); m->mothurOutEndLine();m->control_pressed = true; }
1005 //only limit repeats on query names
1006 if (temp == "Query") {
1007 itNames = namesInFile.find((itUnique->second));
1009 if (itNames == namesInFile.end()) {
1010 out << itUnique->second << restOfName << endl;
1011 namesInFile.insert((itUnique->second));
1013 }else { out << itUnique->second << restOfName << endl; }
1018 }else { //not need to alter line
1019 out3 << line << endl;
1021 }else { out3 << endl; }
1026 m->mothurRemove(alnsFileName);
1027 rename((alnsFileName+".temp").c_str(), alnsFileName.c_str());
1032 catch(exception& e) {
1033 m->errorOut(e, "ChimeraUchimeCommand", "deconvoluteResults");
1037 //**********************************************************************************************************************
1038 int ChimeraUchimeCommand::printFile(vector<seqPriorityNode>& nameMapCount, string filename){
1041 sort(nameMapCount.begin(), nameMapCount.end(), compareSeqPriorityNodes);
1044 m->openOutputFile(filename, out);
1046 //print new file in order of
1047 for (int i = 0; i < nameMapCount.size(); i++) {
1048 out << ">" << nameMapCount[i].name << "/ab=" << nameMapCount[i].numIdentical << "/" << endl << nameMapCount[i].seq << endl;
1054 catch(exception& e) {
1055 m->errorOut(e, "ChimeraUchimeCommand", "printFile");
1059 //**********************************************************************************************************************
1060 int ChimeraUchimeCommand::readFasta(string filename, map<string, string>& seqs){
1062 //create input file for uchime
1063 //read through fastafile and store info
1065 m->openInputFile(filename, in);
1069 if (m->control_pressed) { in.close(); return 0; }
1071 Sequence seq(in); m->gobble(in);
1072 seqs[seq.getName()] = seq.getAligned();
1078 catch(exception& e) {
1079 m->errorOut(e, "ChimeraUchimeCommand", "readFasta");
1083 //**********************************************************************************************************************
1085 string ChimeraUchimeCommand::getNamesFile(string& inputFile){
1087 string nameFile = "";
1089 m->mothurOutEndLine(); m->mothurOut("No namesfile given, running unique.seqs command to generate one."); m->mothurOutEndLine(); m->mothurOutEndLine();
1091 //use unique.seqs to create new name and fastafile
1092 string inputString = "fasta=" + inputFile;
1093 m->mothurOut("/******************************************/"); m->mothurOutEndLine();
1094 m->mothurOut("Running command: unique.seqs(" + inputString + ")"); m->mothurOutEndLine();
1095 m->mothurCalling = true;
1097 Command* uniqueCommand = new DeconvoluteCommand(inputString);
1098 uniqueCommand->execute();
1100 map<string, vector<string> > filenames = uniqueCommand->getOutputFiles();
1102 delete uniqueCommand;
1103 m->mothurCalling = false;
1104 m->mothurOut("/******************************************/"); m->mothurOutEndLine();
1106 nameFile = filenames["name"][0];
1107 inputFile = filenames["fasta"][0];
1111 catch(exception& e) {
1112 m->errorOut(e, "ChimeraUchimeCommand", "getNamesFile");
1116 //**********************************************************************************************************************
1117 int ChimeraUchimeCommand::driverGroups(string outputFName, string filename, string accnos, string alns, int start, int end, vector<string> groups){
1121 int numChimeras = 0;
1123 for (int i = start; i < end; i++) {
1124 int start = time(NULL); if (m->control_pressed) { return 0; }
1127 if (hasCount) { error = cparser->getSeqs(groups[i], filename, true); if ((error == 1) || m->control_pressed) { return 0; } }
1128 else { error = sparser->getSeqs(groups[i], filename, true); if ((error == 1) || m->control_pressed) { return 0; } }
1130 int numSeqs = driver((outputFName + groups[i]), filename, (accnos+ groups[i]), (alns+ groups[i]), numChimeras);
1131 totalSeqs += numSeqs;
1133 if (m->control_pressed) { return 0; }
1135 //remove file made for uchime
1136 if (!m->debug) { m->mothurRemove(filename); }
1137 else { m->mothurOut("[DEBUG]: saving file: " + filename + ".\n"); }
1140 m->appendFiles((outputFName+groups[i]), outputFName); m->mothurRemove((outputFName+groups[i]));
1141 m->appendFiles((accnos+groups[i]), accnos); m->mothurRemove((accnos+groups[i]));
1142 if (chimealns) { m->appendFiles((alns+groups[i]), alns); m->mothurRemove((alns+groups[i])); }
1144 m->mothurOutEndLine(); m->mothurOut("It took " + toString(time(NULL) - start) + " secs to check " + toString(numSeqs) + " sequences from group " + groups[i] + "."); m->mothurOutEndLine();
1149 catch(exception& e) {
1150 m->errorOut(e, "ChimeraUchimeCommand", "driverGroups");
1154 //**********************************************************************************************************************
1156 int ChimeraUchimeCommand::driver(string outputFName, string filename, string accnos, string alns, int& numChimeras){
1159 outputFName = m->getFullPathName(outputFName);
1160 filename = m->getFullPathName(filename);
1161 alns = m->getFullPathName(alns);
1163 //to allow for spaces in the path
1164 outputFName = "\"" + outputFName + "\"";
1165 filename = "\"" + filename + "\"";
1166 alns = "\"" + alns + "\"";
1168 vector<char*> cPara;
1170 string uchimeCommand = uchimeLocation;
1171 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
1172 uchimeCommand += " ";
1174 uchimeCommand = "\"" + uchimeCommand + "\"";
1178 tempUchime= new char[uchimeCommand.length()+1];
1180 strncat(tempUchime, uchimeCommand.c_str(), uchimeCommand.length());
1181 cPara.push_back(tempUchime);
1183 char* tempIn = new char[8];
1184 *tempIn = '\0'; strncat(tempIn, "--input", 7);
1185 //strcpy(tempIn, "--input");
1186 cPara.push_back(tempIn);
1187 char* temp = new char[filename.length()+1];
1188 *temp = '\0'; strncat(temp, filename.c_str(), filename.length());
1189 //strcpy(temp, filename.c_str());
1190 cPara.push_back(temp);
1192 //are you using a reference file
1193 if (templatefile != "self") {
1194 //add reference file
1195 char* tempRef = new char[5];
1196 //strcpy(tempRef, "--db");
1197 *tempRef = '\0'; strncat(tempRef, "--db", 4);
1198 cPara.push_back(tempRef);
1199 char* tempR = new char[templatefile.length()+1];
1200 //strcpy(tempR, templatefile.c_str());
1201 *tempR = '\0'; strncat(tempR, templatefile.c_str(), templatefile.length());
1202 cPara.push_back(tempR);
1205 char* tempO = new char[12];
1206 *tempO = '\0'; strncat(tempO, "--uchimeout", 11);
1207 //strcpy(tempO, "--uchimeout");
1208 cPara.push_back(tempO);
1209 char* tempout = new char[outputFName.length()+1];
1210 //strcpy(tempout, outputFName.c_str());
1211 *tempout = '\0'; strncat(tempout, outputFName.c_str(), outputFName.length());
1212 cPara.push_back(tempout);
1215 char* tempA = new char[13];
1216 *tempA = '\0'; strncat(tempA, "--uchimealns", 12);
1217 //strcpy(tempA, "--uchimealns");
1218 cPara.push_back(tempA);
1219 char* tempa = new char[alns.length()+1];
1220 //strcpy(tempa, alns.c_str());
1221 *tempa = '\0'; strncat(tempa, alns.c_str(), alns.length());
1222 cPara.push_back(tempa);
1226 char* tempskew = new char[9];
1227 *tempskew = '\0'; strncat(tempskew, "--abskew", 8);
1228 //strcpy(tempskew, "--abskew");
1229 cPara.push_back(tempskew);
1230 char* tempSkew = new char[abskew.length()+1];
1231 //strcpy(tempSkew, abskew.c_str());
1232 *tempSkew = '\0'; strncat(tempSkew, abskew.c_str(), abskew.length());
1233 cPara.push_back(tempSkew);
1237 char* tempminh = new char[7];
1238 *tempminh = '\0'; strncat(tempminh, "--minh", 6);
1239 //strcpy(tempminh, "--minh");
1240 cPara.push_back(tempminh);
1241 char* tempMinH = new char[minh.length()+1];
1242 *tempMinH = '\0'; strncat(tempMinH, minh.c_str(), minh.length());
1243 //strcpy(tempMinH, minh.c_str());
1244 cPara.push_back(tempMinH);
1248 char* tempmindiv = new char[9];
1249 *tempmindiv = '\0'; strncat(tempmindiv, "--mindiv", 8);
1250 //strcpy(tempmindiv, "--mindiv");
1251 cPara.push_back(tempmindiv);
1252 char* tempMindiv = new char[mindiv.length()+1];
1253 *tempMindiv = '\0'; strncat(tempMindiv, mindiv.c_str(), mindiv.length());
1254 //strcpy(tempMindiv, mindiv.c_str());
1255 cPara.push_back(tempMindiv);
1259 char* tempxn = new char[5];
1260 //strcpy(tempxn, "--xn");
1261 *tempxn = '\0'; strncat(tempxn, "--xn", 4);
1262 cPara.push_back(tempxn);
1263 char* tempXn = new char[xn.length()+1];
1264 //strcpy(tempXn, xn.c_str());
1265 *tempXn = '\0'; strncat(tempXn, xn.c_str(), xn.length());
1266 cPara.push_back(tempXn);
1270 char* tempdn = new char[5];
1271 //strcpy(tempdn, "--dn");
1272 *tempdn = '\0'; strncat(tempdn, "--dn", 4);
1273 cPara.push_back(tempdn);
1274 char* tempDn = new char[dn.length()+1];
1275 *tempDn = '\0'; strncat(tempDn, dn.c_str(), dn.length());
1276 //strcpy(tempDn, dn.c_str());
1277 cPara.push_back(tempDn);
1281 char* tempxa = new char[5];
1282 //strcpy(tempxa, "--xa");
1283 *tempxa = '\0'; strncat(tempxa, "--xa", 4);
1284 cPara.push_back(tempxa);
1285 char* tempXa = new char[xa.length()+1];
1286 *tempXa = '\0'; strncat(tempXa, xa.c_str(), xa.length());
1287 //strcpy(tempXa, xa.c_str());
1288 cPara.push_back(tempXa);
1292 char* tempchunks = new char[9];
1293 //strcpy(tempchunks, "--chunks");
1294 *tempchunks = '\0'; strncat(tempchunks, "--chunks", 8);
1295 cPara.push_back(tempchunks);
1296 char* tempChunks = new char[chunks.length()+1];
1297 *tempChunks = '\0'; strncat(tempChunks, chunks.c_str(), chunks.length());
1298 //strcpy(tempChunks, chunks.c_str());
1299 cPara.push_back(tempChunks);
1303 char* tempminchunk = new char[11];
1304 //strcpy(tempminchunk, "--minchunk");
1305 *tempminchunk = '\0'; strncat(tempminchunk, "--minchunk", 10);
1306 cPara.push_back(tempminchunk);
1307 char* tempMinchunk = new char[minchunk.length()+1];
1308 *tempMinchunk = '\0'; strncat(tempMinchunk, minchunk.c_str(), minchunk.length());
1309 //strcpy(tempMinchunk, minchunk.c_str());
1310 cPara.push_back(tempMinchunk);
1313 if (useIdsmoothwindow) {
1314 char* tempidsmoothwindow = new char[17];
1315 *tempidsmoothwindow = '\0'; strncat(tempidsmoothwindow, "--idsmoothwindow", 16);
1316 //strcpy(tempidsmoothwindow, "--idsmoothwindow");
1317 cPara.push_back(tempidsmoothwindow);
1318 char* tempIdsmoothwindow = new char[idsmoothwindow.length()+1];
1319 *tempIdsmoothwindow = '\0'; strncat(tempIdsmoothwindow, idsmoothwindow.c_str(), idsmoothwindow.length());
1320 //strcpy(tempIdsmoothwindow, idsmoothwindow.c_str());
1321 cPara.push_back(tempIdsmoothwindow);
1324 /*if (useMinsmoothid) {
1325 char* tempminsmoothid = new char[14];
1326 //strcpy(tempminsmoothid, "--minsmoothid");
1327 *tempminsmoothid = '\0'; strncat(tempminsmoothid, "--minsmoothid", 13);
1328 cPara.push_back(tempminsmoothid);
1329 char* tempMinsmoothid = new char[minsmoothid.length()+1];
1330 *tempMinsmoothid = '\0'; strncat(tempMinsmoothid, minsmoothid.c_str(), minsmoothid.length());
1331 //strcpy(tempMinsmoothid, minsmoothid.c_str());
1332 cPara.push_back(tempMinsmoothid);
1336 char* tempmaxp = new char[7];
1337 //strcpy(tempmaxp, "--maxp");
1338 *tempmaxp = '\0'; strncat(tempmaxp, "--maxp", 6);
1339 cPara.push_back(tempmaxp);
1340 char* tempMaxp = new char[maxp.length()+1];
1341 *tempMaxp = '\0'; strncat(tempMaxp, maxp.c_str(), maxp.length());
1342 //strcpy(tempMaxp, maxp.c_str());
1343 cPara.push_back(tempMaxp);
1347 char* tempskipgaps = new char[13];
1348 //strcpy(tempskipgaps, "--[no]skipgaps");
1349 *tempskipgaps = '\0'; strncat(tempskipgaps, "--noskipgaps", 12);
1350 cPara.push_back(tempskipgaps);
1354 char* tempskipgaps2 = new char[14];
1355 //strcpy(tempskipgaps2, "--[no]skipgaps2");
1356 *tempskipgaps2 = '\0'; strncat(tempskipgaps2, "--noskipgaps2", 13);
1357 cPara.push_back(tempskipgaps2);
1361 char* tempminlen = new char[9];
1362 *tempminlen = '\0'; strncat(tempminlen, "--minlen", 8);
1363 //strcpy(tempminlen, "--minlen");
1364 cPara.push_back(tempminlen);
1365 char* tempMinlen = new char[minlen.length()+1];
1366 //strcpy(tempMinlen, minlen.c_str());
1367 *tempMinlen = '\0'; strncat(tempMinlen, minlen.c_str(), minlen.length());
1368 cPara.push_back(tempMinlen);
1372 char* tempmaxlen = new char[9];
1373 //strcpy(tempmaxlen, "--maxlen");
1374 *tempmaxlen = '\0'; strncat(tempmaxlen, "--maxlen", 8);
1375 cPara.push_back(tempmaxlen);
1376 char* tempMaxlen = new char[maxlen.length()+1];
1377 *tempMaxlen = '\0'; strncat(tempMaxlen, maxlen.c_str(), maxlen.length());
1378 //strcpy(tempMaxlen, maxlen.c_str());
1379 cPara.push_back(tempMaxlen);
1383 char* tempucl = new char[5];
1384 strcpy(tempucl, "--ucl");
1385 cPara.push_back(tempucl);
1388 if (useQueryfract) {
1389 char* tempqueryfract = new char[13];
1390 *tempqueryfract = '\0'; strncat(tempqueryfract, "--queryfract", 12);
1391 //strcpy(tempqueryfract, "--queryfract");
1392 cPara.push_back(tempqueryfract);
1393 char* tempQueryfract = new char[queryfract.length()+1];
1394 *tempQueryfract = '\0'; strncat(tempQueryfract, queryfract.c_str(), queryfract.length());
1395 //strcpy(tempQueryfract, queryfract.c_str());
1396 cPara.push_back(tempQueryfract);
1400 char** uchimeParameters;
1401 uchimeParameters = new char*[cPara.size()];
1402 string commandString = "";
1403 for (int i = 0; i < cPara.size(); i++) { uchimeParameters[i] = cPara[i]; commandString += toString(cPara[i]) + " "; }
1404 //int numArgs = cPara.size();
1406 //uchime_main(numArgs, uchimeParameters);
1407 //cout << "commandString = " << commandString << endl;
1408 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
1410 commandString = "\"" + commandString + "\"";
1412 if (m->debug) { m->mothurOut("[DEBUG]: uchime command = " + commandString + ".\n"); }
1413 system(commandString.c_str());
1416 for(int i = 0; i < cPara.size(); i++) { delete cPara[i]; }
1417 delete[] uchimeParameters;
1419 //remove "" from filenames
1420 outputFName = outputFName.substr(1, outputFName.length()-2);
1421 filename = filename.substr(1, filename.length()-2);
1422 alns = alns.substr(1, alns.length()-2);
1424 if (m->control_pressed) { return 0; }
1426 //create accnos file from uchime results
1428 m->openInputFile(outputFName, in);
1431 m->openOutputFile(accnos, out);
1437 if (m->control_pressed) { break; }
1440 string chimeraFlag = "";
1441 in >> chimeraFlag >> name;
1443 //fix name if needed
1444 if (templatefile == "self") {
1445 name = name.substr(0, name.length()-1); //rip off last /
1446 name = name.substr(0, name.find_last_of('/'));
1449 for (int i = 0; i < 15; i++) { in >> chimeraFlag; }
1452 if (chimeraFlag == "Y") { out << name << endl; numChimeras++; }
1460 catch(exception& e) {
1461 m->errorOut(e, "ChimeraUchimeCommand", "driver");
1465 /**************************************************************************************************/
1467 int ChimeraUchimeCommand::createProcesses(string outputFileName, string filename, string accnos, string alns, int& numChimeras) {
1473 vector<string> files;
1475 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
1476 //break up file into multiple files
1477 m->divideFile(filename, processors, files);
1479 if (m->control_pressed) { return 0; }
1481 //loop through and create all the processes you want
1482 while (process != processors) {
1486 processIDS.push_back(pid); //create map from line number to pid so you can append files in correct order later
1488 }else if (pid == 0){
1489 num = driver(outputFileName + toString(getpid()) + ".temp", files[process], accnos + toString(getpid()) + ".temp", alns + toString(getpid()) + ".temp", numChimeras);
1491 //pass numSeqs to parent
1493 string tempFile = outputFileName + toString(getpid()) + ".num.temp";
1494 m->openOutputFile(tempFile, out);
1496 out << numChimeras << endl;
1501 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine();
1502 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
1508 num = driver(outputFileName, files[0], accnos, alns, numChimeras);
1510 //force parent to wait until all the processes are done
1511 for (int i=0;i<processIDS.size();i++) {
1512 int temp = processIDS[i];
1516 for (int i = 0; i < processIDS.size(); i++) {
1518 string tempFile = outputFileName + toString(processIDS[i]) + ".num.temp";
1519 m->openInputFile(tempFile, in);
1522 in >> tempNum; m->gobble(in);
1525 numChimeras += tempNum;
1527 in.close(); m->mothurRemove(tempFile);
1530 //////////////////////////////////////////////////////////////////////////////////////////////////////
1531 //Windows version shared memory, so be careful when passing variables through the preClusterData struct.
1532 //Above fork() will clone, so memory is separate, but that's not the case with windows,
1533 //////////////////////////////////////////////////////////////////////////////////////////////////////
1538 map<int, ofstream*> filehandles;
1539 map<int, ofstream*>::iterator it3;
1542 for (int i = 0; i < processors; i++) {
1543 temp = new ofstream;
1544 filehandles[i] = temp;
1545 m->openOutputFile(filename+toString(i)+".temp", *(temp));
1546 files.push_back(filename+toString(i)+".temp");
1550 m->openInputFile(filename, in);
1554 if (m->control_pressed) { in.close(); for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) { (*(it3->second)).close(); delete it3->second; } return 0; }
1556 Sequence tempSeq(in); m->gobble(in);
1558 if (tempSeq.getName() != "") {
1559 tempSeq.printSequence(*(filehandles[spot]));
1561 if (spot == processors) { spot = 0; }
1567 for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) {
1568 (*(it3->second)).close();
1572 //sanity check for number of processors
1573 if (count < processors) { processors = count; }
1575 vector<uchimeData*> pDataArray;
1576 DWORD dwThreadIdArray[processors-1];
1577 HANDLE hThreadArray[processors-1];
1578 vector<string> dummy; //used so that we can use the same struct for MyUchimeSeqsThreadFunction and MyUchimeThreadFunction
1580 //Create processor worker threads.
1581 for( int i=1; i<processors; i++ ){
1582 // Allocate memory for thread data.
1583 string extension = toString(i) + ".temp";
1585 uchimeData* tempUchime = new uchimeData(outputFileName+extension, uchimeLocation, templatefile, files[i], "", "", "", accnos+extension, alns+extension, dummy, m, 0, 0, i);
1586 tempUchime->setBooleans(useAbskew, chimealns, useMinH, useMindiv, useXn, useDn, useXa, useChunks, useMinchunk, useIdsmoothwindow, useMinsmoothid, useMaxp, skipgaps, skipgaps2, useMinlen, useMaxlen, ucl, useQueryfract);
1587 tempUchime->setVariables(abskew, minh, mindiv, xn, dn, xa, chunks, minchunk, idsmoothwindow, minsmoothid, maxp, minlen, maxlen, queryfract);
1589 pDataArray.push_back(tempUchime);
1590 processIDS.push_back(i);
1592 //MySeqSumThreadFunction is in header. It must be global or static to work with the threads.
1593 //default security attributes, thread function name, argument to thread function, use default creation flags, returns the thread identifier
1594 hThreadArray[i-1] = CreateThread(NULL, 0, MyUchimeSeqsThreadFunction, pDataArray[i-1], 0, &dwThreadIdArray[i-1]);
1598 //using the main process as a worker saves time and memory
1599 num = driver(outputFileName, files[0], accnos, alns, numChimeras);
1601 //Wait until all threads have terminated.
1602 WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE);
1604 //Close all thread handles and free memory allocations.
1605 for(int i=0; i < pDataArray.size(); i++){
1606 num += pDataArray[i]->count;
1607 numChimeras += pDataArray[i]->numChimeras;
1608 CloseHandle(hThreadArray[i]);
1609 delete pDataArray[i];
1613 //append output files
1614 for(int i=0;i<processIDS.size();i++){
1615 m->appendFiles((outputFileName + toString(processIDS[i]) + ".temp"), outputFileName);
1616 m->mothurRemove((outputFileName + toString(processIDS[i]) + ".temp"));
1618 m->appendFiles((accnos + toString(processIDS[i]) + ".temp"), accnos);
1619 m->mothurRemove((accnos + toString(processIDS[i]) + ".temp"));
1622 m->appendFiles((alns + toString(processIDS[i]) + ".temp"), alns);
1623 m->mothurRemove((alns + toString(processIDS[i]) + ".temp"));
1627 //get rid of the file pieces.
1628 for (int i = 0; i < files.size(); i++) { m->mothurRemove(files[i]); }
1631 catch(exception& e) {
1632 m->errorOut(e, "ChimeraUchimeCommand", "createProcesses");
1636 /**************************************************************************************************/
1638 int ChimeraUchimeCommand::createProcessesGroups(string outputFName, string filename, string accnos, string alns, vector<string> groups, string nameFile, string groupFile, string fastaFile) {
1646 if (groups.size() < processors) { processors = groups.size(); }
1648 //divide the groups between the processors
1649 vector<linePair> lines;
1650 int numGroupsPerProcessor = groups.size() / processors;
1651 for (int i = 0; i < processors; i++) {
1652 int startIndex = i * numGroupsPerProcessor;
1653 int endIndex = (i+1) * numGroupsPerProcessor;
1654 if(i == (processors - 1)){ endIndex = groups.size(); }
1655 lines.push_back(linePair(startIndex, endIndex));
1658 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
1660 //loop through and create all the processes you want
1661 while (process != processors) {
1665 processIDS.push_back(pid); //create map from line number to pid so you can append files in correct order later
1667 }else if (pid == 0){
1668 num = driverGroups(outputFName + toString(getpid()) + ".temp", filename + toString(getpid()) + ".temp", accnos + toString(getpid()) + ".temp", alns + toString(getpid()) + ".temp", lines[process].start, lines[process].end, groups);
1670 //pass numSeqs to parent
1672 string tempFile = outputFName + toString(getpid()) + ".num.temp";
1673 m->openOutputFile(tempFile, out);
1679 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine();
1680 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
1686 num = driverGroups(outputFName, filename, accnos, alns, lines[0].start, lines[0].end, groups);
1688 //force parent to wait until all the processes are done
1689 for (int i=0;i<processIDS.size();i++) {
1690 int temp = processIDS[i];
1694 for (int i = 0; i < processIDS.size(); i++) {
1696 string tempFile = outputFName + toString(processIDS[i]) + ".num.temp";
1697 m->openInputFile(tempFile, in);
1698 if (!in.eof()) { int tempNum = 0; in >> tempNum; num += tempNum; }
1699 in.close(); m->mothurRemove(tempFile);
1703 //////////////////////////////////////////////////////////////////////////////////////////////////////
1704 //Windows version shared memory, so be careful when passing variables through the uchimeData struct.
1705 //Above fork() will clone, so memory is separate, but that's not the case with windows,
1706 //////////////////////////////////////////////////////////////////////////////////////////////////////
1708 vector<uchimeData*> pDataArray;
1709 DWORD dwThreadIdArray[processors-1];
1710 HANDLE hThreadArray[processors-1];
1712 //Create processor worker threads.
1713 for( int i=1; i<processors; i++ ){
1714 // Allocate memory for thread data.
1715 string extension = toString(i) + ".temp";
1717 uchimeData* tempUchime = new uchimeData(outputFName+extension, uchimeLocation, templatefile, filename+extension, fastaFile, nameFile, groupFile, accnos+extension, alns+extension, groups, m, lines[i].start, lines[i].end, i);
1718 tempUchime->setBooleans(useAbskew, chimealns, useMinH, useMindiv, useXn, useDn, useXa, useChunks, useMinchunk, useIdsmoothwindow, useMinsmoothid, useMaxp, skipgaps, skipgaps2, useMinlen, useMaxlen, ucl, useQueryfract);
1719 tempUchime->setVariables(abskew, minh, mindiv, xn, dn, xa, chunks, minchunk, idsmoothwindow, minsmoothid, maxp, minlen, maxlen, queryfract);
1721 pDataArray.push_back(tempUchime);
1722 processIDS.push_back(i);
1724 //MyUchimeThreadFunction is in header. It must be global or static to work with the threads.
1725 //default security attributes, thread function name, argument to thread function, use default creation flags, returns the thread identifier
1726 hThreadArray[i-1] = CreateThread(NULL, 0, MyUchimeThreadFunction, pDataArray[i-1], 0, &dwThreadIdArray[i-1]);
1730 //using the main process as a worker saves time and memory
1731 num = driverGroups(parser, outputFName, filename, accnos, alns, lines[0].start, lines[0].end, groups);
1733 //Wait until all threads have terminated.
1734 WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE);
1736 //Close all thread handles and free memory allocations.
1737 for(int i=0; i < pDataArray.size(); i++){
1738 num += pDataArray[i]->count;
1739 CloseHandle(hThreadArray[i]);
1740 delete pDataArray[i];
1745 //append output files
1746 for(int i=0;i<processIDS.size();i++){
1747 m->appendFiles((outputFName + toString(processIDS[i]) + ".temp"), outputFName);
1748 m->mothurRemove((outputFName + toString(processIDS[i]) + ".temp"));
1750 m->appendFiles((accnos + toString(processIDS[i]) + ".temp"), accnos);
1751 m->mothurRemove((accnos + toString(processIDS[i]) + ".temp"));
1754 m->appendFiles((alns + toString(processIDS[i]) + ".temp"), alns);
1755 m->mothurRemove((alns + toString(processIDS[i]) + ".temp"));
1762 catch(exception& e) {
1763 m->errorOut(e, "ChimeraUchimeCommand", "createProcessesGroups");
1767 /**************************************************************************************************/