2 * chimerauchimecommand.cpp
5 * Created by westcott on 5/13/11.
6 * Copyright 2011 Schloss Lab. All rights reserved.
10 #include "chimerauchimecommand.h"
11 #include "deconvolutecommand.h"
13 #include "sequence.hpp"
14 #include "referencedb.h"
15 #include "systemcommand.h"
17 //**********************************************************************************************************************
18 vector<string> ChimeraUchimeCommand::setParameters(){
20 CommandParameter ptemplate("reference", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(ptemplate);
21 CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pfasta);
22 CommandParameter pname("name", "InputTypes", "", "", "NameCount", "none", "none",false,false); parameters.push_back(pname);
23 CommandParameter pcount("count", "InputTypes", "", "", "NameCount-CountGroup", "none", "none",false,false); parameters.push_back(pcount);
24 CommandParameter pgroup("group", "InputTypes", "", "", "CountGroup", "none", "none",false,false); parameters.push_back(pgroup);
25 CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors);
26 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
27 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
28 CommandParameter pabskew("abskew", "Number", "", "1.9", "", "", "",false,false); parameters.push_back(pabskew);
29 CommandParameter pchimealns("chimealns", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pchimealns);
30 CommandParameter pminh("minh", "Number", "", "0.3", "", "", "",false,false); parameters.push_back(pminh);
31 CommandParameter pmindiv("mindiv", "Number", "", "0.5", "", "", "",false,false); parameters.push_back(pmindiv);
32 CommandParameter pxn("xn", "Number", "", "8.0", "", "", "",false,false); parameters.push_back(pxn);
33 CommandParameter pdn("dn", "Number", "", "1.4", "", "", "",false,false); parameters.push_back(pdn);
34 CommandParameter pxa("xa", "Number", "", "1", "", "", "",false,false); parameters.push_back(pxa);
35 CommandParameter pchunks("chunks", "Number", "", "4", "", "", "",false,false); parameters.push_back(pchunks);
36 CommandParameter pminchunk("minchunk", "Number", "", "64", "", "", "",false,false); parameters.push_back(pminchunk);
37 CommandParameter pidsmoothwindow("idsmoothwindow", "Number", "", "32", "", "", "",false,false); parameters.push_back(pidsmoothwindow);
38 CommandParameter pdups("dereplicate", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pdups);
40 //CommandParameter pminsmoothid("minsmoothid", "Number", "", "0.95", "", "", "",false,false); parameters.push_back(pminsmoothid);
41 CommandParameter pmaxp("maxp", "Number", "", "2", "", "", "",false,false); parameters.push_back(pmaxp);
42 CommandParameter pskipgaps("skipgaps", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(pskipgaps);
43 CommandParameter pskipgaps2("skipgaps2", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(pskipgaps2);
44 CommandParameter pminlen("minlen", "Number", "", "10", "", "", "",false,false); parameters.push_back(pminlen);
45 CommandParameter pmaxlen("maxlen", "Number", "", "10000", "", "", "",false,false); parameters.push_back(pmaxlen);
46 CommandParameter pucl("ucl", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pucl);
47 CommandParameter pqueryfract("queryfract", "Number", "", "0.5", "", "", "",false,false); parameters.push_back(pqueryfract);
49 vector<string> myArray;
50 for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); }
54 m->errorOut(e, "ChimeraUchimeCommand", "setParameters");
58 //**********************************************************************************************************************
59 string ChimeraUchimeCommand::getHelpString(){
61 string helpString = "";
62 helpString += "The chimera.uchime command reads a fastafile and referencefile and outputs potentially chimeric sequences.\n";
63 helpString += "This command is a wrapper for uchime written by Robert C. Edgar.\n";
64 helpString += "The chimera.uchime command parameters are fasta, name, count, reference, processors, dereplicate, abskew, chimealns, minh, mindiv, xn, dn, xa, chunks, minchunk, idsmoothwindow, minsmoothid, maxp, skipgaps, skipgaps2, minlen, maxlen, ucl and queryfact.\n";
65 helpString += "The fasta parameter allows you to enter the fasta file containing your potentially chimeric sequences, and is required, unless you have a valid current fasta file. \n";
66 helpString += "The name parameter allows you to provide a name file, if you are using template=self. \n";
67 helpString += "The count parameter allows you to provide a count file, if you are using template=self. \n";
68 helpString += "You may enter multiple fasta files by separating their names with dashes. ie. fasta=abrecovery.fasta-amazon.fasta \n";
69 helpString += "The group parameter allows you to provide a group file. The group file can be used with a namesfile and reference=self. When checking sequences, only sequences from the same group as the query sequence will be used as the reference. \n";
70 helpString += "If the dereplicate parameter is false, then if one group finds the seqeunce to be chimeric, then all groups find it to be chimeric, default=f.\n";
71 helpString += "The reference parameter allows you to enter a reference file containing known non-chimeric sequences, and is required. You may also set template=self, in this case the abundant sequences will be used as potential parents. \n";
72 helpString += "The processors parameter allows you to specify how many processors you would like to use. The default is 1. \n";
73 helpString += "The abskew parameter can only be used with template=self. Minimum abundance skew. Default 1.9. Abundance skew is: min [ abund(parent1), abund(parent2) ] / abund(query).\n";
74 helpString += "The chimealns parameter allows you to indicate you would like a file containing multiple alignments of query sequences to parents in human readable format. Alignments show columns with differences that support or contradict a chimeric model.\n";
75 helpString += "The minh parameter - mininum score to report chimera. Default 0.3. Values from 0.1 to 5 might be reasonable. Lower values increase sensitivity but may report more false positives. If you decrease xn you may need to increase minh, and vice versa.\n";
76 helpString += "The mindiv parameter - minimum divergence ratio, default 0.5. Div ratio is 100%% - %%identity between query sequence and the closest candidate for being a parent. If you don't care about very close chimeras, then you could increase mindiv to, say, 1.0 or 2.0, and also decrease minh, say to 0.1, to increase sensitivity. How well this works will depend on your data. Best is to tune parameters on a good benchmark.\n";
77 helpString += "The xn parameter - weight of a no vote. Default 8.0. Decreasing this weight to around 3 or 4 may give better performance on denoised data.\n";
78 helpString += "The dn parameter - pseudo-count prior on number of no votes. Default 1.4. Probably no good reason to change this unless you can retune to a good benchmark for your data. Reasonable values are probably in the range from 0.2 to 2.\n";
79 helpString += "The xa parameter - weight of an abstain vote. Default 1. So far, results do not seem to be very sensitive to this parameter, but if you have a good training set might be worth trying. Reasonable values might range from 0.1 to 2.\n";
80 helpString += "The chunks parameter is the number of chunks to extract from the query sequence when searching for parents. Default 4.\n";
81 helpString += "The minchunk parameter is the minimum length of a chunk. Default 64.\n";
82 helpString += "The idsmoothwindow parameter is the length of id smoothing window. Default 32.\n";
83 //helpString += "The minsmoothid parameter - minimum factional identity over smoothed window of candidate parent. Default 0.95.\n";
84 helpString += "The maxp parameter - maximum number of candidate parents to consider. Default 2. In tests so far, increasing maxp gives only a very small improvement in sensivity but tends to increase the error rate quite a bit.\n";
85 helpString += "The skipgaps parameter controls how gapped columns affect counting of diffs. If skipgaps is set to T, columns containing gaps do not found as diffs. Default = T.\n";
86 helpString += "The skipgaps2 parameter controls how gapped columns affect counting of diffs. If skipgaps2 is set to T, if column is immediately adjacent to a column containing a gap, it is not counted as a diff. Default = T.\n";
87 helpString += "The minlen parameter is the minimum unaligned sequence length. Defaults 10. Applies to both query and reference sequences.\n";
88 helpString += "The maxlen parameter is the maximum unaligned sequence length. Defaults 10000. Applies to both query and reference sequences.\n";
89 helpString += "The ucl parameter - use local-X alignments. Default is global-X or false. On tests so far, global-X is always better; this option is retained because it just might work well on some future type of data.\n";
90 helpString += "The queryfract parameter - minimum fraction of the query sequence that must be covered by a local-X alignment. Default 0.5. Applies only when ucl is true.\n";
92 helpString += "When using MPI, the processors parameter is set to the number of MPI processes running. \n";
94 helpString += "The chimera.uchime command should be in the following format: \n";
95 helpString += "chimera.uchime(fasta=yourFastaFile, reference=yourTemplate) \n";
96 helpString += "Example: chimera.uchime(fasta=AD.align, reference=silva.gold.align) \n";
97 helpString += "Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFastaFile).\n";
100 catch(exception& e) {
101 m->errorOut(e, "ChimeraUchimeCommand", "getHelpString");
105 //**********************************************************************************************************************
106 string ChimeraUchimeCommand::getOutputFileNameTag(string type, string inputName=""){
108 string outputFileName = "";
109 map<string, vector<string> >::iterator it;
111 //is this a type this command creates
112 it = outputTypes.find(type);
113 if (it == outputTypes.end()) { m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n"); }
115 if (type == "chimera") { outputFileName = "uchime.chimeras"; }
116 else if (type == "accnos") { outputFileName = "uchime.accnos"; }
117 else if (type == "alns") { outputFileName = "uchime.alns"; }
118 else { m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n"); m->control_pressed = true; }
120 return outputFileName;
122 catch(exception& e) {
123 m->errorOut(e, "ChimeraUchimeCommand", "getOutputFileNameTag");
127 //**********************************************************************************************************************
128 ChimeraUchimeCommand::ChimeraUchimeCommand(){
130 abort = true; calledHelp = true;
132 vector<string> tempOutNames;
133 outputTypes["chimera"] = tempOutNames;
134 outputTypes["accnos"] = tempOutNames;
135 outputTypes["alns"] = tempOutNames;
137 catch(exception& e) {
138 m->errorOut(e, "ChimeraUchimeCommand", "ChimeraUchimeCommand");
142 //***************************************************************************************************************
143 ChimeraUchimeCommand::ChimeraUchimeCommand(string option) {
145 abort = false; calledHelp = false; hasName=false; hasCount=false;
146 ReferenceDB* rdb = ReferenceDB::getInstance();
148 //allow user to run help
149 if(option == "help") { help(); abort = true; calledHelp = true; }
150 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
153 vector<string> myArray = setParameters();
155 OptionParser parser(option);
156 map<string,string> parameters = parser.getParameters();
158 ValidParameters validParameter("chimera.uchime");
159 map<string,string>::iterator it;
161 //check to make sure all parameters are valid for command
162 for (it = parameters.begin(); it != parameters.end(); it++) {
163 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
166 vector<string> tempOutNames;
167 outputTypes["chimera"] = tempOutNames;
168 outputTypes["accnos"] = tempOutNames;
169 outputTypes["alns"] = tempOutNames;
171 //if the user changes the input directory command factory will send this info to us in the output parameter
172 string inputDir = validParameter.validFile(parameters, "inputdir", false);
173 if (inputDir == "not found"){ inputDir = ""; }
175 //check for required parameters
176 fastafile = validParameter.validFile(parameters, "fasta", false);
177 if (fastafile == "not found") {
178 //if there is a current fasta file, use it
179 string filename = m->getFastaFile();
180 if (filename != "") { fastaFileNames.push_back(filename); m->mothurOut("Using " + filename + " as input file for the fasta parameter."); m->mothurOutEndLine(); }
181 else { m->mothurOut("You have no current fastafile and the fasta parameter is required."); m->mothurOutEndLine(); abort = true; }
183 m->splitAtDash(fastafile, fastaFileNames);
185 //go through files and make sure they are good, if not, then disregard them
186 for (int i = 0; i < fastaFileNames.size(); i++) {
189 if (fastaFileNames[i] == "current") {
190 fastaFileNames[i] = m->getFastaFile();
191 if (fastaFileNames[i] != "") { m->mothurOut("Using " + fastaFileNames[i] + " as input file for the fasta parameter where you had given current."); m->mothurOutEndLine(); }
193 m->mothurOut("You have no current fastafile, ignoring current."); m->mothurOutEndLine(); ignore=true;
194 //erase from file list
195 fastaFileNames.erase(fastaFileNames.begin()+i);
202 if (inputDir != "") {
203 string path = m->hasPath(fastaFileNames[i]);
204 //if the user has not given a path then, add inputdir. else leave path alone.
205 if (path == "") { fastaFileNames[i] = inputDir + fastaFileNames[i]; }
211 ableToOpen = m->openInputFile(fastaFileNames[i], in, "noerror");
213 //if you can't open it, try default location
214 if (ableToOpen == 1) {
215 if (m->getDefaultPath() != "") { //default path is set
216 string tryPath = m->getDefaultPath() + m->getSimpleName(fastaFileNames[i]);
217 m->mothurOut("Unable to open " + fastaFileNames[i] + ". Trying default " + tryPath); m->mothurOutEndLine();
219 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
221 fastaFileNames[i] = tryPath;
225 if (ableToOpen == 1) {
226 if (m->getOutputDir() != "") { //default path is set
227 string tryPath = m->getOutputDir() + m->getSimpleName(fastaFileNames[i]);
228 m->mothurOut("Unable to open " + fastaFileNames[i] + ". Trying output directory " + tryPath); m->mothurOutEndLine();
230 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
232 fastaFileNames[i] = tryPath;
238 if (ableToOpen == 1) {
239 m->mothurOut("Unable to open " + fastaFileNames[i] + ". It will be disregarded."); m->mothurOutEndLine();
240 //erase from file list
241 fastaFileNames.erase(fastaFileNames.begin()+i);
244 m->setFastaFile(fastaFileNames[i]);
249 //make sure there is at least one valid file left
250 if (fastaFileNames.size() == 0) { m->mothurOut("[ERROR]: no valid files."); m->mothurOutEndLine(); abort = true; }
254 //check for required parameters
255 namefile = validParameter.validFile(parameters, "name", false);
256 if (namefile == "not found") { namefile = ""; }
258 m->splitAtDash(namefile, nameFileNames);
260 //go through files and make sure they are good, if not, then disregard them
261 for (int i = 0; i < nameFileNames.size(); i++) {
264 if (nameFileNames[i] == "current") {
265 nameFileNames[i] = m->getNameFile();
266 if (nameFileNames[i] != "") { m->mothurOut("Using " + nameFileNames[i] + " as input file for the name parameter where you had given current."); m->mothurOutEndLine(); }
268 m->mothurOut("You have no current namefile, ignoring current."); m->mothurOutEndLine(); ignore=true;
269 //erase from file list
270 nameFileNames.erase(nameFileNames.begin()+i);
277 if (inputDir != "") {
278 string path = m->hasPath(nameFileNames[i]);
279 //if the user has not given a path then, add inputdir. else leave path alone.
280 if (path == "") { nameFileNames[i] = inputDir + nameFileNames[i]; }
286 ableToOpen = m->openInputFile(nameFileNames[i], in, "noerror");
288 //if you can't open it, try default location
289 if (ableToOpen == 1) {
290 if (m->getDefaultPath() != "") { //default path is set
291 string tryPath = m->getDefaultPath() + m->getSimpleName(nameFileNames[i]);
292 m->mothurOut("Unable to open " + nameFileNames[i] + ". Trying default " + tryPath); m->mothurOutEndLine();
294 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
296 nameFileNames[i] = tryPath;
300 if (ableToOpen == 1) {
301 if (m->getOutputDir() != "") { //default path is set
302 string tryPath = m->getOutputDir() + m->getSimpleName(nameFileNames[i]);
303 m->mothurOut("Unable to open " + nameFileNames[i] + ". Trying output directory " + tryPath); m->mothurOutEndLine();
305 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
307 nameFileNames[i] = tryPath;
313 if (ableToOpen == 1) {
314 m->mothurOut("Unable to open " + nameFileNames[i] + ". It will be disregarded."); m->mothurOutEndLine();
315 //erase from file list
316 nameFileNames.erase(nameFileNames.begin()+i);
319 m->setNameFile(nameFileNames[i]);
325 if (nameFileNames.size() != 0) { hasName = true; }
327 //check for required parameters
328 vector<string> countfileNames;
329 countfile = validParameter.validFile(parameters, "count", false);
330 if (countfile == "not found") {
333 m->splitAtDash(countfile, countfileNames);
335 //go through files and make sure they are good, if not, then disregard them
336 for (int i = 0; i < countfileNames.size(); i++) {
339 if (countfileNames[i] == "current") {
340 countfileNames[i] = m->getCountTableFile();
341 if (nameFileNames[i] != "") { m->mothurOut("Using " + countfileNames[i] + " as input file for the count parameter where you had given current."); m->mothurOutEndLine(); }
343 m->mothurOut("You have no current count file, ignoring current."); m->mothurOutEndLine(); ignore=true;
344 //erase from file list
345 countfileNames.erase(countfileNames.begin()+i);
352 if (inputDir != "") {
353 string path = m->hasPath(countfileNames[i]);
354 //if the user has not given a path then, add inputdir. else leave path alone.
355 if (path == "") { countfileNames[i] = inputDir + countfileNames[i]; }
361 ableToOpen = m->openInputFile(countfileNames[i], in, "noerror");
363 //if you can't open it, try default location
364 if (ableToOpen == 1) {
365 if (m->getDefaultPath() != "") { //default path is set
366 string tryPath = m->getDefaultPath() + m->getSimpleName(countfileNames[i]);
367 m->mothurOut("Unable to open " + countfileNames[i] + ". Trying default " + tryPath); m->mothurOutEndLine();
369 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
371 countfileNames[i] = tryPath;
375 if (ableToOpen == 1) {
376 if (m->getOutputDir() != "") { //default path is set
377 string tryPath = m->getOutputDir() + m->getSimpleName(countfileNames[i]);
378 m->mothurOut("Unable to open " + countfileNames[i] + ". Trying output directory " + tryPath); m->mothurOutEndLine();
380 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
382 countfileNames[i] = tryPath;
388 if (ableToOpen == 1) {
389 m->mothurOut("Unable to open " + countfileNames[i] + ". It will be disregarded."); m->mothurOutEndLine();
390 //erase from file list
391 countfileNames.erase(countfileNames.begin()+i);
394 m->setCountTableFile(countfileNames[i]);
400 if (countfileNames.size() != 0) { hasCount = true; }
402 //make sure there is at least one valid file left
403 if (hasName && hasCount) { m->mothurOut("[ERROR]: You must enter ONLY ONE of the following: count or name."); m->mothurOutEndLine(); abort = true; }
405 if (!hasName && hasCount) { nameFileNames = countfileNames; }
407 if ((hasCount || hasName) && (nameFileNames.size() != fastaFileNames.size())) { m->mothurOut("[ERROR]: The number of name or count files does not match the number of fastafiles, please correct."); m->mothurOutEndLine(); abort=true; }
409 bool hasGroup = true;
410 groupfile = validParameter.validFile(parameters, "group", false);
411 if (groupfile == "not found") { groupfile = ""; hasGroup = false; }
413 m->splitAtDash(groupfile, groupFileNames);
415 //go through files and make sure they are good, if not, then disregard them
416 for (int i = 0; i < groupFileNames.size(); i++) {
419 if (groupFileNames[i] == "current") {
420 groupFileNames[i] = m->getGroupFile();
421 if (groupFileNames[i] != "") { m->mothurOut("Using " + groupFileNames[i] + " as input file for the group parameter where you had given current."); m->mothurOutEndLine(); }
423 m->mothurOut("You have no current namefile, ignoring current."); m->mothurOutEndLine(); ignore=true;
424 //erase from file list
425 groupFileNames.erase(groupFileNames.begin()+i);
432 if (inputDir != "") {
433 string path = m->hasPath(groupFileNames[i]);
434 //if the user has not given a path then, add inputdir. else leave path alone.
435 if (path == "") { groupFileNames[i] = inputDir + groupFileNames[i]; }
441 ableToOpen = m->openInputFile(groupFileNames[i], in, "noerror");
443 //if you can't open it, try default location
444 if (ableToOpen == 1) {
445 if (m->getDefaultPath() != "") { //default path is set
446 string tryPath = m->getDefaultPath() + m->getSimpleName(groupFileNames[i]);
447 m->mothurOut("Unable to open " + groupFileNames[i] + ". Trying default " + tryPath); m->mothurOutEndLine();
449 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
451 groupFileNames[i] = tryPath;
455 if (ableToOpen == 1) {
456 if (m->getOutputDir() != "") { //default path is set
457 string tryPath = m->getOutputDir() + m->getSimpleName(groupFileNames[i]);
458 m->mothurOut("Unable to open " + groupFileNames[i] + ". Trying output directory " + tryPath); m->mothurOutEndLine();
460 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
462 groupFileNames[i] = tryPath;
468 if (ableToOpen == 1) {
469 m->mothurOut("Unable to open " + groupFileNames[i] + ". It will be disregarded."); m->mothurOutEndLine();
470 //erase from file list
471 groupFileNames.erase(groupFileNames.begin()+i);
474 m->setGroupFile(groupFileNames[i]);
479 //make sure there is at least one valid file left
480 if (groupFileNames.size() == 0) { m->mothurOut("[ERROR]: no valid group files."); m->mothurOutEndLine(); abort = true; }
483 if (hasGroup && (groupFileNames.size() != fastaFileNames.size())) { m->mothurOut("[ERROR]: The number of groupfiles does not match the number of fastafiles, please correct."); m->mothurOutEndLine(); abort=true; }
485 if (hasGroup && hasCount) { m->mothurOut("[ERROR]: You must enter ONLY ONE of the following: count or group."); m->mothurOutEndLine(); abort = true; }
486 //if the user changes the output directory command factory will send this info to us in the output parameter
487 outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = ""; }
490 //if the user changes the output directory command factory will send this info to us in the output parameter
491 outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = ""; }
494 it = parameters.find("reference");
495 //user has given a template file
496 if(it != parameters.end()){
497 if (it->second == "self") { templatefile = "self"; }
499 path = m->hasPath(it->second);
500 //if the user has not given a path then, add inputdir. else leave path alone.
501 if (path == "") { parameters["reference"] = inputDir + it->second; }
503 templatefile = validParameter.validFile(parameters, "reference", true);
504 if (templatefile == "not open") { abort = true; }
505 else if (templatefile == "not found") { //check for saved reference sequences
506 if (rdb->getSavedReference() != "") {
507 templatefile = rdb->getSavedReference();
508 m->mothurOutEndLine(); m->mothurOut("Using sequences from " + rdb->getSavedReference() + "."); m->mothurOutEndLine();
510 m->mothurOut("[ERROR]: You don't have any saved reference sequences and the reference parameter is a required.");
511 m->mothurOutEndLine();
516 }else if (hasName) { templatefile = "self"; }
517 else if (hasCount) { templatefile = "self"; }
519 if (rdb->getSavedReference() != "") {
520 templatefile = rdb->getSavedReference();
521 m->mothurOutEndLine(); m->mothurOut("Using sequences from " + rdb->getSavedReference() + "."); m->mothurOutEndLine();
523 m->mothurOut("[ERROR]: You don't have any saved reference sequences and the reference parameter is a required.");
524 m->mothurOutEndLine();
525 templatefile = ""; abort = true;
529 string temp = validParameter.validFile(parameters, "processors", false); if (temp == "not found"){ temp = m->getProcessors(); }
530 m->setProcessors(temp);
531 m->mothurConvert(temp, processors);
533 abskew = validParameter.validFile(parameters, "abskew", false); if (abskew == "not found"){ useAbskew = false; abskew = "1.9"; }else{ useAbskew = true; }
534 if (useAbskew && templatefile != "self") { m->mothurOut("The abskew parameter is only valid with template=self, ignoring."); m->mothurOutEndLine(); useAbskew = false; }
536 temp = validParameter.validFile(parameters, "chimealns", false); if (temp == "not found") { temp = "f"; }
537 chimealns = m->isTrue(temp);
539 minh = validParameter.validFile(parameters, "minh", false); if (minh == "not found") { useMinH = false; minh = "0.3"; } else{ useMinH = true; }
540 mindiv = validParameter.validFile(parameters, "mindiv", false); if (mindiv == "not found") { useMindiv = false; mindiv = "0.5"; } else{ useMindiv = true; }
541 xn = validParameter.validFile(parameters, "xn", false); if (xn == "not found") { useXn = false; xn = "8.0"; } else{ useXn = true; }
542 dn = validParameter.validFile(parameters, "dn", false); if (dn == "not found") { useDn = false; dn = "1.4"; } else{ useDn = true; }
543 xa = validParameter.validFile(parameters, "xa", false); if (xa == "not found") { useXa = false; xa = "1"; } else{ useXa = true; }
544 chunks = validParameter.validFile(parameters, "chunks", false); if (chunks == "not found") { useChunks = false; chunks = "4"; } else{ useChunks = true; }
545 minchunk = validParameter.validFile(parameters, "minchunk", false); if (minchunk == "not found") { useMinchunk = false; minchunk = "64"; } else{ useMinchunk = true; }
546 idsmoothwindow = validParameter.validFile(parameters, "idsmoothwindow", false); if (idsmoothwindow == "not found") { useIdsmoothwindow = false; idsmoothwindow = "32"; } else{ useIdsmoothwindow = true; }
547 //minsmoothid = validParameter.validFile(parameters, "minsmoothid", false); if (minsmoothid == "not found") { useMinsmoothid = false; minsmoothid = "0.95"; } else{ useMinsmoothid = true; }
548 maxp = validParameter.validFile(parameters, "maxp", false); if (maxp == "not found") { useMaxp = false; maxp = "2"; } else{ useMaxp = true; }
549 minlen = validParameter.validFile(parameters, "minlen", false); if (minlen == "not found") { useMinlen = false; minlen = "10"; } else{ useMinlen = true; }
550 maxlen = validParameter.validFile(parameters, "maxlen", false); if (maxlen == "not found") { useMaxlen = false; maxlen = "10000"; } else{ useMaxlen = true; }
552 temp = validParameter.validFile(parameters, "ucl", false); if (temp == "not found") { temp = "f"; }
553 ucl = m->isTrue(temp);
555 queryfract = validParameter.validFile(parameters, "queryfract", false); if (queryfract == "not found") { useQueryfract = false; queryfract = "0.5"; } else{ useQueryfract = true; }
556 if (!ucl && useQueryfract) { m->mothurOut("queryfact may only be used when ucl=t, ignoring."); m->mothurOutEndLine(); useQueryfract = false; }
558 temp = validParameter.validFile(parameters, "skipgaps", false); if (temp == "not found") { temp = "t"; }
559 skipgaps = m->isTrue(temp);
561 temp = validParameter.validFile(parameters, "skipgaps2", false); if (temp == "not found") { temp = "t"; }
562 skipgaps2 = m->isTrue(temp);
564 string usedDups = "false";
565 temp = validParameter.validFile(parameters, "dereplicate", false);
566 if (temp == "not found") {
567 if (groupfile != "") { temp = "false"; }
568 else { temp = "true"; usedDups = ""; }
570 dups = m->isTrue(temp);
573 if (hasName && (templatefile != "self")) { m->mothurOut("You have provided a namefile and the reference parameter is not set to self. I am not sure what reference you are trying to use, aborting."); m->mothurOutEndLine(); abort=true; }
574 if (hasGroup && (templatefile != "self")) { m->mothurOut("You have provided a group file and the reference parameter is not set to self. I am not sure what reference you are trying to use, aborting."); m->mothurOutEndLine(); abort=true; }
576 //look for uchime exe
578 string tempPath = path;
579 for (int i = 0; i < path.length(); i++) { tempPath[i] = tolower(path[i]); }
580 path = path.substr(0, (tempPath.find_last_of('m')));
582 string uchimeCommand;
583 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
584 uchimeCommand = path + "uchime"; // format the database, -o option gives us the ability
586 m->mothurOut("[DEBUG]: Uchime location using \"which uchime\" = ");
587 Command* newCommand = new SystemCommand("which uchime"); m->mothurOutEndLine();
588 newCommand->execute();
590 m->mothurOut("[DEBUG]: Mothur's location using \"which mothur\" = ");
591 newCommand = new SystemCommand("which mothur"); m->mothurOutEndLine();
592 newCommand->execute();
596 uchimeCommand = path + "uchime.exe";
599 //test to make sure uchime exists
601 uchimeCommand = m->getFullPathName(uchimeCommand);
602 int ableToOpen = m->openInputFile(uchimeCommand, in, "no error"); in.close();
603 if(ableToOpen == 1) {
604 m->mothurOut(uchimeCommand + " file does not exist. Checking path... \n");
605 //check to see if uchime is in the path??
607 string uLocation = m->findProgramPath("uchime");
611 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
612 ableToOpen = m->openInputFile(uLocation, in2, "no error"); in2.close();
614 ableToOpen = m->openInputFile((uLocation + ".exe"), in2, "no error"); in2.close();
617 if(ableToOpen == 1) { m->mothurOut("[ERROR]: " + uLocation + " file does not exist. mothur requires the uchime executable."); m->mothurOutEndLine(); abort = true; }
618 else { m->mothurOut("Found uchime in your path, using " + uLocation + "\n");uchimeLocation = uLocation; }
619 }else { uchimeLocation = uchimeCommand; }
621 uchimeLocation = m->getFullPathName(uchimeLocation);
624 catch(exception& e) {
625 m->errorOut(e, "ChimeraSlayerCommand", "ChimeraSlayerCommand");
629 //***************************************************************************************************************
631 int ChimeraUchimeCommand::execute(){
634 if (abort == true) { if (calledHelp) { return 0; } return 2; }
636 m->mothurOut("\nuchime by Robert C. Edgar\nhttp://drive5.com/uchime\nThis code is donated to the public domain.\n\n");
638 for (int s = 0; s < fastaFileNames.size(); s++) {
640 m->mothurOut("Checking sequences from " + fastaFileNames[s] + " ..." ); m->mothurOutEndLine();
642 int start = time(NULL);
643 string nameFile = "";
644 if (outputDir == "") { outputDir = m->hasPath(fastaFileNames[s]); }//if user entered a file with a path then preserve it
645 string outputFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + getOutputFileNameTag("chimera");
646 string accnosFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + getOutputFileNameTag("accnos");
647 string alnsFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + getOutputFileNameTag("alns");
648 string newFasta = m->getRootName(fastaFileNames[s]) + "temp";
650 //you provided a groupfile
651 string groupFile = "";
652 bool hasGroup = false;
653 if (groupFileNames.size() != 0) { groupFile = groupFileNames[s]; hasGroup = true; }
656 if (ct.testGroups(nameFileNames[s])) { hasGroup = true; }
659 if ((templatefile == "self") && (!hasGroup)) { //you want to run uchime with a template=self and no groups
661 if (processors != 1) { m->mothurOut("When using template=self, mothur can only use 1 processor, continuing."); m->mothurOutEndLine(); processors = 1; }
662 if (nameFileNames.size() != 0) { //you provided a namefile and we don't need to create one
663 nameFile = nameFileNames[s];
664 }else { nameFile = getNamesFile(fastaFileNames[s]); }
666 map<string, string> seqs;
667 readFasta(fastaFileNames[s], seqs); if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } return 0; }
670 vector<seqPriorityNode> nameMapCount;
674 ct.readTable(nameFile);
675 for(map<string, string>::iterator it = seqs.begin(); it != seqs.end(); it++) {
676 int num = ct.getNumSeqs(it->first);
677 if (num == 0) { error = 1; }
679 seqPriorityNode temp(num, it->second, it->first);
680 nameMapCount.push_back(temp);
684 error = m->readNames(nameFile, nameMapCount, seqs); if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } return 0; }
686 if (error == 1) { for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } return 0; }
687 if (seqs.size() != nameMapCount.size()) { m->mothurOut( "The number of sequences in your fastafile does not match the number of sequences in your namefile, aborting."); m->mothurOutEndLine(); for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } return 0; }
689 printFile(nameMapCount, newFasta);
690 fastaFileNames[s] = newFasta;
693 if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } return 0; }
696 if (nameFileNames.size() != 0) { //you provided a namefile and we don't need to create one
697 nameFile = nameFileNames[s];
698 }else { nameFile = getNamesFile(fastaFileNames[s]); }
700 //Parse sequences by group
701 vector<string> groups;
702 map<string, string> uniqueNames;
704 cparser = new SequenceCountParser(nameFile, fastaFileNames[s]);
705 groups = cparser->getNamesOfGroups();
706 uniqueNames = cparser->getAllSeqsMap();
708 sparser = new SequenceParser(groupFile, fastaFileNames[s], nameFile);
709 groups = sparser->getNamesOfGroups();
710 uniqueNames = sparser->getAllSeqsMap();
713 if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } return 0; }
716 ofstream out, out1, out2;
717 m->openOutputFile(outputFileName, out); out.close();
718 m->openOutputFile(accnosFileName, out1); out1.close();
719 if (chimealns) { m->openOutputFile(alnsFileName, out2); out2.close(); }
722 if(processors == 1) { totalSeqs = driverGroups(outputFileName, newFasta, accnosFileName, alnsFileName, 0, groups.size(), groups); }
723 else { totalSeqs = createProcessesGroups(outputFileName, newFasta, accnosFileName, alnsFileName, groups, nameFile, groupFile, fastaFileNames[s]); }
725 if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } return 0; }
726 if (hasCount) { delete cparser; }
727 else { delete sparser; }
730 int totalChimeras = deconvoluteResults(uniqueNames, outputFileName, accnosFileName, alnsFileName);
732 m->mothurOutEndLine(); m->mothurOut("It took " + toString(time(NULL) - start) + " secs to check " + toString(totalSeqs) + " sequences. " + toString(totalChimeras) + " chimeras were found."); m->mothurOutEndLine();
733 m->mothurOut("The number of sequences checked may be larger than the number of unique sequences because some sequences are found in several samples."); m->mothurOutEndLine();
736 if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } return 0; }
739 if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } return 0; }
744 if(processors == 1){ numSeqs = driver(outputFileName, fastaFileNames[s], accnosFileName, alnsFileName, numChimeras); }
745 else{ numSeqs = createProcesses(outputFileName, fastaFileNames[s], accnosFileName, alnsFileName, numChimeras); }
749 m->openOutputFile(outputFileName+".temp", out);
750 out << "Score\tQuery\tParentA\tParentB\tIdQM\tIdQA\tIdQB\tIdAB\tIdQT\tLY\tLN\tLA\tRY\tRN\tRA\tDiv\tYN\n";
753 m->appendFiles(outputFileName, outputFileName+".temp");
754 m->mothurRemove(outputFileName); rename((outputFileName+".temp").c_str(), outputFileName.c_str());
756 if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } return 0; }
758 //remove file made for uchime
759 if (templatefile == "self") { m->mothurRemove(fastaFileNames[s]); }
761 m->mothurOutEndLine(); m->mothurOut("It took " + toString(time(NULL) - start) + " secs to check " + toString(numSeqs) + " sequences. " + toString(numChimeras) + " chimeras were found."); m->mothurOutEndLine();
764 outputNames.push_back(outputFileName); outputTypes["chimera"].push_back(outputFileName);
765 outputNames.push_back(accnosFileName); outputTypes["accnos"].push_back(accnosFileName);
766 if (chimealns) { outputNames.push_back(alnsFileName); outputTypes["alns"].push_back(alnsFileName); }
769 //set accnos file as new current accnosfile
771 itTypes = outputTypes.find("accnos");
772 if (itTypes != outputTypes.end()) {
773 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setAccnosFile(current); }
776 m->mothurOutEndLine();
777 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
778 for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); }
779 m->mothurOutEndLine();
784 catch(exception& e) {
785 m->errorOut(e, "ChimeraUchimeCommand", "execute");
789 //**********************************************************************************************************************
790 int ChimeraUchimeCommand::deconvoluteResults(map<string, string>& uniqueNames, string outputFileName, string accnosFileName, string alnsFileName){
792 map<string, string>::iterator itUnique;
797 m->openInputFile(accnosFileName, in2);
800 m->openOutputFile(accnosFileName+".temp", out2);
803 set<string> namesInFile; //this is so if a sequence is found to be chimera in several samples we dont write it to the results file more than once
804 set<string>::iterator itNames;
805 set<string> chimerasInFile;
806 set<string>::iterator itChimeras;
810 if (m->control_pressed) { in2.close(); out2.close(); m->mothurRemove(outputFileName); m->mothurRemove((accnosFileName+".temp")); return 0; }
812 in2 >> name; m->gobble(in2);
815 itUnique = uniqueNames.find(name);
817 if (itUnique == uniqueNames.end()) { m->mothurOut("[ERROR]: trouble parsing accnos results. Cannot find " + name + "."); m->mothurOutEndLine(); m->control_pressed = true; }
819 itChimeras = chimerasInFile.find((itUnique->second));
821 if (itChimeras == chimerasInFile.end()) {
822 out2 << itUnique->second << endl;
823 chimerasInFile.insert((itUnique->second));
831 m->mothurRemove(accnosFileName);
832 rename((accnosFileName+".temp").c_str(), accnosFileName.c_str());
838 m->openInputFile(outputFileName, in);
841 m->openOutputFile(outputFileName+".temp", out); out.setf(ios::fixed, ios::floatfield); out.setf(ios::showpoint);
842 out << "Score\tQuery\tParentA\tParentB\tIdQM\tIdQA\tIdQB\tIdAB\tIdQT\tLY\tLN\tLA\tRY\tRN\tRA\tDiv\tYN\n";
845 string parent1, parent2, temp2, temp3, temp4, temp5, temp6, temp7, temp8, temp9, temp10, temp11, temp12, temp13, flag;
848 //assumptions - in file each read will always look like - if uchime source is updated, revisit this code.
849 /* 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
850 0.000000 F11Fcsw_33372/ab=18/ * * * * * * * * * * * * * * N
851 0.018300 F11Fcsw_14980/ab=16/ F11Fcsw_1915/ab=35/ F11Fcsw_6032/ab=42/ 79.9 78.7 78.2 78.7 79.2 3 0 5 11 10 20 1.46 N
856 if (m->control_pressed) { in.close(); out.close(); m->mothurRemove((outputFileName+".temp")); return 0; }
859 in >> temp1; m->gobble(in);
860 in >> name; m->gobble(in);
861 in >> parent1; m->gobble(in);
862 in >> parent2; m->gobble(in);
863 in >> temp2 >> temp3 >> temp4 >> temp5 >> temp6 >> temp7 >> temp8 >> temp9 >> temp10 >> temp11 >> temp12 >> temp13 >> flag;
866 //parse name - name will look like U68590/ab=1/
867 string restOfName = "";
868 int pos = name.find_first_of('/');
869 if (pos != string::npos) {
870 restOfName = name.substr(pos);
871 name = name.substr(0, pos);
875 itUnique = uniqueNames.find(name);
877 if (itUnique == uniqueNames.end()) { m->mothurOut("[ERROR]: trouble parsing chimera results. Cannot find "+ name + "."); m->mothurOutEndLine(); m->control_pressed = true; }
879 name = itUnique->second;
880 //is this name already in the file
881 itNames = namesInFile.find((name));
883 if (itNames == namesInFile.end()) { //no not in file
884 if (flag == "N") { //are you really a no??
885 //is this sequence really not chimeric??
886 itChimeras = chimerasInFile.find(name);
888 //then you really are a no so print, otherwise skip
889 if (itChimeras == chimerasInFile.end()) { print = true; }
890 }else{ print = true; }
895 out << temp1 << '\t' << name << restOfName << '\t';
896 namesInFile.insert(name);
898 //parse parent1 names
899 if (parent1 != "*") {
901 pos = parent1.find_first_of('/');
902 if (pos != string::npos) {
903 restOfName = parent1.substr(pos);
904 parent1 = parent1.substr(0, pos);
907 itUnique = uniqueNames.find(parent1);
908 if (itUnique == uniqueNames.end()) { m->mothurOut("[ERROR]: trouble parsing chimera results. Cannot find parentA "+ parent1 + "."); m->mothurOutEndLine(); m->control_pressed = true; }
909 else { out << itUnique->second << restOfName << '\t'; }
910 }else { out << parent1 << '\t'; }
912 //parse parent2 names
913 if (parent2 != "*") {
915 pos = parent2.find_first_of('/');
916 if (pos != string::npos) {
917 restOfName = parent2.substr(pos);
918 parent2 = parent2.substr(0, pos);
921 itUnique = uniqueNames.find(parent2);
922 if (itUnique == uniqueNames.end()) { m->mothurOut("[ERROR]: trouble parsing chimera results. Cannot find parentB "+ parent2 + "."); m->mothurOutEndLine(); m->control_pressed = true; }
923 else { out << itUnique->second << restOfName << '\t'; }
924 }else { out << parent2 << '\t'; }
926 out << temp2 << '\t' << temp3 << '\t' << temp4 << '\t' << temp5 << '\t' << temp6 << '\t' << temp7 << '\t' << temp8 << '\t' << temp9 << '\t' << temp10 << '\t' << temp11 << '\t' << temp12 << temp13 << '\t' << flag << endl;
932 m->mothurRemove(outputFileName);
933 rename((outputFileName+".temp").c_str(), outputFileName.c_str());
937 //assumptions - in file each read will always look like - if uchime source is updated, revisit this code.
939 ------------------------------------------------------------------------
940 Query ( 179 nt) F21Fcsw_11639/ab=591/
941 ParentA ( 179 nt) F11Fcsw_6529/ab=1625/
942 ParentB ( 181 nt) F21Fcsw_12128/ab=1827/
944 A 1 AAGgAAGAtTAATACaagATGgCaTCatgAGtccgCATgTtcAcatGATTAAAG--gTaTtcCGGTagacGATGGGGATG 78
945 Q 1 AAGTAAGACTAATACCCAATGACGTCTCTAGAAGACATCTGAAAGAGATTAAAG--ATTTATCGGTGATGGATGGGGATG 78
946 B 1 AAGgAAGAtTAATcCaggATGggaTCatgAGttcACATgTccgcatGATTAAAGgtATTTtcCGGTagacGATGGGGATG 80
947 Diffs N N A N?N N N NNN N?NB N ?NaNNN B B NN NNNN
948 Votes 0 0 + 000 0 0 000 000+ 0 00!000 + 00 0000
949 Model AAAAAAAAAAAAAAAAAAAAAAxBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB
951 A 79 CGTtccATTAGaTaGTaGGCGGGGTAACGGCCCACCtAGtCttCGATggaTAGGGGTTCTGAGAGGAAGGTCCCCCACAT 158
952 Q 79 CGTCTGATTAGCTTGTTGGCGGGGTAACGGCCCACCAAGGCAACGATCAGTAGGGGTTCTGAGAGGAAGGTCCCCCACAT 158
953 B 81 CGTtccATTAGaTaGTaGGCGGGGTAACGGCCCACCtAGtCAACGATggaTAGGGGTTCTGAGAGGAAGGTCCCCCACAT 160
954 Diffs NNN N N N N N BB NNN
955 Votes 000 0 0 0 0 0 ++ 000
956 Model BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB
958 A 159 TGGAACTGAGACACGGTCCAA 179
959 Q 159 TGGAACTGAGACACGGTCCAA 179
960 B 161 TGGAACTGAGACACGGTCCAA 181
963 Model BBBBBBBBBBBBBBBBBBBBB
965 Ids. QA 76.6%, QB 77.7%, AB 93.7%, QModel 78.9%, Div. +1.5%
966 Diffs Left 7: N 0, A 6, Y 1 (14.3%); Right 35: N 1, A 30, Y 4 (11.4%), Score 0.0047
970 m->openInputFile(alnsFileName, in3);
973 m->openOutputFile(alnsFileName+".temp", out3); out3.setf(ios::fixed, ios::floatfield); out3.setf(ios::showpoint);
980 if (m->control_pressed) { in3.close(); out3.close(); m->mothurRemove(outputFileName); m->mothurRemove((accnosFileName)); m->mothurRemove((alnsFileName+".temp")); return 0; }
983 line = m->getline(in3);
987 istringstream iss(line);
990 //are you a name line
991 if ((temp == "Query") || (temp == "ParentA") || (temp == "ParentB")) {
993 for (int i = 0; i < line.length(); i++) {
995 if (line[i] == ')') { break; }
996 else { out3 << line[i]; }
999 if (spot == (line.length() - 1)) { m->mothurOut("[ERROR]: could not line sequence name in line " + line + "."); m->mothurOutEndLine(); m->control_pressed = true; }
1000 else if ((spot+2) > (line.length() - 1)) { m->mothurOut("[ERROR]: could not line sequence name in line " + line + "."); m->mothurOutEndLine(); m->control_pressed = true; }
1002 out << line[spot] << line[spot+1];
1004 name = line.substr(spot+2);
1006 //parse name - name will either look like U68590/ab=1/ or U68590
1007 string restOfName = "";
1008 int pos = name.find_first_of('/');
1009 if (pos != string::npos) {
1010 restOfName = name.substr(pos);
1011 name = name.substr(0, pos);
1015 itUnique = uniqueNames.find(name);
1017 if (itUnique == uniqueNames.end()) { m->mothurOut("[ERROR]: trouble parsing alns results. Cannot find "+ name + "."); m->mothurOutEndLine();m->control_pressed = true; }
1019 //only limit repeats on query names
1020 if (temp == "Query") {
1021 itNames = namesInFile.find((itUnique->second));
1023 if (itNames == namesInFile.end()) {
1024 out << itUnique->second << restOfName << endl;
1025 namesInFile.insert((itUnique->second));
1027 }else { out << itUnique->second << restOfName << endl; }
1032 }else { //not need to alter line
1033 out3 << line << endl;
1035 }else { out3 << endl; }
1040 m->mothurRemove(alnsFileName);
1041 rename((alnsFileName+".temp").c_str(), alnsFileName.c_str());
1046 catch(exception& e) {
1047 m->errorOut(e, "ChimeraUchimeCommand", "deconvoluteResults");
1051 //**********************************************************************************************************************
1052 int ChimeraUchimeCommand::printFile(vector<seqPriorityNode>& nameMapCount, string filename){
1055 sort(nameMapCount.begin(), nameMapCount.end(), compareSeqPriorityNodes);
1058 m->openOutputFile(filename, out);
1060 //print new file in order of
1061 for (int i = 0; i < nameMapCount.size(); i++) {
1062 out << ">" << nameMapCount[i].name << "/ab=" << nameMapCount[i].numIdentical << "/" << endl << nameMapCount[i].seq << endl;
1068 catch(exception& e) {
1069 m->errorOut(e, "ChimeraUchimeCommand", "printFile");
1073 //**********************************************************************************************************************
1074 int ChimeraUchimeCommand::readFasta(string filename, map<string, string>& seqs){
1076 //create input file for uchime
1077 //read through fastafile and store info
1079 m->openInputFile(filename, in);
1083 if (m->control_pressed) { in.close(); return 0; }
1085 Sequence seq(in); m->gobble(in);
1086 seqs[seq.getName()] = seq.getAligned();
1092 catch(exception& e) {
1093 m->errorOut(e, "ChimeraUchimeCommand", "readFasta");
1097 //**********************************************************************************************************************
1099 string ChimeraUchimeCommand::getNamesFile(string& inputFile){
1101 string nameFile = "";
1103 m->mothurOutEndLine(); m->mothurOut("No namesfile given, running unique.seqs command to generate one."); m->mothurOutEndLine(); m->mothurOutEndLine();
1105 //use unique.seqs to create new name and fastafile
1106 string inputString = "fasta=" + inputFile;
1107 m->mothurOut("/******************************************/"); m->mothurOutEndLine();
1108 m->mothurOut("Running command: unique.seqs(" + inputString + ")"); m->mothurOutEndLine();
1109 m->mothurCalling = true;
1111 Command* uniqueCommand = new DeconvoluteCommand(inputString);
1112 uniqueCommand->execute();
1114 map<string, vector<string> > filenames = uniqueCommand->getOutputFiles();
1116 delete uniqueCommand;
1117 m->mothurCalling = false;
1118 m->mothurOut("/******************************************/"); m->mothurOutEndLine();
1120 nameFile = filenames["name"][0];
1121 inputFile = filenames["fasta"][0];
1125 catch(exception& e) {
1126 m->errorOut(e, "ChimeraUchimeCommand", "getNamesFile");
1130 //**********************************************************************************************************************
1131 int ChimeraUchimeCommand::driverGroups(string outputFName, string filename, string accnos, string alns, int start, int end, vector<string> groups){
1135 int numChimeras = 0;
1137 for (int i = start; i < end; i++) {
1138 int start = time(NULL); if (m->control_pressed) { return 0; }
1141 if (hasCount) { error = cparser->getSeqs(groups[i], filename, true); if ((error == 1) || m->control_pressed) { return 0; } }
1142 else { error = sparser->getSeqs(groups[i], filename, true); if ((error == 1) || m->control_pressed) { return 0; } }
1144 int numSeqs = driver((outputFName + groups[i]), filename, (accnos+ groups[i]), (alns+ groups[i]), numChimeras);
1145 totalSeqs += numSeqs;
1147 if (m->control_pressed) { return 0; }
1149 //remove file made for uchime
1150 if (!m->debug) { m->mothurRemove(filename); }
1151 else { m->mothurOut("[DEBUG]: saving file: " + filename + ".\n"); }
1154 m->appendFiles((outputFName+groups[i]), outputFName); m->mothurRemove((outputFName+groups[i]));
1155 m->appendFiles((accnos+groups[i]), accnos); m->mothurRemove((accnos+groups[i]));
1156 if (chimealns) { m->appendFiles((alns+groups[i]), alns); m->mothurRemove((alns+groups[i])); }
1158 m->mothurOutEndLine(); m->mothurOut("It took " + toString(time(NULL) - start) + " secs to check " + toString(numSeqs) + " sequences from group " + groups[i] + "."); m->mothurOutEndLine();
1163 catch(exception& e) {
1164 m->errorOut(e, "ChimeraUchimeCommand", "driverGroups");
1168 //**********************************************************************************************************************
1170 int ChimeraUchimeCommand::driver(string outputFName, string filename, string accnos, string alns, int& numChimeras){
1173 outputFName = m->getFullPathName(outputFName);
1174 filename = m->getFullPathName(filename);
1175 alns = m->getFullPathName(alns);
1177 //to allow for spaces in the path
1178 outputFName = "\"" + outputFName + "\"";
1179 filename = "\"" + filename + "\"";
1180 alns = "\"" + alns + "\"";
1182 vector<char*> cPara;
1184 string uchimeCommand = uchimeLocation;
1185 uchimeCommand = "\"" + uchimeCommand + "\" ";
1188 tempUchime= new char[uchimeCommand.length()+1];
1190 strncat(tempUchime, uchimeCommand.c_str(), uchimeCommand.length());
1191 cPara.push_back(tempUchime);
1193 //are you using a reference file
1194 if (templatefile != "self") {
1195 string outputFileName = filename.substr(1, filename.length()-2) + ".uchime_formatted";
1196 prepFile(filename.substr(1, filename.length()-2), outputFileName);
1197 filename = outputFileName;
1198 filename = "\"" + filename + "\"";
1199 //add reference file
1200 char* tempRef = new char[5];
1201 //strcpy(tempRef, "--db");
1202 *tempRef = '\0'; strncat(tempRef, "--db", 4);
1203 cPara.push_back(tempRef);
1204 char* tempR = new char[templatefile.length()+1];
1205 //strcpy(tempR, templatefile.c_str());
1206 *tempR = '\0'; strncat(tempR, templatefile.c_str(), templatefile.length());
1207 cPara.push_back(tempR);
1210 char* tempIn = new char[8];
1211 *tempIn = '\0'; strncat(tempIn, "--input", 7);
1212 //strcpy(tempIn, "--input");
1213 cPara.push_back(tempIn);
1214 char* temp = new char[filename.length()+1];
1215 *temp = '\0'; strncat(temp, filename.c_str(), filename.length());
1216 //strcpy(temp, filename.c_str());
1217 cPara.push_back(temp);
1219 char* tempO = new char[12];
1220 *tempO = '\0'; strncat(tempO, "--uchimeout", 11);
1221 //strcpy(tempO, "--uchimeout");
1222 cPara.push_back(tempO);
1223 char* tempout = new char[outputFName.length()+1];
1224 //strcpy(tempout, outputFName.c_str());
1225 *tempout = '\0'; strncat(tempout, outputFName.c_str(), outputFName.length());
1226 cPara.push_back(tempout);
1229 char* tempA = new char[13];
1230 *tempA = '\0'; strncat(tempA, "--uchimealns", 12);
1231 //strcpy(tempA, "--uchimealns");
1232 cPara.push_back(tempA);
1233 char* tempa = new char[alns.length()+1];
1234 //strcpy(tempa, alns.c_str());
1235 *tempa = '\0'; strncat(tempa, alns.c_str(), alns.length());
1236 cPara.push_back(tempa);
1240 char* tempskew = new char[9];
1241 *tempskew = '\0'; strncat(tempskew, "--abskew", 8);
1242 //strcpy(tempskew, "--abskew");
1243 cPara.push_back(tempskew);
1244 char* tempSkew = new char[abskew.length()+1];
1245 //strcpy(tempSkew, abskew.c_str());
1246 *tempSkew = '\0'; strncat(tempSkew, abskew.c_str(), abskew.length());
1247 cPara.push_back(tempSkew);
1251 char* tempminh = new char[7];
1252 *tempminh = '\0'; strncat(tempminh, "--minh", 6);
1253 //strcpy(tempminh, "--minh");
1254 cPara.push_back(tempminh);
1255 char* tempMinH = new char[minh.length()+1];
1256 *tempMinH = '\0'; strncat(tempMinH, minh.c_str(), minh.length());
1257 //strcpy(tempMinH, minh.c_str());
1258 cPara.push_back(tempMinH);
1262 char* tempmindiv = new char[9];
1263 *tempmindiv = '\0'; strncat(tempmindiv, "--mindiv", 8);
1264 //strcpy(tempmindiv, "--mindiv");
1265 cPara.push_back(tempmindiv);
1266 char* tempMindiv = new char[mindiv.length()+1];
1267 *tempMindiv = '\0'; strncat(tempMindiv, mindiv.c_str(), mindiv.length());
1268 //strcpy(tempMindiv, mindiv.c_str());
1269 cPara.push_back(tempMindiv);
1273 char* tempxn = new char[5];
1274 //strcpy(tempxn, "--xn");
1275 *tempxn = '\0'; strncat(tempxn, "--xn", 4);
1276 cPara.push_back(tempxn);
1277 char* tempXn = new char[xn.length()+1];
1278 //strcpy(tempXn, xn.c_str());
1279 *tempXn = '\0'; strncat(tempXn, xn.c_str(), xn.length());
1280 cPara.push_back(tempXn);
1284 char* tempdn = new char[5];
1285 //strcpy(tempdn, "--dn");
1286 *tempdn = '\0'; strncat(tempdn, "--dn", 4);
1287 cPara.push_back(tempdn);
1288 char* tempDn = new char[dn.length()+1];
1289 *tempDn = '\0'; strncat(tempDn, dn.c_str(), dn.length());
1290 //strcpy(tempDn, dn.c_str());
1291 cPara.push_back(tempDn);
1295 char* tempxa = new char[5];
1296 //strcpy(tempxa, "--xa");
1297 *tempxa = '\0'; strncat(tempxa, "--xa", 4);
1298 cPara.push_back(tempxa);
1299 char* tempXa = new char[xa.length()+1];
1300 *tempXa = '\0'; strncat(tempXa, xa.c_str(), xa.length());
1301 //strcpy(tempXa, xa.c_str());
1302 cPara.push_back(tempXa);
1306 char* tempchunks = new char[9];
1307 //strcpy(tempchunks, "--chunks");
1308 *tempchunks = '\0'; strncat(tempchunks, "--chunks", 8);
1309 cPara.push_back(tempchunks);
1310 char* tempChunks = new char[chunks.length()+1];
1311 *tempChunks = '\0'; strncat(tempChunks, chunks.c_str(), chunks.length());
1312 //strcpy(tempChunks, chunks.c_str());
1313 cPara.push_back(tempChunks);
1317 char* tempminchunk = new char[11];
1318 //strcpy(tempminchunk, "--minchunk");
1319 *tempminchunk = '\0'; strncat(tempminchunk, "--minchunk", 10);
1320 cPara.push_back(tempminchunk);
1321 char* tempMinchunk = new char[minchunk.length()+1];
1322 *tempMinchunk = '\0'; strncat(tempMinchunk, minchunk.c_str(), minchunk.length());
1323 //strcpy(tempMinchunk, minchunk.c_str());
1324 cPara.push_back(tempMinchunk);
1327 if (useIdsmoothwindow) {
1328 char* tempidsmoothwindow = new char[17];
1329 *tempidsmoothwindow = '\0'; strncat(tempidsmoothwindow, "--idsmoothwindow", 16);
1330 //strcpy(tempidsmoothwindow, "--idsmoothwindow");
1331 cPara.push_back(tempidsmoothwindow);
1332 char* tempIdsmoothwindow = new char[idsmoothwindow.length()+1];
1333 *tempIdsmoothwindow = '\0'; strncat(tempIdsmoothwindow, idsmoothwindow.c_str(), idsmoothwindow.length());
1334 //strcpy(tempIdsmoothwindow, idsmoothwindow.c_str());
1335 cPara.push_back(tempIdsmoothwindow);
1338 /*if (useMinsmoothid) {
1339 char* tempminsmoothid = new char[14];
1340 //strcpy(tempminsmoothid, "--minsmoothid");
1341 *tempminsmoothid = '\0'; strncat(tempminsmoothid, "--minsmoothid", 13);
1342 cPara.push_back(tempminsmoothid);
1343 char* tempMinsmoothid = new char[minsmoothid.length()+1];
1344 *tempMinsmoothid = '\0'; strncat(tempMinsmoothid, minsmoothid.c_str(), minsmoothid.length());
1345 //strcpy(tempMinsmoothid, minsmoothid.c_str());
1346 cPara.push_back(tempMinsmoothid);
1350 char* tempmaxp = new char[7];
1351 //strcpy(tempmaxp, "--maxp");
1352 *tempmaxp = '\0'; strncat(tempmaxp, "--maxp", 6);
1353 cPara.push_back(tempmaxp);
1354 char* tempMaxp = new char[maxp.length()+1];
1355 *tempMaxp = '\0'; strncat(tempMaxp, maxp.c_str(), maxp.length());
1356 //strcpy(tempMaxp, maxp.c_str());
1357 cPara.push_back(tempMaxp);
1361 char* tempskipgaps = new char[13];
1362 //strcpy(tempskipgaps, "--[no]skipgaps");
1363 *tempskipgaps = '\0'; strncat(tempskipgaps, "--noskipgaps", 12);
1364 cPara.push_back(tempskipgaps);
1368 char* tempskipgaps2 = new char[14];
1369 //strcpy(tempskipgaps2, "--[no]skipgaps2");
1370 *tempskipgaps2 = '\0'; strncat(tempskipgaps2, "--noskipgaps2", 13);
1371 cPara.push_back(tempskipgaps2);
1375 char* tempminlen = new char[9];
1376 *tempminlen = '\0'; strncat(tempminlen, "--minlen", 8);
1377 //strcpy(tempminlen, "--minlen");
1378 cPara.push_back(tempminlen);
1379 char* tempMinlen = new char[minlen.length()+1];
1380 //strcpy(tempMinlen, minlen.c_str());
1381 *tempMinlen = '\0'; strncat(tempMinlen, minlen.c_str(), minlen.length());
1382 cPara.push_back(tempMinlen);
1386 char* tempmaxlen = new char[9];
1387 //strcpy(tempmaxlen, "--maxlen");
1388 *tempmaxlen = '\0'; strncat(tempmaxlen, "--maxlen", 8);
1389 cPara.push_back(tempmaxlen);
1390 char* tempMaxlen = new char[maxlen.length()+1];
1391 *tempMaxlen = '\0'; strncat(tempMaxlen, maxlen.c_str(), maxlen.length());
1392 //strcpy(tempMaxlen, maxlen.c_str());
1393 cPara.push_back(tempMaxlen);
1397 char* tempucl = new char[5];
1398 strcpy(tempucl, "--ucl");
1399 cPara.push_back(tempucl);
1402 if (useQueryfract) {
1403 char* tempqueryfract = new char[13];
1404 *tempqueryfract = '\0'; strncat(tempqueryfract, "--queryfract", 12);
1405 //strcpy(tempqueryfract, "--queryfract");
1406 cPara.push_back(tempqueryfract);
1407 char* tempQueryfract = new char[queryfract.length()+1];
1408 *tempQueryfract = '\0'; strncat(tempQueryfract, queryfract.c_str(), queryfract.length());
1409 //strcpy(tempQueryfract, queryfract.c_str());
1410 cPara.push_back(tempQueryfract);
1414 char** uchimeParameters;
1415 uchimeParameters = new char*[cPara.size()];
1416 string commandString = "";
1417 for (int i = 0; i < cPara.size(); i++) { uchimeParameters[i] = cPara[i]; commandString += toString(cPara[i]) + " "; }
1418 //int numArgs = cPara.size();
1420 //uchime_main(numArgs, uchimeParameters);
1421 //cout << "commandString = " << commandString << endl;
1422 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
1424 commandString = "\"" + commandString + "\"";
1426 if (m->debug) { m->mothurOut("[DEBUG]: uchime command = " + commandString + ".\n"); }
1427 system(commandString.c_str());
1430 for(int i = 0; i < cPara.size(); i++) { delete cPara[i]; }
1431 delete[] uchimeParameters;
1433 //remove "" from filenames
1434 outputFName = outputFName.substr(1, outputFName.length()-2);
1435 filename = filename.substr(1, filename.length()-2);
1436 alns = alns.substr(1, alns.length()-2);
1438 if (m->control_pressed) { return 0; }
1440 //create accnos file from uchime results
1442 m->openInputFile(outputFName, in);
1445 m->openOutputFile(accnos, out);
1451 if (m->control_pressed) { break; }
1454 string chimeraFlag = "";
1455 in >> chimeraFlag >> name;
1457 //fix name if needed
1458 if (templatefile == "self") {
1459 name = name.substr(0, name.length()-1); //rip off last /
1460 name = name.substr(0, name.find_last_of('/'));
1463 for (int i = 0; i < 15; i++) { in >> chimeraFlag; }
1466 if (chimeraFlag == "Y") { out << name << endl; numChimeras++; }
1472 //if (templatefile != "self") { m->mothurRemove(filename); }
1476 catch(exception& e) {
1477 m->errorOut(e, "ChimeraUchimeCommand", "driver");
1481 /**************************************************************************************************/
1482 //uchime can't handle some of the things allowed in mothurs fasta files. This functions "cleans up" the file.
1483 int ChimeraUchimeCommand::prepFile(string filename, string output) {
1487 m->openInputFile(filename, in);
1490 m->openOutputFile(output, out);
1493 if (m->control_pressed) { break; }
1495 Sequence seq(in); m->gobble(in);
1497 if (seq.getName() != "") { seq.printSequence(out); }
1504 catch(exception& e) {
1505 m->errorOut(e, "ChimeraUchimeCommand", "prepFile");
1509 /**************************************************************************************************/
1511 int ChimeraUchimeCommand::createProcesses(string outputFileName, string filename, string accnos, string alns, int& numChimeras) {
1517 vector<string> files;
1519 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
1520 //break up file into multiple files
1521 m->divideFile(filename, processors, files);
1523 if (m->control_pressed) { return 0; }
1525 //loop through and create all the processes you want
1526 while (process != processors) {
1530 processIDS.push_back(pid); //create map from line number to pid so you can append files in correct order later
1532 }else if (pid == 0){
1533 num = driver(outputFileName + toString(getpid()) + ".temp", files[process], accnos + toString(getpid()) + ".temp", alns + toString(getpid()) + ".temp", numChimeras);
1535 //pass numSeqs to parent
1537 string tempFile = outputFileName + toString(getpid()) + ".num.temp";
1538 m->openOutputFile(tempFile, out);
1540 out << numChimeras << endl;
1545 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine();
1546 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
1552 num = driver(outputFileName, files[0], accnos, alns, numChimeras);
1554 //force parent to wait until all the processes are done
1555 for (int i=0;i<processIDS.size();i++) {
1556 int temp = processIDS[i];
1560 for (int i = 0; i < processIDS.size(); i++) {
1562 string tempFile = outputFileName + toString(processIDS[i]) + ".num.temp";
1563 m->openInputFile(tempFile, in);
1566 in >> tempNum; m->gobble(in);
1569 numChimeras += tempNum;
1571 in.close(); m->mothurRemove(tempFile);
1574 //////////////////////////////////////////////////////////////////////////////////////////////////////
1575 //Windows version shared memory, so be careful when passing variables through the preClusterData struct.
1576 //Above fork() will clone, so memory is separate, but that's not the case with windows,
1577 //////////////////////////////////////////////////////////////////////////////////////////////////////
1582 map<int, ofstream*> filehandles;
1583 map<int, ofstream*>::iterator it3;
1586 for (int i = 0; i < processors; i++) {
1587 temp = new ofstream;
1588 filehandles[i] = temp;
1589 m->openOutputFile(filename+toString(i)+".temp", *(temp));
1590 files.push_back(filename+toString(i)+".temp");
1594 m->openInputFile(filename, in);
1598 if (m->control_pressed) { in.close(); for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) { (*(it3->second)).close(); delete it3->second; } return 0; }
1600 Sequence tempSeq(in); m->gobble(in);
1602 if (tempSeq.getName() != "") {
1603 tempSeq.printSequence(*(filehandles[spot]));
1605 if (spot == processors) { spot = 0; }
1611 for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) {
1612 (*(it3->second)).close();
1616 //sanity check for number of processors
1617 if (count < processors) { processors = count; }
1619 vector<uchimeData*> pDataArray;
1620 DWORD dwThreadIdArray[processors-1];
1621 HANDLE hThreadArray[processors-1];
1622 vector<string> dummy; //used so that we can use the same struct for MyUchimeSeqsThreadFunction and MyUchimeThreadFunction
1624 //Create processor worker threads.
1625 for( int i=1; i<processors; i++ ){
1626 // Allocate memory for thread data.
1627 string extension = toString(i) + ".temp";
1629 uchimeData* tempUchime = new uchimeData(outputFileName+extension, uchimeLocation, templatefile, files[i], "", "", "", accnos+extension, alns+extension, dummy, m, 0, 0, i);
1630 tempUchime->setBooleans(useAbskew, chimealns, useMinH, useMindiv, useXn, useDn, useXa, useChunks, useMinchunk, useIdsmoothwindow, useMinsmoothid, useMaxp, skipgaps, skipgaps2, useMinlen, useMaxlen, ucl, useQueryfract, hasCount);
1631 tempUchime->setVariables(abskew, minh, mindiv, xn, dn, xa, chunks, minchunk, idsmoothwindow, minsmoothid, maxp, minlen, maxlen, queryfract);
1633 pDataArray.push_back(tempUchime);
1634 processIDS.push_back(i);
1636 //MySeqSumThreadFunction is in header. It must be global or static to work with the threads.
1637 //default security attributes, thread function name, argument to thread function, use default creation flags, returns the thread identifier
1638 hThreadArray[i-1] = CreateThread(NULL, 0, MyUchimeSeqsThreadFunction, pDataArray[i-1], 0, &dwThreadIdArray[i-1]);
1642 //using the main process as a worker saves time and memory
1643 num = driver(outputFileName, files[0], accnos, alns, numChimeras);
1645 //Wait until all threads have terminated.
1646 WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE);
1648 //Close all thread handles and free memory allocations.
1649 for(int i=0; i < pDataArray.size(); i++){
1650 num += pDataArray[i]->count;
1651 numChimeras += pDataArray[i]->numChimeras;
1652 CloseHandle(hThreadArray[i]);
1653 delete pDataArray[i];
1657 //append output files
1658 for(int i=0;i<processIDS.size();i++){
1659 m->appendFiles((outputFileName + toString(processIDS[i]) + ".temp"), outputFileName);
1660 m->mothurRemove((outputFileName + toString(processIDS[i]) + ".temp"));
1662 m->appendFiles((accnos + toString(processIDS[i]) + ".temp"), accnos);
1663 m->mothurRemove((accnos + toString(processIDS[i]) + ".temp"));
1666 m->appendFiles((alns + toString(processIDS[i]) + ".temp"), alns);
1667 m->mothurRemove((alns + toString(processIDS[i]) + ".temp"));
1671 //get rid of the file pieces.
1672 for (int i = 0; i < files.size(); i++) { m->mothurRemove(files[i]); }
1675 catch(exception& e) {
1676 m->errorOut(e, "ChimeraUchimeCommand", "createProcesses");
1680 /**************************************************************************************************/
1682 int ChimeraUchimeCommand::createProcessesGroups(string outputFName, string filename, string accnos, string alns, vector<string> groups, string nameFile, string groupFile, string fastaFile) {
1690 if (groups.size() < processors) { processors = groups.size(); }
1692 //divide the groups between the processors
1693 vector<linePair> lines;
1694 int numGroupsPerProcessor = groups.size() / processors;
1695 for (int i = 0; i < processors; i++) {
1696 int startIndex = i * numGroupsPerProcessor;
1697 int endIndex = (i+1) * numGroupsPerProcessor;
1698 if(i == (processors - 1)){ endIndex = groups.size(); }
1699 lines.push_back(linePair(startIndex, endIndex));
1702 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
1704 //loop through and create all the processes you want
1705 while (process != processors) {
1709 processIDS.push_back(pid); //create map from line number to pid so you can append files in correct order later
1711 }else if (pid == 0){
1712 num = driverGroups(outputFName + toString(getpid()) + ".temp", filename + toString(getpid()) + ".temp", accnos + toString(getpid()) + ".temp", alns + toString(getpid()) + ".temp", lines[process].start, lines[process].end, groups);
1714 //pass numSeqs to parent
1716 string tempFile = outputFName + toString(getpid()) + ".num.temp";
1717 m->openOutputFile(tempFile, out);
1723 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine();
1724 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
1730 num = driverGroups(outputFName, filename, accnos, alns, lines[0].start, lines[0].end, groups);
1732 //force parent to wait until all the processes are done
1733 for (int i=0;i<processIDS.size();i++) {
1734 int temp = processIDS[i];
1738 for (int i = 0; i < processIDS.size(); i++) {
1740 string tempFile = outputFName + toString(processIDS[i]) + ".num.temp";
1741 m->openInputFile(tempFile, in);
1742 if (!in.eof()) { int tempNum = 0; in >> tempNum; num += tempNum; }
1743 in.close(); m->mothurRemove(tempFile);
1747 //////////////////////////////////////////////////////////////////////////////////////////////////////
1748 //Windows version shared memory, so be careful when passing variables through the uchimeData struct.
1749 //Above fork() will clone, so memory is separate, but that's not the case with windows,
1750 //////////////////////////////////////////////////////////////////////////////////////////////////////
1752 vector<uchimeData*> pDataArray;
1753 DWORD dwThreadIdArray[processors-1];
1754 HANDLE hThreadArray[processors-1];
1756 //Create processor worker threads.
1757 for( int i=1; i<processors; i++ ){
1758 // Allocate memory for thread data.
1759 string extension = toString(i) + ".temp";
1761 uchimeData* tempUchime = new uchimeData(outputFName+extension, uchimeLocation, templatefile, filename+extension, fastaFile, nameFile, groupFile, accnos+extension, alns+extension, groups, m, lines[i].start, lines[i].end, i);
1762 tempUchime->setBooleans(useAbskew, chimealns, useMinH, useMindiv, useXn, useDn, useXa, useChunks, useMinchunk, useIdsmoothwindow, useMinsmoothid, useMaxp, skipgaps, skipgaps2, useMinlen, useMaxlen, ucl, useQueryfract, hasCount);
1763 tempUchime->setVariables(abskew, minh, mindiv, xn, dn, xa, chunks, minchunk, idsmoothwindow, minsmoothid, maxp, minlen, maxlen, queryfract);
1765 pDataArray.push_back(tempUchime);
1766 processIDS.push_back(i);
1768 //MyUchimeThreadFunction is in header. It must be global or static to work with the threads.
1769 //default security attributes, thread function name, argument to thread function, use default creation flags, returns the thread identifier
1770 hThreadArray[i-1] = CreateThread(NULL, 0, MyUchimeThreadFunction, pDataArray[i-1], 0, &dwThreadIdArray[i-1]);
1774 //using the main process as a worker saves time and memory
1775 num = driverGroups(outputFName, filename, accnos, alns, lines[0].start, lines[0].end, groups);
1777 //Wait until all threads have terminated.
1778 WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE);
1780 //Close all thread handles and free memory allocations.
1781 for(int i=0; i < pDataArray.size(); i++){
1782 num += pDataArray[i]->count;
1783 CloseHandle(hThreadArray[i]);
1784 delete pDataArray[i];
1789 //append output files
1790 for(int i=0;i<processIDS.size();i++){
1791 m->appendFiles((outputFName + toString(processIDS[i]) + ".temp"), outputFName);
1792 m->mothurRemove((outputFName + toString(processIDS[i]) + ".temp"));
1794 m->appendFiles((accnos + toString(processIDS[i]) + ".temp"), accnos);
1795 m->mothurRemove((accnos + toString(processIDS[i]) + ".temp"));
1798 m->appendFiles((alns + toString(processIDS[i]) + ".temp"), alns);
1799 m->mothurRemove((alns + toString(processIDS[i]) + ".temp"));
1806 catch(exception& e) {
1807 m->errorOut(e, "ChimeraUchimeCommand", "createProcessesGroups");
1811 /**************************************************************************************************/