2 * chimerauchimecommand.cpp
5 * Created by westcott on 5/13/11.
6 * Copyright 2011 Schloss Lab. All rights reserved.
10 #include "chimerauchimecommand.h"
11 #include "deconvolutecommand.h"
13 #include "sequence.hpp"
14 #include "referencedb.h"
15 #include "systemcommand.h"
17 //**********************************************************************************************************************
18 vector<string> ChimeraUchimeCommand::setParameters(){
20 CommandParameter ptemplate("reference", "InputTypes", "", "", "none", "none", "none","",false,true,true); parameters.push_back(ptemplate);
21 CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none","chimera-accnos",false,true,true); parameters.push_back(pfasta);
22 CommandParameter pname("name", "InputTypes", "", "", "NameCount", "none", "none","",false,false,true); parameters.push_back(pname);
23 CommandParameter pcount("count", "InputTypes", "", "", "NameCount-CountGroup", "none", "none","",false,false,true); parameters.push_back(pcount);
24 CommandParameter pgroup("group", "InputTypes", "", "", "CountGroup", "none", "none","",false,false,true); parameters.push_back(pgroup);
25 CommandParameter pprocessors("processors", "Number", "", "1", "", "", "","",false,false,true); parameters.push_back(pprocessors);
26 CommandParameter pstrand("strand", "String", "", "", "", "", "","",false,false); parameters.push_back(pstrand);
27 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
28 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
29 CommandParameter pabskew("abskew", "Number", "", "1.9", "", "", "","",false,false); parameters.push_back(pabskew);
30 CommandParameter pchimealns("chimealns", "Boolean", "", "F", "", "", "","alns",false,false); parameters.push_back(pchimealns);
31 CommandParameter pminh("minh", "Number", "", "0.3", "", "", "","",false,false); parameters.push_back(pminh);
32 CommandParameter pmindiv("mindiv", "Number", "", "0.5", "", "", "","",false,false); parameters.push_back(pmindiv);
33 CommandParameter pxn("xn", "Number", "", "8.0", "", "", "","",false,false); parameters.push_back(pxn);
34 CommandParameter pdn("dn", "Number", "", "1.4", "", "", "","",false,false); parameters.push_back(pdn);
35 CommandParameter pxa("xa", "Number", "", "1", "", "", "","",false,false); parameters.push_back(pxa);
36 CommandParameter pchunks("chunks", "Number", "", "4", "", "", "","",false,false); parameters.push_back(pchunks);
37 CommandParameter pminchunk("minchunk", "Number", "", "64", "", "", "","",false,false); parameters.push_back(pminchunk);
38 CommandParameter pidsmoothwindow("idsmoothwindow", "Number", "", "32", "", "", "","",false,false); parameters.push_back(pidsmoothwindow);
39 CommandParameter pdups("dereplicate", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(pdups);
41 //CommandParameter pminsmoothid("minsmoothid", "Number", "", "0.95", "", "", "",false,false); parameters.push_back(pminsmoothid);
42 CommandParameter pmaxp("maxp", "Number", "", "2", "", "", "","",false,false); parameters.push_back(pmaxp);
43 CommandParameter pskipgaps("skipgaps", "Boolean", "", "T", "", "", "","",false,false); parameters.push_back(pskipgaps);
44 CommandParameter pskipgaps2("skipgaps2", "Boolean", "", "T", "", "", "","",false,false); parameters.push_back(pskipgaps2);
45 CommandParameter pminlen("minlen", "Number", "", "10", "", "", "","",false,false); parameters.push_back(pminlen);
46 CommandParameter pmaxlen("maxlen", "Number", "", "10000", "", "", "","",false,false); parameters.push_back(pmaxlen);
47 CommandParameter pucl("ucl", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(pucl);
48 CommandParameter pqueryfract("queryfract", "Number", "", "0.5", "", "", "","",false,false); parameters.push_back(pqueryfract);
50 vector<string> myArray;
51 for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); }
55 m->errorOut(e, "ChimeraUchimeCommand", "setParameters");
59 //**********************************************************************************************************************
60 string ChimeraUchimeCommand::getHelpString(){
62 string helpString = "";
63 helpString += "The chimera.uchime command reads a fastafile and referencefile and outputs potentially chimeric sequences.\n";
64 helpString += "This command is a wrapper for uchime written by Robert C. Edgar.\n";
65 helpString += "The chimera.uchime command parameters are fasta, name, count, reference, processors, dereplicate, abskew, chimealns, minh, mindiv, xn, dn, xa, chunks, minchunk, idsmoothwindow, minsmoothid, maxp, skipgaps, skipgaps2, minlen, maxlen, ucl, strand and queryfact.\n";
66 helpString += "The fasta parameter allows you to enter the fasta file containing your potentially chimeric sequences, and is required, unless you have a valid current fasta file. \n";
67 helpString += "The name parameter allows you to provide a name file, if you are using template=self. \n";
68 helpString += "The count parameter allows you to provide a count file, if you are using template=self. \n";
69 helpString += "You may enter multiple fasta files by separating their names with dashes. ie. fasta=abrecovery.fasta-amazon.fasta \n";
70 helpString += "The group parameter allows you to provide a group file. The group file can be used with a namesfile and reference=self. When checking sequences, only sequences from the same group as the query sequence will be used as the reference. \n";
71 helpString += "If the dereplicate parameter is false, then if one group finds the seqeunce to be chimeric, then all groups find it to be chimeric, default=f.\n";
72 helpString += "The reference parameter allows you to enter a reference file containing known non-chimeric sequences, and is required. You may also set template=self, in this case the abundant sequences will be used as potential parents. \n";
73 helpString += "The processors parameter allows you to specify how many processors you would like to use. The default is 1. \n";
74 helpString += "The abskew parameter can only be used with template=self. Minimum abundance skew. Default 1.9. Abundance skew is: min [ abund(parent1), abund(parent2) ] / abund(query).\n";
75 helpString += "The chimealns parameter allows you to indicate you would like a file containing multiple alignments of query sequences to parents in human readable format. Alignments show columns with differences that support or contradict a chimeric model.\n";
76 helpString += "The minh parameter - mininum score to report chimera. Default 0.3. Values from 0.1 to 5 might be reasonable. Lower values increase sensitivity but may report more false positives. If you decrease xn you may need to increase minh, and vice versa.\n";
77 helpString += "The mindiv parameter - minimum divergence ratio, default 0.5. Div ratio is 100%% - %%identity between query sequence and the closest candidate for being a parent. If you don't care about very close chimeras, then you could increase mindiv to, say, 1.0 or 2.0, and also decrease minh, say to 0.1, to increase sensitivity. How well this works will depend on your data. Best is to tune parameters on a good benchmark.\n";
78 helpString += "The xn parameter - weight of a no vote. Default 8.0. Decreasing this weight to around 3 or 4 may give better performance on denoised data.\n";
79 helpString += "The dn parameter - pseudo-count prior on number of no votes. Default 1.4. Probably no good reason to change this unless you can retune to a good benchmark for your data. Reasonable values are probably in the range from 0.2 to 2.\n";
80 helpString += "The xa parameter - weight of an abstain vote. Default 1. So far, results do not seem to be very sensitive to this parameter, but if you have a good training set might be worth trying. Reasonable values might range from 0.1 to 2.\n";
81 helpString += "The chunks parameter is the number of chunks to extract from the query sequence when searching for parents. Default 4.\n";
82 helpString += "The minchunk parameter is the minimum length of a chunk. Default 64.\n";
83 helpString += "The idsmoothwindow parameter is the length of id smoothing window. Default 32.\n";
84 //helpString += "The minsmoothid parameter - minimum factional identity over smoothed window of candidate parent. Default 0.95.\n";
85 helpString += "The maxp parameter - maximum number of candidate parents to consider. Default 2. In tests so far, increasing maxp gives only a very small improvement in sensivity but tends to increase the error rate quite a bit.\n";
86 helpString += "The skipgaps parameter controls how gapped columns affect counting of diffs. If skipgaps is set to T, columns containing gaps do not found as diffs. Default = T.\n";
87 helpString += "The skipgaps2 parameter controls how gapped columns affect counting of diffs. If skipgaps2 is set to T, if column is immediately adjacent to a column containing a gap, it is not counted as a diff. Default = T.\n";
88 helpString += "The minlen parameter is the minimum unaligned sequence length. Defaults 10. Applies to both query and reference sequences.\n";
89 helpString += "The maxlen parameter is the maximum unaligned sequence length. Defaults 10000. Applies to both query and reference sequences.\n";
90 helpString += "The ucl parameter - use local-X alignments. Default is global-X or false. On tests so far, global-X is always better; this option is retained because it just might work well on some future type of data.\n";
91 helpString += "The queryfract parameter - minimum fraction of the query sequence that must be covered by a local-X alignment. Default 0.5. Applies only when ucl is true.\n";
93 helpString += "When using MPI, the processors parameter is set to the number of MPI processes running. \n";
95 helpString += "The chimera.uchime command should be in the following format: \n";
96 helpString += "chimera.uchime(fasta=yourFastaFile, reference=yourTemplate) \n";
97 helpString += "Example: chimera.uchime(fasta=AD.align, reference=silva.gold.align) \n";
98 helpString += "Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFastaFile).\n";
101 catch(exception& e) {
102 m->errorOut(e, "ChimeraUchimeCommand", "getHelpString");
106 //**********************************************************************************************************************
107 string ChimeraUchimeCommand::getOutputPattern(string type) {
111 if (type == "chimera") { pattern = "[filename],uchime.chimeras"; }
112 else if (type == "accnos") { pattern = "[filename],uchime.accnos"; }
113 else if (type == "alns") { pattern = "[filename],uchime.alns"; }
114 else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true; }
118 catch(exception& e) {
119 m->errorOut(e, "ChimeraUchimeCommand", "getOutputPattern");
123 //**********************************************************************************************************************
124 ChimeraUchimeCommand::ChimeraUchimeCommand(){
126 abort = true; calledHelp = true;
128 vector<string> tempOutNames;
129 outputTypes["chimera"] = tempOutNames;
130 outputTypes["accnos"] = tempOutNames;
131 outputTypes["alns"] = tempOutNames;
133 catch(exception& e) {
134 m->errorOut(e, "ChimeraUchimeCommand", "ChimeraUchimeCommand");
138 //***************************************************************************************************************
139 ChimeraUchimeCommand::ChimeraUchimeCommand(string option) {
141 abort = false; calledHelp = false; hasName=false; hasCount=false;
142 ReferenceDB* rdb = ReferenceDB::getInstance();
144 //allow user to run help
145 if(option == "help") { help(); abort = true; calledHelp = true; }
146 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
149 vector<string> myArray = setParameters();
151 OptionParser parser(option);
152 map<string,string> parameters = parser.getParameters();
154 ValidParameters validParameter("chimera.uchime");
155 map<string,string>::iterator it;
157 //check to make sure all parameters are valid for command
158 for (it = parameters.begin(); it != parameters.end(); it++) {
159 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
162 vector<string> tempOutNames;
163 outputTypes["chimera"] = tempOutNames;
164 outputTypes["accnos"] = tempOutNames;
165 outputTypes["alns"] = tempOutNames;
167 //if the user changes the input directory command factory will send this info to us in the output parameter
168 string inputDir = validParameter.validFile(parameters, "inputdir", false);
169 if (inputDir == "not found"){ inputDir = ""; }
171 //check for required parameters
172 fastafile = validParameter.validFile(parameters, "fasta", false);
173 if (fastafile == "not found") {
174 //if there is a current fasta file, use it
175 string filename = m->getFastaFile();
176 if (filename != "") { fastaFileNames.push_back(filename); m->mothurOut("Using " + filename + " as input file for the fasta parameter."); m->mothurOutEndLine(); }
177 else { m->mothurOut("You have no current fastafile and the fasta parameter is required."); m->mothurOutEndLine(); abort = true; }
179 m->splitAtDash(fastafile, fastaFileNames);
181 //go through files and make sure they are good, if not, then disregard them
182 for (int i = 0; i < fastaFileNames.size(); i++) {
185 if (fastaFileNames[i] == "current") {
186 fastaFileNames[i] = m->getFastaFile();
187 if (fastaFileNames[i] != "") { m->mothurOut("Using " + fastaFileNames[i] + " as input file for the fasta parameter where you had given current."); m->mothurOutEndLine(); }
189 m->mothurOut("You have no current fastafile, ignoring current."); m->mothurOutEndLine(); ignore=true;
190 //erase from file list
191 fastaFileNames.erase(fastaFileNames.begin()+i);
198 if (inputDir != "") {
199 string path = m->hasPath(fastaFileNames[i]);
200 //if the user has not given a path then, add inputdir. else leave path alone.
201 if (path == "") { fastaFileNames[i] = inputDir + fastaFileNames[i]; }
207 ableToOpen = m->openInputFile(fastaFileNames[i], in, "noerror");
209 //if you can't open it, try default location
210 if (ableToOpen == 1) {
211 if (m->getDefaultPath() != "") { //default path is set
212 string tryPath = m->getDefaultPath() + m->getSimpleName(fastaFileNames[i]);
213 m->mothurOut("Unable to open " + fastaFileNames[i] + ". Trying default " + tryPath); m->mothurOutEndLine();
215 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
217 fastaFileNames[i] = tryPath;
221 if (ableToOpen == 1) {
222 if (m->getOutputDir() != "") { //default path is set
223 string tryPath = m->getOutputDir() + m->getSimpleName(fastaFileNames[i]);
224 m->mothurOut("Unable to open " + fastaFileNames[i] + ". Trying output directory " + tryPath); m->mothurOutEndLine();
226 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
228 fastaFileNames[i] = tryPath;
234 if (ableToOpen == 1) {
235 m->mothurOut("Unable to open " + fastaFileNames[i] + ". It will be disregarded."); m->mothurOutEndLine();
236 //erase from file list
237 fastaFileNames.erase(fastaFileNames.begin()+i);
240 m->setFastaFile(fastaFileNames[i]);
245 //make sure there is at least one valid file left
246 if (fastaFileNames.size() == 0) { m->mothurOut("[ERROR]: no valid files."); m->mothurOutEndLine(); abort = true; }
250 //check for required parameters
251 namefile = validParameter.validFile(parameters, "name", false);
252 if (namefile == "not found") { namefile = ""; }
254 m->splitAtDash(namefile, nameFileNames);
256 //go through files and make sure they are good, if not, then disregard them
257 for (int i = 0; i < nameFileNames.size(); i++) {
260 if (nameFileNames[i] == "current") {
261 nameFileNames[i] = m->getNameFile();
262 if (nameFileNames[i] != "") { m->mothurOut("Using " + nameFileNames[i] + " as input file for the name parameter where you had given current."); m->mothurOutEndLine(); }
264 m->mothurOut("You have no current namefile, ignoring current."); m->mothurOutEndLine(); ignore=true;
265 //erase from file list
266 nameFileNames.erase(nameFileNames.begin()+i);
273 if (inputDir != "") {
274 string path = m->hasPath(nameFileNames[i]);
275 //if the user has not given a path then, add inputdir. else leave path alone.
276 if (path == "") { nameFileNames[i] = inputDir + nameFileNames[i]; }
282 ableToOpen = m->openInputFile(nameFileNames[i], in, "noerror");
284 //if you can't open it, try default location
285 if (ableToOpen == 1) {
286 if (m->getDefaultPath() != "") { //default path is set
287 string tryPath = m->getDefaultPath() + m->getSimpleName(nameFileNames[i]);
288 m->mothurOut("Unable to open " + nameFileNames[i] + ". Trying default " + tryPath); m->mothurOutEndLine();
290 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
292 nameFileNames[i] = tryPath;
296 if (ableToOpen == 1) {
297 if (m->getOutputDir() != "") { //default path is set
298 string tryPath = m->getOutputDir() + m->getSimpleName(nameFileNames[i]);
299 m->mothurOut("Unable to open " + nameFileNames[i] + ". Trying output directory " + tryPath); m->mothurOutEndLine();
301 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
303 nameFileNames[i] = tryPath;
309 if (ableToOpen == 1) {
310 m->mothurOut("Unable to open " + nameFileNames[i] + ". It will be disregarded."); m->mothurOutEndLine();
311 //erase from file list
312 nameFileNames.erase(nameFileNames.begin()+i);
315 m->setNameFile(nameFileNames[i]);
321 if (nameFileNames.size() != 0) { hasName = true; }
323 //check for required parameters
324 vector<string> countfileNames;
325 countfile = validParameter.validFile(parameters, "count", false);
326 if (countfile == "not found") {
329 m->splitAtDash(countfile, countfileNames);
331 //go through files and make sure they are good, if not, then disregard them
332 for (int i = 0; i < countfileNames.size(); i++) {
335 if (countfileNames[i] == "current") {
336 countfileNames[i] = m->getCountTableFile();
337 if (nameFileNames[i] != "") { m->mothurOut("Using " + countfileNames[i] + " as input file for the count parameter where you had given current."); m->mothurOutEndLine(); }
339 m->mothurOut("You have no current count file, ignoring current."); m->mothurOutEndLine(); ignore=true;
340 //erase from file list
341 countfileNames.erase(countfileNames.begin()+i);
348 if (inputDir != "") {
349 string path = m->hasPath(countfileNames[i]);
350 //if the user has not given a path then, add inputdir. else leave path alone.
351 if (path == "") { countfileNames[i] = inputDir + countfileNames[i]; }
357 ableToOpen = m->openInputFile(countfileNames[i], in, "noerror");
359 //if you can't open it, try default location
360 if (ableToOpen == 1) {
361 if (m->getDefaultPath() != "") { //default path is set
362 string tryPath = m->getDefaultPath() + m->getSimpleName(countfileNames[i]);
363 m->mothurOut("Unable to open " + countfileNames[i] + ". Trying default " + tryPath); m->mothurOutEndLine();
365 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
367 countfileNames[i] = tryPath;
371 if (ableToOpen == 1) {
372 if (m->getOutputDir() != "") { //default path is set
373 string tryPath = m->getOutputDir() + m->getSimpleName(countfileNames[i]);
374 m->mothurOut("Unable to open " + countfileNames[i] + ". Trying output directory " + tryPath); m->mothurOutEndLine();
376 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
378 countfileNames[i] = tryPath;
384 if (ableToOpen == 1) {
385 m->mothurOut("Unable to open " + countfileNames[i] + ". It will be disregarded."); m->mothurOutEndLine();
386 //erase from file list
387 countfileNames.erase(countfileNames.begin()+i);
390 m->setCountTableFile(countfileNames[i]);
396 if (countfileNames.size() != 0) { hasCount = true; }
398 //make sure there is at least one valid file left
399 if (hasName && hasCount) { m->mothurOut("[ERROR]: You must enter ONLY ONE of the following: count or name."); m->mothurOutEndLine(); abort = true; }
401 if (!hasName && hasCount) { nameFileNames = countfileNames; }
403 if ((hasCount || hasName) && (nameFileNames.size() != fastaFileNames.size())) { m->mothurOut("[ERROR]: The number of name or count files does not match the number of fastafiles, please correct."); m->mothurOutEndLine(); abort=true; }
405 bool hasGroup = true;
406 groupfile = validParameter.validFile(parameters, "group", false);
407 if (groupfile == "not found") { groupfile = ""; hasGroup = false; }
409 m->splitAtDash(groupfile, groupFileNames);
411 //go through files and make sure they are good, if not, then disregard them
412 for (int i = 0; i < groupFileNames.size(); i++) {
415 if (groupFileNames[i] == "current") {
416 groupFileNames[i] = m->getGroupFile();
417 if (groupFileNames[i] != "") { m->mothurOut("Using " + groupFileNames[i] + " as input file for the group parameter where you had given current."); m->mothurOutEndLine(); }
419 m->mothurOut("You have no current namefile, ignoring current."); m->mothurOutEndLine(); ignore=true;
420 //erase from file list
421 groupFileNames.erase(groupFileNames.begin()+i);
428 if (inputDir != "") {
429 string path = m->hasPath(groupFileNames[i]);
430 //if the user has not given a path then, add inputdir. else leave path alone.
431 if (path == "") { groupFileNames[i] = inputDir + groupFileNames[i]; }
437 ableToOpen = m->openInputFile(groupFileNames[i], in, "noerror");
439 //if you can't open it, try default location
440 if (ableToOpen == 1) {
441 if (m->getDefaultPath() != "") { //default path is set
442 string tryPath = m->getDefaultPath() + m->getSimpleName(groupFileNames[i]);
443 m->mothurOut("Unable to open " + groupFileNames[i] + ". Trying default " + tryPath); m->mothurOutEndLine();
445 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
447 groupFileNames[i] = tryPath;
451 if (ableToOpen == 1) {
452 if (m->getOutputDir() != "") { //default path is set
453 string tryPath = m->getOutputDir() + m->getSimpleName(groupFileNames[i]);
454 m->mothurOut("Unable to open " + groupFileNames[i] + ". Trying output directory " + tryPath); m->mothurOutEndLine();
456 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
458 groupFileNames[i] = tryPath;
464 if (ableToOpen == 1) {
465 m->mothurOut("Unable to open " + groupFileNames[i] + ". It will be disregarded."); m->mothurOutEndLine();
466 //erase from file list
467 groupFileNames.erase(groupFileNames.begin()+i);
470 m->setGroupFile(groupFileNames[i]);
475 //make sure there is at least one valid file left
476 if (groupFileNames.size() == 0) { m->mothurOut("[ERROR]: no valid group files."); m->mothurOutEndLine(); abort = true; }
479 if (hasGroup && (groupFileNames.size() != fastaFileNames.size())) { m->mothurOut("[ERROR]: The number of groupfiles does not match the number of fastafiles, please correct."); m->mothurOutEndLine(); abort=true; }
481 if (hasGroup && hasCount) { m->mothurOut("[ERROR]: You must enter ONLY ONE of the following: count or group."); m->mothurOutEndLine(); abort = true; }
482 //if the user changes the output directory command factory will send this info to us in the output parameter
483 outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = ""; }
486 //if the user changes the output directory command factory will send this info to us in the output parameter
487 outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = ""; }
490 it = parameters.find("reference");
491 //user has given a template file
492 if(it != parameters.end()){
493 if (it->second == "self") { templatefile = "self"; }
495 path = m->hasPath(it->second);
496 //if the user has not given a path then, add inputdir. else leave path alone.
497 if (path == "") { parameters["reference"] = inputDir + it->second; }
499 templatefile = validParameter.validFile(parameters, "reference", true);
500 if (templatefile == "not open") { abort = true; }
501 else if (templatefile == "not found") { //check for saved reference sequences
502 if (rdb->getSavedReference() != "") {
503 templatefile = rdb->getSavedReference();
504 m->mothurOutEndLine(); m->mothurOut("Using sequences from " + rdb->getSavedReference() + "."); m->mothurOutEndLine();
506 m->mothurOut("[ERROR]: You don't have any saved reference sequences and the reference parameter is a required.");
507 m->mothurOutEndLine();
512 }else if (hasName) { templatefile = "self"; }
513 else if (hasCount) { templatefile = "self"; }
515 if (rdb->getSavedReference() != "") {
516 templatefile = rdb->getSavedReference();
517 m->mothurOutEndLine(); m->mothurOut("Using sequences from " + rdb->getSavedReference() + "."); m->mothurOutEndLine();
519 m->mothurOut("[ERROR]: You don't have any saved reference sequences and the reference parameter is a required.");
520 m->mothurOutEndLine();
521 templatefile = ""; abort = true;
525 string temp = validParameter.validFile(parameters, "processors", false); if (temp == "not found"){ temp = m->getProcessors(); }
526 m->setProcessors(temp);
527 m->mothurConvert(temp, processors);
529 abskew = validParameter.validFile(parameters, "abskew", false); if (abskew == "not found"){ useAbskew = false; abskew = "1.9"; }else{ useAbskew = true; }
530 if (useAbskew && templatefile != "self") { m->mothurOut("The abskew parameter is only valid with template=self, ignoring."); m->mothurOutEndLine(); useAbskew = false; }
532 temp = validParameter.validFile(parameters, "chimealns", false); if (temp == "not found") { temp = "f"; }
533 chimealns = m->isTrue(temp);
535 minh = validParameter.validFile(parameters, "minh", false); if (minh == "not found") { useMinH = false; minh = "0.3"; } else{ useMinH = true; }
536 mindiv = validParameter.validFile(parameters, "mindiv", false); if (mindiv == "not found") { useMindiv = false; mindiv = "0.5"; } else{ useMindiv = true; }
537 xn = validParameter.validFile(parameters, "xn", false); if (xn == "not found") { useXn = false; xn = "8.0"; } else{ useXn = true; }
538 dn = validParameter.validFile(parameters, "dn", false); if (dn == "not found") { useDn = false; dn = "1.4"; } else{ useDn = true; }
539 xa = validParameter.validFile(parameters, "xa", false); if (xa == "not found") { useXa = false; xa = "1"; } else{ useXa = true; }
540 chunks = validParameter.validFile(parameters, "chunks", false); if (chunks == "not found") { useChunks = false; chunks = "4"; } else{ useChunks = true; }
541 minchunk = validParameter.validFile(parameters, "minchunk", false); if (minchunk == "not found") { useMinchunk = false; minchunk = "64"; } else{ useMinchunk = true; }
542 idsmoothwindow = validParameter.validFile(parameters, "idsmoothwindow", false); if (idsmoothwindow == "not found") { useIdsmoothwindow = false; idsmoothwindow = "32"; } else{ useIdsmoothwindow = true; }
543 //minsmoothid = validParameter.validFile(parameters, "minsmoothid", false); if (minsmoothid == "not found") { useMinsmoothid = false; minsmoothid = "0.95"; } else{ useMinsmoothid = true; }
544 maxp = validParameter.validFile(parameters, "maxp", false); if (maxp == "not found") { useMaxp = false; maxp = "2"; } else{ useMaxp = true; }
545 minlen = validParameter.validFile(parameters, "minlen", false); if (minlen == "not found") { useMinlen = false; minlen = "10"; } else{ useMinlen = true; }
546 maxlen = validParameter.validFile(parameters, "maxlen", false); if (maxlen == "not found") { useMaxlen = false; maxlen = "10000"; } else{ useMaxlen = true; }
548 strand = validParameter.validFile(parameters, "strand", false); if (strand == "not found") { strand = ""; }
550 temp = validParameter.validFile(parameters, "ucl", false); if (temp == "not found") { temp = "f"; }
551 ucl = m->isTrue(temp);
553 queryfract = validParameter.validFile(parameters, "queryfract", false); if (queryfract == "not found") { useQueryfract = false; queryfract = "0.5"; } else{ useQueryfract = true; }
554 if (!ucl && useQueryfract) { m->mothurOut("queryfact may only be used when ucl=t, ignoring."); m->mothurOutEndLine(); useQueryfract = false; }
556 temp = validParameter.validFile(parameters, "skipgaps", false); if (temp == "not found") { temp = "t"; }
557 skipgaps = m->isTrue(temp);
559 temp = validParameter.validFile(parameters, "skipgaps2", false); if (temp == "not found") { temp = "t"; }
560 skipgaps2 = m->isTrue(temp);
563 temp = validParameter.validFile(parameters, "dereplicate", false);
564 if (temp == "not found") {
565 if (groupfile != "") { temp = "false"; }
566 else { temp = "true"; }
568 dups = m->isTrue(temp);
571 if (hasName && (templatefile != "self")) { m->mothurOut("You have provided a namefile and the reference parameter is not set to self. I am not sure what reference you are trying to use, aborting."); m->mothurOutEndLine(); abort=true; }
572 if (hasGroup && (templatefile != "self")) { m->mothurOut("You have provided a group file and the reference parameter is not set to self. I am not sure what reference you are trying to use, aborting."); m->mothurOutEndLine(); abort=true; }
574 //look for uchime exe
576 string tempPath = path;
577 for (int i = 0; i < path.length(); i++) { tempPath[i] = tolower(path[i]); }
578 path = path.substr(0, (tempPath.find_last_of('m')));
580 string uchimeCommand;
581 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
582 uchimeCommand = path + "uchime"; // format the database, -o option gives us the ability
584 m->mothurOut("[DEBUG]: Uchime location using \"which uchime\" = ");
585 Command* newCommand = new SystemCommand("which uchime"); m->mothurOutEndLine();
586 newCommand->execute();
588 m->mothurOut("[DEBUG]: Mothur's location using \"which mothur\" = ");
589 newCommand = new SystemCommand("which mothur"); m->mothurOutEndLine();
590 newCommand->execute();
594 uchimeCommand = path + "uchime.exe";
597 //test to make sure uchime exists
599 uchimeCommand = m->getFullPathName(uchimeCommand);
600 int ableToOpen = m->openInputFile(uchimeCommand, in, "no error"); in.close();
601 if(ableToOpen == 1) {
602 m->mothurOut(uchimeCommand + " file does not exist. Checking path... \n");
603 //check to see if uchime is in the path??
605 string uLocation = m->findProgramPath("uchime");
609 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
610 ableToOpen = m->openInputFile(uLocation, in2, "no error"); in2.close();
612 ableToOpen = m->openInputFile((uLocation + ".exe"), in2, "no error"); in2.close();
615 if(ableToOpen == 1) { m->mothurOut("[ERROR]: " + uLocation + " file does not exist. mothur requires the uchime executable."); m->mothurOutEndLine(); abort = true; }
616 else { m->mothurOut("Found uchime in your path, using " + uLocation + "\n");uchimeLocation = uLocation; }
617 }else { uchimeLocation = uchimeCommand; }
619 uchimeLocation = m->getFullPathName(uchimeLocation);
622 catch(exception& e) {
623 m->errorOut(e, "ChimeraSlayerCommand", "ChimeraSlayerCommand");
627 //***************************************************************************************************************
629 int ChimeraUchimeCommand::execute(){
632 if (abort == true) { if (calledHelp) { return 0; } return 2; }
634 m->mothurOut("\nuchime by Robert C. Edgar\nhttp://drive5.com/uchime\nThis code is donated to the public domain.\n\n");
636 for (int s = 0; s < fastaFileNames.size(); s++) {
638 m->mothurOut("Checking sequences from " + fastaFileNames[s] + " ..." ); m->mothurOutEndLine();
640 int start = time(NULL);
641 string nameFile = "";
642 if (outputDir == "") { outputDir = m->hasPath(fastaFileNames[s]); }//if user entered a file with a path then preserve it
643 map<string, string> variables;
644 variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s]));
645 string outputFileName = getOutputFileName("chimera", variables);
646 string accnosFileName = getOutputFileName("accnos", variables);
647 string alnsFileName = getOutputFileName("alns", variables);
648 string newFasta = m->getRootName(fastaFileNames[s]) + "temp";
650 //you provided a groupfile
651 string groupFile = "";
652 bool hasGroup = false;
653 if (groupFileNames.size() != 0) { groupFile = groupFileNames[s]; hasGroup = true; }
656 if (ct.testGroups(nameFileNames[s])) { hasGroup = true; }
659 if ((templatefile == "self") && (!hasGroup)) { //you want to run uchime with a template=self and no groups
661 if (processors != 1) { m->mothurOut("When using template=self, mothur can only use 1 processor, continuing."); m->mothurOutEndLine(); processors = 1; }
662 if (nameFileNames.size() != 0) { //you provided a namefile and we don't need to create one
663 nameFile = nameFileNames[s];
664 }else { nameFile = getNamesFile(fastaFileNames[s]); }
666 map<string, string> seqs;
667 readFasta(fastaFileNames[s], seqs); if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } return 0; }
670 vector<seqPriorityNode> nameMapCount;
674 ct.readTable(nameFile);
675 for(map<string, string>::iterator it = seqs.begin(); it != seqs.end(); it++) {
676 int num = ct.getNumSeqs(it->first);
677 if (num == 0) { error = 1; }
679 seqPriorityNode temp(num, it->second, it->first);
680 nameMapCount.push_back(temp);
684 error = m->readNames(nameFile, nameMapCount, seqs); if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } return 0; }
686 if (error == 1) { for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } return 0; }
687 if (seqs.size() != nameMapCount.size()) { m->mothurOut( "The number of sequences in your fastafile does not match the number of sequences in your namefile, aborting."); m->mothurOutEndLine(); for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } return 0; }
689 printFile(nameMapCount, newFasta);
690 fastaFileNames[s] = newFasta;
693 if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } return 0; }
696 if (nameFileNames.size() != 0) { //you provided a namefile and we don't need to create one
697 nameFile = nameFileNames[s];
698 }else { nameFile = getNamesFile(fastaFileNames[s]); }
700 //Parse sequences by group
701 vector<string> groups;
702 map<string, string> uniqueNames;
704 cparser = new SequenceCountParser(nameFile, fastaFileNames[s]);
705 groups = cparser->getNamesOfGroups();
706 uniqueNames = cparser->getAllSeqsMap();
708 sparser = new SequenceParser(groupFile, fastaFileNames[s], nameFile);
709 groups = sparser->getNamesOfGroups();
710 uniqueNames = sparser->getAllSeqsMap();
713 if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } return 0; }
716 ofstream out, out1, out2;
717 m->openOutputFile(outputFileName, out); out.close();
718 m->openOutputFile(accnosFileName, out1); out1.close();
719 if (chimealns) { m->openOutputFile(alnsFileName, out2); out2.close(); }
722 if(processors == 1) { totalSeqs = driverGroups(outputFileName, newFasta, accnosFileName, alnsFileName, 0, groups.size(), groups); }
723 else { totalSeqs = createProcessesGroups(outputFileName, newFasta, accnosFileName, alnsFileName, groups, nameFile, groupFile, fastaFileNames[s]); }
725 if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } return 0; }
726 if (hasCount) { delete cparser; }
727 else { delete sparser; }
730 int totalChimeras = deconvoluteResults(uniqueNames, outputFileName, accnosFileName, alnsFileName);
732 m->mothurOutEndLine(); m->mothurOut("It took " + toString(time(NULL) - start) + " secs to check " + toString(totalSeqs) + " sequences. " + toString(totalChimeras) + " chimeras were found."); m->mothurOutEndLine();
733 m->mothurOut("The number of sequences checked may be larger than the number of unique sequences because some sequences are found in several samples."); m->mothurOutEndLine();
736 if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } return 0; }
739 if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } return 0; }
744 if(processors == 1){ numSeqs = driver(outputFileName, fastaFileNames[s], accnosFileName, alnsFileName, numChimeras); }
745 else{ numSeqs = createProcesses(outputFileName, fastaFileNames[s], accnosFileName, alnsFileName, numChimeras); }
749 m->openOutputFile(outputFileName+".temp", out);
750 out << "Score\tQuery\tParentA\tParentB\tIdQM\tIdQA\tIdQB\tIdAB\tIdQT\tLY\tLN\tLA\tRY\tRN\tRA\tDiv\tYN\n";
753 m->appendFiles(outputFileName, outputFileName+".temp");
754 m->mothurRemove(outputFileName); rename((outputFileName+".temp").c_str(), outputFileName.c_str());
756 if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } return 0; }
758 //remove file made for uchime
759 if (templatefile == "self") { m->mothurRemove(fastaFileNames[s]); }
761 m->mothurOutEndLine(); m->mothurOut("It took " + toString(time(NULL) - start) + " secs to check " + toString(numSeqs) + " sequences. " + toString(numChimeras) + " chimeras were found."); m->mothurOutEndLine();
764 outputNames.push_back(outputFileName); outputTypes["chimera"].push_back(outputFileName);
765 outputNames.push_back(accnosFileName); outputTypes["accnos"].push_back(accnosFileName);
766 if (chimealns) { outputNames.push_back(alnsFileName); outputTypes["alns"].push_back(alnsFileName); }
769 //set accnos file as new current accnosfile
771 itTypes = outputTypes.find("accnos");
772 if (itTypes != outputTypes.end()) {
773 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setAccnosFile(current); }
776 m->mothurOutEndLine();
777 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
778 for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); }
779 m->mothurOutEndLine();
784 catch(exception& e) {
785 m->errorOut(e, "ChimeraUchimeCommand", "execute");
789 //**********************************************************************************************************************
790 int ChimeraUchimeCommand::deconvoluteResults(map<string, string>& uniqueNames, string outputFileName, string accnosFileName, string alnsFileName){
792 map<string, string>::iterator itUnique;
797 m->openInputFile(accnosFileName, in2);
800 m->openOutputFile(accnosFileName+".temp", out2);
803 set<string> namesInFile; //this is so if a sequence is found to be chimera in several samples we dont write it to the results file more than once
804 set<string>::iterator itNames;
805 set<string> chimerasInFile;
806 set<string>::iterator itChimeras;
810 if (m->control_pressed) { in2.close(); out2.close(); m->mothurRemove(outputFileName); m->mothurRemove((accnosFileName+".temp")); return 0; }
812 in2 >> name; m->gobble(in2);
815 itUnique = uniqueNames.find(name);
817 if (itUnique == uniqueNames.end()) { m->mothurOut("[ERROR]: trouble parsing accnos results. Cannot find " + name + "."); m->mothurOutEndLine(); m->control_pressed = true; }
819 itChimeras = chimerasInFile.find((itUnique->second));
821 if (itChimeras == chimerasInFile.end()) {
822 out2 << itUnique->second << endl;
823 chimerasInFile.insert((itUnique->second));
831 m->mothurRemove(accnosFileName);
832 rename((accnosFileName+".temp").c_str(), accnosFileName.c_str());
838 m->openInputFile(outputFileName, in);
841 m->openOutputFile(outputFileName+".temp", out); out.setf(ios::fixed, ios::floatfield); out.setf(ios::showpoint);
842 out << "Score\tQuery\tParentA\tParentB\tIdQM\tIdQA\tIdQB\tIdAB\tIdQT\tLY\tLN\tLA\tRY\tRN\tRA\tDiv\tYN\n";
845 string parent1, parent2, temp2, temp3, temp4, temp5, temp6, temp7, temp8, temp9, temp10, temp11, temp12, temp13, flag;
848 //assumptions - in file each read will always look like - if uchime source is updated, revisit this code.
849 /* 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
850 0.000000 F11Fcsw_33372/ab=18/ * * * * * * * * * * * * * * N
851 0.018300 F11Fcsw_14980/ab=16/ F11Fcsw_1915/ab=35/ F11Fcsw_6032/ab=42/ 79.9 78.7 78.2 78.7 79.2 3 0 5 11 10 20 1.46 N
856 if (m->control_pressed) { in.close(); out.close(); m->mothurRemove((outputFileName+".temp")); return 0; }
859 in >> temp1; m->gobble(in);
860 in >> name; m->gobble(in);
861 in >> parent1; m->gobble(in);
862 in >> parent2; m->gobble(in);
863 in >> temp2 >> temp3 >> temp4 >> temp5 >> temp6 >> temp7 >> temp8 >> temp9 >> temp10 >> temp11 >> temp12 >> temp13 >> flag;
866 //parse name - name will look like U68590/ab=1/
867 string restOfName = "";
868 int pos = name.find_first_of('/');
869 if (pos != string::npos) {
870 restOfName = name.substr(pos);
871 name = name.substr(0, pos);
875 itUnique = uniqueNames.find(name);
877 if (itUnique == uniqueNames.end()) { m->mothurOut("[ERROR]: trouble parsing chimera results. Cannot find "+ name + "."); m->mothurOutEndLine(); m->control_pressed = true; }
879 name = itUnique->second;
880 //is this name already in the file
881 itNames = namesInFile.find((name));
883 if (itNames == namesInFile.end()) { //no not in file
884 if (flag == "N") { //are you really a no??
885 //is this sequence really not chimeric??
886 itChimeras = chimerasInFile.find(name);
888 //then you really are a no so print, otherwise skip
889 if (itChimeras == chimerasInFile.end()) { print = true; }
890 }else{ print = true; }
895 out << temp1 << '\t' << name << restOfName << '\t';
896 namesInFile.insert(name);
898 //parse parent1 names
899 if (parent1 != "*") {
901 pos = parent1.find_first_of('/');
902 if (pos != string::npos) {
903 restOfName = parent1.substr(pos);
904 parent1 = parent1.substr(0, pos);
907 itUnique = uniqueNames.find(parent1);
908 if (itUnique == uniqueNames.end()) { m->mothurOut("[ERROR]: trouble parsing chimera results. Cannot find parentA "+ parent1 + "."); m->mothurOutEndLine(); m->control_pressed = true; }
909 else { out << itUnique->second << restOfName << '\t'; }
910 }else { out << parent1 << '\t'; }
912 //parse parent2 names
913 if (parent2 != "*") {
915 pos = parent2.find_first_of('/');
916 if (pos != string::npos) {
917 restOfName = parent2.substr(pos);
918 parent2 = parent2.substr(0, pos);
921 itUnique = uniqueNames.find(parent2);
922 if (itUnique == uniqueNames.end()) { m->mothurOut("[ERROR]: trouble parsing chimera results. Cannot find parentB "+ parent2 + "."); m->mothurOutEndLine(); m->control_pressed = true; }
923 else { out << itUnique->second << restOfName << '\t'; }
924 }else { out << parent2 << '\t'; }
926 out << temp2 << '\t' << temp3 << '\t' << temp4 << '\t' << temp5 << '\t' << temp6 << '\t' << temp7 << '\t' << temp8 << '\t' << temp9 << '\t' << temp10 << '\t' << temp11 << '\t' << temp12 << temp13 << '\t' << flag << endl;
932 m->mothurRemove(outputFileName);
933 rename((outputFileName+".temp").c_str(), outputFileName.c_str());
937 //assumptions - in file each read will always look like - if uchime source is updated, revisit this code.
939 ------------------------------------------------------------------------
940 Query ( 179 nt) F21Fcsw_11639/ab=591/
941 ParentA ( 179 nt) F11Fcsw_6529/ab=1625/
942 ParentB ( 181 nt) F21Fcsw_12128/ab=1827/
944 A 1 AAGgAAGAtTAATACaagATGgCaTCatgAGtccgCATgTtcAcatGATTAAAG--gTaTtcCGGTagacGATGGGGATG 78
945 Q 1 AAGTAAGACTAATACCCAATGACGTCTCTAGAAGACATCTGAAAGAGATTAAAG--ATTTATCGGTGATGGATGGGGATG 78
946 B 1 AAGgAAGAtTAATcCaggATGggaTCatgAGttcACATgTccgcatGATTAAAGgtATTTtcCGGTagacGATGGGGATG 80
947 Diffs N N A N?N N N NNN N?NB N ?NaNNN B B NN NNNN
948 Votes 0 0 + 000 0 0 000 000+ 0 00!000 + 00 0000
949 Model AAAAAAAAAAAAAAAAAAAAAAxBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB
951 A 79 CGTtccATTAGaTaGTaGGCGGGGTAACGGCCCACCtAGtCttCGATggaTAGGGGTTCTGAGAGGAAGGTCCCCCACAT 158
952 Q 79 CGTCTGATTAGCTTGTTGGCGGGGTAACGGCCCACCAAGGCAACGATCAGTAGGGGTTCTGAGAGGAAGGTCCCCCACAT 158
953 B 81 CGTtccATTAGaTaGTaGGCGGGGTAACGGCCCACCtAGtCAACGATggaTAGGGGTTCTGAGAGGAAGGTCCCCCACAT 160
954 Diffs NNN N N N N N BB NNN
955 Votes 000 0 0 0 0 0 ++ 000
956 Model BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB
958 A 159 TGGAACTGAGACACGGTCCAA 179
959 Q 159 TGGAACTGAGACACGGTCCAA 179
960 B 161 TGGAACTGAGACACGGTCCAA 181
963 Model BBBBBBBBBBBBBBBBBBBBB
965 Ids. QA 76.6%, QB 77.7%, AB 93.7%, QModel 78.9%, Div. +1.5%
966 Diffs Left 7: N 0, A 6, Y 1 (14.3%); Right 35: N 1, A 30, Y 4 (11.4%), Score 0.0047
970 m->openInputFile(alnsFileName, in3);
973 m->openOutputFile(alnsFileName+".temp", out3); out3.setf(ios::fixed, ios::floatfield); out3.setf(ios::showpoint);
980 if (m->control_pressed) { in3.close(); out3.close(); m->mothurRemove(outputFileName); m->mothurRemove((accnosFileName)); m->mothurRemove((alnsFileName+".temp")); return 0; }
983 line = m->getline(in3);
987 istringstream iss(line);
990 //are you a name line
991 if ((temp == "Query") || (temp == "ParentA") || (temp == "ParentB")) {
993 for (int i = 0; i < line.length(); i++) {
995 if (line[i] == ')') { break; }
996 else { out3 << line[i]; }
999 if (spot == (line.length() - 1)) { m->mothurOut("[ERROR]: could not line sequence name in line " + line + "."); m->mothurOutEndLine(); m->control_pressed = true; }
1000 else if ((spot+2) > (line.length() - 1)) { m->mothurOut("[ERROR]: could not line sequence name in line " + line + "."); m->mothurOutEndLine(); m->control_pressed = true; }
1002 out << line[spot] << line[spot+1];
1004 name = line.substr(spot+2);
1006 //parse name - name will either look like U68590/ab=1/ or U68590
1007 string restOfName = "";
1008 int pos = name.find_first_of('/');
1009 if (pos != string::npos) {
1010 restOfName = name.substr(pos);
1011 name = name.substr(0, pos);
1015 itUnique = uniqueNames.find(name);
1017 if (itUnique == uniqueNames.end()) { m->mothurOut("[ERROR]: trouble parsing alns results. Cannot find "+ name + "."); m->mothurOutEndLine();m->control_pressed = true; }
1019 //only limit repeats on query names
1020 if (temp == "Query") {
1021 itNames = namesInFile.find((itUnique->second));
1023 if (itNames == namesInFile.end()) {
1024 out << itUnique->second << restOfName << endl;
1025 namesInFile.insert((itUnique->second));
1027 }else { out << itUnique->second << restOfName << endl; }
1032 }else { //not need to alter line
1033 out3 << line << endl;
1035 }else { out3 << endl; }
1040 m->mothurRemove(alnsFileName);
1041 rename((alnsFileName+".temp").c_str(), alnsFileName.c_str());
1046 catch(exception& e) {
1047 m->errorOut(e, "ChimeraUchimeCommand", "deconvoluteResults");
1051 //**********************************************************************************************************************
1052 int ChimeraUchimeCommand::printFile(vector<seqPriorityNode>& nameMapCount, string filename){
1055 sort(nameMapCount.begin(), nameMapCount.end(), compareSeqPriorityNodes);
1058 m->openOutputFile(filename, out);
1060 //print new file in order of
1061 for (int i = 0; i < nameMapCount.size(); i++) {
1062 out << ">" << nameMapCount[i].name << "/ab=" << nameMapCount[i].numIdentical << "/" << endl << nameMapCount[i].seq << endl;
1068 catch(exception& e) {
1069 m->errorOut(e, "ChimeraUchimeCommand", "printFile");
1073 //**********************************************************************************************************************
1074 int ChimeraUchimeCommand::readFasta(string filename, map<string, string>& seqs){
1076 //create input file for uchime
1077 //read through fastafile and store info
1079 m->openInputFile(filename, in);
1083 if (m->control_pressed) { in.close(); return 0; }
1085 Sequence seq(in); m->gobble(in);
1086 seqs[seq.getName()] = seq.getAligned();
1092 catch(exception& e) {
1093 m->errorOut(e, "ChimeraUchimeCommand", "readFasta");
1097 //**********************************************************************************************************************
1099 string ChimeraUchimeCommand::getNamesFile(string& inputFile){
1101 string nameFile = "";
1103 m->mothurOutEndLine(); m->mothurOut("No namesfile given, running unique.seqs command to generate one."); m->mothurOutEndLine(); m->mothurOutEndLine();
1105 //use unique.seqs to create new name and fastafile
1106 string inputString = "fasta=" + inputFile;
1107 m->mothurOut("/******************************************/"); m->mothurOutEndLine();
1108 m->mothurOut("Running command: unique.seqs(" + inputString + ")"); m->mothurOutEndLine();
1109 m->mothurCalling = true;
1111 Command* uniqueCommand = new DeconvoluteCommand(inputString);
1112 uniqueCommand->execute();
1114 map<string, vector<string> > filenames = uniqueCommand->getOutputFiles();
1116 delete uniqueCommand;
1117 m->mothurCalling = false;
1118 m->mothurOut("/******************************************/"); m->mothurOutEndLine();
1120 nameFile = filenames["name"][0];
1121 inputFile = filenames["fasta"][0];
1125 catch(exception& e) {
1126 m->errorOut(e, "ChimeraUchimeCommand", "getNamesFile");
1130 //**********************************************************************************************************************
1131 int ChimeraUchimeCommand::driverGroups(string outputFName, string filename, string accnos, string alns, int start, int end, vector<string> groups){
1135 int numChimeras = 0;
1137 for (int i = start; i < end; i++) {
1138 int start = time(NULL); if (m->control_pressed) { return 0; }
1141 if (hasCount) { error = cparser->getSeqs(groups[i], filename, true); if ((error == 1) || m->control_pressed) { return 0; } }
1142 else { error = sparser->getSeqs(groups[i], filename, true); if ((error == 1) || m->control_pressed) { return 0; } }
1144 int numSeqs = driver((outputFName + groups[i]), filename, (accnos+ groups[i]), (alns+ groups[i]), numChimeras);
1145 totalSeqs += numSeqs;
1147 if (m->control_pressed) { return 0; }
1149 //remove file made for uchime
1150 if (!m->debug) { m->mothurRemove(filename); }
1151 else { m->mothurOut("[DEBUG]: saving file: " + filename + ".\n"); }
1154 m->appendFiles((outputFName+groups[i]), outputFName); m->mothurRemove((outputFName+groups[i]));
1155 m->appendFiles((accnos+groups[i]), accnos); m->mothurRemove((accnos+groups[i]));
1156 if (chimealns) { m->appendFiles((alns+groups[i]), alns); m->mothurRemove((alns+groups[i])); }
1158 m->mothurOutEndLine(); m->mothurOut("It took " + toString(time(NULL) - start) + " secs to check " + toString(numSeqs) + " sequences from group " + groups[i] + "."); m->mothurOutEndLine();
1163 catch(exception& e) {
1164 m->errorOut(e, "ChimeraUchimeCommand", "driverGroups");
1168 //**********************************************************************************************************************
1170 int ChimeraUchimeCommand::driver(string outputFName, string filename, string accnos, string alns, int& numChimeras){
1173 outputFName = m->getFullPathName(outputFName);
1174 filename = m->getFullPathName(filename);
1175 alns = m->getFullPathName(alns);
1177 //to allow for spaces in the path
1178 outputFName = "\"" + outputFName + "\"";
1179 filename = "\"" + filename + "\"";
1180 alns = "\"" + alns + "\"";
1182 vector<char*> cPara;
1184 string uchimeCommand = uchimeLocation;
1185 uchimeCommand = "\"" + uchimeCommand + "\" ";
1188 tempUchime= new char[uchimeCommand.length()+1];
1190 strncat(tempUchime, uchimeCommand.c_str(), uchimeCommand.length());
1191 cPara.push_back(tempUchime);
1193 //are you using a reference file
1194 if (templatefile != "self") {
1195 string outputFileName = filename.substr(1, filename.length()-2) + ".uchime_formatted";
1196 prepFile(filename.substr(1, filename.length()-2), outputFileName);
1197 filename = outputFileName;
1198 filename = "\"" + filename + "\"";
1199 //add reference file
1200 char* tempRef = new char[5];
1201 //strcpy(tempRef, "--db");
1202 *tempRef = '\0'; strncat(tempRef, "--db", 4);
1203 cPara.push_back(tempRef);
1204 char* tempR = new char[templatefile.length()+1];
1205 //strcpy(tempR, templatefile.c_str());
1206 *tempR = '\0'; strncat(tempR, templatefile.c_str(), templatefile.length());
1207 cPara.push_back(tempR);
1210 char* tempIn = new char[8];
1211 *tempIn = '\0'; strncat(tempIn, "--input", 7);
1212 //strcpy(tempIn, "--input");
1213 cPara.push_back(tempIn);
1214 char* temp = new char[filename.length()+1];
1215 *temp = '\0'; strncat(temp, filename.c_str(), filename.length());
1216 //strcpy(temp, filename.c_str());
1217 cPara.push_back(temp);
1219 char* tempO = new char[12];
1220 *tempO = '\0'; strncat(tempO, "--uchimeout", 11);
1221 //strcpy(tempO, "--uchimeout");
1222 cPara.push_back(tempO);
1223 char* tempout = new char[outputFName.length()+1];
1224 //strcpy(tempout, outputFName.c_str());
1225 *tempout = '\0'; strncat(tempout, outputFName.c_str(), outputFName.length());
1226 cPara.push_back(tempout);
1229 char* tempA = new char[13];
1230 *tempA = '\0'; strncat(tempA, "--uchimealns", 12);
1231 //strcpy(tempA, "--uchimealns");
1232 cPara.push_back(tempA);
1233 char* tempa = new char[alns.length()+1];
1234 //strcpy(tempa, alns.c_str());
1235 *tempa = '\0'; strncat(tempa, alns.c_str(), alns.length());
1236 cPara.push_back(tempa);
1240 char* tempA = new char[9];
1241 *tempA = '\0'; strncat(tempA, "--strand", 8);
1242 cPara.push_back(tempA);
1243 char* tempa = new char[strand.length()+1];
1244 *tempa = '\0'; strncat(tempa, strand.c_str(), strand.length());
1245 cPara.push_back(tempa);
1249 char* tempskew = new char[9];
1250 *tempskew = '\0'; strncat(tempskew, "--abskew", 8);
1251 //strcpy(tempskew, "--abskew");
1252 cPara.push_back(tempskew);
1253 char* tempSkew = new char[abskew.length()+1];
1254 //strcpy(tempSkew, abskew.c_str());
1255 *tempSkew = '\0'; strncat(tempSkew, abskew.c_str(), abskew.length());
1256 cPara.push_back(tempSkew);
1260 char* tempminh = new char[7];
1261 *tempminh = '\0'; strncat(tempminh, "--minh", 6);
1262 //strcpy(tempminh, "--minh");
1263 cPara.push_back(tempminh);
1264 char* tempMinH = new char[minh.length()+1];
1265 *tempMinH = '\0'; strncat(tempMinH, minh.c_str(), minh.length());
1266 //strcpy(tempMinH, minh.c_str());
1267 cPara.push_back(tempMinH);
1271 char* tempmindiv = new char[9];
1272 *tempmindiv = '\0'; strncat(tempmindiv, "--mindiv", 8);
1273 //strcpy(tempmindiv, "--mindiv");
1274 cPara.push_back(tempmindiv);
1275 char* tempMindiv = new char[mindiv.length()+1];
1276 *tempMindiv = '\0'; strncat(tempMindiv, mindiv.c_str(), mindiv.length());
1277 //strcpy(tempMindiv, mindiv.c_str());
1278 cPara.push_back(tempMindiv);
1282 char* tempxn = new char[5];
1283 //strcpy(tempxn, "--xn");
1284 *tempxn = '\0'; strncat(tempxn, "--xn", 4);
1285 cPara.push_back(tempxn);
1286 char* tempXn = new char[xn.length()+1];
1287 //strcpy(tempXn, xn.c_str());
1288 *tempXn = '\0'; strncat(tempXn, xn.c_str(), xn.length());
1289 cPara.push_back(tempXn);
1293 char* tempdn = new char[5];
1294 //strcpy(tempdn, "--dn");
1295 *tempdn = '\0'; strncat(tempdn, "--dn", 4);
1296 cPara.push_back(tempdn);
1297 char* tempDn = new char[dn.length()+1];
1298 *tempDn = '\0'; strncat(tempDn, dn.c_str(), dn.length());
1299 //strcpy(tempDn, dn.c_str());
1300 cPara.push_back(tempDn);
1304 char* tempxa = new char[5];
1305 //strcpy(tempxa, "--xa");
1306 *tempxa = '\0'; strncat(tempxa, "--xa", 4);
1307 cPara.push_back(tempxa);
1308 char* tempXa = new char[xa.length()+1];
1309 *tempXa = '\0'; strncat(tempXa, xa.c_str(), xa.length());
1310 //strcpy(tempXa, xa.c_str());
1311 cPara.push_back(tempXa);
1315 char* tempchunks = new char[9];
1316 //strcpy(tempchunks, "--chunks");
1317 *tempchunks = '\0'; strncat(tempchunks, "--chunks", 8);
1318 cPara.push_back(tempchunks);
1319 char* tempChunks = new char[chunks.length()+1];
1320 *tempChunks = '\0'; strncat(tempChunks, chunks.c_str(), chunks.length());
1321 //strcpy(tempChunks, chunks.c_str());
1322 cPara.push_back(tempChunks);
1326 char* tempminchunk = new char[11];
1327 //strcpy(tempminchunk, "--minchunk");
1328 *tempminchunk = '\0'; strncat(tempminchunk, "--minchunk", 10);
1329 cPara.push_back(tempminchunk);
1330 char* tempMinchunk = new char[minchunk.length()+1];
1331 *tempMinchunk = '\0'; strncat(tempMinchunk, minchunk.c_str(), minchunk.length());
1332 //strcpy(tempMinchunk, minchunk.c_str());
1333 cPara.push_back(tempMinchunk);
1336 if (useIdsmoothwindow) {
1337 char* tempidsmoothwindow = new char[17];
1338 *tempidsmoothwindow = '\0'; strncat(tempidsmoothwindow, "--idsmoothwindow", 16);
1339 //strcpy(tempidsmoothwindow, "--idsmoothwindow");
1340 cPara.push_back(tempidsmoothwindow);
1341 char* tempIdsmoothwindow = new char[idsmoothwindow.length()+1];
1342 *tempIdsmoothwindow = '\0'; strncat(tempIdsmoothwindow, idsmoothwindow.c_str(), idsmoothwindow.length());
1343 //strcpy(tempIdsmoothwindow, idsmoothwindow.c_str());
1344 cPara.push_back(tempIdsmoothwindow);
1347 /*if (useMinsmoothid) {
1348 char* tempminsmoothid = new char[14];
1349 //strcpy(tempminsmoothid, "--minsmoothid");
1350 *tempminsmoothid = '\0'; strncat(tempminsmoothid, "--minsmoothid", 13);
1351 cPara.push_back(tempminsmoothid);
1352 char* tempMinsmoothid = new char[minsmoothid.length()+1];
1353 *tempMinsmoothid = '\0'; strncat(tempMinsmoothid, minsmoothid.c_str(), minsmoothid.length());
1354 //strcpy(tempMinsmoothid, minsmoothid.c_str());
1355 cPara.push_back(tempMinsmoothid);
1359 char* tempmaxp = new char[7];
1360 //strcpy(tempmaxp, "--maxp");
1361 *tempmaxp = '\0'; strncat(tempmaxp, "--maxp", 6);
1362 cPara.push_back(tempmaxp);
1363 char* tempMaxp = new char[maxp.length()+1];
1364 *tempMaxp = '\0'; strncat(tempMaxp, maxp.c_str(), maxp.length());
1365 //strcpy(tempMaxp, maxp.c_str());
1366 cPara.push_back(tempMaxp);
1370 char* tempskipgaps = new char[13];
1371 //strcpy(tempskipgaps, "--[no]skipgaps");
1372 *tempskipgaps = '\0'; strncat(tempskipgaps, "--noskipgaps", 12);
1373 cPara.push_back(tempskipgaps);
1377 char* tempskipgaps2 = new char[14];
1378 //strcpy(tempskipgaps2, "--[no]skipgaps2");
1379 *tempskipgaps2 = '\0'; strncat(tempskipgaps2, "--noskipgaps2", 13);
1380 cPara.push_back(tempskipgaps2);
1384 char* tempminlen = new char[9];
1385 *tempminlen = '\0'; strncat(tempminlen, "--minlen", 8);
1386 //strcpy(tempminlen, "--minlen");
1387 cPara.push_back(tempminlen);
1388 char* tempMinlen = new char[minlen.length()+1];
1389 //strcpy(tempMinlen, minlen.c_str());
1390 *tempMinlen = '\0'; strncat(tempMinlen, minlen.c_str(), minlen.length());
1391 cPara.push_back(tempMinlen);
1395 char* tempmaxlen = new char[9];
1396 //strcpy(tempmaxlen, "--maxlen");
1397 *tempmaxlen = '\0'; strncat(tempmaxlen, "--maxlen", 8);
1398 cPara.push_back(tempmaxlen);
1399 char* tempMaxlen = new char[maxlen.length()+1];
1400 *tempMaxlen = '\0'; strncat(tempMaxlen, maxlen.c_str(), maxlen.length());
1401 //strcpy(tempMaxlen, maxlen.c_str());
1402 cPara.push_back(tempMaxlen);
1406 char* tempucl = new char[5];
1407 strcpy(tempucl, "--ucl");
1408 cPara.push_back(tempucl);
1411 if (useQueryfract) {
1412 char* tempqueryfract = new char[13];
1413 *tempqueryfract = '\0'; strncat(tempqueryfract, "--queryfract", 12);
1414 //strcpy(tempqueryfract, "--queryfract");
1415 cPara.push_back(tempqueryfract);
1416 char* tempQueryfract = new char[queryfract.length()+1];
1417 *tempQueryfract = '\0'; strncat(tempQueryfract, queryfract.c_str(), queryfract.length());
1418 //strcpy(tempQueryfract, queryfract.c_str());
1419 cPara.push_back(tempQueryfract);
1423 char** uchimeParameters;
1424 uchimeParameters = new char*[cPara.size()];
1425 string commandString = "";
1426 for (int i = 0; i < cPara.size(); i++) { uchimeParameters[i] = cPara[i]; commandString += toString(cPara[i]) + " "; }
1427 //int numArgs = cPara.size();
1429 //uchime_main(numArgs, uchimeParameters);
1430 //cout << "commandString = " << commandString << endl;
1431 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
1433 commandString = "\"" + commandString + "\"";
1435 if (m->debug) { m->mothurOut("[DEBUG]: uchime command = " + commandString + ".\n"); }
1436 system(commandString.c_str());
1439 for(int i = 0; i < cPara.size(); i++) { delete cPara[i]; }
1440 delete[] uchimeParameters;
1442 //remove "" from filenames
1443 outputFName = outputFName.substr(1, outputFName.length()-2);
1444 filename = filename.substr(1, filename.length()-2);
1445 alns = alns.substr(1, alns.length()-2);
1447 if (m->control_pressed) { return 0; }
1449 //create accnos file from uchime results
1451 m->openInputFile(outputFName, in);
1454 m->openOutputFile(accnos, out);
1460 if (m->control_pressed) { break; }
1463 string chimeraFlag = "";
1464 //in >> chimeraFlag >> name;
1466 string line = m->getline(in);
1467 vector<string> pieces = m->splitWhiteSpace(line);
1468 if (pieces.size() > 2) {
1470 //fix name if needed
1471 if (templatefile == "self") {
1472 name = name.substr(0, name.length()-1); //rip off last /
1473 name = name.substr(0, name.find_last_of('/'));
1476 chimeraFlag = pieces[pieces.size()-1];
1478 //for (int i = 0; i < 15; i++) { in >> chimeraFlag; }
1481 if (chimeraFlag == "Y") { out << name << endl; numChimeras++; }
1487 //if (templatefile != "self") { m->mothurRemove(filename); }
1491 catch(exception& e) {
1492 m->errorOut(e, "ChimeraUchimeCommand", "driver");
1496 /**************************************************************************************************/
1497 //uchime can't handle some of the things allowed in mothurs fasta files. This functions "cleans up" the file.
1498 int ChimeraUchimeCommand::prepFile(string filename, string output) {
1502 m->openInputFile(filename, in);
1505 m->openOutputFile(output, out);
1508 if (m->control_pressed) { break; }
1510 Sequence seq(in); m->gobble(in);
1512 if (seq.getName() != "") { seq.printSequence(out); }
1519 catch(exception& e) {
1520 m->errorOut(e, "ChimeraUchimeCommand", "prepFile");
1524 /**************************************************************************************************/
1526 int ChimeraUchimeCommand::createProcesses(string outputFileName, string filename, string accnos, string alns, int& numChimeras) {
1532 vector<string> files;
1534 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
1535 //break up file into multiple files
1536 m->divideFile(filename, processors, files);
1538 if (m->control_pressed) { return 0; }
1540 //loop through and create all the processes you want
1541 while (process != processors) {
1545 processIDS.push_back(pid); //create map from line number to pid so you can append files in correct order later
1547 }else if (pid == 0){
1548 num = driver(outputFileName + toString(getpid()) + ".temp", files[process], accnos + toString(getpid()) + ".temp", alns + toString(getpid()) + ".temp", numChimeras);
1550 //pass numSeqs to parent
1552 string tempFile = outputFileName + toString(getpid()) + ".num.temp";
1553 m->openOutputFile(tempFile, out);
1555 out << numChimeras << endl;
1560 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine();
1561 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
1567 num = driver(outputFileName, files[0], accnos, alns, numChimeras);
1569 //force parent to wait until all the processes are done
1570 for (int i=0;i<processIDS.size();i++) {
1571 int temp = processIDS[i];
1575 for (int i = 0; i < processIDS.size(); i++) {
1577 string tempFile = outputFileName + toString(processIDS[i]) + ".num.temp";
1578 m->openInputFile(tempFile, in);
1581 in >> tempNum; m->gobble(in);
1584 numChimeras += tempNum;
1586 in.close(); m->mothurRemove(tempFile);
1589 //////////////////////////////////////////////////////////////////////////////////////////////////////
1590 //Windows version shared memory, so be careful when passing variables through the preClusterData struct.
1591 //Above fork() will clone, so memory is separate, but that's not the case with windows,
1592 //////////////////////////////////////////////////////////////////////////////////////////////////////
1597 map<int, ofstream*> filehandles;
1598 map<int, ofstream*>::iterator it3;
1601 for (int i = 0; i < processors; i++) {
1602 temp = new ofstream;
1603 filehandles[i] = temp;
1604 m->openOutputFile(filename+toString(i)+".temp", *(temp));
1605 files.push_back(filename+toString(i)+".temp");
1609 m->openInputFile(filename, in);
1613 if (m->control_pressed) { in.close(); for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) { (*(it3->second)).close(); delete it3->second; } return 0; }
1615 Sequence tempSeq(in); m->gobble(in);
1617 if (tempSeq.getName() != "") {
1618 tempSeq.printSequence(*(filehandles[spot]));
1620 if (spot == processors) { spot = 0; }
1626 for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) {
1627 (*(it3->second)).close();
1631 //sanity check for number of processors
1632 if (count < processors) { processors = count; }
1634 vector<uchimeData*> pDataArray;
1635 DWORD dwThreadIdArray[processors-1];
1636 HANDLE hThreadArray[processors-1];
1637 vector<string> dummy; //used so that we can use the same struct for MyUchimeSeqsThreadFunction and MyUchimeThreadFunction
1639 //Create processor worker threads.
1640 for( int i=1; i<processors; i++ ){
1641 // Allocate memory for thread data.
1642 string extension = toString(i) + ".temp";
1644 uchimeData* tempUchime = new uchimeData(outputFileName+extension, uchimeLocation, templatefile, files[i], "", "", "", accnos+extension, alns+extension, dummy, m, 0, 0, i);
1645 tempUchime->setBooleans(useAbskew, chimealns, useMinH, useMindiv, useXn, useDn, useXa, useChunks, useMinchunk, useIdsmoothwindow, useMinsmoothid, useMaxp, skipgaps, skipgaps2, useMinlen, useMaxlen, ucl, useQueryfract, hasCount);
1646 tempUchime->setVariables(abskew, minh, mindiv, xn, dn, xa, chunks, minchunk, idsmoothwindow, minsmoothid, maxp, minlen, maxlen, queryfract, strand);
1648 pDataArray.push_back(tempUchime);
1649 processIDS.push_back(i);
1651 //MySeqSumThreadFunction is in header. It must be global or static to work with the threads.
1652 //default security attributes, thread function name, argument to thread function, use default creation flags, returns the thread identifier
1653 hThreadArray[i-1] = CreateThread(NULL, 0, MyUchimeSeqsThreadFunction, pDataArray[i-1], 0, &dwThreadIdArray[i-1]);
1657 //using the main process as a worker saves time and memory
1658 num = driver(outputFileName, files[0], accnos, alns, numChimeras);
1660 //Wait until all threads have terminated.
1661 WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE);
1663 //Close all thread handles and free memory allocations.
1664 for(int i=0; i < pDataArray.size(); i++){
1665 num += pDataArray[i]->count;
1666 numChimeras += pDataArray[i]->numChimeras;
1667 CloseHandle(hThreadArray[i]);
1668 delete pDataArray[i];
1672 //append output files
1673 for(int i=0;i<processIDS.size();i++){
1674 m->appendFiles((outputFileName + toString(processIDS[i]) + ".temp"), outputFileName);
1675 m->mothurRemove((outputFileName + toString(processIDS[i]) + ".temp"));
1677 m->appendFiles((accnos + toString(processIDS[i]) + ".temp"), accnos);
1678 m->mothurRemove((accnos + toString(processIDS[i]) + ".temp"));
1681 m->appendFiles((alns + toString(processIDS[i]) + ".temp"), alns);
1682 m->mothurRemove((alns + toString(processIDS[i]) + ".temp"));
1686 //get rid of the file pieces.
1687 for (int i = 0; i < files.size(); i++) { m->mothurRemove(files[i]); }
1690 catch(exception& e) {
1691 m->errorOut(e, "ChimeraUchimeCommand", "createProcesses");
1695 /**************************************************************************************************/
1697 int ChimeraUchimeCommand::createProcessesGroups(string outputFName, string filename, string accnos, string alns, vector<string> groups, string nameFile, string groupFile, string fastaFile) {
1705 if (groups.size() < processors) { processors = groups.size(); }
1707 //divide the groups between the processors
1708 vector<linePair> lines;
1709 int numGroupsPerProcessor = groups.size() / processors;
1710 for (int i = 0; i < processors; i++) {
1711 int startIndex = i * numGroupsPerProcessor;
1712 int endIndex = (i+1) * numGroupsPerProcessor;
1713 if(i == (processors - 1)){ endIndex = groups.size(); }
1714 lines.push_back(linePair(startIndex, endIndex));
1717 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
1719 //loop through and create all the processes you want
1720 while (process != processors) {
1724 processIDS.push_back(pid); //create map from line number to pid so you can append files in correct order later
1726 }else if (pid == 0){
1727 num = driverGroups(outputFName + toString(getpid()) + ".temp", filename + toString(getpid()) + ".temp", accnos + toString(getpid()) + ".temp", alns + toString(getpid()) + ".temp", lines[process].start, lines[process].end, groups);
1729 //pass numSeqs to parent
1731 string tempFile = outputFName + toString(getpid()) + ".num.temp";
1732 m->openOutputFile(tempFile, out);
1738 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine();
1739 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
1745 num = driverGroups(outputFName, filename, accnos, alns, lines[0].start, lines[0].end, groups);
1747 //force parent to wait until all the processes are done
1748 for (int i=0;i<processIDS.size();i++) {
1749 int temp = processIDS[i];
1753 for (int i = 0; i < processIDS.size(); i++) {
1755 string tempFile = outputFName + toString(processIDS[i]) + ".num.temp";
1756 m->openInputFile(tempFile, in);
1757 if (!in.eof()) { int tempNum = 0; in >> tempNum; num += tempNum; }
1758 in.close(); m->mothurRemove(tempFile);
1762 //////////////////////////////////////////////////////////////////////////////////////////////////////
1763 //Windows version shared memory, so be careful when passing variables through the uchimeData struct.
1764 //Above fork() will clone, so memory is separate, but that's not the case with windows,
1765 //////////////////////////////////////////////////////////////////////////////////////////////////////
1767 vector<uchimeData*> pDataArray;
1768 DWORD dwThreadIdArray[processors-1];
1769 HANDLE hThreadArray[processors-1];
1771 //Create processor worker threads.
1772 for( int i=1; i<processors; i++ ){
1773 // Allocate memory for thread data.
1774 string extension = toString(i) + ".temp";
1776 uchimeData* tempUchime = new uchimeData(outputFName+extension, uchimeLocation, templatefile, filename+extension, fastaFile, nameFile, groupFile, accnos+extension, alns+extension, groups, m, lines[i].start, lines[i].end, i);
1777 tempUchime->setBooleans(useAbskew, chimealns, useMinH, useMindiv, useXn, useDn, useXa, useChunks, useMinchunk, useIdsmoothwindow, useMinsmoothid, useMaxp, skipgaps, skipgaps2, useMinlen, useMaxlen, ucl, useQueryfract, hasCount);
1778 tempUchime->setVariables(abskew, minh, mindiv, xn, dn, xa, chunks, minchunk, idsmoothwindow, minsmoothid, maxp, minlen, maxlen, queryfract, strand);
1780 pDataArray.push_back(tempUchime);
1781 processIDS.push_back(i);
1783 //MyUchimeThreadFunction is in header. It must be global or static to work with the threads.
1784 //default security attributes, thread function name, argument to thread function, use default creation flags, returns the thread identifier
1785 hThreadArray[i-1] = CreateThread(NULL, 0, MyUchimeThreadFunction, pDataArray[i-1], 0, &dwThreadIdArray[i-1]);
1789 //using the main process as a worker saves time and memory
1790 num = driverGroups(outputFName, filename, accnos, alns, lines[0].start, lines[0].end, groups);
1792 //Wait until all threads have terminated.
1793 WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE);
1795 //Close all thread handles and free memory allocations.
1796 for(int i=0; i < pDataArray.size(); i++){
1797 num += pDataArray[i]->count;
1798 CloseHandle(hThreadArray[i]);
1799 delete pDataArray[i];
1804 //append output files
1805 for(int i=0;i<processIDS.size();i++){
1806 m->appendFiles((outputFName + toString(processIDS[i]) + ".temp"), outputFName);
1807 m->mothurRemove((outputFName + toString(processIDS[i]) + ".temp"));
1809 m->appendFiles((accnos + toString(processIDS[i]) + ".temp"), accnos);
1810 m->mothurRemove((accnos + toString(processIDS[i]) + ".temp"));
1813 m->appendFiles((alns + toString(processIDS[i]) + ".temp"), alns);
1814 m->mothurRemove((alns + toString(processIDS[i]) + ".temp"));
1821 catch(exception& e) {
1822 m->errorOut(e, "ChimeraUchimeCommand", "createProcessesGroups");
1826 /**************************************************************************************************/