2 * chimerauchimecommand.cpp
5 * Created by westcott on 5/13/11.
6 * Copyright 2011 Schloss Lab. All rights reserved.
10 #include "chimerauchimecommand.h"
11 #include "deconvolutecommand.h"
13 #include "sequence.hpp"
14 #include "referencedb.h"
15 #include "systemcommand.h"
17 //**********************************************************************************************************************
18 vector<string> ChimeraUchimeCommand::setParameters(){
20 CommandParameter ptemplate("reference", "InputTypes", "", "", "none", "none", "none","",false,true,true); parameters.push_back(ptemplate);
21 CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none","chimera-accnos",false,true,true); parameters.push_back(pfasta);
22 CommandParameter pname("name", "InputTypes", "", "", "NameCount", "none", "none","",false,false,true); parameters.push_back(pname);
23 CommandParameter pcount("count", "InputTypes", "", "", "NameCount-CountGroup", "none", "none","",false,false,true); parameters.push_back(pcount);
24 CommandParameter pgroup("group", "InputTypes", "", "", "CountGroup", "none", "none","",false,false,true); parameters.push_back(pgroup);
25 CommandParameter pprocessors("processors", "Number", "", "1", "", "", "","",false,false,true); parameters.push_back(pprocessors);
26 CommandParameter pstrand("strand", "String", "", "", "", "", "","",false,false); parameters.push_back(pstrand);
27 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
28 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
29 CommandParameter pabskew("abskew", "Number", "", "1.9", "", "", "","",false,false); parameters.push_back(pabskew);
30 CommandParameter pchimealns("chimealns", "Boolean", "", "F", "", "", "","alns",false,false); parameters.push_back(pchimealns);
31 CommandParameter pminh("minh", "Number", "", "0.3", "", "", "","",false,false); parameters.push_back(pminh);
32 CommandParameter pmindiv("mindiv", "Number", "", "0.5", "", "", "","",false,false); parameters.push_back(pmindiv);
33 CommandParameter pxn("xn", "Number", "", "8.0", "", "", "","",false,false); parameters.push_back(pxn);
34 CommandParameter pdn("dn", "Number", "", "1.4", "", "", "","",false,false); parameters.push_back(pdn);
35 CommandParameter pxa("xa", "Number", "", "1", "", "", "","",false,false); parameters.push_back(pxa);
36 CommandParameter pchunks("chunks", "Number", "", "4", "", "", "","",false,false); parameters.push_back(pchunks);
37 CommandParameter pminchunk("minchunk", "Number", "", "64", "", "", "","",false,false); parameters.push_back(pminchunk);
38 CommandParameter pidsmoothwindow("idsmoothwindow", "Number", "", "32", "", "", "","",false,false); parameters.push_back(pidsmoothwindow);
39 CommandParameter pdups("dereplicate", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(pdups);
41 //CommandParameter pminsmoothid("minsmoothid", "Number", "", "0.95", "", "", "",false,false); parameters.push_back(pminsmoothid);
42 CommandParameter pmaxp("maxp", "Number", "", "2", "", "", "","",false,false); parameters.push_back(pmaxp);
43 CommandParameter pskipgaps("skipgaps", "Boolean", "", "T", "", "", "","",false,false); parameters.push_back(pskipgaps);
44 CommandParameter pskipgaps2("skipgaps2", "Boolean", "", "T", "", "", "","",false,false); parameters.push_back(pskipgaps2);
45 CommandParameter pminlen("minlen", "Number", "", "10", "", "", "","",false,false); parameters.push_back(pminlen);
46 CommandParameter pmaxlen("maxlen", "Number", "", "10000", "", "", "","",false,false); parameters.push_back(pmaxlen);
47 CommandParameter pucl("ucl", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(pucl);
48 CommandParameter pqueryfract("queryfract", "Number", "", "0.5", "", "", "","",false,false); parameters.push_back(pqueryfract);
50 vector<string> myArray;
51 for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); }
55 m->errorOut(e, "ChimeraUchimeCommand", "setParameters");
59 //**********************************************************************************************************************
60 string ChimeraUchimeCommand::getHelpString(){
62 string helpString = "";
63 helpString += "The chimera.uchime command reads a fastafile and referencefile and outputs potentially chimeric sequences.\n";
64 helpString += "This command is a wrapper for uchime written by Robert C. Edgar.\n";
65 helpString += "The chimera.uchime command parameters are fasta, name, count, reference, processors, dereplicate, abskew, chimealns, minh, mindiv, xn, dn, xa, chunks, minchunk, idsmoothwindow, minsmoothid, maxp, skipgaps, skipgaps2, minlen, maxlen, ucl, strand and queryfact.\n";
66 helpString += "The fasta parameter allows you to enter the fasta file containing your potentially chimeric sequences, and is required, unless you have a valid current fasta file. \n";
67 helpString += "The name parameter allows you to provide a name file, if you are using template=self. \n";
68 helpString += "The count parameter allows you to provide a count file, if you are using template=self. When you use a count file with group info and dereplicate=T, mothur will create a *.pick.count_table file containing seqeunces after chimeras are removed. \n";
69 helpString += "You may enter multiple fasta files by separating their names with dashes. ie. fasta=abrecovery.fasta-amazon.fasta \n";
70 helpString += "The group parameter allows you to provide a group file. The group file can be used with a namesfile and reference=self. When checking sequences, only sequences from the same group as the query sequence will be used as the reference. \n";
71 helpString += "If the dereplicate parameter is false, then if one group finds the seqeunce to be chimeric, then all groups find it to be chimeric, default=f.\n";
72 helpString += "The reference parameter allows you to enter a reference file containing known non-chimeric sequences, and is required. You may also set template=self, in this case the abundant sequences will be used as potential parents. \n";
73 helpString += "The processors parameter allows you to specify how many processors you would like to use. The default is 1. \n";
74 helpString += "The abskew parameter can only be used with template=self. Minimum abundance skew. Default 1.9. Abundance skew is: min [ abund(parent1), abund(parent2) ] / abund(query).\n";
75 helpString += "The chimealns parameter allows you to indicate you would like a file containing multiple alignments of query sequences to parents in human readable format. Alignments show columns with differences that support or contradict a chimeric model.\n";
76 helpString += "The minh parameter - mininum score to report chimera. Default 0.3. Values from 0.1 to 5 might be reasonable. Lower values increase sensitivity but may report more false positives. If you decrease xn you may need to increase minh, and vice versa.\n";
77 helpString += "The mindiv parameter - minimum divergence ratio, default 0.5. Div ratio is 100%% - %%identity between query sequence and the closest candidate for being a parent. If you don't care about very close chimeras, then you could increase mindiv to, say, 1.0 or 2.0, and also decrease minh, say to 0.1, to increase sensitivity. How well this works will depend on your data. Best is to tune parameters on a good benchmark.\n";
78 helpString += "The xn parameter - weight of a no vote. Default 8.0. Decreasing this weight to around 3 or 4 may give better performance on denoised data.\n";
79 helpString += "The dn parameter - pseudo-count prior on number of no votes. Default 1.4. Probably no good reason to change this unless you can retune to a good benchmark for your data. Reasonable values are probably in the range from 0.2 to 2.\n";
80 helpString += "The xa parameter - weight of an abstain vote. Default 1. So far, results do not seem to be very sensitive to this parameter, but if you have a good training set might be worth trying. Reasonable values might range from 0.1 to 2.\n";
81 helpString += "The chunks parameter is the number of chunks to extract from the query sequence when searching for parents. Default 4.\n";
82 helpString += "The minchunk parameter is the minimum length of a chunk. Default 64.\n";
83 helpString += "The idsmoothwindow parameter is the length of id smoothing window. Default 32.\n";
84 //helpString += "The minsmoothid parameter - minimum factional identity over smoothed window of candidate parent. Default 0.95.\n";
85 helpString += "The maxp parameter - maximum number of candidate parents to consider. Default 2. In tests so far, increasing maxp gives only a very small improvement in sensivity but tends to increase the error rate quite a bit.\n";
86 helpString += "The skipgaps parameter controls how gapped columns affect counting of diffs. If skipgaps is set to T, columns containing gaps do not found as diffs. Default = T.\n";
87 helpString += "The skipgaps2 parameter controls how gapped columns affect counting of diffs. If skipgaps2 is set to T, if column is immediately adjacent to a column containing a gap, it is not counted as a diff. Default = T.\n";
88 helpString += "The minlen parameter is the minimum unaligned sequence length. Defaults 10. Applies to both query and reference sequences.\n";
89 helpString += "The maxlen parameter is the maximum unaligned sequence length. Defaults 10000. Applies to both query and reference sequences.\n";
90 helpString += "The ucl parameter - use local-X alignments. Default is global-X or false. On tests so far, global-X is always better; this option is retained because it just might work well on some future type of data.\n";
91 helpString += "The queryfract parameter - minimum fraction of the query sequence that must be covered by a local-X alignment. Default 0.5. Applies only when ucl is true.\n";
93 helpString += "When using MPI, the processors parameter is set to the number of MPI processes running. \n";
95 helpString += "The chimera.uchime command should be in the following format: \n";
96 helpString += "chimera.uchime(fasta=yourFastaFile, reference=yourTemplate) \n";
97 helpString += "Example: chimera.uchime(fasta=AD.align, reference=silva.gold.align) \n";
98 helpString += "Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFastaFile).\n";
101 catch(exception& e) {
102 m->errorOut(e, "ChimeraUchimeCommand", "getHelpString");
106 //**********************************************************************************************************************
107 string ChimeraUchimeCommand::getOutputPattern(string type) {
111 if (type == "chimera") { pattern = "[filename],uchime.chimeras"; }
112 else if (type == "accnos") { pattern = "[filename],uchime.accnos"; }
113 else if (type == "alns") { pattern = "[filename],uchime.alns"; }
114 else if (type == "count") { pattern = "[filename],uchime.pick.count_table"; }
115 else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true; }
119 catch(exception& e) {
120 m->errorOut(e, "ChimeraUchimeCommand", "getOutputPattern");
124 //**********************************************************************************************************************
125 ChimeraUchimeCommand::ChimeraUchimeCommand(){
127 abort = true; calledHelp = true;
129 vector<string> tempOutNames;
130 outputTypes["chimera"] = tempOutNames;
131 outputTypes["accnos"] = tempOutNames;
132 outputTypes["alns"] = tempOutNames;
133 outputTypes["count"] = tempOutNames;
135 catch(exception& e) {
136 m->errorOut(e, "ChimeraUchimeCommand", "ChimeraUchimeCommand");
140 //***************************************************************************************************************
141 ChimeraUchimeCommand::ChimeraUchimeCommand(string option) {
143 abort = false; calledHelp = false; hasName=false; hasCount=false;
144 ReferenceDB* rdb = ReferenceDB::getInstance();
146 //allow user to run help
147 if(option == "help") { help(); abort = true; calledHelp = true; }
148 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
151 vector<string> myArray = setParameters();
153 OptionParser parser(option);
154 map<string,string> parameters = parser.getParameters();
156 ValidParameters validParameter("chimera.uchime");
157 map<string,string>::iterator it;
159 //check to make sure all parameters are valid for command
160 for (it = parameters.begin(); it != parameters.end(); it++) {
161 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
164 vector<string> tempOutNames;
165 outputTypes["chimera"] = tempOutNames;
166 outputTypes["accnos"] = tempOutNames;
167 outputTypes["alns"] = tempOutNames;
168 outputTypes["count"] = tempOutNames;
170 //if the user changes the input directory command factory will send this info to us in the output parameter
171 string inputDir = validParameter.validFile(parameters, "inputdir", false);
172 if (inputDir == "not found"){ inputDir = ""; }
174 //check for required parameters
175 fastafile = validParameter.validFile(parameters, "fasta", false);
176 if (fastafile == "not found") {
177 //if there is a current fasta file, use it
178 string filename = m->getFastaFile();
179 if (filename != "") { fastaFileNames.push_back(filename); m->mothurOut("Using " + filename + " as input file for the fasta parameter."); m->mothurOutEndLine(); }
180 else { m->mothurOut("You have no current fastafile and the fasta parameter is required."); m->mothurOutEndLine(); abort = true; }
182 m->splitAtDash(fastafile, fastaFileNames);
184 //go through files and make sure they are good, if not, then disregard them
185 for (int i = 0; i < fastaFileNames.size(); i++) {
188 if (fastaFileNames[i] == "current") {
189 fastaFileNames[i] = m->getFastaFile();
190 if (fastaFileNames[i] != "") { m->mothurOut("Using " + fastaFileNames[i] + " as input file for the fasta parameter where you had given current."); m->mothurOutEndLine(); }
192 m->mothurOut("You have no current fastafile, ignoring current."); m->mothurOutEndLine(); ignore=true;
193 //erase from file list
194 fastaFileNames.erase(fastaFileNames.begin()+i);
201 if (inputDir != "") {
202 string path = m->hasPath(fastaFileNames[i]);
203 //if the user has not given a path then, add inputdir. else leave path alone.
204 if (path == "") { fastaFileNames[i] = inputDir + fastaFileNames[i]; }
210 ableToOpen = m->openInputFile(fastaFileNames[i], in, "noerror");
212 //if you can't open it, try default location
213 if (ableToOpen == 1) {
214 if (m->getDefaultPath() != "") { //default path is set
215 string tryPath = m->getDefaultPath() + m->getSimpleName(fastaFileNames[i]);
216 m->mothurOut("Unable to open " + fastaFileNames[i] + ". Trying default " + tryPath); m->mothurOutEndLine();
218 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
220 fastaFileNames[i] = tryPath;
224 if (ableToOpen == 1) {
225 if (m->getOutputDir() != "") { //default path is set
226 string tryPath = m->getOutputDir() + m->getSimpleName(fastaFileNames[i]);
227 m->mothurOut("Unable to open " + fastaFileNames[i] + ". Trying output directory " + tryPath); m->mothurOutEndLine();
229 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
231 fastaFileNames[i] = tryPath;
237 if (ableToOpen == 1) {
238 m->mothurOut("Unable to open " + fastaFileNames[i] + ". It will be disregarded."); m->mothurOutEndLine();
239 //erase from file list
240 fastaFileNames.erase(fastaFileNames.begin()+i);
243 m->setFastaFile(fastaFileNames[i]);
248 //make sure there is at least one valid file left
249 if (fastaFileNames.size() == 0) { m->mothurOut("[ERROR]: no valid files."); m->mothurOutEndLine(); abort = true; }
253 //check for required parameters
254 namefile = validParameter.validFile(parameters, "name", false);
255 if (namefile == "not found") { namefile = ""; }
257 m->splitAtDash(namefile, nameFileNames);
259 //go through files and make sure they are good, if not, then disregard them
260 for (int i = 0; i < nameFileNames.size(); i++) {
263 if (nameFileNames[i] == "current") {
264 nameFileNames[i] = m->getNameFile();
265 if (nameFileNames[i] != "") { m->mothurOut("Using " + nameFileNames[i] + " as input file for the name parameter where you had given current."); m->mothurOutEndLine(); }
267 m->mothurOut("You have no current namefile, ignoring current."); m->mothurOutEndLine(); ignore=true;
268 //erase from file list
269 nameFileNames.erase(nameFileNames.begin()+i);
276 if (inputDir != "") {
277 string path = m->hasPath(nameFileNames[i]);
278 //if the user has not given a path then, add inputdir. else leave path alone.
279 if (path == "") { nameFileNames[i] = inputDir + nameFileNames[i]; }
285 ableToOpen = m->openInputFile(nameFileNames[i], in, "noerror");
287 //if you can't open it, try default location
288 if (ableToOpen == 1) {
289 if (m->getDefaultPath() != "") { //default path is set
290 string tryPath = m->getDefaultPath() + m->getSimpleName(nameFileNames[i]);
291 m->mothurOut("Unable to open " + nameFileNames[i] + ". Trying default " + tryPath); m->mothurOutEndLine();
293 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
295 nameFileNames[i] = tryPath;
299 if (ableToOpen == 1) {
300 if (m->getOutputDir() != "") { //default path is set
301 string tryPath = m->getOutputDir() + m->getSimpleName(nameFileNames[i]);
302 m->mothurOut("Unable to open " + nameFileNames[i] + ". Trying output directory " + tryPath); m->mothurOutEndLine();
304 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
306 nameFileNames[i] = tryPath;
312 if (ableToOpen == 1) {
313 m->mothurOut("Unable to open " + nameFileNames[i] + ". It will be disregarded."); m->mothurOutEndLine();
314 //erase from file list
315 nameFileNames.erase(nameFileNames.begin()+i);
318 m->setNameFile(nameFileNames[i]);
324 if (nameFileNames.size() != 0) { hasName = true; }
326 //check for required parameters
327 vector<string> countfileNames;
328 countfile = validParameter.validFile(parameters, "count", false);
329 if (countfile == "not found") {
332 m->splitAtDash(countfile, countfileNames);
334 //go through files and make sure they are good, if not, then disregard them
335 for (int i = 0; i < countfileNames.size(); i++) {
338 if (countfileNames[i] == "current") {
339 countfileNames[i] = m->getCountTableFile();
340 if (nameFileNames[i] != "") { m->mothurOut("Using " + countfileNames[i] + " as input file for the count parameter where you had given current."); m->mothurOutEndLine(); }
342 m->mothurOut("You have no current count file, ignoring current."); m->mothurOutEndLine(); ignore=true;
343 //erase from file list
344 countfileNames.erase(countfileNames.begin()+i);
351 if (inputDir != "") {
352 string path = m->hasPath(countfileNames[i]);
353 //if the user has not given a path then, add inputdir. else leave path alone.
354 if (path == "") { countfileNames[i] = inputDir + countfileNames[i]; }
360 ableToOpen = m->openInputFile(countfileNames[i], in, "noerror");
362 //if you can't open it, try default location
363 if (ableToOpen == 1) {
364 if (m->getDefaultPath() != "") { //default path is set
365 string tryPath = m->getDefaultPath() + m->getSimpleName(countfileNames[i]);
366 m->mothurOut("Unable to open " + countfileNames[i] + ". Trying default " + tryPath); m->mothurOutEndLine();
368 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
370 countfileNames[i] = tryPath;
374 if (ableToOpen == 1) {
375 if (m->getOutputDir() != "") { //default path is set
376 string tryPath = m->getOutputDir() + m->getSimpleName(countfileNames[i]);
377 m->mothurOut("Unable to open " + countfileNames[i] + ". Trying output directory " + tryPath); m->mothurOutEndLine();
379 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
381 countfileNames[i] = tryPath;
387 if (ableToOpen == 1) {
388 m->mothurOut("Unable to open " + countfileNames[i] + ". It will be disregarded."); m->mothurOutEndLine();
389 //erase from file list
390 countfileNames.erase(countfileNames.begin()+i);
393 m->setCountTableFile(countfileNames[i]);
399 if (countfileNames.size() != 0) { hasCount = true; }
401 //make sure there is at least one valid file left
402 if (hasName && hasCount) { m->mothurOut("[ERROR]: You must enter ONLY ONE of the following: count or name."); m->mothurOutEndLine(); abort = true; }
404 if (!hasName && hasCount) { nameFileNames = countfileNames; }
406 if ((hasCount || hasName) && (nameFileNames.size() != fastaFileNames.size())) { m->mothurOut("[ERROR]: The number of name or count files does not match the number of fastafiles, please correct."); m->mothurOutEndLine(); abort=true; }
408 bool hasGroup = true;
409 groupfile = validParameter.validFile(parameters, "group", false);
410 if (groupfile == "not found") { groupfile = ""; hasGroup = false; }
412 m->splitAtDash(groupfile, groupFileNames);
414 //go through files and make sure they are good, if not, then disregard them
415 for (int i = 0; i < groupFileNames.size(); i++) {
418 if (groupFileNames[i] == "current") {
419 groupFileNames[i] = m->getGroupFile();
420 if (groupFileNames[i] != "") { m->mothurOut("Using " + groupFileNames[i] + " as input file for the group parameter where you had given current."); m->mothurOutEndLine(); }
422 m->mothurOut("You have no current namefile, ignoring current."); m->mothurOutEndLine(); ignore=true;
423 //erase from file list
424 groupFileNames.erase(groupFileNames.begin()+i);
431 if (inputDir != "") {
432 string path = m->hasPath(groupFileNames[i]);
433 //if the user has not given a path then, add inputdir. else leave path alone.
434 if (path == "") { groupFileNames[i] = inputDir + groupFileNames[i]; }
440 ableToOpen = m->openInputFile(groupFileNames[i], in, "noerror");
442 //if you can't open it, try default location
443 if (ableToOpen == 1) {
444 if (m->getDefaultPath() != "") { //default path is set
445 string tryPath = m->getDefaultPath() + m->getSimpleName(groupFileNames[i]);
446 m->mothurOut("Unable to open " + groupFileNames[i] + ". Trying default " + tryPath); m->mothurOutEndLine();
448 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
450 groupFileNames[i] = tryPath;
454 if (ableToOpen == 1) {
455 if (m->getOutputDir() != "") { //default path is set
456 string tryPath = m->getOutputDir() + m->getSimpleName(groupFileNames[i]);
457 m->mothurOut("Unable to open " + groupFileNames[i] + ". Trying output directory " + tryPath); m->mothurOutEndLine();
459 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
461 groupFileNames[i] = tryPath;
467 if (ableToOpen == 1) {
468 m->mothurOut("Unable to open " + groupFileNames[i] + ". It will be disregarded."); m->mothurOutEndLine();
469 //erase from file list
470 groupFileNames.erase(groupFileNames.begin()+i);
473 m->setGroupFile(groupFileNames[i]);
478 //make sure there is at least one valid file left
479 if (groupFileNames.size() == 0) { m->mothurOut("[ERROR]: no valid group files."); m->mothurOutEndLine(); abort = true; }
482 if (hasGroup && (groupFileNames.size() != fastaFileNames.size())) { m->mothurOut("[ERROR]: The number of groupfiles does not match the number of fastafiles, please correct."); m->mothurOutEndLine(); abort=true; }
484 if (hasGroup && hasCount) { m->mothurOut("[ERROR]: You must enter ONLY ONE of the following: count or group."); m->mothurOutEndLine(); abort = true; }
485 //if the user changes the output directory command factory will send this info to us in the output parameter
486 outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = ""; }
489 //if the user changes the output directory command factory will send this info to us in the output parameter
490 outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = ""; }
493 it = parameters.find("reference");
494 //user has given a template file
495 if(it != parameters.end()){
496 if (it->second == "self") { templatefile = "self"; }
498 path = m->hasPath(it->second);
499 //if the user has not given a path then, add inputdir. else leave path alone.
500 if (path == "") { parameters["reference"] = inputDir + it->second; }
502 templatefile = validParameter.validFile(parameters, "reference", true);
503 if (templatefile == "not open") { abort = true; }
504 else if (templatefile == "not found") { //check for saved reference sequences
505 if (rdb->getSavedReference() != "") {
506 templatefile = rdb->getSavedReference();
507 m->mothurOutEndLine(); m->mothurOut("Using sequences from " + rdb->getSavedReference() + "."); m->mothurOutEndLine();
509 m->mothurOut("[ERROR]: You don't have any saved reference sequences and the reference parameter is a required.");
510 m->mothurOutEndLine();
515 }else if (hasName) { templatefile = "self"; }
516 else if (hasCount) { templatefile = "self"; }
518 if (rdb->getSavedReference() != "") {
519 templatefile = rdb->getSavedReference();
520 m->mothurOutEndLine(); m->mothurOut("Using sequences from " + rdb->getSavedReference() + "."); m->mothurOutEndLine();
522 m->mothurOut("[ERROR]: You don't have any saved reference sequences and the reference parameter is a required.");
523 m->mothurOutEndLine();
524 templatefile = ""; abort = true;
528 string temp = validParameter.validFile(parameters, "processors", false); if (temp == "not found"){ temp = m->getProcessors(); }
529 m->setProcessors(temp);
530 m->mothurConvert(temp, processors);
532 abskew = validParameter.validFile(parameters, "abskew", false); if (abskew == "not found"){ useAbskew = false; abskew = "1.9"; }else{ useAbskew = true; }
533 if (useAbskew && templatefile != "self") { m->mothurOut("The abskew parameter is only valid with template=self, ignoring."); m->mothurOutEndLine(); useAbskew = false; }
535 temp = validParameter.validFile(parameters, "chimealns", false); if (temp == "not found") { temp = "f"; }
536 chimealns = m->isTrue(temp);
538 minh = validParameter.validFile(parameters, "minh", false); if (minh == "not found") { useMinH = false; minh = "0.3"; } else{ useMinH = true; }
539 mindiv = validParameter.validFile(parameters, "mindiv", false); if (mindiv == "not found") { useMindiv = false; mindiv = "0.5"; } else{ useMindiv = true; }
540 xn = validParameter.validFile(parameters, "xn", false); if (xn == "not found") { useXn = false; xn = "8.0"; } else{ useXn = true; }
541 dn = validParameter.validFile(parameters, "dn", false); if (dn == "not found") { useDn = false; dn = "1.4"; } else{ useDn = true; }
542 xa = validParameter.validFile(parameters, "xa", false); if (xa == "not found") { useXa = false; xa = "1"; } else{ useXa = true; }
543 chunks = validParameter.validFile(parameters, "chunks", false); if (chunks == "not found") { useChunks = false; chunks = "4"; } else{ useChunks = true; }
544 minchunk = validParameter.validFile(parameters, "minchunk", false); if (minchunk == "not found") { useMinchunk = false; minchunk = "64"; } else{ useMinchunk = true; }
545 idsmoothwindow = validParameter.validFile(parameters, "idsmoothwindow", false); if (idsmoothwindow == "not found") { useIdsmoothwindow = false; idsmoothwindow = "32"; } else{ useIdsmoothwindow = true; }
546 //minsmoothid = validParameter.validFile(parameters, "minsmoothid", false); if (minsmoothid == "not found") { useMinsmoothid = false; minsmoothid = "0.95"; } else{ useMinsmoothid = true; }
547 maxp = validParameter.validFile(parameters, "maxp", false); if (maxp == "not found") { useMaxp = false; maxp = "2"; } else{ useMaxp = true; }
548 minlen = validParameter.validFile(parameters, "minlen", false); if (minlen == "not found") { useMinlen = false; minlen = "10"; } else{ useMinlen = true; }
549 maxlen = validParameter.validFile(parameters, "maxlen", false); if (maxlen == "not found") { useMaxlen = false; maxlen = "10000"; } else{ useMaxlen = true; }
551 strand = validParameter.validFile(parameters, "strand", false); if (strand == "not found") { strand = ""; }
553 temp = validParameter.validFile(parameters, "ucl", false); if (temp == "not found") { temp = "f"; }
554 ucl = m->isTrue(temp);
556 queryfract = validParameter.validFile(parameters, "queryfract", false); if (queryfract == "not found") { useQueryfract = false; queryfract = "0.5"; } else{ useQueryfract = true; }
557 if (!ucl && useQueryfract) { m->mothurOut("queryfact may only be used when ucl=t, ignoring."); m->mothurOutEndLine(); useQueryfract = false; }
559 temp = validParameter.validFile(parameters, "skipgaps", false); if (temp == "not found") { temp = "t"; }
560 skipgaps = m->isTrue(temp);
562 temp = validParameter.validFile(parameters, "skipgaps2", false); if (temp == "not found") { temp = "t"; }
563 skipgaps2 = m->isTrue(temp);
566 temp = validParameter.validFile(parameters, "dereplicate", false);
567 if (temp == "not found") { temp = "false"; }
568 dups = m->isTrue(temp);
571 if (hasName && (templatefile != "self")) { m->mothurOut("You have provided a namefile and the reference parameter is not set to self. I am not sure what reference you are trying to use, aborting."); m->mothurOutEndLine(); abort=true; }
572 if (hasCount && (templatefile != "self")) { m->mothurOut("You have provided a countfile and the reference parameter is not set to self. I am not sure what reference you are trying to use, aborting."); m->mothurOutEndLine(); abort=true; }
573 if (hasGroup && (templatefile != "self")) { m->mothurOut("You have provided a group file and the reference parameter is not set to self. I am not sure what reference you are trying to use, aborting."); m->mothurOutEndLine(); abort=true; }
575 //look for uchime exe
577 string tempPath = path;
578 for (int i = 0; i < path.length(); i++) { tempPath[i] = tolower(path[i]); }
579 path = path.substr(0, (tempPath.find_last_of('m')));
581 string uchimeCommand;
582 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
583 uchimeCommand = path + "uchime"; // format the database, -o option gives us the ability
585 m->mothurOut("[DEBUG]: Uchime location using \"which uchime\" = ");
586 Command* newCommand = new SystemCommand("which uchime"); m->mothurOutEndLine();
587 newCommand->execute();
589 m->mothurOut("[DEBUG]: Mothur's location using \"which mothur\" = ");
590 newCommand = new SystemCommand("which mothur"); m->mothurOutEndLine();
591 newCommand->execute();
595 uchimeCommand = path + "uchime.exe";
598 //test to make sure uchime exists
600 uchimeCommand = m->getFullPathName(uchimeCommand);
601 int ableToOpen = m->openInputFile(uchimeCommand, in, "no error"); in.close();
602 if(ableToOpen == 1) {
603 m->mothurOut(uchimeCommand + " file does not exist. Checking path... \n");
604 //check to see if uchime is in the path??
606 string uLocation = m->findProgramPath("uchime");
610 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
611 ableToOpen = m->openInputFile(uLocation, in2, "no error"); in2.close();
613 ableToOpen = m->openInputFile((uLocation + ".exe"), in2, "no error"); in2.close();
616 if(ableToOpen == 1) { m->mothurOut("[ERROR]: " + uLocation + " file does not exist. mothur requires the uchime executable."); m->mothurOutEndLine(); abort = true; }
617 else { m->mothurOut("Found uchime in your path, using " + uLocation + "\n");uchimeLocation = uLocation; }
618 }else { uchimeLocation = uchimeCommand; }
620 uchimeLocation = m->getFullPathName(uchimeLocation);
623 catch(exception& e) {
624 m->errorOut(e, "ChimeraSlayerCommand", "ChimeraSlayerCommand");
628 //***************************************************************************************************************
630 int ChimeraUchimeCommand::execute(){
633 if (abort == true) { if (calledHelp) { return 0; } return 2; }
635 m->mothurOut("\nuchime by Robert C. Edgar\nhttp://drive5.com/uchime\nThis code is donated to the public domain.\n\n");
637 for (int s = 0; s < fastaFileNames.size(); s++) {
639 m->mothurOut("Checking sequences from " + fastaFileNames[s] + " ..." ); m->mothurOutEndLine();
641 int start = time(NULL);
642 string nameFile = "";
643 if (outputDir == "") { outputDir = m->hasPath(fastaFileNames[s]); }//if user entered a file with a path then preserve it
644 map<string, string> variables;
645 variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s]));
646 string outputFileName = getOutputFileName("chimera", variables);
647 string accnosFileName = getOutputFileName("accnos", variables);
648 string alnsFileName = getOutputFileName("alns", variables);
649 string newFasta = m->getRootName(fastaFileNames[s]) + "temp";
650 string newCountFile = "";
652 //you provided a groupfile
653 string groupFile = "";
654 bool hasGroup = false;
655 if (groupFileNames.size() != 0) { groupFile = groupFileNames[s]; hasGroup = true; }
658 if (ct.testGroups(nameFileNames[s])) { hasGroup = true; }
659 variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(nameFileNames[s]));
660 newCountFile = getOutputFileName("count", variables);
663 if ((templatefile == "self") && (!hasGroup)) { //you want to run uchime with a template=self and no groups
665 if (processors != 1) { m->mothurOut("When using template=self, mothur can only use 1 processor, continuing."); m->mothurOutEndLine(); processors = 1; }
666 if (nameFileNames.size() != 0) { //you provided a namefile and we don't need to create one
667 nameFile = nameFileNames[s];
668 }else { nameFile = getNamesFile(fastaFileNames[s]); }
670 map<string, string> seqs;
671 readFasta(fastaFileNames[s], seqs); if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } return 0; }
674 vector<seqPriorityNode> nameMapCount;
678 ct.readTable(nameFile, true, false);
679 for(map<string, string>::iterator it = seqs.begin(); it != seqs.end(); it++) {
680 int num = ct.getNumSeqs(it->first);
681 if (num == 0) { error = 1; }
683 seqPriorityNode temp(num, it->second, it->first);
684 nameMapCount.push_back(temp);
688 error = m->readNames(nameFile, nameMapCount, seqs); if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } return 0; }
690 if (error == 1) { for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } return 0; }
691 if (seqs.size() != nameMapCount.size()) { m->mothurOut( "The number of sequences in your fastafile does not match the number of sequences in your namefile, aborting."); m->mothurOutEndLine(); for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } return 0; }
693 printFile(nameMapCount, newFasta);
694 fastaFileNames[s] = newFasta;
697 if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } return 0; }
700 if (nameFileNames.size() != 0) { //you provided a namefile and we don't need to create one
701 nameFile = nameFileNames[s];
702 }else { nameFile = getNamesFile(fastaFileNames[s]); }
704 //Parse sequences by group
705 vector<string> groups;
706 map<string, string> uniqueNames;
708 cparser = new SequenceCountParser(nameFile, fastaFileNames[s]);
709 groups = cparser->getNamesOfGroups();
710 uniqueNames = cparser->getAllSeqsMap();
712 sparser = new SequenceParser(groupFile, fastaFileNames[s], nameFile);
713 groups = sparser->getNamesOfGroups();
714 uniqueNames = sparser->getAllSeqsMap();
717 if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } return 0; }
720 ofstream out, out1, out2;
721 m->openOutputFile(outputFileName, out); out.close();
722 m->openOutputFile(accnosFileName, out1); out1.close();
723 if (chimealns) { m->openOutputFile(alnsFileName, out2); out2.close(); }
726 if(processors == 1) { totalSeqs = driverGroups(outputFileName, newFasta, accnosFileName, alnsFileName, newCountFile, 0, groups.size(), groups);
728 if (hasCount && dups) {
729 CountTable c; c.readTable(nameFile, true, false);
730 if (!m->isBlank(newCountFile)) {
732 m->openInputFile(newCountFile, in2);
736 in2 >> name >> group; m->gobble(in2);
737 c.setAbund(name, group, 0);
741 m->mothurRemove(newCountFile);
742 c.printTable(newCountFile);
745 }else { totalSeqs = createProcessesGroups(outputFileName, newFasta, accnosFileName, alnsFileName, newCountFile, groups, nameFile, groupFile, fastaFileNames[s]); }
747 if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } return 0; }
751 int totalChimeras = deconvoluteResults(uniqueNames, outputFileName, accnosFileName, alnsFileName);
753 m->mothurOutEndLine(); m->mothurOut("It took " + toString(time(NULL) - start) + " secs to check " + toString(totalSeqs) + " sequences. " + toString(totalChimeras) + " chimeras were found."); m->mothurOutEndLine();
754 m->mothurOut("The number of sequences checked may be larger than the number of unique sequences because some sequences are found in several samples."); m->mothurOutEndLine();
758 set<string> doNotRemove;
759 CountTable c; c.readTable(newCountFile, true, true);
760 vector<string> namesInTable = c.getNamesOfSeqs();
761 for (int i = 0; i < namesInTable.size(); i++) {
762 int temp = c.getNumSeqs(namesInTable[i]);
763 if (temp == 0) { c.remove(namesInTable[i]); }
764 else { doNotRemove.insert((namesInTable[i])); }
766 //remove names we want to keep from accnos file.
767 set<string> accnosNames = m->readAccnos(accnosFileName);
769 m->openOutputFile(accnosFileName, out2);
770 for (set<string>::iterator it = accnosNames.begin(); it != accnosNames.end(); it++) {
771 if (doNotRemove.count(*it) == 0) { out2 << (*it) << endl; }
774 c.printTable(newCountFile);
775 outputNames.push_back(newCountFile); outputTypes["count"].push_back(newCountFile);
779 if (hasCount) { delete cparser; }
780 else { delete sparser; }
782 if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } return 0; }
785 if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } return 0; }
790 if(processors == 1){ numSeqs = driver(outputFileName, fastaFileNames[s], accnosFileName, alnsFileName, numChimeras); }
791 else{ numSeqs = createProcesses(outputFileName, fastaFileNames[s], accnosFileName, alnsFileName, numChimeras); }
795 m->openOutputFile(outputFileName+".temp", out);
796 out << "Score\tQuery\tParentA\tParentB\tIdQM\tIdQA\tIdQB\tIdAB\tIdQT\tLY\tLN\tLA\tRY\tRN\tRA\tDiv\tYN\n";
799 m->appendFiles(outputFileName, outputFileName+".temp");
800 m->mothurRemove(outputFileName); rename((outputFileName+".temp").c_str(), outputFileName.c_str());
802 if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } return 0; }
804 //remove file made for uchime
805 if (templatefile == "self") { m->mothurRemove(fastaFileNames[s]); }
807 m->mothurOutEndLine(); m->mothurOut("It took " + toString(time(NULL) - start) + " secs to check " + toString(numSeqs) + " sequences. " + toString(numChimeras) + " chimeras were found."); m->mothurOutEndLine();
810 outputNames.push_back(outputFileName); outputTypes["chimera"].push_back(outputFileName);
811 outputNames.push_back(accnosFileName); outputTypes["accnos"].push_back(accnosFileName);
812 if (chimealns) { outputNames.push_back(alnsFileName); outputTypes["alns"].push_back(alnsFileName); }
815 //set accnos file as new current accnosfile
817 itTypes = outputTypes.find("accnos");
818 if (itTypes != outputTypes.end()) {
819 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setAccnosFile(current); }
822 itTypes = outputTypes.find("count");
823 if (itTypes != outputTypes.end()) {
824 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setCountTableFile(current); }
827 m->mothurOutEndLine();
828 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
829 for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); }
830 m->mothurOutEndLine();
835 catch(exception& e) {
836 m->errorOut(e, "ChimeraUchimeCommand", "execute");
840 //**********************************************************************************************************************
841 int ChimeraUchimeCommand::deconvoluteResults(map<string, string>& uniqueNames, string outputFileName, string accnosFileName, string alnsFileName){
843 map<string, string>::iterator itUnique;
847 m->openOutputFile(accnosFileName+".temp", out2);
850 set<string> namesInFile; //this is so if a sequence is found to be chimera in several samples we dont write it to the results file more than once
851 set<string>::iterator itNames;
852 set<string> chimerasInFile;
853 set<string>::iterator itChimeras;
855 if (!m->isBlank(accnosFileName)) {
858 m->openInputFile(accnosFileName, in2);
861 if (m->control_pressed) { in2.close(); out2.close(); m->mothurRemove(outputFileName); m->mothurRemove((accnosFileName+".temp")); return 0; }
863 in2 >> name; m->gobble(in2);
866 itUnique = uniqueNames.find(name);
868 if (itUnique == uniqueNames.end()) { m->mothurOut("[ERROR]: trouble parsing accnos results. Cannot find " + name + "."); m->mothurOutEndLine(); m->control_pressed = true; }
870 itChimeras = chimerasInFile.find((itUnique->second));
872 if (itChimeras == chimerasInFile.end()) {
873 out2 << itUnique->second << endl;
874 chimerasInFile.insert((itUnique->second));
883 m->mothurRemove(accnosFileName);
884 rename((accnosFileName+".temp").c_str(), accnosFileName.c_str());
890 m->openInputFile(outputFileName, in);
893 m->openOutputFile(outputFileName+".temp", out); out.setf(ios::fixed, ios::floatfield); out.setf(ios::showpoint);
894 out << "Score\tQuery\tParentA\tParentB\tIdQM\tIdQA\tIdQB\tIdAB\tIdQT\tLY\tLN\tLA\tRY\tRN\tRA\tDiv\tYN\n";
897 string parent1, parent2, temp2, temp3, temp4, temp5, temp6, temp7, temp8, temp9, temp10, temp11, temp12, temp13, flag;
900 //assumptions - in file each read will always look like - if uchime source is updated, revisit this code.
901 /* 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
902 0.000000 F11Fcsw_33372/ab=18/ * * * * * * * * * * * * * * N
903 0.018300 F11Fcsw_14980/ab=16/ F11Fcsw_1915/ab=35/ F11Fcsw_6032/ab=42/ 79.9 78.7 78.2 78.7 79.2 3 0 5 11 10 20 1.46 N
908 if (m->control_pressed) { in.close(); out.close(); m->mothurRemove((outputFileName+".temp")); return 0; }
911 in >> temp1; m->gobble(in);
912 in >> name; m->gobble(in);
913 in >> parent1; m->gobble(in);
914 in >> parent2; m->gobble(in);
915 in >> temp2 >> temp3 >> temp4 >> temp5 >> temp6 >> temp7 >> temp8 >> temp9 >> temp10 >> temp11 >> temp12 >> temp13 >> flag;
918 //parse name - name will look like U68590/ab=1/
919 string restOfName = "";
920 int pos = name.find_first_of('/');
921 if (pos != string::npos) {
922 restOfName = name.substr(pos);
923 name = name.substr(0, pos);
927 itUnique = uniqueNames.find(name);
929 if (itUnique == uniqueNames.end()) { m->mothurOut("[ERROR]: trouble parsing chimera results. Cannot find "+ name + "."); m->mothurOutEndLine(); m->control_pressed = true; }
931 name = itUnique->second;
932 //is this name already in the file
933 itNames = namesInFile.find((name));
935 if (itNames == namesInFile.end()) { //no not in file
936 if (flag == "N") { //are you really a no??
937 //is this sequence really not chimeric??
938 itChimeras = chimerasInFile.find(name);
940 //then you really are a no so print, otherwise skip
941 if (itChimeras == chimerasInFile.end()) { print = true; }
942 }else{ print = true; }
947 out << temp1 << '\t' << name << restOfName << '\t';
948 namesInFile.insert(name);
950 //parse parent1 names
951 if (parent1 != "*") {
953 pos = parent1.find_first_of('/');
954 if (pos != string::npos) {
955 restOfName = parent1.substr(pos);
956 parent1 = parent1.substr(0, pos);
959 itUnique = uniqueNames.find(parent1);
960 if (itUnique == uniqueNames.end()) { m->mothurOut("[ERROR]: trouble parsing chimera results. Cannot find parentA "+ parent1 + "."); m->mothurOutEndLine(); m->control_pressed = true; }
961 else { out << itUnique->second << restOfName << '\t'; }
962 }else { out << parent1 << '\t'; }
964 //parse parent2 names
965 if (parent2 != "*") {
967 pos = parent2.find_first_of('/');
968 if (pos != string::npos) {
969 restOfName = parent2.substr(pos);
970 parent2 = parent2.substr(0, pos);
973 itUnique = uniqueNames.find(parent2);
974 if (itUnique == uniqueNames.end()) { m->mothurOut("[ERROR]: trouble parsing chimera results. Cannot find parentB "+ parent2 + "."); m->mothurOutEndLine(); m->control_pressed = true; }
975 else { out << itUnique->second << restOfName << '\t'; }
976 }else { out << parent2 << '\t'; }
978 out << temp2 << '\t' << temp3 << '\t' << temp4 << '\t' << temp5 << '\t' << temp6 << '\t' << temp7 << '\t' << temp8 << '\t' << temp9 << '\t' << temp10 << '\t' << temp11 << '\t' << temp12 << temp13 << '\t' << flag << endl;
984 m->mothurRemove(outputFileName);
985 rename((outputFileName+".temp").c_str(), outputFileName.c_str());
989 //assumptions - in file each read will always look like - if uchime source is updated, revisit this code.
991 ------------------------------------------------------------------------
992 Query ( 179 nt) F21Fcsw_11639/ab=591/
993 ParentA ( 179 nt) F11Fcsw_6529/ab=1625/
994 ParentB ( 181 nt) F21Fcsw_12128/ab=1827/
996 A 1 AAGgAAGAtTAATACaagATGgCaTCatgAGtccgCATgTtcAcatGATTAAAG--gTaTtcCGGTagacGATGGGGATG 78
997 Q 1 AAGTAAGACTAATACCCAATGACGTCTCTAGAAGACATCTGAAAGAGATTAAAG--ATTTATCGGTGATGGATGGGGATG 78
998 B 1 AAGgAAGAtTAATcCaggATGggaTCatgAGttcACATgTccgcatGATTAAAGgtATTTtcCGGTagacGATGGGGATG 80
999 Diffs N N A N?N N N NNN N?NB N ?NaNNN B B NN NNNN
1000 Votes 0 0 + 000 0 0 000 000+ 0 00!000 + 00 0000
1001 Model AAAAAAAAAAAAAAAAAAAAAAxBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB
1003 A 79 CGTtccATTAGaTaGTaGGCGGGGTAACGGCCCACCtAGtCttCGATggaTAGGGGTTCTGAGAGGAAGGTCCCCCACAT 158
1004 Q 79 CGTCTGATTAGCTTGTTGGCGGGGTAACGGCCCACCAAGGCAACGATCAGTAGGGGTTCTGAGAGGAAGGTCCCCCACAT 158
1005 B 81 CGTtccATTAGaTaGTaGGCGGGGTAACGGCCCACCtAGtCAACGATggaTAGGGGTTCTGAGAGGAAGGTCCCCCACAT 160
1006 Diffs NNN N N N N N BB NNN
1007 Votes 000 0 0 0 0 0 ++ 000
1008 Model BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB
1010 A 159 TGGAACTGAGACACGGTCCAA 179
1011 Q 159 TGGAACTGAGACACGGTCCAA 179
1012 B 161 TGGAACTGAGACACGGTCCAA 181
1015 Model BBBBBBBBBBBBBBBBBBBBB
1017 Ids. QA 76.6%, QB 77.7%, AB 93.7%, QModel 78.9%, Div. +1.5%
1018 Diffs Left 7: N 0, A 6, Y 1 (14.3%); Right 35: N 1, A 30, Y 4 (11.4%), Score 0.0047
1022 m->openInputFile(alnsFileName, in3);
1025 m->openOutputFile(alnsFileName+".temp", out3); out3.setf(ios::fixed, ios::floatfield); out3.setf(ios::showpoint);
1028 namesInFile.clear();
1031 while (!in3.eof()) {
1032 if (m->control_pressed) { in3.close(); out3.close(); m->mothurRemove(outputFileName); m->mothurRemove((accnosFileName)); m->mothurRemove((alnsFileName+".temp")); return 0; }
1035 line = m->getline(in3);
1039 istringstream iss(line);
1042 //are you a name line
1043 if ((temp == "Query") || (temp == "ParentA") || (temp == "ParentB")) {
1045 for (int i = 0; i < line.length(); i++) {
1047 if (line[i] == ')') { break; }
1048 else { out3 << line[i]; }
1051 if (spot == (line.length() - 1)) { m->mothurOut("[ERROR]: could not line sequence name in line " + line + "."); m->mothurOutEndLine(); m->control_pressed = true; }
1052 else if ((spot+2) > (line.length() - 1)) { m->mothurOut("[ERROR]: could not line sequence name in line " + line + "."); m->mothurOutEndLine(); m->control_pressed = true; }
1054 out << line[spot] << line[spot+1];
1056 name = line.substr(spot+2);
1058 //parse name - name will either look like U68590/ab=1/ or U68590
1059 string restOfName = "";
1060 int pos = name.find_first_of('/');
1061 if (pos != string::npos) {
1062 restOfName = name.substr(pos);
1063 name = name.substr(0, pos);
1067 itUnique = uniqueNames.find(name);
1069 if (itUnique == uniqueNames.end()) { m->mothurOut("[ERROR]: trouble parsing alns results. Cannot find "+ name + "."); m->mothurOutEndLine();m->control_pressed = true; }
1071 //only limit repeats on query names
1072 if (temp == "Query") {
1073 itNames = namesInFile.find((itUnique->second));
1075 if (itNames == namesInFile.end()) {
1076 out << itUnique->second << restOfName << endl;
1077 namesInFile.insert((itUnique->second));
1079 }else { out << itUnique->second << restOfName << endl; }
1084 }else { //not need to alter line
1085 out3 << line << endl;
1087 }else { out3 << endl; }
1092 m->mothurRemove(alnsFileName);
1093 rename((alnsFileName+".temp").c_str(), alnsFileName.c_str());
1098 catch(exception& e) {
1099 m->errorOut(e, "ChimeraUchimeCommand", "deconvoluteResults");
1103 //**********************************************************************************************************************
1104 int ChimeraUchimeCommand::printFile(vector<seqPriorityNode>& nameMapCount, string filename){
1107 sort(nameMapCount.begin(), nameMapCount.end(), compareSeqPriorityNodes);
1110 m->openOutputFile(filename, out);
1112 //print new file in order of
1113 for (int i = 0; i < nameMapCount.size(); i++) {
1114 out << ">" << nameMapCount[i].name << "/ab=" << nameMapCount[i].numIdentical << "/" << endl << nameMapCount[i].seq << endl;
1120 catch(exception& e) {
1121 m->errorOut(e, "ChimeraUchimeCommand", "printFile");
1125 //**********************************************************************************************************************
1126 int ChimeraUchimeCommand::readFasta(string filename, map<string, string>& seqs){
1128 //create input file for uchime
1129 //read through fastafile and store info
1131 m->openInputFile(filename, in);
1135 if (m->control_pressed) { in.close(); return 0; }
1137 Sequence seq(in); m->gobble(in);
1138 seqs[seq.getName()] = seq.getAligned();
1144 catch(exception& e) {
1145 m->errorOut(e, "ChimeraUchimeCommand", "readFasta");
1149 //**********************************************************************************************************************
1151 string ChimeraUchimeCommand::getNamesFile(string& inputFile){
1153 string nameFile = "";
1155 m->mothurOutEndLine(); m->mothurOut("No namesfile given, running unique.seqs command to generate one."); m->mothurOutEndLine(); m->mothurOutEndLine();
1157 //use unique.seqs to create new name and fastafile
1158 string inputString = "fasta=" + inputFile;
1159 m->mothurOut("/******************************************/"); m->mothurOutEndLine();
1160 m->mothurOut("Running command: unique.seqs(" + inputString + ")"); m->mothurOutEndLine();
1161 m->mothurCalling = true;
1163 Command* uniqueCommand = new DeconvoluteCommand(inputString);
1164 uniqueCommand->execute();
1166 map<string, vector<string> > filenames = uniqueCommand->getOutputFiles();
1168 delete uniqueCommand;
1169 m->mothurCalling = false;
1170 m->mothurOut("/******************************************/"); m->mothurOutEndLine();
1172 nameFile = filenames["name"][0];
1173 inputFile = filenames["fasta"][0];
1177 catch(exception& e) {
1178 m->errorOut(e, "ChimeraUchimeCommand", "getNamesFile");
1182 //**********************************************************************************************************************
1183 int ChimeraUchimeCommand::driverGroups(string outputFName, string filename, string accnos, string alns, string countlist, int start, int end, vector<string> groups){
1187 int numChimeras = 0;
1190 ofstream outCountList;
1191 if (hasCount && dups) { m->openOutputFile(countlist, outCountList); }
1193 for (int i = start; i < end; i++) {
1194 int start = time(NULL); if (m->control_pressed) { outCountList.close(); m->mothurRemove(countlist); return 0; }
1197 if (hasCount) { error = cparser->getSeqs(groups[i], filename, true); if ((error == 1) || m->control_pressed) { return 0; } }
1198 else { error = sparser->getSeqs(groups[i], filename, true); if ((error == 1) || m->control_pressed) { return 0; } }
1200 int numSeqs = driver((outputFName + groups[i]), filename, (accnos+groups[i]), (alns+ groups[i]), numChimeras);
1201 totalSeqs += numSeqs;
1203 if (m->control_pressed) { return 0; }
1205 //remove file made for uchime
1206 if (!m->debug) { m->mothurRemove(filename); }
1207 else { m->mothurOut("[DEBUG]: saving file: " + filename + ".\n"); }
1209 //if we provided a count file with group info and set dereplicate=t, then we want to create a *.pick.count_table
1210 //This table will zero out group counts for seqs determined to be chimeric by that group.
1212 if (!m->isBlank(accnos+groups[i])) {
1214 m->openInputFile(accnos+groups[i], in);
1218 in >> name; m->gobble(in);
1219 outCountList << name << '\t' << groups[i] << endl;
1223 map<string, string> thisnamemap = sparser->getNameMap(groups[i]);
1224 map<string, string>::iterator itN;
1226 m->openOutputFile(accnos+groups[i]+".temp", out);
1228 in >> name; m->gobble(in);
1229 itN = thisnamemap.find(name);
1230 if (itN != thisnamemap.end()) {
1231 vector<string> tempNames; m->splitAtComma(itN->second, tempNames);
1232 for (int j = 0; j < tempNames.size(); j++) { out << tempNames[j] << endl; }
1234 }else { m->mothurOut("[ERROR]: parsing cannot find " + name + ".\n"); m->control_pressed = true; }
1238 m->renameFile(accnos+groups[i]+".temp", accnos+groups[i]);
1245 m->appendFiles((outputFName+groups[i]), outputFName); m->mothurRemove((outputFName+groups[i]));
1246 m->appendFiles((accnos+groups[i]), accnos); m->mothurRemove((accnos+groups[i]));
1247 if (chimealns) { m->appendFiles((alns+groups[i]), alns); m->mothurRemove((alns+groups[i])); }
1249 m->mothurOutEndLine(); m->mothurOut("It took " + toString(time(NULL) - start) + " secs to check " + toString(numSeqs) + " sequences from group " + groups[i] + "."); m->mothurOutEndLine();
1252 if (hasCount && dups) { outCountList.close(); }
1257 catch(exception& e) {
1258 m->errorOut(e, "ChimeraUchimeCommand", "driverGroups");
1262 //**********************************************************************************************************************
1264 int ChimeraUchimeCommand::driver(string outputFName, string filename, string accnos, string alns, int& numChimeras){
1267 outputFName = m->getFullPathName(outputFName);
1268 filename = m->getFullPathName(filename);
1269 alns = m->getFullPathName(alns);
1271 //to allow for spaces in the path
1272 outputFName = "\"" + outputFName + "\"";
1273 filename = "\"" + filename + "\"";
1274 alns = "\"" + alns + "\"";
1276 vector<char*> cPara;
1278 string uchimeCommand = uchimeLocation;
1279 uchimeCommand = "\"" + uchimeCommand + "\" ";
1282 tempUchime= new char[uchimeCommand.length()+1];
1284 strncat(tempUchime, uchimeCommand.c_str(), uchimeCommand.length());
1285 cPara.push_back(tempUchime);
1287 //are you using a reference file
1288 if (templatefile != "self") {
1289 string outputFileName = filename.substr(1, filename.length()-2) + ".uchime_formatted";
1290 prepFile(filename.substr(1, filename.length()-2), outputFileName);
1291 filename = outputFileName;
1292 filename = "\"" + filename + "\"";
1293 //add reference file
1294 char* tempRef = new char[5];
1295 //strcpy(tempRef, "--db");
1296 *tempRef = '\0'; strncat(tempRef, "--db", 4);
1297 cPara.push_back(tempRef);
1298 char* tempR = new char[templatefile.length()+1];
1299 //strcpy(tempR, templatefile.c_str());
1300 *tempR = '\0'; strncat(tempR, templatefile.c_str(), templatefile.length());
1301 cPara.push_back(tempR);
1304 char* tempIn = new char[8];
1305 *tempIn = '\0'; strncat(tempIn, "--input", 7);
1306 //strcpy(tempIn, "--input");
1307 cPara.push_back(tempIn);
1308 char* temp = new char[filename.length()+1];
1309 *temp = '\0'; strncat(temp, filename.c_str(), filename.length());
1310 //strcpy(temp, filename.c_str());
1311 cPara.push_back(temp);
1313 char* tempO = new char[12];
1314 *tempO = '\0'; strncat(tempO, "--uchimeout", 11);
1315 //strcpy(tempO, "--uchimeout");
1316 cPara.push_back(tempO);
1317 char* tempout = new char[outputFName.length()+1];
1318 //strcpy(tempout, outputFName.c_str());
1319 *tempout = '\0'; strncat(tempout, outputFName.c_str(), outputFName.length());
1320 cPara.push_back(tempout);
1323 char* tempA = new char[13];
1324 *tempA = '\0'; strncat(tempA, "--uchimealns", 12);
1325 //strcpy(tempA, "--uchimealns");
1326 cPara.push_back(tempA);
1327 char* tempa = new char[alns.length()+1];
1328 //strcpy(tempa, alns.c_str());
1329 *tempa = '\0'; strncat(tempa, alns.c_str(), alns.length());
1330 cPara.push_back(tempa);
1334 char* tempA = new char[9];
1335 *tempA = '\0'; strncat(tempA, "--strand", 8);
1336 cPara.push_back(tempA);
1337 char* tempa = new char[strand.length()+1];
1338 *tempa = '\0'; strncat(tempa, strand.c_str(), strand.length());
1339 cPara.push_back(tempa);
1343 char* tempskew = new char[9];
1344 *tempskew = '\0'; strncat(tempskew, "--abskew", 8);
1345 //strcpy(tempskew, "--abskew");
1346 cPara.push_back(tempskew);
1347 char* tempSkew = new char[abskew.length()+1];
1348 //strcpy(tempSkew, abskew.c_str());
1349 *tempSkew = '\0'; strncat(tempSkew, abskew.c_str(), abskew.length());
1350 cPara.push_back(tempSkew);
1354 char* tempminh = new char[7];
1355 *tempminh = '\0'; strncat(tempminh, "--minh", 6);
1356 //strcpy(tempminh, "--minh");
1357 cPara.push_back(tempminh);
1358 char* tempMinH = new char[minh.length()+1];
1359 *tempMinH = '\0'; strncat(tempMinH, minh.c_str(), minh.length());
1360 //strcpy(tempMinH, minh.c_str());
1361 cPara.push_back(tempMinH);
1365 char* tempmindiv = new char[9];
1366 *tempmindiv = '\0'; strncat(tempmindiv, "--mindiv", 8);
1367 //strcpy(tempmindiv, "--mindiv");
1368 cPara.push_back(tempmindiv);
1369 char* tempMindiv = new char[mindiv.length()+1];
1370 *tempMindiv = '\0'; strncat(tempMindiv, mindiv.c_str(), mindiv.length());
1371 //strcpy(tempMindiv, mindiv.c_str());
1372 cPara.push_back(tempMindiv);
1376 char* tempxn = new char[5];
1377 //strcpy(tempxn, "--xn");
1378 *tempxn = '\0'; strncat(tempxn, "--xn", 4);
1379 cPara.push_back(tempxn);
1380 char* tempXn = new char[xn.length()+1];
1381 //strcpy(tempXn, xn.c_str());
1382 *tempXn = '\0'; strncat(tempXn, xn.c_str(), xn.length());
1383 cPara.push_back(tempXn);
1387 char* tempdn = new char[5];
1388 //strcpy(tempdn, "--dn");
1389 *tempdn = '\0'; strncat(tempdn, "--dn", 4);
1390 cPara.push_back(tempdn);
1391 char* tempDn = new char[dn.length()+1];
1392 *tempDn = '\0'; strncat(tempDn, dn.c_str(), dn.length());
1393 //strcpy(tempDn, dn.c_str());
1394 cPara.push_back(tempDn);
1398 char* tempxa = new char[5];
1399 //strcpy(tempxa, "--xa");
1400 *tempxa = '\0'; strncat(tempxa, "--xa", 4);
1401 cPara.push_back(tempxa);
1402 char* tempXa = new char[xa.length()+1];
1403 *tempXa = '\0'; strncat(tempXa, xa.c_str(), xa.length());
1404 //strcpy(tempXa, xa.c_str());
1405 cPara.push_back(tempXa);
1409 char* tempchunks = new char[9];
1410 //strcpy(tempchunks, "--chunks");
1411 *tempchunks = '\0'; strncat(tempchunks, "--chunks", 8);
1412 cPara.push_back(tempchunks);
1413 char* tempChunks = new char[chunks.length()+1];
1414 *tempChunks = '\0'; strncat(tempChunks, chunks.c_str(), chunks.length());
1415 //strcpy(tempChunks, chunks.c_str());
1416 cPara.push_back(tempChunks);
1420 char* tempminchunk = new char[11];
1421 //strcpy(tempminchunk, "--minchunk");
1422 *tempminchunk = '\0'; strncat(tempminchunk, "--minchunk", 10);
1423 cPara.push_back(tempminchunk);
1424 char* tempMinchunk = new char[minchunk.length()+1];
1425 *tempMinchunk = '\0'; strncat(tempMinchunk, minchunk.c_str(), minchunk.length());
1426 //strcpy(tempMinchunk, minchunk.c_str());
1427 cPara.push_back(tempMinchunk);
1430 if (useIdsmoothwindow) {
1431 char* tempidsmoothwindow = new char[17];
1432 *tempidsmoothwindow = '\0'; strncat(tempidsmoothwindow, "--idsmoothwindow", 16);
1433 //strcpy(tempidsmoothwindow, "--idsmoothwindow");
1434 cPara.push_back(tempidsmoothwindow);
1435 char* tempIdsmoothwindow = new char[idsmoothwindow.length()+1];
1436 *tempIdsmoothwindow = '\0'; strncat(tempIdsmoothwindow, idsmoothwindow.c_str(), idsmoothwindow.length());
1437 //strcpy(tempIdsmoothwindow, idsmoothwindow.c_str());
1438 cPara.push_back(tempIdsmoothwindow);
1441 /*if (useMinsmoothid) {
1442 char* tempminsmoothid = new char[14];
1443 //strcpy(tempminsmoothid, "--minsmoothid");
1444 *tempminsmoothid = '\0'; strncat(tempminsmoothid, "--minsmoothid", 13);
1445 cPara.push_back(tempminsmoothid);
1446 char* tempMinsmoothid = new char[minsmoothid.length()+1];
1447 *tempMinsmoothid = '\0'; strncat(tempMinsmoothid, minsmoothid.c_str(), minsmoothid.length());
1448 //strcpy(tempMinsmoothid, minsmoothid.c_str());
1449 cPara.push_back(tempMinsmoothid);
1453 char* tempmaxp = new char[7];
1454 //strcpy(tempmaxp, "--maxp");
1455 *tempmaxp = '\0'; strncat(tempmaxp, "--maxp", 6);
1456 cPara.push_back(tempmaxp);
1457 char* tempMaxp = new char[maxp.length()+1];
1458 *tempMaxp = '\0'; strncat(tempMaxp, maxp.c_str(), maxp.length());
1459 //strcpy(tempMaxp, maxp.c_str());
1460 cPara.push_back(tempMaxp);
1464 char* tempskipgaps = new char[13];
1465 //strcpy(tempskipgaps, "--[no]skipgaps");
1466 *tempskipgaps = '\0'; strncat(tempskipgaps, "--noskipgaps", 12);
1467 cPara.push_back(tempskipgaps);
1471 char* tempskipgaps2 = new char[14];
1472 //strcpy(tempskipgaps2, "--[no]skipgaps2");
1473 *tempskipgaps2 = '\0'; strncat(tempskipgaps2, "--noskipgaps2", 13);
1474 cPara.push_back(tempskipgaps2);
1478 char* tempminlen = new char[9];
1479 *tempminlen = '\0'; strncat(tempminlen, "--minlen", 8);
1480 //strcpy(tempminlen, "--minlen");
1481 cPara.push_back(tempminlen);
1482 char* tempMinlen = new char[minlen.length()+1];
1483 //strcpy(tempMinlen, minlen.c_str());
1484 *tempMinlen = '\0'; strncat(tempMinlen, minlen.c_str(), minlen.length());
1485 cPara.push_back(tempMinlen);
1489 char* tempmaxlen = new char[9];
1490 //strcpy(tempmaxlen, "--maxlen");
1491 *tempmaxlen = '\0'; strncat(tempmaxlen, "--maxlen", 8);
1492 cPara.push_back(tempmaxlen);
1493 char* tempMaxlen = new char[maxlen.length()+1];
1494 *tempMaxlen = '\0'; strncat(tempMaxlen, maxlen.c_str(), maxlen.length());
1495 //strcpy(tempMaxlen, maxlen.c_str());
1496 cPara.push_back(tempMaxlen);
1500 char* tempucl = new char[5];
1501 strcpy(tempucl, "--ucl");
1502 cPara.push_back(tempucl);
1505 if (useQueryfract) {
1506 char* tempqueryfract = new char[13];
1507 *tempqueryfract = '\0'; strncat(tempqueryfract, "--queryfract", 12);
1508 //strcpy(tempqueryfract, "--queryfract");
1509 cPara.push_back(tempqueryfract);
1510 char* tempQueryfract = new char[queryfract.length()+1];
1511 *tempQueryfract = '\0'; strncat(tempQueryfract, queryfract.c_str(), queryfract.length());
1512 //strcpy(tempQueryfract, queryfract.c_str());
1513 cPara.push_back(tempQueryfract);
1517 char** uchimeParameters;
1518 uchimeParameters = new char*[cPara.size()];
1519 string commandString = "";
1520 for (int i = 0; i < cPara.size(); i++) { uchimeParameters[i] = cPara[i]; commandString += toString(cPara[i]) + " "; }
1521 //int numArgs = cPara.size();
1523 //uchime_main(numArgs, uchimeParameters);
1524 //cout << "commandString = " << commandString << endl;
1525 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
1527 commandString = "\"" + commandString + "\"";
1529 if (m->debug) { m->mothurOut("[DEBUG]: uchime command = " + commandString + ".\n"); }
1530 system(commandString.c_str());
1533 for(int i = 0; i < cPara.size(); i++) { delete cPara[i]; }
1534 delete[] uchimeParameters;
1536 //remove "" from filenames
1537 outputFName = outputFName.substr(1, outputFName.length()-2);
1538 filename = filename.substr(1, filename.length()-2);
1539 alns = alns.substr(1, alns.length()-2);
1541 if (m->control_pressed) { return 0; }
1543 //create accnos file from uchime results
1545 m->openInputFile(outputFName, in);
1548 m->openOutputFile(accnos, out);
1554 if (m->control_pressed) { break; }
1557 string chimeraFlag = "";
1558 //in >> chimeraFlag >> name;
1560 string line = m->getline(in);
1561 vector<string> pieces = m->splitWhiteSpace(line);
1562 if (pieces.size() > 2) {
1564 //fix name if needed
1565 if (templatefile == "self") {
1566 name = name.substr(0, name.length()-1); //rip off last /
1567 name = name.substr(0, name.find_last_of('/'));
1570 chimeraFlag = pieces[pieces.size()-1];
1572 //for (int i = 0; i < 15; i++) { in >> chimeraFlag; }
1575 if (chimeraFlag == "Y") { out << name << endl; numChimeras++; }
1581 //if (templatefile != "self") { m->mothurRemove(filename); }
1585 catch(exception& e) {
1586 m->errorOut(e, "ChimeraUchimeCommand", "driver");
1590 /**************************************************************************************************/
1591 //uchime can't handle some of the things allowed in mothurs fasta files. This functions "cleans up" the file.
1592 int ChimeraUchimeCommand::prepFile(string filename, string output) {
1596 m->openInputFile(filename, in);
1599 m->openOutputFile(output, out);
1602 if (m->control_pressed) { break; }
1604 Sequence seq(in); m->gobble(in);
1606 if (seq.getName() != "") { seq.printSequence(out); }
1613 catch(exception& e) {
1614 m->errorOut(e, "ChimeraUchimeCommand", "prepFile");
1618 /**************************************************************************************************/
1620 int ChimeraUchimeCommand::createProcesses(string outputFileName, string filename, string accnos, string alns, int& numChimeras) {
1626 vector<string> files;
1628 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
1629 //break up file into multiple files
1630 m->divideFile(filename, processors, files);
1632 if (m->control_pressed) { return 0; }
1634 //loop through and create all the processes you want
1635 while (process != processors) {
1639 processIDS.push_back(pid); //create map from line number to pid so you can append files in correct order later
1641 }else if (pid == 0){
1642 num = driver(outputFileName + toString(getpid()) + ".temp", files[process], accnos + toString(getpid()) + ".temp", alns + toString(getpid()) + ".temp", numChimeras);
1644 //pass numSeqs to parent
1646 string tempFile = outputFileName + toString(getpid()) + ".num.temp";
1647 m->openOutputFile(tempFile, out);
1649 out << numChimeras << endl;
1654 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine();
1655 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
1661 num = driver(outputFileName, files[0], accnos, alns, numChimeras);
1663 //force parent to wait until all the processes are done
1664 for (int i=0;i<processIDS.size();i++) {
1665 int temp = processIDS[i];
1669 for (int i = 0; i < processIDS.size(); i++) {
1671 string tempFile = outputFileName + toString(processIDS[i]) + ".num.temp";
1672 m->openInputFile(tempFile, in);
1675 in >> tempNum; m->gobble(in);
1678 numChimeras += tempNum;
1680 in.close(); m->mothurRemove(tempFile);
1683 //////////////////////////////////////////////////////////////////////////////////////////////////////
1684 //Windows version shared memory, so be careful when passing variables through the preClusterData struct.
1685 //Above fork() will clone, so memory is separate, but that's not the case with windows,
1686 //////////////////////////////////////////////////////////////////////////////////////////////////////
1691 map<int, ofstream*> filehandles;
1692 map<int, ofstream*>::iterator it3;
1695 for (int i = 0; i < processors; i++) {
1696 temp = new ofstream;
1697 filehandles[i] = temp;
1698 m->openOutputFile(filename+toString(i)+".temp", *(temp));
1699 files.push_back(filename+toString(i)+".temp");
1703 m->openInputFile(filename, in);
1707 if (m->control_pressed) { in.close(); for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) { (*(it3->second)).close(); delete it3->second; } return 0; }
1709 Sequence tempSeq(in); m->gobble(in);
1711 if (tempSeq.getName() != "") {
1712 tempSeq.printSequence(*(filehandles[spot]));
1714 if (spot == processors) { spot = 0; }
1720 for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) {
1721 (*(it3->second)).close();
1725 //sanity check for number of processors
1726 if (count < processors) { processors = count; }
1728 vector<uchimeData*> pDataArray;
1729 DWORD dwThreadIdArray[processors-1];
1730 HANDLE hThreadArray[processors-1];
1731 vector<string> dummy; //used so that we can use the same struct for MyUchimeSeqsThreadFunction and MyUchimeThreadFunction
1733 //Create processor worker threads.
1734 for( int i=1; i<processors; i++ ){
1735 // Allocate memory for thread data.
1736 string extension = toString(i) + ".temp";
1738 uchimeData* tempUchime = new uchimeData(outputFileName+extension, uchimeLocation, templatefile, files[i], "", "", "", accnos+extension, alns+extension, "", dummy, m, 0, 0, i);
1739 tempUchime->setBooleans(dups, useAbskew, chimealns, useMinH, useMindiv, useXn, useDn, useXa, useChunks, useMinchunk, useIdsmoothwindow, useMinsmoothid, useMaxp, skipgaps, skipgaps2, useMinlen, useMaxlen, ucl, useQueryfract, hasCount);
1740 tempUchime->setVariables(abskew, minh, mindiv, xn, dn, xa, chunks, minchunk, idsmoothwindow, minsmoothid, maxp, minlen, maxlen, queryfract, strand);
1742 pDataArray.push_back(tempUchime);
1743 processIDS.push_back(i);
1745 //MySeqSumThreadFunction is in header. It must be global or static to work with the threads.
1746 //default security attributes, thread function name, argument to thread function, use default creation flags, returns the thread identifier
1747 hThreadArray[i-1] = CreateThread(NULL, 0, MyUchimeSeqsThreadFunction, pDataArray[i-1], 0, &dwThreadIdArray[i-1]);
1751 //using the main process as a worker saves time and memory
1752 num = driver(outputFileName, files[0], accnos, alns, numChimeras);
1754 //Wait until all threads have terminated.
1755 WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE);
1757 //Close all thread handles and free memory allocations.
1758 for(int i=0; i < pDataArray.size(); i++){
1759 num += pDataArray[i]->count;
1760 numChimeras += pDataArray[i]->numChimeras;
1761 CloseHandle(hThreadArray[i]);
1762 delete pDataArray[i];
1766 //append output files
1767 for(int i=0;i<processIDS.size();i++){
1768 m->appendFiles((outputFileName + toString(processIDS[i]) + ".temp"), outputFileName);
1769 m->mothurRemove((outputFileName + toString(processIDS[i]) + ".temp"));
1771 m->appendFiles((accnos + toString(processIDS[i]) + ".temp"), accnos);
1772 m->mothurRemove((accnos + toString(processIDS[i]) + ".temp"));
1775 m->appendFiles((alns + toString(processIDS[i]) + ".temp"), alns);
1776 m->mothurRemove((alns + toString(processIDS[i]) + ".temp"));
1780 //get rid of the file pieces.
1781 for (int i = 0; i < files.size(); i++) { m->mothurRemove(files[i]); }
1784 catch(exception& e) {
1785 m->errorOut(e, "ChimeraUchimeCommand", "createProcesses");
1789 /**************************************************************************************************/
1791 int ChimeraUchimeCommand::createProcessesGroups(string outputFName, string filename, string accnos, string alns, string newCountFile, vector<string> groups, string nameFile, string groupFile, string fastaFile) {
1798 CountTable newCount;
1799 if (hasCount && dups) { newCount.readTable(nameFile, true, false); }
1802 if (groups.size() < processors) { processors = groups.size(); }
1804 //divide the groups between the processors
1805 vector<linePair> lines;
1806 int numGroupsPerProcessor = groups.size() / processors;
1807 for (int i = 0; i < processors; i++) {
1808 int startIndex = i * numGroupsPerProcessor;
1809 int endIndex = (i+1) * numGroupsPerProcessor;
1810 if(i == (processors - 1)){ endIndex = groups.size(); }
1811 lines.push_back(linePair(startIndex, endIndex));
1814 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
1816 //loop through and create all the processes you want
1817 while (process != processors) {
1821 processIDS.push_back(pid); //create map from line number to pid so you can append files in correct order later
1823 }else if (pid == 0){
1824 num = driverGroups(outputFName + toString(getpid()) + ".temp", filename + toString(getpid()) + ".temp", accnos + toString(getpid()) + ".temp", alns + toString(getpid()) + ".temp", accnos + ".byCount." + toString(getpid()) + ".temp", lines[process].start, lines[process].end, groups);
1826 //pass numSeqs to parent
1828 string tempFile = outputFName + toString(getpid()) + ".num.temp";
1829 m->openOutputFile(tempFile, out);
1835 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine();
1836 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
1842 num = driverGroups(outputFName, filename, accnos, alns, accnos + ".byCount", lines[0].start, lines[0].end, groups);
1844 //force parent to wait until all the processes are done
1845 for (int i=0;i<processIDS.size();i++) {
1846 int temp = processIDS[i];
1850 for (int i = 0; i < processIDS.size(); i++) {
1852 string tempFile = outputFName + toString(processIDS[i]) + ".num.temp";
1853 m->openInputFile(tempFile, in);
1854 if (!in.eof()) { int tempNum = 0; in >> tempNum; num += tempNum; }
1855 in.close(); m->mothurRemove(tempFile);
1859 //////////////////////////////////////////////////////////////////////////////////////////////////////
1860 //Windows version shared memory, so be careful when passing variables through the uchimeData struct.
1861 //Above fork() will clone, so memory is separate, but that's not the case with windows,
1862 //////////////////////////////////////////////////////////////////////////////////////////////////////
1864 vector<uchimeData*> pDataArray;
1865 DWORD dwThreadIdArray[processors-1];
1866 HANDLE hThreadArray[processors-1];
1868 //Create processor worker threads.
1869 for( int i=1; i<processors; i++ ){
1870 // Allocate memory for thread data.
1871 string extension = toString(i) + ".temp";
1873 uchimeData* tempUchime = new uchimeData(outputFName+extension, uchimeLocation, templatefile, filename+extension, fastaFile, nameFile, groupFile, accnos+extension, alns+extension, accnos+".byCount."+extension, groups, m, lines[i].start, lines[i].end, i);
1874 tempUchime->setBooleans(dups, useAbskew, chimealns, useMinH, useMindiv, useXn, useDn, useXa, useChunks, useMinchunk, useIdsmoothwindow, useMinsmoothid, useMaxp, skipgaps, skipgaps2, useMinlen, useMaxlen, ucl, useQueryfract, hasCount);
1875 tempUchime->setVariables(abskew, minh, mindiv, xn, dn, xa, chunks, minchunk, idsmoothwindow, minsmoothid, maxp, minlen, maxlen, queryfract, strand);
1877 pDataArray.push_back(tempUchime);
1878 processIDS.push_back(i);
1880 //MyUchimeThreadFunction is in header. It must be global or static to work with the threads.
1881 //default security attributes, thread function name, argument to thread function, use default creation flags, returns the thread identifier
1882 hThreadArray[i-1] = CreateThread(NULL, 0, MyUchimeThreadFunction, pDataArray[i-1], 0, &dwThreadIdArray[i-1]);
1886 //using the main process as a worker saves time and memory
1887 num = driverGroups(outputFName, filename, accnos, alns, accnos + ".byCount", lines[0].start, lines[0].end, groups);
1889 //Wait until all threads have terminated.
1890 WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE);
1892 //Close all thread handles and free memory allocations.
1893 for(int i=0; i < pDataArray.size(); i++){
1894 num += pDataArray[i]->count;
1895 CloseHandle(hThreadArray[i]);
1896 delete pDataArray[i];
1903 if (hasCount && dups) {
1904 if (!m->isBlank(accnos + ".byCount")) {
1906 m->openInputFile(accnos + ".byCount", in2);
1909 while (!in2.eof()) {
1910 in2 >> name >> group; m->gobble(in2);
1911 newCount.setAbund(name, group, 0);
1915 m->mothurRemove(accnos + ".byCount");
1918 //append output files
1919 for(int i=0;i<processIDS.size();i++){
1920 m->appendFiles((outputFName + toString(processIDS[i]) + ".temp"), outputFName);
1921 m->mothurRemove((outputFName + toString(processIDS[i]) + ".temp"));
1923 m->appendFiles((accnos + toString(processIDS[i]) + ".temp"), accnos);
1924 m->mothurRemove((accnos + toString(processIDS[i]) + ".temp"));
1927 m->appendFiles((alns + toString(processIDS[i]) + ".temp"), alns);
1928 m->mothurRemove((alns + toString(processIDS[i]) + ".temp"));
1931 if (hasCount && dups) {
1932 if (!m->isBlank(accnos + ".byCount." + toString(processIDS[i]) + ".temp")) {
1934 m->openInputFile(accnos + ".byCount." + toString(processIDS[i]) + ".temp", in2);
1937 while (!in2.eof()) {
1938 in2 >> name >> group; m->gobble(in2);
1939 newCount.setAbund(name, group, 0);
1943 m->mothurRemove(accnos + ".byCount." + toString(processIDS[i]) + ".temp");
1948 //print new *.pick.count_table
1949 if (hasCount && dups) { newCount.printTable(newCountFile); }
1954 catch(exception& e) {
1955 m->errorOut(e, "ChimeraUchimeCommand", "createProcessesGroups");
1959 /**************************************************************************************************/