]> git.donarmstrong.com Git - mothur.git/blob - chimerauchimecommand.cpp
working on dereplicate=t issue in chimera.slayer and chimera.perseus, added appendFil...
[mothur.git] / chimerauchimecommand.cpp
1 /*
2  *  chimerauchimecommand.cpp
3  *  Mothur
4  *
5  *  Created by westcott on 5/13/11.
6  *  Copyright 2011 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "chimerauchimecommand.h"
11 #include "deconvolutecommand.h"
12 //#include "uc.h"
13 #include "sequence.hpp"
14 #include "referencedb.h"
15 #include "systemcommand.h"
16
17 //**********************************************************************************************************************
18 vector<string> ChimeraUchimeCommand::setParameters(){   
19         try {
20                 CommandParameter ptemplate("reference", "InputTypes", "", "", "none", "none", "none","",false,true,true); parameters.push_back(ptemplate);
21                 CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none","chimera-accnos",false,true,true); parameters.push_back(pfasta);
22                 CommandParameter pname("name", "InputTypes", "", "", "NameCount", "none", "none","",false,false,true); parameters.push_back(pname);
23         CommandParameter pcount("count", "InputTypes", "", "", "NameCount-CountGroup", "none", "none","",false,false,true); parameters.push_back(pcount);
24                 CommandParameter pgroup("group", "InputTypes", "", "", "CountGroup", "none", "none","",false,false,true); parameters.push_back(pgroup);
25                 CommandParameter pprocessors("processors", "Number", "", "1", "", "", "","",false,false,true); parameters.push_back(pprocessors);
26         CommandParameter pstrand("strand", "String", "", "", "", "", "","",false,false); parameters.push_back(pstrand);
27                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
28                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
29                 CommandParameter pabskew("abskew", "Number", "", "1.9", "", "", "","",false,false); parameters.push_back(pabskew);
30                 CommandParameter pchimealns("chimealns", "Boolean", "", "F", "", "", "","alns",false,false); parameters.push_back(pchimealns);
31                 CommandParameter pminh("minh", "Number", "", "0.3", "", "", "","",false,false); parameters.push_back(pminh);
32                 CommandParameter pmindiv("mindiv", "Number", "", "0.5", "", "", "","",false,false); parameters.push_back(pmindiv);
33                 CommandParameter pxn("xn", "Number", "", "8.0", "", "", "","",false,false); parameters.push_back(pxn);
34                 CommandParameter pdn("dn", "Number", "", "1.4", "", "", "","",false,false); parameters.push_back(pdn);
35                 CommandParameter pxa("xa", "Number", "", "1", "", "", "","",false,false); parameters.push_back(pxa);
36                 CommandParameter pchunks("chunks", "Number", "", "4", "", "", "","",false,false); parameters.push_back(pchunks);
37                 CommandParameter pminchunk("minchunk", "Number", "", "64", "", "", "","",false,false); parameters.push_back(pminchunk);
38                 CommandParameter pidsmoothwindow("idsmoothwindow", "Number", "", "32", "", "", "","",false,false); parameters.push_back(pidsmoothwindow);
39         CommandParameter pdups("dereplicate", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(pdups);
40
41                 //CommandParameter pminsmoothid("minsmoothid", "Number", "", "0.95", "", "", "",false,false); parameters.push_back(pminsmoothid);
42                 CommandParameter pmaxp("maxp", "Number", "", "2", "", "", "","",false,false); parameters.push_back(pmaxp);
43                 CommandParameter pskipgaps("skipgaps", "Boolean", "", "T", "", "", "","",false,false); parameters.push_back(pskipgaps);
44                 CommandParameter pskipgaps2("skipgaps2", "Boolean", "", "T", "", "", "","",false,false); parameters.push_back(pskipgaps2);
45                 CommandParameter pminlen("minlen", "Number", "", "10", "", "", "","",false,false); parameters.push_back(pminlen);
46                 CommandParameter pmaxlen("maxlen", "Number", "", "10000", "", "", "","",false,false); parameters.push_back(pmaxlen);
47                 CommandParameter pucl("ucl", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(pucl);
48                 CommandParameter pqueryfract("queryfract", "Number", "", "0.5", "", "", "","",false,false); parameters.push_back(pqueryfract);
49
50                 vector<string> myArray;
51                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
52                 return myArray;
53         }
54         catch(exception& e) {
55                 m->errorOut(e, "ChimeraUchimeCommand", "setParameters");
56                 exit(1);
57         }
58 }
59 //**********************************************************************************************************************
60 string ChimeraUchimeCommand::getHelpString(){   
61         try {
62                 string helpString = "";
63                 helpString += "The chimera.uchime command reads a fastafile and referencefile and outputs potentially chimeric sequences.\n";
64                 helpString += "This command is a wrapper for uchime written by Robert C. Edgar.\n";
65                 helpString += "The chimera.uchime command parameters are fasta, name, count, reference, processors, dereplicate, abskew, chimealns, minh, mindiv, xn, dn, xa, chunks, minchunk, idsmoothwindow, minsmoothid, maxp, skipgaps, skipgaps2, minlen, maxlen, ucl, strand and queryfact.\n";
66                 helpString += "The fasta parameter allows you to enter the fasta file containing your potentially chimeric sequences, and is required, unless you have a valid current fasta file. \n";
67                 helpString += "The name parameter allows you to provide a name file, if you are using template=self. \n";
68         helpString += "The count parameter allows you to provide a count file, if you are using template=self. When you use a count file with group info and dereplicate=T, mothur will create a *.pick.count_table file containing seqeunces after chimeras are removed. \n";
69                 helpString += "You may enter multiple fasta files by separating their names with dashes. ie. fasta=abrecovery.fasta-amazon.fasta \n";
70                 helpString += "The group parameter allows you to provide a group file. The group file can be used with a namesfile and reference=self. When checking sequences, only sequences from the same group as the query sequence will be used as the reference. \n";
71         helpString += "If the dereplicate parameter is false, then if one group finds the seqeunce to be chimeric, then all groups find it to be chimeric, default=f.\n";
72                 helpString += "The reference parameter allows you to enter a reference file containing known non-chimeric sequences, and is required. You may also set template=self, in this case the abundant sequences will be used as potential parents. \n";
73                 helpString += "The processors parameter allows you to specify how many processors you would like to use.  The default is 1. \n";
74                 helpString += "The abskew parameter can only be used with template=self. Minimum abundance skew. Default 1.9. Abundance skew is: min [ abund(parent1), abund(parent2) ] / abund(query).\n";
75                 helpString += "The chimealns parameter allows you to indicate you would like a file containing multiple alignments of query sequences to parents in human readable format. Alignments show columns with differences that support or contradict a chimeric model.\n";
76                 helpString += "The minh parameter - mininum score to report chimera. Default 0.3. Values from 0.1 to 5 might be reasonable. Lower values increase sensitivity but may report more false positives. If you decrease xn you may need to increase minh, and vice versa.\n";
77                 helpString += "The mindiv parameter - minimum divergence ratio, default 0.5. Div ratio is 100%% - %%identity between query sequence and the closest candidate for being a parent. If you don't care about very close chimeras, then you could increase mindiv to, say, 1.0 or 2.0, and also decrease minh, say to 0.1, to increase sensitivity. How well this works will depend on your data. Best is to tune parameters on a good benchmark.\n";
78                 helpString += "The xn parameter - weight of a no vote. Default 8.0. Decreasing this weight to around 3 or 4 may give better performance on denoised data.\n";
79                 helpString += "The dn parameter - pseudo-count prior on number of no votes. Default 1.4. Probably no good reason to change this unless you can retune to a good benchmark for your data. Reasonable values are probably in the range from 0.2 to 2.\n";
80                 helpString += "The xa parameter - weight of an abstain vote. Default 1. So far, results do not seem to be very sensitive to this parameter, but if you have a good training set might be worth trying. Reasonable values might range from 0.1 to 2.\n";
81                 helpString += "The chunks parameter is the number of chunks to extract from the query sequence when searching for parents. Default 4.\n";
82                 helpString += "The minchunk parameter is the minimum length of a chunk. Default 64.\n";
83                 helpString += "The idsmoothwindow parameter is the length of id smoothing window. Default 32.\n";
84                 //helpString += "The minsmoothid parameter - minimum factional identity over smoothed window of candidate parent. Default 0.95.\n";
85                 helpString += "The maxp parameter - maximum number of candidate parents to consider. Default 2. In tests so far, increasing maxp gives only a very small improvement in sensivity but tends to increase the error rate quite a bit.\n";
86                 helpString += "The skipgaps parameter controls how gapped columns affect counting of diffs. If skipgaps is set to T, columns containing gaps do not found as diffs. Default = T.\n";
87                 helpString += "The skipgaps2 parameter controls how gapped columns affect counting of diffs. If skipgaps2 is set to T, if column is immediately adjacent to a column containing a gap, it is not counted as a diff. Default = T.\n";
88                 helpString += "The minlen parameter is the minimum unaligned sequence length. Defaults 10. Applies to both query and reference sequences.\n";
89                 helpString += "The maxlen parameter is the maximum unaligned sequence length. Defaults 10000. Applies to both query and reference sequences.\n";
90                 helpString += "The ucl parameter - use local-X alignments. Default is global-X or false. On tests so far, global-X is always better; this option is retained because it just might work well on some future type of data.\n";
91                 helpString += "The queryfract parameter - minimum fraction of the query sequence that must be covered by a local-X alignment. Default 0.5. Applies only when ucl is true.\n";
92 #ifdef USE_MPI
93                 helpString += "When using MPI, the processors parameter is set to the number of MPI processes running. \n";
94 #endif
95                 helpString += "The chimera.uchime command should be in the following format: \n";
96                 helpString += "chimera.uchime(fasta=yourFastaFile, reference=yourTemplate) \n";
97                 helpString += "Example: chimera.uchime(fasta=AD.align, reference=silva.gold.align) \n";
98                 helpString += "Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFastaFile).\n";       
99                 return helpString;
100         }
101         catch(exception& e) {
102                 m->errorOut(e, "ChimeraUchimeCommand", "getHelpString");
103                 exit(1);
104         }
105 }
106 //**********************************************************************************************************************
107 string ChimeraUchimeCommand::getOutputPattern(string type) {
108     try {
109         string pattern = "";
110         
111         if (type == "chimera") {  pattern = "[filename],uchime.chimeras"; } 
112         else if (type == "accnos") {  pattern = "[filename],uchime.accnos"; } 
113         else if (type == "alns") {  pattern = "[filename],uchime.alns"; }
114         else if (type == "count") {  pattern = "[filename],uchime.pick.count_table"; } 
115         else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true;  }
116         
117         return pattern;
118     }
119     catch(exception& e) {
120         m->errorOut(e, "ChimeraUchimeCommand", "getOutputPattern");
121         exit(1);
122     }
123 }
124 //**********************************************************************************************************************
125 ChimeraUchimeCommand::ChimeraUchimeCommand(){   
126         try {
127                 abort = true; calledHelp = true;
128                 setParameters();
129                 vector<string> tempOutNames;
130                 outputTypes["chimera"] = tempOutNames;
131                 outputTypes["accnos"] = tempOutNames;
132                 outputTypes["alns"] = tempOutNames;
133         outputTypes["count"] = tempOutNames;
134         }
135         catch(exception& e) {
136                 m->errorOut(e, "ChimeraUchimeCommand", "ChimeraUchimeCommand");
137                 exit(1);
138         }
139 }
140 //***************************************************************************************************************
141 ChimeraUchimeCommand::ChimeraUchimeCommand(string option)  {
142         try {
143                 abort = false; calledHelp = false; hasName=false; hasCount=false;
144                 ReferenceDB* rdb = ReferenceDB::getInstance();
145                 
146                 //allow user to run help
147                 if(option == "help") { help(); abort = true; calledHelp = true; }
148                 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
149                 
150                 else {
151                         vector<string> myArray = setParameters();
152                         
153                         OptionParser parser(option);
154                         map<string,string> parameters = parser.getParameters();
155                         
156                         ValidParameters validParameter("chimera.uchime");
157                         map<string,string>::iterator it;
158                         
159                         //check to make sure all parameters are valid for command
160                         for (it = parameters.begin(); it != parameters.end(); it++) { 
161                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
162                         }
163                         
164                         vector<string> tempOutNames;
165                         outputTypes["chimera"] = tempOutNames;
166                         outputTypes["accnos"] = tempOutNames;
167                         outputTypes["alns"] = tempOutNames;
168             outputTypes["count"] = tempOutNames;
169                         
170                         //if the user changes the input directory command factory will send this info to us in the output parameter 
171                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
172                         if (inputDir == "not found"){   inputDir = "";          }
173                         
174                         //check for required parameters
175                         fastafile = validParameter.validFile(parameters, "fasta", false);
176                         if (fastafile == "not found") {                                 
177                                 //if there is a current fasta file, use it
178                                 string filename = m->getFastaFile(); 
179                                 if (filename != "") { fastaFileNames.push_back(filename); m->mothurOut("Using " + filename + " as input file for the fasta parameter."); m->mothurOutEndLine(); }
180                                 else {  m->mothurOut("You have no current fastafile and the fasta parameter is required."); m->mothurOutEndLine(); abort = true; }
181                         }else { 
182                                 m->splitAtDash(fastafile, fastaFileNames);
183                                 
184                                 //go through files and make sure they are good, if not, then disregard them
185                                 for (int i = 0; i < fastaFileNames.size(); i++) {
186                                         
187                                         bool ignore = false;
188                                         if (fastaFileNames[i] == "current") { 
189                                                 fastaFileNames[i] = m->getFastaFile(); 
190                                                 if (fastaFileNames[i] != "") {  m->mothurOut("Using " + fastaFileNames[i] + " as input file for the fasta parameter where you had given current."); m->mothurOutEndLine(); }
191                                                 else {  
192                                                         m->mothurOut("You have no current fastafile, ignoring current."); m->mothurOutEndLine(); ignore=true; 
193                                                         //erase from file list
194                                                         fastaFileNames.erase(fastaFileNames.begin()+i);
195                                                         i--;
196                                                 }
197                                         }
198                                         
199                                         if (!ignore) {
200                                                 
201                                                 if (inputDir != "") {
202                                                         string path = m->hasPath(fastaFileNames[i]);
203                                                         //if the user has not given a path then, add inputdir. else leave path alone.
204                                                         if (path == "") {       fastaFileNames[i] = inputDir + fastaFileNames[i];               }
205                                                 }
206                                                 
207                                                 int ableToOpen;
208                                                 ifstream in;
209                                                 
210                                                 ableToOpen = m->openInputFile(fastaFileNames[i], in, "noerror");
211                                                 
212                                                 //if you can't open it, try default location
213                                                 if (ableToOpen == 1) {
214                                                         if (m->getDefaultPath() != "") { //default path is set
215                                                                 string tryPath = m->getDefaultPath() + m->getSimpleName(fastaFileNames[i]);
216                                                                 m->mothurOut("Unable to open " + fastaFileNames[i] + ". Trying default " + tryPath); m->mothurOutEndLine();
217                                                                 ifstream in2;
218                                                                 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
219                                                                 in2.close();
220                                                                 fastaFileNames[i] = tryPath;
221                                                         }
222                                                 }
223                                                 
224                                                 if (ableToOpen == 1) {
225                                                         if (m->getOutputDir() != "") { //default path is set
226                                                                 string tryPath = m->getOutputDir() + m->getSimpleName(fastaFileNames[i]);
227                                                                 m->mothurOut("Unable to open " + fastaFileNames[i] + ". Trying output directory " + tryPath); m->mothurOutEndLine();
228                                                                 ifstream in2;
229                                                                 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
230                                                                 in2.close();
231                                                                 fastaFileNames[i] = tryPath;
232                                                         }
233                                                 }
234                                                 
235                                                 in.close();
236                                                 
237                                                 if (ableToOpen == 1) { 
238                                                         m->mothurOut("Unable to open " + fastaFileNames[i] + ". It will be disregarded."); m->mothurOutEndLine(); 
239                                                         //erase from file list
240                                                         fastaFileNames.erase(fastaFileNames.begin()+i);
241                                                         i--;
242                                                 }else {
243                                                         m->setFastaFile(fastaFileNames[i]);
244                                                 }
245                                         }
246                                 }
247                                 
248                                 //make sure there is at least one valid file left
249                                 if (fastaFileNames.size() == 0) { m->mothurOut("[ERROR]: no valid files."); m->mothurOutEndLine(); abort = true; }
250                         }
251                         
252                         
253                         //check for required parameters
254                         namefile = validParameter.validFile(parameters, "name", false);
255                         if (namefile == "not found") { namefile = "";   }
256                         else { 
257                                 m->splitAtDash(namefile, nameFileNames);
258                                 
259                                 //go through files and make sure they are good, if not, then disregard them
260                                 for (int i = 0; i < nameFileNames.size(); i++) {
261                                         
262                                         bool ignore = false;
263                                         if (nameFileNames[i] == "current") { 
264                                                 nameFileNames[i] = m->getNameFile(); 
265                                                 if (nameFileNames[i] != "") {  m->mothurOut("Using " + nameFileNames[i] + " as input file for the name parameter where you had given current."); m->mothurOutEndLine(); }
266                                                 else {  
267                                                         m->mothurOut("You have no current namefile, ignoring current."); m->mothurOutEndLine(); ignore=true; 
268                                                         //erase from file list
269                                                         nameFileNames.erase(nameFileNames.begin()+i);
270                                                         i--;
271                                                 }
272                                         }
273                                         
274                                         if (!ignore) {
275                                                 
276                                                 if (inputDir != "") {
277                                                         string path = m->hasPath(nameFileNames[i]);
278                                                         //if the user has not given a path then, add inputdir. else leave path alone.
279                                                         if (path == "") {       nameFileNames[i] = inputDir + nameFileNames[i];         }
280                                                 }
281                                                 
282                                                 int ableToOpen;
283                                                 ifstream in;
284                                                 
285                                                 ableToOpen = m->openInputFile(nameFileNames[i], in, "noerror");
286                                                 
287                                                 //if you can't open it, try default location
288                                                 if (ableToOpen == 1) {
289                                                         if (m->getDefaultPath() != "") { //default path is set
290                                                                 string tryPath = m->getDefaultPath() + m->getSimpleName(nameFileNames[i]);
291                                                                 m->mothurOut("Unable to open " + nameFileNames[i] + ". Trying default " + tryPath); m->mothurOutEndLine();
292                                                                 ifstream in2;
293                                                                 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
294                                                                 in2.close();
295                                                                 nameFileNames[i] = tryPath;
296                                                         }
297                                                 }
298                                                 
299                                                 if (ableToOpen == 1) {
300                                                         if (m->getOutputDir() != "") { //default path is set
301                                                                 string tryPath = m->getOutputDir() + m->getSimpleName(nameFileNames[i]);
302                                                                 m->mothurOut("Unable to open " + nameFileNames[i] + ". Trying output directory " + tryPath); m->mothurOutEndLine();
303                                                                 ifstream in2;
304                                                                 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
305                                                                 in2.close();
306                                                                 nameFileNames[i] = tryPath;
307                                                         }
308                                                 }
309                                                 
310                                                 in.close();
311                                                 
312                                                 if (ableToOpen == 1) { 
313                                                         m->mothurOut("Unable to open " + nameFileNames[i] + ". It will be disregarded."); m->mothurOutEndLine(); 
314                                                         //erase from file list
315                                                         nameFileNames.erase(nameFileNames.begin()+i);
316                                                         i--;
317                                                 }else {
318                                                         m->setNameFile(nameFileNames[i]);
319                                                 }
320                                         }
321                                 }
322                         }
323             
324             if (nameFileNames.size() != 0) { hasName = true; }
325             
326             //check for required parameters
327             vector<string> countfileNames;
328                         countfile = validParameter.validFile(parameters, "count", false);
329                         if (countfile == "not found") { 
330                 countfile = "";  
331                         }else { 
332                                 m->splitAtDash(countfile, countfileNames);
333                                 
334                                 //go through files and make sure they are good, if not, then disregard them
335                                 for (int i = 0; i < countfileNames.size(); i++) {
336                                         
337                                         bool ignore = false;
338                                         if (countfileNames[i] == "current") { 
339                                                 countfileNames[i] = m->getCountTableFile(); 
340                                                 if (nameFileNames[i] != "") {  m->mothurOut("Using " + countfileNames[i] + " as input file for the count parameter where you had given current."); m->mothurOutEndLine(); }
341                                                 else {  
342                                                         m->mothurOut("You have no current count file, ignoring current."); m->mothurOutEndLine(); ignore=true; 
343                                                         //erase from file list
344                                                         countfileNames.erase(countfileNames.begin()+i);
345                                                         i--;
346                                                 }
347                                         }
348                                         
349                                         if (!ignore) {
350                                                 
351                                                 if (inputDir != "") {
352                                                         string path = m->hasPath(countfileNames[i]);
353                                                         //if the user has not given a path then, add inputdir. else leave path alone.
354                                                         if (path == "") {       countfileNames[i] = inputDir + countfileNames[i];               }
355                                                 }
356                                                 
357                                                 int ableToOpen;
358                                                 ifstream in;
359                                                 
360                                                 ableToOpen = m->openInputFile(countfileNames[i], in, "noerror");
361                                                 
362                                                 //if you can't open it, try default location
363                                                 if (ableToOpen == 1) {
364                                                         if (m->getDefaultPath() != "") { //default path is set
365                                                                 string tryPath = m->getDefaultPath() + m->getSimpleName(countfileNames[i]);
366                                                                 m->mothurOut("Unable to open " + countfileNames[i] + ". Trying default " + tryPath); m->mothurOutEndLine();
367                                                                 ifstream in2;
368                                                                 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
369                                                                 in2.close();
370                                                                 countfileNames[i] = tryPath;
371                                                         }
372                                                 }
373                                                 
374                                                 if (ableToOpen == 1) {
375                                                         if (m->getOutputDir() != "") { //output directory is set
376                                                                 string tryPath = m->getOutputDir() + m->getSimpleName(countfileNames[i]);
377                                                                 m->mothurOut("Unable to open " + countfileNames[i] + ". Trying output directory " + tryPath); m->mothurOutEndLine();
378                                                                 ifstream in2;
379                                                                 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
380                                                                 in2.close();
381                                                                 countfileNames[i] = tryPath;
382                                                         }
383                                                 }
384                                                 
385                                                 in.close();
386                                                 
387                                                 if (ableToOpen == 1) { 
388                                                         m->mothurOut("Unable to open " + countfileNames[i] + ". It will be disregarded."); m->mothurOutEndLine(); 
389                                                         //erase from file list
390                                                         countfileNames.erase(countfileNames.begin()+i);
391                                                         i--;
392                                                 }else {
393                                                         m->setCountTableFile(countfileNames[i]);
394                                                 }
395                                         }
396                                 }
397                         }
398             
399             if (countfileNames.size() != 0) { hasCount = true; }
400             
401                         //the name and count parameters are mutually exclusive
402             if (hasName && hasCount) { m->mothurOut("[ERROR]: You must enter ONLY ONE of the following: count or name."); m->mothurOutEndLine(); abort = true; }
403             
404             if (!hasName && hasCount) { nameFileNames = countfileNames; }
405             
406                         if ((hasCount || hasName) && (nameFileNames.size() != fastaFileNames.size())) { m->mothurOut("[ERROR]: The number of name or count files does not match the number of fastafiles, please correct."); m->mothurOutEndLine(); abort=true; }
407                         
408                         bool hasGroup = true;
409                         groupfile = validParameter.validFile(parameters, "group", false);
410                         if (groupfile == "not found") { groupfile = "";  hasGroup = false; }
411                         else { 
412                                 m->splitAtDash(groupfile, groupFileNames);
413                                 
414                                 //go through files and make sure they are good, if not, then disregard them
415                                 for (int i = 0; i < groupFileNames.size(); i++) {
416                                         
417                                         bool ignore = false;
418                                         if (groupFileNames[i] == "current") { 
419                                                 groupFileNames[i] = m->getGroupFile(); 
420                                                 if (groupFileNames[i] != "") {  m->mothurOut("Using " + groupFileNames[i] + " as input file for the group parameter where you had given current."); m->mothurOutEndLine(); }
421                                                 else {  
422                                                         m->mothurOut("You have no current namefile, ignoring current."); m->mothurOutEndLine(); ignore=true; 
423                                                         //erase from file list
424                                                         groupFileNames.erase(groupFileNames.begin()+i);
425                                                         i--;
426                                                 }
427                                         }
428                                         
429                                         if (!ignore) {
430                                                 
431                                                 if (inputDir != "") {
432                                                         string path = m->hasPath(groupFileNames[i]);
433                                                         //if the user has not given a path then, add inputdir. else leave path alone.
434                                                         if (path == "") {       groupFileNames[i] = inputDir + groupFileNames[i];               }
435                                                 }
436                                                 
437                                                 int ableToOpen;
438                                                 ifstream in;
439                                                 
440                                                 ableToOpen = m->openInputFile(groupFileNames[i], in, "noerror");
441                                                 
442                                                 //if you can't open it, try default location
443                                                 if (ableToOpen == 1) {
444                                                         if (m->getDefaultPath() != "") { //default path is set
445                                                                 string tryPath = m->getDefaultPath() + m->getSimpleName(groupFileNames[i]);
446                                                                 m->mothurOut("Unable to open " + groupFileNames[i] + ". Trying default " + tryPath); m->mothurOutEndLine();
447                                                                 ifstream in2;
448                                                                 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
449                                                                 in2.close();
450                                                                 groupFileNames[i] = tryPath;
451                                                         }
452                                                 }
453                                                 
454                                                 if (ableToOpen == 1) {
455                                                         if (m->getOutputDir() != "") { //output directory is set
456                                                                 string tryPath = m->getOutputDir() + m->getSimpleName(groupFileNames[i]);
457                                                                 m->mothurOut("Unable to open " + groupFileNames[i] + ". Trying output directory " + tryPath); m->mothurOutEndLine();
458                                                                 ifstream in2;
459                                                                 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
460                                                                 in2.close();
461                                                                 groupFileNames[i] = tryPath;
462                                                         }
463                                                 }
464                                                 
465                                                 in.close();
466                                                 
467                                                 if (ableToOpen == 1) { 
468                                                         m->mothurOut("Unable to open " + groupFileNames[i] + ". It will be disregarded."); m->mothurOutEndLine(); 
469                                                         //erase from file list
470                                                         groupFileNames.erase(groupFileNames.begin()+i);
471                                                         i--;
472                                                 }else {
473                                                         m->setGroupFile(groupFileNames[i]);
474                                                 }
475                                         }
476                                 }
477                                 
478                                 //make sure there is at least one valid file left
479                                 if (groupFileNames.size() == 0) { m->mothurOut("[ERROR]: no valid group files."); m->mothurOutEndLine(); abort = true; }
480                         }
481                         
482                         if (hasGroup && (groupFileNames.size() != fastaFileNames.size())) { m->mothurOut("[ERROR]: The number of groupfiles does not match the number of fastafiles, please correct."); m->mothurOutEndLine(); abort=true; }
483                         
484             if (hasGroup && hasCount) { m->mothurOut("[ERROR]: You must enter ONLY ONE of the following: count or group."); m->mothurOutEndLine(); abort = true; }                      
485                         //if the user changes the output directory command factory will send this info to us in the output parameter 
486                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = ""; }
487                         
488                         
489                         //if the user changes the output directory command factory will send this info to us in the output parameter 
490                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = ""; }
491                         
492                         string path;
493                         it = parameters.find("reference");
494                         //user has given a template file
495                         if(it != parameters.end()){ 
496                                 if (it->second == "self") { templatefile = "self"; }
497                                 else {
498                                         path = m->hasPath(it->second);
499                                         //if the user has not given a path then, add inputdir. else leave path alone.
500                                         if (path == "") {       parameters["reference"] = inputDir + it->second;                }
501                                         
502                                         templatefile = validParameter.validFile(parameters, "reference", true);
503                                         if (templatefile == "not open") { abort = true; }
504                                         else if (templatefile == "not found") { //check for saved reference sequences
505                                                 if (rdb->getSavedReference() != "") {
506                                                         templatefile = rdb->getSavedReference();
507                                                         m->mothurOutEndLine();  m->mothurOut("Using sequences from " + rdb->getSavedReference() + "."); m->mothurOutEndLine();
508                                                 }else {
509                                                         m->mothurOut("[ERROR]: You don't have any saved reference sequences and the reference parameter is a required."); 
510                                                         m->mothurOutEndLine();
511                                                         abort = true; 
512                                                 }
513                                         }
514                                 }
515                         }else if (hasName) {  templatefile = "self"; }
516             else if (hasCount) {  templatefile = "self"; }
517                         else { 
518                                 if (rdb->getSavedReference() != "") {
519                                         templatefile = rdb->getSavedReference();
520                                         m->mothurOutEndLine();  m->mothurOut("Using sequences from " + rdb->getSavedReference() + "."); m->mothurOutEndLine();
521                                 }else {
522                                         m->mothurOut("[ERROR]: You don't have any saved reference sequences and the reference parameter is a required."); 
523                                         m->mothurOutEndLine();
524                                         templatefile = ""; abort = true; 
525                                 } 
526                         }
527                                 
528                         string temp = validParameter.validFile(parameters, "processors", false);        if (temp == "not found"){       temp = m->getProcessors();      }
529                         m->setProcessors(temp);
530                         m->mothurConvert(temp, processors);
531                         
532                         abskew = validParameter.validFile(parameters, "abskew", false); if (abskew == "not found"){     useAbskew = false;  abskew = "1.9";     }else{  useAbskew = true;  }
533                         if (useAbskew && templatefile != "self") { m->mothurOut("The abskew parameter is only valid with template=self, ignoring."); m->mothurOutEndLine(); useAbskew = false; }
534                         
535                         temp = validParameter.validFile(parameters, "chimealns", false);                        if (temp == "not found") { temp = "f"; }
536                         chimealns = m->isTrue(temp); 
537                         
538                         minh = validParameter.validFile(parameters, "minh", false);                                             if (minh == "not found")                        { useMinH = false; minh = "0.3";                                        }       else{ useMinH = true;                   }
539                         mindiv = validParameter.validFile(parameters, "mindiv", false);                                 if (mindiv == "not found")                      { useMindiv = false; mindiv = "0.5";                            }       else{ useMindiv = true;                 }
540                         xn = validParameter.validFile(parameters, "xn", false);                                                 if (xn == "not found")                          { useXn = false; xn = "8.0";                                            }       else{ useXn = true;                             }
541                         dn = validParameter.validFile(parameters, "dn", false);                                                 if (dn == "not found")                          { useDn = false; dn = "1.4";                                            }       else{ useDn = true;                             }
542                         xa = validParameter.validFile(parameters, "xa", false);                                                 if (xa == "not found")                          { useXa = false; xa = "1";                                                      }       else{ useXa = true;                             }
543                         chunks = validParameter.validFile(parameters, "chunks", false);                                 if (chunks == "not found")                      { useChunks = false; chunks = "4";                                      }       else{ useChunks = true;                 }
544                         minchunk = validParameter.validFile(parameters, "minchunk", false);                             if (minchunk == "not found")            { useMinchunk = false; minchunk = "64";                         }       else{ useMinchunk = true;               }
545                         idsmoothwindow = validParameter.validFile(parameters, "idsmoothwindow", false); if (idsmoothwindow == "not found")      { useIdsmoothwindow = false; idsmoothwindow = "32";     }       else{ useIdsmoothwindow = true; }
546                         //minsmoothid = validParameter.validFile(parameters, "minsmoothid", false);             if (minsmoothid == "not found")         { useMinsmoothid = false; minsmoothid = "0.95";         }       else{ useMinsmoothid = true;    }
547                         maxp = validParameter.validFile(parameters, "maxp", false);                                             if (maxp == "not found")                        { useMaxp = false; maxp = "2";                                          }       else{ useMaxp = true;                   }
548                         minlen = validParameter.validFile(parameters, "minlen", false);                                 if (minlen == "not found")                      { useMinlen = false; minlen = "10";                                     }       else{ useMinlen = true;                 }
549                         maxlen = validParameter.validFile(parameters, "maxlen", false);                                 if (maxlen == "not found")                      { useMaxlen = false; maxlen = "10000";                          }       else{ useMaxlen = true;                 }
550             
551             strand = validParameter.validFile(parameters, "strand", false);     if (strand == "not found")      {  strand = ""; }
552                         
553                         temp = validParameter.validFile(parameters, "ucl", false);                                              if (temp == "not found") { temp = "f"; }
554                         ucl = m->isTrue(temp);
555                         
556                         queryfract = validParameter.validFile(parameters, "queryfract", false);                 if (queryfract == "not found")          { useQueryfract = false; queryfract = "0.5";            }       else{ useQueryfract = true;             }
557                         if (!ucl && useQueryfract) { m->mothurOut("queryfact may only be used when ucl=t, ignoring."); m->mothurOutEndLine(); useQueryfract = false; }
558                         
559                         temp = validParameter.validFile(parameters, "skipgaps", false);                                 if (temp == "not found") { temp = "t"; }
560                         skipgaps = m->isTrue(temp); 
561
562                         temp = validParameter.validFile(parameters, "skipgaps2", false);                                if (temp == "not found") { temp = "t"; }
563                         skipgaps2 = m->isTrue(temp); 
564             
565             
566                         temp = validParameter.validFile(parameters, "dereplicate", false);      
567                         if (temp == "not found") { 
568                                 if (groupfile != "")    {  temp = "false";                                      }
569                                 else                    {  temp = "true";       }
570                         }
571                         dups = m->isTrue(temp);
572
573                         
574                         if (hasName && (templatefile != "self")) { m->mothurOut("You have provided a namefile and the reference parameter is not set to self. I am not sure what reference you are trying to use, aborting."); m->mothurOutEndLine(); abort=true; }
575                         if (hasGroup && (templatefile != "self")) { m->mothurOut("You have provided a group file and the reference parameter is not set to self. I am not sure what reference you are trying to use, aborting."); m->mothurOutEndLine(); abort=true; }
576                         
577                         //look for uchime exe
578                         path = m->argv;
579                         string tempPath = path;
580                         for (int i = 0; i < path.length(); i++) { tempPath[i] = tolower(path[i]); }
581                         path = path.substr(0, (tempPath.find_last_of('m')));
582                         
583                         string uchimeCommand;
584 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
585                         uchimeCommand = path + "uchime";        //      first look for uchime at the location derived from m->argv
586             if (m->debug) { 
587                 m->mothurOut("[DEBUG]: Uchime location using \"which uchime\" = "); 
588                 Command* newCommand = new SystemCommand("which uchime"); m->mothurOutEndLine();
589                 newCommand->execute();
590                 delete newCommand;
591                 m->mothurOut("[DEBUG]: Mothur's location using \"which mothur\" = "); 
592                 newCommand = new SystemCommand("which mothur"); m->mothurOutEndLine();
593                 newCommand->execute();
594                 delete newCommand;
595             }
596 #else
597                         uchimeCommand = path + "uchime.exe";
598 #endif
599         
600                         //test to make sure uchime exists
601                         ifstream in;
602                         uchimeCommand = m->getFullPathName(uchimeCommand);
603                         int ableToOpen = m->openInputFile(uchimeCommand, in, "no error"); in.close();
604                         if(ableToOpen == 1) {   
605                 m->mothurOut(uchimeCommand + " file does not exist. Checking path... \n");
606                 //check to see if uchime is in the path??
607                 
608                 string uLocation = m->findProgramPath("uchime");
609                 
610                 
611                 ifstream in2;
612 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
613                 ableToOpen = m->openInputFile(uLocation, in2, "no error"); in2.close();
614 #else
615                 ableToOpen = m->openInputFile((uLocation + ".exe"), in2, "no error"); in2.close();
616 #endif
617
618                 if(ableToOpen == 1) { m->mothurOut("[ERROR]: " + uLocation + " file does not exist. mothur requires the uchime executable."); m->mothurOutEndLine(); abort = true; } 
619                 else {  m->mothurOut("Found uchime in your path, using " + uLocation + "\n");uchimeLocation = uLocation; }
620             }else {  uchimeLocation = uchimeCommand; }
621             
622             uchimeLocation = m->getFullPathName(uchimeLocation);
623         }
624         }
625         catch(exception& e) {
626                 m->errorOut(e, "ChimeraSlayerCommand", "ChimeraSlayerCommand");
627                 exit(1);
628         }
629 }
630 //***************************************************************************************************************
631
632 int ChimeraUchimeCommand::execute(){
633         try{
634         
635         if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
636                 
637                 m->mothurOut("\nuchime by Robert C. Edgar\nhttp://drive5.com/uchime\nThis code is donated to the public domain.\n\n");
638                 
639                 for (int s = 0; s < fastaFileNames.size(); s++) {
640                         
641                         m->mothurOut("Checking sequences from " + fastaFileNames[s] + " ..." ); m->mothurOutEndLine();
642                         
643                         int start = time(NULL); 
644                         string nameFile = "";
645                         if (outputDir == "") { outputDir = m->hasPath(fastaFileNames[s]);  }//if user entered a file with a path then preserve it                               
646                         map<string, string> variables; 
647             variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s]));
648                         string outputFileName = getOutputFileName("chimera", variables);
649                         string accnosFileName = getOutputFileName("accnos", variables);
650                         string alnsFileName = getOutputFileName("alns", variables);
651                         string newFasta = m->getRootName(fastaFileNames[s]) + "temp";
652             string newCountFile = "";
653                                 
654                         //you provided a groupfile
655                         string groupFile = "";
656             bool hasGroup = false;
657                         if (groupFileNames.size() != 0) { groupFile = groupFileNames[s]; hasGroup = true; }
658             else if (hasCount) {
659                 CountTable ct;
660                 if (ct.testGroups(nameFileNames[s])) { hasGroup = true; }
661                 variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(nameFileNames[s]));
662                 newCountFile = getOutputFileName("count", variables);
663             }
664                         
665                         if ((templatefile == "self") && (!hasGroup)) { //you want to run uchime with a template=self and no groups
666
667                                 if (processors != 1) { m->mothurOut("When using template=self, mothur can only use 1 processor, continuing."); m->mothurOutEndLine(); processors = 1; }
668                                 if (nameFileNames.size() != 0) { //you provided a namefile and we don't need to create one
669                                         nameFile = nameFileNames[s];
670                                 }else { nameFile = getNamesFile(fastaFileNames[s]); }
671                                                                                 
672                                 map<string, string> seqs;  
673                                 readFasta(fastaFileNames[s], seqs);  if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) {   m->mothurRemove(outputNames[j]);        }  return 0; }
674
675                                 //read namefile
676                                 vector<seqPriorityNode> nameMapCount;
677                 int error;
678                 if (hasCount) {
679                     CountTable ct;
680                     ct.readTable(nameFile);
681                     for(map<string, string>::iterator it = seqs.begin(); it != seqs.end(); it++) {
682                         int num = ct.getNumSeqs(it->first);
683                         if (num == 0) { error = 1; }
684                         else {
685                             seqPriorityNode temp(num, it->second, it->first);
686                             nameMapCount.push_back(temp);
687                         }
688                     }
689                 }else {
690                     error = m->readNames(nameFile, nameMapCount, seqs); if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) {        m->mothurRemove(outputNames[j]);        }  return 0; }
691                 }
692                                 if (error == 1) { for (int j = 0; j < outputNames.size(); j++) {        m->mothurRemove(outputNames[j]);        }  return 0; }
693                                 if (seqs.size() != nameMapCount.size()) { m->mothurOut( "The number of sequences in your fastafile does not match the number of sequences in your namefile, aborting."); m->mothurOutEndLine(); for (int j = 0; j < outputNames.size(); j++) {  m->mothurRemove(outputNames[j]);        }  return 0; }
694                                 
695                                 printFile(nameMapCount, newFasta);
696                                 fastaFileNames[s] = newFasta;
697                         }
698                         
699                         if (m->control_pressed) {  for (int j = 0; j < outputNames.size(); j++) {       m->mothurRemove(outputNames[j]);        }  return 0;    }                               
700                         
701                         if (hasGroup) {
702                                 if (nameFileNames.size() != 0) { //you provided a namefile and we don't need to create one
703                                         nameFile = nameFileNames[s];
704                                 }else { nameFile = getNamesFile(fastaFileNames[s]); }
705                                 
706                                 //Parse sequences by group
707                 vector<string> groups;
708                 map<string, string> uniqueNames;
709                 if (hasCount) {
710                     cparser = new SequenceCountParser(nameFile, fastaFileNames[s]);
711                     groups = cparser->getNamesOfGroups();
712                     uniqueNames = cparser->getAllSeqsMap();
713                 }else{
714                     sparser = new SequenceParser(groupFile, fastaFileNames[s], nameFile);
715                     groups = sparser->getNamesOfGroups();
716                     uniqueNames = sparser->getAllSeqsMap();
717                 }
718                                         
719                                 if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) {        m->mothurRemove(outputNames[j]);        }  return 0; }
720                                                                 
721                                 //clears files
722                                 ofstream out, out1, out2;
723                                 m->openOutputFile(outputFileName, out); out.close(); 
724                                 m->openOutputFile(accnosFileName, out1); out1.close();
725                                 if (chimealns) { m->openOutputFile(alnsFileName, out2); out2.close(); }
726                                 int totalSeqs = 0;
727                                 
728                                 if(processors == 1)     {       totalSeqs = driverGroups(outputFileName, newFasta, accnosFileName, alnsFileName, newCountFile, 0, groups.size(), groups);
729                     
730                     if (hasCount && dups) {
731                         CountTable c; c.readTable(nameFile);
732                         if (!m->isBlank(newCountFile)) {
733                             ifstream in2;
734                             m->openInputFile(newCountFile, in2);
735                             
736                             string name, group;
737                             while (!in2.eof()) {
738                                 in2 >> name >> group; m->gobble(in2);
739                                 c.setAbund(name, group, 0);
740                             }
741                             in2.close();
742                         }
743                         m->mothurRemove(newCountFile);
744                         c.printTable(newCountFile);
745                     }
746
747                 }else                           {       totalSeqs = createProcessesGroups(outputFileName, newFasta, accnosFileName, alnsFileName, newCountFile, groups, nameFile, groupFile, fastaFileNames[s]);                        }
748
749                                 if (m->control_pressed) {  for (int j = 0; j < outputNames.size(); j++) {       m->mothurRemove(outputNames[j]);        }  return 0;    }                               
750                
751                 
752                 if (!dups) { 
753                     int totalChimeras = deconvoluteResults(uniqueNames, outputFileName, accnosFileName, alnsFileName);
754                                 
755                     m->mothurOutEndLine(); m->mothurOut("It took " + toString(time(NULL) - start) + " secs to check " + toString(totalSeqs) + " sequences. " + toString(totalChimeras) + " chimeras were found.");      m->mothurOutEndLine();
756                     m->mothurOut("The number of sequences checked may be larger than the number of unique sequences because some sequences are found in several samples."); m->mothurOutEndLine(); 
757                                 }else {
758                     
759                     if (hasCount) {
760                         set<string> doNotRemove;
761                         CountTable c; c.readTable(newCountFile);
762                         vector<string> namesInTable = c.getNamesOfSeqs();
763                         for (int i = 0; i < namesInTable.size(); i++) {
764                             int temp = c.getNumSeqs(namesInTable[i]);
765                             if (temp == 0) {  c.remove(namesInTable[i]);  }
766                             else { doNotRemove.insert((namesInTable[i])); }
767                         }
768                         //remove names we want to keep from accnos file.
769                         set<string> accnosNames = m->readAccnos(accnosFileName);
770                         ofstream out2;
771                         m->openOutputFile(accnosFileName, out2);
772                         for (set<string>::iterator it = accnosNames.begin(); it != accnosNames.end(); it++) {
773                             if (doNotRemove.count(*it) == 0) {  out2 << (*it) << endl; }
774                         }
775                         out2.close();
776                         c.printTable(newCountFile);
777                         outputNames.push_back(newCountFile); outputTypes["count"].push_back(newCountFile);
778                     }
779                 }
780                 
781                 if (hasCount) { delete cparser; }
782                 else { delete sparser; }
783                 
784                                 if (m->control_pressed) {  for (int j = 0; j < outputNames.size(); j++) {       m->mothurRemove(outputNames[j]);        }  return 0;    }                               
785                                         
786                         }else{
787                                 if (m->control_pressed) {  for (int j = 0; j < outputNames.size(); j++) {       m->mothurRemove(outputNames[j]);        }  return 0;    }
788                         
789                                 int numSeqs = 0;
790                                 int numChimeras = 0;
791
792                                 if(processors == 1){ numSeqs = driver(outputFileName, fastaFileNames[s], accnosFileName, alnsFileName, numChimeras); }
793                                 else{   numSeqs = createProcesses(outputFileName, fastaFileNames[s], accnosFileName, alnsFileName, numChimeras); }
794                                 
795                                 //add headings
796                                 ofstream out;
797                                 m->openOutputFile(outputFileName+".temp", out); 
798                                 out << "Score\tQuery\tParentA\tParentB\tIdQM\tIdQA\tIdQB\tIdAB\tIdQT\tLY\tLN\tLA\tRY\tRN\tRA\tDiv\tYN\n";
799                                 out.close();
800                                 
801                                 m->appendFiles(outputFileName, outputFileName+".temp");
802                                 m->mothurRemove(outputFileName); rename((outputFileName+".temp").c_str(), outputFileName.c_str());
803                                 
804                                 if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) {        m->mothurRemove(outputNames[j]);        } return 0; }
805                         
806                                 //remove file made for uchime
807                                 if (templatefile == "self") {  m->mothurRemove(fastaFileNames[s]); }
808                         
809                                 m->mothurOutEndLine(); m->mothurOut("It took " + toString(time(NULL) - start) + " secs to check " + toString(numSeqs) + " sequences. " + toString(numChimeras) + " chimeras were found.");      m->mothurOutEndLine();
810                         }
811                         
812                         outputNames.push_back(outputFileName); outputTypes["chimera"].push_back(outputFileName);
813                         outputNames.push_back(accnosFileName); outputTypes["accnos"].push_back(accnosFileName);
814                         if (chimealns) { outputNames.push_back(alnsFileName); outputTypes["alns"].push_back(alnsFileName); }
815                 }
816         
817                 //set accnos file as new current accnosfile
818                 string current = "";
819                 itTypes = outputTypes.find("accnos");
820                 if (itTypes != outputTypes.end()) {
821                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setAccnosFile(current); }
822                 }
823                 
824                 m->mothurOutEndLine();
825                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
826                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }       
827                 m->mothurOutEndLine();
828                 
829                 return 0;
830                 
831         }
832         catch(exception& e) {
833                 m->errorOut(e, "ChimeraUchimeCommand", "execute");
834                 exit(1);
835         }
836 }
837 //**********************************************************************************************************************
838 int ChimeraUchimeCommand::deconvoluteResults(map<string, string>& uniqueNames, string outputFileName, string accnosFileName, string alnsFileName){
839         try {
840                 map<string, string>::iterator itUnique;
841                 int total = 0;
842                 
843                 ofstream out2;
844                 m->openOutputFile(accnosFileName+".temp", out2);
845                 
846                 string name;
847                 set<string> namesInFile; //this is so if a sequence is found to be chimera in several samples we dont write it to the results file more than once
848                 set<string>::iterator itNames;
849                 set<string> chimerasInFile;
850                 set<string>::iterator itChimeras;
851
852         if (!m->isBlank(accnosFileName)) {
853             //edit accnos file
854             ifstream in2;
855             m->openInputFile(accnosFileName, in2);
856             
857             while (!in2.eof()) {
858                 if (m->control_pressed) { in2.close(); out2.close(); m->mothurRemove(outputFileName); m->mothurRemove((accnosFileName+".temp")); return 0; }
859                 
860                 in2 >> name; m->gobble(in2);
861                 
862                 //find unique name
863                 itUnique = uniqueNames.find(name);
864                 
865                 if (itUnique == uniqueNames.end()) { m->mothurOut("[ERROR]: trouble parsing accnos results. Cannot find " + name + "."); m->mothurOutEndLine(); m->control_pressed = true; }
866                 else {
867                     itChimeras = chimerasInFile.find((itUnique->second));
868                     
869                     if (itChimeras == chimerasInFile.end()) {
870                         out2 << itUnique->second << endl;
871                         chimerasInFile.insert((itUnique->second));
872                         total++;
873                     }
874                 }
875             }
876             in2.close();
877         }
878                 out2.close();
879                 
880                 m->mothurRemove(accnosFileName);
881                 rename((accnosFileName+".temp").c_str(), accnosFileName.c_str());
882                 
883                 
884                 
885                 //edit chimera file
886                 ifstream in; 
887                 m->openInputFile(outputFileName, in);
888                 
889                 ofstream out;
890                 m->openOutputFile(outputFileName+".temp", out); out.setf(ios::fixed, ios::floatfield); out.setf(ios::showpoint);
891                 out << "Score\tQuery\tParentA\tParentB\tIdQM\tIdQA\tIdQB\tIdAB\tIdQT\tLY\tLN\tLA\tRY\tRN\tRA\tDiv\tYN\n";
892                 
893                 float temp1;
894                 string parent1, parent2, temp2, temp3, temp4, temp5, temp6, temp7, temp8, temp9, temp10, temp11, temp12, temp13, flag;
895                 name = "";
896                 namesInFile.clear();    
897                 //assumptions - in file each read will always look like - if uchime source is updated, revisit this code.
898                 /*                                                                              1       2       3       4       5       6       7       8       9       10      11      12      13      14      15
899                  0.000000       F11Fcsw_33372/ab=18/            *       *       *       *       *       *       *       *       *       *       *       *       *       *       N
900                  0.018300       F11Fcsw_14980/ab=16/            F11Fcsw_1915/ab=35/     F11Fcsw_6032/ab=42/     79.9    78.7    78.2    78.7    79.2    3       0       5       11      10      20      1.46    N
901                 */
902                 
903                 while (!in.eof()) {
904                         
905                         if (m->control_pressed) { in.close(); out.close(); m->mothurRemove((outputFileName+".temp")); return 0; }
906                         
907                         bool print = false;
908                         in >> temp1;    m->gobble(in);
909                         in >> name;             m->gobble(in);
910                         in >> parent1;  m->gobble(in);
911                         in >> parent2;  m->gobble(in);
912                         in >> temp2 >> temp3 >> temp4 >> temp5 >> temp6 >> temp7 >> temp8 >> temp9 >> temp10 >> temp11 >> temp12 >> temp13 >> flag;
913                         m->gobble(in);
914                         
915                         //parse name - name will look like U68590/ab=1/
916                         string restOfName = "";
917                         int pos = name.find_first_of('/');
918                         if (pos != string::npos) {
919                                 restOfName = name.substr(pos);
920                                 name = name.substr(0, pos);
921                         }
922                         
923                         //find unique name
924                         itUnique = uniqueNames.find(name);
925                         
926                         if (itUnique == uniqueNames.end()) { m->mothurOut("[ERROR]: trouble parsing chimera results. Cannot find "+ name + "."); m->mothurOutEndLine(); m->control_pressed = true; }
927                         else {
928                                 name = itUnique->second;
929                                 //is this name already in the file
930                                 itNames = namesInFile.find((name));
931                                 
932                                 if (itNames == namesInFile.end()) { //no not in file
933                                         if (flag == "N") { //are you really a no??
934                                                 //is this sequence really not chimeric??
935                                                 itChimeras = chimerasInFile.find(name);
936                                                 
937                                                 //then you really are a no so print, otherwise skip
938                                                 if (itChimeras == chimerasInFile.end()) { print = true; }
939                                         }else{ print = true; }
940                                 }
941                         }
942                         
943                         if (print) {
944                                 out << temp1 << '\t' << name << restOfName << '\t';
945                                 namesInFile.insert(name);
946                                 
947                                 //parse parent1 names
948                                 if (parent1 != "*") {
949                                         restOfName = "";
950                                         pos = parent1.find_first_of('/');
951                                         if (pos != string::npos) {
952                                                 restOfName = parent1.substr(pos);
953                                                 parent1 = parent1.substr(0, pos);
954                                         }
955                                         
956                                         itUnique = uniqueNames.find(parent1);
957                                         if (itUnique == uniqueNames.end()) { m->mothurOut("[ERROR]: trouble parsing chimera results. Cannot find parentA "+ parent1 + "."); m->mothurOutEndLine(); m->control_pressed = true; }
958                                         else {  out << itUnique->second << restOfName << '\t';  }
959                                 }else { out << parent1 << '\t'; }
960                                 
961                                 //parse parent2 names
962                                 if (parent2 != "*") {
963                                         restOfName = "";
964                                         pos = parent2.find_first_of('/');
965                                         if (pos != string::npos) {
966                                                 restOfName = parent2.substr(pos);
967                                                 parent2 = parent2.substr(0, pos);
968                                         }
969                                         
970                                         itUnique = uniqueNames.find(parent2);
971                                         if (itUnique == uniqueNames.end()) { m->mothurOut("[ERROR]: trouble parsing chimera results. Cannot find parentB "+ parent2 + "."); m->mothurOutEndLine(); m->control_pressed = true; }
972                                         else {  out << itUnique->second << restOfName << '\t';  }
973                                 }else { out << parent2 << '\t'; }
974                                 
975                                 out << temp2 << '\t' << temp3 << '\t' << temp4 << '\t' << temp5 << '\t' << temp6 << '\t' << temp7 << '\t' << temp8 << '\t' << temp9 << '\t' << temp10 << '\t' << temp11 << '\t' << temp12 << temp13 << '\t' << flag << endl;    
976                         }
977                 }
978                 in.close();
979                 out.close();
980                 
981                 m->mothurRemove(outputFileName);
982                 rename((outputFileName+".temp").c_str(), outputFileName.c_str());
983                 
984                                 
985                 //edit anls file
986                 //assumptions - in file each read will always look like - if uchime source is updated, revisit this code.
987                 /*
988                  ------------------------------------------------------------------------
989                  Query   (  179 nt) F21Fcsw_11639/ab=591/
990                  ParentA (  179 nt) F11Fcsw_6529/ab=1625/
991                  ParentB (  181 nt) F21Fcsw_12128/ab=1827/
992                  
993                  A     1 AAGgAAGAtTAATACaagATGgCaTCatgAGtccgCATgTtcAcatGATTAAAG--gTaTtcCGGTagacGATGGGGATG 78
994                  Q     1 AAGTAAGACTAATACCCAATGACGTCTCTAGAAGACATCTGAAAGAGATTAAAG--ATTTATCGGTGATGGATGGGGATG 78
995                  B     1 AAGgAAGAtTAATcCaggATGggaTCatgAGttcACATgTccgcatGATTAAAGgtATTTtcCGGTagacGATGGGGATG 80
996                  Diffs      N    N    A N?N   N N  NNN  N?NB   N ?NaNNN          B B NN    NNNN          
997                  Votes      0    0    + 000   0 0  000  000+   0 00!000            + 00    0000          
998                  Model   AAAAAAAAAAAAAAAAAAAAAAxBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB
999                  
1000                  A    79 CGTtccATTAGaTaGTaGGCGGGGTAACGGCCCACCtAGtCttCGATggaTAGGGGTTCTGAGAGGAAGGTCCCCCACAT 158
1001                  Q    79 CGTCTGATTAGCTTGTTGGCGGGGTAACGGCCCACCAAGGCAACGATCAGTAGGGGTTCTGAGAGGAAGGTCCCCCACAT 158
1002                  B    81 CGTtccATTAGaTaGTaGGCGGGGTAACGGCCCACCtAGtCAACGATggaTAGGGGTTCTGAGAGGAAGGTCCCCCACAT 160
1003                  Diffs      NNN     N N  N                   N  N BB    NNN                              
1004                  Votes      000     0 0  0                   0  0 ++    000                              
1005                  Model   BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB
1006                  
1007                  A   159 TGGAACTGAGACACGGTCCAA 179
1008                  Q   159 TGGAACTGAGACACGGTCCAA 179
1009                  B   161 TGGAACTGAGACACGGTCCAA 181
1010                  Diffs                        
1011                  Votes                        
1012                  Model   BBBBBBBBBBBBBBBBBBBBB
1013                  
1014                  Ids.  QA 76.6%, QB 77.7%, AB 93.7%, QModel 78.9%, Div. +1.5%
1015                  Diffs Left 7: N 0, A 6, Y 1 (14.3%); Right 35: N 1, A 30, Y 4 (11.4%), Score 0.0047
1016                 */
1017                 if (chimealns) {
1018                         ifstream in3; 
1019                         m->openInputFile(alnsFileName, in3);
1020                 
1021                         ofstream out3;
1022                         m->openOutputFile(alnsFileName+".temp", out3); out3.setf(ios::fixed, ios::floatfield); out3.setf(ios::showpoint);
1023                 
1024                         name = "";
1025                         namesInFile.clear();
1026                         string line = "";
1027                         
1028                         while (!in3.eof()) {
1029                                 if (m->control_pressed) { in3.close(); out3.close(); m->mothurRemove(outputFileName); m->mothurRemove((accnosFileName)); m->mothurRemove((alnsFileName+".temp")); return 0; }
1030                                 
1031                                 line = "";
1032                                 line = m->getline(in3); 
1033                                 string temp = "";
1034                                 
1035                                 if (line != "") {
1036                                         istringstream iss(line);
1037                                         iss >> temp;
1038                                         
1039                                         //are you a name line
1040                                         if ((temp == "Query") || (temp == "ParentA") || (temp == "ParentB")) {
1041                                                 int spot = 0;
1042                                                 for (int i = 0; i < line.length(); i++) {
1043                                                         spot = i;
1044                                                         if (line[i] == ')') { break; }
1045                                                         else { out3 << line[i]; }
1046                                                 }
1047                                                 
1048                                                 if (spot == (line.length() - 1)) { m->mothurOut("[ERROR]: could not line sequence name in line " + line + "."); m->mothurOutEndLine(); m->control_pressed = true; }
1049                                                 else if ((spot+2) > (line.length() - 1)) { m->mothurOut("[ERROR]: could not line sequence name in line " + line + "."); m->mothurOutEndLine(); m->control_pressed = true; }
1050                                                 else {
1051                                                         out << line[spot] << line[spot+1];
1052                                                         
1053                                                         name = line.substr(spot+2);
1054                                                         
1055                                                         //parse name - name will either look like U68590/ab=1/ or U68590
1056                                                         string restOfName = "";
1057                                                         int pos = name.find_first_of('/');
1058                                                         if (pos != string::npos) {
1059                                                                 restOfName = name.substr(pos);
1060                                                                 name = name.substr(0, pos);
1061                                                         }
1062                                                         
1063                                                         //find unique name
1064                                                         itUnique = uniqueNames.find(name);
1065                                                         
1066                                                         if (itUnique == uniqueNames.end()) { m->mothurOut("[ERROR]: trouble parsing alns results. Cannot find "+ name + "."); m->mothurOutEndLine();m->control_pressed = true;  }
1067                                                         else {
1068                                                                 //only limit repeats on query names
1069                                                                 if (temp == "Query") {
1070                                                                         itNames = namesInFile.find((itUnique->second));
1071                                                                         
1072                                                                         if (itNames == namesInFile.end()) {
1073                                                                                 out << itUnique->second << restOfName << endl;
1074                                                                                 namesInFile.insert((itUnique->second));
1075                                                                         }
1076                                                                 }else { out << itUnique->second << restOfName << endl;  }
1077                                                         }
1078                                                         
1079                                                 }
1080                                                 
1081                                         }else { //not need to alter line
1082                                                 out3 << line << endl;
1083                                         }
1084                                 }else { out3 << endl; }
1085                         }
1086                         in3.close();
1087                         out3.close();
1088                         
1089                         m->mothurRemove(alnsFileName);
1090                         rename((alnsFileName+".temp").c_str(), alnsFileName.c_str());
1091                 }
1092                 
1093                 return total;
1094         }
1095         catch(exception& e) {
1096                 m->errorOut(e, "ChimeraUchimeCommand", "deconvoluteResults");
1097                 exit(1);
1098         }
1099 }       
1100 //**********************************************************************************************************************
1101 int ChimeraUchimeCommand::printFile(vector<seqPriorityNode>& nameMapCount, string filename){
1102         try {
1103                 
1104                 sort(nameMapCount.begin(), nameMapCount.end(), compareSeqPriorityNodes);
1105                 
1106                 ofstream out;
1107                 m->openOutputFile(filename, out);
1108                 
1109                 //print new file in order of
1110                 for (int i = 0; i < nameMapCount.size(); i++) {
1111                         out << ">" << nameMapCount[i].name  << "/ab=" << nameMapCount[i].numIdentical << "/" << endl << nameMapCount[i].seq << endl;
1112                 }
1113                 out.close();
1114                 
1115                 return 0;
1116         }
1117         catch(exception& e) {
1118                 m->errorOut(e, "ChimeraUchimeCommand", "printFile");
1119                 exit(1);
1120         }
1121 }       
1122 //**********************************************************************************************************************
1123 int ChimeraUchimeCommand::readFasta(string filename, map<string, string>& seqs){
1124         try {
1125                 //create input file for uchime
1126                 //read through fastafile and store info
1127                 ifstream in;
1128                 m->openInputFile(filename, in);
1129                 
1130                 while (!in.eof()) {
1131                         
1132                         if (m->control_pressed) { in.close(); return 0; }
1133                         
1134                         Sequence seq(in); m->gobble(in);
1135                         seqs[seq.getName()] = seq.getAligned();
1136                 }
1137                 in.close();
1138                 
1139                 return 0;
1140         }
1141         catch(exception& e) {
1142                 m->errorOut(e, "ChimeraUchimeCommand", "readFasta");
1143                 exit(1);
1144         }
1145 }       
1146 //**********************************************************************************************************************
1147
1148 string ChimeraUchimeCommand::getNamesFile(string& inputFile){
1149         try {
1150                 string nameFile = "";
1151                 
1152                 m->mothurOutEndLine(); m->mothurOut("No namesfile given, running unique.seqs command to generate one."); m->mothurOutEndLine(); m->mothurOutEndLine();
1153                 
1154                 //use unique.seqs to create new name and fastafile
1155                 string inputString = "fasta=" + inputFile;
1156                 m->mothurOut("/******************************************/"); m->mothurOutEndLine(); 
1157                 m->mothurOut("Running command: unique.seqs(" + inputString + ")"); m->mothurOutEndLine(); 
1158                 m->mothurCalling = true;
1159         
1160                 Command* uniqueCommand = new DeconvoluteCommand(inputString);
1161                 uniqueCommand->execute();
1162                 
1163                 map<string, vector<string> > filenames = uniqueCommand->getOutputFiles();
1164                 
1165                 delete uniqueCommand;
1166                 m->mothurCalling = false;
1167                 m->mothurOut("/******************************************/"); m->mothurOutEndLine(); 
1168                 
1169                 nameFile = filenames["name"][0];
1170                 inputFile = filenames["fasta"][0];
1171                 
1172                 return nameFile;
1173         }
1174         catch(exception& e) {
1175                 m->errorOut(e, "ChimeraUchimeCommand", "getNamesFile");
1176                 exit(1);
1177         }
1178 }
1179 //**********************************************************************************************************************
1180 int ChimeraUchimeCommand::driverGroups(string outputFName, string filename, string accnos, string alns, string countlist, int start, int end, vector<string> groups){
1181         try {
1182                 
1183                 int totalSeqs = 0;
1184                 int numChimeras = 0;
1185         
1186         
1187         ofstream outCountList;
1188         if (hasCount && dups) { m->openOutputFile(countlist, outCountList); }
1189         
1190                 for (int i = start; i < end; i++) {
1191                         int start = time(NULL);  if (m->control_pressed) {  outCountList.close(); m->mothurRemove(countlist); return 0; }
1192             
1193                         int error;
1194             if (hasCount) { error = cparser->getSeqs(groups[i], filename, true); if ((error == 1) || m->control_pressed) {  return 0; } }
1195             else { error = sparser->getSeqs(groups[i], filename, true); if ((error == 1) || m->control_pressed) {  return 0; } }
1196                         
1197                         int numSeqs = driver((outputFName + groups[i]), filename, (accnos+groups[i]), (alns+ groups[i]), numChimeras);
1198                         totalSeqs += numSeqs;
1199                         
1200                         if (m->control_pressed) { return 0; }
1201                         
1202                         //remove file made for uchime
1203                         if (!m->debug) {  m->mothurRemove(filename);  }
1204             else { m->mothurOut("[DEBUG]: saving file: " + filename + ".\n"); }
1205                         
1206             //if we provided a count file with group info and set dereplicate=t, then we want to create a *.pick.count_table
1207             //This table will zero out group counts for seqs determined to be chimeric by that group.
1208             if (dups) {
1209                 if (!m->isBlank(accnos+groups[i])) {
1210                     ifstream in;
1211                     m->openInputFile(accnos+groups[i], in);
1212                     string name;
1213                     if (hasCount) {
1214                         while (!in.eof()) {
1215                             in >> name; m->gobble(in);
1216                             outCountList << name << '\t' << groups[i] << endl;
1217                         }
1218                         in.close();
1219                     }else {
1220                         map<string, string> thisnamemap = sparser->getNameMap(groups[i]);
1221                         map<string, string>::iterator itN;
1222                         ofstream out;
1223                         m->openOutputFile(accnos+groups[i]+".temp", out);
1224                         while (!in.eof()) {
1225                             in >> name; m->gobble(in); 
1226                             itN = thisnamemap.find(name);
1227                             if (itN != thisnamemap.end()) {
1228                                 vector<string> tempNames; m->splitAtComma(itN->second, tempNames); 
1229                                 for (int j = 0; j < tempNames.size(); j++) { out << tempNames[j] << endl; }
1230                                 
1231                             }else { m->mothurOut("[ERROR]: parsing cannot find " + name + ".\n"); m->control_pressed = true; }
1232                         }
1233                         out.close();
1234                         in.close();
1235                         m->renameFile(accnos+groups[i]+".temp", accnos+groups[i]);
1236                     }
1237                    
1238                 }
1239             }
1240             
1241                         //append files
1242                         m->appendFiles((outputFName+groups[i]), outputFName); m->mothurRemove((outputFName+groups[i]));
1243                         m->appendFiles((accnos+groups[i]), accnos); m->mothurRemove((accnos+groups[i]));
1244                         if (chimealns) { m->appendFiles((alns+groups[i]), alns); m->mothurRemove((alns+groups[i])); }
1245                         
1246                         m->mothurOutEndLine(); m->mothurOut("It took " + toString(time(NULL) - start) + " secs to check " + toString(numSeqs) + " sequences from group " + groups[i] + ".");    m->mothurOutEndLine();                                  
1247                 }
1248
1249         if (hasCount && dups) { outCountList.close(); }
1250         
1251         return totalSeqs;
1252                 
1253         }
1254         catch(exception& e) {
1255                 m->errorOut(e, "ChimeraUchimeCommand", "driverGroups");
1256                 exit(1);
1257         }
1258 }       
1259 //**********************************************************************************************************************
1260
1261 int ChimeraUchimeCommand::driver(string outputFName, string filename, string accnos, string alns, int& numChimeras){
1262         try {
1263                 
1264                 outputFName = m->getFullPathName(outputFName);
1265                 filename = m->getFullPathName(filename);
1266                 alns = m->getFullPathName(alns);
1267                 
1268                 //to allow for spaces in the path
1269                 outputFName = "\"" + outputFName + "\"";
1270                 filename = "\"" + filename + "\"";
1271                 alns = "\"" + alns + "\"";
1272                                 
1273                 vector<char*> cPara;
1274                 
1275                 string uchimeCommand = uchimeLocation;
1276         uchimeCommand = "\"" + uchimeCommand + "\" ";
1277         
1278         char* tempUchime;
1279                 tempUchime= new char[uchimeCommand.length()+1]; 
1280                 *tempUchime = '\0';
1281                 strncat(tempUchime, uchimeCommand.c_str(), uchimeCommand.length());
1282                 cPara.push_back(tempUchime);
1283                 
1284         //are you using a reference file
1285                 if (templatefile != "self") {
1286             string outputFileName = filename.substr(1, filename.length()-2) + ".uchime_formatted";
1287             prepFile(filename.substr(1, filename.length()-2), outputFileName);
1288             filename = outputFileName;
1289             filename = "\"" + filename + "\"";
1290                         //add reference file
1291                         char* tempRef = new char[5]; 
1292                         //strcpy(tempRef, "--db"); 
1293                         *tempRef = '\0'; strncat(tempRef, "--db", 4);
1294                         cPara.push_back(tempRef);  
1295                         char* tempR = new char[templatefile.length()+1];
1296                         //strcpy(tempR, templatefile.c_str());
1297                         *tempR = '\0'; strncat(tempR, templatefile.c_str(), templatefile.length());
1298                         cPara.push_back(tempR);
1299                 }
1300                 
1301                 char* tempIn = new char[8]; 
1302                 *tempIn = '\0'; strncat(tempIn, "--input", 7);
1303                 //strcpy(tempIn, "--input"); 
1304                 cPara.push_back(tempIn);
1305                 char* temp = new char[filename.length()+1];
1306                 *temp = '\0'; strncat(temp, filename.c_str(), filename.length());
1307                 //strcpy(temp, filename.c_str());
1308                 cPara.push_back(temp);
1309                 
1310                 char* tempO = new char[12]; 
1311                 *tempO = '\0'; strncat(tempO, "--uchimeout", 11);
1312                 //strcpy(tempO, "--uchimeout"); 
1313                 cPara.push_back(tempO);
1314                 char* tempout = new char[outputFName.length()+1];
1315                 //strcpy(tempout, outputFName.c_str());
1316                 *tempout = '\0'; strncat(tempout, outputFName.c_str(), outputFName.length());
1317                 cPara.push_back(tempout);
1318                 
1319                 if (chimealns) {
1320                         char* tempA = new char[13]; 
1321                         *tempA = '\0'; strncat(tempA, "--uchimealns", 12);
1322                         //strcpy(tempA, "--uchimealns"); 
1323                         cPara.push_back(tempA);
1324                         char* tempa = new char[alns.length()+1];
1325                         //strcpy(tempa, alns.c_str());
1326                         *tempa = '\0'; strncat(tempa, alns.c_str(), alns.length());
1327                         cPara.push_back(tempa);
1328                 }
1329         
1330         if (strand != "") {
1331                         char* tempA = new char[9]; 
1332                         *tempA = '\0'; strncat(tempA, "--strand", 8);
1333                         cPara.push_back(tempA);
1334                         char* tempa = new char[strand.length()+1];
1335                         *tempa = '\0'; strncat(tempa, strand.c_str(), strand.length());
1336                         cPara.push_back(tempa);
1337                 }
1338                 
1339                 if (useAbskew) {
1340                         char* tempskew = new char[9];
1341                         *tempskew = '\0'; strncat(tempskew, "--abskew", 8);
1342                         //strcpy(tempskew, "--abskew"); 
1343                         cPara.push_back(tempskew);
1344                         char* tempSkew = new char[abskew.length()+1];
1345                         //strcpy(tempSkew, abskew.c_str());
1346                         *tempSkew = '\0'; strncat(tempSkew, abskew.c_str(), abskew.length());
1347                         cPara.push_back(tempSkew);
1348                 }
1349                 
1350                 if (useMinH) {
1351                         char* tempminh = new char[7]; 
1352                         *tempminh = '\0'; strncat(tempminh, "--minh", 6);
1353                         //strcpy(tempminh, "--minh"); 
1354                         cPara.push_back(tempminh);
1355                         char* tempMinH = new char[minh.length()+1];
1356                         *tempMinH = '\0'; strncat(tempMinH, minh.c_str(), minh.length());
1357                         //strcpy(tempMinH, minh.c_str());
1358                         cPara.push_back(tempMinH);
1359                 }
1360                 
1361                 if (useMindiv) {
1362                         char* tempmindiv = new char[9]; 
1363                         *tempmindiv = '\0'; strncat(tempmindiv, "--mindiv", 8);
1364                         //strcpy(tempmindiv, "--mindiv"); 
1365                         cPara.push_back(tempmindiv);
1366                         char* tempMindiv = new char[mindiv.length()+1];
1367                         *tempMindiv = '\0'; strncat(tempMindiv, mindiv.c_str(), mindiv.length());
1368                         //strcpy(tempMindiv, mindiv.c_str());
1369                         cPara.push_back(tempMindiv);
1370                 }
1371                 
1372                 if (useXn) {
1373                         char* tempxn = new char[5]; 
1374                         //strcpy(tempxn, "--xn"); 
1375                         *tempxn = '\0'; strncat(tempxn, "--xn", 4);
1376                         cPara.push_back(tempxn);
1377                         char* tempXn = new char[xn.length()+1];
1378                         //strcpy(tempXn, xn.c_str());
1379                         *tempXn = '\0'; strncat(tempXn, xn.c_str(), xn.length());
1380                         cPara.push_back(tempXn);
1381                 }
1382                 
1383                 if (useDn) {
1384                         char* tempdn = new char[5]; 
1385                         //strcpy(tempdn, "--dn"); 
1386                         *tempdn = '\0'; strncat(tempdn, "--dn", 4);
1387                         cPara.push_back(tempdn);
1388                         char* tempDn = new char[dn.length()+1];
1389                         *tempDn = '\0'; strncat(tempDn, dn.c_str(), dn.length());
1390                         //strcpy(tempDn, dn.c_str());
1391                         cPara.push_back(tempDn);
1392                 }
1393                 
1394                 if (useXa) {
1395                         char* tempxa = new char[5]; 
1396                         //strcpy(tempxa, "--xa"); 
1397                         *tempxa = '\0'; strncat(tempxa, "--xa", 4);
1398                         cPara.push_back(tempxa);
1399                         char* tempXa = new char[xa.length()+1];
1400                         *tempXa = '\0'; strncat(tempXa, xa.c_str(), xa.length());
1401                         //strcpy(tempXa, xa.c_str());
1402                         cPara.push_back(tempXa);
1403                 }
1404                 
1405                 if (useChunks) {
1406                         char* tempchunks = new char[9]; 
1407                         //strcpy(tempchunks, "--chunks"); 
1408                         *tempchunks = '\0'; strncat(tempchunks, "--chunks", 8);
1409                         cPara.push_back(tempchunks);
1410                         char* tempChunks = new char[chunks.length()+1];
1411                         *tempChunks = '\0'; strncat(tempChunks, chunks.c_str(), chunks.length());
1412                         //strcpy(tempChunks, chunks.c_str());
1413                         cPara.push_back(tempChunks);
1414                 }
1415                 
1416                 if (useMinchunk) {
1417                         char* tempminchunk = new char[11]; 
1418                         //strcpy(tempminchunk, "--minchunk"); 
1419                         *tempminchunk = '\0'; strncat(tempminchunk, "--minchunk", 10);
1420                         cPara.push_back(tempminchunk);
1421                         char* tempMinchunk = new char[minchunk.length()+1];
1422                         *tempMinchunk = '\0'; strncat(tempMinchunk, minchunk.c_str(), minchunk.length());
1423                         //strcpy(tempMinchunk, minchunk.c_str());
1424                         cPara.push_back(tempMinchunk);
1425                 }
1426                 
1427                 if (useIdsmoothwindow) {
1428                         char* tempidsmoothwindow = new char[17]; 
1429                         *tempidsmoothwindow = '\0'; strncat(tempidsmoothwindow, "--idsmoothwindow", 16);
1430                         //strcpy(tempidsmoothwindow, "--idsmoothwindow"); 
1431                         cPara.push_back(tempidsmoothwindow);
1432                         char* tempIdsmoothwindow = new char[idsmoothwindow.length()+1];
1433                         *tempIdsmoothwindow = '\0'; strncat(tempIdsmoothwindow, idsmoothwindow.c_str(), idsmoothwindow.length());
1434                         //strcpy(tempIdsmoothwindow, idsmoothwindow.c_str());
1435                         cPara.push_back(tempIdsmoothwindow);
1436                 }
1437                 
1438                 /*if (useMinsmoothid) {
1439                         char* tempminsmoothid = new char[14]; 
1440                         //strcpy(tempminsmoothid, "--minsmoothid"); 
1441                         *tempminsmoothid = '\0'; strncat(tempminsmoothid, "--minsmoothid", 13);
1442                         cPara.push_back(tempminsmoothid);
1443                         char* tempMinsmoothid = new char[minsmoothid.length()+1];
1444                         *tempMinsmoothid = '\0'; strncat(tempMinsmoothid, minsmoothid.c_str(), minsmoothid.length());
1445                         //strcpy(tempMinsmoothid, minsmoothid.c_str());
1446                         cPara.push_back(tempMinsmoothid);
1447                 }*/
1448                 
1449                 if (useMaxp) {
1450                         char* tempmaxp = new char[7]; 
1451                         //strcpy(tempmaxp, "--maxp"); 
1452                         *tempmaxp = '\0'; strncat(tempmaxp, "--maxp", 6);
1453                         cPara.push_back(tempmaxp);
1454                         char* tempMaxp = new char[maxp.length()+1];
1455                         *tempMaxp = '\0'; strncat(tempMaxp, maxp.c_str(), maxp.length());
1456                         //strcpy(tempMaxp, maxp.c_str());
1457                         cPara.push_back(tempMaxp);
1458                 }
1459                 
1460                 if (!skipgaps) {
1461                         char* tempskipgaps = new char[13]; 
1462                         //strcpy(tempskipgaps, "--[no]skipgaps");
1463                         *tempskipgaps = '\0'; strncat(tempskipgaps, "--noskipgaps", 12);
1464                         cPara.push_back(tempskipgaps);
1465                 }
1466                 
1467                 if (!skipgaps2) {
1468                         char* tempskipgaps2 = new char[14]; 
1469                         //strcpy(tempskipgaps2, "--[no]skipgaps2"); 
1470                         *tempskipgaps2 = '\0'; strncat(tempskipgaps2, "--noskipgaps2", 13);
1471                         cPara.push_back(tempskipgaps2);
1472                 }
1473                 
1474                 if (useMinlen) {
1475                         char* tempminlen = new char[9]; 
1476                         *tempminlen = '\0'; strncat(tempminlen, "--minlen", 8);
1477                         //strcpy(tempminlen, "--minlen"); 
1478                         cPara.push_back(tempminlen);
1479                         char* tempMinlen = new char[minlen.length()+1];
1480                         //strcpy(tempMinlen, minlen.c_str());
1481                         *tempMinlen = '\0'; strncat(tempMinlen, minlen.c_str(), minlen.length());
1482                         cPara.push_back(tempMinlen);
1483                 }
1484                 
1485                 if (useMaxlen) {
1486                         char* tempmaxlen = new char[9]; 
1487                         //strcpy(tempmaxlen, "--maxlen"); 
1488                         *tempmaxlen = '\0'; strncat(tempmaxlen, "--maxlen", 8);
1489                         cPara.push_back(tempmaxlen);
1490                         char* tempMaxlen = new char[maxlen.length()+1];
1491                         *tempMaxlen = '\0'; strncat(tempMaxlen, maxlen.c_str(), maxlen.length());
1492                         //strcpy(tempMaxlen, maxlen.c_str());
1493                         cPara.push_back(tempMaxlen);
1494                 }
1495                 
1496                 if (ucl) {
1497                         char* tempucl = new char[5]; 
1498                         strcpy(tempucl, "--ucl"); 
1499                         cPara.push_back(tempucl);
1500                 }
1501                 
1502                 if (useQueryfract) {
1503                         char* tempqueryfract = new char[13]; 
1504                         *tempqueryfract = '\0'; strncat(tempqueryfract, "--queryfract", 12);
1505                         //strcpy(tempqueryfract, "--queryfract"); 
1506                         cPara.push_back(tempqueryfract);
1507                         char* tempQueryfract = new char[queryfract.length()+1];
1508                         *tempQueryfract = '\0'; strncat(tempQueryfract, queryfract.c_str(), queryfract.length());
1509                         //strcpy(tempQueryfract, queryfract.c_str());
1510                         cPara.push_back(tempQueryfract);
1511                 }
1512                 
1513                 
1514                 char** uchimeParameters;
1515                 uchimeParameters = new char*[cPara.size()];
1516                 string commandString = "";
1517                 for (int i = 0; i < cPara.size(); i++) {  uchimeParameters[i] = cPara[i];  commandString += toString(cPara[i]) + " "; } 
1518                 //int numArgs = cPara.size();
1519                 
1520                 //uchime_main(numArgs, uchimeParameters); 
1521                 //cout << "commandString = " << commandString << endl;
1522 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
1523 #else
1524                 commandString = "\"" + commandString + "\"";
1525 #endif
1526         if (m->debug) { m->mothurOut("[DEBUG]: uchime command = " + commandString + ".\n"); }
1527                 system(commandString.c_str());
1528                 
1529                 //free memory
1530                 for(int i = 0; i < cPara.size(); i++)  {  delete cPara[i];  }
1531                 delete[] uchimeParameters; 
1532                 
1533                 //remove "" from filenames
1534                 outputFName = outputFName.substr(1, outputFName.length()-2);
1535                 filename = filename.substr(1, filename.length()-2);
1536                 alns = alns.substr(1, alns.length()-2);
1537                 
1538                 if (m->control_pressed) { return 0; }
1539                 
1540                 //create accnos file from uchime results
1541                 ifstream in; 
1542                 m->openInputFile(outputFName, in);
1543                 
1544                 ofstream out;
1545                 m->openOutputFile(accnos, out);
1546                 
1547                 int num = 0;
1548                 numChimeras = 0;
1549                 while(!in.eof()) {
1550                         
1551                         if (m->control_pressed) { break; }
1552                         
1553                         string name = "";
1554                         string chimeraFlag = "";
1555                         //in >> chimeraFlag >> name;
1556                         
1557             string line = m->getline(in);
1558             vector<string> pieces = m->splitWhiteSpace(line);
1559             if (pieces.size() > 2) { 
1560                 name = pieces[1];
1561                 //fix name if needed
1562                 if (templatefile == "self") { 
1563                     name = name.substr(0, name.length()-1); //rip off last /
1564                     name = name.substr(0, name.find_last_of('/'));
1565                 }
1566                 
1567                 chimeraFlag = pieces[pieces.size()-1];
1568                         }
1569                         //for (int i = 0; i < 15; i++) {  in >> chimeraFlag; }
1570                         m->gobble(in);
1571                         
1572                         if (chimeraFlag == "Y") {  out << name << endl; numChimeras++; }
1573                         num++;
1574                 }
1575                 in.close();
1576                 out.close();
1577                 
1578         //if (templatefile != "self") {  m->mothurRemove(filename); }
1579         
1580                 return num;
1581         }
1582         catch(exception& e) {
1583                 m->errorOut(e, "ChimeraUchimeCommand", "driver");
1584                 exit(1);
1585         }
1586 }
1587 /**************************************************************************************************/
1588 //uchime can't handle some of the things allowed in mothurs fasta files. This functions "cleans up" the file.
1589 int ChimeraUchimeCommand::prepFile(string filename, string output) {
1590         try {
1591         
1592         ifstream in;
1593         m->openInputFile(filename, in);
1594         
1595         ofstream out;
1596         m->openOutputFile(output, out);
1597         
1598         while (!in.eof()) {
1599             if (m->control_pressed) { break;  }
1600             
1601             Sequence seq(in); m->gobble(in);
1602             
1603             if (seq.getName() != "") { seq.printSequence(out); }
1604         }
1605         in.close();
1606         out.close();
1607         
1608         return 0;
1609     }
1610         catch(exception& e) {
1611                 m->errorOut(e, "ChimeraUchimeCommand", "prepFile");
1612                 exit(1);
1613         }
1614 }
1615 /**************************************************************************************************/
1616
//Runs uchime on 'filename' with 'processors' workers and merges what they produce.
//The fasta file is split into pieces (files[0], files[1], ...). The calling process runs driver() on files[0];
//the remaining pieces are handled by forked children on unix-like builds, or by threads on other builds.
//  outputFileName, accnos, alns - final output files. Each worker writes to <file><id>.temp, which is appended
//                                 to the final file and removed (alns only when chimealns is set).
//  numChimeras - set by the caller's own driver() run, then increased by the count reported by every worker
//Returns the sum of the counts from the caller's driver() run and from every worker,
//or 0 if control_pressed is set while the input file is being split.
1617 int ChimeraUchimeCommand::createProcesses(string outputFileName, string filename, string accnos, string alns, int& numChimeras) {
1618         try {
1619                 
1620                 processIDS.clear();
1621                 int process = 1;
1622                 int num = 0;
1623                 vector<string> files;
1624                 
1625 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)          
1626                 //break up file into multiple files
1627                 m->divideFile(filename, processors, files);
1628                 
1629                 if (m->control_pressed) {  return 0;  }
1630                                 
1631                 //loop through and create all the processes you want
                     //process starts at 1, so processors-1 children are forked; child k works on files[k]
1632                 while (process != processors) {
1633                         int pid = fork();
1634                         
1635                         if (pid > 0) {
1636                                 processIDS.push_back(pid);  //remember the child's pid; its temp file names are built from it below
1637                                 process++;
1638                         }else if (pid == 0){
                                     //child: every output name carries this process's pid so the parent can find it
1639                                 num = driver(outputFileName + toString(getpid()) + ".temp", files[process], accnos + toString(getpid()) + ".temp", alns + toString(getpid()) + ".temp", numChimeras);
1640                                 
1641                                 //pass numSeqs to parent
                                     //written as two lines: the count returned by driver(), then numChimeras
1642                                 ofstream out;
1643                                 string tempFile = outputFileName + toString(getpid()) + ".num.temp";
1644                                 m->openOutputFile(tempFile, out);
1645                                 out << num << endl;
1646                                 out << numChimeras << endl;
1647                                 out.close();
1648                                 
1649                                 exit(0);
1650                         }else { 
                                     //fork failed: signal the children created so far and give up
                                     //NOTE(review): this error path exits with status 0 - confirm that is intended
1651                                 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine(); 
1652                                 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
1653                                 exit(0);
1654                         }
1655                 }
1656                 
1657                 //do my part
1658                 num = driver(outputFileName, files[0], accnos, alns, numChimeras);
1659                 
1660                 //force parent to wait until all the processes are done
                     //NOTE(review): wait() takes a status out-parameter, not a pid, so 'temp' is overwritten and each call
                     //returns for whichever child ends next; the loop just calls it once per forked child.
1661                 for (int i=0;i<processIDS.size();i++) { 
1662                         int temp = processIDS[i];
1663                         wait(&temp);
1664                 }
1665                 
                     //collect the two numbers each child left in <outputFileName><pid>.num.temp
1666                 for (int i = 0; i < processIDS.size(); i++) {
1667                         ifstream in;
1668                         string tempFile =  outputFileName + toString(processIDS[i]) + ".num.temp";
1669                         m->openInputFile(tempFile, in);
1670                         if (!in.eof()) { 
1671                                 int tempNum = 0; 
1672                                 in >> tempNum; m->gobble(in);
1673                                 num += tempNum; 
1674                                 in >> tempNum;
1675                                 numChimeras += tempNum;
1676                         }
1677                         in.close(); m->mothurRemove(tempFile);
1678                 }
1679 #else
1680                 //////////////////////////////////////////////////////////////////////////////////////////////////////
1681                 //Windows version shared memory, so be careful when passing variables through the uchimeData struct. 
1682                 //Above fork() will clone, so memory is separate, but that's not the case with windows, 
1683                 //////////////////////////////////////////////////////////////////////////////////////////////////////
1684                 
1685                 //divide file
                     //one output file per processor; sequences are dealt out to them in round-robin order
1686                 int count = 0;
1687                 int spot = 0;
1688                 map<int, ofstream*> filehandles;
1689                 map<int, ofstream*>::iterator it3;
1690                 
1691                 ofstream* temp;
1692                 for (int i = 0; i < processors; i++) {
1693                         temp = new ofstream;
1694                         filehandles[i] = temp;
1695                         m->openOutputFile(filename+toString(i)+".temp", *(temp));
1696                         files.push_back(filename+toString(i)+".temp");
1697                 }
1698                 
1699                 ifstream in;
1700                 m->openInputFile(filename, in);
1701                 
1702                 while(!in.eof()) {
1703                         
                             //on interrupt: close and free every handle before returning
1704                         if (m->control_pressed) { in.close(); for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) { (*(it3->second)).close(); delete it3->second; } return 0; }
1705                         
1706                         Sequence tempSeq(in); m->gobble(in); 
1707                         
1708                         if (tempSeq.getName() != "") {
1709                                 tempSeq.printSequence(*(filehandles[spot])); 
1710                                 spot++; count++;
1711                                 if (spot == processors) { spot = 0; }
1712                         }
1713                 }
1714                 in.close();
1715                 
1716                 //delete memory
1717                 for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) {
1718                         (*(it3->second)).close();
1719                         delete it3->second;
1720                 }
1721                 
1722                 //sanity check for number of processors
                     //fewer named sequences than processors: shrink processors so no worker gets an empty piece
1723                 if (count < processors) { processors = count; }
1724                 
                     //NOTE(review): the two arrays below are sized by a runtime value, which is a compiler extension rather
                     //than standard C++; if count is 0 the size processors-1 is negative - confirm an empty input cannot get here
1725                 vector<uchimeData*> pDataArray; 
1726                 DWORD   dwThreadIdArray[processors-1];
1727                 HANDLE  hThreadArray[processors-1]; 
1728                 vector<string> dummy; //used so that we can use the same struct for MyUchimeSeqsThreadFunction and MyUchimeThreadFunction
1729                 
1730                 //Create processor worker threads.
1731                 for( int i=1; i<processors; i++ ){
1732                         // Allocate memory for thread data.
1733                         string extension = toString(i) + ".temp";
1734                         
1735                         uchimeData* tempUchime = new uchimeData(outputFileName+extension, uchimeLocation, templatefile, files[i], "", "", "", accnos+extension, alns+extension, "", dummy, m, 0, 0,  i);
1736                         tempUchime->setBooleans(dups, useAbskew, chimealns, useMinH, useMindiv, useXn, useDn, useXa, useChunks, useMinchunk, useIdsmoothwindow, useMinsmoothid, useMaxp, skipgaps, skipgaps2, useMinlen, useMaxlen, ucl, useQueryfract, hasCount);
1737                         tempUchime->setVariables(abskew, minh, mindiv, xn, dn, xa, chunks, minchunk, idsmoothwindow, minsmoothid, maxp, minlen, maxlen, queryfract, strand);
1738                         
1739                         pDataArray.push_back(tempUchime);
                             //the thread index stands in for a pid, so the append loop below finds <file><i>.temp
1740                         processIDS.push_back(i);
1741                         
1742                         //MyUchimeSeqsThreadFunction is expected to come from the header. It must be global or static to work with the threads.
1743                         //default security attributes, thread function name, argument to thread function, use default creation flags, returns the thread identifier
1744                         hThreadArray[i-1] = CreateThread(NULL, 0, MyUchimeSeqsThreadFunction, pDataArray[i-1], 0, &dwThreadIdArray[i-1]);   
1745                 }
1746                 
1747                 
1748                 //using the main process as a worker saves time and memory
1749                 num = driver(outputFileName, files[0], accnos, alns, numChimeras);
1750                 
1751                 //Wait until all threads have terminated.
1752                 WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE);
1753                 
1754                 //Close all thread handles and free memory allocations.
                     //each thread's totals are read from its uchimeData before it is deleted
1755                 for(int i=0; i < pDataArray.size(); i++){
1756                         num += pDataArray[i]->count;
1757                         numChimeras += pDataArray[i]->numChimeras;
1758                         CloseHandle(hThreadArray[i]);
1759                         delete pDataArray[i];
1760                 }
1761 #endif          
1762                 
1763                 //append output files
                     //processIDS holds child pids (fork build) or thread indexes (thread build); both match the worker file names
1764                 for(int i=0;i<processIDS.size();i++){
1765                         m->appendFiles((outputFileName + toString(processIDS[i]) + ".temp"), outputFileName);
1766                         m->mothurRemove((outputFileName + toString(processIDS[i]) + ".temp"));
1767                         
1768                         m->appendFiles((accnos + toString(processIDS[i]) + ".temp"), accnos);
1769                         m->mothurRemove((accnos + toString(processIDS[i]) + ".temp"));
1770                         
1771                         if (chimealns) {
1772                                 m->appendFiles((alns + toString(processIDS[i]) + ".temp"), alns);
1773                                 m->mothurRemove((alns + toString(processIDS[i]) + ".temp"));
1774                         }
1775                 }
1776                 
1777                 //get rid of the file pieces.
1778                 for (int i = 0; i < files.size(); i++) { m->mothurRemove(files[i]); }
1779                 return num;     
1780         }
1781         catch(exception& e) {
1782                 m->errorOut(e, "ChimeraUchimeCommand", "createProcesses");
1783                 exit(1);
1784         }
1785 }
1786 /**************************************************************************************************/
1787
1788 int ChimeraUchimeCommand::createProcessesGroups(string outputFName, string filename, string accnos, string alns, string newCountFile, vector<string> groups, string nameFile, string groupFile, string fastaFile) {
1789         try {
1790                 
1791                 processIDS.clear();
1792                 int process = 1;
1793                 int num = 0;
1794         
1795         CountTable newCount;
1796         if (hasCount && dups) { newCount.readTable(nameFile); }
1797                 
1798                 //sanity check
1799                 if (groups.size() < processors) { processors = groups.size(); }
1800                 
1801                 //divide the groups between the processors
1802                 vector<linePair> lines;
1803                 int numGroupsPerProcessor = groups.size() / processors;
1804                 for (int i = 0; i < processors; i++) {
1805                         int startIndex =  i * numGroupsPerProcessor;
1806                         int endIndex = (i+1) * numGroupsPerProcessor;
1807                         if(i == (processors - 1)){      endIndex = groups.size();       }
1808                         lines.push_back(linePair(startIndex, endIndex));
1809                 }
1810                 
1811 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)          
1812                                 
1813                 //loop through and create all the processes you want
1814                 while (process != processors) {
1815                         int pid = fork();
1816                         
1817                         if (pid > 0) {
1818                                 processIDS.push_back(pid);  //create map from line number to pid so you can append files in correct order later
1819                                 process++;
1820                         }else if (pid == 0){
1821                                 num = driverGroups(outputFName + toString(getpid()) + ".temp", filename + toString(getpid()) + ".temp", accnos + toString(getpid()) + ".temp", alns + toString(getpid()) + ".temp", accnos + ".byCount." + toString(getpid()) + ".temp", lines[process].start, lines[process].end, groups);
1822                                 
1823                                 //pass numSeqs to parent
1824                                 ofstream out;
1825                                 string tempFile = outputFName + toString(getpid()) + ".num.temp";
1826                                 m->openOutputFile(tempFile, out);
1827                                 out << num << endl;
1828                                 out.close();
1829                                 
1830                                 exit(0);
1831                         }else { 
1832                                 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine(); 
1833                                 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
1834                                 exit(0);
1835                         }
1836                 }
1837                 
1838                 //do my part
1839                 num = driverGroups(outputFName, filename, accnos, alns, accnos + ".byCount", lines[0].start, lines[0].end, groups);
1840                 
1841                 //force parent to wait until all the processes are done
1842                 for (int i=0;i<processIDS.size();i++) { 
1843                         int temp = processIDS[i];
1844                         wait(&temp);
1845                 }
1846         
1847                 for (int i = 0; i < processIDS.size(); i++) {
1848                         ifstream in;
1849                         string tempFile =  outputFName + toString(processIDS[i]) + ".num.temp";
1850                         m->openInputFile(tempFile, in);
1851                         if (!in.eof()) { int tempNum = 0; in >> tempNum; num += tempNum; }
1852                         in.close(); m->mothurRemove(tempFile);
1853         }
1854         
1855 #else
1856                 //////////////////////////////////////////////////////////////////////////////////////////////////////
1857                 //The Windows version uses threads that share memory, so be careful when passing variables through the uchimeData struct. 
1858                 //The fork() above clones the process, so memory is separate there, but that is not the case on Windows.
1859                 //////////////////////////////////////////////////////////////////////////////////////////////////////
1860                 
1861                 vector<uchimeData*> pDataArray; 
1862                 DWORD   dwThreadIdArray[processors-1];
1863                 HANDLE  hThreadArray[processors-1]; 
1864                 
1865                 //Create processor worker threads.
1866                 for( int i=1; i<processors; i++ ){
1867                         // Allocate memory for thread data.
1868                         string extension = toString(i) + ".temp";
1869                         
1870                         uchimeData* tempUchime = new uchimeData(outputFName+extension, uchimeLocation, templatefile, filename+extension, fastaFile, nameFile, groupFile, accnos+extension, alns+extension, accnos+".byCount."+extension, groups, m, lines[i].start, lines[i].end,  i);
1871                         tempUchime->setBooleans(dups, useAbskew, chimealns, useMinH, useMindiv, useXn, useDn, useXa, useChunks, useMinchunk, useIdsmoothwindow, useMinsmoothid, useMaxp, skipgaps, skipgaps2, useMinlen, useMaxlen, ucl, useQueryfract, hasCount);
1872                         tempUchime->setVariables(abskew, minh, mindiv, xn, dn, xa, chunks, minchunk, idsmoothwindow, minsmoothid, maxp, minlen, maxlen, queryfract, strand);
1873                         
1874                         pDataArray.push_back(tempUchime);
1875                         processIDS.push_back(i);
1876                         
1877                         //MyUchimeThreadFunction is in header. It must be global or static to work with the threads.
1878                         //default security attributes, thread function name, argument to thread function, use default creation flags, returns the thread identifier
1879                         hThreadArray[i-1] = CreateThread(NULL, 0, MyUchimeThreadFunction, pDataArray[i-1], 0, &dwThreadIdArray[i-1]);   
1880                 }
1881                 
1882                 
1883                 //using the main process as a worker saves time and memory
1884                 num = driverGroups(outputFName, filename, accnos, alns, accnos + ".byCount", lines[0].start, lines[0].end, groups);
1885                 
1886                 //Wait until all threads have terminated.
1887                 WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE);
1888                 
1889                 //Close all thread handles and free memory allocations.
1890                 for(int i=0; i < pDataArray.size(); i++){
1891                         num += pDataArray[i]->count;
1892                         CloseHandle(hThreadArray[i]);
1893                         delete pDataArray[i];
1894                 }
1895         
1896         
1897 #endif          
1898       
1899         //read my own
1900         if (hasCount && dups) {
1901             if (!m->isBlank(accnos + ".byCount")) {
1902                 ifstream in2;
1903                 m->openInputFile(accnos + ".byCount", in2);
1904                 
1905                 string name, group;
1906                 while (!in2.eof()) {
1907                     in2 >> name >> group; m->gobble(in2);
1908                     newCount.setAbund(name, group, 0);
1909                 }
1910                 in2.close();
1911             }
1912             m->mothurRemove(accnos + ".byCount");
1913         }
1914        
1915                 //append output files
1916                 for(int i=0;i<processIDS.size();i++){
1917                         m->appendFiles((outputFName + toString(processIDS[i]) + ".temp"), outputFName);
1918                         m->mothurRemove((outputFName + toString(processIDS[i]) + ".temp"));
1919                         
1920                         m->appendFiles((accnos + toString(processIDS[i]) + ".temp"), accnos);
1921                         m->mothurRemove((accnos + toString(processIDS[i]) + ".temp"));
1922                         
1923                         if (chimealns) {
1924                                 m->appendFiles((alns + toString(processIDS[i]) + ".temp"), alns);
1925                                 m->mothurRemove((alns + toString(processIDS[i]) + ".temp"));
1926                         }
1927             
1928             if (hasCount && dups) {
1929                 if (!m->isBlank(accnos + ".byCount." + toString(processIDS[i]) + ".temp")) {
1930                     ifstream in2;
1931                     m->openInputFile(accnos + ".byCount." + toString(processIDS[i]) + ".temp", in2);
1932                     
1933                     string name, group;
1934                     while (!in2.eof()) {
1935                         in2 >> name >> group; m->gobble(in2);
1936                         newCount.setAbund(name, group, 0);
1937                     }
1938                     in2.close();
1939                 }
1940                 m->mothurRemove(accnos + ".byCount." + toString(processIDS[i]) + ".temp");
1941             }
1942
1943                 }
1944         
1945         //print new *.pick.count_table
1946         if (hasCount && dups) {  newCount.printTable(newCountFile);   }
1947                 
1948                 return num;     
1949                 
1950         }
1951         catch(exception& e) {
1952                 m->errorOut(e, "ChimeraUchimeCommand", "createProcessesGroups");
1953                 exit(1);
1954         }
1955 }
1956 /**************************************************************************************************/
1957