]> git.donarmstrong.com Git - mothur.git/blob - chimerauchimecommand.cpp
Merge remote-tracking branch 'mothur/master'
[mothur.git] / chimerauchimecommand.cpp
1 /*
2  *  chimerauchimecommand.cpp
3  *  Mothur
4  *
5  *  Created by westcott on 5/13/11.
6  *  Copyright 2011 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "chimerauchimecommand.h"
11 #include "deconvolutecommand.h"
12 //#include "uc.h"
13 #include "sequence.hpp"
14 #include "referencedb.h"
15 #include "systemcommand.h"
16
17 //**********************************************************************************************************************
18 vector<string> ChimeraUchimeCommand::setParameters(){   
19         try {
20                 CommandParameter ptemplate("reference", "InputTypes", "", "", "none", "none", "none","",false,true,true); parameters.push_back(ptemplate);
21                 CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none","chimera-accnos",false,true,true); parameters.push_back(pfasta);
22                 CommandParameter pname("name", "InputTypes", "", "", "NameCount", "none", "none","",false,false,true); parameters.push_back(pname);
23         CommandParameter pcount("count", "InputTypes", "", "", "NameCount-CountGroup", "none", "none","",false,false,true); parameters.push_back(pcount);
24                 CommandParameter pgroup("group", "InputTypes", "", "", "CountGroup", "none", "none","",false,false,true); parameters.push_back(pgroup);
25                 CommandParameter pprocessors("processors", "Number", "", "1", "", "", "","",false,false,true); parameters.push_back(pprocessors);
26         CommandParameter pstrand("strand", "String", "", "", "", "", "","",false,false); parameters.push_back(pstrand);
27                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
28                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
29                 CommandParameter pabskew("abskew", "Number", "", "1.9", "", "", "","",false,false); parameters.push_back(pabskew);
30                 CommandParameter pchimealns("chimealns", "Boolean", "", "F", "", "", "","alns",false,false); parameters.push_back(pchimealns);
31                 CommandParameter pminh("minh", "Number", "", "0.3", "", "", "","",false,false); parameters.push_back(pminh);
32                 CommandParameter pmindiv("mindiv", "Number", "", "0.5", "", "", "","",false,false); parameters.push_back(pmindiv);
33                 CommandParameter pxn("xn", "Number", "", "8.0", "", "", "","",false,false); parameters.push_back(pxn);
34                 CommandParameter pdn("dn", "Number", "", "1.4", "", "", "","",false,false); parameters.push_back(pdn);
35                 CommandParameter pxa("xa", "Number", "", "1", "", "", "","",false,false); parameters.push_back(pxa);
36                 CommandParameter pchunks("chunks", "Number", "", "4", "", "", "","",false,false); parameters.push_back(pchunks);
37                 CommandParameter pminchunk("minchunk", "Number", "", "64", "", "", "","",false,false); parameters.push_back(pminchunk);
38                 CommandParameter pidsmoothwindow("idsmoothwindow", "Number", "", "32", "", "", "","",false,false); parameters.push_back(pidsmoothwindow);
39         CommandParameter pdups("dereplicate", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(pdups);
40
41                 //CommandParameter pminsmoothid("minsmoothid", "Number", "", "0.95", "", "", "",false,false); parameters.push_back(pminsmoothid);
42                 CommandParameter pmaxp("maxp", "Number", "", "2", "", "", "","",false,false); parameters.push_back(pmaxp);
43                 CommandParameter pskipgaps("skipgaps", "Boolean", "", "T", "", "", "","",false,false); parameters.push_back(pskipgaps);
44                 CommandParameter pskipgaps2("skipgaps2", "Boolean", "", "T", "", "", "","",false,false); parameters.push_back(pskipgaps2);
45                 CommandParameter pminlen("minlen", "Number", "", "10", "", "", "","",false,false); parameters.push_back(pminlen);
46                 CommandParameter pmaxlen("maxlen", "Number", "", "10000", "", "", "","",false,false); parameters.push_back(pmaxlen);
47                 CommandParameter pucl("ucl", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(pucl);
48                 CommandParameter pqueryfract("queryfract", "Number", "", "0.5", "", "", "","",false,false); parameters.push_back(pqueryfract);
49
50                 vector<string> myArray;
51                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
52                 return myArray;
53         }
54         catch(exception& e) {
55                 m->errorOut(e, "ChimeraUchimeCommand", "setParameters");
56                 exit(1);
57         }
58 }
59 //**********************************************************************************************************************
60 string ChimeraUchimeCommand::getHelpString(){   
61         try {
62                 string helpString = "";
63                 helpString += "The chimera.uchime command reads a fastafile and referencefile and outputs potentially chimeric sequences.\n";
64                 helpString += "This command is a wrapper for uchime written by Robert C. Edgar.\n";
65                 helpString += "The chimera.uchime command parameters are fasta, name, count, reference, processors, dereplicate, abskew, chimealns, minh, mindiv, xn, dn, xa, chunks, minchunk, idsmoothwindow, minsmoothid, maxp, skipgaps, skipgaps2, minlen, maxlen, ucl, strand and queryfact.\n";
66                 helpString += "The fasta parameter allows you to enter the fasta file containing your potentially chimeric sequences, and is required, unless you have a valid current fasta file. \n";
67                 helpString += "The name parameter allows you to provide a name file, if you are using template=self. \n";
68         helpString += "The count parameter allows you to provide a count file, if you are using template=self. When you use a count file with group info and dereplicate=T, mothur will create a *.pick.count_table file containing seqeunces after chimeras are removed. \n";
69                 helpString += "You may enter multiple fasta files by separating their names with dashes. ie. fasta=abrecovery.fasta-amazon.fasta \n";
70                 helpString += "The group parameter allows you to provide a group file. The group file can be used with a namesfile and reference=self. When checking sequences, only sequences from the same group as the query sequence will be used as the reference. \n";
71         helpString += "If the dereplicate parameter is false, then if one group finds the seqeunce to be chimeric, then all groups find it to be chimeric, default=f.\n";
72                 helpString += "The reference parameter allows you to enter a reference file containing known non-chimeric sequences, and is required. You may also set template=self, in this case the abundant sequences will be used as potential parents. \n";
73                 helpString += "The processors parameter allows you to specify how many processors you would like to use.  The default is 1. \n";
74                 helpString += "The abskew parameter can only be used with template=self. Minimum abundance skew. Default 1.9. Abundance skew is: min [ abund(parent1), abund(parent2) ] / abund(query).\n";
75                 helpString += "The chimealns parameter allows you to indicate you would like a file containing multiple alignments of query sequences to parents in human readable format. Alignments show columns with differences that support or contradict a chimeric model.\n";
76                 helpString += "The minh parameter - mininum score to report chimera. Default 0.3. Values from 0.1 to 5 might be reasonable. Lower values increase sensitivity but may report more false positives. If you decrease xn you may need to increase minh, and vice versa.\n";
77                 helpString += "The mindiv parameter - minimum divergence ratio, default 0.5. Div ratio is 100%% - %%identity between query sequence and the closest candidate for being a parent. If you don't care about very close chimeras, then you could increase mindiv to, say, 1.0 or 2.0, and also decrease minh, say to 0.1, to increase sensitivity. How well this works will depend on your data. Best is to tune parameters on a good benchmark.\n";
78                 helpString += "The xn parameter - weight of a no vote. Default 8.0. Decreasing this weight to around 3 or 4 may give better performance on denoised data.\n";
79                 helpString += "The dn parameter - pseudo-count prior on number of no votes. Default 1.4. Probably no good reason to change this unless you can retune to a good benchmark for your data. Reasonable values are probably in the range from 0.2 to 2.\n";
80                 helpString += "The xa parameter - weight of an abstain vote. Default 1. So far, results do not seem to be very sensitive to this parameter, but if you have a good training set might be worth trying. Reasonable values might range from 0.1 to 2.\n";
81                 helpString += "The chunks parameter is the number of chunks to extract from the query sequence when searching for parents. Default 4.\n";
82                 helpString += "The minchunk parameter is the minimum length of a chunk. Default 64.\n";
83                 helpString += "The idsmoothwindow parameter is the length of id smoothing window. Default 32.\n";
84                 //helpString += "The minsmoothid parameter - minimum factional identity over smoothed window of candidate parent. Default 0.95.\n";
85                 helpString += "The maxp parameter - maximum number of candidate parents to consider. Default 2. In tests so far, increasing maxp gives only a very small improvement in sensivity but tends to increase the error rate quite a bit.\n";
86                 helpString += "The skipgaps parameter controls how gapped columns affect counting of diffs. If skipgaps is set to T, columns containing gaps do not found as diffs. Default = T.\n";
87                 helpString += "The skipgaps2 parameter controls how gapped columns affect counting of diffs. If skipgaps2 is set to T, if column is immediately adjacent to a column containing a gap, it is not counted as a diff. Default = T.\n";
88                 helpString += "The minlen parameter is the minimum unaligned sequence length. Defaults 10. Applies to both query and reference sequences.\n";
89                 helpString += "The maxlen parameter is the maximum unaligned sequence length. Defaults 10000. Applies to both query and reference sequences.\n";
90                 helpString += "The ucl parameter - use local-X alignments. Default is global-X or false. On tests so far, global-X is always better; this option is retained because it just might work well on some future type of data.\n";
91                 helpString += "The queryfract parameter - minimum fraction of the query sequence that must be covered by a local-X alignment. Default 0.5. Applies only when ucl is true.\n";
92 #ifdef USE_MPI
93                 helpString += "When using MPI, the processors parameter is set to the number of MPI processes running. \n";
94 #endif
95                 helpString += "The chimera.uchime command should be in the following format: \n";
96                 helpString += "chimera.uchime(fasta=yourFastaFile, reference=yourTemplate) \n";
97                 helpString += "Example: chimera.uchime(fasta=AD.align, reference=silva.gold.align) \n";
98                 helpString += "Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFastaFile).\n";       
99                 return helpString;
100         }
101         catch(exception& e) {
102                 m->errorOut(e, "ChimeraUchimeCommand", "getHelpString");
103                 exit(1);
104         }
105 }
106 //**********************************************************************************************************************
107 string ChimeraUchimeCommand::getOutputPattern(string type) {
108     try {
109         string pattern = "";
110         
111         if (type == "chimera") {  pattern = "[filename],uchime.chimeras"; } 
112         else if (type == "accnos") {  pattern = "[filename],uchime.accnos"; } 
113         else if (type == "alns") {  pattern = "[filename],uchime.alns"; }
114         else if (type == "count") {  pattern = "[filename],uchime.pick.count_table"; } 
115         else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true;  }
116         
117         return pattern;
118     }
119     catch(exception& e) {
120         m->errorOut(e, "ChimeraUchimeCommand", "getOutputPattern");
121         exit(1);
122     }
123 }
124 //**********************************************************************************************************************
125 ChimeraUchimeCommand::ChimeraUchimeCommand(){   
126         try {
127                 abort = true; calledHelp = true;
128                 setParameters();
129                 vector<string> tempOutNames;
130                 outputTypes["chimera"] = tempOutNames;
131                 outputTypes["accnos"] = tempOutNames;
132                 outputTypes["alns"] = tempOutNames;
133         outputTypes["count"] = tempOutNames;
134         }
135         catch(exception& e) {
136                 m->errorOut(e, "ChimeraUchimeCommand", "ChimeraUchimeCommand");
137                 exit(1);
138         }
139 }
140 //***************************************************************************************************************
141 ChimeraUchimeCommand::ChimeraUchimeCommand(string option)  {
142         try {
143                 abort = false; calledHelp = false; hasName=false; hasCount=false;
144                 ReferenceDB* rdb = ReferenceDB::getInstance();
145                 
146                 //allow user to run help
147                 if(option == "help") { help(); abort = true; calledHelp = true; }
148                 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
149                 
150                 else {
151                         vector<string> myArray = setParameters();
152                         
153                         OptionParser parser(option);
154                         map<string,string> parameters = parser.getParameters();
155                         
156                         ValidParameters validParameter("chimera.uchime");
157                         map<string,string>::iterator it;
158                         
159                         //check to make sure all parameters are valid for command
160                         for (it = parameters.begin(); it != parameters.end(); it++) { 
161                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
162                         }
163                         
164                         vector<string> tempOutNames;
165                         outputTypes["chimera"] = tempOutNames;
166                         outputTypes["accnos"] = tempOutNames;
167                         outputTypes["alns"] = tempOutNames;
168             outputTypes["count"] = tempOutNames;
169                         
170                         //if the user changes the input directory command factory will send this info to us in the output parameter 
171                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
172                         if (inputDir == "not found"){   inputDir = "";          }
173                         
174                         //check for required parameters
175                         fastafile = validParameter.validFile(parameters, "fasta", false);
176                         if (fastafile == "not found") {                                 
177                                 //if there is a current fasta file, use it
178                                 string filename = m->getFastaFile(); 
179                                 if (filename != "") { fastaFileNames.push_back(filename); m->mothurOut("Using " + filename + " as input file for the fasta parameter."); m->mothurOutEndLine(); }
180                                 else {  m->mothurOut("You have no current fastafile and the fasta parameter is required."); m->mothurOutEndLine(); abort = true; }
181                         }else { 
182                                 m->splitAtDash(fastafile, fastaFileNames);
183                                 
184                                 //go through files and make sure they are good, if not, then disregard them
185                                 for (int i = 0; i < fastaFileNames.size(); i++) {
186                                         
187                                         bool ignore = false;
188                                         if (fastaFileNames[i] == "current") { 
189                                                 fastaFileNames[i] = m->getFastaFile(); 
190                                                 if (fastaFileNames[i] != "") {  m->mothurOut("Using " + fastaFileNames[i] + " as input file for the fasta parameter where you had given current."); m->mothurOutEndLine(); }
191                                                 else {  
192                                                         m->mothurOut("You have no current fastafile, ignoring current."); m->mothurOutEndLine(); ignore=true; 
193                                                         //erase from file list
194                                                         fastaFileNames.erase(fastaFileNames.begin()+i);
195                                                         i--;
196                                                 }
197                                         }
198                                         
199                                         if (!ignore) {
200                                                 
201                                                 if (inputDir != "") {
202                                                         string path = m->hasPath(fastaFileNames[i]);
203                                                         //if the user has not given a path then, add inputdir. else leave path alone.
204                                                         if (path == "") {       fastaFileNames[i] = inputDir + fastaFileNames[i];               }
205                                                 }
206                                                 
207                                                 int ableToOpen;
208                                                 ifstream in;
209                                                 
210                                                 ableToOpen = m->openInputFile(fastaFileNames[i], in, "noerror");
211                                                 
212                                                 //if you can't open it, try default location
213                                                 if (ableToOpen == 1) {
214                                                         if (m->getDefaultPath() != "") { //default path is set
215                                                                 string tryPath = m->getDefaultPath() + m->getSimpleName(fastaFileNames[i]);
216                                                                 m->mothurOut("Unable to open " + fastaFileNames[i] + ". Trying default " + tryPath); m->mothurOutEndLine();
217                                                                 ifstream in2;
218                                                                 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
219                                                                 in2.close();
220                                                                 fastaFileNames[i] = tryPath;
221                                                         }
222                                                 }
223                                                 
224                                                 if (ableToOpen == 1) {
225                                                         if (m->getOutputDir() != "") { //default path is set
226                                                                 string tryPath = m->getOutputDir() + m->getSimpleName(fastaFileNames[i]);
227                                                                 m->mothurOut("Unable to open " + fastaFileNames[i] + ". Trying output directory " + tryPath); m->mothurOutEndLine();
228                                                                 ifstream in2;
229                                                                 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
230                                                                 in2.close();
231                                                                 fastaFileNames[i] = tryPath;
232                                                         }
233                                                 }
234                                                 
235                                                 in.close();
236                                                 
237                                                 if (ableToOpen == 1) { 
238                                                         m->mothurOut("Unable to open " + fastaFileNames[i] + ". It will be disregarded."); m->mothurOutEndLine(); 
239                                                         //erase from file list
240                                                         fastaFileNames.erase(fastaFileNames.begin()+i);
241                                                         i--;
242                                                 }else {
243                                                         m->setFastaFile(fastaFileNames[i]);
244                                                 }
245                                         }
246                                 }
247                                 
248                                 //make sure there is at least one valid file left
249                                 if (fastaFileNames.size() == 0) { m->mothurOut("[ERROR]: no valid files."); m->mothurOutEndLine(); abort = true; }
250                         }
251                         
252                         
253                         //check for required parameters
254                         namefile = validParameter.validFile(parameters, "name", false);
255                         if (namefile == "not found") { namefile = "";   }
256                         else { 
257                                 m->splitAtDash(namefile, nameFileNames);
258                                 
259                                 //go through files and make sure they are good, if not, then disregard them
260                                 for (int i = 0; i < nameFileNames.size(); i++) {
261                                         
262                                         bool ignore = false;
263                                         if (nameFileNames[i] == "current") { 
264                                                 nameFileNames[i] = m->getNameFile(); 
265                                                 if (nameFileNames[i] != "") {  m->mothurOut("Using " + nameFileNames[i] + " as input file for the name parameter where you had given current."); m->mothurOutEndLine(); }
266                                                 else {  
267                                                         m->mothurOut("You have no current namefile, ignoring current."); m->mothurOutEndLine(); ignore=true; 
268                                                         //erase from file list
269                                                         nameFileNames.erase(nameFileNames.begin()+i);
270                                                         i--;
271                                                 }
272                                         }
273                                         
274                                         if (!ignore) {
275                                                 
276                                                 if (inputDir != "") {
277                                                         string path = m->hasPath(nameFileNames[i]);
278                                                         //if the user has not given a path then, add inputdir. else leave path alone.
279                                                         if (path == "") {       nameFileNames[i] = inputDir + nameFileNames[i];         }
280                                                 }
281                                                 
282                                                 int ableToOpen;
283                                                 ifstream in;
284                                                 
285                                                 ableToOpen = m->openInputFile(nameFileNames[i], in, "noerror");
286                                                 
287                                                 //if you can't open it, try default location
288                                                 if (ableToOpen == 1) {
289                                                         if (m->getDefaultPath() != "") { //default path is set
290                                                                 string tryPath = m->getDefaultPath() + m->getSimpleName(nameFileNames[i]);
291                                                                 m->mothurOut("Unable to open " + nameFileNames[i] + ". Trying default " + tryPath); m->mothurOutEndLine();
292                                                                 ifstream in2;
293                                                                 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
294                                                                 in2.close();
295                                                                 nameFileNames[i] = tryPath;
296                                                         }
297                                                 }
298                                                 
299                                                 if (ableToOpen == 1) {
300                                                         if (m->getOutputDir() != "") { //default path is set
301                                                                 string tryPath = m->getOutputDir() + m->getSimpleName(nameFileNames[i]);
302                                                                 m->mothurOut("Unable to open " + nameFileNames[i] + ". Trying output directory " + tryPath); m->mothurOutEndLine();
303                                                                 ifstream in2;
304                                                                 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
305                                                                 in2.close();
306                                                                 nameFileNames[i] = tryPath;
307                                                         }
308                                                 }
309                                                 
310                                                 in.close();
311                                                 
312                                                 if (ableToOpen == 1) { 
313                                                         m->mothurOut("Unable to open " + nameFileNames[i] + ". It will be disregarded."); m->mothurOutEndLine(); 
314                                                         //erase from file list
315                                                         nameFileNames.erase(nameFileNames.begin()+i);
316                                                         i--;
317                                                 }else {
318                                                         m->setNameFile(nameFileNames[i]);
319                                                 }
320                                         }
321                                 }
322                         }
323             
324             if (nameFileNames.size() != 0) { hasName = true; }
325             
326             //check for required parameters
327             vector<string> countfileNames;
328                         countfile = validParameter.validFile(parameters, "count", false);
329                         if (countfile == "not found") { 
330                 countfile = "";  
331                         }else { 
332                                 m->splitAtDash(countfile, countfileNames);
333                                 
334                                 //go through files and make sure they are good, if not, then disregard them
335                                 for (int i = 0; i < countfileNames.size(); i++) {
336                                         
337                                         bool ignore = false;
338                                         if (countfileNames[i] == "current") { 
339                                                 countfileNames[i] = m->getCountTableFile(); 
340                                                 if (nameFileNames[i] != "") {  m->mothurOut("Using " + countfileNames[i] + " as input file for the count parameter where you had given current."); m->mothurOutEndLine(); }
341                                                 else {  
342                                                         m->mothurOut("You have no current count file, ignoring current."); m->mothurOutEndLine(); ignore=true; 
343                                                         //erase from file list
344                                                         countfileNames.erase(countfileNames.begin()+i);
345                                                         i--;
346                                                 }
347                                         }
348                                         
349                                         if (!ignore) {
350                                                 
351                                                 if (inputDir != "") {
352                                                         string path = m->hasPath(countfileNames[i]);
353                                                         //if the user has not given a path then, add inputdir. else leave path alone.
354                                                         if (path == "") {       countfileNames[i] = inputDir + countfileNames[i];               }
355                                                 }
356                                                 
357                                                 int ableToOpen;
358                                                 ifstream in;
359                                                 
360                                                 ableToOpen = m->openInputFile(countfileNames[i], in, "noerror");
361                                                 
362                                                 //if you can't open it, try default location
363                                                 if (ableToOpen == 1) {
364                                                         if (m->getDefaultPath() != "") { //default path is set
365                                                                 string tryPath = m->getDefaultPath() + m->getSimpleName(countfileNames[i]);
366                                                                 m->mothurOut("Unable to open " + countfileNames[i] + ". Trying default " + tryPath); m->mothurOutEndLine();
367                                                                 ifstream in2;
368                                                                 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
369                                                                 in2.close();
370                                                                 countfileNames[i] = tryPath;
371                                                         }
372                                                 }
373                                                 
374                                                 if (ableToOpen == 1) {
375                                                         if (m->getOutputDir() != "") { //default path is set
376                                                                 string tryPath = m->getOutputDir() + m->getSimpleName(countfileNames[i]);
377                                                                 m->mothurOut("Unable to open " + countfileNames[i] + ". Trying output directory " + tryPath); m->mothurOutEndLine();
378                                                                 ifstream in2;
379                                                                 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
380                                                                 in2.close();
381                                                                 countfileNames[i] = tryPath;
382                                                         }
383                                                 }
384                                                 
385                                                 in.close();
386                                                 
387                                                 if (ableToOpen == 1) { 
388                                                         m->mothurOut("Unable to open " + countfileNames[i] + ". It will be disregarded."); m->mothurOutEndLine(); 
389                                                         //erase from file list
390                                                         countfileNames.erase(countfileNames.begin()+i);
391                                                         i--;
392                                                 }else {
393                                                         m->setCountTableFile(countfileNames[i]);
394                                                 }
395                                         }
396                                 }
397                         }
398             
399             if (countfileNames.size() != 0) { hasCount = true; }
400             
401                         //name and count files are mutually exclusive; supplying both is an error
402             if (hasName && hasCount) { m->mothurOut("[ERROR]: You must enter ONLY ONE of the following: count or name."); m->mothurOutEndLine(); abort = true; }
403             
404             if (!hasName && hasCount) { nameFileNames = countfileNames; }
405             
406                         if ((hasCount || hasName) && (nameFileNames.size() != fastaFileNames.size())) { m->mothurOut("[ERROR]: The number of name or count files does not match the number of fastafiles, please correct."); m->mothurOutEndLine(); abort=true; }
407                         
408                         bool hasGroup = true;
409                         groupfile = validParameter.validFile(parameters, "group", false);
410                         if (groupfile == "not found") { groupfile = "";  hasGroup = false; }
411                         else { 
412                                 m->splitAtDash(groupfile, groupFileNames);
413                                 
414                                 //go through files and make sure they are good, if not, then disregard them
415                                 for (int i = 0; i < groupFileNames.size(); i++) {
416                                         
417                                         bool ignore = false;
418                                         if (groupFileNames[i] == "current") { 
419                                                 groupFileNames[i] = m->getGroupFile(); 
420                                                 if (groupFileNames[i] != "") {  m->mothurOut("Using " + groupFileNames[i] + " as input file for the group parameter where you had given current."); m->mothurOutEndLine(); }
421                                                 else {  
422                                                         m->mothurOut("You have no current namefile, ignoring current."); m->mothurOutEndLine(); ignore=true; 
423                                                         //erase from file list
424                                                         groupFileNames.erase(groupFileNames.begin()+i);
425                                                         i--;
426                                                 }
427                                         }
428                                         
429                                         if (!ignore) {
430                                                 
431                                                 if (inputDir != "") {
432                                                         string path = m->hasPath(groupFileNames[i]);
433                                                         //if the user has not given a path then, add inputdir. else leave path alone.
434                                                         if (path == "") {       groupFileNames[i] = inputDir + groupFileNames[i];               }
435                                                 }
436                                                 
437                                                 int ableToOpen;
438                                                 ifstream in;
439                                                 
440                                                 ableToOpen = m->openInputFile(groupFileNames[i], in, "noerror");
441                                                 
442                                                 //if you can't open it, try default location
443                                                 if (ableToOpen == 1) {
444                                                         if (m->getDefaultPath() != "") { //default path is set
445                                                                 string tryPath = m->getDefaultPath() + m->getSimpleName(groupFileNames[i]);
446                                                                 m->mothurOut("Unable to open " + groupFileNames[i] + ". Trying default " + tryPath); m->mothurOutEndLine();
447                                                                 ifstream in2;
448                                                                 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
449                                                                 in2.close();
450                                                                 groupFileNames[i] = tryPath;
451                                                         }
452                                                 }
453                                                 
454                                                 if (ableToOpen == 1) {
455                                                         if (m->getOutputDir() != "") { //default path is set
456                                                                 string tryPath = m->getOutputDir() + m->getSimpleName(groupFileNames[i]);
457                                                                 m->mothurOut("Unable to open " + groupFileNames[i] + ". Trying output directory " + tryPath); m->mothurOutEndLine();
458                                                                 ifstream in2;
459                                                                 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
460                                                                 in2.close();
461                                                                 groupFileNames[i] = tryPath;
462                                                         }
463                                                 }
464                                                 
465                                                 in.close();
466                                                 
467                                                 if (ableToOpen == 1) { 
468                                                         m->mothurOut("Unable to open " + groupFileNames[i] + ". It will be disregarded."); m->mothurOutEndLine(); 
469                                                         //erase from file list
470                                                         groupFileNames.erase(groupFileNames.begin()+i);
471                                                         i--;
472                                                 }else {
473                                                         m->setGroupFile(groupFileNames[i]);
474                                                 }
475                                         }
476                                 }
477                                 
478                                 //make sure there is at least one valid file left
479                                 if (groupFileNames.size() == 0) { m->mothurOut("[ERROR]: no valid group files."); m->mothurOutEndLine(); abort = true; }
480                         }
481                         
482                         if (hasGroup && (groupFileNames.size() != fastaFileNames.size())) { m->mothurOut("[ERROR]: The number of groupfiles does not match the number of fastafiles, please correct."); m->mothurOutEndLine(); abort=true; }
483                         
484             if (hasGroup && hasCount) { m->mothurOut("[ERROR]: You must enter ONLY ONE of the following: count or group."); m->mothurOutEndLine(); abort = true; }                      
485                         //if the user changes the output directory command factory will send this info to us in the output parameter 
486                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = ""; }
487                         
488                         
489                         //if the user changes the output directory command factory will send this info to us in the output parameter 
490                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = ""; }
491                         
492                         string path;
493                         it = parameters.find("reference");
494                         //user has given a template file
495                         if(it != parameters.end()){ 
496                                 if (it->second == "self") { templatefile = "self"; }
497                                 else {
498                                         path = m->hasPath(it->second);
499                                         //if the user has not given a path then, add inputdir. else leave path alone.
500                                         if (path == "") {       parameters["reference"] = inputDir + it->second;                }
501                                         
502                                         templatefile = validParameter.validFile(parameters, "reference", true);
503                                         if (templatefile == "not open") { abort = true; }
504                                         else if (templatefile == "not found") { //check for saved reference sequences
505                                                 if (rdb->getSavedReference() != "") {
506                                                         templatefile = rdb->getSavedReference();
507                                                         m->mothurOutEndLine();  m->mothurOut("Using sequences from " + rdb->getSavedReference() + "."); m->mothurOutEndLine();
508                                                 }else {
509                                                         m->mothurOut("[ERROR]: You don't have any saved reference sequences and the reference parameter is a required."); 
510                                                         m->mothurOutEndLine();
511                                                         abort = true; 
512                                                 }
513                                         }
514                                 }
515                         }else if (hasName) {  templatefile = "self"; }
516             else if (hasCount) {  templatefile = "self"; }
517                         else { 
518                                 if (rdb->getSavedReference() != "") {
519                                         templatefile = rdb->getSavedReference();
520                                         m->mothurOutEndLine();  m->mothurOut("Using sequences from " + rdb->getSavedReference() + "."); m->mothurOutEndLine();
521                                 }else {
522                                         m->mothurOut("[ERROR]: You don't have any saved reference sequences and the reference parameter is a required."); 
523                                         m->mothurOutEndLine();
524                                         templatefile = ""; abort = true; 
525                                 } 
526                         }
527                                 
528                         string temp = validParameter.validFile(parameters, "processors", false);        if (temp == "not found"){       temp = m->getProcessors();      }
529                         m->setProcessors(temp);
530                         m->mothurConvert(temp, processors);
531                         
532                         abskew = validParameter.validFile(parameters, "abskew", false); if (abskew == "not found"){     useAbskew = false;  abskew = "1.9";     }else{  useAbskew = true;  }
533                         if (useAbskew && templatefile != "self") { m->mothurOut("The abskew parameter is only valid with template=self, ignoring."); m->mothurOutEndLine(); useAbskew = false; }
534                         
535                         temp = validParameter.validFile(parameters, "chimealns", false);                        if (temp == "not found") { temp = "f"; }
536                         chimealns = m->isTrue(temp); 
537                         
538                         minh = validParameter.validFile(parameters, "minh", false);                                             if (minh == "not found")                        { useMinH = false; minh = "0.3";                                        }       else{ useMinH = true;                   }
539                         mindiv = validParameter.validFile(parameters, "mindiv", false);                                 if (mindiv == "not found")                      { useMindiv = false; mindiv = "0.5";                            }       else{ useMindiv = true;                 }
540                         xn = validParameter.validFile(parameters, "xn", false);                                                 if (xn == "not found")                          { useXn = false; xn = "8.0";                                            }       else{ useXn = true;                             }
541                         dn = validParameter.validFile(parameters, "dn", false);                                                 if (dn == "not found")                          { useDn = false; dn = "1.4";                                            }       else{ useDn = true;                             }
542                         xa = validParameter.validFile(parameters, "xa", false);                                                 if (xa == "not found")                          { useXa = false; xa = "1";                                                      }       else{ useXa = true;                             }
543                         chunks = validParameter.validFile(parameters, "chunks", false);                                 if (chunks == "not found")                      { useChunks = false; chunks = "4";                                      }       else{ useChunks = true;                 }
544                         minchunk = validParameter.validFile(parameters, "minchunk", false);                             if (minchunk == "not found")            { useMinchunk = false; minchunk = "64";                         }       else{ useMinchunk = true;               }
545                         idsmoothwindow = validParameter.validFile(parameters, "idsmoothwindow", false); if (idsmoothwindow == "not found")      { useIdsmoothwindow = false; idsmoothwindow = "32";     }       else{ useIdsmoothwindow = true; }
546                         //minsmoothid = validParameter.validFile(parameters, "minsmoothid", false);             if (minsmoothid == "not found")         { useMinsmoothid = false; minsmoothid = "0.95";         }       else{ useMinsmoothid = true;    }
547                         maxp = validParameter.validFile(parameters, "maxp", false);                                             if (maxp == "not found")                        { useMaxp = false; maxp = "2";                                          }       else{ useMaxp = true;                   }
548                         minlen = validParameter.validFile(parameters, "minlen", false);                                 if (minlen == "not found")                      { useMinlen = false; minlen = "10";                                     }       else{ useMinlen = true;                 }
549                         maxlen = validParameter.validFile(parameters, "maxlen", false);                                 if (maxlen == "not found")                      { useMaxlen = false; maxlen = "10000";                          }       else{ useMaxlen = true;                 }
550             
551             strand = validParameter.validFile(parameters, "strand", false);     if (strand == "not found")      {  strand = ""; }
552                         
553                         temp = validParameter.validFile(parameters, "ucl", false);                                              if (temp == "not found") { temp = "f"; }
554                         ucl = m->isTrue(temp);
555                         
556                         queryfract = validParameter.validFile(parameters, "queryfract", false);                 if (queryfract == "not found")          { useQueryfract = false; queryfract = "0.5";            }       else{ useQueryfract = true;             }
557                         if (!ucl && useQueryfract) { m->mothurOut("queryfact may only be used when ucl=t, ignoring."); m->mothurOutEndLine(); useQueryfract = false; }
558                         
559                         temp = validParameter.validFile(parameters, "skipgaps", false);                                 if (temp == "not found") { temp = "t"; }
560                         skipgaps = m->isTrue(temp); 
561
562                         temp = validParameter.validFile(parameters, "skipgaps2", false);                                if (temp == "not found") { temp = "t"; }
563                         skipgaps2 = m->isTrue(temp); 
564             
565             
566                         temp = validParameter.validFile(parameters, "dereplicate", false);      
567                         if (temp == "not found") { 
568                                 if (groupfile != "")    {  temp = "false";                                      }
569                                 else                    {  temp = "true";       }
570                         }
571                         dups = m->isTrue(temp);
572
573                         
574                         if (hasName && (templatefile != "self")) { m->mothurOut("You have provided a namefile and the reference parameter is not set to self. I am not sure what reference you are trying to use, aborting."); m->mothurOutEndLine(); abort=true; }
575                         if (hasGroup && (templatefile != "self")) { m->mothurOut("You have provided a group file and the reference parameter is not set to self. I am not sure what reference you are trying to use, aborting."); m->mothurOutEndLine(); abort=true; }
576                         
577                         //look for uchime exe
578                         path = m->argv;
579                         string tempPath = path;
580                         for (int i = 0; i < path.length(); i++) { tempPath[i] = tolower(path[i]); }
581                         path = path.substr(0, (tempPath.find_last_of('m')));
582                         
583                         string uchimeCommand;
584 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
585                         uchimeCommand = path + "uchime";        //      format the database, -o option gives us the ability
586             if (m->debug) { 
587                 m->mothurOut("[DEBUG]: Uchime location using \"which uchime\" = "); 
588                 Command* newCommand = new SystemCommand("which uchime"); m->mothurOutEndLine();
589                 newCommand->execute();
590                 delete newCommand;
591                 m->mothurOut("[DEBUG]: Mothur's location using \"which mothur\" = "); 
592                 newCommand = new SystemCommand("which mothur"); m->mothurOutEndLine();
593                 newCommand->execute();
594                 delete newCommand;
595             }
596 #else
597                         uchimeCommand = path + "uchime.exe";
598 #endif
599         
600                         //test to make sure uchime exists
601                         ifstream in;
602                         uchimeCommand = m->getFullPathName(uchimeCommand);
603                         int ableToOpen = m->openInputFile(uchimeCommand, in, "no error"); in.close();
604                         if(ableToOpen == 1) {   
605                 m->mothurOut(uchimeCommand + " file does not exist. Checking path... \n");
606                 //check to see if uchime is in the path??
607                 
608                 string uLocation = m->findProgramPath("uchime");
609                 
610                 
611                 ifstream in2;
612 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
613                 ableToOpen = m->openInputFile(uLocation, in2, "no error"); in2.close();
614 #else
615                 ableToOpen = m->openInputFile((uLocation + ".exe"), in2, "no error"); in2.close();
616 #endif
617
618                 if(ableToOpen == 1) { m->mothurOut("[ERROR]: " + uLocation + " file does not exist. mothur requires the uchime executable."); m->mothurOutEndLine(); abort = true; } 
619                 else {  m->mothurOut("Found uchime in your path, using " + uLocation + "\n");uchimeLocation = uLocation; }
620             }else {  uchimeLocation = uchimeCommand; }
621             
622             uchimeLocation = m->getFullPathName(uchimeLocation);
623         }
624         }
625         catch(exception& e) {
626                 m->errorOut(e, "ChimeraSlayerCommand", "ChimeraSlayerCommand");
627                 exit(1);
628         }
629 }
630 //***************************************************************************************************************
631
632 int ChimeraUchimeCommand::execute(){
633         try{
634         
635         if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
636                 
637                 m->mothurOut("\nuchime by Robert C. Edgar\nhttp://drive5.com/uchime\nThis code is donated to the public domain.\n\n");
638                 
639                 for (int s = 0; s < fastaFileNames.size(); s++) {
640                         
641                         m->mothurOut("Checking sequences from " + fastaFileNames[s] + " ..." ); m->mothurOutEndLine();
642                         
643                         int start = time(NULL); 
644                         string nameFile = "";
645                         if (outputDir == "") { outputDir = m->hasPath(fastaFileNames[s]);  }//if user entered a file with a path then preserve it                               
646                         map<string, string> variables; 
647             variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s]));
648                         string outputFileName = getOutputFileName("chimera", variables);
649                         string accnosFileName = getOutputFileName("accnos", variables);
650                         string alnsFileName = getOutputFileName("alns", variables);
651                         string newFasta = m->getRootName(fastaFileNames[s]) + "temp";
652             string newCountFile = "";
653                                 
654                         //you provided a groupfile
655                         string groupFile = "";
656             bool hasGroup = false;
657                         if (groupFileNames.size() != 0) { groupFile = groupFileNames[s]; hasGroup = true; }
658             else if (hasCount) {
659                 CountTable ct;
660                 if (ct.testGroups(nameFileNames[s])) { hasGroup = true; }
661                 variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(nameFileNames[s]));
662                 newCountFile = getOutputFileName("count", variables);
663             }
664                         
665                         if ((templatefile == "self") && (!hasGroup)) { //you want to run uchime with a template=self and no groups
666
667                                 if (processors != 1) { m->mothurOut("When using template=self, mothur can only use 1 processor, continuing."); m->mothurOutEndLine(); processors = 1; }
668                                 if (nameFileNames.size() != 0) { //you provided a namefile and we don't need to create one
669                                         nameFile = nameFileNames[s];
670                                 }else { nameFile = getNamesFile(fastaFileNames[s]); }
671                                                                                 
672                                 map<string, string> seqs;  
673                                 readFasta(fastaFileNames[s], seqs);  if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) {   m->mothurRemove(outputNames[j]);        }  return 0; }
674
675                                 //read namefile
676                                 vector<seqPriorityNode> nameMapCount;
677                 int error;
678                 if (hasCount) {
679                     CountTable ct;
680                     ct.readTable(nameFile);
681                     for(map<string, string>::iterator it = seqs.begin(); it != seqs.end(); it++) {
682                         int num = ct.getNumSeqs(it->first);
683                         if (num == 0) { error = 1; }
684                         else {
685                             seqPriorityNode temp(num, it->second, it->first);
686                             nameMapCount.push_back(temp);
687                         }
688                     }
689                 }else {
690                     error = m->readNames(nameFile, nameMapCount, seqs); if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) {        m->mothurRemove(outputNames[j]);        }  return 0; }
691                 }
692                                 if (error == 1) { for (int j = 0; j < outputNames.size(); j++) {        m->mothurRemove(outputNames[j]);        }  return 0; }
693                                 if (seqs.size() != nameMapCount.size()) { m->mothurOut( "The number of sequences in your fastafile does not match the number of sequences in your namefile, aborting."); m->mothurOutEndLine(); for (int j = 0; j < outputNames.size(); j++) {  m->mothurRemove(outputNames[j]);        }  return 0; }
694                                 
695                                 printFile(nameMapCount, newFasta);
696                                 fastaFileNames[s] = newFasta;
697                         }
698                         
699                         if (m->control_pressed) {  for (int j = 0; j < outputNames.size(); j++) {       m->mothurRemove(outputNames[j]);        }  return 0;    }                               
700                         
701                         if (hasGroup) {
702                                 if (nameFileNames.size() != 0) { //you provided a namefile and we don't need to create one
703                                         nameFile = nameFileNames[s];
704                                 }else { nameFile = getNamesFile(fastaFileNames[s]); }
705                                 
706                                 //Parse sequences by group
707                 vector<string> groups;
708                 map<string, string> uniqueNames;
709                 if (hasCount) {
710                     cparser = new SequenceCountParser(nameFile, fastaFileNames[s]);
711                     groups = cparser->getNamesOfGroups();
712                     uniqueNames = cparser->getAllSeqsMap();
713                 }else{
714                     sparser = new SequenceParser(groupFile, fastaFileNames[s], nameFile);
715                     groups = sparser->getNamesOfGroups();
716                     uniqueNames = sparser->getAllSeqsMap();
717                 }
718                                         
719                                 if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) {        m->mothurRemove(outputNames[j]);        }  return 0; }
720                                                                 
721                                 //clears files
722                                 ofstream out, out1, out2;
723                                 m->openOutputFile(outputFileName, out); out.close(); 
724                                 m->openOutputFile(accnosFileName, out1); out1.close();
725                                 if (chimealns) { m->openOutputFile(alnsFileName, out2); out2.close(); }
726                                 int totalSeqs = 0;
727                                 
728                                 if(processors == 1)     {       totalSeqs = driverGroups(outputFileName, newFasta, accnosFileName, alnsFileName, newCountFile, 0, groups.size(), groups);
729                     
730                     if (hasCount && dups) {
731                         CountTable c; c.readTable(nameFile);
732                         if (!m->isBlank(newCountFile)) {
733                             ifstream in2;
734                             m->openInputFile(newCountFile, in2);
735                             
736                             string name, group;
737                             while (!in2.eof()) {
738                                 in2 >> name >> group; m->gobble(in2);
739                                 c.setAbund(name, group, 0);
740                             }
741                             in2.close();
742                         }
743                         m->mothurRemove(newCountFile);
744                         c.printTable(newCountFile);
745                     }
746
747                 }else                           {       totalSeqs = createProcessesGroups(outputFileName, newFasta, accnosFileName, alnsFileName, newCountFile, groups, nameFile, groupFile, fastaFileNames[s]);                        }
748
749                                 if (m->control_pressed) {  for (int j = 0; j < outputNames.size(); j++) {       m->mothurRemove(outputNames[j]);        }  return 0;    }                               
750                
751                 
752                 if (!dups) { 
753                     int totalChimeras = deconvoluteResults(uniqueNames, outputFileName, accnosFileName, alnsFileName);
754                                 
755                     m->mothurOutEndLine(); m->mothurOut("It took " + toString(time(NULL) - start) + " secs to check " + toString(totalSeqs) + " sequences. " + toString(totalChimeras) + " chimeras were found.");      m->mothurOutEndLine();
756                     m->mothurOut("The number of sequences checked may be larger than the number of unique sequences because some sequences are found in several samples."); m->mothurOutEndLine(); 
757                                 }else {
758                     
759                     if (hasCount) {
760                         set<string> doNotRemove;
761                         CountTable c; c.readTable(newCountFile);
762                         vector<string> namesInTable = c.getNamesOfSeqs();
763                         for (int i = 0; i < namesInTable.size(); i++) {
764                             int temp = c.getNumSeqs(namesInTable[i]);
765                             if (temp == 0) {  c.remove(namesInTable[i]);  }
766                             else { doNotRemove.insert((namesInTable[i])); }
767                         }
768                         //remove names we want to keep from accnos file.
769                         set<string> accnosNames = m->readAccnos(accnosFileName);
770                         ofstream out2;
771                         m->openOutputFile(accnosFileName, out2);
772                         for (set<string>::iterator it = accnosNames.begin(); it != accnosNames.end(); it++) {
773                             if (doNotRemove.count(*it) == 0) {  out2 << (*it) << endl; }
774                         }
775                         out2.close();
776                         c.printTable(newCountFile);
777                     }
778                 }
779                 
780                 if (hasCount) { delete cparser; }
781                 else { delete sparser; }
782                 
783                                 if (m->control_pressed) {  for (int j = 0; j < outputNames.size(); j++) {       m->mothurRemove(outputNames[j]);        }  return 0;    }                               
784                                         
785                         }else{
786                                 if (m->control_pressed) {  for (int j = 0; j < outputNames.size(); j++) {       m->mothurRemove(outputNames[j]);        }  return 0;    }
787                         
788                                 int numSeqs = 0;
789                                 int numChimeras = 0;
790
791                                 if(processors == 1){ numSeqs = driver(outputFileName, fastaFileNames[s], accnosFileName, alnsFileName, numChimeras); }
792                                 else{   numSeqs = createProcesses(outputFileName, fastaFileNames[s], accnosFileName, alnsFileName, numChimeras); }
793                                 
794                                 //add headings
795                                 ofstream out;
796                                 m->openOutputFile(outputFileName+".temp", out); 
797                                 out << "Score\tQuery\tParentA\tParentB\tIdQM\tIdQA\tIdQB\tIdAB\tIdQT\tLY\tLN\tLA\tRY\tRN\tRA\tDiv\tYN\n";
798                                 out.close();
799                                 
800                                 m->appendFiles(outputFileName, outputFileName+".temp");
801                                 m->mothurRemove(outputFileName); rename((outputFileName+".temp").c_str(), outputFileName.c_str());
802                                 
803                                 if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) {        m->mothurRemove(outputNames[j]);        } return 0; }
804                         
805                                 //remove file made for uchime
806                                 if (templatefile == "self") {  m->mothurRemove(fastaFileNames[s]); }
807                         
808                                 m->mothurOutEndLine(); m->mothurOut("It took " + toString(time(NULL) - start) + " secs to check " + toString(numSeqs) + " sequences. " + toString(numChimeras) + " chimeras were found.");      m->mothurOutEndLine();
809                         }
810                         
811                         outputNames.push_back(outputFileName); outputTypes["chimera"].push_back(outputFileName);
812                         outputNames.push_back(accnosFileName); outputTypes["accnos"].push_back(accnosFileName);
813                         if (chimealns) { outputNames.push_back(alnsFileName); outputTypes["alns"].push_back(alnsFileName); }
814                 }
815         
816                 //set accnos file as new current accnosfile
817                 string current = "";
818                 itTypes = outputTypes.find("accnos");
819                 if (itTypes != outputTypes.end()) {
820                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setAccnosFile(current); }
821                 }
822                 
823                 m->mothurOutEndLine();
824                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
825                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }       
826                 m->mothurOutEndLine();
827                 
828                 return 0;
829                 
830         }
831         catch(exception& e) {
832                 m->errorOut(e, "ChimeraUchimeCommand", "execute");
833                 exit(1);
834         }
835 }
836 //**********************************************************************************************************************
837 int ChimeraUchimeCommand::deconvoluteResults(map<string, string>& uniqueNames, string outputFileName, string accnosFileName, string alnsFileName){
838         try {
839                 map<string, string>::iterator itUnique;
840                 int total = 0;
841                 
842                 //edit accnos file
843                 ifstream in2; 
844                 m->openInputFile(accnosFileName, in2);
845                 
846                 ofstream out2;
847                 m->openOutputFile(accnosFileName+".temp", out2);
848                 
849                 string name;
850                 set<string> namesInFile; //this is so if a sequence is found to be chimera in several samples we dont write it to the results file more than once
851                 set<string>::iterator itNames;
852                 set<string> chimerasInFile;
853                 set<string>::iterator itChimeras;
854
855                 
856                 while (!in2.eof()) {
857                         if (m->control_pressed) { in2.close(); out2.close(); m->mothurRemove(outputFileName); m->mothurRemove((accnosFileName+".temp")); return 0; }
858                         
859                         in2 >> name; m->gobble(in2);
860                         
861                         //find unique name
862                         itUnique = uniqueNames.find(name);
863                         
864                         if (itUnique == uniqueNames.end()) { m->mothurOut("[ERROR]: trouble parsing accnos results. Cannot find " + name + "."); m->mothurOutEndLine(); m->control_pressed = true; }
865                         else {
866                                 itChimeras = chimerasInFile.find((itUnique->second));
867                                 
868                                 if (itChimeras == chimerasInFile.end()) {
869                                         out2 << itUnique->second << endl;
870                                         chimerasInFile.insert((itUnique->second));
871                                         total++;
872                                 }
873                         }
874                 }
875                 in2.close();
876                 out2.close();
877                 
878                 m->mothurRemove(accnosFileName);
879                 rename((accnosFileName+".temp").c_str(), accnosFileName.c_str());
880                 
881                 
882                 
883                 //edit chimera file
884                 ifstream in; 
885                 m->openInputFile(outputFileName, in);
886                 
887                 ofstream out;
888                 m->openOutputFile(outputFileName+".temp", out); out.setf(ios::fixed, ios::floatfield); out.setf(ios::showpoint);
889                 out << "Score\tQuery\tParentA\tParentB\tIdQM\tIdQA\tIdQB\tIdAB\tIdQT\tLY\tLN\tLA\tRY\tRN\tRA\tDiv\tYN\n";
890                 
891                 float temp1;
892                 string parent1, parent2, temp2, temp3, temp4, temp5, temp6, temp7, temp8, temp9, temp10, temp11, temp12, temp13, flag;
893                 name = "";
894                 namesInFile.clear();    
895                 //assumptions - in file each read will always look like - if uchime source is updated, revisit this code.
896                 /*                                                                              1       2       3       4       5       6       7       8       9       10      11      12      13      14      15
897                  0.000000       F11Fcsw_33372/ab=18/            *       *       *       *       *       *       *       *       *       *       *       *       *       *       N
898                  0.018300       F11Fcsw_14980/ab=16/            F11Fcsw_1915/ab=35/     F11Fcsw_6032/ab=42/     79.9    78.7    78.2    78.7    79.2    3       0       5       11      10      20      1.46    N
899                 */
900                 
901                 while (!in.eof()) {
902                         
903                         if (m->control_pressed) { in.close(); out.close(); m->mothurRemove((outputFileName+".temp")); return 0; }
904                         
905                         bool print = false;
906                         in >> temp1;    m->gobble(in);
907                         in >> name;             m->gobble(in);
908                         in >> parent1;  m->gobble(in);
909                         in >> parent2;  m->gobble(in);
910                         in >> temp2 >> temp3 >> temp4 >> temp5 >> temp6 >> temp7 >> temp8 >> temp9 >> temp10 >> temp11 >> temp12 >> temp13 >> flag;
911                         m->gobble(in);
912                         
913                         //parse name - name will look like U68590/ab=1/
914                         string restOfName = "";
915                         int pos = name.find_first_of('/');
916                         if (pos != string::npos) {
917                                 restOfName = name.substr(pos);
918                                 name = name.substr(0, pos);
919                         }
920                         
921                         //find unique name
922                         itUnique = uniqueNames.find(name);
923                         
924                         if (itUnique == uniqueNames.end()) { m->mothurOut("[ERROR]: trouble parsing chimera results. Cannot find "+ name + "."); m->mothurOutEndLine(); m->control_pressed = true; }
925                         else {
926                                 name = itUnique->second;
927                                 //is this name already in the file
928                                 itNames = namesInFile.find((name));
929                                 
930                                 if (itNames == namesInFile.end()) { //no not in file
931                                         if (flag == "N") { //are you really a no??
932                                                 //is this sequence really not chimeric??
933                                                 itChimeras = chimerasInFile.find(name);
934                                                 
935                                                 //then you really are a no so print, otherwise skip
936                                                 if (itChimeras == chimerasInFile.end()) { print = true; }
937                                         }else{ print = true; }
938                                 }
939                         }
940                         
941                         if (print) {
942                                 out << temp1 << '\t' << name << restOfName << '\t';
943                                 namesInFile.insert(name);
944                                 
945                                 //parse parent1 names
946                                 if (parent1 != "*") {
947                                         restOfName = "";
948                                         pos = parent1.find_first_of('/');
949                                         if (pos != string::npos) {
950                                                 restOfName = parent1.substr(pos);
951                                                 parent1 = parent1.substr(0, pos);
952                                         }
953                                         
954                                         itUnique = uniqueNames.find(parent1);
955                                         if (itUnique == uniqueNames.end()) { m->mothurOut("[ERROR]: trouble parsing chimera results. Cannot find parentA "+ parent1 + "."); m->mothurOutEndLine(); m->control_pressed = true; }
956                                         else {  out << itUnique->second << restOfName << '\t';  }
957                                 }else { out << parent1 << '\t'; }
958                                 
959                                 //parse parent2 names
960                                 if (parent2 != "*") {
961                                         restOfName = "";
962                                         pos = parent2.find_first_of('/');
963                                         if (pos != string::npos) {
964                                                 restOfName = parent2.substr(pos);
965                                                 parent2 = parent2.substr(0, pos);
966                                         }
967                                         
968                                         itUnique = uniqueNames.find(parent2);
969                                         if (itUnique == uniqueNames.end()) { m->mothurOut("[ERROR]: trouble parsing chimera results. Cannot find parentB "+ parent2 + "."); m->mothurOutEndLine(); m->control_pressed = true; }
970                                         else {  out << itUnique->second << restOfName << '\t';  }
971                                 }else { out << parent2 << '\t'; }
972                                 
973                                 out << temp2 << '\t' << temp3 << '\t' << temp4 << '\t' << temp5 << '\t' << temp6 << '\t' << temp7 << '\t' << temp8 << '\t' << temp9 << '\t' << temp10 << '\t' << temp11 << '\t' << temp12 << temp13 << '\t' << flag << endl;    
974                         }
975                 }
976                 in.close();
977                 out.close();
978                 
979                 m->mothurRemove(outputFileName);
980                 rename((outputFileName+".temp").c_str(), outputFileName.c_str());
981                 
982                                 
983                 //edit anls file
984                 //assumptions - in file each read will always look like - if uchime source is updated, revisit this code.
985                 /*
986                  ------------------------------------------------------------------------
987                  Query   (  179 nt) F21Fcsw_11639/ab=591/
988                  ParentA (  179 nt) F11Fcsw_6529/ab=1625/
989                  ParentB (  181 nt) F21Fcsw_12128/ab=1827/
990                  
991                  A     1 AAGgAAGAtTAATACaagATGgCaTCatgAGtccgCATgTtcAcatGATTAAAG--gTaTtcCGGTagacGATGGGGATG 78
992                  Q     1 AAGTAAGACTAATACCCAATGACGTCTCTAGAAGACATCTGAAAGAGATTAAAG--ATTTATCGGTGATGGATGGGGATG 78
993                  B     1 AAGgAAGAtTAATcCaggATGggaTCatgAGttcACATgTccgcatGATTAAAGgtATTTtcCGGTagacGATGGGGATG 80
994                  Diffs      N    N    A N?N   N N  NNN  N?NB   N ?NaNNN          B B NN    NNNN          
995                  Votes      0    0    + 000   0 0  000  000+   0 00!000            + 00    0000          
996                  Model   AAAAAAAAAAAAAAAAAAAAAAxBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB
997                  
998                  A    79 CGTtccATTAGaTaGTaGGCGGGGTAACGGCCCACCtAGtCttCGATggaTAGGGGTTCTGAGAGGAAGGTCCCCCACAT 158
999                  Q    79 CGTCTGATTAGCTTGTTGGCGGGGTAACGGCCCACCAAGGCAACGATCAGTAGGGGTTCTGAGAGGAAGGTCCCCCACAT 158
1000                  B    81 CGTtccATTAGaTaGTaGGCGGGGTAACGGCCCACCtAGtCAACGATggaTAGGGGTTCTGAGAGGAAGGTCCCCCACAT 160
1001                  Diffs      NNN     N N  N                   N  N BB    NNN                              
1002                  Votes      000     0 0  0                   0  0 ++    000                              
1003                  Model   BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB
1004                  
1005                  A   159 TGGAACTGAGACACGGTCCAA 179
1006                  Q   159 TGGAACTGAGACACGGTCCAA 179
1007                  B   161 TGGAACTGAGACACGGTCCAA 181
1008                  Diffs                        
1009                  Votes                        
1010                  Model   BBBBBBBBBBBBBBBBBBBBB
1011                  
1012                  Ids.  QA 76.6%, QB 77.7%, AB 93.7%, QModel 78.9%, Div. +1.5%
1013                  Diffs Left 7: N 0, A 6, Y 1 (14.3%); Right 35: N 1, A 30, Y 4 (11.4%), Score 0.0047
1014                 */
1015                 if (chimealns) {
1016                         ifstream in3; 
1017                         m->openInputFile(alnsFileName, in3);
1018                 
1019                         ofstream out3;
1020                         m->openOutputFile(alnsFileName+".temp", out3); out3.setf(ios::fixed, ios::floatfield); out3.setf(ios::showpoint);
1021                 
1022                         name = "";
1023                         namesInFile.clear();
1024                         string line = "";
1025                         
1026                         while (!in3.eof()) {
1027                                 if (m->control_pressed) { in3.close(); out3.close(); m->mothurRemove(outputFileName); m->mothurRemove((accnosFileName)); m->mothurRemove((alnsFileName+".temp")); return 0; }
1028                                 
1029                                 line = "";
1030                                 line = m->getline(in3); 
1031                                 string temp = "";
1032                                 
1033                                 if (line != "") {
1034                                         istringstream iss(line);
1035                                         iss >> temp;
1036                                         
1037                                         //are you a name line
1038                                         if ((temp == "Query") || (temp == "ParentA") || (temp == "ParentB")) {
1039                                                 int spot = 0;
1040                                                 for (int i = 0; i < line.length(); i++) {
1041                                                         spot = i;
1042                                                         if (line[i] == ')') { break; }
1043                                                         else { out3 << line[i]; }
1044                                                 }
1045                                                 
1046                                                 if (spot == (line.length() - 1)) { m->mothurOut("[ERROR]: could not line sequence name in line " + line + "."); m->mothurOutEndLine(); m->control_pressed = true; }
1047                                                 else if ((spot+2) > (line.length() - 1)) { m->mothurOut("[ERROR]: could not line sequence name in line " + line + "."); m->mothurOutEndLine(); m->control_pressed = true; }
1048                                                 else {
1049                                                         out << line[spot] << line[spot+1];
1050                                                         
1051                                                         name = line.substr(spot+2);
1052                                                         
1053                                                         //parse name - name will either look like U68590/ab=1/ or U68590
1054                                                         string restOfName = "";
1055                                                         int pos = name.find_first_of('/');
1056                                                         if (pos != string::npos) {
1057                                                                 restOfName = name.substr(pos);
1058                                                                 name = name.substr(0, pos);
1059                                                         }
1060                                                         
1061                                                         //find unique name
1062                                                         itUnique = uniqueNames.find(name);
1063                                                         
1064                                                         if (itUnique == uniqueNames.end()) { m->mothurOut("[ERROR]: trouble parsing alns results. Cannot find "+ name + "."); m->mothurOutEndLine();m->control_pressed = true;  }
1065                                                         else {
1066                                                                 //only limit repeats on query names
1067                                                                 if (temp == "Query") {
1068                                                                         itNames = namesInFile.find((itUnique->second));
1069                                                                         
1070                                                                         if (itNames == namesInFile.end()) {
1071                                                                                 out << itUnique->second << restOfName << endl;
1072                                                                                 namesInFile.insert((itUnique->second));
1073                                                                         }
1074                                                                 }else { out << itUnique->second << restOfName << endl;  }
1075                                                         }
1076                                                         
1077                                                 }
1078                                                 
1079                                         }else { //not need to alter line
1080                                                 out3 << line << endl;
1081                                         }
1082                                 }else { out3 << endl; }
1083                         }
1084                         in3.close();
1085                         out3.close();
1086                         
1087                         m->mothurRemove(alnsFileName);
1088                         rename((alnsFileName+".temp").c_str(), alnsFileName.c_str());
1089                 }
1090                 
1091                 return total;
1092         }
1093         catch(exception& e) {
1094                 m->errorOut(e, "ChimeraUchimeCommand", "deconvoluteResults");
1095                 exit(1);
1096         }
1097 }       
1098 //**********************************************************************************************************************
1099 int ChimeraUchimeCommand::printFile(vector<seqPriorityNode>& nameMapCount, string filename){
1100         try {
1101                 
1102                 sort(nameMapCount.begin(), nameMapCount.end(), compareSeqPriorityNodes);
1103                 
1104                 ofstream out;
1105                 m->openOutputFile(filename, out);
1106                 
1107                 //print new file in order of
1108                 for (int i = 0; i < nameMapCount.size(); i++) {
1109                         out << ">" << nameMapCount[i].name  << "/ab=" << nameMapCount[i].numIdentical << "/" << endl << nameMapCount[i].seq << endl;
1110                 }
1111                 out.close();
1112                 
1113                 return 0;
1114         }
1115         catch(exception& e) {
1116                 m->errorOut(e, "ChimeraUchimeCommand", "printFile");
1117                 exit(1);
1118         }
1119 }       
1120 //**********************************************************************************************************************
1121 int ChimeraUchimeCommand::readFasta(string filename, map<string, string>& seqs){
1122         try {
1123                 //create input file for uchime
1124                 //read through fastafile and store info
1125                 ifstream in;
1126                 m->openInputFile(filename, in);
1127                 
1128                 while (!in.eof()) {
1129                         
1130                         if (m->control_pressed) { in.close(); return 0; }
1131                         
1132                         Sequence seq(in); m->gobble(in);
1133                         seqs[seq.getName()] = seq.getAligned();
1134                 }
1135                 in.close();
1136                 
1137                 return 0;
1138         }
1139         catch(exception& e) {
1140                 m->errorOut(e, "ChimeraUchimeCommand", "readFasta");
1141                 exit(1);
1142         }
1143 }       
1144 //**********************************************************************************************************************
1145
1146 string ChimeraUchimeCommand::getNamesFile(string& inputFile){
1147         try {
1148                 string nameFile = "";
1149                 
1150                 m->mothurOutEndLine(); m->mothurOut("No namesfile given, running unique.seqs command to generate one."); m->mothurOutEndLine(); m->mothurOutEndLine();
1151                 
1152                 //use unique.seqs to create new name and fastafile
1153                 string inputString = "fasta=" + inputFile;
1154                 m->mothurOut("/******************************************/"); m->mothurOutEndLine(); 
1155                 m->mothurOut("Running command: unique.seqs(" + inputString + ")"); m->mothurOutEndLine(); 
1156                 m->mothurCalling = true;
1157         
1158                 Command* uniqueCommand = new DeconvoluteCommand(inputString);
1159                 uniqueCommand->execute();
1160                 
1161                 map<string, vector<string> > filenames = uniqueCommand->getOutputFiles();
1162                 
1163                 delete uniqueCommand;
1164                 m->mothurCalling = false;
1165                 m->mothurOut("/******************************************/"); m->mothurOutEndLine(); 
1166                 
1167                 nameFile = filenames["name"][0];
1168                 inputFile = filenames["fasta"][0];
1169                 
1170                 return nameFile;
1171         }
1172         catch(exception& e) {
1173                 m->errorOut(e, "ChimeraUchimeCommand", "getNamesFile");
1174                 exit(1);
1175         }
1176 }
1177 //**********************************************************************************************************************
1178 int ChimeraUchimeCommand::driverGroups(string outputFName, string filename, string accnos, string alns, string countlist, int start, int end, vector<string> groups){
1179         try {
1180                 
1181                 int totalSeqs = 0;
1182                 int numChimeras = 0;
1183         
1184         ofstream outCountList;
1185         if (hasCount && dups) { m->openOutputFile(countlist, outCountList); }
1186         
1187                 for (int i = start; i < end; i++) {
1188                         int start = time(NULL);  if (m->control_pressed) {  outCountList.close(); m->mothurRemove(countlist); return 0; }
1189             
1190                         int error;
1191             if (hasCount) { error = cparser->getSeqs(groups[i], filename, true); if ((error == 1) || m->control_pressed) {  return 0; } }
1192             else { error = sparser->getSeqs(groups[i], filename, true); if ((error == 1) || m->control_pressed) {  return 0; } }
1193                         
1194                         int numSeqs = driver((outputFName + groups[i]), filename, (accnos+groups[i]), (alns+ groups[i]), numChimeras);
1195                         totalSeqs += numSeqs;
1196                         
1197                         if (m->control_pressed) { return 0; }
1198                         
1199                         //remove file made for uchime
1200                         if (!m->debug) {  m->mothurRemove(filename);  }
1201             else { m->mothurOut("[DEBUG]: saving file: " + filename + ".\n"); }
1202                         
1203             //if we provided a count file with group info and set dereplicate=t, then we want to create a *.pick.count_table
1204             //This table will zero out group counts for seqs determined to be chimeric by that group.
1205             if (dups) {
1206                 if (!m->isBlank(accnos+groups[i])) {
1207                     ifstream in;
1208                     m->openInputFile(accnos+groups[i], in);
1209                     string name;
1210                     if (hasCount) {
1211                         while (!in.eof()) {
1212                             in >> name; m->gobble(in);
1213                             outCountList << name << '\t' << groups[i] << endl;
1214                         }
1215                         in.close();
1216                     }else {
1217                         map<string, string> thisnamemap = sparser->getNameMap(groups[i]);
1218                         map<string, string>::iterator itN;
1219                         ofstream out;
1220                         m->openOutputFile(accnos+groups[i]+".temp", out);
1221                         while (!in.eof()) {
1222                             in >> name; m->gobble(in); 
1223                             itN = thisnamemap.find(name);
1224                             if (itN != thisnamemap.end()) {
1225                                 vector<string> tempNames; m->splitAtComma(itN->second, tempNames); 
1226                                 for (int j = 0; j < tempNames.size(); j++) { out << tempNames[j] << endl; }
1227                                 
1228                             }else { m->mothurOut("[ERROR]: parsing cannot find " + name + ".\n"); m->control_pressed = true; }
1229                         }
1230                         out.close();
1231                         in.close();
1232                         m->renameFile(accnos+groups[i]+".temp", accnos+groups[i]);
1233                     }
1234                    
1235                 }
1236             }
1237             
1238                         //append files
1239                         m->appendFiles((outputFName+groups[i]), outputFName); m->mothurRemove((outputFName+groups[i]));
1240                         m->appendFiles((accnos+groups[i]), accnos); m->mothurRemove((accnos+groups[i]));
1241                         if (chimealns) { m->appendFiles((alns+groups[i]), alns); m->mothurRemove((alns+groups[i])); }
1242                         
1243                         m->mothurOutEndLine(); m->mothurOut("It took " + toString(time(NULL) - start) + " secs to check " + toString(numSeqs) + " sequences from group " + groups[i] + ".");    m->mothurOutEndLine();                                  
1244                 }
1245
1246         if (hasCount && dups) { outCountList.close(); }
1247         
1248         return totalSeqs;
1249                 
1250         }
1251         catch(exception& e) {
1252                 m->errorOut(e, "ChimeraUchimeCommand", "driverGroups");
1253                 exit(1);
1254         }
1255 }       
1256 //**********************************************************************************************************************
1257
1258 int ChimeraUchimeCommand::driver(string outputFName, string filename, string accnos, string alns, int& numChimeras){
1259         try {
1260                 
1261                 outputFName = m->getFullPathName(outputFName);
1262                 filename = m->getFullPathName(filename);
1263                 alns = m->getFullPathName(alns);
1264                 
1265                 //to allow for spaces in the path
1266                 outputFName = "\"" + outputFName + "\"";
1267                 filename = "\"" + filename + "\"";
1268                 alns = "\"" + alns + "\"";
1269                                 
1270                 vector<char*> cPara;
1271                 
1272                 string uchimeCommand = uchimeLocation;
1273         uchimeCommand = "\"" + uchimeCommand + "\" ";
1274         
1275         char* tempUchime;
1276                 tempUchime= new char[uchimeCommand.length()+1]; 
1277                 *tempUchime = '\0';
1278                 strncat(tempUchime, uchimeCommand.c_str(), uchimeCommand.length());
1279                 cPara.push_back(tempUchime);
1280                 
1281         //are you using a reference file
1282                 if (templatefile != "self") {
1283             string outputFileName = filename.substr(1, filename.length()-2) + ".uchime_formatted";
1284             prepFile(filename.substr(1, filename.length()-2), outputFileName);
1285             filename = outputFileName;
1286             filename = "\"" + filename + "\"";
1287                         //add reference file
1288                         char* tempRef = new char[5]; 
1289                         //strcpy(tempRef, "--db"); 
1290                         *tempRef = '\0'; strncat(tempRef, "--db", 4);
1291                         cPara.push_back(tempRef);  
1292                         char* tempR = new char[templatefile.length()+1];
1293                         //strcpy(tempR, templatefile.c_str());
1294                         *tempR = '\0'; strncat(tempR, templatefile.c_str(), templatefile.length());
1295                         cPara.push_back(tempR);
1296                 }
1297                 
1298                 char* tempIn = new char[8]; 
1299                 *tempIn = '\0'; strncat(tempIn, "--input", 7);
1300                 //strcpy(tempIn, "--input"); 
1301                 cPara.push_back(tempIn);
1302                 char* temp = new char[filename.length()+1];
1303                 *temp = '\0'; strncat(temp, filename.c_str(), filename.length());
1304                 //strcpy(temp, filename.c_str());
1305                 cPara.push_back(temp);
1306                 
1307                 char* tempO = new char[12]; 
1308                 *tempO = '\0'; strncat(tempO, "--uchimeout", 11);
1309                 //strcpy(tempO, "--uchimeout"); 
1310                 cPara.push_back(tempO);
1311                 char* tempout = new char[outputFName.length()+1];
1312                 //strcpy(tempout, outputFName.c_str());
1313                 *tempout = '\0'; strncat(tempout, outputFName.c_str(), outputFName.length());
1314                 cPara.push_back(tempout);
1315                 
1316                 if (chimealns) {
1317                         char* tempA = new char[13]; 
1318                         *tempA = '\0'; strncat(tempA, "--uchimealns", 12);
1319                         //strcpy(tempA, "--uchimealns"); 
1320                         cPara.push_back(tempA);
1321                         char* tempa = new char[alns.length()+1];
1322                         //strcpy(tempa, alns.c_str());
1323                         *tempa = '\0'; strncat(tempa, alns.c_str(), alns.length());
1324                         cPara.push_back(tempa);
1325                 }
1326         
1327         if (strand != "") {
1328                         char* tempA = new char[9]; 
1329                         *tempA = '\0'; strncat(tempA, "--strand", 8);
1330                         cPara.push_back(tempA);
1331                         char* tempa = new char[strand.length()+1];
1332                         *tempa = '\0'; strncat(tempa, strand.c_str(), strand.length());
1333                         cPara.push_back(tempa);
1334                 }
1335                 
1336                 if (useAbskew) {
1337                         char* tempskew = new char[9];
1338                         *tempskew = '\0'; strncat(tempskew, "--abskew", 8);
1339                         //strcpy(tempskew, "--abskew"); 
1340                         cPara.push_back(tempskew);
1341                         char* tempSkew = new char[abskew.length()+1];
1342                         //strcpy(tempSkew, abskew.c_str());
1343                         *tempSkew = '\0'; strncat(tempSkew, abskew.c_str(), abskew.length());
1344                         cPara.push_back(tempSkew);
1345                 }
1346                 
1347                 if (useMinH) {
1348                         char* tempminh = new char[7]; 
1349                         *tempminh = '\0'; strncat(tempminh, "--minh", 6);
1350                         //strcpy(tempminh, "--minh"); 
1351                         cPara.push_back(tempminh);
1352                         char* tempMinH = new char[minh.length()+1];
1353                         *tempMinH = '\0'; strncat(tempMinH, minh.c_str(), minh.length());
1354                         //strcpy(tempMinH, minh.c_str());
1355                         cPara.push_back(tempMinH);
1356                 }
1357                 
1358                 if (useMindiv) {
1359                         char* tempmindiv = new char[9]; 
1360                         *tempmindiv = '\0'; strncat(tempmindiv, "--mindiv", 8);
1361                         //strcpy(tempmindiv, "--mindiv"); 
1362                         cPara.push_back(tempmindiv);
1363                         char* tempMindiv = new char[mindiv.length()+1];
1364                         *tempMindiv = '\0'; strncat(tempMindiv, mindiv.c_str(), mindiv.length());
1365                         //strcpy(tempMindiv, mindiv.c_str());
1366                         cPara.push_back(tempMindiv);
1367                 }
1368                 
1369                 if (useXn) {
1370                         char* tempxn = new char[5]; 
1371                         //strcpy(tempxn, "--xn"); 
1372                         *tempxn = '\0'; strncat(tempxn, "--xn", 4);
1373                         cPara.push_back(tempxn);
1374                         char* tempXn = new char[xn.length()+1];
1375                         //strcpy(tempXn, xn.c_str());
1376                         *tempXn = '\0'; strncat(tempXn, xn.c_str(), xn.length());
1377                         cPara.push_back(tempXn);
1378                 }
1379                 
1380                 if (useDn) {
1381                         char* tempdn = new char[5]; 
1382                         //strcpy(tempdn, "--dn"); 
1383                         *tempdn = '\0'; strncat(tempdn, "--dn", 4);
1384                         cPara.push_back(tempdn);
1385                         char* tempDn = new char[dn.length()+1];
1386                         *tempDn = '\0'; strncat(tempDn, dn.c_str(), dn.length());
1387                         //strcpy(tempDn, dn.c_str());
1388                         cPara.push_back(tempDn);
1389                 }
1390                 
1391                 if (useXa) {
1392                         char* tempxa = new char[5]; 
1393                         //strcpy(tempxa, "--xa"); 
1394                         *tempxa = '\0'; strncat(tempxa, "--xa", 4);
1395                         cPara.push_back(tempxa);
1396                         char* tempXa = new char[xa.length()+1];
1397                         *tempXa = '\0'; strncat(tempXa, xa.c_str(), xa.length());
1398                         //strcpy(tempXa, xa.c_str());
1399                         cPara.push_back(tempXa);
1400                 }
1401                 
1402                 if (useChunks) {
1403                         char* tempchunks = new char[9]; 
1404                         //strcpy(tempchunks, "--chunks"); 
1405                         *tempchunks = '\0'; strncat(tempchunks, "--chunks", 8);
1406                         cPara.push_back(tempchunks);
1407                         char* tempChunks = new char[chunks.length()+1];
1408                         *tempChunks = '\0'; strncat(tempChunks, chunks.c_str(), chunks.length());
1409                         //strcpy(tempChunks, chunks.c_str());
1410                         cPara.push_back(tempChunks);
1411                 }
1412                 
1413                 if (useMinchunk) {
1414                         char* tempminchunk = new char[11]; 
1415                         //strcpy(tempminchunk, "--minchunk"); 
1416                         *tempminchunk = '\0'; strncat(tempminchunk, "--minchunk", 10);
1417                         cPara.push_back(tempminchunk);
1418                         char* tempMinchunk = new char[minchunk.length()+1];
1419                         *tempMinchunk = '\0'; strncat(tempMinchunk, minchunk.c_str(), minchunk.length());
1420                         //strcpy(tempMinchunk, minchunk.c_str());
1421                         cPara.push_back(tempMinchunk);
1422                 }
1423                 
1424                 if (useIdsmoothwindow) {
1425                         char* tempidsmoothwindow = new char[17]; 
1426                         *tempidsmoothwindow = '\0'; strncat(tempidsmoothwindow, "--idsmoothwindow", 16);
1427                         //strcpy(tempidsmoothwindow, "--idsmoothwindow"); 
1428                         cPara.push_back(tempidsmoothwindow);
1429                         char* tempIdsmoothwindow = new char[idsmoothwindow.length()+1];
1430                         *tempIdsmoothwindow = '\0'; strncat(tempIdsmoothwindow, idsmoothwindow.c_str(), idsmoothwindow.length());
1431                         //strcpy(tempIdsmoothwindow, idsmoothwindow.c_str());
1432                         cPara.push_back(tempIdsmoothwindow);
1433                 }
1434                 
1435                 /*if (useMinsmoothid) {
1436                         char* tempminsmoothid = new char[14]; 
1437                         //strcpy(tempminsmoothid, "--minsmoothid"); 
1438                         *tempminsmoothid = '\0'; strncat(tempminsmoothid, "--minsmoothid", 13);
1439                         cPara.push_back(tempminsmoothid);
1440                         char* tempMinsmoothid = new char[minsmoothid.length()+1];
1441                         *tempMinsmoothid = '\0'; strncat(tempMinsmoothid, minsmoothid.c_str(), minsmoothid.length());
1442                         //strcpy(tempMinsmoothid, minsmoothid.c_str());
1443                         cPara.push_back(tempMinsmoothid);
1444                 }*/
1445                 
1446                 if (useMaxp) {
1447                         char* tempmaxp = new char[7]; 
1448                         //strcpy(tempmaxp, "--maxp"); 
1449                         *tempmaxp = '\0'; strncat(tempmaxp, "--maxp", 6);
1450                         cPara.push_back(tempmaxp);
1451                         char* tempMaxp = new char[maxp.length()+1];
1452                         *tempMaxp = '\0'; strncat(tempMaxp, maxp.c_str(), maxp.length());
1453                         //strcpy(tempMaxp, maxp.c_str());
1454                         cPara.push_back(tempMaxp);
1455                 }
1456                 
1457                 if (!skipgaps) {
1458                         char* tempskipgaps = new char[13]; 
1459                         //strcpy(tempskipgaps, "--[no]skipgaps");
1460                         *tempskipgaps = '\0'; strncat(tempskipgaps, "--noskipgaps", 12);
1461                         cPara.push_back(tempskipgaps);
1462                 }
1463                 
1464                 if (!skipgaps2) {
1465                         char* tempskipgaps2 = new char[14]; 
1466                         //strcpy(tempskipgaps2, "--[no]skipgaps2"); 
1467                         *tempskipgaps2 = '\0'; strncat(tempskipgaps2, "--noskipgaps2", 13);
1468                         cPara.push_back(tempskipgaps2);
1469                 }
1470                 
1471                 if (useMinlen) {
1472                         char* tempminlen = new char[9]; 
1473                         *tempminlen = '\0'; strncat(tempminlen, "--minlen", 8);
1474                         //strcpy(tempminlen, "--minlen"); 
1475                         cPara.push_back(tempminlen);
1476                         char* tempMinlen = new char[minlen.length()+1];
1477                         //strcpy(tempMinlen, minlen.c_str());
1478                         *tempMinlen = '\0'; strncat(tempMinlen, minlen.c_str(), minlen.length());
1479                         cPara.push_back(tempMinlen);
1480                 }
1481                 
1482                 if (useMaxlen) {
1483                         char* tempmaxlen = new char[9]; 
1484                         //strcpy(tempmaxlen, "--maxlen"); 
1485                         *tempmaxlen = '\0'; strncat(tempmaxlen, "--maxlen", 8);
1486                         cPara.push_back(tempmaxlen);
1487                         char* tempMaxlen = new char[maxlen.length()+1];
1488                         *tempMaxlen = '\0'; strncat(tempMaxlen, maxlen.c_str(), maxlen.length());
1489                         //strcpy(tempMaxlen, maxlen.c_str());
1490                         cPara.push_back(tempMaxlen);
1491                 }
1492                 
1493                 if (ucl) {
1494                         char* tempucl = new char[5]; 
1495                         strcpy(tempucl, "--ucl"); 
1496                         cPara.push_back(tempucl);
1497                 }
1498                 
1499                 if (useQueryfract) {
1500                         char* tempqueryfract = new char[13]; 
1501                         *tempqueryfract = '\0'; strncat(tempqueryfract, "--queryfract", 12);
1502                         //strcpy(tempqueryfract, "--queryfract"); 
1503                         cPara.push_back(tempqueryfract);
1504                         char* tempQueryfract = new char[queryfract.length()+1];
1505                         *tempQueryfract = '\0'; strncat(tempQueryfract, queryfract.c_str(), queryfract.length());
1506                         //strcpy(tempQueryfract, queryfract.c_str());
1507                         cPara.push_back(tempQueryfract);
1508                 }
1509                 
1510                 
1511                 char** uchimeParameters;
1512                 uchimeParameters = new char*[cPara.size()];
1513                 string commandString = "";
1514                 for (int i = 0; i < cPara.size(); i++) {  uchimeParameters[i] = cPara[i];  commandString += toString(cPara[i]) + " "; } 
1515                 //int numArgs = cPara.size();
1516                 
1517                 //uchime_main(numArgs, uchimeParameters); 
1518                 //cout << "commandString = " << commandString << endl;
1519 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
1520 #else
1521                 commandString = "\"" + commandString + "\"";
1522 #endif
1523         if (m->debug) { m->mothurOut("[DEBUG]: uchime command = " + commandString + ".\n"); }
1524                 system(commandString.c_str());
1525                 
1526                 //free memory
1527                 for(int i = 0; i < cPara.size(); i++)  {  delete cPara[i];  }
1528                 delete[] uchimeParameters; 
1529                 
1530                 //remove "" from filenames
1531                 outputFName = outputFName.substr(1, outputFName.length()-2);
1532                 filename = filename.substr(1, filename.length()-2);
1533                 alns = alns.substr(1, alns.length()-2);
1534                 
1535                 if (m->control_pressed) { return 0; }
1536                 
1537                 //create accnos file from uchime results
1538                 ifstream in; 
1539                 m->openInputFile(outputFName, in);
1540                 
1541                 ofstream out;
1542                 m->openOutputFile(accnos, out);
1543                 
1544                 int num = 0;
1545                 numChimeras = 0;
1546                 while(!in.eof()) {
1547                         
1548                         if (m->control_pressed) { break; }
1549                         
1550                         string name = "";
1551                         string chimeraFlag = "";
1552                         //in >> chimeraFlag >> name;
1553                         
1554             string line = m->getline(in);
1555             vector<string> pieces = m->splitWhiteSpace(line);
1556             if (pieces.size() > 2) { 
1557                 name = pieces[1];
1558                 //fix name if needed
1559                 if (templatefile == "self") { 
1560                     name = name.substr(0, name.length()-1); //rip off last /
1561                     name = name.substr(0, name.find_last_of('/'));
1562                 }
1563                 
1564                 chimeraFlag = pieces[pieces.size()-1];
1565                         }
1566                         //for (int i = 0; i < 15; i++) {  in >> chimeraFlag; }
1567                         m->gobble(in);
1568                         
1569                         if (chimeraFlag == "Y") {  out << name << endl; numChimeras++; }
1570                         num++;
1571                 }
1572                 in.close();
1573                 out.close();
1574                 
1575         //if (templatefile != "self") {  m->mothurRemove(filename); }
1576         
1577                 return num;
1578         }
1579         catch(exception& e) {
1580                 m->errorOut(e, "ChimeraUchimeCommand", "driver");
1581                 exit(1);
1582         }
1583 }
1584 /**************************************************************************************************/
1585 //uchime can't handle some of the things allowed in mothurs fasta files. This functions "cleans up" the file.
1586 int ChimeraUchimeCommand::prepFile(string filename, string output) {
1587         try {
1588         
1589         ifstream in;
1590         m->openInputFile(filename, in);
1591         
1592         ofstream out;
1593         m->openOutputFile(output, out);
1594         
1595         while (!in.eof()) {
1596             if (m->control_pressed) { break;  }
1597             
1598             Sequence seq(in); m->gobble(in);
1599             
1600             if (seq.getName() != "") { seq.printSequence(out); }
1601         }
1602         in.close();
1603         out.close();
1604         
1605         return 0;
1606     }
1607         catch(exception& e) {
1608                 m->errorOut(e, "ChimeraUchimeCommand", "prepFile");
1609                 exit(1);
1610         }
1611 }
1612 /**************************************************************************************************/
1613
1614 int ChimeraUchimeCommand::createProcesses(string outputFileName, string filename, string accnos, string alns, int& numChimeras) {
1615         try {
1616                 
1617                 processIDS.clear();
1618                 int process = 1;
1619                 int num = 0;
1620                 vector<string> files;
1621                 
1622 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)          
1623                 //break up file into multiple files
1624                 m->divideFile(filename, processors, files);
1625                 
1626                 if (m->control_pressed) {  return 0;  }
1627                                 
1628                 //loop through and create all the processes you want
1629                 while (process != processors) {
1630                         int pid = fork();
1631                         
1632                         if (pid > 0) {
1633                                 processIDS.push_back(pid);  //create map from line number to pid so you can append files in correct order later
1634                                 process++;
1635                         }else if (pid == 0){
1636                                 num = driver(outputFileName + toString(getpid()) + ".temp", files[process], accnos + toString(getpid()) + ".temp", alns + toString(getpid()) + ".temp", numChimeras);
1637                                 
1638                                 //pass numSeqs to parent
1639                                 ofstream out;
1640                                 string tempFile = outputFileName + toString(getpid()) + ".num.temp";
1641                                 m->openOutputFile(tempFile, out);
1642                                 out << num << endl;
1643                                 out << numChimeras << endl;
1644                                 out.close();
1645                                 
1646                                 exit(0);
1647                         }else { 
1648                                 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine(); 
1649                                 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
1650                                 exit(0);
1651                         }
1652                 }
1653                 
1654                 //do my part
1655                 num = driver(outputFileName, files[0], accnos, alns, numChimeras);
1656                 
1657                 //force parent to wait until all the processes are done
1658                 for (int i=0;i<processIDS.size();i++) { 
1659                         int temp = processIDS[i];
1660                         wait(&temp);
1661                 }
1662                 
1663                 for (int i = 0; i < processIDS.size(); i++) {
1664                         ifstream in;
1665                         string tempFile =  outputFileName + toString(processIDS[i]) + ".num.temp";
1666                         m->openInputFile(tempFile, in);
1667                         if (!in.eof()) { 
1668                                 int tempNum = 0; 
1669                                 in >> tempNum; m->gobble(in);
1670                                 num += tempNum; 
1671                                 in >> tempNum;
1672                                 numChimeras += tempNum;
1673                         }
1674                         in.close(); m->mothurRemove(tempFile);
1675                 }
1676 #else
1677                 //////////////////////////////////////////////////////////////////////////////////////////////////////
1678                 //Windows version shared memory, so be careful when passing variables through the preClusterData struct. 
1679                 //Above fork() will clone, so memory is separate, but that's not the case with windows, 
1680                 //////////////////////////////////////////////////////////////////////////////////////////////////////
1681                 
1682                 //divide file
1683                 int count = 0;
1684                 int spot = 0;
1685                 map<int, ofstream*> filehandles;
1686                 map<int, ofstream*>::iterator it3;
1687                 
1688                 ofstream* temp;
1689                 for (int i = 0; i < processors; i++) {
1690                         temp = new ofstream;
1691                         filehandles[i] = temp;
1692                         m->openOutputFile(filename+toString(i)+".temp", *(temp));
1693                         files.push_back(filename+toString(i)+".temp");
1694                 }
1695                 
1696                 ifstream in;
1697                 m->openInputFile(filename, in);
1698                 
1699                 while(!in.eof()) {
1700                         
1701                         if (m->control_pressed) { in.close(); for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) { (*(it3->second)).close(); delete it3->second; } return 0; }
1702                         
1703                         Sequence tempSeq(in); m->gobble(in); 
1704                         
1705                         if (tempSeq.getName() != "") {
1706                                 tempSeq.printSequence(*(filehandles[spot])); 
1707                                 spot++; count++;
1708                                 if (spot == processors) { spot = 0; }
1709                         }
1710                 }
1711                 in.close();
1712                 
1713                 //delete memory
1714                 for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) {
1715                         (*(it3->second)).close();
1716                         delete it3->second;
1717                 }
1718                 
1719                 //sanity check for number of processors
1720                 if (count < processors) { processors = count; }
1721                 
1722                 vector<uchimeData*> pDataArray; 
1723                 DWORD   dwThreadIdArray[processors-1];
1724                 HANDLE  hThreadArray[processors-1]; 
1725                 vector<string> dummy; //used so that we can use the same struct for MyUchimeSeqsThreadFunction and MyUchimeThreadFunction
1726                 
1727                 //Create processor worker threads.
1728                 for( int i=1; i<processors; i++ ){
1729                         // Allocate memory for thread data.
1730                         string extension = toString(i) + ".temp";
1731                         
1732                         uchimeData* tempUchime = new uchimeData(outputFileName+extension, uchimeLocation, templatefile, files[i], "", "", "", accnos+extension, alns+extension, "", dummy, m, 0, 0,  i);
1733                         tempUchime->setBooleans(dups, useAbskew, chimealns, useMinH, useMindiv, useXn, useDn, useXa, useChunks, useMinchunk, useIdsmoothwindow, useMinsmoothid, useMaxp, skipgaps, skipgaps2, useMinlen, useMaxlen, ucl, useQueryfract, hasCount);
1734                         tempUchime->setVariables(abskew, minh, mindiv, xn, dn, xa, chunks, minchunk, idsmoothwindow, minsmoothid, maxp, minlen, maxlen, queryfract, strand);
1735                         
1736                         pDataArray.push_back(tempUchime);
1737                         processIDS.push_back(i);
1738                         
1739                         //MySeqSumThreadFunction is in header. It must be global or static to work with the threads.
1740                         //default security attributes, thread function name, argument to thread function, use default creation flags, returns the thread identifier
1741                         hThreadArray[i-1] = CreateThread(NULL, 0, MyUchimeSeqsThreadFunction, pDataArray[i-1], 0, &dwThreadIdArray[i-1]);   
1742                 }
1743                 
1744                 
1745                 //using the main process as a worker saves time and memory
1746                 num = driver(outputFileName, files[0], accnos, alns, numChimeras);
1747                 
1748                 //Wait until all threads have terminated.
1749                 WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE);
1750                 
1751                 //Close all thread handles and free memory allocations.
1752                 for(int i=0; i < pDataArray.size(); i++){
1753                         num += pDataArray[i]->count;
1754                         numChimeras += pDataArray[i]->numChimeras;
1755                         CloseHandle(hThreadArray[i]);
1756                         delete pDataArray[i];
1757                 }
1758 #endif          
1759                 
1760                 //append output files
1761                 for(int i=0;i<processIDS.size();i++){
1762                         m->appendFiles((outputFileName + toString(processIDS[i]) + ".temp"), outputFileName);
1763                         m->mothurRemove((outputFileName + toString(processIDS[i]) + ".temp"));
1764                         
1765                         m->appendFiles((accnos + toString(processIDS[i]) + ".temp"), accnos);
1766                         m->mothurRemove((accnos + toString(processIDS[i]) + ".temp"));
1767                         
1768                         if (chimealns) {
1769                                 m->appendFiles((alns + toString(processIDS[i]) + ".temp"), alns);
1770                                 m->mothurRemove((alns + toString(processIDS[i]) + ".temp"));
1771                         }
1772                 }
1773                 
1774                 //get rid of the file pieces.
1775                 for (int i = 0; i < files.size(); i++) { m->mothurRemove(files[i]); }
1776                 return num;     
1777         }
1778         catch(exception& e) {
1779                 m->errorOut(e, "ChimeraUchimeCommand", "createProcesses");
1780                 exit(1);
1781         }
1782 }
1783 /**************************************************************************************************/
1784
1785 int ChimeraUchimeCommand::createProcessesGroups(string outputFName, string filename, string accnos, string alns, string newCountFile, vector<string> groups, string nameFile, string groupFile, string fastaFile) {
1786         try {
1787                 
1788                 processIDS.clear();
1789                 int process = 1;
1790                 int num = 0;
1791         
1792         CountTable newCount;
1793         if (hasCount && dups) { newCount.readTable(nameFile); }
1794                 
1795                 //sanity check
1796                 if (groups.size() < processors) { processors = groups.size(); }
1797                 
1798                 //divide the groups between the processors
1799                 vector<linePair> lines;
1800                 int numGroupsPerProcessor = groups.size() / processors;
1801                 for (int i = 0; i < processors; i++) {
1802                         int startIndex =  i * numGroupsPerProcessor;
1803                         int endIndex = (i+1) * numGroupsPerProcessor;
1804                         if(i == (processors - 1)){      endIndex = groups.size();       }
1805                         lines.push_back(linePair(startIndex, endIndex));
1806                 }
1807                 
1808 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)          
1809                                 
1810                 //loop through and create all the processes you want
1811                 while (process != processors) {
1812                         int pid = fork();
1813                         
1814                         if (pid > 0) {
1815                                 processIDS.push_back(pid);  //create map from line number to pid so you can append files in correct order later
1816                                 process++;
1817                         }else if (pid == 0){
1818                                 num = driverGroups(outputFName + toString(getpid()) + ".temp", filename + toString(getpid()) + ".temp", accnos + toString(getpid()) + ".temp", alns + toString(getpid()) + ".temp", accnos + ".byCount." + toString(getpid()) + ".temp", lines[process].start, lines[process].end, groups);
1819                                 
1820                                 //pass numSeqs to parent
1821                                 ofstream out;
1822                                 string tempFile = outputFName + toString(getpid()) + ".num.temp";
1823                                 m->openOutputFile(tempFile, out);
1824                                 out << num << endl;
1825                                 out.close();
1826                                 
1827                                 exit(0);
1828                         }else { 
1829                                 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine(); 
1830                                 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
1831                                 exit(0);
1832                         }
1833                 }
1834                 
1835                 //do my part
1836                 num = driverGroups(outputFName, filename, accnos, alns, accnos + ".byCount", lines[0].start, lines[0].end, groups);
1837                 
1838                 //force parent to wait until all the processes are done
1839                 for (int i=0;i<processIDS.size();i++) { 
1840                         int temp = processIDS[i];
1841                         wait(&temp);
1842                 }
1843         
1844                 for (int i = 0; i < processIDS.size(); i++) {
1845                         ifstream in;
1846                         string tempFile =  outputFName + toString(processIDS[i]) + ".num.temp";
1847                         m->openInputFile(tempFile, in);
1848                         if (!in.eof()) { int tempNum = 0; in >> tempNum; num += tempNum; }
1849                         in.close(); m->mothurRemove(tempFile);
1850         }
1851         
1852 #else
1853                 //////////////////////////////////////////////////////////////////////////////////////////////////////
1854                 //Windows version shared memory, so be careful when passing variables through the uchimeData struct. 
1855                 //Above fork() will clone, so memory is separate, but that's not the case with windows, 
1856                 //////////////////////////////////////////////////////////////////////////////////////////////////////
1857                 
1858                 vector<uchimeData*> pDataArray; 
1859                 DWORD   dwThreadIdArray[processors-1];
1860                 HANDLE  hThreadArray[processors-1]; 
1861                 
1862                 //Create processor worker threads.
1863                 for( int i=1; i<processors; i++ ){
1864                         // Allocate memory for thread data.
1865                         string extension = toString(i) + ".temp";
1866                         
1867                         uchimeData* tempUchime = new uchimeData(outputFName+extension, uchimeLocation, templatefile, filename+extension, fastaFile, nameFile, groupFile, accnos+extension, alns+extension, accnos+".byCount."+extension, groups, m, lines[i].start, lines[i].end,  i);
1868                         tempUchime->setBooleans(dups, useAbskew, chimealns, useMinH, useMindiv, useXn, useDn, useXa, useChunks, useMinchunk, useIdsmoothwindow, useMinsmoothid, useMaxp, skipgaps, skipgaps2, useMinlen, useMaxlen, ucl, useQueryfract, hasCount);
1869                         tempUchime->setVariables(abskew, minh, mindiv, xn, dn, xa, chunks, minchunk, idsmoothwindow, minsmoothid, maxp, minlen, maxlen, queryfract, strand);
1870                         
1871                         pDataArray.push_back(tempUchime);
1872                         processIDS.push_back(i);
1873                         
1874                         //MyUchimeThreadFunction is in header. It must be global or static to work with the threads.
1875                         //default security attributes, thread function name, argument to thread function, use default creation flags, returns the thread identifier
1876                         hThreadArray[i-1] = CreateThread(NULL, 0, MyUchimeThreadFunction, pDataArray[i-1], 0, &dwThreadIdArray[i-1]);   
1877                 }
1878                 
1879                 
1880                 //using the main process as a worker saves time and memory
1881                 num = driverGroups(outputFName, filename, accnos, alns, accnos + ".byCount", lines[0].start, lines[0].end, groups);
1882                 
1883                 //Wait until all threads have terminated.
1884                 WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE);
1885                 
1886                 //Close all thread handles and free memory allocations.
1887                 for(int i=0; i < pDataArray.size(); i++){
1888                         num += pDataArray[i]->count;
1889                         CloseHandle(hThreadArray[i]);
1890                         delete pDataArray[i];
1891                 }
1892         
1893         
1894 #endif          
1895       
1896         //read my own
1897         if (hasCount && dups) {
1898             if (!m->isBlank(accnos + ".byCount")) {
1899                 ifstream in2;
1900                 m->openInputFile(accnos + ".byCount", in2);
1901                 
1902                 string name, group;
1903                 while (!in2.eof()) {
1904                     in2 >> name >> group; m->gobble(in2);
1905                     newCount.setAbund(name, group, 0);
1906                 }
1907                 in2.close();
1908             }
1909             m->mothurRemove(accnos + ".byCount");
1910         }
1911        
1912                 //append output files
1913                 for(int i=0;i<processIDS.size();i++){
1914                         m->appendFiles((outputFName + toString(processIDS[i]) + ".temp"), outputFName);
1915                         m->mothurRemove((outputFName + toString(processIDS[i]) + ".temp"));
1916                         
1917                         m->appendFiles((accnos + toString(processIDS[i]) + ".temp"), accnos);
1918                         m->mothurRemove((accnos + toString(processIDS[i]) + ".temp"));
1919                         
1920                         if (chimealns) {
1921                                 m->appendFiles((alns + toString(processIDS[i]) + ".temp"), alns);
1922                                 m->mothurRemove((alns + toString(processIDS[i]) + ".temp"));
1923                         }
1924             
1925             if (hasCount && dups) {
1926                 if (!m->isBlank(accnos + ".byCount." + toString(processIDS[i]) + ".temp")) {
1927                     ifstream in2;
1928                     m->openInputFile(accnos + ".byCount." + toString(processIDS[i]) + ".temp", in2);
1929                     
1930                     string name, group;
1931                     while (!in2.eof()) {
1932                         in2 >> name >> group; m->gobble(in2);
1933                         newCount.setAbund(name, group, 0);
1934                     }
1935                     in2.close();
1936                 }
1937                 m->mothurRemove(accnos + ".byCount." + toString(processIDS[i]) + ".temp");
1938             }
1939
1940                 }
1941         
1942         //print new *.pick.count_table
1943         if (hasCount && dups) {  newCount.printTable(newCountFile);   }
1944                 
1945                 return num;     
1946                 
1947         }
1948         catch(exception& e) {
1949                 m->errorOut(e, "ChimeraUchimeCommand", "createProcessesGroups");
1950                 exit(1);
1951         }
1952 }
1953 /**************************************************************************************************/
1954