]> git.donarmstrong.com Git - mothur.git/blob - chimerauchimecommand.cpp
fixes while testing 1.33.0
[mothur.git] / chimerauchimecommand.cpp
1 /*
2  *  chimerauchimecommand.cpp
3  *  Mothur
4  *
5  *  Created by westcott on 5/13/11.
6  *  Copyright 2011 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "chimerauchimecommand.h"
11 #include "deconvolutecommand.h"
12 //#include "uc.h"
13 #include "sequence.hpp"
14 #include "referencedb.h"
15 #include "systemcommand.h"
16
17 //**********************************************************************************************************************
18 vector<string> ChimeraUchimeCommand::setParameters(){   
19         try {
20                 CommandParameter ptemplate("reference", "InputTypes", "", "", "none", "none", "none","",false,true,true); parameters.push_back(ptemplate);
21                 CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none","chimera-accnos",false,true,true); parameters.push_back(pfasta);
22                 CommandParameter pname("name", "InputTypes", "", "", "NameCount", "none", "none","",false,false,true); parameters.push_back(pname);
23         CommandParameter pcount("count", "InputTypes", "", "", "NameCount-CountGroup", "none", "none","",false,false,true); parameters.push_back(pcount);
24                 CommandParameter pgroup("group", "InputTypes", "", "", "CountGroup", "none", "none","",false,false,true); parameters.push_back(pgroup);
25                 CommandParameter pprocessors("processors", "Number", "", "1", "", "", "","",false,false,true); parameters.push_back(pprocessors);
26         CommandParameter pstrand("strand", "String", "", "", "", "", "","",false,false); parameters.push_back(pstrand);
27                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
28                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
29                 CommandParameter pabskew("abskew", "Number", "", "1.9", "", "", "","",false,false); parameters.push_back(pabskew);
30                 CommandParameter pchimealns("chimealns", "Boolean", "", "F", "", "", "","alns",false,false); parameters.push_back(pchimealns);
31                 CommandParameter pminh("minh", "Number", "", "0.3", "", "", "","",false,false); parameters.push_back(pminh);
32                 CommandParameter pmindiv("mindiv", "Number", "", "0.5", "", "", "","",false,false); parameters.push_back(pmindiv);
33                 CommandParameter pxn("xn", "Number", "", "8.0", "", "", "","",false,false); parameters.push_back(pxn);
34                 CommandParameter pdn("dn", "Number", "", "1.4", "", "", "","",false,false); parameters.push_back(pdn);
35                 CommandParameter pxa("xa", "Number", "", "1", "", "", "","",false,false); parameters.push_back(pxa);
36                 CommandParameter pchunks("chunks", "Number", "", "4", "", "", "","",false,false); parameters.push_back(pchunks);
37                 CommandParameter pminchunk("minchunk", "Number", "", "64", "", "", "","",false,false); parameters.push_back(pminchunk);
38                 CommandParameter pidsmoothwindow("idsmoothwindow", "Number", "", "32", "", "", "","",false,false); parameters.push_back(pidsmoothwindow);
39         CommandParameter pdups("dereplicate", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(pdups);
40
41                 //CommandParameter pminsmoothid("minsmoothid", "Number", "", "0.95", "", "", "",false,false); parameters.push_back(pminsmoothid);
42                 CommandParameter pmaxp("maxp", "Number", "", "2", "", "", "","",false,false); parameters.push_back(pmaxp);
43                 CommandParameter pskipgaps("skipgaps", "Boolean", "", "T", "", "", "","",false,false); parameters.push_back(pskipgaps);
44                 CommandParameter pskipgaps2("skipgaps2", "Boolean", "", "T", "", "", "","",false,false); parameters.push_back(pskipgaps2);
45                 CommandParameter pminlen("minlen", "Number", "", "10", "", "", "","",false,false); parameters.push_back(pminlen);
46                 CommandParameter pmaxlen("maxlen", "Number", "", "10000", "", "", "","",false,false); parameters.push_back(pmaxlen);
47                 CommandParameter pucl("ucl", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(pucl);
48                 CommandParameter pqueryfract("queryfract", "Number", "", "0.5", "", "", "","",false,false); parameters.push_back(pqueryfract);
49
50                 vector<string> myArray;
51                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
52                 return myArray;
53         }
54         catch(exception& e) {
55                 m->errorOut(e, "ChimeraUchimeCommand", "setParameters");
56                 exit(1);
57         }
58 }
59 //**********************************************************************************************************************
60 string ChimeraUchimeCommand::getHelpString(){   
61         try {
62                 string helpString = "";
63                 helpString += "The chimera.uchime command reads a fastafile and referencefile and outputs potentially chimeric sequences.\n";
64                 helpString += "This command is a wrapper for uchime written by Robert C. Edgar.\n";
65                 helpString += "The chimera.uchime command parameters are fasta, name, count, reference, processors, dereplicate, abskew, chimealns, minh, mindiv, xn, dn, xa, chunks, minchunk, idsmoothwindow, minsmoothid, maxp, skipgaps, skipgaps2, minlen, maxlen, ucl, strand and queryfact.\n";
66                 helpString += "The fasta parameter allows you to enter the fasta file containing your potentially chimeric sequences, and is required, unless you have a valid current fasta file. \n";
67                 helpString += "The name parameter allows you to provide a name file, if you are using template=self. \n";
68         helpString += "The count parameter allows you to provide a count file, if you are using template=self. When you use a count file with group info and dereplicate=T, mothur will create a *.pick.count_table file containing seqeunces after chimeras are removed. \n";
69                 helpString += "You may enter multiple fasta files by separating their names with dashes. ie. fasta=abrecovery.fasta-amazon.fasta \n";
70                 helpString += "The group parameter allows you to provide a group file. The group file can be used with a namesfile and reference=self. When checking sequences, only sequences from the same group as the query sequence will be used as the reference. \n";
71         helpString += "If the dereplicate parameter is false, then if one group finds the seqeunce to be chimeric, then all groups find it to be chimeric, default=f.\n";
72                 helpString += "The reference parameter allows you to enter a reference file containing known non-chimeric sequences, and is required. You may also set template=self, in this case the abundant sequences will be used as potential parents. \n";
73                 helpString += "The processors parameter allows you to specify how many processors you would like to use.  The default is 1. \n";
74                 helpString += "The abskew parameter can only be used with template=self. Minimum abundance skew. Default 1.9. Abundance skew is: min [ abund(parent1), abund(parent2) ] / abund(query).\n";
75                 helpString += "The chimealns parameter allows you to indicate you would like a file containing multiple alignments of query sequences to parents in human readable format. Alignments show columns with differences that support or contradict a chimeric model.\n";
76                 helpString += "The minh parameter - mininum score to report chimera. Default 0.3. Values from 0.1 to 5 might be reasonable. Lower values increase sensitivity but may report more false positives. If you decrease xn you may need to increase minh, and vice versa.\n";
77                 helpString += "The mindiv parameter - minimum divergence ratio, default 0.5. Div ratio is 100%% - %%identity between query sequence and the closest candidate for being a parent. If you don't care about very close chimeras, then you could increase mindiv to, say, 1.0 or 2.0, and also decrease minh, say to 0.1, to increase sensitivity. How well this works will depend on your data. Best is to tune parameters on a good benchmark.\n";
78                 helpString += "The xn parameter - weight of a no vote. Default 8.0. Decreasing this weight to around 3 or 4 may give better performance on denoised data.\n";
79                 helpString += "The dn parameter - pseudo-count prior on number of no votes. Default 1.4. Probably no good reason to change this unless you can retune to a good benchmark for your data. Reasonable values are probably in the range from 0.2 to 2.\n";
80                 helpString += "The xa parameter - weight of an abstain vote. Default 1. So far, results do not seem to be very sensitive to this parameter, but if you have a good training set might be worth trying. Reasonable values might range from 0.1 to 2.\n";
81                 helpString += "The chunks parameter is the number of chunks to extract from the query sequence when searching for parents. Default 4.\n";
82                 helpString += "The minchunk parameter is the minimum length of a chunk. Default 64.\n";
83                 helpString += "The idsmoothwindow parameter is the length of id smoothing window. Default 32.\n";
84                 //helpString += "The minsmoothid parameter - minimum factional identity over smoothed window of candidate parent. Default 0.95.\n";
85                 helpString += "The maxp parameter - maximum number of candidate parents to consider. Default 2. In tests so far, increasing maxp gives only a very small improvement in sensivity but tends to increase the error rate quite a bit.\n";
86                 helpString += "The skipgaps parameter controls how gapped columns affect counting of diffs. If skipgaps is set to T, columns containing gaps do not found as diffs. Default = T.\n";
87                 helpString += "The skipgaps2 parameter controls how gapped columns affect counting of diffs. If skipgaps2 is set to T, if column is immediately adjacent to a column containing a gap, it is not counted as a diff. Default = T.\n";
88                 helpString += "The minlen parameter is the minimum unaligned sequence length. Defaults 10. Applies to both query and reference sequences.\n";
89                 helpString += "The maxlen parameter is the maximum unaligned sequence length. Defaults 10000. Applies to both query and reference sequences.\n";
90                 helpString += "The ucl parameter - use local-X alignments. Default is global-X or false. On tests so far, global-X is always better; this option is retained because it just might work well on some future type of data.\n";
91                 helpString += "The queryfract parameter - minimum fraction of the query sequence that must be covered by a local-X alignment. Default 0.5. Applies only when ucl is true.\n";
92 #ifdef USE_MPI
93                 helpString += "When using MPI, the processors parameter is set to the number of MPI processes running. \n";
94 #endif
95                 helpString += "The chimera.uchime command should be in the following format: \n";
96                 helpString += "chimera.uchime(fasta=yourFastaFile, reference=yourTemplate) \n";
97                 helpString += "Example: chimera.uchime(fasta=AD.align, reference=silva.gold.align) \n";
98                 helpString += "Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFastaFile).\n";       
99                 return helpString;
100         }
101         catch(exception& e) {
102                 m->errorOut(e, "ChimeraUchimeCommand", "getHelpString");
103                 exit(1);
104         }
105 }
106 //**********************************************************************************************************************
107 string ChimeraUchimeCommand::getOutputPattern(string type) {
108     try {
109         string pattern = "";
110         
111         if (type == "chimera") {  pattern = "[filename],uchime.chimeras"; } 
112         else if (type == "accnos") {  pattern = "[filename],uchime.accnos"; } 
113         else if (type == "alns") {  pattern = "[filename],uchime.alns"; }
114         else if (type == "count") {  pattern = "[filename],uchime.pick.count_table"; } 
115         else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true;  }
116         
117         return pattern;
118     }
119     catch(exception& e) {
120         m->errorOut(e, "ChimeraUchimeCommand", "getOutputPattern");
121         exit(1);
122     }
123 }
124 //**********************************************************************************************************************
125 ChimeraUchimeCommand::ChimeraUchimeCommand(){   
126         try {
127                 abort = true; calledHelp = true;
128                 setParameters();
129                 vector<string> tempOutNames;
130                 outputTypes["chimera"] = tempOutNames;
131                 outputTypes["accnos"] = tempOutNames;
132                 outputTypes["alns"] = tempOutNames;
133         outputTypes["count"] = tempOutNames;
134         }
135         catch(exception& e) {
136                 m->errorOut(e, "ChimeraUchimeCommand", "ChimeraUchimeCommand");
137                 exit(1);
138         }
139 }
140 //***************************************************************************************************************
141 ChimeraUchimeCommand::ChimeraUchimeCommand(string option)  {
142         try {
143                 abort = false; calledHelp = false; hasName=false; hasCount=false;
144                 ReferenceDB* rdb = ReferenceDB::getInstance();
145                 
146                 //allow user to run help
147                 if(option == "help") { help(); abort = true; calledHelp = true; }
148                 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
149                 
150                 else {
151                         vector<string> myArray = setParameters();
152                         
153                         OptionParser parser(option);
154                         map<string,string> parameters = parser.getParameters();
155                         
156                         ValidParameters validParameter("chimera.uchime");
157                         map<string,string>::iterator it;
158                         
159                         //check to make sure all parameters are valid for command
160                         for (it = parameters.begin(); it != parameters.end(); it++) { 
161                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
162                         }
163                         
164                         vector<string> tempOutNames;
165                         outputTypes["chimera"] = tempOutNames;
166                         outputTypes["accnos"] = tempOutNames;
167                         outputTypes["alns"] = tempOutNames;
168             outputTypes["count"] = tempOutNames;
169                         
170                         //if the user changes the input directory command factory will send this info to us in the output parameter 
171                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
172                         if (inputDir == "not found"){   inputDir = "";          }
173                         
174                         //check for required parameters
175                         fastafile = validParameter.validFile(parameters, "fasta", false);
176                         if (fastafile == "not found") {                                 
177                                 //if there is a current fasta file, use it
178                                 string filename = m->getFastaFile(); 
179                                 if (filename != "") { fastaFileNames.push_back(filename); m->mothurOut("Using " + filename + " as input file for the fasta parameter."); m->mothurOutEndLine(); }
180                                 else {  m->mothurOut("You have no current fastafile and the fasta parameter is required."); m->mothurOutEndLine(); abort = true; }
181                         }else { 
182                                 m->splitAtDash(fastafile, fastaFileNames);
183                                 
184                                 //go through files and make sure they are good, if not, then disregard them
185                                 for (int i = 0; i < fastaFileNames.size(); i++) {
186                                         
187                                         bool ignore = false;
188                                         if (fastaFileNames[i] == "current") { 
189                                                 fastaFileNames[i] = m->getFastaFile(); 
190                                                 if (fastaFileNames[i] != "") {  m->mothurOut("Using " + fastaFileNames[i] + " as input file for the fasta parameter where you had given current."); m->mothurOutEndLine(); }
191                                                 else {  
192                                                         m->mothurOut("You have no current fastafile, ignoring current."); m->mothurOutEndLine(); ignore=true; 
193                                                         //erase from file list
194                                                         fastaFileNames.erase(fastaFileNames.begin()+i);
195                                                         i--;
196                                                 }
197                                         }
198                                         
199                                         if (!ignore) {
200                                                 
201                                                 if (inputDir != "") {
202                                                         string path = m->hasPath(fastaFileNames[i]);
203                                                         //if the user has not given a path then, add inputdir. else leave path alone.
204                                                         if (path == "") {       fastaFileNames[i] = inputDir + fastaFileNames[i];               }
205                                                 }
206                                                 
207                                                 int ableToOpen;
208                                                 ifstream in;
209                                                 
210                                                 ableToOpen = m->openInputFile(fastaFileNames[i], in, "noerror");
211                                                 
212                                                 //if you can't open it, try default location
213                                                 if (ableToOpen == 1) {
214                                                         if (m->getDefaultPath() != "") { //default path is set
215                                                                 string tryPath = m->getDefaultPath() + m->getSimpleName(fastaFileNames[i]);
216                                                                 m->mothurOut("Unable to open " + fastaFileNames[i] + ". Trying default " + tryPath); m->mothurOutEndLine();
217                                                                 ifstream in2;
218                                                                 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
219                                                                 in2.close();
220                                                                 fastaFileNames[i] = tryPath;
221                                                         }
222                                                 }
223                                                 
224                                                 if (ableToOpen == 1) {
225                                                         if (m->getOutputDir() != "") { //default path is set
226                                                                 string tryPath = m->getOutputDir() + m->getSimpleName(fastaFileNames[i]);
227                                                                 m->mothurOut("Unable to open " + fastaFileNames[i] + ". Trying output directory " + tryPath); m->mothurOutEndLine();
228                                                                 ifstream in2;
229                                                                 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
230                                                                 in2.close();
231                                                                 fastaFileNames[i] = tryPath;
232                                                         }
233                                                 }
234                                                 
235                                                 in.close();
236                                                 
237                                                 if (ableToOpen == 1) { 
238                                                         m->mothurOut("Unable to open " + fastaFileNames[i] + ". It will be disregarded."); m->mothurOutEndLine(); 
239                                                         //erase from file list
240                                                         fastaFileNames.erase(fastaFileNames.begin()+i);
241                                                         i--;
242                                                 }else {
243                                                         m->setFastaFile(fastaFileNames[i]);
244                                                 }
245                                         }
246                                 }
247                                 
248                                 //make sure there is at least one valid file left
249                                 if (fastaFileNames.size() == 0) { m->mothurOut("[ERROR]: no valid files."); m->mothurOutEndLine(); abort = true; }
250                         }
251                         
252                         
253                         //check for required parameters
254                         namefile = validParameter.validFile(parameters, "name", false);
255                         if (namefile == "not found") { namefile = "";   }
256                         else { 
257                                 m->splitAtDash(namefile, nameFileNames);
258                                 
259                                 //go through files and make sure they are good, if not, then disregard them
260                                 for (int i = 0; i < nameFileNames.size(); i++) {
261                                         
262                                         bool ignore = false;
263                                         if (nameFileNames[i] == "current") { 
264                                                 nameFileNames[i] = m->getNameFile(); 
265                                                 if (nameFileNames[i] != "") {  m->mothurOut("Using " + nameFileNames[i] + " as input file for the name parameter where you had given current."); m->mothurOutEndLine(); }
266                                                 else {  
267                                                         m->mothurOut("You have no current namefile, ignoring current."); m->mothurOutEndLine(); ignore=true; 
268                                                         //erase from file list
269                                                         nameFileNames.erase(nameFileNames.begin()+i);
270                                                         i--;
271                                                 }
272                                         }
273                                         
274                                         if (!ignore) {
275                                                 
276                                                 if (inputDir != "") {
277                                                         string path = m->hasPath(nameFileNames[i]);
278                                                         //if the user has not given a path then, add inputdir. else leave path alone.
279                                                         if (path == "") {       nameFileNames[i] = inputDir + nameFileNames[i];         }
280                                                 }
281                                                 
282                                                 int ableToOpen;
283                                                 ifstream in;
284                                                 
285                                                 ableToOpen = m->openInputFile(nameFileNames[i], in, "noerror");
286                                                 
287                                                 //if you can't open it, try default location
288                                                 if (ableToOpen == 1) {
289                                                         if (m->getDefaultPath() != "") { //default path is set
290                                                                 string tryPath = m->getDefaultPath() + m->getSimpleName(nameFileNames[i]);
291                                                                 m->mothurOut("Unable to open " + nameFileNames[i] + ". Trying default " + tryPath); m->mothurOutEndLine();
292                                                                 ifstream in2;
293                                                                 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
294                                                                 in2.close();
295                                                                 nameFileNames[i] = tryPath;
296                                                         }
297                                                 }
298                                                 
299                                                 if (ableToOpen == 1) {
300                                                         if (m->getOutputDir() != "") { //default path is set
301                                                                 string tryPath = m->getOutputDir() + m->getSimpleName(nameFileNames[i]);
302                                                                 m->mothurOut("Unable to open " + nameFileNames[i] + ". Trying output directory " + tryPath); m->mothurOutEndLine();
303                                                                 ifstream in2;
304                                                                 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
305                                                                 in2.close();
306                                                                 nameFileNames[i] = tryPath;
307                                                         }
308                                                 }
309                                                 
310                                                 in.close();
311                                                 
312                                                 if (ableToOpen == 1) { 
313                                                         m->mothurOut("Unable to open " + nameFileNames[i] + ". It will be disregarded."); m->mothurOutEndLine(); 
314                                                         //erase from file list
315                                                         nameFileNames.erase(nameFileNames.begin()+i);
316                                                         i--;
317                                                 }else {
318                                                         m->setNameFile(nameFileNames[i]);
319                                                 }
320                                         }
321                                 }
322                         }
323             
324             if (nameFileNames.size() != 0) { hasName = true; }
325             
326             //check for required parameters
327             vector<string> countfileNames;
328                         countfile = validParameter.validFile(parameters, "count", false);
329                         if (countfile == "not found") { 
330                 countfile = "";  
331                         }else { 
332                                 m->splitAtDash(countfile, countfileNames);
333                                 
334                                 //go through files and make sure they are good, if not, then disregard them
335                                 for (int i = 0; i < countfileNames.size(); i++) {
336                                         
337                                         bool ignore = false;
338                                         if (countfileNames[i] == "current") { 
339                                                 countfileNames[i] = m->getCountTableFile(); 
340                                                 if (nameFileNames[i] != "") {  m->mothurOut("Using " + countfileNames[i] + " as input file for the count parameter where you had given current."); m->mothurOutEndLine(); }
341                                                 else {  
342                                                         m->mothurOut("You have no current count file, ignoring current."); m->mothurOutEndLine(); ignore=true; 
343                                                         //erase from file list
344                                                         countfileNames.erase(countfileNames.begin()+i);
345                                                         i--;
346                                                 }
347                                         }
348                                         
349                                         if (!ignore) {
350                                                 
351                                                 if (inputDir != "") {
352                                                         string path = m->hasPath(countfileNames[i]);
353                                                         //if the user has not given a path then, add inputdir. else leave path alone.
354                                                         if (path == "") {       countfileNames[i] = inputDir + countfileNames[i];               }
355                                                 }
356                                                 
357                                                 int ableToOpen;
358                                                 ifstream in;
359                                                 
360                                                 ableToOpen = m->openInputFile(countfileNames[i], in, "noerror");
361                                                 
362                                                 //if you can't open it, try default location
363                                                 if (ableToOpen == 1) {
364                                                         if (m->getDefaultPath() != "") { //default path is set
365                                                                 string tryPath = m->getDefaultPath() + m->getSimpleName(countfileNames[i]);
366                                                                 m->mothurOut("Unable to open " + countfileNames[i] + ". Trying default " + tryPath); m->mothurOutEndLine();
367                                                                 ifstream in2;
368                                                                 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
369                                                                 in2.close();
370                                                                 countfileNames[i] = tryPath;
371                                                         }
372                                                 }
373                                                 
374                                                 if (ableToOpen == 1) {
375                                                         if (m->getOutputDir() != "") { //default path is set
376                                                                 string tryPath = m->getOutputDir() + m->getSimpleName(countfileNames[i]);
377                                                                 m->mothurOut("Unable to open " + countfileNames[i] + ". Trying output directory " + tryPath); m->mothurOutEndLine();
378                                                                 ifstream in2;
379                                                                 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
380                                                                 in2.close();
381                                                                 countfileNames[i] = tryPath;
382                                                         }
383                                                 }
384                                                 
385                                                 in.close();
386                                                 
387                                                 if (ableToOpen == 1) { 
388                                                         m->mothurOut("Unable to open " + countfileNames[i] + ". It will be disregarded."); m->mothurOutEndLine(); 
389                                                         //erase from file list
390                                                         countfileNames.erase(countfileNames.begin()+i);
391                                                         i--;
392                                                 }else {
393                                                         m->setCountTableFile(countfileNames[i]);
394                                                 }
395                                         }
396                                 }
397                         }
398             
399             if (countfileNames.size() != 0) { hasCount = true; }
400             
401                         //make sure there is at least one valid file left
402             if (hasName && hasCount) { m->mothurOut("[ERROR]: You must enter ONLY ONE of the following: count or name."); m->mothurOutEndLine(); abort = true; }
403             
404             if (!hasName && hasCount) { nameFileNames = countfileNames; }
405             
406                         if ((hasCount || hasName) && (nameFileNames.size() != fastaFileNames.size())) { m->mothurOut("[ERROR]: The number of name or count files does not match the number of fastafiles, please correct."); m->mothurOutEndLine(); abort=true; }
407                         
408                         bool hasGroup = true;
409                         groupfile = validParameter.validFile(parameters, "group", false);
410                         if (groupfile == "not found") { groupfile = "";  hasGroup = false; }
411                         else { 
412                                 m->splitAtDash(groupfile, groupFileNames);
413                                 
414                                 //go through files and make sure they are good, if not, then disregard them
415                                 for (int i = 0; i < groupFileNames.size(); i++) {
416                                         
417                                         bool ignore = false;
418                                         if (groupFileNames[i] == "current") { 
419                                                 groupFileNames[i] = m->getGroupFile(); 
420                                                 if (groupFileNames[i] != "") {  m->mothurOut("Using " + groupFileNames[i] + " as input file for the group parameter where you had given current."); m->mothurOutEndLine(); }
421                                                 else {  
422                                                         m->mothurOut("You have no current namefile, ignoring current."); m->mothurOutEndLine(); ignore=true; 
423                                                         //erase from file list
424                                                         groupFileNames.erase(groupFileNames.begin()+i);
425                                                         i--;
426                                                 }
427                                         }
428                                         
429                                         if (!ignore) {
430                                                 
431                                                 if (inputDir != "") {
432                                                         string path = m->hasPath(groupFileNames[i]);
433                                                         //if the user has not given a path then, add inputdir. else leave path alone.
434                                                         if (path == "") {       groupFileNames[i] = inputDir + groupFileNames[i];               }
435                                                 }
436                                                 
437                                                 int ableToOpen;
438                                                 ifstream in;
439                                                 
440                                                 ableToOpen = m->openInputFile(groupFileNames[i], in, "noerror");
441                                                 
442                                                 //if you can't open it, try default location
443                                                 if (ableToOpen == 1) {
444                                                         if (m->getDefaultPath() != "") { //default path is set
445                                                                 string tryPath = m->getDefaultPath() + m->getSimpleName(groupFileNames[i]);
446                                                                 m->mothurOut("Unable to open " + groupFileNames[i] + ". Trying default " + tryPath); m->mothurOutEndLine();
447                                                                 ifstream in2;
448                                                                 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
449                                                                 in2.close();
450                                                                 groupFileNames[i] = tryPath;
451                                                         }
452                                                 }
453                                                 
454                                                 if (ableToOpen == 1) {
455                                                         if (m->getOutputDir() != "") { //default path is set
456                                                                 string tryPath = m->getOutputDir() + m->getSimpleName(groupFileNames[i]);
457                                                                 m->mothurOut("Unable to open " + groupFileNames[i] + ". Trying output directory " + tryPath); m->mothurOutEndLine();
458                                                                 ifstream in2;
459                                                                 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
460                                                                 in2.close();
461                                                                 groupFileNames[i] = tryPath;
462                                                         }
463                                                 }
464                                                 
465                                                 in.close();
466                                                 
467                                                 if (ableToOpen == 1) { 
468                                                         m->mothurOut("Unable to open " + groupFileNames[i] + ". It will be disregarded."); m->mothurOutEndLine(); 
469                                                         //erase from file list
470                                                         groupFileNames.erase(groupFileNames.begin()+i);
471                                                         i--;
472                                                 }else {
473                                                         m->setGroupFile(groupFileNames[i]);
474                                                 }
475                                         }
476                                 }
477                                 
478                                 //make sure there is at least one valid file left
479                                 if (groupFileNames.size() == 0) { m->mothurOut("[ERROR]: no valid group files."); m->mothurOutEndLine(); abort = true; }
480                         }
481                         
482                         if (hasGroup && (groupFileNames.size() != fastaFileNames.size())) { m->mothurOut("[ERROR]: The number of groupfiles does not match the number of fastafiles, please correct."); m->mothurOutEndLine(); abort=true; }
483                         
484             if (hasGroup && hasCount) { m->mothurOut("[ERROR]: You must enter ONLY ONE of the following: count or group."); m->mothurOutEndLine(); abort = true; }                      
485                         //if the user changes the output directory command factory will send this info to us in the output parameter 
486                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = ""; }
487                         
488                         
489                         //if the user changes the output directory command factory will send this info to us in the output parameter 
490                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = ""; }
491                         
492                         string path;
493                         it = parameters.find("reference");
494                         //user has given a template file
495                         if(it != parameters.end()){ 
496                                 if (it->second == "self") { templatefile = "self"; }
497                                 else {
498                                         path = m->hasPath(it->second);
499                                         //if the user has not given a path then, add inputdir. else leave path alone.
500                                         if (path == "") {       parameters["reference"] = inputDir + it->second;                }
501                                         
502                                         templatefile = validParameter.validFile(parameters, "reference", true);
503                                         if (templatefile == "not open") { abort = true; }
504                                         else if (templatefile == "not found") { //check for saved reference sequences
505                                                 if (rdb->getSavedReference() != "") {
506                                                         templatefile = rdb->getSavedReference();
507                                                         m->mothurOutEndLine();  m->mothurOut("Using sequences from " + rdb->getSavedReference() + "."); m->mothurOutEndLine();
508                                                 }else {
509                                                         m->mothurOut("[ERROR]: You don't have any saved reference sequences and the reference parameter is a required."); 
510                                                         m->mothurOutEndLine();
511                                                         abort = true; 
512                                                 }
513                                         }
514                                 }
515                         }else if (hasName) {  templatefile = "self"; }
516             else if (hasCount) {  templatefile = "self"; }
517                         else { 
518                                 if (rdb->getSavedReference() != "") {
519                                         templatefile = rdb->getSavedReference();
520                                         m->mothurOutEndLine();  m->mothurOut("Using sequences from " + rdb->getSavedReference() + "."); m->mothurOutEndLine();
521                                 }else {
522                                         m->mothurOut("[ERROR]: You don't have any saved reference sequences and the reference parameter is a required."); 
523                                         m->mothurOutEndLine();
524                                         templatefile = ""; abort = true; 
525                                 } 
526                         }
527                                 
528                         string temp = validParameter.validFile(parameters, "processors", false);        if (temp == "not found"){       temp = m->getProcessors();      }
529                         m->setProcessors(temp);
530                         m->mothurConvert(temp, processors);
531                         
532                         abskew = validParameter.validFile(parameters, "abskew", false); if (abskew == "not found"){     useAbskew = false;  abskew = "1.9";     }else{  useAbskew = true;  }
533                         if (useAbskew && templatefile != "self") { m->mothurOut("The abskew parameter is only valid with template=self, ignoring."); m->mothurOutEndLine(); useAbskew = false; }
534                         
535                         temp = validParameter.validFile(parameters, "chimealns", false);                        if (temp == "not found") { temp = "f"; }
536                         chimealns = m->isTrue(temp); 
537                         
538                         minh = validParameter.validFile(parameters, "minh", false);                                             if (minh == "not found")                        { useMinH = false; minh = "0.3";                                        }       else{ useMinH = true;                   }
539                         mindiv = validParameter.validFile(parameters, "mindiv", false);                                 if (mindiv == "not found")                      { useMindiv = false; mindiv = "0.5";                            }       else{ useMindiv = true;                 }
540                         xn = validParameter.validFile(parameters, "xn", false);                                                 if (xn == "not found")                          { useXn = false; xn = "8.0";                                            }       else{ useXn = true;                             }
541                         dn = validParameter.validFile(parameters, "dn", false);                                                 if (dn == "not found")                          { useDn = false; dn = "1.4";                                            }       else{ useDn = true;                             }
542                         xa = validParameter.validFile(parameters, "xa", false);                                                 if (xa == "not found")                          { useXa = false; xa = "1";                                                      }       else{ useXa = true;                             }
543                         chunks = validParameter.validFile(parameters, "chunks", false);                                 if (chunks == "not found")                      { useChunks = false; chunks = "4";                                      }       else{ useChunks = true;                 }
544                         minchunk = validParameter.validFile(parameters, "minchunk", false);                             if (minchunk == "not found")            { useMinchunk = false; minchunk = "64";                         }       else{ useMinchunk = true;               }
545                         idsmoothwindow = validParameter.validFile(parameters, "idsmoothwindow", false); if (idsmoothwindow == "not found")      { useIdsmoothwindow = false; idsmoothwindow = "32";     }       else{ useIdsmoothwindow = true; }
546                         //minsmoothid = validParameter.validFile(parameters, "minsmoothid", false);             if (minsmoothid == "not found")         { useMinsmoothid = false; minsmoothid = "0.95";         }       else{ useMinsmoothid = true;    }
547                         maxp = validParameter.validFile(parameters, "maxp", false);                                             if (maxp == "not found")                        { useMaxp = false; maxp = "2";                                          }       else{ useMaxp = true;                   }
548                         minlen = validParameter.validFile(parameters, "minlen", false);                                 if (minlen == "not found")                      { useMinlen = false; minlen = "10";                                     }       else{ useMinlen = true;                 }
549                         maxlen = validParameter.validFile(parameters, "maxlen", false);                                 if (maxlen == "not found")                      { useMaxlen = false; maxlen = "10000";                          }       else{ useMaxlen = true;                 }
550             
551             strand = validParameter.validFile(parameters, "strand", false);     if (strand == "not found")      {  strand = ""; }
552                         
553                         temp = validParameter.validFile(parameters, "ucl", false);                                              if (temp == "not found") { temp = "f"; }
554                         ucl = m->isTrue(temp);
555                         
556                         queryfract = validParameter.validFile(parameters, "queryfract", false);                 if (queryfract == "not found")          { useQueryfract = false; queryfract = "0.5";            }       else{ useQueryfract = true;             }
557                         if (!ucl && useQueryfract) { m->mothurOut("queryfact may only be used when ucl=t, ignoring."); m->mothurOutEndLine(); useQueryfract = false; }
558                         
559                         temp = validParameter.validFile(parameters, "skipgaps", false);                                 if (temp == "not found") { temp = "t"; }
560                         skipgaps = m->isTrue(temp); 
561
562                         temp = validParameter.validFile(parameters, "skipgaps2", false);                                if (temp == "not found") { temp = "t"; }
563                         skipgaps2 = m->isTrue(temp); 
564             
565             
566                         temp = validParameter.validFile(parameters, "dereplicate", false);      
567                         if (temp == "not found") { temp = "false";                      }
568                         dups = m->isTrue(temp);
569
570                         
571                         if (hasName && (templatefile != "self")) { m->mothurOut("You have provided a namefile and the reference parameter is not set to self. I am not sure what reference you are trying to use, aborting."); m->mothurOutEndLine(); abort=true; }
572             if (hasCount && (templatefile != "self")) { m->mothurOut("You have provided a countfile and the reference parameter is not set to self. I am not sure what reference you are trying to use, aborting."); m->mothurOutEndLine(); abort=true; }
573                         if (hasGroup && (templatefile != "self")) { m->mothurOut("You have provided a group file and the reference parameter is not set to self. I am not sure what reference you are trying to use, aborting."); m->mothurOutEndLine(); abort=true; }
574                         
575                         //look for uchime exe
576                         path = m->argv;
577                         string tempPath = path;
578                         for (int i = 0; i < path.length(); i++) { tempPath[i] = tolower(path[i]); }
579                         path = path.substr(0, (tempPath.find_last_of('m')));
580                         
581                         string uchimeCommand;
582 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
583                         uchimeCommand = path + "uchime";        //      format the database, -o option gives us the ability
584             if (m->debug) { 
585                 m->mothurOut("[DEBUG]: Uchime location using \"which uchime\" = "); 
586                 Command* newCommand = new SystemCommand("which uchime"); m->mothurOutEndLine();
587                 newCommand->execute();
588                 delete newCommand;
589                 m->mothurOut("[DEBUG]: Mothur's location using \"which mothur\" = "); 
590                 newCommand = new SystemCommand("which mothur"); m->mothurOutEndLine();
591                 newCommand->execute();
592                 delete newCommand;
593             }
594 #else
595                         uchimeCommand = path + "uchime.exe";
596 #endif
597         
598                         //test to make sure uchime exists
599                         ifstream in;
600                         uchimeCommand = m->getFullPathName(uchimeCommand);
601                         int ableToOpen = m->openInputFile(uchimeCommand, in, "no error"); in.close();
602                         if(ableToOpen == 1) {   
603                 m->mothurOut(uchimeCommand + " file does not exist. Checking path... \n");
604                 //check to see if uchime is in the path??
605                 
606                 string uLocation = m->findProgramPath("uchime");
607                 
608                 
609                 ifstream in2;
610 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
611                 ableToOpen = m->openInputFile(uLocation, in2, "no error"); in2.close();
612 #else
613                 ableToOpen = m->openInputFile((uLocation + ".exe"), in2, "no error"); in2.close();
614 #endif
615
616                 if(ableToOpen == 1) { m->mothurOut("[ERROR]: " + uLocation + " file does not exist. mothur requires the uchime executable."); m->mothurOutEndLine(); abort = true; } 
617                 else {  m->mothurOut("Found uchime in your path, using " + uLocation + "\n");uchimeLocation = uLocation; }
618             }else {  uchimeLocation = uchimeCommand; }
619             
620             uchimeLocation = m->getFullPathName(uchimeLocation);
621         }
622         }
623         catch(exception& e) {
624                 m->errorOut(e, "ChimeraSlayerCommand", "ChimeraSlayerCommand");
625                 exit(1);
626         }
627 }
628 //***************************************************************************************************************
629
630 int ChimeraUchimeCommand::execute(){
631         try{
632         
633         if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
634                 
635                 m->mothurOut("\nuchime by Robert C. Edgar\nhttp://drive5.com/uchime\nThis code is donated to the public domain.\n\n");
636                 
637                 for (int s = 0; s < fastaFileNames.size(); s++) {
638                         
639                         m->mothurOut("Checking sequences from " + fastaFileNames[s] + " ..." ); m->mothurOutEndLine();
640                         
641                         int start = time(NULL); 
642                         string nameFile = "";
643                         if (outputDir == "") { outputDir = m->hasPath(fastaFileNames[s]);  }//if user entered a file with a path then preserve it                               
644                         map<string, string> variables; 
645             variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s]));
646                         string outputFileName = getOutputFileName("chimera", variables);
647                         string accnosFileName = getOutputFileName("accnos", variables);
648                         string alnsFileName = getOutputFileName("alns", variables);
649                         string newFasta = m->getRootName(fastaFileNames[s]) + "temp";
650             string newCountFile = "";
651                                 
652                         //you provided a groupfile
653                         string groupFile = "";
654             bool hasGroup = false;
655                         if (groupFileNames.size() != 0) { groupFile = groupFileNames[s]; hasGroup = true; }
656             else if (hasCount) {
657                 CountTable ct;
658                 if (ct.testGroups(nameFileNames[s])) { hasGroup = true; }
659                 variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(nameFileNames[s]));
660                 newCountFile = getOutputFileName("count", variables);
661             }
662                         
663                         if ((templatefile == "self") && (!hasGroup)) { //you want to run uchime with a template=self and no groups
664
665                                 if (processors != 1) { m->mothurOut("When using template=self, mothur can only use 1 processor, continuing."); m->mothurOutEndLine(); processors = 1; }
666                                 if (nameFileNames.size() != 0) { //you provided a namefile and we don't need to create one
667                                         nameFile = nameFileNames[s];
668                                 }else { nameFile = getNamesFile(fastaFileNames[s]); }
669                                                                                 
670                                 map<string, string> seqs;  
671                                 readFasta(fastaFileNames[s], seqs);  if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) {   m->mothurRemove(outputNames[j]);        }  return 0; }
672
673                                 //read namefile
674                                 vector<seqPriorityNode> nameMapCount;
675                 int error;
676                 if (hasCount) {
677                     CountTable ct;
678                     ct.readTable(nameFile, true, false);
679                     for(map<string, string>::iterator it = seqs.begin(); it != seqs.end(); it++) {
680                         int num = ct.getNumSeqs(it->first);
681                         if (num == 0) { error = 1; }
682                         else {
683                             seqPriorityNode temp(num, it->second, it->first);
684                             nameMapCount.push_back(temp);
685                         }
686                     }
687                 }else {
688                     error = m->readNames(nameFile, nameMapCount, seqs); if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) {        m->mothurRemove(outputNames[j]);        }  return 0; }
689                 }
690                                 if (error == 1) { for (int j = 0; j < outputNames.size(); j++) {        m->mothurRemove(outputNames[j]);        }  return 0; }
691                                 if (seqs.size() != nameMapCount.size()) { m->mothurOut( "The number of sequences in your fastafile does not match the number of sequences in your namefile, aborting."); m->mothurOutEndLine(); for (int j = 0; j < outputNames.size(); j++) {  m->mothurRemove(outputNames[j]);        }  return 0; }
692                                 
693                                 printFile(nameMapCount, newFasta);
694                                 fastaFileNames[s] = newFasta;
695                         }
696                         
697                         if (m->control_pressed) {  for (int j = 0; j < outputNames.size(); j++) {       m->mothurRemove(outputNames[j]);        }  return 0;    }                               
698                         
699                         if (hasGroup) {
700                                 if (nameFileNames.size() != 0) { //you provided a namefile and we don't need to create one
701                                         nameFile = nameFileNames[s];
702                                 }else { nameFile = getNamesFile(fastaFileNames[s]); }
703                                 
704                                 //Parse sequences by group
705                 vector<string> groups;
706                 map<string, string> uniqueNames;
707                 if (hasCount) {
708                     cparser = new SequenceCountParser(nameFile, fastaFileNames[s]);
709                     groups = cparser->getNamesOfGroups();
710                     uniqueNames = cparser->getAllSeqsMap();
711                 }else{
712                     sparser = new SequenceParser(groupFile, fastaFileNames[s], nameFile);
713                     groups = sparser->getNamesOfGroups();
714                     uniqueNames = sparser->getAllSeqsMap();
715                 }
716                                         
717                                 if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) {        m->mothurRemove(outputNames[j]);        }  return 0; }
718                                                                 
719                                 //clears files
720                                 ofstream out, out1, out2;
721                                 m->openOutputFile(outputFileName, out); out.close(); 
722                                 m->openOutputFile(accnosFileName, out1); out1.close();
723                                 if (chimealns) { m->openOutputFile(alnsFileName, out2); out2.close(); }
724                                 int totalSeqs = 0;
725                                 
726                                 if(processors == 1)     {       totalSeqs = driverGroups(outputFileName, newFasta, accnosFileName, alnsFileName, newCountFile, 0, groups.size(), groups);
727                     
728                     if (hasCount && dups) {
729                         CountTable c; c.readTable(nameFile, true, false);
730                         if (!m->isBlank(newCountFile)) {
731                             ifstream in2;
732                             m->openInputFile(newCountFile, in2);
733                             
734                             string name, group;
735                             while (!in2.eof()) {
736                                 in2 >> name >> group; m->gobble(in2);
737                                 c.setAbund(name, group, 0);
738                             }
739                             in2.close();
740                         }
741                         m->mothurRemove(newCountFile);
742                         c.printTable(newCountFile);
743                     }
744
745                 }else                           {       totalSeqs = createProcessesGroups(outputFileName, newFasta, accnosFileName, alnsFileName, newCountFile, groups, nameFile, groupFile, fastaFileNames[s]);                        }
746
747                                 if (m->control_pressed) {  for (int j = 0; j < outputNames.size(); j++) {       m->mothurRemove(outputNames[j]);        }  return 0;    }                               
748                
749                 
750                 if (!dups) { 
751                     int totalChimeras = deconvoluteResults(uniqueNames, outputFileName, accnosFileName, alnsFileName);
752                                 
753                     m->mothurOutEndLine(); m->mothurOut("It took " + toString(time(NULL) - start) + " secs to check " + toString(totalSeqs) + " sequences. " + toString(totalChimeras) + " chimeras were found.");      m->mothurOutEndLine();
754                     m->mothurOut("The number of sequences checked may be larger than the number of unique sequences because some sequences are found in several samples."); m->mothurOutEndLine(); 
755                                 }else {
756                     
757                     if (hasCount) {
758                         set<string> doNotRemove;
759                         CountTable c; c.readTable(newCountFile, true, true);
760                         vector<string> namesInTable = c.getNamesOfSeqs();
761                         for (int i = 0; i < namesInTable.size(); i++) {
762                             int temp = c.getNumSeqs(namesInTable[i]);
763                             if (temp == 0) {  c.remove(namesInTable[i]);  }
764                             else { doNotRemove.insert((namesInTable[i])); }
765                         }
766                         //remove names we want to keep from accnos file.
767                         set<string> accnosNames = m->readAccnos(accnosFileName);
768                         ofstream out2;
769                         m->openOutputFile(accnosFileName, out2);
770                         for (set<string>::iterator it = accnosNames.begin(); it != accnosNames.end(); it++) {
771                             if (doNotRemove.count(*it) == 0) {  out2 << (*it) << endl; }
772                         }
773                         out2.close();
774                         c.printTable(newCountFile);
775                         outputNames.push_back(newCountFile); outputTypes["count"].push_back(newCountFile);
776                     }
777                 }
778                 
779                 if (hasCount) { delete cparser; }
780                 else { delete sparser; }
781                 
782                                 if (m->control_pressed) {  for (int j = 0; j < outputNames.size(); j++) {       m->mothurRemove(outputNames[j]);        }  return 0;    }                               
783                                         
784                         }else{
785                                 if (m->control_pressed) {  for (int j = 0; j < outputNames.size(); j++) {       m->mothurRemove(outputNames[j]);        }  return 0;    }
786                         
787                                 int numSeqs = 0;
788                                 int numChimeras = 0;
789
790                                 if(processors == 1){ numSeqs = driver(outputFileName, fastaFileNames[s], accnosFileName, alnsFileName, numChimeras); }
791                                 else{   numSeqs = createProcesses(outputFileName, fastaFileNames[s], accnosFileName, alnsFileName, numChimeras); }
792                                 
793                                 //add headings
794                                 ofstream out;
795                                 m->openOutputFile(outputFileName+".temp", out); 
796                                 out << "Score\tQuery\tParentA\tParentB\tIdQM\tIdQA\tIdQB\tIdAB\tIdQT\tLY\tLN\tLA\tRY\tRN\tRA\tDiv\tYN\n";
797                                 out.close();
798                                 
799                                 m->appendFiles(outputFileName, outputFileName+".temp");
800                                 m->mothurRemove(outputFileName); rename((outputFileName+".temp").c_str(), outputFileName.c_str());
801                                 
802                                 if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) {        m->mothurRemove(outputNames[j]);        } return 0; }
803                         
804                                 //remove file made for uchime
805                                 if (templatefile == "self") {  m->mothurRemove(fastaFileNames[s]); }
806                         
807                                 m->mothurOutEndLine(); m->mothurOut("It took " + toString(time(NULL) - start) + " secs to check " + toString(numSeqs) + " sequences. " + toString(numChimeras) + " chimeras were found.");      m->mothurOutEndLine();
808                         }
809                         
810                         outputNames.push_back(outputFileName); outputTypes["chimera"].push_back(outputFileName);
811                         outputNames.push_back(accnosFileName); outputTypes["accnos"].push_back(accnosFileName);
812                         if (chimealns) { outputNames.push_back(alnsFileName); outputTypes["alns"].push_back(alnsFileName); }
813                 }
814         
815                 //set accnos file as new current accnosfile
816                 string current = "";
817                 itTypes = outputTypes.find("accnos");
818                 if (itTypes != outputTypes.end()) {
819                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setAccnosFile(current); }
820                 }
821         
822         itTypes = outputTypes.find("count");
823                 if (itTypes != outputTypes.end()) {
824                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setCountTableFile(current); }
825                 }
826                 
827                 m->mothurOutEndLine();
828                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
829                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }       
830                 m->mothurOutEndLine();
831                 
832                 return 0;
833                 
834         }
835         catch(exception& e) {
836                 m->errorOut(e, "ChimeraUchimeCommand", "execute");
837                 exit(1);
838         }
839 }
840 //**********************************************************************************************************************
841 int ChimeraUchimeCommand::deconvoluteResults(map<string, string>& uniqueNames, string outputFileName, string accnosFileName, string alnsFileName){
842         try {
843                 map<string, string>::iterator itUnique;
844                 int total = 0;
845                 
846                 ofstream out2;
847                 m->openOutputFile(accnosFileName+".temp", out2);
848                 
849                 string name;
850                 set<string> namesInFile; //this is so if a sequence is found to be chimera in several samples we dont write it to the results file more than once
851                 set<string>::iterator itNames;
852                 set<string> chimerasInFile;
853                 set<string>::iterator itChimeras;
854
855         if (!m->isBlank(accnosFileName)) {
856             //edit accnos file
857             ifstream in2;
858             m->openInputFile(accnosFileName, in2);
859             
860             while (!in2.eof()) {
861                 if (m->control_pressed) { in2.close(); out2.close(); m->mothurRemove(outputFileName); m->mothurRemove((accnosFileName+".temp")); return 0; }
862                 
863                 in2 >> name; m->gobble(in2);
864                 
865                 //find unique name
866                 itUnique = uniqueNames.find(name);
867                 
868                 if (itUnique == uniqueNames.end()) { m->mothurOut("[ERROR]: trouble parsing accnos results. Cannot find " + name + "."); m->mothurOutEndLine(); m->control_pressed = true; }
869                 else {
870                     itChimeras = chimerasInFile.find((itUnique->second));
871                     
872                     if (itChimeras == chimerasInFile.end()) {
873                         out2 << itUnique->second << endl;
874                         chimerasInFile.insert((itUnique->second));
875                         total++;
876                     }
877                 }
878             }
879             in2.close();
880         }
881                 out2.close();
882                 
883                 m->mothurRemove(accnosFileName);
884                 rename((accnosFileName+".temp").c_str(), accnosFileName.c_str());
885                 
886                 
887                 
888                 //edit chimera file
889                 ifstream in; 
890                 m->openInputFile(outputFileName, in);
891                 
892                 ofstream out;
893                 m->openOutputFile(outputFileName+".temp", out); out.setf(ios::fixed, ios::floatfield); out.setf(ios::showpoint);
894                 out << "Score\tQuery\tParentA\tParentB\tIdQM\tIdQA\tIdQB\tIdAB\tIdQT\tLY\tLN\tLA\tRY\tRN\tRA\tDiv\tYN\n";
895                 
896                 float temp1;
897                 string parent1, parent2, temp2, temp3, temp4, temp5, temp6, temp7, temp8, temp9, temp10, temp11, temp12, temp13, flag;
898                 name = "";
899                 namesInFile.clear();    
900                 //assumptions - in file each read will always look like - if uchime source is updated, revisit this code.
901                 /*                                                                              1       2       3       4       5       6       7       8       9       10      11      12      13      14      15
902                  0.000000       F11Fcsw_33372/ab=18/            *       *       *       *       *       *       *       *       *       *       *       *       *       *       N
903                  0.018300       F11Fcsw_14980/ab=16/            F11Fcsw_1915/ab=35/     F11Fcsw_6032/ab=42/     79.9    78.7    78.2    78.7    79.2    3       0       5       11      10      20      1.46    N
904                 */
905                 
906                 while (!in.eof()) {
907                         
908                         if (m->control_pressed) { in.close(); out.close(); m->mothurRemove((outputFileName+".temp")); return 0; }
909                         
910                         bool print = false;
911                         in >> temp1;    m->gobble(in);
912                         in >> name;             m->gobble(in);
913                         in >> parent1;  m->gobble(in);
914                         in >> parent2;  m->gobble(in);
915                         in >> temp2 >> temp3 >> temp4 >> temp5 >> temp6 >> temp7 >> temp8 >> temp9 >> temp10 >> temp11 >> temp12 >> temp13 >> flag;
916                         m->gobble(in);
917                         
918                         //parse name - name will look like U68590/ab=1/
919                         string restOfName = "";
920                         int pos = name.find_first_of('/');
921                         if (pos != string::npos) {
922                                 restOfName = name.substr(pos);
923                                 name = name.substr(0, pos);
924                         }
925                         
926                         //find unique name
927                         itUnique = uniqueNames.find(name);
928                         
929                         if (itUnique == uniqueNames.end()) { m->mothurOut("[ERROR]: trouble parsing chimera results. Cannot find "+ name + "."); m->mothurOutEndLine(); m->control_pressed = true; }
930                         else {
931                                 name = itUnique->second;
932                                 //is this name already in the file
933                                 itNames = namesInFile.find((name));
934                                 
935                                 if (itNames == namesInFile.end()) { //no not in file
936                                         if (flag == "N") { //are you really a no??
937                                                 //is this sequence really not chimeric??
938                                                 itChimeras = chimerasInFile.find(name);
939                                                 
940                                                 //then you really are a no so print, otherwise skip
941                                                 if (itChimeras == chimerasInFile.end()) { print = true; }
942                                         }else{ print = true; }
943                                 }
944                         }
945                         
946                         if (print) {
947                                 out << temp1 << '\t' << name << restOfName << '\t';
948                                 namesInFile.insert(name);
949                                 
950                                 //parse parent1 names
951                                 if (parent1 != "*") {
952                                         restOfName = "";
953                                         pos = parent1.find_first_of('/');
954                                         if (pos != string::npos) {
955                                                 restOfName = parent1.substr(pos);
956                                                 parent1 = parent1.substr(0, pos);
957                                         }
958                                         
959                                         itUnique = uniqueNames.find(parent1);
960                                         if (itUnique == uniqueNames.end()) { m->mothurOut("[ERROR]: trouble parsing chimera results. Cannot find parentA "+ parent1 + "."); m->mothurOutEndLine(); m->control_pressed = true; }
961                                         else {  out << itUnique->second << restOfName << '\t';  }
962                                 }else { out << parent1 << '\t'; }
963                                 
964                                 //parse parent2 names
965                                 if (parent2 != "*") {
966                                         restOfName = "";
967                                         pos = parent2.find_first_of('/');
968                                         if (pos != string::npos) {
969                                                 restOfName = parent2.substr(pos);
970                                                 parent2 = parent2.substr(0, pos);
971                                         }
972                                         
973                                         itUnique = uniqueNames.find(parent2);
974                                         if (itUnique == uniqueNames.end()) { m->mothurOut("[ERROR]: trouble parsing chimera results. Cannot find parentB "+ parent2 + "."); m->mothurOutEndLine(); m->control_pressed = true; }
975                                         else {  out << itUnique->second << restOfName << '\t';  }
976                                 }else { out << parent2 << '\t'; }
977                                 
978                                 out << temp2 << '\t' << temp3 << '\t' << temp4 << '\t' << temp5 << '\t' << temp6 << '\t' << temp7 << '\t' << temp8 << '\t' << temp9 << '\t' << temp10 << '\t' << temp11 << '\t' << temp12 << temp13 << '\t' << flag << endl;    
979                         }
980                 }
981                 in.close();
982                 out.close();
983                 
984                 m->mothurRemove(outputFileName);
985                 rename((outputFileName+".temp").c_str(), outputFileName.c_str());
986                 
987                                 
988                 //edit anls file
989                 //assumptions - in file each read will always look like - if uchime source is updated, revisit this code.
990                 /*
991                  ------------------------------------------------------------------------
992                  Query   (  179 nt) F21Fcsw_11639/ab=591/
993                  ParentA (  179 nt) F11Fcsw_6529/ab=1625/
994                  ParentB (  181 nt) F21Fcsw_12128/ab=1827/
995                  
996                  A     1 AAGgAAGAtTAATACaagATGgCaTCatgAGtccgCATgTtcAcatGATTAAAG--gTaTtcCGGTagacGATGGGGATG 78
997                  Q     1 AAGTAAGACTAATACCCAATGACGTCTCTAGAAGACATCTGAAAGAGATTAAAG--ATTTATCGGTGATGGATGGGGATG 78
998                  B     1 AAGgAAGAtTAATcCaggATGggaTCatgAGttcACATgTccgcatGATTAAAGgtATTTtcCGGTagacGATGGGGATG 80
999                  Diffs      N    N    A N?N   N N  NNN  N?NB   N ?NaNNN          B B NN    NNNN          
1000                  Votes      0    0    + 000   0 0  000  000+   0 00!000            + 00    0000          
1001                  Model   AAAAAAAAAAAAAAAAAAAAAAxBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB
1002                  
1003                  A    79 CGTtccATTAGaTaGTaGGCGGGGTAACGGCCCACCtAGtCttCGATggaTAGGGGTTCTGAGAGGAAGGTCCCCCACAT 158
1004                  Q    79 CGTCTGATTAGCTTGTTGGCGGGGTAACGGCCCACCAAGGCAACGATCAGTAGGGGTTCTGAGAGGAAGGTCCCCCACAT 158
1005                  B    81 CGTtccATTAGaTaGTaGGCGGGGTAACGGCCCACCtAGtCAACGATggaTAGGGGTTCTGAGAGGAAGGTCCCCCACAT 160
1006                  Diffs      NNN     N N  N                   N  N BB    NNN                              
1007                  Votes      000     0 0  0                   0  0 ++    000                              
1008                  Model   BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB
1009                  
1010                  A   159 TGGAACTGAGACACGGTCCAA 179
1011                  Q   159 TGGAACTGAGACACGGTCCAA 179
1012                  B   161 TGGAACTGAGACACGGTCCAA 181
1013                  Diffs                        
1014                  Votes                        
1015                  Model   BBBBBBBBBBBBBBBBBBBBB
1016                  
1017                  Ids.  QA 76.6%, QB 77.7%, AB 93.7%, QModel 78.9%, Div. +1.5%
1018                  Diffs Left 7: N 0, A 6, Y 1 (14.3%); Right 35: N 1, A 30, Y 4 (11.4%), Score 0.0047
1019                 */
1020                 if (chimealns) {
1021                         ifstream in3; 
1022                         m->openInputFile(alnsFileName, in3);
1023                 
1024                         ofstream out3;
1025                         m->openOutputFile(alnsFileName+".temp", out3); out3.setf(ios::fixed, ios::floatfield); out3.setf(ios::showpoint);
1026                 
1027                         name = "";
1028                         namesInFile.clear();
1029                         string line = "";
1030                         
1031                         while (!in3.eof()) {
1032                                 if (m->control_pressed) { in3.close(); out3.close(); m->mothurRemove(outputFileName); m->mothurRemove((accnosFileName)); m->mothurRemove((alnsFileName+".temp")); return 0; }
1033                                 
1034                                 line = "";
1035                                 line = m->getline(in3); 
1036                                 string temp = "";
1037                                 
1038                                 if (line != "") {
1039                                         istringstream iss(line);
1040                                         iss >> temp;
1041                                         
1042                                         //are you a name line
1043                                         if ((temp == "Query") || (temp == "ParentA") || (temp == "ParentB")) {
1044                                                 int spot = 0;
1045                                                 for (int i = 0; i < line.length(); i++) {
1046                                                         spot = i;
1047                                                         if (line[i] == ')') { break; }
1048                                                         else { out3 << line[i]; }
1049                                                 }
1050                                                 
1051                                                 if (spot == (line.length() - 1)) { m->mothurOut("[ERROR]: could not line sequence name in line " + line + "."); m->mothurOutEndLine(); m->control_pressed = true; }
1052                                                 else if ((spot+2) > (line.length() - 1)) { m->mothurOut("[ERROR]: could not line sequence name in line " + line + "."); m->mothurOutEndLine(); m->control_pressed = true; }
1053                                                 else {
1054                                                         out << line[spot] << line[spot+1];
1055                                                         
1056                                                         name = line.substr(spot+2);
1057                                                         
1058                                                         //parse name - name will either look like U68590/ab=1/ or U68590
1059                                                         string restOfName = "";
1060                                                         int pos = name.find_first_of('/');
1061                                                         if (pos != string::npos) {
1062                                                                 restOfName = name.substr(pos);
1063                                                                 name = name.substr(0, pos);
1064                                                         }
1065                                                         
1066                                                         //find unique name
1067                                                         itUnique = uniqueNames.find(name);
1068                                                         
1069                                                         if (itUnique == uniqueNames.end()) { m->mothurOut("[ERROR]: trouble parsing alns results. Cannot find "+ name + "."); m->mothurOutEndLine();m->control_pressed = true;  }
1070                                                         else {
1071                                                                 //only limit repeats on query names
1072                                                                 if (temp == "Query") {
1073                                                                         itNames = namesInFile.find((itUnique->second));
1074                                                                         
1075                                                                         if (itNames == namesInFile.end()) {
1076                                                                                 out << itUnique->second << restOfName << endl;
1077                                                                                 namesInFile.insert((itUnique->second));
1078                                                                         }
1079                                                                 }else { out << itUnique->second << restOfName << endl;  }
1080                                                         }
1081                                                         
1082                                                 }
1083                                                 
1084                                         }else { //not need to alter line
1085                                                 out3 << line << endl;
1086                                         }
1087                                 }else { out3 << endl; }
1088                         }
1089                         in3.close();
1090                         out3.close();
1091                         
1092                         m->mothurRemove(alnsFileName);
1093                         rename((alnsFileName+".temp").c_str(), alnsFileName.c_str());
1094                 }
1095                 
1096                 return total;
1097         }
1098         catch(exception& e) {
1099                 m->errorOut(e, "ChimeraUchimeCommand", "deconvoluteResults");
1100                 exit(1);
1101         }
1102 }       
1103 //**********************************************************************************************************************
1104 int ChimeraUchimeCommand::printFile(vector<seqPriorityNode>& nameMapCount, string filename){
1105         try {
1106                 
1107                 sort(nameMapCount.begin(), nameMapCount.end(), compareSeqPriorityNodes);
1108                 
1109                 ofstream out;
1110                 m->openOutputFile(filename, out);
1111                 
1112                 //print new file in order of
1113                 for (int i = 0; i < nameMapCount.size(); i++) {
1114                         out << ">" << nameMapCount[i].name  << "/ab=" << nameMapCount[i].numIdentical << "/" << endl << nameMapCount[i].seq << endl;
1115                 }
1116                 out.close();
1117                 
1118                 return 0;
1119         }
1120         catch(exception& e) {
1121                 m->errorOut(e, "ChimeraUchimeCommand", "printFile");
1122                 exit(1);
1123         }
1124 }       
1125 //**********************************************************************************************************************
1126 int ChimeraUchimeCommand::readFasta(string filename, map<string, string>& seqs){
1127         try {
1128                 //create input file for uchime
1129                 //read through fastafile and store info
1130                 ifstream in;
1131                 m->openInputFile(filename, in);
1132                 
1133                 while (!in.eof()) {
1134                         
1135                         if (m->control_pressed) { in.close(); return 0; }
1136                         
1137                         Sequence seq(in); m->gobble(in);
1138                         seqs[seq.getName()] = seq.getAligned();
1139                 }
1140                 in.close();
1141                 
1142                 return 0;
1143         }
1144         catch(exception& e) {
1145                 m->errorOut(e, "ChimeraUchimeCommand", "readFasta");
1146                 exit(1);
1147         }
1148 }       
1149 //**********************************************************************************************************************
1150
1151 string ChimeraUchimeCommand::getNamesFile(string& inputFile){
1152         try {
1153                 string nameFile = "";
1154                 
1155                 m->mothurOutEndLine(); m->mothurOut("No namesfile given, running unique.seqs command to generate one."); m->mothurOutEndLine(); m->mothurOutEndLine();
1156                 
1157                 //use unique.seqs to create new name and fastafile
1158                 string inputString = "fasta=" + inputFile;
1159                 m->mothurOut("/******************************************/"); m->mothurOutEndLine(); 
1160                 m->mothurOut("Running command: unique.seqs(" + inputString + ")"); m->mothurOutEndLine(); 
1161                 m->mothurCalling = true;
1162         
1163                 Command* uniqueCommand = new DeconvoluteCommand(inputString);
1164                 uniqueCommand->execute();
1165                 
1166                 map<string, vector<string> > filenames = uniqueCommand->getOutputFiles();
1167                 
1168                 delete uniqueCommand;
1169                 m->mothurCalling = false;
1170                 m->mothurOut("/******************************************/"); m->mothurOutEndLine(); 
1171                 
1172                 nameFile = filenames["name"][0];
1173                 inputFile = filenames["fasta"][0];
1174                 
1175                 return nameFile;
1176         }
1177         catch(exception& e) {
1178                 m->errorOut(e, "ChimeraUchimeCommand", "getNamesFile");
1179                 exit(1);
1180         }
1181 }
1182 //**********************************************************************************************************************
1183 int ChimeraUchimeCommand::driverGroups(string outputFName, string filename, string accnos, string alns, string countlist, int start, int end, vector<string> groups){
1184         try {
1185                 
1186                 int totalSeqs = 0;
1187                 int numChimeras = 0;
1188         
1189         
1190         ofstream outCountList;
1191         if (hasCount && dups) { m->openOutputFile(countlist, outCountList); }
1192         
1193                 for (int i = start; i < end; i++) {
1194                         int start = time(NULL);  if (m->control_pressed) {  outCountList.close(); m->mothurRemove(countlist); return 0; }
1195             
1196                         int error;
1197             if (hasCount) { error = cparser->getSeqs(groups[i], filename, true); if ((error == 1) || m->control_pressed) {  return 0; } }
1198             else { error = sparser->getSeqs(groups[i], filename, true); if ((error == 1) || m->control_pressed) {  return 0; } }
1199                         
1200                         int numSeqs = driver((outputFName + groups[i]), filename, (accnos+groups[i]), (alns+ groups[i]), numChimeras);
1201                         totalSeqs += numSeqs;
1202                         
1203                         if (m->control_pressed) { return 0; }
1204                         
1205                         //remove file made for uchime
1206                         if (!m->debug) {  m->mothurRemove(filename);  }
1207             else { m->mothurOut("[DEBUG]: saving file: " + filename + ".\n"); }
1208                         
1209             //if we provided a count file with group info and set dereplicate=t, then we want to create a *.pick.count_table
1210             //This table will zero out group counts for seqs determined to be chimeric by that group.
1211             if (dups) {
1212                 if (!m->isBlank(accnos+groups[i])) {
1213                     ifstream in;
1214                     m->openInputFile(accnos+groups[i], in);
1215                     string name;
1216                     if (hasCount) {
1217                         while (!in.eof()) {
1218                             in >> name; m->gobble(in);
1219                             outCountList << name << '\t' << groups[i] << endl;
1220                         }
1221                         in.close();
1222                     }else {
1223                         map<string, string> thisnamemap = sparser->getNameMap(groups[i]);
1224                         map<string, string>::iterator itN;
1225                         ofstream out;
1226                         m->openOutputFile(accnos+groups[i]+".temp", out);
1227                         while (!in.eof()) {
1228                             in >> name; m->gobble(in); 
1229                             itN = thisnamemap.find(name);
1230                             if (itN != thisnamemap.end()) {
1231                                 vector<string> tempNames; m->splitAtComma(itN->second, tempNames); 
1232                                 for (int j = 0; j < tempNames.size(); j++) { out << tempNames[j] << endl; }
1233                                 
1234                             }else { m->mothurOut("[ERROR]: parsing cannot find " + name + ".\n"); m->control_pressed = true; }
1235                         }
1236                         out.close();
1237                         in.close();
1238                         m->renameFile(accnos+groups[i]+".temp", accnos+groups[i]);
1239                     }
1240                    
1241                 }
1242             }
1243             
1244                         //append files
1245                         m->appendFiles((outputFName+groups[i]), outputFName); m->mothurRemove((outputFName+groups[i]));
1246                         m->appendFiles((accnos+groups[i]), accnos); m->mothurRemove((accnos+groups[i]));
1247                         if (chimealns) { m->appendFiles((alns+groups[i]), alns); m->mothurRemove((alns+groups[i])); }
1248                         
1249                         m->mothurOutEndLine(); m->mothurOut("It took " + toString(time(NULL) - start) + " secs to check " + toString(numSeqs) + " sequences from group " + groups[i] + ".");    m->mothurOutEndLine();                                  
1250                 }
1251
1252         if (hasCount && dups) { outCountList.close(); }
1253         
1254         return totalSeqs;
1255                 
1256         }
1257         catch(exception& e) {
1258                 m->errorOut(e, "ChimeraUchimeCommand", "driverGroups");
1259                 exit(1);
1260         }
1261 }       
1262 //**********************************************************************************************************************
1263
1264 int ChimeraUchimeCommand::driver(string outputFName, string filename, string accnos, string alns, int& numChimeras){
1265         try {
1266                 
1267                 outputFName = m->getFullPathName(outputFName);
1268                 filename = m->getFullPathName(filename);
1269                 alns = m->getFullPathName(alns);
1270                 
1271                 //to allow for spaces in the path
1272                 outputFName = "\"" + outputFName + "\"";
1273                 filename = "\"" + filename + "\"";
1274                 alns = "\"" + alns + "\"";
1275                                 
1276                 vector<char*> cPara;
1277                 
1278                 string uchimeCommand = uchimeLocation;
1279         uchimeCommand = "\"" + uchimeCommand + "\" ";
1280         
1281         char* tempUchime;
1282                 tempUchime= new char[uchimeCommand.length()+1]; 
1283                 *tempUchime = '\0';
1284                 strncat(tempUchime, uchimeCommand.c_str(), uchimeCommand.length());
1285                 cPara.push_back(tempUchime);
1286                 
1287         //are you using a reference file
1288                 if (templatefile != "self") {
1289             string outputFileName = filename.substr(1, filename.length()-2) + ".uchime_formatted";
1290             prepFile(filename.substr(1, filename.length()-2), outputFileName);
1291             filename = outputFileName;
1292             filename = "\"" + filename + "\"";
1293                         //add reference file
1294                         char* tempRef = new char[5]; 
1295                         //strcpy(tempRef, "--db"); 
1296                         *tempRef = '\0'; strncat(tempRef, "--db", 4);
1297                         cPara.push_back(tempRef);  
1298                         char* tempR = new char[templatefile.length()+1];
1299                         //strcpy(tempR, templatefile.c_str());
1300                         *tempR = '\0'; strncat(tempR, templatefile.c_str(), templatefile.length());
1301                         cPara.push_back(tempR);
1302                 }
1303                 
1304                 char* tempIn = new char[8]; 
1305                 *tempIn = '\0'; strncat(tempIn, "--input", 7);
1306                 //strcpy(tempIn, "--input"); 
1307                 cPara.push_back(tempIn);
1308                 char* temp = new char[filename.length()+1];
1309                 *temp = '\0'; strncat(temp, filename.c_str(), filename.length());
1310                 //strcpy(temp, filename.c_str());
1311                 cPara.push_back(temp);
1312                 
1313                 char* tempO = new char[12]; 
1314                 *tempO = '\0'; strncat(tempO, "--uchimeout", 11);
1315                 //strcpy(tempO, "--uchimeout"); 
1316                 cPara.push_back(tempO);
1317                 char* tempout = new char[outputFName.length()+1];
1318                 //strcpy(tempout, outputFName.c_str());
1319                 *tempout = '\0'; strncat(tempout, outputFName.c_str(), outputFName.length());
1320                 cPara.push_back(tempout);
1321                 
1322                 if (chimealns) {
1323                         char* tempA = new char[13]; 
1324                         *tempA = '\0'; strncat(tempA, "--uchimealns", 12);
1325                         //strcpy(tempA, "--uchimealns"); 
1326                         cPara.push_back(tempA);
1327                         char* tempa = new char[alns.length()+1];
1328                         //strcpy(tempa, alns.c_str());
1329                         *tempa = '\0'; strncat(tempa, alns.c_str(), alns.length());
1330                         cPara.push_back(tempa);
1331                 }
1332         
1333         if (strand != "") {
1334                         char* tempA = new char[9]; 
1335                         *tempA = '\0'; strncat(tempA, "--strand", 8);
1336                         cPara.push_back(tempA);
1337                         char* tempa = new char[strand.length()+1];
1338                         *tempa = '\0'; strncat(tempa, strand.c_str(), strand.length());
1339                         cPara.push_back(tempa);
1340                 }
1341                 
1342                 if (useAbskew) {
1343                         char* tempskew = new char[9];
1344                         *tempskew = '\0'; strncat(tempskew, "--abskew", 8);
1345                         //strcpy(tempskew, "--abskew"); 
1346                         cPara.push_back(tempskew);
1347                         char* tempSkew = new char[abskew.length()+1];
1348                         //strcpy(tempSkew, abskew.c_str());
1349                         *tempSkew = '\0'; strncat(tempSkew, abskew.c_str(), abskew.length());
1350                         cPara.push_back(tempSkew);
1351                 }
1352                 
1353                 if (useMinH) {
1354                         char* tempminh = new char[7]; 
1355                         *tempminh = '\0'; strncat(tempminh, "--minh", 6);
1356                         //strcpy(tempminh, "--minh"); 
1357                         cPara.push_back(tempminh);
1358                         char* tempMinH = new char[minh.length()+1];
1359                         *tempMinH = '\0'; strncat(tempMinH, minh.c_str(), minh.length());
1360                         //strcpy(tempMinH, minh.c_str());
1361                         cPara.push_back(tempMinH);
1362                 }
1363                 
1364                 if (useMindiv) {
1365                         char* tempmindiv = new char[9]; 
1366                         *tempmindiv = '\0'; strncat(tempmindiv, "--mindiv", 8);
1367                         //strcpy(tempmindiv, "--mindiv"); 
1368                         cPara.push_back(tempmindiv);
1369                         char* tempMindiv = new char[mindiv.length()+1];
1370                         *tempMindiv = '\0'; strncat(tempMindiv, mindiv.c_str(), mindiv.length());
1371                         //strcpy(tempMindiv, mindiv.c_str());
1372                         cPara.push_back(tempMindiv);
1373                 }
1374                 
1375                 if (useXn) {
1376                         char* tempxn = new char[5]; 
1377                         //strcpy(tempxn, "--xn"); 
1378                         *tempxn = '\0'; strncat(tempxn, "--xn", 4);
1379                         cPara.push_back(tempxn);
1380                         char* tempXn = new char[xn.length()+1];
1381                         //strcpy(tempXn, xn.c_str());
1382                         *tempXn = '\0'; strncat(tempXn, xn.c_str(), xn.length());
1383                         cPara.push_back(tempXn);
1384                 }
1385                 
1386                 if (useDn) {
1387                         char* tempdn = new char[5]; 
1388                         //strcpy(tempdn, "--dn"); 
1389                         *tempdn = '\0'; strncat(tempdn, "--dn", 4);
1390                         cPara.push_back(tempdn);
1391                         char* tempDn = new char[dn.length()+1];
1392                         *tempDn = '\0'; strncat(tempDn, dn.c_str(), dn.length());
1393                         //strcpy(tempDn, dn.c_str());
1394                         cPara.push_back(tempDn);
1395                 }
1396                 
1397                 if (useXa) {
1398                         char* tempxa = new char[5]; 
1399                         //strcpy(tempxa, "--xa"); 
1400                         *tempxa = '\0'; strncat(tempxa, "--xa", 4);
1401                         cPara.push_back(tempxa);
1402                         char* tempXa = new char[xa.length()+1];
1403                         *tempXa = '\0'; strncat(tempXa, xa.c_str(), xa.length());
1404                         //strcpy(tempXa, xa.c_str());
1405                         cPara.push_back(tempXa);
1406                 }
1407                 
1408                 if (useChunks) {
1409                         char* tempchunks = new char[9]; 
1410                         //strcpy(tempchunks, "--chunks"); 
1411                         *tempchunks = '\0'; strncat(tempchunks, "--chunks", 8);
1412                         cPara.push_back(tempchunks);
1413                         char* tempChunks = new char[chunks.length()+1];
1414                         *tempChunks = '\0'; strncat(tempChunks, chunks.c_str(), chunks.length());
1415                         //strcpy(tempChunks, chunks.c_str());
1416                         cPara.push_back(tempChunks);
1417                 }
1418                 
1419                 if (useMinchunk) {
1420                         char* tempminchunk = new char[11]; 
1421                         //strcpy(tempminchunk, "--minchunk"); 
1422                         *tempminchunk = '\0'; strncat(tempminchunk, "--minchunk", 10);
1423                         cPara.push_back(tempminchunk);
1424                         char* tempMinchunk = new char[minchunk.length()+1];
1425                         *tempMinchunk = '\0'; strncat(tempMinchunk, minchunk.c_str(), minchunk.length());
1426                         //strcpy(tempMinchunk, minchunk.c_str());
1427                         cPara.push_back(tempMinchunk);
1428                 }
1429                 
1430                 if (useIdsmoothwindow) {
1431                         char* tempidsmoothwindow = new char[17]; 
1432                         *tempidsmoothwindow = '\0'; strncat(tempidsmoothwindow, "--idsmoothwindow", 16);
1433                         //strcpy(tempidsmoothwindow, "--idsmoothwindow"); 
1434                         cPara.push_back(tempidsmoothwindow);
1435                         char* tempIdsmoothwindow = new char[idsmoothwindow.length()+1];
1436                         *tempIdsmoothwindow = '\0'; strncat(tempIdsmoothwindow, idsmoothwindow.c_str(), idsmoothwindow.length());
1437                         //strcpy(tempIdsmoothwindow, idsmoothwindow.c_str());
1438                         cPara.push_back(tempIdsmoothwindow);
1439                 }
1440                 
1441                 /*if (useMinsmoothid) {
1442                         char* tempminsmoothid = new char[14]; 
1443                         //strcpy(tempminsmoothid, "--minsmoothid"); 
1444                         *tempminsmoothid = '\0'; strncat(tempminsmoothid, "--minsmoothid", 13);
1445                         cPara.push_back(tempminsmoothid);
1446                         char* tempMinsmoothid = new char[minsmoothid.length()+1];
1447                         *tempMinsmoothid = '\0'; strncat(tempMinsmoothid, minsmoothid.c_str(), minsmoothid.length());
1448                         //strcpy(tempMinsmoothid, minsmoothid.c_str());
1449                         cPara.push_back(tempMinsmoothid);
1450                 }*/
1451                 
1452                 if (useMaxp) {
1453                         char* tempmaxp = new char[7]; 
1454                         //strcpy(tempmaxp, "--maxp"); 
1455                         *tempmaxp = '\0'; strncat(tempmaxp, "--maxp", 6);
1456                         cPara.push_back(tempmaxp);
1457                         char* tempMaxp = new char[maxp.length()+1];
1458                         *tempMaxp = '\0'; strncat(tempMaxp, maxp.c_str(), maxp.length());
1459                         //strcpy(tempMaxp, maxp.c_str());
1460                         cPara.push_back(tempMaxp);
1461                 }
1462                 
1463                 if (!skipgaps) {
1464                         char* tempskipgaps = new char[13]; 
1465                         //strcpy(tempskipgaps, "--[no]skipgaps");
1466                         *tempskipgaps = '\0'; strncat(tempskipgaps, "--noskipgaps", 12);
1467                         cPara.push_back(tempskipgaps);
1468                 }
1469                 
1470                 if (!skipgaps2) {
1471                         char* tempskipgaps2 = new char[14]; 
1472                         //strcpy(tempskipgaps2, "--[no]skipgaps2"); 
1473                         *tempskipgaps2 = '\0'; strncat(tempskipgaps2, "--noskipgaps2", 13);
1474                         cPara.push_back(tempskipgaps2);
1475                 }
1476                 
1477                 if (useMinlen) {
1478                         char* tempminlen = new char[9]; 
1479                         *tempminlen = '\0'; strncat(tempminlen, "--minlen", 8);
1480                         //strcpy(tempminlen, "--minlen"); 
1481                         cPara.push_back(tempminlen);
1482                         char* tempMinlen = new char[minlen.length()+1];
1483                         //strcpy(tempMinlen, minlen.c_str());
1484                         *tempMinlen = '\0'; strncat(tempMinlen, minlen.c_str(), minlen.length());
1485                         cPara.push_back(tempMinlen);
1486                 }
1487                 
1488                 if (useMaxlen) {
1489                         char* tempmaxlen = new char[9]; 
1490                         //strcpy(tempmaxlen, "--maxlen"); 
1491                         *tempmaxlen = '\0'; strncat(tempmaxlen, "--maxlen", 8);
1492                         cPara.push_back(tempmaxlen);
1493                         char* tempMaxlen = new char[maxlen.length()+1];
1494                         *tempMaxlen = '\0'; strncat(tempMaxlen, maxlen.c_str(), maxlen.length());
1495                         //strcpy(tempMaxlen, maxlen.c_str());
1496                         cPara.push_back(tempMaxlen);
1497                 }
1498                 
1499                 if (ucl) {
1500                         char* tempucl = new char[5]; 
1501                         strcpy(tempucl, "--ucl"); 
1502                         cPara.push_back(tempucl);
1503                 }
1504                 
1505                 if (useQueryfract) {
1506                         char* tempqueryfract = new char[13]; 
1507                         *tempqueryfract = '\0'; strncat(tempqueryfract, "--queryfract", 12);
1508                         //strcpy(tempqueryfract, "--queryfract"); 
1509                         cPara.push_back(tempqueryfract);
1510                         char* tempQueryfract = new char[queryfract.length()+1];
1511                         *tempQueryfract = '\0'; strncat(tempQueryfract, queryfract.c_str(), queryfract.length());
1512                         //strcpy(tempQueryfract, queryfract.c_str());
1513                         cPara.push_back(tempQueryfract);
1514                 }
1515                 
1516                 
1517                 char** uchimeParameters;
1518                 uchimeParameters = new char*[cPara.size()];
1519                 string commandString = "";
1520                 for (int i = 0; i < cPara.size(); i++) {  uchimeParameters[i] = cPara[i];  commandString += toString(cPara[i]) + " "; } 
1521                 //int numArgs = cPara.size();
1522                 
1523                 //uchime_main(numArgs, uchimeParameters); 
1524                 //cout << "commandString = " << commandString << endl;
1525 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
1526 #else
1527                 commandString = "\"" + commandString + "\"";
1528 #endif
1529         if (m->debug) { m->mothurOut("[DEBUG]: uchime command = " + commandString + ".\n"); }
1530                 system(commandString.c_str());
1531                 
1532                 //free memory
1533                 for(int i = 0; i < cPara.size(); i++)  {  delete cPara[i];  }
1534                 delete[] uchimeParameters; 
1535                 
1536                 //remove "" from filenames
1537                 outputFName = outputFName.substr(1, outputFName.length()-2);
1538                 filename = filename.substr(1, filename.length()-2);
1539                 alns = alns.substr(1, alns.length()-2);
1540                 
1541                 if (m->control_pressed) { return 0; }
1542                 
1543                 //create accnos file from uchime results
1544                 ifstream in; 
1545                 m->openInputFile(outputFName, in);
1546                 
1547                 ofstream out;
1548                 m->openOutputFile(accnos, out);
1549                 
1550                 int num = 0;
1551                 numChimeras = 0;
1552                 while(!in.eof()) {
1553                         
1554                         if (m->control_pressed) { break; }
1555                         
1556                         string name = "";
1557                         string chimeraFlag = "";
1558                         //in >> chimeraFlag >> name;
1559                         
1560             string line = m->getline(in);
1561             vector<string> pieces = m->splitWhiteSpace(line);
1562             if (pieces.size() > 2) { 
1563                 name = pieces[1];
1564                 //fix name if needed
1565                 if (templatefile == "self") { 
1566                     name = name.substr(0, name.length()-1); //rip off last /
1567                     name = name.substr(0, name.find_last_of('/'));
1568                 }
1569                 
1570                 chimeraFlag = pieces[pieces.size()-1];
1571                         }
1572                         //for (int i = 0; i < 15; i++) {  in >> chimeraFlag; }
1573                         m->gobble(in);
1574                         
1575                         if (chimeraFlag == "Y") {  out << name << endl; numChimeras++; }
1576                         num++;
1577                 }
1578                 in.close();
1579                 out.close();
1580                 
1581         //if (templatefile != "self") {  m->mothurRemove(filename); }
1582         
1583                 return num;
1584         }
1585         catch(exception& e) {
1586                 m->errorOut(e, "ChimeraUchimeCommand", "driver");
1587                 exit(1);
1588         }
1589 }
1590 /**************************************************************************************************/
1591 //uchime can't handle some of the things allowed in mothurs fasta files. This functions "cleans up" the file.
1592 int ChimeraUchimeCommand::prepFile(string filename, string output) {
1593         try {
1594         
1595         ifstream in;
1596         m->openInputFile(filename, in);
1597         
1598         ofstream out;
1599         m->openOutputFile(output, out);
1600         
1601         while (!in.eof()) {
1602             if (m->control_pressed) { break;  }
1603             
1604             Sequence seq(in); m->gobble(in);
1605             
1606             if (seq.getName() != "") { seq.printSequence(out); }
1607         }
1608         in.close();
1609         out.close();
1610         
1611         return 0;
1612     }
1613         catch(exception& e) {
1614                 m->errorOut(e, "ChimeraUchimeCommand", "prepFile");
1615                 exit(1);
1616         }
1617 }
1618 /**************************************************************************************************/
1619
1620 int ChimeraUchimeCommand::createProcesses(string outputFileName, string filename, string accnos, string alns, int& numChimeras) {
1621         try {
1622                 
1623                 processIDS.clear();
1624                 int process = 1;
1625                 int num = 0;
1626                 vector<string> files;
1627                 
1628 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)          
1629                 //break up file into multiple files
1630                 m->divideFile(filename, processors, files);
1631                 
1632                 if (m->control_pressed) {  return 0;  }
1633                                 
1634                 //loop through and create all the processes you want
1635                 while (process != processors) {
1636                         int pid = fork();
1637                         
1638                         if (pid > 0) {
1639                                 processIDS.push_back(pid);  //create map from line number to pid so you can append files in correct order later
1640                                 process++;
1641                         }else if (pid == 0){
1642                                 num = driver(outputFileName + toString(getpid()) + ".temp", files[process], accnos + toString(getpid()) + ".temp", alns + toString(getpid()) + ".temp", numChimeras);
1643                                 
1644                                 //pass numSeqs to parent
1645                                 ofstream out;
1646                                 string tempFile = outputFileName + toString(getpid()) + ".num.temp";
1647                                 m->openOutputFile(tempFile, out);
1648                                 out << num << endl;
1649                                 out << numChimeras << endl;
1650                                 out.close();
1651                                 
1652                                 exit(0);
1653                         }else { 
1654                                 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine(); 
1655                                 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
1656                                 exit(0);
1657                         }
1658                 }
1659                 
1660                 //do my part
1661                 num = driver(outputFileName, files[0], accnos, alns, numChimeras);
1662                 
1663                 //force parent to wait until all the processes are done
1664                 for (int i=0;i<processIDS.size();i++) { 
1665                         int temp = processIDS[i];
1666                         wait(&temp);
1667                 }
1668                 
1669                 for (int i = 0; i < processIDS.size(); i++) {
1670                         ifstream in;
1671                         string tempFile =  outputFileName + toString(processIDS[i]) + ".num.temp";
1672                         m->openInputFile(tempFile, in);
1673                         if (!in.eof()) { 
1674                                 int tempNum = 0; 
1675                                 in >> tempNum; m->gobble(in);
1676                                 num += tempNum; 
1677                                 in >> tempNum;
1678                                 numChimeras += tempNum;
1679                         }
1680                         in.close(); m->mothurRemove(tempFile);
1681                 }
1682 #else
1683                 //////////////////////////////////////////////////////////////////////////////////////////////////////
1684                 //Windows version shared memory, so be careful when passing variables through the preClusterData struct. 
1685                 //Above fork() will clone, so memory is separate, but that's not the case with windows, 
1686                 //////////////////////////////////////////////////////////////////////////////////////////////////////
1687                 
1688                 //divide file
1689                 int count = 0;
1690                 int spot = 0;
1691                 map<int, ofstream*> filehandles;
1692                 map<int, ofstream*>::iterator it3;
1693                 
1694                 ofstream* temp;
1695                 for (int i = 0; i < processors; i++) {
1696                         temp = new ofstream;
1697                         filehandles[i] = temp;
1698                         m->openOutputFile(filename+toString(i)+".temp", *(temp));
1699                         files.push_back(filename+toString(i)+".temp");
1700                 }
1701                 
1702                 ifstream in;
1703                 m->openInputFile(filename, in);
1704                 
1705                 while(!in.eof()) {
1706                         
1707                         if (m->control_pressed) { in.close(); for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) { (*(it3->second)).close(); delete it3->second; } return 0; }
1708                         
1709                         Sequence tempSeq(in); m->gobble(in); 
1710                         
1711                         if (tempSeq.getName() != "") {
1712                                 tempSeq.printSequence(*(filehandles[spot])); 
1713                                 spot++; count++;
1714                                 if (spot == processors) { spot = 0; }
1715                         }
1716                 }
1717                 in.close();
1718                 
1719                 //delete memory
1720                 for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) {
1721                         (*(it3->second)).close();
1722                         delete it3->second;
1723                 }
1724                 
1725                 //sanity check for number of processors
1726                 if (count < processors) { processors = count; }
1727                 
1728                 vector<uchimeData*> pDataArray; 
1729                 DWORD   dwThreadIdArray[processors-1];
1730                 HANDLE  hThreadArray[processors-1]; 
1731                 vector<string> dummy; //used so that we can use the same struct for MyUchimeSeqsThreadFunction and MyUchimeThreadFunction
1732                 
1733                 //Create processor worker threads.
1734                 for( int i=1; i<processors; i++ ){
1735                         // Allocate memory for thread data.
1736                         string extension = toString(i) + ".temp";
1737                         
1738                         uchimeData* tempUchime = new uchimeData(outputFileName+extension, uchimeLocation, templatefile, files[i], "", "", "", accnos+extension, alns+extension, "", dummy, m, 0, 0,  i);
1739                         tempUchime->setBooleans(dups, useAbskew, chimealns, useMinH, useMindiv, useXn, useDn, useXa, useChunks, useMinchunk, useIdsmoothwindow, useMinsmoothid, useMaxp, skipgaps, skipgaps2, useMinlen, useMaxlen, ucl, useQueryfract, hasCount);
1740                         tempUchime->setVariables(abskew, minh, mindiv, xn, dn, xa, chunks, minchunk, idsmoothwindow, minsmoothid, maxp, minlen, maxlen, queryfract, strand);
1741                         
1742                         pDataArray.push_back(tempUchime);
1743                         processIDS.push_back(i);
1744                         
1745                         //MySeqSumThreadFunction is in header. It must be global or static to work with the threads.
1746                         //default security attributes, thread function name, argument to thread function, use default creation flags, returns the thread identifier
1747                         hThreadArray[i-1] = CreateThread(NULL, 0, MyUchimeSeqsThreadFunction, pDataArray[i-1], 0, &dwThreadIdArray[i-1]);   
1748                 }
1749                 
1750                 
1751                 //using the main process as a worker saves time and memory
1752                 num = driver(outputFileName, files[0], accnos, alns, numChimeras);
1753                 
1754                 //Wait until all threads have terminated.
1755                 WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE);
1756                 
1757                 //Close all thread handles and free memory allocations.
1758                 for(int i=0; i < pDataArray.size(); i++){
1759                         num += pDataArray[i]->count;
1760                         numChimeras += pDataArray[i]->numChimeras;
1761                         CloseHandle(hThreadArray[i]);
1762                         delete pDataArray[i];
1763                 }
1764 #endif          
1765                 
1766                 //append output files
1767                 for(int i=0;i<processIDS.size();i++){
1768                         m->appendFiles((outputFileName + toString(processIDS[i]) + ".temp"), outputFileName);
1769                         m->mothurRemove((outputFileName + toString(processIDS[i]) + ".temp"));
1770                         
1771                         m->appendFiles((accnos + toString(processIDS[i]) + ".temp"), accnos);
1772                         m->mothurRemove((accnos + toString(processIDS[i]) + ".temp"));
1773                         
1774                         if (chimealns) {
1775                                 m->appendFiles((alns + toString(processIDS[i]) + ".temp"), alns);
1776                                 m->mothurRemove((alns + toString(processIDS[i]) + ".temp"));
1777                         }
1778                 }
1779                 
1780                 //get rid of the file pieces.
1781                 for (int i = 0; i < files.size(); i++) { m->mothurRemove(files[i]); }
1782                 return num;     
1783         }
1784         catch(exception& e) {
1785                 m->errorOut(e, "ChimeraUchimeCommand", "createProcesses");
1786                 exit(1);
1787         }
1788 }
1789 /**************************************************************************************************/
1790
1791 int ChimeraUchimeCommand::createProcessesGroups(string outputFName, string filename, string accnos, string alns, string newCountFile, vector<string> groups, string nameFile, string groupFile, string fastaFile) {
1792         try {
1793                 
1794                 processIDS.clear();
1795                 int process = 1;
1796                 int num = 0;
1797         
1798         CountTable newCount;
1799         if (hasCount && dups) { newCount.readTable(nameFile, true, false); }
1800                 
1801                 //sanity check
1802                 if (groups.size() < processors) { processors = groups.size(); }
1803                 
1804                 //divide the groups between the processors
1805                 vector<linePair> lines;
1806         int remainingPairs = groups.size();
1807         int startIndex = 0;
1808         for (int remainingProcessors = processors; remainingProcessors > 0; remainingProcessors--) {
1809             int numPairs = remainingPairs; //case for last processor
1810             if (remainingProcessors != 1) { numPairs = ceil(remainingPairs / remainingProcessors); }
1811             lines.push_back(linePair(startIndex, (startIndex+numPairs))); //startIndex, endIndex
1812             startIndex = startIndex + numPairs;
1813             remainingPairs = remainingPairs - numPairs;
1814         }
1815
1816 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)          
1817                                 
1818                 //loop through and create all the processes you want
1819                 while (process != processors) {
1820                         int pid = fork();
1821                         
1822                         if (pid > 0) {
1823                                 processIDS.push_back(pid);  //create map from line number to pid so you can append files in correct order later
1824                                 process++;
1825                         }else if (pid == 0){
1826                                 num = driverGroups(outputFName + toString(getpid()) + ".temp", filename + toString(getpid()) + ".temp", accnos + toString(getpid()) + ".temp", alns + toString(getpid()) + ".temp", accnos + ".byCount." + toString(getpid()) + ".temp", lines[process].start, lines[process].end, groups);
1827                                 
1828                                 //pass numSeqs to parent
1829                                 ofstream out;
1830                                 string tempFile = outputFName + toString(getpid()) + ".num.temp";
1831                                 m->openOutputFile(tempFile, out);
1832                                 out << num << endl;
1833                                 out.close();
1834                                 
1835                                 exit(0);
1836                         }else { 
1837                                 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine(); 
1838                                 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
1839                                 exit(0);
1840                         }
1841                 }
1842                 
1843                 //do my part
1844                 num = driverGroups(outputFName, filename, accnos, alns, accnos + ".byCount", lines[0].start, lines[0].end, groups);
1845                 
1846                 //force parent to wait until all the processes are done
1847                 for (int i=0;i<processIDS.size();i++) { 
1848                         int temp = processIDS[i];
1849                         wait(&temp);
1850                 }
1851         
1852                 for (int i = 0; i < processIDS.size(); i++) {
1853                         ifstream in;
1854                         string tempFile =  outputFName + toString(processIDS[i]) + ".num.temp";
1855                         m->openInputFile(tempFile, in);
1856                         if (!in.eof()) { int tempNum = 0; in >> tempNum; num += tempNum; }
1857                         in.close(); m->mothurRemove(tempFile);
1858         }
1859         
1860 #else
1861                 //////////////////////////////////////////////////////////////////////////////////////////////////////
1862                 //Windows version shared memory, so be careful when passing variables through the uchimeData struct. 
1863                 //Above fork() will clone, so memory is separate, but that's not the case with windows, 
1864                 //////////////////////////////////////////////////////////////////////////////////////////////////////
1865                 
1866                 vector<uchimeData*> pDataArray; 
1867                 DWORD   dwThreadIdArray[processors-1];
1868                 HANDLE  hThreadArray[processors-1]; 
1869                 
1870                 //Create processor worker threads.
1871                 for( int i=1; i<processors; i++ ){
1872                         // Allocate memory for thread data.
1873                         string extension = toString(i) + ".temp";
1874                         
1875                         uchimeData* tempUchime = new uchimeData(outputFName+extension, uchimeLocation, templatefile, filename+extension, fastaFile, nameFile, groupFile, accnos+extension, alns+extension, accnos+".byCount."+extension, groups, m, lines[i].start, lines[i].end,  i);
1876                         tempUchime->setBooleans(dups, useAbskew, chimealns, useMinH, useMindiv, useXn, useDn, useXa, useChunks, useMinchunk, useIdsmoothwindow, useMinsmoothid, useMaxp, skipgaps, skipgaps2, useMinlen, useMaxlen, ucl, useQueryfract, hasCount);
1877                         tempUchime->setVariables(abskew, minh, mindiv, xn, dn, xa, chunks, minchunk, idsmoothwindow, minsmoothid, maxp, minlen, maxlen, queryfract, strand);
1878                         
1879                         pDataArray.push_back(tempUchime);
1880                         processIDS.push_back(i);
1881                         
1882                         //MyUchimeThreadFunction is in header. It must be global or static to work with the threads.
1883                         //default security attributes, thread function name, argument to thread function, use default creation flags, returns the thread identifier
1884                         hThreadArray[i-1] = CreateThread(NULL, 0, MyUchimeThreadFunction, pDataArray[i-1], 0, &dwThreadIdArray[i-1]);   
1885                 }
1886                 
1887                 
1888                 //using the main process as a worker saves time and memory
1889                 num = driverGroups(outputFName, filename, accnos, alns, accnos + ".byCount", lines[0].start, lines[0].end, groups);
1890                 
1891                 //Wait until all threads have terminated.
1892                 WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE);
1893                 
1894                 //Close all thread handles and free memory allocations.
1895                 for(int i=0; i < pDataArray.size(); i++){
1896                         num += pDataArray[i]->count;
1897                         CloseHandle(hThreadArray[i]);
1898                         delete pDataArray[i];
1899                 }
1900         
1901         
1902 #endif          
1903       
1904         //read my own
1905         if (hasCount && dups) {
1906             if (!m->isBlank(accnos + ".byCount")) {
1907                 ifstream in2;
1908                 m->openInputFile(accnos + ".byCount", in2);
1909                 
1910                 string name, group;
1911                 while (!in2.eof()) {
1912                     in2 >> name >> group; m->gobble(in2);
1913                     newCount.setAbund(name, group, 0);
1914                 }
1915                 in2.close();
1916             }
1917             m->mothurRemove(accnos + ".byCount");
1918         }
1919        
1920                 //append output files
1921                 for(int i=0;i<processIDS.size();i++){
1922                         m->appendFiles((outputFName + toString(processIDS[i]) + ".temp"), outputFName);
1923                         m->mothurRemove((outputFName + toString(processIDS[i]) + ".temp"));
1924                         
1925                         m->appendFiles((accnos + toString(processIDS[i]) + ".temp"), accnos);
1926                         m->mothurRemove((accnos + toString(processIDS[i]) + ".temp"));
1927                         
1928                         if (chimealns) {
1929                                 m->appendFiles((alns + toString(processIDS[i]) + ".temp"), alns);
1930                                 m->mothurRemove((alns + toString(processIDS[i]) + ".temp"));
1931                         }
1932             
1933             if (hasCount && dups) {
1934                 if (!m->isBlank(accnos + ".byCount." + toString(processIDS[i]) + ".temp")) {
1935                     ifstream in2;
1936                     m->openInputFile(accnos + ".byCount." + toString(processIDS[i]) + ".temp", in2);
1937                     
1938                     string name, group;
1939                     while (!in2.eof()) {
1940                         in2 >> name >> group; m->gobble(in2);
1941                         newCount.setAbund(name, group, 0);
1942                     }
1943                     in2.close();
1944                 }
1945                 m->mothurRemove(accnos + ".byCount." + toString(processIDS[i]) + ".temp");
1946             }
1947
1948                 }
1949         
1950         //print new *.pick.count_table
1951         if (hasCount && dups) {  newCount.printTable(newCountFile);   }
1952                 
1953                 return num;     
1954                 
1955         }
1956         catch(exception& e) {
1957                 m->errorOut(e, "ChimeraUchimeCommand", "createProcessesGroups");
1958                 exit(1);
1959         }
1960 }
1961 /**************************************************************************************************/
1962