]> git.donarmstrong.com Git - mothur.git/blob - chimerauchimecommand.cpp
Merge remote-tracking branch 'origin'
[mothur.git] / chimerauchimecommand.cpp
1 /*
2  *  chimerauchimecommand.cpp
3  *  Mothur
4  *
5  *  Created by westcott on 5/13/11.
6  *  Copyright 2011 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "chimerauchimecommand.h"
11 #include "deconvolutecommand.h"
12 //#include "uc.h"
13 #include "sequence.hpp"
14 #include "referencedb.h"
15 #include "systemcommand.h"
16
17 //**********************************************************************************************************************
18 vector<string> ChimeraUchimeCommand::setParameters(){   
19         try {
20                 CommandParameter ptemplate("reference", "InputTypes", "", "", "none", "none", "none","",false,true,true); parameters.push_back(ptemplate);
21                 CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none","chimera-accnos",false,true,true); parameters.push_back(pfasta);
22                 CommandParameter pname("name", "InputTypes", "", "", "NameCount", "none", "none","",false,false,true); parameters.push_back(pname);
23         CommandParameter pcount("count", "InputTypes", "", "", "NameCount-CountGroup", "none", "none","",false,false,true); parameters.push_back(pcount);
24                 CommandParameter pgroup("group", "InputTypes", "", "", "CountGroup", "none", "none","",false,false,true); parameters.push_back(pgroup);
25                 CommandParameter pprocessors("processors", "Number", "", "1", "", "", "","",false,false,true); parameters.push_back(pprocessors);
26         CommandParameter pstrand("strand", "String", "", "", "", "", "","",false,false); parameters.push_back(pstrand);
27                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
28                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
29                 CommandParameter pabskew("abskew", "Number", "", "1.9", "", "", "","",false,false); parameters.push_back(pabskew);
30                 CommandParameter pchimealns("chimealns", "Boolean", "", "F", "", "", "","alns",false,false); parameters.push_back(pchimealns);
31                 CommandParameter pminh("minh", "Number", "", "0.3", "", "", "","",false,false); parameters.push_back(pminh);
32                 CommandParameter pmindiv("mindiv", "Number", "", "0.5", "", "", "","",false,false); parameters.push_back(pmindiv);
33                 CommandParameter pxn("xn", "Number", "", "8.0", "", "", "","",false,false); parameters.push_back(pxn);
34                 CommandParameter pdn("dn", "Number", "", "1.4", "", "", "","",false,false); parameters.push_back(pdn);
35                 CommandParameter pxa("xa", "Number", "", "1", "", "", "","",false,false); parameters.push_back(pxa);
36                 CommandParameter pchunks("chunks", "Number", "", "4", "", "", "","",false,false); parameters.push_back(pchunks);
37                 CommandParameter pminchunk("minchunk", "Number", "", "64", "", "", "","",false,false); parameters.push_back(pminchunk);
38                 CommandParameter pidsmoothwindow("idsmoothwindow", "Number", "", "32", "", "", "","",false,false); parameters.push_back(pidsmoothwindow);
39         CommandParameter pdups("dereplicate", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(pdups);
40
41                 //CommandParameter pminsmoothid("minsmoothid", "Number", "", "0.95", "", "", "",false,false); parameters.push_back(pminsmoothid);
42                 CommandParameter pmaxp("maxp", "Number", "", "2", "", "", "","",false,false); parameters.push_back(pmaxp);
43                 CommandParameter pskipgaps("skipgaps", "Boolean", "", "T", "", "", "","",false,false); parameters.push_back(pskipgaps);
44                 CommandParameter pskipgaps2("skipgaps2", "Boolean", "", "T", "", "", "","",false,false); parameters.push_back(pskipgaps2);
45                 CommandParameter pminlen("minlen", "Number", "", "10", "", "", "","",false,false); parameters.push_back(pminlen);
46                 CommandParameter pmaxlen("maxlen", "Number", "", "10000", "", "", "","",false,false); parameters.push_back(pmaxlen);
47                 CommandParameter pucl("ucl", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(pucl);
48                 CommandParameter pqueryfract("queryfract", "Number", "", "0.5", "", "", "","",false,false); parameters.push_back(pqueryfract);
49
50                 vector<string> myArray;
51                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
52                 return myArray;
53         }
54         catch(exception& e) {
55                 m->errorOut(e, "ChimeraUchimeCommand", "setParameters");
56                 exit(1);
57         }
58 }
59 //**********************************************************************************************************************
60 string ChimeraUchimeCommand::getHelpString(){   
61         try {
62                 string helpString = "";
63                 helpString += "The chimera.uchime command reads a fastafile and referencefile and outputs potentially chimeric sequences.\n";
64                 helpString += "This command is a wrapper for uchime written by Robert C. Edgar.\n";
65                 helpString += "The chimera.uchime command parameters are fasta, name, count, reference, processors, dereplicate, abskew, chimealns, minh, mindiv, xn, dn, xa, chunks, minchunk, idsmoothwindow, minsmoothid, maxp, skipgaps, skipgaps2, minlen, maxlen, ucl, strand and queryfact.\n";
66                 helpString += "The fasta parameter allows you to enter the fasta file containing your potentially chimeric sequences, and is required, unless you have a valid current fasta file. \n";
67                 helpString += "The name parameter allows you to provide a name file, if you are using template=self. \n";
68         helpString += "The count parameter allows you to provide a count file, if you are using template=self. When you use a count file with group info and dereplicate=T, mothur will create a *.pick.count_table file containing seqeunces after chimeras are removed. \n";
69                 helpString += "You may enter multiple fasta files by separating their names with dashes. ie. fasta=abrecovery.fasta-amazon.fasta \n";
70                 helpString += "The group parameter allows you to provide a group file. The group file can be used with a namesfile and reference=self. When checking sequences, only sequences from the same group as the query sequence will be used as the reference. \n";
71         helpString += "If the dereplicate parameter is false, then if one group finds the seqeunce to be chimeric, then all groups find it to be chimeric, default=f.\n";
72                 helpString += "The reference parameter allows you to enter a reference file containing known non-chimeric sequences, and is required. You may also set template=self, in this case the abundant sequences will be used as potential parents. \n";
73                 helpString += "The processors parameter allows you to specify how many processors you would like to use.  The default is 1. \n";
74                 helpString += "The abskew parameter can only be used with template=self. Minimum abundance skew. Default 1.9. Abundance skew is: min [ abund(parent1), abund(parent2) ] / abund(query).\n";
75                 helpString += "The chimealns parameter allows you to indicate you would like a file containing multiple alignments of query sequences to parents in human readable format. Alignments show columns with differences that support or contradict a chimeric model.\n";
76                 helpString += "The minh parameter - mininum score to report chimera. Default 0.3. Values from 0.1 to 5 might be reasonable. Lower values increase sensitivity but may report more false positives. If you decrease xn you may need to increase minh, and vice versa.\n";
77                 helpString += "The mindiv parameter - minimum divergence ratio, default 0.5. Div ratio is 100%% - %%identity between query sequence and the closest candidate for being a parent. If you don't care about very close chimeras, then you could increase mindiv to, say, 1.0 or 2.0, and also decrease minh, say to 0.1, to increase sensitivity. How well this works will depend on your data. Best is to tune parameters on a good benchmark.\n";
78                 helpString += "The xn parameter - weight of a no vote. Default 8.0. Decreasing this weight to around 3 or 4 may give better performance on denoised data.\n";
79                 helpString += "The dn parameter - pseudo-count prior on number of no votes. Default 1.4. Probably no good reason to change this unless you can retune to a good benchmark for your data. Reasonable values are probably in the range from 0.2 to 2.\n";
80                 helpString += "The xa parameter - weight of an abstain vote. Default 1. So far, results do not seem to be very sensitive to this parameter, but if you have a good training set might be worth trying. Reasonable values might range from 0.1 to 2.\n";
81                 helpString += "The chunks parameter is the number of chunks to extract from the query sequence when searching for parents. Default 4.\n";
82                 helpString += "The minchunk parameter is the minimum length of a chunk. Default 64.\n";
83                 helpString += "The idsmoothwindow parameter is the length of id smoothing window. Default 32.\n";
84                 //helpString += "The minsmoothid parameter - minimum factional identity over smoothed window of candidate parent. Default 0.95.\n";
85                 helpString += "The maxp parameter - maximum number of candidate parents to consider. Default 2. In tests so far, increasing maxp gives only a very small improvement in sensivity but tends to increase the error rate quite a bit.\n";
86                 helpString += "The skipgaps parameter controls how gapped columns affect counting of diffs. If skipgaps is set to T, columns containing gaps do not found as diffs. Default = T.\n";
87                 helpString += "The skipgaps2 parameter controls how gapped columns affect counting of diffs. If skipgaps2 is set to T, if column is immediately adjacent to a column containing a gap, it is not counted as a diff. Default = T.\n";
88                 helpString += "The minlen parameter is the minimum unaligned sequence length. Defaults 10. Applies to both query and reference sequences.\n";
89                 helpString += "The maxlen parameter is the maximum unaligned sequence length. Defaults 10000. Applies to both query and reference sequences.\n";
90                 helpString += "The ucl parameter - use local-X alignments. Default is global-X or false. On tests so far, global-X is always better; this option is retained because it just might work well on some future type of data.\n";
91                 helpString += "The queryfract parameter - minimum fraction of the query sequence that must be covered by a local-X alignment. Default 0.5. Applies only when ucl is true.\n";
92 #ifdef USE_MPI
93                 helpString += "When using MPI, the processors parameter is set to the number of MPI processes running. \n";
94 #endif
95                 helpString += "The chimera.uchime command should be in the following format: \n";
96                 helpString += "chimera.uchime(fasta=yourFastaFile, reference=yourTemplate) \n";
97                 helpString += "Example: chimera.uchime(fasta=AD.align, reference=silva.gold.align) \n";
98                 helpString += "Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFastaFile).\n";       
99                 return helpString;
100         }
101         catch(exception& e) {
102                 m->errorOut(e, "ChimeraUchimeCommand", "getHelpString");
103                 exit(1);
104         }
105 }
106 //**********************************************************************************************************************
107 string ChimeraUchimeCommand::getOutputPattern(string type) {
108     try {
109         string pattern = "";
110         
111         if (type == "chimera") {  pattern = "[filename],uchime.chimeras"; } 
112         else if (type == "accnos") {  pattern = "[filename],uchime.accnos"; } 
113         else if (type == "alns") {  pattern = "[filename],uchime.alns"; }
114         else if (type == "count") {  pattern = "[filename],uchime.pick.count_table"; } 
115         else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true;  }
116         
117         return pattern;
118     }
119     catch(exception& e) {
120         m->errorOut(e, "ChimeraUchimeCommand", "getOutputPattern");
121         exit(1);
122     }
123 }
124 //**********************************************************************************************************************
125 ChimeraUchimeCommand::ChimeraUchimeCommand(){   
126         try {
127                 abort = true; calledHelp = true;
128                 setParameters();
129                 vector<string> tempOutNames;
130                 outputTypes["chimera"] = tempOutNames;
131                 outputTypes["accnos"] = tempOutNames;
132                 outputTypes["alns"] = tempOutNames;
133         outputTypes["count"] = tempOutNames;
134         }
135         catch(exception& e) {
136                 m->errorOut(e, "ChimeraUchimeCommand", "ChimeraUchimeCommand");
137                 exit(1);
138         }
139 }
140 //***************************************************************************************************************
141 ChimeraUchimeCommand::ChimeraUchimeCommand(string option)  {
142         try {
143                 abort = false; calledHelp = false; hasName=false; hasCount=false;
144                 ReferenceDB* rdb = ReferenceDB::getInstance();
145                 
146                 //allow user to run help
147                 if(option == "help") { help(); abort = true; calledHelp = true; }
148                 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
149                 
150                 else {
151                         vector<string> myArray = setParameters();
152                         
153                         OptionParser parser(option);
154                         map<string,string> parameters = parser.getParameters();
155                         
156                         ValidParameters validParameter("chimera.uchime");
157                         map<string,string>::iterator it;
158                         
159                         //check to make sure all parameters are valid for command
160                         for (it = parameters.begin(); it != parameters.end(); it++) { 
161                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
162                         }
163                         
164                         vector<string> tempOutNames;
165                         outputTypes["chimera"] = tempOutNames;
166                         outputTypes["accnos"] = tempOutNames;
167                         outputTypes["alns"] = tempOutNames;
168             outputTypes["count"] = tempOutNames;
169                         
170                         //if the user changes the input directory command factory will send this info to us in the output parameter 
171                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
172                         if (inputDir == "not found"){   inputDir = "";          }
173                         
174                         //check for required parameters
175                         fastafile = validParameter.validFile(parameters, "fasta", false);
176                         if (fastafile == "not found") {                                 
177                                 //if there is a current fasta file, use it
178                                 string filename = m->getFastaFile(); 
179                                 if (filename != "") { fastaFileNames.push_back(filename); m->mothurOut("Using " + filename + " as input file for the fasta parameter."); m->mothurOutEndLine(); }
180                                 else {  m->mothurOut("You have no current fastafile and the fasta parameter is required."); m->mothurOutEndLine(); abort = true; }
181                         }else { 
182                                 m->splitAtDash(fastafile, fastaFileNames);
183                                 
184                                 //go through files and make sure they are good, if not, then disregard them
185                                 for (int i = 0; i < fastaFileNames.size(); i++) {
186                                         
187                                         bool ignore = false;
188                                         if (fastaFileNames[i] == "current") { 
189                                                 fastaFileNames[i] = m->getFastaFile(); 
190                                                 if (fastaFileNames[i] != "") {  m->mothurOut("Using " + fastaFileNames[i] + " as input file for the fasta parameter where you had given current."); m->mothurOutEndLine(); }
191                                                 else {  
192                                                         m->mothurOut("You have no current fastafile, ignoring current."); m->mothurOutEndLine(); ignore=true; 
193                                                         //erase from file list
194                                                         fastaFileNames.erase(fastaFileNames.begin()+i);
195                                                         i--;
196                                                 }
197                                         }
198                                         
199                                         if (!ignore) {
200                                                 
201                                                 if (inputDir != "") {
202                                                         string path = m->hasPath(fastaFileNames[i]);
203                                                         //if the user has not given a path then, add inputdir. else leave path alone.
204                                                         if (path == "") {       fastaFileNames[i] = inputDir + fastaFileNames[i];               }
205                                                 }
206                                                 
207                                                 int ableToOpen;
208                                                 ifstream in;
209                                                 
210                                                 ableToOpen = m->openInputFile(fastaFileNames[i], in, "noerror");
211                                                 
212                                                 //if you can't open it, try default location
213                                                 if (ableToOpen == 1) {
214                                                         if (m->getDefaultPath() != "") { //default path is set
215                                                                 string tryPath = m->getDefaultPath() + m->getSimpleName(fastaFileNames[i]);
216                                                                 m->mothurOut("Unable to open " + fastaFileNames[i] + ". Trying default " + tryPath); m->mothurOutEndLine();
217                                                                 ifstream in2;
218                                                                 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
219                                                                 in2.close();
220                                                                 fastaFileNames[i] = tryPath;
221                                                         }
222                                                 }
223                                                 
224                                                 if (ableToOpen == 1) {
225                                                         if (m->getOutputDir() != "") { //default path is set
226                                                                 string tryPath = m->getOutputDir() + m->getSimpleName(fastaFileNames[i]);
227                                                                 m->mothurOut("Unable to open " + fastaFileNames[i] + ". Trying output directory " + tryPath); m->mothurOutEndLine();
228                                                                 ifstream in2;
229                                                                 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
230                                                                 in2.close();
231                                                                 fastaFileNames[i] = tryPath;
232                                                         }
233                                                 }
234                                                 
235                                                 in.close();
236                                                 
237                                                 if (ableToOpen == 1) { 
238                                                         m->mothurOut("Unable to open " + fastaFileNames[i] + ". It will be disregarded."); m->mothurOutEndLine(); 
239                                                         //erase from file list
240                                                         fastaFileNames.erase(fastaFileNames.begin()+i);
241                                                         i--;
242                                                 }else {
243                                                         m->setFastaFile(fastaFileNames[i]);
244                                                 }
245                                         }
246                                 }
247                                 
248                                 //make sure there is at least one valid file left
249                                 if (fastaFileNames.size() == 0) { m->mothurOut("[ERROR]: no valid files."); m->mothurOutEndLine(); abort = true; }
250                         }
251                         
252                         
253                         //check for required parameters
254                         namefile = validParameter.validFile(parameters, "name", false);
255                         if (namefile == "not found") { namefile = "";   }
256                         else { 
257                                 m->splitAtDash(namefile, nameFileNames);
258                                 
259                                 //go through files and make sure they are good, if not, then disregard them
260                                 for (int i = 0; i < nameFileNames.size(); i++) {
261                                         
262                                         bool ignore = false;
263                                         if (nameFileNames[i] == "current") { 
264                                                 nameFileNames[i] = m->getNameFile(); 
265                                                 if (nameFileNames[i] != "") {  m->mothurOut("Using " + nameFileNames[i] + " as input file for the name parameter where you had given current."); m->mothurOutEndLine(); }
266                                                 else {  
267                                                         m->mothurOut("You have no current namefile, ignoring current."); m->mothurOutEndLine(); ignore=true; 
268                                                         //erase from file list
269                                                         nameFileNames.erase(nameFileNames.begin()+i);
270                                                         i--;
271                                                 }
272                                         }
273                                         
274                                         if (!ignore) {
275                                                 
276                                                 if (inputDir != "") {
277                                                         string path = m->hasPath(nameFileNames[i]);
278                                                         //if the user has not given a path then, add inputdir. else leave path alone.
279                                                         if (path == "") {       nameFileNames[i] = inputDir + nameFileNames[i];         }
280                                                 }
281                                                 
282                                                 int ableToOpen;
283                                                 ifstream in;
284                                                 
285                                                 ableToOpen = m->openInputFile(nameFileNames[i], in, "noerror");
286                                                 
287                                                 //if you can't open it, try default location
288                                                 if (ableToOpen == 1) {
289                                                         if (m->getDefaultPath() != "") { //default path is set
290                                                                 string tryPath = m->getDefaultPath() + m->getSimpleName(nameFileNames[i]);
291                                                                 m->mothurOut("Unable to open " + nameFileNames[i] + ". Trying default " + tryPath); m->mothurOutEndLine();
292                                                                 ifstream in2;
293                                                                 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
294                                                                 in2.close();
295                                                                 nameFileNames[i] = tryPath;
296                                                         }
297                                                 }
298                                                 
299                                                 if (ableToOpen == 1) {
300                                                         if (m->getOutputDir() != "") { //default path is set
301                                                                 string tryPath = m->getOutputDir() + m->getSimpleName(nameFileNames[i]);
302                                                                 m->mothurOut("Unable to open " + nameFileNames[i] + ". Trying output directory " + tryPath); m->mothurOutEndLine();
303                                                                 ifstream in2;
304                                                                 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
305                                                                 in2.close();
306                                                                 nameFileNames[i] = tryPath;
307                                                         }
308                                                 }
309                                                 
310                                                 in.close();
311                                                 
312                                                 if (ableToOpen == 1) { 
313                                                         m->mothurOut("Unable to open " + nameFileNames[i] + ". It will be disregarded."); m->mothurOutEndLine(); 
314                                                         //erase from file list
315                                                         nameFileNames.erase(nameFileNames.begin()+i);
316                                                         i--;
317                                                 }else {
318                                                         m->setNameFile(nameFileNames[i]);
319                                                 }
320                                         }
321                                 }
322                         }
323             
324             if (nameFileNames.size() != 0) { hasName = true; }
325             
326             //check for required parameters
327             vector<string> countfileNames;
328                         countfile = validParameter.validFile(parameters, "count", false);
329                         if (countfile == "not found") { 
330                 countfile = "";  
331                         }else { 
332                                 m->splitAtDash(countfile, countfileNames);
333                                 
334                                 //go through files and make sure they are good, if not, then disregard them
335                                 for (int i = 0; i < countfileNames.size(); i++) {
336                                         
337                                         bool ignore = false;
338                                         if (countfileNames[i] == "current") { 
339                                                 countfileNames[i] = m->getCountTableFile(); 
340                                                 if (nameFileNames[i] != "") {  m->mothurOut("Using " + countfileNames[i] + " as input file for the count parameter where you had given current."); m->mothurOutEndLine(); }
341                                                 else {  
342                                                         m->mothurOut("You have no current count file, ignoring current."); m->mothurOutEndLine(); ignore=true; 
343                                                         //erase from file list
344                                                         countfileNames.erase(countfileNames.begin()+i);
345                                                         i--;
346                                                 }
347                                         }
348                                         
349                                         if (!ignore) {
350                                                 
351                                                 if (inputDir != "") {
352                                                         string path = m->hasPath(countfileNames[i]);
353                                                         //if the user has not given a path then, add inputdir. else leave path alone.
354                                                         if (path == "") {       countfileNames[i] = inputDir + countfileNames[i];               }
355                                                 }
356                                                 
357                                                 int ableToOpen;
358                                                 ifstream in;
359                                                 
360                                                 ableToOpen = m->openInputFile(countfileNames[i], in, "noerror");
361                                                 
362                                                 //if you can't open it, try default location
363                                                 if (ableToOpen == 1) {
364                                                         if (m->getDefaultPath() != "") { //default path is set
365                                                                 string tryPath = m->getDefaultPath() + m->getSimpleName(countfileNames[i]);
366                                                                 m->mothurOut("Unable to open " + countfileNames[i] + ". Trying default " + tryPath); m->mothurOutEndLine();
367                                                                 ifstream in2;
368                                                                 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
369                                                                 in2.close();
370                                                                 countfileNames[i] = tryPath;
371                                                         }
372                                                 }
373                                                 
374                                                 if (ableToOpen == 1) {
375                                                         if (m->getOutputDir() != "") { //default path is set
376                                                                 string tryPath = m->getOutputDir() + m->getSimpleName(countfileNames[i]);
377                                                                 m->mothurOut("Unable to open " + countfileNames[i] + ". Trying output directory " + tryPath); m->mothurOutEndLine();
378                                                                 ifstream in2;
379                                                                 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
380                                                                 in2.close();
381                                                                 countfileNames[i] = tryPath;
382                                                         }
383                                                 }
384                                                 
385                                                 in.close();
386                                                 
387                                                 if (ableToOpen == 1) { 
388                                                         m->mothurOut("Unable to open " + countfileNames[i] + ". It will be disregarded."); m->mothurOutEndLine(); 
389                                                         //erase from file list
390                                                         countfileNames.erase(countfileNames.begin()+i);
391                                                         i--;
392                                                 }else {
393                                                         m->setCountTableFile(countfileNames[i]);
394                                                 }
395                                         }
396                                 }
397                         }
398             
399             if (countfileNames.size() != 0) { hasCount = true; }
400             
401                         //make sure there is at least one valid file left
402             if (hasName && hasCount) { m->mothurOut("[ERROR]: You must enter ONLY ONE of the following: count or name."); m->mothurOutEndLine(); abort = true; }
403             
404             if (!hasName && hasCount) { nameFileNames = countfileNames; }
405             
406                         if ((hasCount || hasName) && (nameFileNames.size() != fastaFileNames.size())) { m->mothurOut("[ERROR]: The number of name or count files does not match the number of fastafiles, please correct."); m->mothurOutEndLine(); abort=true; }
407                         
408                         bool hasGroup = true;
409                         groupfile = validParameter.validFile(parameters, "group", false);
410                         if (groupfile == "not found") { groupfile = "";  hasGroup = false; }
411                         else { 
412                                 m->splitAtDash(groupfile, groupFileNames);
413                                 
414                                 //go through files and make sure they are good, if not, then disregard them
415                                 for (int i = 0; i < groupFileNames.size(); i++) {
416                                         
417                                         bool ignore = false;
418                                         if (groupFileNames[i] == "current") { 
419                                                 groupFileNames[i] = m->getGroupFile(); 
420                                                 if (groupFileNames[i] != "") {  m->mothurOut("Using " + groupFileNames[i] + " as input file for the group parameter where you had given current."); m->mothurOutEndLine(); }
421                                                 else {  
422                                                         m->mothurOut("You have no current namefile, ignoring current."); m->mothurOutEndLine(); ignore=true; 
423                                                         //erase from file list
424                                                         groupFileNames.erase(groupFileNames.begin()+i);
425                                                         i--;
426                                                 }
427                                         }
428                                         
429                                         if (!ignore) {
430                                                 
431                                                 if (inputDir != "") {
432                                                         string path = m->hasPath(groupFileNames[i]);
433                                                         //if the user has not given a path then, add inputdir. else leave path alone.
434                                                         if (path == "") {       groupFileNames[i] = inputDir + groupFileNames[i];               }
435                                                 }
436                                                 
437                                                 int ableToOpen;
438                                                 ifstream in;
439                                                 
440                                                 ableToOpen = m->openInputFile(groupFileNames[i], in, "noerror");
441                                                 
442                                                 //if you can't open it, try default location
443                                                 if (ableToOpen == 1) {
444                                                         if (m->getDefaultPath() != "") { //default path is set
445                                                                 string tryPath = m->getDefaultPath() + m->getSimpleName(groupFileNames[i]);
446                                                                 m->mothurOut("Unable to open " + groupFileNames[i] + ". Trying default " + tryPath); m->mothurOutEndLine();
447                                                                 ifstream in2;
448                                                                 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
449                                                                 in2.close();
450                                                                 groupFileNames[i] = tryPath;
451                                                         }
452                                                 }
453                                                 
454                                                 if (ableToOpen == 1) {
455                                                         if (m->getOutputDir() != "") { //default path is set
456                                                                 string tryPath = m->getOutputDir() + m->getSimpleName(groupFileNames[i]);
457                                                                 m->mothurOut("Unable to open " + groupFileNames[i] + ". Trying output directory " + tryPath); m->mothurOutEndLine();
458                                                                 ifstream in2;
459                                                                 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
460                                                                 in2.close();
461                                                                 groupFileNames[i] = tryPath;
462                                                         }
463                                                 }
464                                                 
465                                                 in.close();
466                                                 
467                                                 if (ableToOpen == 1) { 
468                                                         m->mothurOut("Unable to open " + groupFileNames[i] + ". It will be disregarded."); m->mothurOutEndLine(); 
469                                                         //erase from file list
470                                                         groupFileNames.erase(groupFileNames.begin()+i);
471                                                         i--;
472                                                 }else {
473                                                         m->setGroupFile(groupFileNames[i]);
474                                                 }
475                                         }
476                                 }
477                                 
478                                 //make sure there is at least one valid file left
479                                 if (groupFileNames.size() == 0) { m->mothurOut("[ERROR]: no valid group files."); m->mothurOutEndLine(); abort = true; }
480                         }
481                         
482                         if (hasGroup && (groupFileNames.size() != fastaFileNames.size())) { m->mothurOut("[ERROR]: The number of groupfiles does not match the number of fastafiles, please correct."); m->mothurOutEndLine(); abort=true; }
483                         
484             if (hasGroup && hasCount) { m->mothurOut("[ERROR]: You must enter ONLY ONE of the following: count or group."); m->mothurOutEndLine(); abort = true; }                      
485                         //if the user changes the output directory command factory will send this info to us in the output parameter 
486                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = ""; }
487                         
488                         
489                         //if the user changes the output directory command factory will send this info to us in the output parameter 
490                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = ""; }
491                         
492                         string path;
493                         it = parameters.find("reference");
494                         //user has given a template file
495                         if(it != parameters.end()){ 
496                                 if (it->second == "self") { templatefile = "self"; }
497                                 else {
498                                         path = m->hasPath(it->second);
499                                         //if the user has not given a path then, add inputdir. else leave path alone.
500                                         if (path == "") {       parameters["reference"] = inputDir + it->second;                }
501                                         
502                                         templatefile = validParameter.validFile(parameters, "reference", true);
503                                         if (templatefile == "not open") { abort = true; }
504                                         else if (templatefile == "not found") { //check for saved reference sequences
505                                                 if (rdb->getSavedReference() != "") {
506                                                         templatefile = rdb->getSavedReference();
507                                                         m->mothurOutEndLine();  m->mothurOut("Using sequences from " + rdb->getSavedReference() + "."); m->mothurOutEndLine();
508                                                 }else {
509                                                         m->mothurOut("[ERROR]: You don't have any saved reference sequences and the reference parameter is a required."); 
510                                                         m->mothurOutEndLine();
511                                                         abort = true; 
512                                                 }
513                                         }
514                                 }
515                         }else if (hasName) {  templatefile = "self"; }
516             else if (hasCount) {  templatefile = "self"; }
517                         else { 
518                                 if (rdb->getSavedReference() != "") {
519                                         templatefile = rdb->getSavedReference();
520                                         m->mothurOutEndLine();  m->mothurOut("Using sequences from " + rdb->getSavedReference() + "."); m->mothurOutEndLine();
521                                 }else {
522                                         m->mothurOut("[ERROR]: You don't have any saved reference sequences and the reference parameter is a required."); 
523                                         m->mothurOutEndLine();
524                                         templatefile = ""; abort = true; 
525                                 } 
526                         }
527                                 
528                         string temp = validParameter.validFile(parameters, "processors", false);        if (temp == "not found"){       temp = m->getProcessors();      }
529                         m->setProcessors(temp);
530                         m->mothurConvert(temp, processors);
531                         
532                         abskew = validParameter.validFile(parameters, "abskew", false); if (abskew == "not found"){     useAbskew = false;  abskew = "1.9";     }else{  useAbskew = true;  }
533                         if (useAbskew && templatefile != "self") { m->mothurOut("The abskew parameter is only valid with template=self, ignoring."); m->mothurOutEndLine(); useAbskew = false; }
534                         
535                         temp = validParameter.validFile(parameters, "chimealns", false);                        if (temp == "not found") { temp = "f"; }
536                         chimealns = m->isTrue(temp); 
537                         
538                         minh = validParameter.validFile(parameters, "minh", false);                                             if (minh == "not found")                        { useMinH = false; minh = "0.3";                                        }       else{ useMinH = true;                   }
539                         mindiv = validParameter.validFile(parameters, "mindiv", false);                                 if (mindiv == "not found")                      { useMindiv = false; mindiv = "0.5";                            }       else{ useMindiv = true;                 }
540                         xn = validParameter.validFile(parameters, "xn", false);                                                 if (xn == "not found")                          { useXn = false; xn = "8.0";                                            }       else{ useXn = true;                             }
541                         dn = validParameter.validFile(parameters, "dn", false);                                                 if (dn == "not found")                          { useDn = false; dn = "1.4";                                            }       else{ useDn = true;                             }
542                         xa = validParameter.validFile(parameters, "xa", false);                                                 if (xa == "not found")                          { useXa = false; xa = "1";                                                      }       else{ useXa = true;                             }
543                         chunks = validParameter.validFile(parameters, "chunks", false);                                 if (chunks == "not found")                      { useChunks = false; chunks = "4";                                      }       else{ useChunks = true;                 }
544                         minchunk = validParameter.validFile(parameters, "minchunk", false);                             if (minchunk == "not found")            { useMinchunk = false; minchunk = "64";                         }       else{ useMinchunk = true;               }
545                         idsmoothwindow = validParameter.validFile(parameters, "idsmoothwindow", false); if (idsmoothwindow == "not found")      { useIdsmoothwindow = false; idsmoothwindow = "32";     }       else{ useIdsmoothwindow = true; }
546                         //minsmoothid = validParameter.validFile(parameters, "minsmoothid", false);             if (minsmoothid == "not found")         { useMinsmoothid = false; minsmoothid = "0.95";         }       else{ useMinsmoothid = true;    }
547                         maxp = validParameter.validFile(parameters, "maxp", false);                                             if (maxp == "not found")                        { useMaxp = false; maxp = "2";                                          }       else{ useMaxp = true;                   }
548                         minlen = validParameter.validFile(parameters, "minlen", false);                                 if (minlen == "not found")                      { useMinlen = false; minlen = "10";                                     }       else{ useMinlen = true;                 }
549                         maxlen = validParameter.validFile(parameters, "maxlen", false);                                 if (maxlen == "not found")                      { useMaxlen = false; maxlen = "10000";                          }       else{ useMaxlen = true;                 }
550             
551             strand = validParameter.validFile(parameters, "strand", false);     if (strand == "not found")      {  strand = ""; }
552                         
553                         temp = validParameter.validFile(parameters, "ucl", false);                                              if (temp == "not found") { temp = "f"; }
554                         ucl = m->isTrue(temp);
555                         
556                         queryfract = validParameter.validFile(parameters, "queryfract", false);                 if (queryfract == "not found")          { useQueryfract = false; queryfract = "0.5";            }       else{ useQueryfract = true;             }
557                         if (!ucl && useQueryfract) { m->mothurOut("queryfact may only be used when ucl=t, ignoring."); m->mothurOutEndLine(); useQueryfract = false; }
558                         
559                         temp = validParameter.validFile(parameters, "skipgaps", false);                                 if (temp == "not found") { temp = "t"; }
560                         skipgaps = m->isTrue(temp); 
561
562                         temp = validParameter.validFile(parameters, "skipgaps2", false);                                if (temp == "not found") { temp = "t"; }
563                         skipgaps2 = m->isTrue(temp); 
564             
565             
566                         temp = validParameter.validFile(parameters, "dereplicate", false);      
567                         if (temp == "not found") { 
568                                 if (groupfile != "")    {  temp = "false";                                      }
569                                 else                    {  temp = "true";       }
570                         }
571                         dups = m->isTrue(temp);
572
573                         
574                         if (hasName && (templatefile != "self")) { m->mothurOut("You have provided a namefile and the reference parameter is not set to self. I am not sure what reference you are trying to use, aborting."); m->mothurOutEndLine(); abort=true; }
575                         if (hasGroup && (templatefile != "self")) { m->mothurOut("You have provided a group file and the reference parameter is not set to self. I am not sure what reference you are trying to use, aborting."); m->mothurOutEndLine(); abort=true; }
576                         
577                         //look for uchime exe
578                         path = m->argv;
579                         string tempPath = path;
580                         for (int i = 0; i < path.length(); i++) { tempPath[i] = tolower(path[i]); }
581                         path = path.substr(0, (tempPath.find_last_of('m')));
582                         
583                         string uchimeCommand;
584 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
585                         uchimeCommand = path + "uchime";        //      format the database, -o option gives us the ability
586             if (m->debug) { 
587                 m->mothurOut("[DEBUG]: Uchime location using \"which uchime\" = "); 
588                 Command* newCommand = new SystemCommand("which uchime"); m->mothurOutEndLine();
589                 newCommand->execute();
590                 delete newCommand;
591                 m->mothurOut("[DEBUG]: Mothur's location using \"which mothur\" = "); 
592                 newCommand = new SystemCommand("which mothur"); m->mothurOutEndLine();
593                 newCommand->execute();
594                 delete newCommand;
595             }
596 #else
597                         uchimeCommand = path + "uchime.exe";
598 #endif
599         
600                         //test to make sure uchime exists
601                         ifstream in;
602                         uchimeCommand = m->getFullPathName(uchimeCommand);
603                         int ableToOpen = m->openInputFile(uchimeCommand, in, "no error"); in.close();
604                         if(ableToOpen == 1) {   
605                 m->mothurOut(uchimeCommand + " file does not exist. Checking path... \n");
606                 //check to see if uchime is in the path??
607                 
608                 string uLocation = m->findProgramPath("uchime");
609                 
610                 
611                 ifstream in2;
612 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
613                 ableToOpen = m->openInputFile(uLocation, in2, "no error"); in2.close();
614 #else
615                 ableToOpen = m->openInputFile((uLocation + ".exe"), in2, "no error"); in2.close();
616 #endif
617
618                 if(ableToOpen == 1) { m->mothurOut("[ERROR]: " + uLocation + " file does not exist. mothur requires the uchime executable."); m->mothurOutEndLine(); abort = true; } 
619                 else {  m->mothurOut("Found uchime in your path, using " + uLocation + "\n");uchimeLocation = uLocation; }
620             }else {  uchimeLocation = uchimeCommand; }
621             
622             uchimeLocation = m->getFullPathName(uchimeLocation);
623         }
624         }
625         catch(exception& e) {
626                 m->errorOut(e, "ChimeraSlayerCommand", "ChimeraSlayerCommand");
627                 exit(1);
628         }
629 }
630 //***************************************************************************************************************
631
632 int ChimeraUchimeCommand::execute(){
633         try{
634         
635         if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
636                 
637                 m->mothurOut("\nuchime by Robert C. Edgar\nhttp://drive5.com/uchime\nThis code is donated to the public domain.\n\n");
638                 
639                 for (int s = 0; s < fastaFileNames.size(); s++) {
640                         
641                         m->mothurOut("Checking sequences from " + fastaFileNames[s] + " ..." ); m->mothurOutEndLine();
642                         
643                         int start = time(NULL); 
644                         string nameFile = "";
645                         if (outputDir == "") { outputDir = m->hasPath(fastaFileNames[s]);  }//if user entered a file with a path then preserve it                               
646                         map<string, string> variables; 
647             variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s]));
648                         string outputFileName = getOutputFileName("chimera", variables);
649                         string accnosFileName = getOutputFileName("accnos", variables);
650                         string alnsFileName = getOutputFileName("alns", variables);
651                         string newFasta = m->getRootName(fastaFileNames[s]) + "temp";
652             string newCountFile = "";
653                                 
654                         //you provided a groupfile
655                         string groupFile = "";
656             bool hasGroup = false;
657                         if (groupFileNames.size() != 0) { groupFile = groupFileNames[s]; hasGroup = true; }
658             else if (hasCount) {
659                 CountTable ct;
660                 if (ct.testGroups(nameFileNames[s])) { hasGroup = true; }
661                 variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(nameFileNames[s]));
662                 newCountFile = getOutputFileName("count", variables);
663             }
664                         
665                         if ((templatefile == "self") && (!hasGroup)) { //you want to run uchime with a template=self and no groups
666
667                                 if (processors != 1) { m->mothurOut("When using template=self, mothur can only use 1 processor, continuing."); m->mothurOutEndLine(); processors = 1; }
668                                 if (nameFileNames.size() != 0) { //you provided a namefile and we don't need to create one
669                                         nameFile = nameFileNames[s];
670                                 }else { nameFile = getNamesFile(fastaFileNames[s]); }
671                                                                                 
672                                 map<string, string> seqs;  
673                                 readFasta(fastaFileNames[s], seqs);  if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) {   m->mothurRemove(outputNames[j]);        }  return 0; }
674
675                                 //read namefile
676                                 vector<seqPriorityNode> nameMapCount;
677                 int error;
678                 if (hasCount) {
679                     CountTable ct;
680                     ct.readTable(nameFile);
681                     for(map<string, string>::iterator it = seqs.begin(); it != seqs.end(); it++) {
682                         int num = ct.getNumSeqs(it->first);
683                         if (num == 0) { error = 1; }
684                         else {
685                             seqPriorityNode temp(num, it->second, it->first);
686                             nameMapCount.push_back(temp);
687                         }
688                     }
689                 }else {
690                     error = m->readNames(nameFile, nameMapCount, seqs); if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) {        m->mothurRemove(outputNames[j]);        }  return 0; }
691                 }
692                                 if (error == 1) { for (int j = 0; j < outputNames.size(); j++) {        m->mothurRemove(outputNames[j]);        }  return 0; }
693                                 if (seqs.size() != nameMapCount.size()) { m->mothurOut( "The number of sequences in your fastafile does not match the number of sequences in your namefile, aborting."); m->mothurOutEndLine(); for (int j = 0; j < outputNames.size(); j++) {  m->mothurRemove(outputNames[j]);        }  return 0; }
694                                 
695                                 printFile(nameMapCount, newFasta);
696                                 fastaFileNames[s] = newFasta;
697                         }
698                         
699                         if (m->control_pressed) {  for (int j = 0; j < outputNames.size(); j++) {       m->mothurRemove(outputNames[j]);        }  return 0;    }                               
700                         
701                         if (hasGroup) {
702                                 if (nameFileNames.size() != 0) { //you provided a namefile and we don't need to create one
703                                         nameFile = nameFileNames[s];
704                                 }else { nameFile = getNamesFile(fastaFileNames[s]); }
705                                 
706                                 //Parse sequences by group
707                 vector<string> groups;
708                 map<string, string> uniqueNames;
709                 if (hasCount) {
710                     cparser = new SequenceCountParser(nameFile, fastaFileNames[s]);
711                     groups = cparser->getNamesOfGroups();
712                     uniqueNames = cparser->getAllSeqsMap();
713                 }else{
714                     sparser = new SequenceParser(groupFile, fastaFileNames[s], nameFile);
715                     groups = sparser->getNamesOfGroups();
716                     uniqueNames = sparser->getAllSeqsMap();
717                 }
718                                         
719                                 if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) {        m->mothurRemove(outputNames[j]);        }  return 0; }
720                                                                 
721                                 //clears files
722                                 ofstream out, out1, out2;
723                                 m->openOutputFile(outputFileName, out); out.close(); 
724                                 m->openOutputFile(accnosFileName, out1); out1.close();
725                                 if (chimealns) { m->openOutputFile(alnsFileName, out2); out2.close(); }
726                                 int totalSeqs = 0;
727                                 
728                                 if(processors == 1)     {       totalSeqs = driverGroups(outputFileName, newFasta, accnosFileName, alnsFileName, newCountFile, 0, groups.size(), groups);
729                     
730                     //read my own
731                     if (hasCount && !dups) {
732                         CountTable newCount; newCount.readTable(nameFile);
733                         
734                         if (!m->isBlank(newCountFile)) {
735                             ifstream in2;
736                             m->openInputFile(newCountFile, in2);
737                             
738                             string name, group;
739                             while (!in2.eof()) {
740                                 in2 >> name >> group; m->gobble(in2);
741                                 newCount.setAbund(name, group, 0);
742                             }
743                             in2.close();
744                         }
745                         m->mothurRemove(newCountFile);
746                         newCount.printTable(newCountFile);
747                     }
748
749                 }else                           {       totalSeqs = createProcessesGroups(outputFileName, newFasta, accnosFileName, alnsFileName, newCountFile, groups, nameFile, groupFile, fastaFileNames[s]);                        }
750
751                                 if (m->control_pressed) {  for (int j = 0; j < outputNames.size(); j++) {       m->mothurRemove(outputNames[j]);        }  return 0;    }                               
752                
753                 
754                 if (!dups) { 
755                     int totalChimeras = deconvoluteResults(uniqueNames, outputFileName, accnosFileName, alnsFileName);
756                                 
757                     m->mothurOutEndLine(); m->mothurOut("It took " + toString(time(NULL) - start) + " secs to check " + toString(totalSeqs) + " sequences. " + toString(totalChimeras) + " chimeras were found.");      m->mothurOutEndLine();
758                     m->mothurOut("The number of sequences checked may be larger than the number of unique sequences because some sequences are found in several samples."); m->mothurOutEndLine(); 
759                                 }else {
760                     /*if (hasCount) {  //removed empty seqs
761                         ofstream out2;
762                         m->openOutputFile(accnosFileName, out2);
763                         
764                         CountTable c; c.readTable(newCountFile);
765                         vector<string> nseqs = c.getNamesOfSeqs();
766                         vector<string> ngroups = c.getNamesOfGroups();
767                         for (int l = 0; l < nseqs.size(); l++) {
768                             if (c.getNumSeqs(nseqs[l]) == 0) {
769                                 c.remove(nseqs[l]);
770                                 out2 << nseqs[l] << endl;
771                             }
772                         }
773                         for (int l = 0; l < ngroups.size(); l++) {
774                             if (c.getGroupCount(ngroups[l]) == 0) {  c.removeGroup(ngroups[l]); }
775                         }
776                         out2.close();
777                         c.printTable(newCountFile);
778                     }*/
779                 }
780                 
781                 if (hasCount) { delete cparser; }
782                 else { delete sparser; }
783                 
784                                 if (m->control_pressed) {  for (int j = 0; j < outputNames.size(); j++) {       m->mothurRemove(outputNames[j]);        }  return 0;    }                               
785                                         
786                         }else{
787                                 if (m->control_pressed) {  for (int j = 0; j < outputNames.size(); j++) {       m->mothurRemove(outputNames[j]);        }  return 0;    }
788                         
789                                 int numSeqs = 0;
790                                 int numChimeras = 0;
791
792                                 if(processors == 1){ numSeqs = driver(outputFileName, fastaFileNames[s], accnosFileName, alnsFileName, numChimeras); }
793                                 else{   numSeqs = createProcesses(outputFileName, fastaFileNames[s], accnosFileName, alnsFileName, numChimeras); }
794                                 
795                                 //add headings
796                                 ofstream out;
797                                 m->openOutputFile(outputFileName+".temp", out); 
798                                 out << "Score\tQuery\tParentA\tParentB\tIdQM\tIdQA\tIdQB\tIdAB\tIdQT\tLY\tLN\tLA\tRY\tRN\tRA\tDiv\tYN\n";
799                                 out.close();
800                                 
801                                 m->appendFiles(outputFileName, outputFileName+".temp");
802                                 m->mothurRemove(outputFileName); rename((outputFileName+".temp").c_str(), outputFileName.c_str());
803                                 
804                                 if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) {        m->mothurRemove(outputNames[j]);        } return 0; }
805                         
806                                 //remove file made for uchime
807                                 if (templatefile == "self") {  m->mothurRemove(fastaFileNames[s]); }
808                         
809                                 m->mothurOutEndLine(); m->mothurOut("It took " + toString(time(NULL) - start) + " secs to check " + toString(numSeqs) + " sequences. " + toString(numChimeras) + " chimeras were found.");      m->mothurOutEndLine();
810                         }
811                         
812                         outputNames.push_back(outputFileName); outputTypes["chimera"].push_back(outputFileName);
813                         outputNames.push_back(accnosFileName); outputTypes["accnos"].push_back(accnosFileName);
814                         if (chimealns) { outputNames.push_back(alnsFileName); outputTypes["alns"].push_back(alnsFileName); }
815                 }
816         
817                 //set accnos file as new current accnosfile
818                 string current = "";
819                 itTypes = outputTypes.find("accnos");
820                 if (itTypes != outputTypes.end()) {
821                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setAccnosFile(current); }
822                 }
823                 
824                 m->mothurOutEndLine();
825                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
826                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }       
827                 m->mothurOutEndLine();
828                 
829                 return 0;
830                 
831         }
832         catch(exception& e) {
833                 m->errorOut(e, "ChimeraUchimeCommand", "execute");
834                 exit(1);
835         }
836 }
837 //**********************************************************************************************************************
838 int ChimeraUchimeCommand::deconvoluteResults(map<string, string>& uniqueNames, string outputFileName, string accnosFileName, string alnsFileName){
839         try {
840                 map<string, string>::iterator itUnique;
841                 int total = 0;
842                 
843                 //edit accnos file
844                 ifstream in2; 
845                 m->openInputFile(accnosFileName, in2);
846                 
847                 ofstream out2;
848                 m->openOutputFile(accnosFileName+".temp", out2);
849                 
850                 string name;
851                 set<string> namesInFile; //this is so if a sequence is found to be chimera in several samples we dont write it to the results file more than once
852                 set<string>::iterator itNames;
853                 set<string> chimerasInFile;
854                 set<string>::iterator itChimeras;
855
856                 
857                 while (!in2.eof()) {
858                         if (m->control_pressed) { in2.close(); out2.close(); m->mothurRemove(outputFileName); m->mothurRemove((accnosFileName+".temp")); return 0; }
859                         
860                         in2 >> name; m->gobble(in2);
861                         
862                         //find unique name
863                         itUnique = uniqueNames.find(name);
864                         
865                         if (itUnique == uniqueNames.end()) { m->mothurOut("[ERROR]: trouble parsing accnos results. Cannot find " + name + "."); m->mothurOutEndLine(); m->control_pressed = true; }
866                         else {
867                                 itChimeras = chimerasInFile.find((itUnique->second));
868                                 
869                                 if (itChimeras == chimerasInFile.end()) {
870                                         out2 << itUnique->second << endl;
871                                         chimerasInFile.insert((itUnique->second));
872                                         total++;
873                                 }
874                         }
875                 }
876                 in2.close();
877                 out2.close();
878                 
879                 m->mothurRemove(accnosFileName);
880                 rename((accnosFileName+".temp").c_str(), accnosFileName.c_str());
881                 
882                 
883                 
884                 //edit chimera file
885                 ifstream in; 
886                 m->openInputFile(outputFileName, in);
887                 
888                 ofstream out;
889                 m->openOutputFile(outputFileName+".temp", out); out.setf(ios::fixed, ios::floatfield); out.setf(ios::showpoint);
890                 out << "Score\tQuery\tParentA\tParentB\tIdQM\tIdQA\tIdQB\tIdAB\tIdQT\tLY\tLN\tLA\tRY\tRN\tRA\tDiv\tYN\n";
891                 
892                 float temp1;
893                 string parent1, parent2, temp2, temp3, temp4, temp5, temp6, temp7, temp8, temp9, temp10, temp11, temp12, temp13, flag;
894                 name = "";
895                 namesInFile.clear();    
896                 //assumptions - in file each read will always look like - if uchime source is updated, revisit this code.
897                 /*                                                                              1       2       3       4       5       6       7       8       9       10      11      12      13      14      15
898                  0.000000       F11Fcsw_33372/ab=18/            *       *       *       *       *       *       *       *       *       *       *       *       *       *       N
899                  0.018300       F11Fcsw_14980/ab=16/            F11Fcsw_1915/ab=35/     F11Fcsw_6032/ab=42/     79.9    78.7    78.2    78.7    79.2    3       0       5       11      10      20      1.46    N
900                 */
901                 
902                 while (!in.eof()) {
903                         
904                         if (m->control_pressed) { in.close(); out.close(); m->mothurRemove((outputFileName+".temp")); return 0; }
905                         
906                         bool print = false;
907                         in >> temp1;    m->gobble(in);
908                         in >> name;             m->gobble(in);
909                         in >> parent1;  m->gobble(in);
910                         in >> parent2;  m->gobble(in);
911                         in >> temp2 >> temp3 >> temp4 >> temp5 >> temp6 >> temp7 >> temp8 >> temp9 >> temp10 >> temp11 >> temp12 >> temp13 >> flag;
912                         m->gobble(in);
913                         
914                         //parse name - name will look like U68590/ab=1/
915                         string restOfName = "";
916                         int pos = name.find_first_of('/');
917                         if (pos != string::npos) {
918                                 restOfName = name.substr(pos);
919                                 name = name.substr(0, pos);
920                         }
921                         
922                         //find unique name
923                         itUnique = uniqueNames.find(name);
924                         
925                         if (itUnique == uniqueNames.end()) { m->mothurOut("[ERROR]: trouble parsing chimera results. Cannot find "+ name + "."); m->mothurOutEndLine(); m->control_pressed = true; }
926                         else {
927                                 name = itUnique->second;
928                                 //is this name already in the file
929                                 itNames = namesInFile.find((name));
930                                 
931                                 if (itNames == namesInFile.end()) { //no not in file
932                                         if (flag == "N") { //are you really a no??
933                                                 //is this sequence really not chimeric??
934                                                 itChimeras = chimerasInFile.find(name);
935                                                 
936                                                 //then you really are a no so print, otherwise skip
937                                                 if (itChimeras == chimerasInFile.end()) { print = true; }
938                                         }else{ print = true; }
939                                 }
940                         }
941                         
942                         if (print) {
943                                 out << temp1 << '\t' << name << restOfName << '\t';
944                                 namesInFile.insert(name);
945                                 
946                                 //parse parent1 names
947                                 if (parent1 != "*") {
948                                         restOfName = "";
949                                         pos = parent1.find_first_of('/');
950                                         if (pos != string::npos) {
951                                                 restOfName = parent1.substr(pos);
952                                                 parent1 = parent1.substr(0, pos);
953                                         }
954                                         
955                                         itUnique = uniqueNames.find(parent1);
956                                         if (itUnique == uniqueNames.end()) { m->mothurOut("[ERROR]: trouble parsing chimera results. Cannot find parentA "+ parent1 + "."); m->mothurOutEndLine(); m->control_pressed = true; }
957                                         else {  out << itUnique->second << restOfName << '\t';  }
958                                 }else { out << parent1 << '\t'; }
959                                 
960                                 //parse parent2 names
961                                 if (parent2 != "*") {
962                                         restOfName = "";
963                                         pos = parent2.find_first_of('/');
964                                         if (pos != string::npos) {
965                                                 restOfName = parent2.substr(pos);
966                                                 parent2 = parent2.substr(0, pos);
967                                         }
968                                         
969                                         itUnique = uniqueNames.find(parent2);
970                                         if (itUnique == uniqueNames.end()) { m->mothurOut("[ERROR]: trouble parsing chimera results. Cannot find parentB "+ parent2 + "."); m->mothurOutEndLine(); m->control_pressed = true; }
971                                         else {  out << itUnique->second << restOfName << '\t';  }
972                                 }else { out << parent2 << '\t'; }
973                                 
974                                 out << temp2 << '\t' << temp3 << '\t' << temp4 << '\t' << temp5 << '\t' << temp6 << '\t' << temp7 << '\t' << temp8 << '\t' << temp9 << '\t' << temp10 << '\t' << temp11 << '\t' << temp12 << temp13 << '\t' << flag << endl;    
975                         }
976                 }
977                 in.close();
978                 out.close();
979                 
980                 m->mothurRemove(outputFileName);
981                 rename((outputFileName+".temp").c_str(), outputFileName.c_str());
982                 
983                                 
984                 //edit anls file
985                 //assumptions - in file each read will always look like - if uchime source is updated, revisit this code.
986                 /*
987                  ------------------------------------------------------------------------
988                  Query   (  179 nt) F21Fcsw_11639/ab=591/
989                  ParentA (  179 nt) F11Fcsw_6529/ab=1625/
990                  ParentB (  181 nt) F21Fcsw_12128/ab=1827/
991                  
992                  A     1 AAGgAAGAtTAATACaagATGgCaTCatgAGtccgCATgTtcAcatGATTAAAG--gTaTtcCGGTagacGATGGGGATG 78
993                  Q     1 AAGTAAGACTAATACCCAATGACGTCTCTAGAAGACATCTGAAAGAGATTAAAG--ATTTATCGGTGATGGATGGGGATG 78
994                  B     1 AAGgAAGAtTAATcCaggATGggaTCatgAGttcACATgTccgcatGATTAAAGgtATTTtcCGGTagacGATGGGGATG 80
995                  Diffs      N    N    A N?N   N N  NNN  N?NB   N ?NaNNN          B B NN    NNNN          
996                  Votes      0    0    + 000   0 0  000  000+   0 00!000            + 00    0000          
997                  Model   AAAAAAAAAAAAAAAAAAAAAAxBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB
998                  
999                  A    79 CGTtccATTAGaTaGTaGGCGGGGTAACGGCCCACCtAGtCttCGATggaTAGGGGTTCTGAGAGGAAGGTCCCCCACAT 158
1000                  Q    79 CGTCTGATTAGCTTGTTGGCGGGGTAACGGCCCACCAAGGCAACGATCAGTAGGGGTTCTGAGAGGAAGGTCCCCCACAT 158
1001                  B    81 CGTtccATTAGaTaGTaGGCGGGGTAACGGCCCACCtAGtCAACGATggaTAGGGGTTCTGAGAGGAAGGTCCCCCACAT 160
1002                  Diffs      NNN     N N  N                   N  N BB    NNN                              
1003                  Votes      000     0 0  0                   0  0 ++    000                              
1004                  Model   BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB
1005                  
1006                  A   159 TGGAACTGAGACACGGTCCAA 179
1007                  Q   159 TGGAACTGAGACACGGTCCAA 179
1008                  B   161 TGGAACTGAGACACGGTCCAA 181
1009                  Diffs                        
1010                  Votes                        
1011                  Model   BBBBBBBBBBBBBBBBBBBBB
1012                  
1013                  Ids.  QA 76.6%, QB 77.7%, AB 93.7%, QModel 78.9%, Div. +1.5%
1014                  Diffs Left 7: N 0, A 6, Y 1 (14.3%); Right 35: N 1, A 30, Y 4 (11.4%), Score 0.0047
1015                 */
1016                 if (chimealns) {
1017                         ifstream in3; 
1018                         m->openInputFile(alnsFileName, in3);
1019                 
1020                         ofstream out3;
1021                         m->openOutputFile(alnsFileName+".temp", out3); out3.setf(ios::fixed, ios::floatfield); out3.setf(ios::showpoint);
1022                 
1023                         name = "";
1024                         namesInFile.clear();
1025                         string line = "";
1026                         
1027                         while (!in3.eof()) {
1028                                 if (m->control_pressed) { in3.close(); out3.close(); m->mothurRemove(outputFileName); m->mothurRemove((accnosFileName)); m->mothurRemove((alnsFileName+".temp")); return 0; }
1029                                 
1030                                 line = "";
1031                                 line = m->getline(in3); 
1032                                 string temp = "";
1033                                 
1034                                 if (line != "") {
1035                                         istringstream iss(line);
1036                                         iss >> temp;
1037                                         
1038                                         //are you a name line
1039                                         if ((temp == "Query") || (temp == "ParentA") || (temp == "ParentB")) {
1040                                                 int spot = 0;
1041                                                 for (int i = 0; i < line.length(); i++) {
1042                                                         spot = i;
1043                                                         if (line[i] == ')') { break; }
1044                                                         else { out3 << line[i]; }
1045                                                 }
1046                                                 
1047                                                 if (spot == (line.length() - 1)) { m->mothurOut("[ERROR]: could not line sequence name in line " + line + "."); m->mothurOutEndLine(); m->control_pressed = true; }
1048                                                 else if ((spot+2) > (line.length() - 1)) { m->mothurOut("[ERROR]: could not line sequence name in line " + line + "."); m->mothurOutEndLine(); m->control_pressed = true; }
1049                                                 else {
1050                                                         out << line[spot] << line[spot+1];
1051                                                         
1052                                                         name = line.substr(spot+2);
1053                                                         
1054                                                         //parse name - name will either look like U68590/ab=1/ or U68590
1055                                                         string restOfName = "";
1056                                                         int pos = name.find_first_of('/');
1057                                                         if (pos != string::npos) {
1058                                                                 restOfName = name.substr(pos);
1059                                                                 name = name.substr(0, pos);
1060                                                         }
1061                                                         
1062                                                         //find unique name
1063                                                         itUnique = uniqueNames.find(name);
1064                                                         
1065                                                         if (itUnique == uniqueNames.end()) { m->mothurOut("[ERROR]: trouble parsing alns results. Cannot find "+ name + "."); m->mothurOutEndLine();m->control_pressed = true;  }
1066                                                         else {
1067                                                                 //only limit repeats on query names
1068                                                                 if (temp == "Query") {
1069                                                                         itNames = namesInFile.find((itUnique->second));
1070                                                                         
1071                                                                         if (itNames == namesInFile.end()) {
1072                                                                                 out << itUnique->second << restOfName << endl;
1073                                                                                 namesInFile.insert((itUnique->second));
1074                                                                         }
1075                                                                 }else { out << itUnique->second << restOfName << endl;  }
1076                                                         }
1077                                                         
1078                                                 }
1079                                                 
1080                                         }else { //not need to alter line
1081                                                 out3 << line << endl;
1082                                         }
1083                                 }else { out3 << endl; }
1084                         }
1085                         in3.close();
1086                         out3.close();
1087                         
1088                         m->mothurRemove(alnsFileName);
1089                         rename((alnsFileName+".temp").c_str(), alnsFileName.c_str());
1090                 }
1091                 
1092                 return total;
1093         }
1094         catch(exception& e) {
1095                 m->errorOut(e, "ChimeraUchimeCommand", "deconvoluteResults");
1096                 exit(1);
1097         }
1098 }       
1099 //**********************************************************************************************************************
1100 int ChimeraUchimeCommand::printFile(vector<seqPriorityNode>& nameMapCount, string filename){
1101         try {
1102                 
1103                 sort(nameMapCount.begin(), nameMapCount.end(), compareSeqPriorityNodes);
1104                 
1105                 ofstream out;
1106                 m->openOutputFile(filename, out);
1107                 
1108                 //print new file in order of
1109                 for (int i = 0; i < nameMapCount.size(); i++) {
1110                         out << ">" << nameMapCount[i].name  << "/ab=" << nameMapCount[i].numIdentical << "/" << endl << nameMapCount[i].seq << endl;
1111                 }
1112                 out.close();
1113                 
1114                 return 0;
1115         }
1116         catch(exception& e) {
1117                 m->errorOut(e, "ChimeraUchimeCommand", "printFile");
1118                 exit(1);
1119         }
1120 }       
1121 //**********************************************************************************************************************
1122 int ChimeraUchimeCommand::readFasta(string filename, map<string, string>& seqs){
1123         try {
1124                 //create input file for uchime
1125                 //read through fastafile and store info
1126                 ifstream in;
1127                 m->openInputFile(filename, in);
1128                 
1129                 while (!in.eof()) {
1130                         
1131                         if (m->control_pressed) { in.close(); return 0; }
1132                         
1133                         Sequence seq(in); m->gobble(in);
1134                         seqs[seq.getName()] = seq.getAligned();
1135                 }
1136                 in.close();
1137                 
1138                 return 0;
1139         }
1140         catch(exception& e) {
1141                 m->errorOut(e, "ChimeraUchimeCommand", "readFasta");
1142                 exit(1);
1143         }
1144 }       
1145 //**********************************************************************************************************************
1146
1147 string ChimeraUchimeCommand::getNamesFile(string& inputFile){
1148         try {
1149                 string nameFile = "";
1150                 
1151                 m->mothurOutEndLine(); m->mothurOut("No namesfile given, running unique.seqs command to generate one."); m->mothurOutEndLine(); m->mothurOutEndLine();
1152                 
1153                 //use unique.seqs to create new name and fastafile
1154                 string inputString = "fasta=" + inputFile;
1155                 m->mothurOut("/******************************************/"); m->mothurOutEndLine(); 
1156                 m->mothurOut("Running command: unique.seqs(" + inputString + ")"); m->mothurOutEndLine(); 
1157                 m->mothurCalling = true;
1158         
1159                 Command* uniqueCommand = new DeconvoluteCommand(inputString);
1160                 uniqueCommand->execute();
1161                 
1162                 map<string, vector<string> > filenames = uniqueCommand->getOutputFiles();
1163                 
1164                 delete uniqueCommand;
1165                 m->mothurCalling = false;
1166                 m->mothurOut("/******************************************/"); m->mothurOutEndLine(); 
1167                 
1168                 nameFile = filenames["name"][0];
1169                 inputFile = filenames["fasta"][0];
1170                 
1171                 return nameFile;
1172         }
1173         catch(exception& e) {
1174                 m->errorOut(e, "ChimeraUchimeCommand", "getNamesFile");
1175                 exit(1);
1176         }
1177 }
1178 //**********************************************************************************************************************
1179 int ChimeraUchimeCommand::driverGroups(string outputFName, string filename, string accnos, string alns, string countlist, int start, int end, vector<string> groups){
1180         try {
1181                 
1182                 int totalSeqs = 0;
1183                 int numChimeras = 0;
1184         
1185         ofstream outCountList;
1186         if (hasCount && dups) { m->openOutputFile(countlist, outCountList); }
1187         
1188                 for (int i = start; i < end; i++) {
1189                         int start = time(NULL);  if (m->control_pressed) {  outCountList.close(); m->mothurRemove(countlist); return 0; }
1190             
1191                         int error;
1192             if (hasCount) { error = cparser->getSeqs(groups[i], filename, true); if ((error == 1) || m->control_pressed) {  return 0; } }
1193             else { error = sparser->getSeqs(groups[i], filename, true); if ((error == 1) || m->control_pressed) {  return 0; } }
1194                         
1195                         int numSeqs = driver((outputFName + groups[i]), filename, (accnos+groups[i]), (alns+ groups[i]), numChimeras);
1196                         totalSeqs += numSeqs;
1197                         
1198                         if (m->control_pressed) { return 0; }
1199                         
1200                         //remove file made for uchime
1201                         if (!m->debug) {  m->mothurRemove(filename);  }
1202             else { m->mothurOut("[DEBUG]: saving file: " + filename + ".\n"); }
1203                         
1204             //if we provided a count file with group info and set dereplicate=t, then we want to create a *.pick.count_table
1205             //This table will zero out group counts for seqs determined to be chimeric by that group.
1206             if (dups) {
1207                 if (!m->isBlank(accnos+groups[i])) {
1208                     ifstream in;
1209                     m->openInputFile(accnos+groups[i], in);
1210                     string name;
1211                     if (hasCount) {
1212                         while (!in.eof()) {
1213                             in >> name; m->gobble(in);
1214                             outCountList << name << '\t' << groups[i] << endl;
1215                         }
1216                         in.close();
1217                     }else {
1218                         map<string, string> thisnamemap = sparser->getNameMap(groups[i]);
1219                         map<string, string>::iterator itN;
1220                         ofstream out;
1221                         m->openOutputFile(accnos+groups[i]+".temp", out);
1222                         while (!in.eof()) {
1223                             in >> name; m->gobble(in); 
1224                             itN = thisnamemap.find(name);
1225                             if (itN != thisnamemap.end()) {
1226                                 vector<string> tempNames; m->splitAtComma(itN->second, tempNames); 
1227                                 for (int j = 0; j < tempNames.size(); j++) { out << tempNames[j] << endl; }
1228                                 
1229                             }else { m->mothurOut("[ERROR]: parsing cannot find " + name + ".\n"); m->control_pressed = true; }
1230                         }
1231                         out.close();
1232                         in.close();
1233                         m->renameFile(accnos+groups[i]+".temp", accnos+groups[i]);
1234                     }
1235                    
1236                 }
1237             }
1238             
1239                         //append files
1240                         m->appendFiles((outputFName+groups[i]), outputFName); m->mothurRemove((outputFName+groups[i]));
1241                         m->appendFiles((accnos+groups[i]), accnos); m->mothurRemove((accnos+groups[i]));
1242                         if (chimealns) { m->appendFiles((alns+groups[i]), alns); m->mothurRemove((alns+groups[i])); }
1243                         
1244                         m->mothurOutEndLine(); m->mothurOut("It took " + toString(time(NULL) - start) + " secs to check " + toString(numSeqs) + " sequences from group " + groups[i] + ".");    m->mothurOutEndLine();                                  
1245                 }
1246
1247         if (hasCount && dups) { outCountList.close(); }
1248         
1249         return totalSeqs;
1250                 
1251         }
1252         catch(exception& e) {
1253                 m->errorOut(e, "ChimeraUchimeCommand", "driverGroups");
1254                 exit(1);
1255         }
1256 }       
1257 //**********************************************************************************************************************
1258
1259 int ChimeraUchimeCommand::driver(string outputFName, string filename, string accnos, string alns, int& numChimeras){
1260         try {
1261                 
1262                 outputFName = m->getFullPathName(outputFName);
1263                 filename = m->getFullPathName(filename);
1264                 alns = m->getFullPathName(alns);
1265                 
1266                 //to allow for spaces in the path
1267                 outputFName = "\"" + outputFName + "\"";
1268                 filename = "\"" + filename + "\"";
1269                 alns = "\"" + alns + "\"";
1270                                 
1271                 vector<char*> cPara;
1272                 
1273                 string uchimeCommand = uchimeLocation;
1274         uchimeCommand = "\"" + uchimeCommand + "\" ";
1275         
1276         char* tempUchime;
1277                 tempUchime= new char[uchimeCommand.length()+1]; 
1278                 *tempUchime = '\0';
1279                 strncat(tempUchime, uchimeCommand.c_str(), uchimeCommand.length());
1280                 cPara.push_back(tempUchime);
1281                 
1282         //are you using a reference file
1283                 if (templatefile != "self") {
1284             string outputFileName = filename.substr(1, filename.length()-2) + ".uchime_formatted";
1285             prepFile(filename.substr(1, filename.length()-2), outputFileName);
1286             filename = outputFileName;
1287             filename = "\"" + filename + "\"";
1288                         //add reference file
1289                         char* tempRef = new char[5]; 
1290                         //strcpy(tempRef, "--db"); 
1291                         *tempRef = '\0'; strncat(tempRef, "--db", 4);
1292                         cPara.push_back(tempRef);  
1293                         char* tempR = new char[templatefile.length()+1];
1294                         //strcpy(tempR, templatefile.c_str());
1295                         *tempR = '\0'; strncat(tempR, templatefile.c_str(), templatefile.length());
1296                         cPara.push_back(tempR);
1297                 }
1298                 
1299                 char* tempIn = new char[8]; 
1300                 *tempIn = '\0'; strncat(tempIn, "--input", 7);
1301                 //strcpy(tempIn, "--input"); 
1302                 cPara.push_back(tempIn);
1303                 char* temp = new char[filename.length()+1];
1304                 *temp = '\0'; strncat(temp, filename.c_str(), filename.length());
1305                 //strcpy(temp, filename.c_str());
1306                 cPara.push_back(temp);
1307                 
1308                 char* tempO = new char[12]; 
1309                 *tempO = '\0'; strncat(tempO, "--uchimeout", 11);
1310                 //strcpy(tempO, "--uchimeout"); 
1311                 cPara.push_back(tempO);
1312                 char* tempout = new char[outputFName.length()+1];
1313                 //strcpy(tempout, outputFName.c_str());
1314                 *tempout = '\0'; strncat(tempout, outputFName.c_str(), outputFName.length());
1315                 cPara.push_back(tempout);
1316                 
1317                 if (chimealns) {
1318                         char* tempA = new char[13]; 
1319                         *tempA = '\0'; strncat(tempA, "--uchimealns", 12);
1320                         //strcpy(tempA, "--uchimealns"); 
1321                         cPara.push_back(tempA);
1322                         char* tempa = new char[alns.length()+1];
1323                         //strcpy(tempa, alns.c_str());
1324                         *tempa = '\0'; strncat(tempa, alns.c_str(), alns.length());
1325                         cPara.push_back(tempa);
1326                 }
1327         
1328         if (strand != "") {
1329                         char* tempA = new char[9]; 
1330                         *tempA = '\0'; strncat(tempA, "--strand", 8);
1331                         cPara.push_back(tempA);
1332                         char* tempa = new char[strand.length()+1];
1333                         *tempa = '\0'; strncat(tempa, strand.c_str(), strand.length());
1334                         cPara.push_back(tempa);
1335                 }
1336                 
1337                 if (useAbskew) {
1338                         char* tempskew = new char[9];
1339                         *tempskew = '\0'; strncat(tempskew, "--abskew", 8);
1340                         //strcpy(tempskew, "--abskew"); 
1341                         cPara.push_back(tempskew);
1342                         char* tempSkew = new char[abskew.length()+1];
1343                         //strcpy(tempSkew, abskew.c_str());
1344                         *tempSkew = '\0'; strncat(tempSkew, abskew.c_str(), abskew.length());
1345                         cPara.push_back(tempSkew);
1346                 }
1347                 
1348                 if (useMinH) {
1349                         char* tempminh = new char[7]; 
1350                         *tempminh = '\0'; strncat(tempminh, "--minh", 6);
1351                         //strcpy(tempminh, "--minh"); 
1352                         cPara.push_back(tempminh);
1353                         char* tempMinH = new char[minh.length()+1];
1354                         *tempMinH = '\0'; strncat(tempMinH, minh.c_str(), minh.length());
1355                         //strcpy(tempMinH, minh.c_str());
1356                         cPara.push_back(tempMinH);
1357                 }
1358                 
1359                 if (useMindiv) {
1360                         char* tempmindiv = new char[9]; 
1361                         *tempmindiv = '\0'; strncat(tempmindiv, "--mindiv", 8);
1362                         //strcpy(tempmindiv, "--mindiv"); 
1363                         cPara.push_back(tempmindiv);
1364                         char* tempMindiv = new char[mindiv.length()+1];
1365                         *tempMindiv = '\0'; strncat(tempMindiv, mindiv.c_str(), mindiv.length());
1366                         //strcpy(tempMindiv, mindiv.c_str());
1367                         cPara.push_back(tempMindiv);
1368                 }
1369                 
1370                 if (useXn) {
1371                         char* tempxn = new char[5]; 
1372                         //strcpy(tempxn, "--xn"); 
1373                         *tempxn = '\0'; strncat(tempxn, "--xn", 4);
1374                         cPara.push_back(tempxn);
1375                         char* tempXn = new char[xn.length()+1];
1376                         //strcpy(tempXn, xn.c_str());
1377                         *tempXn = '\0'; strncat(tempXn, xn.c_str(), xn.length());
1378                         cPara.push_back(tempXn);
1379                 }
1380                 
1381                 if (useDn) {
1382                         char* tempdn = new char[5]; 
1383                         //strcpy(tempdn, "--dn"); 
1384                         *tempdn = '\0'; strncat(tempdn, "--dn", 4);
1385                         cPara.push_back(tempdn);
1386                         char* tempDn = new char[dn.length()+1];
1387                         *tempDn = '\0'; strncat(tempDn, dn.c_str(), dn.length());
1388                         //strcpy(tempDn, dn.c_str());
1389                         cPara.push_back(tempDn);
1390                 }
1391                 
1392                 if (useXa) {
1393                         char* tempxa = new char[5]; 
1394                         //strcpy(tempxa, "--xa"); 
1395                         *tempxa = '\0'; strncat(tempxa, "--xa", 4);
1396                         cPara.push_back(tempxa);
1397                         char* tempXa = new char[xa.length()+1];
1398                         *tempXa = '\0'; strncat(tempXa, xa.c_str(), xa.length());
1399                         //strcpy(tempXa, xa.c_str());
1400                         cPara.push_back(tempXa);
1401                 }
1402                 
1403                 if (useChunks) {
1404                         char* tempchunks = new char[9]; 
1405                         //strcpy(tempchunks, "--chunks"); 
1406                         *tempchunks = '\0'; strncat(tempchunks, "--chunks", 8);
1407                         cPara.push_back(tempchunks);
1408                         char* tempChunks = new char[chunks.length()+1];
1409                         *tempChunks = '\0'; strncat(tempChunks, chunks.c_str(), chunks.length());
1410                         //strcpy(tempChunks, chunks.c_str());
1411                         cPara.push_back(tempChunks);
1412                 }
1413                 
1414                 if (useMinchunk) {
1415                         char* tempminchunk = new char[11]; 
1416                         //strcpy(tempminchunk, "--minchunk"); 
1417                         *tempminchunk = '\0'; strncat(tempminchunk, "--minchunk", 10);
1418                         cPara.push_back(tempminchunk);
1419                         char* tempMinchunk = new char[minchunk.length()+1];
1420                         *tempMinchunk = '\0'; strncat(tempMinchunk, minchunk.c_str(), minchunk.length());
1421                         //strcpy(tempMinchunk, minchunk.c_str());
1422                         cPara.push_back(tempMinchunk);
1423                 }
1424                 
1425                 if (useIdsmoothwindow) {
1426                         char* tempidsmoothwindow = new char[17]; 
1427                         *tempidsmoothwindow = '\0'; strncat(tempidsmoothwindow, "--idsmoothwindow", 16);
1428                         //strcpy(tempidsmoothwindow, "--idsmoothwindow"); 
1429                         cPara.push_back(tempidsmoothwindow);
1430                         char* tempIdsmoothwindow = new char[idsmoothwindow.length()+1];
1431                         *tempIdsmoothwindow = '\0'; strncat(tempIdsmoothwindow, idsmoothwindow.c_str(), idsmoothwindow.length());
1432                         //strcpy(tempIdsmoothwindow, idsmoothwindow.c_str());
1433                         cPara.push_back(tempIdsmoothwindow);
1434                 }
1435                 
1436                 /*if (useMinsmoothid) {
1437                         char* tempminsmoothid = new char[14]; 
1438                         //strcpy(tempminsmoothid, "--minsmoothid"); 
1439                         *tempminsmoothid = '\0'; strncat(tempminsmoothid, "--minsmoothid", 13);
1440                         cPara.push_back(tempminsmoothid);
1441                         char* tempMinsmoothid = new char[minsmoothid.length()+1];
1442                         *tempMinsmoothid = '\0'; strncat(tempMinsmoothid, minsmoothid.c_str(), minsmoothid.length());
1443                         //strcpy(tempMinsmoothid, minsmoothid.c_str());
1444                         cPara.push_back(tempMinsmoothid);
1445                 }*/
1446                 
1447                 if (useMaxp) {
1448                         char* tempmaxp = new char[7]; 
1449                         //strcpy(tempmaxp, "--maxp"); 
1450                         *tempmaxp = '\0'; strncat(tempmaxp, "--maxp", 6);
1451                         cPara.push_back(tempmaxp);
1452                         char* tempMaxp = new char[maxp.length()+1];
1453                         *tempMaxp = '\0'; strncat(tempMaxp, maxp.c_str(), maxp.length());
1454                         //strcpy(tempMaxp, maxp.c_str());
1455                         cPara.push_back(tempMaxp);
1456                 }
1457                 
1458                 if (!skipgaps) {
1459                         char* tempskipgaps = new char[13]; 
1460                         //strcpy(tempskipgaps, "--[no]skipgaps");
1461                         *tempskipgaps = '\0'; strncat(tempskipgaps, "--noskipgaps", 12);
1462                         cPara.push_back(tempskipgaps);
1463                 }
1464                 
1465                 if (!skipgaps2) {
1466                         char* tempskipgaps2 = new char[14]; 
1467                         //strcpy(tempskipgaps2, "--[no]skipgaps2"); 
1468                         *tempskipgaps2 = '\0'; strncat(tempskipgaps2, "--noskipgaps2", 13);
1469                         cPara.push_back(tempskipgaps2);
1470                 }
1471                 
1472                 if (useMinlen) {
1473                         char* tempminlen = new char[9]; 
1474                         *tempminlen = '\0'; strncat(tempminlen, "--minlen", 8);
1475                         //strcpy(tempminlen, "--minlen"); 
1476                         cPara.push_back(tempminlen);
1477                         char* tempMinlen = new char[minlen.length()+1];
1478                         //strcpy(tempMinlen, minlen.c_str());
1479                         *tempMinlen = '\0'; strncat(tempMinlen, minlen.c_str(), minlen.length());
1480                         cPara.push_back(tempMinlen);
1481                 }
1482                 
1483                 if (useMaxlen) {
1484                         char* tempmaxlen = new char[9]; 
1485                         //strcpy(tempmaxlen, "--maxlen"); 
1486                         *tempmaxlen = '\0'; strncat(tempmaxlen, "--maxlen", 8);
1487                         cPara.push_back(tempmaxlen);
1488                         char* tempMaxlen = new char[maxlen.length()+1];
1489                         *tempMaxlen = '\0'; strncat(tempMaxlen, maxlen.c_str(), maxlen.length());
1490                         //strcpy(tempMaxlen, maxlen.c_str());
1491                         cPara.push_back(tempMaxlen);
1492                 }
1493                 
1494                 if (ucl) {
1495                         char* tempucl = new char[5]; 
1496                         strcpy(tempucl, "--ucl"); 
1497                         cPara.push_back(tempucl);
1498                 }
1499                 
1500                 if (useQueryfract) {
1501                         char* tempqueryfract = new char[13]; 
1502                         *tempqueryfract = '\0'; strncat(tempqueryfract, "--queryfract", 12);
1503                         //strcpy(tempqueryfract, "--queryfract"); 
1504                         cPara.push_back(tempqueryfract);
1505                         char* tempQueryfract = new char[queryfract.length()+1];
1506                         *tempQueryfract = '\0'; strncat(tempQueryfract, queryfract.c_str(), queryfract.length());
1507                         //strcpy(tempQueryfract, queryfract.c_str());
1508                         cPara.push_back(tempQueryfract);
1509                 }
1510                 
1511                 
1512                 char** uchimeParameters;
1513                 uchimeParameters = new char*[cPara.size()];
1514                 string commandString = "";
1515                 for (int i = 0; i < cPara.size(); i++) {  uchimeParameters[i] = cPara[i];  commandString += toString(cPara[i]) + " "; } 
1516                 //int numArgs = cPara.size();
1517                 
1518                 //uchime_main(numArgs, uchimeParameters); 
1519                 //cout << "commandString = " << commandString << endl;
1520 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
1521 #else
1522                 commandString = "\"" + commandString + "\"";
1523 #endif
1524         if (m->debug) { m->mothurOut("[DEBUG]: uchime command = " + commandString + ".\n"); }
1525                 system(commandString.c_str());
1526                 
1527                 //free memory
1528                 for(int i = 0; i < cPara.size(); i++)  {  delete cPara[i];  }
1529                 delete[] uchimeParameters; 
1530                 
1531                 //remove "" from filenames
1532                 outputFName = outputFName.substr(1, outputFName.length()-2);
1533                 filename = filename.substr(1, filename.length()-2);
1534                 alns = alns.substr(1, alns.length()-2);
1535                 
1536                 if (m->control_pressed) { return 0; }
1537                 
1538                 //create accnos file from uchime results
1539                 ifstream in; 
1540                 m->openInputFile(outputFName, in);
1541                 
1542                 ofstream out;
1543                 m->openOutputFile(accnos, out);
1544                 
1545                 int num = 0;
1546                 numChimeras = 0;
1547                 while(!in.eof()) {
1548                         
1549                         if (m->control_pressed) { break; }
1550                         
1551                         string name = "";
1552                         string chimeraFlag = "";
1553                         //in >> chimeraFlag >> name;
1554                         
1555             string line = m->getline(in);
1556             vector<string> pieces = m->splitWhiteSpace(line);
1557             if (pieces.size() > 2) { 
1558                 name = pieces[1];
1559                 //fix name if needed
1560                 if (templatefile == "self") { 
1561                     name = name.substr(0, name.length()-1); //rip off last /
1562                     name = name.substr(0, name.find_last_of('/'));
1563                 }
1564                 
1565                 chimeraFlag = pieces[pieces.size()-1];
1566                         }
1567                         //for (int i = 0; i < 15; i++) {  in >> chimeraFlag; }
1568                         m->gobble(in);
1569                         
1570                         if (chimeraFlag == "Y") {  out << name << endl; numChimeras++; }
1571                         num++;
1572                 }
1573                 in.close();
1574                 out.close();
1575                 
1576         //if (templatefile != "self") {  m->mothurRemove(filename); }
1577         
1578                 return num;
1579         }
1580         catch(exception& e) {
1581                 m->errorOut(e, "ChimeraUchimeCommand", "driver");
1582                 exit(1);
1583         }
1584 }
1585 /**************************************************************************************************/
1586 //uchime can't handle some of the things allowed in mothurs fasta files. This functions "cleans up" the file.
1587 int ChimeraUchimeCommand::prepFile(string filename, string output) {
1588         try {
1589         
1590         ifstream in;
1591         m->openInputFile(filename, in);
1592         
1593         ofstream out;
1594         m->openOutputFile(output, out);
1595         
1596         while (!in.eof()) {
1597             if (m->control_pressed) { break;  }
1598             
1599             Sequence seq(in); m->gobble(in);
1600             
1601             if (seq.getName() != "") { seq.printSequence(out); }
1602         }
1603         in.close();
1604         out.close();
1605         
1606         return 0;
1607     }
1608         catch(exception& e) {
1609                 m->errorOut(e, "ChimeraUchimeCommand", "prepFile");
1610                 exit(1);
1611         }
1612 }
1613 /**************************************************************************************************/
1614
1615 int ChimeraUchimeCommand::createProcesses(string outputFileName, string filename, string accnos, string alns, int& numChimeras) {
1616         try {
1617                 
1618                 processIDS.clear();
1619                 int process = 1;
1620                 int num = 0;
1621                 vector<string> files;
1622                 
1623 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)          
1624                 //break up file into multiple files
1625                 m->divideFile(filename, processors, files);
1626                 
1627                 if (m->control_pressed) {  return 0;  }
1628                                 
1629                 //loop through and create all the processes you want
1630                 while (process != processors) {
1631                         int pid = fork();
1632                         
1633                         if (pid > 0) {
1634                                 processIDS.push_back(pid);  //create map from line number to pid so you can append files in correct order later
1635                                 process++;
1636                         }else if (pid == 0){
1637                                 num = driver(outputFileName + toString(getpid()) + ".temp", files[process], accnos + toString(getpid()) + ".temp", alns + toString(getpid()) + ".temp", numChimeras);
1638                                 
1639                                 //pass numSeqs to parent
1640                                 ofstream out;
1641                                 string tempFile = outputFileName + toString(getpid()) + ".num.temp";
1642                                 m->openOutputFile(tempFile, out);
1643                                 out << num << endl;
1644                                 out << numChimeras << endl;
1645                                 out.close();
1646                                 
1647                                 exit(0);
1648                         }else { 
1649                                 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine(); 
1650                                 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
1651                                 exit(0);
1652                         }
1653                 }
1654                 
1655                 //do my part
1656                 num = driver(outputFileName, files[0], accnos, alns, numChimeras);
1657                 
1658                 //force parent to wait until all the processes are done
1659                 for (int i=0;i<processIDS.size();i++) { 
1660                         int temp = processIDS[i];
1661                         wait(&temp);
1662                 }
1663                 
1664                 for (int i = 0; i < processIDS.size(); i++) {
1665                         ifstream in;
1666                         string tempFile =  outputFileName + toString(processIDS[i]) + ".num.temp";
1667                         m->openInputFile(tempFile, in);
1668                         if (!in.eof()) { 
1669                                 int tempNum = 0; 
1670                                 in >> tempNum; m->gobble(in);
1671                                 num += tempNum; 
1672                                 in >> tempNum;
1673                                 numChimeras += tempNum;
1674                         }
1675                         in.close(); m->mothurRemove(tempFile);
1676                 }
1677 #else
1678                 //////////////////////////////////////////////////////////////////////////////////////////////////////
1679                 //Windows version shared memory, so be careful when passing variables through the preClusterData struct. 
1680                 //Above fork() will clone, so memory is separate, but that's not the case with windows, 
1681                 //////////////////////////////////////////////////////////////////////////////////////////////////////
1682                 
1683                 //divide file
1684                 int count = 0;
1685                 int spot = 0;
1686                 map<int, ofstream*> filehandles;
1687                 map<int, ofstream*>::iterator it3;
1688                 
1689                 ofstream* temp;
1690                 for (int i = 0; i < processors; i++) {
1691                         temp = new ofstream;
1692                         filehandles[i] = temp;
1693                         m->openOutputFile(filename+toString(i)+".temp", *(temp));
1694                         files.push_back(filename+toString(i)+".temp");
1695                 }
1696                 
1697                 ifstream in;
1698                 m->openInputFile(filename, in);
1699                 
1700                 while(!in.eof()) {
1701                         
1702                         if (m->control_pressed) { in.close(); for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) { (*(it3->second)).close(); delete it3->second; } return 0; }
1703                         
1704                         Sequence tempSeq(in); m->gobble(in); 
1705                         
1706                         if (tempSeq.getName() != "") {
1707                                 tempSeq.printSequence(*(filehandles[spot])); 
1708                                 spot++; count++;
1709                                 if (spot == processors) { spot = 0; }
1710                         }
1711                 }
1712                 in.close();
1713                 
1714                 //delete memory
1715                 for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) {
1716                         (*(it3->second)).close();
1717                         delete it3->second;
1718                 }
1719                 
1720                 //sanity check for number of processors
1721                 if (count < processors) { processors = count; }
1722                 
1723                 vector<uchimeData*> pDataArray; 
1724                 DWORD   dwThreadIdArray[processors-1];
1725                 HANDLE  hThreadArray[processors-1]; 
1726                 vector<string> dummy; //used so that we can use the same struct for MyUchimeSeqsThreadFunction and MyUchimeThreadFunction
1727                 
1728                 //Create processor worker threads.
1729                 for( int i=1; i<processors; i++ ){
1730                         // Allocate memory for thread data.
1731                         string extension = toString(i) + ".temp";
1732                         
1733                         uchimeData* tempUchime = new uchimeData(outputFileName+extension, uchimeLocation, templatefile, files[i], "", "", "", accnos+extension, alns+extension, "", dummy, m, 0, 0,  i);
1734                         tempUchime->setBooleans(dups, useAbskew, chimealns, useMinH, useMindiv, useXn, useDn, useXa, useChunks, useMinchunk, useIdsmoothwindow, useMinsmoothid, useMaxp, skipgaps, skipgaps2, useMinlen, useMaxlen, ucl, useQueryfract, hasCount);
1735                         tempUchime->setVariables(abskew, minh, mindiv, xn, dn, xa, chunks, minchunk, idsmoothwindow, minsmoothid, maxp, minlen, maxlen, queryfract, strand);
1736                         
1737                         pDataArray.push_back(tempUchime);
1738                         processIDS.push_back(i);
1739                         
1740                         //MySeqSumThreadFunction is in header. It must be global or static to work with the threads.
1741                         //default security attributes, thread function name, argument to thread function, use default creation flags, returns the thread identifier
1742                         hThreadArray[i-1] = CreateThread(NULL, 0, MyUchimeSeqsThreadFunction, pDataArray[i-1], 0, &dwThreadIdArray[i-1]);   
1743                 }
1744                 
1745                 
1746                 //using the main process as a worker saves time and memory
1747                 num = driver(outputFileName, files[0], accnos, alns, numChimeras);
1748                 
1749                 //Wait until all threads have terminated.
1750                 WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE);
1751                 
1752                 //Close all thread handles and free memory allocations.
1753                 for(int i=0; i < pDataArray.size(); i++){
1754                         num += pDataArray[i]->count;
1755                         numChimeras += pDataArray[i]->numChimeras;
1756                         CloseHandle(hThreadArray[i]);
1757                         delete pDataArray[i];
1758                 }
1759 #endif          
1760                 
1761                 //append output files
1762                 for(int i=0;i<processIDS.size();i++){
1763                         m->appendFiles((outputFileName + toString(processIDS[i]) + ".temp"), outputFileName);
1764                         m->mothurRemove((outputFileName + toString(processIDS[i]) + ".temp"));
1765                         
1766                         m->appendFiles((accnos + toString(processIDS[i]) + ".temp"), accnos);
1767                         m->mothurRemove((accnos + toString(processIDS[i]) + ".temp"));
1768                         
1769                         if (chimealns) {
1770                                 m->appendFiles((alns + toString(processIDS[i]) + ".temp"), alns);
1771                                 m->mothurRemove((alns + toString(processIDS[i]) + ".temp"));
1772                         }
1773                 }
1774                 
1775                 //get rid of the file pieces.
1776                 for (int i = 0; i < files.size(); i++) { m->mothurRemove(files[i]); }
1777                 return num;     
1778         }
1779         catch(exception& e) {
1780                 m->errorOut(e, "ChimeraUchimeCommand", "createProcesses");
1781                 exit(1);
1782         }
1783 }
1784 /**************************************************************************************************/
1785
1786 int ChimeraUchimeCommand::createProcessesGroups(string outputFName, string filename, string accnos, string alns, string newCountFile, vector<string> groups, string nameFile, string groupFile, string fastaFile) {
1787         try {
1788                 
1789                 processIDS.clear();
1790                 int process = 1;
1791                 int num = 0;
1792         
1793         CountTable newCount;
1794         if (hasCount && dups) { newCount.readTable(nameFile); }
1795                 
1796                 //sanity check
1797                 if (groups.size() < processors) { processors = groups.size(); }
1798                 
1799                 //divide the groups between the processors
1800                 vector<linePair> lines;
1801                 int numGroupsPerProcessor = groups.size() / processors;
1802                 for (int i = 0; i < processors; i++) {
1803                         int startIndex =  i * numGroupsPerProcessor;
1804                         int endIndex = (i+1) * numGroupsPerProcessor;
1805                         if(i == (processors - 1)){      endIndex = groups.size();       }
1806                         lines.push_back(linePair(startIndex, endIndex));
1807                 }
1808                 
1809 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)          
1810                                 
1811                 //loop through and create all the processes you want
1812                 while (process != processors) {
1813                         int pid = fork();
1814                         
1815                         if (pid > 0) {
1816                                 processIDS.push_back(pid);  //create map from line number to pid so you can append files in correct order later
1817                                 process++;
1818                         }else if (pid == 0){
1819                                 num = driverGroups(outputFName + toString(getpid()) + ".temp", filename + toString(getpid()) + ".temp", accnos + toString(getpid()) + ".temp", alns + toString(getpid()) + ".temp", accnos + ".byCount." + toString(getpid()) + ".temp", lines[process].start, lines[process].end, groups);
1820                                 
1821                                 //pass numSeqs to parent
1822                                 ofstream out;
1823                                 string tempFile = outputFName + toString(getpid()) + ".num.temp";
1824                                 m->openOutputFile(tempFile, out);
1825                                 out << num << endl;
1826                                 out.close();
1827                                 
1828                                 exit(0);
1829                         }else { 
1830                                 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine(); 
1831                                 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
1832                                 exit(0);
1833                         }
1834                 }
1835                 
1836                 //do my part
1837                 num = driverGroups(outputFName, filename, accnos, alns, accnos + ".byCount", lines[0].start, lines[0].end, groups);
1838                 
1839                 //force parent to wait until all the processes are done
1840                 for (int i=0;i<processIDS.size();i++) { 
1841                         int temp = processIDS[i];
1842                         wait(&temp);
1843                 }
1844         
1845                 for (int i = 0; i < processIDS.size(); i++) {
1846                         ifstream in;
1847                         string tempFile =  outputFName + toString(processIDS[i]) + ".num.temp";
1848                         m->openInputFile(tempFile, in);
1849                         if (!in.eof()) { int tempNum = 0; in >> tempNum; num += tempNum; }
1850                         in.close(); m->mothurRemove(tempFile);
1851         }
1852         
1853 #else
1854                 //////////////////////////////////////////////////////////////////////////////////////////////////////
1855                 //Windows version shared memory, so be careful when passing variables through the uchimeData struct. 
1856                 //Above fork() will clone, so memory is separate, but that's not the case with windows, 
1857                 //////////////////////////////////////////////////////////////////////////////////////////////////////
1858                 
1859                 vector<uchimeData*> pDataArray; 
1860                 DWORD   dwThreadIdArray[processors-1];
1861                 HANDLE  hThreadArray[processors-1]; 
1862                 
1863                 //Create processor worker threads.
1864                 for( int i=1; i<processors; i++ ){
1865                         // Allocate memory for thread data.
1866                         string extension = toString(i) + ".temp";
1867                         
1868                         uchimeData* tempUchime = new uchimeData(outputFName+extension, uchimeLocation, templatefile, filename+extension, fastaFile, nameFile, groupFile, accnos+extension, alns+extension, accnos+".byCount."+extension, groups, m, lines[i].start, lines[i].end,  i);
1869                         tempUchime->setBooleans(dups, useAbskew, chimealns, useMinH, useMindiv, useXn, useDn, useXa, useChunks, useMinchunk, useIdsmoothwindow, useMinsmoothid, useMaxp, skipgaps, skipgaps2, useMinlen, useMaxlen, ucl, useQueryfract, hasCount);
1870                         tempUchime->setVariables(abskew, minh, mindiv, xn, dn, xa, chunks, minchunk, idsmoothwindow, minsmoothid, maxp, minlen, maxlen, queryfract, strand);
1871                         
1872                         pDataArray.push_back(tempUchime);
1873                         processIDS.push_back(i);
1874                         
1875                         //MyUchimeThreadFunction is in header. It must be global or static to work with the threads.
1876                         //default security attributes, thread function name, argument to thread function, use default creation flags, returns the thread identifier
1877                         hThreadArray[i-1] = CreateThread(NULL, 0, MyUchimeThreadFunction, pDataArray[i-1], 0, &dwThreadIdArray[i-1]);   
1878                 }
1879                 
1880                 
1881                 //using the main process as a worker saves time and memory
1882                 num = driverGroups(outputFName, filename, accnos, alns, accnos + ".byCount", lines[0].start, lines[0].end, groups);
1883                 
1884                 //Wait until all threads have terminated.
1885                 WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE);
1886                 
1887                 //Close all thread handles and free memory allocations.
1888                 for(int i=0; i < pDataArray.size(); i++){
1889                         num += pDataArray[i]->count;
1890                         CloseHandle(hThreadArray[i]);
1891                         delete pDataArray[i];
1892                 }
1893         
1894         
1895 #endif          
1896                 
1897         //read my own
1898         if (hasCount && dups) {
1899             if (!m->isBlank(accnos + ".byCount")) {
1900                 ifstream in2;
1901                 m->openInputFile(accnos + ".byCount", in2);
1902                 
1903                 string name, group;
1904                 while (!in2.eof()) {
1905                     in2 >> name >> group; m->gobble(in2);
1906                     newCount.setAbund(name, group, 0);
1907                 }
1908                 in2.close();
1909             }
1910             m->mothurRemove(accnos + ".byCount");
1911         }
1912                                 
1913                 //append output files
1914                 for(int i=0;i<processIDS.size();i++){
1915                         m->appendFiles((outputFName + toString(processIDS[i]) + ".temp"), outputFName);
1916                         m->mothurRemove((outputFName + toString(processIDS[i]) + ".temp"));
1917                         
1918                         m->appendFiles((accnos + toString(processIDS[i]) + ".temp"), accnos);
1919                         m->mothurRemove((accnos + toString(processIDS[i]) + ".temp"));
1920                         
1921                         if (chimealns) {
1922                                 m->appendFiles((alns + toString(processIDS[i]) + ".temp"), alns);
1923                                 m->mothurRemove((alns + toString(processIDS[i]) + ".temp"));
1924                         }
1925             
1926             if (hasCount && dups) {
1927                 if (!m->isBlank(accnos + ".byCount." + toString(processIDS[i]) + ".temp")) {
1928                     ifstream in2;
1929                     m->openInputFile(accnos + ".byCount." + toString(processIDS[i]) + ".temp", in2);
1930                     
1931                     string name, group;
1932                     while (!in2.eof()) {
1933                         in2 >> name >> group; m->gobble(in2);
1934                         newCount.setAbund(name, group, 0);
1935                     }
1936                     in2.close();
1937                 }
1938                 m->mothurRemove(accnos + ".byCount." + toString(processIDS[i]) + ".temp");
1939             }
1940
1941                 }
1942         
1943         //print new *.pick.count_table
1944         if (hasCount && dups) { newCount.printTable(newCountFile); }
1945                 
1946                 return num;     
1947                 
1948         }
1949         catch(exception& e) {
1950                 m->errorOut(e, "ChimeraUchimeCommand", "createProcessesGroups");
1951                 exit(1);
1952         }
1953 }
1954 /**************************************************************************************************/
1955