]> git.donarmstrong.com Git - mothur.git/blob - chimerauchimecommand.cpp
e82dc1bc8dce5ffd0887025d5816b5eec8d0eab9
[mothur.git] / chimerauchimecommand.cpp
1 /*
2  *  chimerauchimecommand.cpp
3  *  Mothur
4  *
5  *  Created by westcott on 5/13/11.
6  *  Copyright 2011 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "chimerauchimecommand.h"
11 #include "deconvolutecommand.h"
12 //#include "uc.h"
13 #include "sequence.hpp"
14 #include "referencedb.h"
15 #include "systemcommand.h"
16
17 //**********************************************************************************************************************
18 vector<string> ChimeraUchimeCommand::setParameters(){   
19         try {
20                 CommandParameter ptemplate("reference", "InputTypes", "", "", "none", "none", "none","",false,true,true); parameters.push_back(ptemplate);
21                 CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none","chimera-accnos",false,true,true); parameters.push_back(pfasta);
22                 CommandParameter pname("name", "InputTypes", "", "", "NameCount", "none", "none","",false,false,true); parameters.push_back(pname);
23         CommandParameter pcount("count", "InputTypes", "", "", "NameCount-CountGroup", "none", "none","",false,false,true); parameters.push_back(pcount);
24                 CommandParameter pgroup("group", "InputTypes", "", "", "CountGroup", "none", "none","",false,false,true); parameters.push_back(pgroup);
25                 CommandParameter pprocessors("processors", "Number", "", "1", "", "", "","",false,false,true); parameters.push_back(pprocessors);
26         CommandParameter pstrand("strand", "String", "", "", "", "", "","",false,false); parameters.push_back(pstrand);
27                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
28                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
29                 CommandParameter pabskew("abskew", "Number", "", "1.9", "", "", "","",false,false); parameters.push_back(pabskew);
30                 CommandParameter pchimealns("chimealns", "Boolean", "", "F", "", "", "","alns",false,false); parameters.push_back(pchimealns);
31                 CommandParameter pminh("minh", "Number", "", "0.3", "", "", "","",false,false); parameters.push_back(pminh);
32                 CommandParameter pmindiv("mindiv", "Number", "", "0.5", "", "", "","",false,false); parameters.push_back(pmindiv);
33                 CommandParameter pxn("xn", "Number", "", "8.0", "", "", "","",false,false); parameters.push_back(pxn);
34                 CommandParameter pdn("dn", "Number", "", "1.4", "", "", "","",false,false); parameters.push_back(pdn);
35                 CommandParameter pxa("xa", "Number", "", "1", "", "", "","",false,false); parameters.push_back(pxa);
36                 CommandParameter pchunks("chunks", "Number", "", "4", "", "", "","",false,false); parameters.push_back(pchunks);
37                 CommandParameter pminchunk("minchunk", "Number", "", "64", "", "", "","",false,false); parameters.push_back(pminchunk);
38                 CommandParameter pidsmoothwindow("idsmoothwindow", "Number", "", "32", "", "", "","",false,false); parameters.push_back(pidsmoothwindow);
39         CommandParameter pdups("dereplicate", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(pdups);
40
41                 //CommandParameter pminsmoothid("minsmoothid", "Number", "", "0.95", "", "", "",false,false); parameters.push_back(pminsmoothid);
42                 CommandParameter pmaxp("maxp", "Number", "", "2", "", "", "","",false,false); parameters.push_back(pmaxp);
43                 CommandParameter pskipgaps("skipgaps", "Boolean", "", "T", "", "", "","",false,false); parameters.push_back(pskipgaps);
44                 CommandParameter pskipgaps2("skipgaps2", "Boolean", "", "T", "", "", "","",false,false); parameters.push_back(pskipgaps2);
45                 CommandParameter pminlen("minlen", "Number", "", "10", "", "", "","",false,false); parameters.push_back(pminlen);
46                 CommandParameter pmaxlen("maxlen", "Number", "", "10000", "", "", "","",false,false); parameters.push_back(pmaxlen);
47                 CommandParameter pucl("ucl", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(pucl);
48                 CommandParameter pqueryfract("queryfract", "Number", "", "0.5", "", "", "","",false,false); parameters.push_back(pqueryfract);
49
50                 vector<string> myArray;
51                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
52                 return myArray;
53         }
54         catch(exception& e) {
55                 m->errorOut(e, "ChimeraUchimeCommand", "setParameters");
56                 exit(1);
57         }
58 }
59 //**********************************************************************************************************************
60 string ChimeraUchimeCommand::getHelpString(){   
61         try {
62                 string helpString = "";
63                 helpString += "The chimera.uchime command reads a fastafile and referencefile and outputs potentially chimeric sequences.\n";
64                 helpString += "This command is a wrapper for uchime written by Robert C. Edgar.\n";
65                 helpString += "The chimera.uchime command parameters are fasta, name, count, reference, processors, dereplicate, abskew, chimealns, minh, mindiv, xn, dn, xa, chunks, minchunk, idsmoothwindow, minsmoothid, maxp, skipgaps, skipgaps2, minlen, maxlen, ucl, strand and queryfact.\n";
66                 helpString += "The fasta parameter allows you to enter the fasta file containing your potentially chimeric sequences, and is required, unless you have a valid current fasta file. \n";
67                 helpString += "The name parameter allows you to provide a name file, if you are using template=self. \n";
68         helpString += "The count parameter allows you to provide a count file, if you are using template=self. When you use a count file with group info and dereplicate=T, mothur will create a *.pick.count_table file containing seqeunces after chimeras are removed. \n";
69                 helpString += "You may enter multiple fasta files by separating their names with dashes. ie. fasta=abrecovery.fasta-amazon.fasta \n";
70                 helpString += "The group parameter allows you to provide a group file. The group file can be used with a namesfile and reference=self. When checking sequences, only sequences from the same group as the query sequence will be used as the reference. \n";
71         helpString += "If the dereplicate parameter is false, then if one group finds the seqeunce to be chimeric, then all groups find it to be chimeric, default=f.\n";
72                 helpString += "The reference parameter allows you to enter a reference file containing known non-chimeric sequences, and is required. You may also set template=self, in this case the abundant sequences will be used as potential parents. \n";
73                 helpString += "The processors parameter allows you to specify how many processors you would like to use.  The default is 1. \n";
74                 helpString += "The abskew parameter can only be used with template=self. Minimum abundance skew. Default 1.9. Abundance skew is: min [ abund(parent1), abund(parent2) ] / abund(query).\n";
75                 helpString += "The chimealns parameter allows you to indicate you would like a file containing multiple alignments of query sequences to parents in human readable format. Alignments show columns with differences that support or contradict a chimeric model.\n";
76                 helpString += "The minh parameter - mininum score to report chimera. Default 0.3. Values from 0.1 to 5 might be reasonable. Lower values increase sensitivity but may report more false positives. If you decrease xn you may need to increase minh, and vice versa.\n";
77                 helpString += "The mindiv parameter - minimum divergence ratio, default 0.5. Div ratio is 100%% - %%identity between query sequence and the closest candidate for being a parent. If you don't care about very close chimeras, then you could increase mindiv to, say, 1.0 or 2.0, and also decrease minh, say to 0.1, to increase sensitivity. How well this works will depend on your data. Best is to tune parameters on a good benchmark.\n";
78                 helpString += "The xn parameter - weight of a no vote. Default 8.0. Decreasing this weight to around 3 or 4 may give better performance on denoised data.\n";
79                 helpString += "The dn parameter - pseudo-count prior on number of no votes. Default 1.4. Probably no good reason to change this unless you can retune to a good benchmark for your data. Reasonable values are probably in the range from 0.2 to 2.\n";
80                 helpString += "The xa parameter - weight of an abstain vote. Default 1. So far, results do not seem to be very sensitive to this parameter, but if you have a good training set might be worth trying. Reasonable values might range from 0.1 to 2.\n";
81                 helpString += "The chunks parameter is the number of chunks to extract from the query sequence when searching for parents. Default 4.\n";
82                 helpString += "The minchunk parameter is the minimum length of a chunk. Default 64.\n";
83                 helpString += "The idsmoothwindow parameter is the length of id smoothing window. Default 32.\n";
84                 //helpString += "The minsmoothid parameter - minimum factional identity over smoothed window of candidate parent. Default 0.95.\n";
85                 helpString += "The maxp parameter - maximum number of candidate parents to consider. Default 2. In tests so far, increasing maxp gives only a very small improvement in sensivity but tends to increase the error rate quite a bit.\n";
86                 helpString += "The skipgaps parameter controls how gapped columns affect counting of diffs. If skipgaps is set to T, columns containing gaps do not found as diffs. Default = T.\n";
87                 helpString += "The skipgaps2 parameter controls how gapped columns affect counting of diffs. If skipgaps2 is set to T, if column is immediately adjacent to a column containing a gap, it is not counted as a diff. Default = T.\n";
88                 helpString += "The minlen parameter is the minimum unaligned sequence length. Defaults 10. Applies to both query and reference sequences.\n";
89                 helpString += "The maxlen parameter is the maximum unaligned sequence length. Defaults 10000. Applies to both query and reference sequences.\n";
90                 helpString += "The ucl parameter - use local-X alignments. Default is global-X or false. On tests so far, global-X is always better; this option is retained because it just might work well on some future type of data.\n";
91                 helpString += "The queryfract parameter - minimum fraction of the query sequence that must be covered by a local-X alignment. Default 0.5. Applies only when ucl is true.\n";
92 #ifdef USE_MPI
93                 helpString += "When using MPI, the processors parameter is set to the number of MPI processes running. \n";
94 #endif
95                 helpString += "The chimera.uchime command should be in the following format: \n";
96                 helpString += "chimera.uchime(fasta=yourFastaFile, reference=yourTemplate) \n";
97                 helpString += "Example: chimera.uchime(fasta=AD.align, reference=silva.gold.align) \n";
98                 helpString += "Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFastaFile).\n";       
99                 return helpString;
100         }
101         catch(exception& e) {
102                 m->errorOut(e, "ChimeraUchimeCommand", "getHelpString");
103                 exit(1);
104         }
105 }
106 //**********************************************************************************************************************
107 string ChimeraUchimeCommand::getOutputPattern(string type) {
108     try {
109         string pattern = "";
110         
111         if (type == "chimera") {  pattern = "[filename],uchime.chimeras"; } 
112         else if (type == "accnos") {  pattern = "[filename],uchime.accnos"; } 
113         else if (type == "alns") {  pattern = "[filename],uchime.alns"; }
114         else if (type == "count") {  pattern = "[filename],uchime.pick.count_table"; } 
115         else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true;  }
116         
117         return pattern;
118     }
119     catch(exception& e) {
120         m->errorOut(e, "ChimeraUchimeCommand", "getOutputPattern");
121         exit(1);
122     }
123 }
124 //**********************************************************************************************************************
125 ChimeraUchimeCommand::ChimeraUchimeCommand(){   
126         try {
127                 abort = true; calledHelp = true;
128                 setParameters();
129                 vector<string> tempOutNames;
130                 outputTypes["chimera"] = tempOutNames;
131                 outputTypes["accnos"] = tempOutNames;
132                 outputTypes["alns"] = tempOutNames;
133         outputTypes["count"] = tempOutNames;
134         }
135         catch(exception& e) {
136                 m->errorOut(e, "ChimeraUchimeCommand", "ChimeraUchimeCommand");
137                 exit(1);
138         }
139 }
140 //***************************************************************************************************************
141 ChimeraUchimeCommand::ChimeraUchimeCommand(string option)  {
142         try {
143                 abort = false; calledHelp = false; hasName=false; hasCount=false;
144                 ReferenceDB* rdb = ReferenceDB::getInstance();
145                 
146                 //allow user to run help
147                 if(option == "help") { help(); abort = true; calledHelp = true; }
148                 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
149                 
150                 else {
151                         vector<string> myArray = setParameters();
152                         
153                         OptionParser parser(option);
154                         map<string,string> parameters = parser.getParameters();
155                         
156                         ValidParameters validParameter("chimera.uchime");
157                         map<string,string>::iterator it;
158                         
159                         //check to make sure all parameters are valid for command
160                         for (it = parameters.begin(); it != parameters.end(); it++) { 
161                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
162                         }
163                         
164                         vector<string> tempOutNames;
165                         outputTypes["chimera"] = tempOutNames;
166                         outputTypes["accnos"] = tempOutNames;
167                         outputTypes["alns"] = tempOutNames;
168             outputTypes["count"] = tempOutNames;
169                         
170                         //if the user changes the input directory command factory will send this info to us in the output parameter 
171                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
172                         if (inputDir == "not found"){   inputDir = "";          }
173                         
174                         //check for required parameters
175                         fastafile = validParameter.validFile(parameters, "fasta", false);
176                         if (fastafile == "not found") {                                 
177                                 //if there is a current fasta file, use it
178                                 string filename = m->getFastaFile(); 
179                                 if (filename != "") { fastaFileNames.push_back(filename); m->mothurOut("Using " + filename + " as input file for the fasta parameter."); m->mothurOutEndLine(); }
180                                 else {  m->mothurOut("You have no current fastafile and the fasta parameter is required."); m->mothurOutEndLine(); abort = true; }
181                         }else { 
182                                 m->splitAtDash(fastafile, fastaFileNames);
183                                 
184                                 //go through files and make sure they are good, if not, then disregard them
185                                 for (int i = 0; i < fastaFileNames.size(); i++) {
186                                         
187                                         bool ignore = false;
188                                         if (fastaFileNames[i] == "current") { 
189                                                 fastaFileNames[i] = m->getFastaFile(); 
190                                                 if (fastaFileNames[i] != "") {  m->mothurOut("Using " + fastaFileNames[i] + " as input file for the fasta parameter where you had given current."); m->mothurOutEndLine(); }
191                                                 else {  
192                                                         m->mothurOut("You have no current fastafile, ignoring current."); m->mothurOutEndLine(); ignore=true; 
193                                                         //erase from file list
194                                                         fastaFileNames.erase(fastaFileNames.begin()+i);
195                                                         i--;
196                                                 }
197                                         }
198                                         
199                                         if (!ignore) {
200                                                 
201                                                 if (inputDir != "") {
202                                                         string path = m->hasPath(fastaFileNames[i]);
203                                                         //if the user has not given a path then, add inputdir. else leave path alone.
204                                                         if (path == "") {       fastaFileNames[i] = inputDir + fastaFileNames[i];               }
205                                                 }
206                                                 
207                                                 int ableToOpen;
208                                                 ifstream in;
209                                                 
210                                                 ableToOpen = m->openInputFile(fastaFileNames[i], in, "noerror");
211                                                 
212                                                 //if you can't open it, try default location
213                                                 if (ableToOpen == 1) {
214                                                         if (m->getDefaultPath() != "") { //default path is set
215                                                                 string tryPath = m->getDefaultPath() + m->getSimpleName(fastaFileNames[i]);
216                                                                 m->mothurOut("Unable to open " + fastaFileNames[i] + ". Trying default " + tryPath); m->mothurOutEndLine();
217                                                                 ifstream in2;
218                                                                 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
219                                                                 in2.close();
220                                                                 fastaFileNames[i] = tryPath;
221                                                         }
222                                                 }
223                                                 
224                                                 if (ableToOpen == 1) {
225                                                         if (m->getOutputDir() != "") { //default path is set
226                                                                 string tryPath = m->getOutputDir() + m->getSimpleName(fastaFileNames[i]);
227                                                                 m->mothurOut("Unable to open " + fastaFileNames[i] + ". Trying output directory " + tryPath); m->mothurOutEndLine();
228                                                                 ifstream in2;
229                                                                 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
230                                                                 in2.close();
231                                                                 fastaFileNames[i] = tryPath;
232                                                         }
233                                                 }
234                                                 
235                                                 in.close();
236                                                 
237                                                 if (ableToOpen == 1) { 
238                                                         m->mothurOut("Unable to open " + fastaFileNames[i] + ". It will be disregarded."); m->mothurOutEndLine(); 
239                                                         //erase from file list
240                                                         fastaFileNames.erase(fastaFileNames.begin()+i);
241                                                         i--;
242                                                 }else {
243                                                         m->setFastaFile(fastaFileNames[i]);
244                                                 }
245                                         }
246                                 }
247                                 
248                                 //make sure there is at least one valid file left
249                                 if (fastaFileNames.size() == 0) { m->mothurOut("[ERROR]: no valid files."); m->mothurOutEndLine(); abort = true; }
250                         }
251                         
252                         
253                         //check for required parameters
254                         namefile = validParameter.validFile(parameters, "name", false);
255                         if (namefile == "not found") { namefile = "";   }
256                         else { 
257                                 m->splitAtDash(namefile, nameFileNames);
258                                 
259                                 //go through files and make sure they are good, if not, then disregard them
260                                 for (int i = 0; i < nameFileNames.size(); i++) {
261                                         
262                                         bool ignore = false;
263                                         if (nameFileNames[i] == "current") { 
264                                                 nameFileNames[i] = m->getNameFile(); 
265                                                 if (nameFileNames[i] != "") {  m->mothurOut("Using " + nameFileNames[i] + " as input file for the name parameter where you had given current."); m->mothurOutEndLine(); }
266                                                 else {  
267                                                         m->mothurOut("You have no current namefile, ignoring current."); m->mothurOutEndLine(); ignore=true; 
268                                                         //erase from file list
269                                                         nameFileNames.erase(nameFileNames.begin()+i);
270                                                         i--;
271                                                 }
272                                         }
273                                         
274                                         if (!ignore) {
275                                                 
276                                                 if (inputDir != "") {
277                                                         string path = m->hasPath(nameFileNames[i]);
278                                                         //if the user has not given a path then, add inputdir. else leave path alone.
279                                                         if (path == "") {       nameFileNames[i] = inputDir + nameFileNames[i];         }
280                                                 }
281                                                 
282                                                 int ableToOpen;
283                                                 ifstream in;
284                                                 
285                                                 ableToOpen = m->openInputFile(nameFileNames[i], in, "noerror");
286                                                 
287                                                 //if you can't open it, try default location
288                                                 if (ableToOpen == 1) {
289                                                         if (m->getDefaultPath() != "") { //default path is set
290                                                                 string tryPath = m->getDefaultPath() + m->getSimpleName(nameFileNames[i]);
291                                                                 m->mothurOut("Unable to open " + nameFileNames[i] + ". Trying default " + tryPath); m->mothurOutEndLine();
292                                                                 ifstream in2;
293                                                                 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
294                                                                 in2.close();
295                                                                 nameFileNames[i] = tryPath;
296                                                         }
297                                                 }
298                                                 
299                                                 if (ableToOpen == 1) {
300                                                         if (m->getOutputDir() != "") { //default path is set
301                                                                 string tryPath = m->getOutputDir() + m->getSimpleName(nameFileNames[i]);
302                                                                 m->mothurOut("Unable to open " + nameFileNames[i] + ". Trying output directory " + tryPath); m->mothurOutEndLine();
303                                                                 ifstream in2;
304                                                                 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
305                                                                 in2.close();
306                                                                 nameFileNames[i] = tryPath;
307                                                         }
308                                                 }
309                                                 
310                                                 in.close();
311                                                 
312                                                 if (ableToOpen == 1) { 
313                                                         m->mothurOut("Unable to open " + nameFileNames[i] + ". It will be disregarded."); m->mothurOutEndLine(); 
314                                                         //erase from file list
315                                                         nameFileNames.erase(nameFileNames.begin()+i);
316                                                         i--;
317                                                 }else {
318                                                         m->setNameFile(nameFileNames[i]);
319                                                 }
320                                         }
321                                 }
322                         }
323             
324             if (nameFileNames.size() != 0) { hasName = true; }
325             
326             //check for required parameters
327             vector<string> countfileNames;
328                         countfile = validParameter.validFile(parameters, "count", false);
329                         if (countfile == "not found") { 
330                 countfile = "";  
331                         }else { 
332                                 m->splitAtDash(countfile, countfileNames);
333                                 
334                                 //go through files and make sure they are good, if not, then disregard them
335                                 for (int i = 0; i < countfileNames.size(); i++) {
336                                         
337                                         bool ignore = false;
338                                         if (countfileNames[i] == "current") { 
339                                                 countfileNames[i] = m->getCountTableFile(); 
340                                                 if (nameFileNames[i] != "") {  m->mothurOut("Using " + countfileNames[i] + " as input file for the count parameter where you had given current."); m->mothurOutEndLine(); }
341                                                 else {  
342                                                         m->mothurOut("You have no current count file, ignoring current."); m->mothurOutEndLine(); ignore=true; 
343                                                         //erase from file list
344                                                         countfileNames.erase(countfileNames.begin()+i);
345                                                         i--;
346                                                 }
347                                         }
348                                         
349                                         if (!ignore) {
350                                                 
351                                                 if (inputDir != "") {
352                                                         string path = m->hasPath(countfileNames[i]);
353                                                         //if the user has not given a path then, add inputdir. else leave path alone.
354                                                         if (path == "") {       countfileNames[i] = inputDir + countfileNames[i];               }
355                                                 }
356                                                 
357                                                 int ableToOpen;
358                                                 ifstream in;
359                                                 
360                                                 ableToOpen = m->openInputFile(countfileNames[i], in, "noerror");
361                                                 
362                                                 //if you can't open it, try default location
363                                                 if (ableToOpen == 1) {
364                                                         if (m->getDefaultPath() != "") { //default path is set
365                                                                 string tryPath = m->getDefaultPath() + m->getSimpleName(countfileNames[i]);
366                                                                 m->mothurOut("Unable to open " + countfileNames[i] + ". Trying default " + tryPath); m->mothurOutEndLine();
367                                                                 ifstream in2;
368                                                                 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
369                                                                 in2.close();
370                                                                 countfileNames[i] = tryPath;
371                                                         }
372                                                 }
373                                                 
374                                                 if (ableToOpen == 1) {
375                                                         if (m->getOutputDir() != "") { //default path is set
376                                                                 string tryPath = m->getOutputDir() + m->getSimpleName(countfileNames[i]);
377                                                                 m->mothurOut("Unable to open " + countfileNames[i] + ". Trying output directory " + tryPath); m->mothurOutEndLine();
378                                                                 ifstream in2;
379                                                                 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
380                                                                 in2.close();
381                                                                 countfileNames[i] = tryPath;
382                                                         }
383                                                 }
384                                                 
385                                                 in.close();
386                                                 
387                                                 if (ableToOpen == 1) { 
388                                                         m->mothurOut("Unable to open " + countfileNames[i] + ". It will be disregarded."); m->mothurOutEndLine(); 
389                                                         //erase from file list
390                                                         countfileNames.erase(countfileNames.begin()+i);
391                                                         i--;
392                                                 }else {
393                                                         m->setCountTableFile(countfileNames[i]);
394                                                 }
395                                         }
396                                 }
397                         }
398             
399             if (countfileNames.size() != 0) { hasCount = true; }
400             
401                         //make sure there is at least one valid file left
402             if (hasName && hasCount) { m->mothurOut("[ERROR]: You must enter ONLY ONE of the following: count or name."); m->mothurOutEndLine(); abort = true; }
403             
404             if (!hasName && hasCount) { nameFileNames = countfileNames; }
405             
406                         if ((hasCount || hasName) && (nameFileNames.size() != fastaFileNames.size())) { m->mothurOut("[ERROR]: The number of name or count files does not match the number of fastafiles, please correct."); m->mothurOutEndLine(); abort=true; }
407                         
408                         bool hasGroup = true;
409                         groupfile = validParameter.validFile(parameters, "group", false);
410                         if (groupfile == "not found") { groupfile = "";  hasGroup = false; }
411                         else { 
412                                 m->splitAtDash(groupfile, groupFileNames);
413                                 
414                                 //go through files and make sure they are good, if not, then disregard them
415                                 for (int i = 0; i < groupFileNames.size(); i++) {
416                                         
417                                         bool ignore = false;
418                                         if (groupFileNames[i] == "current") { 
419                                                 groupFileNames[i] = m->getGroupFile(); 
420                                                 if (groupFileNames[i] != "") {  m->mothurOut("Using " + groupFileNames[i] + " as input file for the group parameter where you had given current."); m->mothurOutEndLine(); }
421                                                 else {  
422                                                         m->mothurOut("You have no current namefile, ignoring current."); m->mothurOutEndLine(); ignore=true; 
423                                                         //erase from file list
424                                                         groupFileNames.erase(groupFileNames.begin()+i);
425                                                         i--;
426                                                 }
427                                         }
428                                         
429                                         if (!ignore) {
430                                                 
431                                                 if (inputDir != "") {
432                                                         string path = m->hasPath(groupFileNames[i]);
433                                                         //if the user has not given a path then, add inputdir. else leave path alone.
434                                                         if (path == "") {       groupFileNames[i] = inputDir + groupFileNames[i];               }
435                                                 }
436                                                 
437                                                 int ableToOpen;
438                                                 ifstream in;
439                                                 
440                                                 ableToOpen = m->openInputFile(groupFileNames[i], in, "noerror");
441                                                 
442                                                 //if you can't open it, try default location
443                                                 if (ableToOpen == 1) {
444                                                         if (m->getDefaultPath() != "") { //default path is set
445                                                                 string tryPath = m->getDefaultPath() + m->getSimpleName(groupFileNames[i]);
446                                                                 m->mothurOut("Unable to open " + groupFileNames[i] + ". Trying default " + tryPath); m->mothurOutEndLine();
447                                                                 ifstream in2;
448                                                                 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
449                                                                 in2.close();
450                                                                 groupFileNames[i] = tryPath;
451                                                         }
452                                                 }
453                                                 
454                                                 if (ableToOpen == 1) {
455                                                         if (m->getOutputDir() != "") { //default path is set
456                                                                 string tryPath = m->getOutputDir() + m->getSimpleName(groupFileNames[i]);
457                                                                 m->mothurOut("Unable to open " + groupFileNames[i] + ". Trying output directory " + tryPath); m->mothurOutEndLine();
458                                                                 ifstream in2;
459                                                                 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
460                                                                 in2.close();
461                                                                 groupFileNames[i] = tryPath;
462                                                         }
463                                                 }
464                                                 
465                                                 in.close();
466                                                 
467                                                 if (ableToOpen == 1) { 
468                                                         m->mothurOut("Unable to open " + groupFileNames[i] + ". It will be disregarded."); m->mothurOutEndLine(); 
469                                                         //erase from file list
470                                                         groupFileNames.erase(groupFileNames.begin()+i);
471                                                         i--;
472                                                 }else {
473                                                         m->setGroupFile(groupFileNames[i]);
474                                                 }
475                                         }
476                                 }
477                                 
478                                 //make sure there is at least one valid file left
479                                 if (groupFileNames.size() == 0) { m->mothurOut("[ERROR]: no valid group files."); m->mothurOutEndLine(); abort = true; }
480                         }
481                         
482                         if (hasGroup && (groupFileNames.size() != fastaFileNames.size())) { m->mothurOut("[ERROR]: The number of groupfiles does not match the number of fastafiles, please correct."); m->mothurOutEndLine(); abort=true; }
483                         
484             if (hasGroup && hasCount) { m->mothurOut("[ERROR]: You must enter ONLY ONE of the following: count or group."); m->mothurOutEndLine(); abort = true; }                      
485                         //if the user changes the output directory command factory will send this info to us in the output parameter 
486                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = ""; }
487                         
488                         
489                         //if the user changes the output directory command factory will send this info to us in the output parameter 
490                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = ""; }
491                         
492                         string path;
493                         it = parameters.find("reference");
494                         //user has given a template file
495                         if(it != parameters.end()){ 
496                                 if (it->second == "self") { templatefile = "self"; }
497                                 else {
498                                         path = m->hasPath(it->second);
499                                         //if the user has not given a path then, add inputdir. else leave path alone.
500                                         if (path == "") {       parameters["reference"] = inputDir + it->second;                }
501                                         
502                                         templatefile = validParameter.validFile(parameters, "reference", true);
503                                         if (templatefile == "not open") { abort = true; }
504                                         else if (templatefile == "not found") { //check for saved reference sequences
505                                                 if (rdb->getSavedReference() != "") {
506                                                         templatefile = rdb->getSavedReference();
507                                                         m->mothurOutEndLine();  m->mothurOut("Using sequences from " + rdb->getSavedReference() + "."); m->mothurOutEndLine();
508                                                 }else {
509                                                         m->mothurOut("[ERROR]: You don't have any saved reference sequences and the reference parameter is a required."); 
510                                                         m->mothurOutEndLine();
511                                                         abort = true; 
512                                                 }
513                                         }
514                                 }
515                         }else if (hasName) {  templatefile = "self"; }
516             else if (hasCount) {  templatefile = "self"; }
517                         else { 
518                                 if (rdb->getSavedReference() != "") {
519                                         templatefile = rdb->getSavedReference();
520                                         m->mothurOutEndLine();  m->mothurOut("Using sequences from " + rdb->getSavedReference() + "."); m->mothurOutEndLine();
521                                 }else {
522                                         m->mothurOut("[ERROR]: You don't have any saved reference sequences and the reference parameter is a required."); 
523                                         m->mothurOutEndLine();
524                                         templatefile = ""; abort = true; 
525                                 } 
526                         }
527                                 
528                         string temp = validParameter.validFile(parameters, "processors", false);        if (temp == "not found"){       temp = m->getProcessors();      }
529                         m->setProcessors(temp);
530                         m->mothurConvert(temp, processors);
531                         
532                         abskew = validParameter.validFile(parameters, "abskew", false); if (abskew == "not found"){     useAbskew = false;  abskew = "1.9";     }else{  useAbskew = true;  }
533                         if (useAbskew && templatefile != "self") { m->mothurOut("The abskew parameter is only valid with template=self, ignoring."); m->mothurOutEndLine(); useAbskew = false; }
534                         
535                         temp = validParameter.validFile(parameters, "chimealns", false);                        if (temp == "not found") { temp = "f"; }
536                         chimealns = m->isTrue(temp); 
537                         
538                         minh = validParameter.validFile(parameters, "minh", false);                                             if (minh == "not found")                        { useMinH = false; minh = "0.3";                                        }       else{ useMinH = true;                   }
539                         mindiv = validParameter.validFile(parameters, "mindiv", false);                                 if (mindiv == "not found")                      { useMindiv = false; mindiv = "0.5";                            }       else{ useMindiv = true;                 }
540                         xn = validParameter.validFile(parameters, "xn", false);                                                 if (xn == "not found")                          { useXn = false; xn = "8.0";                                            }       else{ useXn = true;                             }
541                         dn = validParameter.validFile(parameters, "dn", false);                                                 if (dn == "not found")                          { useDn = false; dn = "1.4";                                            }       else{ useDn = true;                             }
542                         xa = validParameter.validFile(parameters, "xa", false);                                                 if (xa == "not found")                          { useXa = false; xa = "1";                                                      }       else{ useXa = true;                             }
543                         chunks = validParameter.validFile(parameters, "chunks", false);                                 if (chunks == "not found")                      { useChunks = false; chunks = "4";                                      }       else{ useChunks = true;                 }
544                         minchunk = validParameter.validFile(parameters, "minchunk", false);                             if (minchunk == "not found")            { useMinchunk = false; minchunk = "64";                         }       else{ useMinchunk = true;               }
545                         idsmoothwindow = validParameter.validFile(parameters, "idsmoothwindow", false); if (idsmoothwindow == "not found")      { useIdsmoothwindow = false; idsmoothwindow = "32";     }       else{ useIdsmoothwindow = true; }
546                         //minsmoothid = validParameter.validFile(parameters, "minsmoothid", false);             if (minsmoothid == "not found")         { useMinsmoothid = false; minsmoothid = "0.95";         }       else{ useMinsmoothid = true;    }
547                         maxp = validParameter.validFile(parameters, "maxp", false);                                             if (maxp == "not found")                        { useMaxp = false; maxp = "2";                                          }       else{ useMaxp = true;                   }
548                         minlen = validParameter.validFile(parameters, "minlen", false);                                 if (minlen == "not found")                      { useMinlen = false; minlen = "10";                                     }       else{ useMinlen = true;                 }
549                         maxlen = validParameter.validFile(parameters, "maxlen", false);                                 if (maxlen == "not found")                      { useMaxlen = false; maxlen = "10000";                          }       else{ useMaxlen = true;                 }
550             
551             strand = validParameter.validFile(parameters, "strand", false);     if (strand == "not found")      {  strand = ""; }
552                         
553                         temp = validParameter.validFile(parameters, "ucl", false);                                              if (temp == "not found") { temp = "f"; }
554                         ucl = m->isTrue(temp);
555                         
556                         queryfract = validParameter.validFile(parameters, "queryfract", false);                 if (queryfract == "not found")          { useQueryfract = false; queryfract = "0.5";            }       else{ useQueryfract = true;             }
557                         if (!ucl && useQueryfract) { m->mothurOut("queryfact may only be used when ucl=t, ignoring."); m->mothurOutEndLine(); useQueryfract = false; }
558                         
559                         temp = validParameter.validFile(parameters, "skipgaps", false);                                 if (temp == "not found") { temp = "t"; }
560                         skipgaps = m->isTrue(temp); 
561
562                         temp = validParameter.validFile(parameters, "skipgaps2", false);                                if (temp == "not found") { temp = "t"; }
563                         skipgaps2 = m->isTrue(temp); 
564             
565             
566                         temp = validParameter.validFile(parameters, "dereplicate", false);      
567                         if (temp == "not found") { 
568                                 if (groupfile != "")    {  temp = "false";                                      }
569                                 else                    {  temp = "true";       }
570                         }
571                         dups = m->isTrue(temp);
572
573                         
574                         if (hasName && (templatefile != "self")) { m->mothurOut("You have provided a namefile and the reference parameter is not set to self. I am not sure what reference you are trying to use, aborting."); m->mothurOutEndLine(); abort=true; }
575                         if (hasGroup && (templatefile != "self")) { m->mothurOut("You have provided a group file and the reference parameter is not set to self. I am not sure what reference you are trying to use, aborting."); m->mothurOutEndLine(); abort=true; }
576                         
577                         //look for uchime exe
578                         path = m->argv;
579                         string tempPath = path;
580                         for (int i = 0; i < path.length(); i++) { tempPath[i] = tolower(path[i]); }
581                         path = path.substr(0, (tempPath.find_last_of('m')));
582                         
583                         string uchimeCommand;
584 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
585                         uchimeCommand = path + "uchime";        //      format the database, -o option gives us the ability
586             if (m->debug) { 
587                 m->mothurOut("[DEBUG]: Uchime location using \"which uchime\" = "); 
588                 Command* newCommand = new SystemCommand("which uchime"); m->mothurOutEndLine();
589                 newCommand->execute();
590                 delete newCommand;
591                 m->mothurOut("[DEBUG]: Mothur's location using \"which mothur\" = "); 
592                 newCommand = new SystemCommand("which mothur"); m->mothurOutEndLine();
593                 newCommand->execute();
594                 delete newCommand;
595             }
596 #else
597                         uchimeCommand = path + "uchime.exe";
598 #endif
599         
600                         //test to make sure uchime exists
601                         ifstream in;
602                         uchimeCommand = m->getFullPathName(uchimeCommand);
603                         int ableToOpen = m->openInputFile(uchimeCommand, in, "no error"); in.close();
604                         if(ableToOpen == 1) {   
605                 m->mothurOut(uchimeCommand + " file does not exist. Checking path... \n");
606                 //check to see if uchime is in the path??
607                 
608                 string uLocation = m->findProgramPath("uchime");
609                 
610                 
611                 ifstream in2;
612 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
613                 ableToOpen = m->openInputFile(uLocation, in2, "no error"); in2.close();
614 #else
615                 ableToOpen = m->openInputFile((uLocation + ".exe"), in2, "no error"); in2.close();
616 #endif
617
618                 if(ableToOpen == 1) { m->mothurOut("[ERROR]: " + uLocation + " file does not exist. mothur requires the uchime executable."); m->mothurOutEndLine(); abort = true; } 
619                 else {  m->mothurOut("Found uchime in your path, using " + uLocation + "\n");uchimeLocation = uLocation; }
620             }else {  uchimeLocation = uchimeCommand; }
621             
622             uchimeLocation = m->getFullPathName(uchimeLocation);
623         }
624         }
625         catch(exception& e) {
626                 m->errorOut(e, "ChimeraSlayerCommand", "ChimeraSlayerCommand");
627                 exit(1);
628         }
629 }
630 //***************************************************************************************************************
631
632 int ChimeraUchimeCommand::execute(){
633         try{
634         
635         if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
636                 
637                 m->mothurOut("\nuchime by Robert C. Edgar\nhttp://drive5.com/uchime\nThis code is donated to the public domain.\n\n");
638                 
639                 for (int s = 0; s < fastaFileNames.size(); s++) {
640                         
641                         m->mothurOut("Checking sequences from " + fastaFileNames[s] + " ..." ); m->mothurOutEndLine();
642                         
643                         int start = time(NULL); 
644                         string nameFile = "";
645                         if (outputDir == "") { outputDir = m->hasPath(fastaFileNames[s]);  }//if user entered a file with a path then preserve it                               
646                         map<string, string> variables; 
647             variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s]));
648                         string outputFileName = getOutputFileName("chimera", variables);
649                         string accnosFileName = getOutputFileName("accnos", variables);
650                         string alnsFileName = getOutputFileName("alns", variables);
651                         string newFasta = m->getRootName(fastaFileNames[s]) + "temp";
652             string newCountFile = "";
653                                 
654                         //you provided a groupfile
655                         string groupFile = "";
656             bool hasGroup = false;
657                         if (groupFileNames.size() != 0) { groupFile = groupFileNames[s]; hasGroup = true; }
658             else if (hasCount) {
659                 CountTable ct;
660                 if (ct.testGroups(nameFileNames[s])) { hasGroup = true; }
661                 variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(nameFileNames[s]));
662                 newCountFile = getOutputFileName("count", variables);
663             }
664                         
665                         if ((templatefile == "self") && (!hasGroup)) { //you want to run uchime with a template=self and no groups
666
667                                 if (processors != 1) { m->mothurOut("When using template=self, mothur can only use 1 processor, continuing."); m->mothurOutEndLine(); processors = 1; }
668                                 if (nameFileNames.size() != 0) { //you provided a namefile and we don't need to create one
669                                         nameFile = nameFileNames[s];
670                                 }else { nameFile = getNamesFile(fastaFileNames[s]); }
671                                                                                 
672                                 map<string, string> seqs;  
673                                 readFasta(fastaFileNames[s], seqs);  if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) {   m->mothurRemove(outputNames[j]);        }  return 0; }
674
675                                 //read namefile
676                                 vector<seqPriorityNode> nameMapCount;
677                 int error;
678                 if (hasCount) {
679                     CountTable ct;
680                     ct.readTable(nameFile);
681                     for(map<string, string>::iterator it = seqs.begin(); it != seqs.end(); it++) {
682                         int num = ct.getNumSeqs(it->first);
683                         if (num == 0) { error = 1; }
684                         else {
685                             seqPriorityNode temp(num, it->second, it->first);
686                             nameMapCount.push_back(temp);
687                         }
688                     }
689                 }else {
690                     error = m->readNames(nameFile, nameMapCount, seqs); if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) {        m->mothurRemove(outputNames[j]);        }  return 0; }
691                 }
692                                 if (error == 1) { for (int j = 0; j < outputNames.size(); j++) {        m->mothurRemove(outputNames[j]);        }  return 0; }
693                                 if (seqs.size() != nameMapCount.size()) { m->mothurOut( "The number of sequences in your fastafile does not match the number of sequences in your namefile, aborting."); m->mothurOutEndLine(); for (int j = 0; j < outputNames.size(); j++) {  m->mothurRemove(outputNames[j]);        }  return 0; }
694                                 
695                                 printFile(nameMapCount, newFasta);
696                                 fastaFileNames[s] = newFasta;
697                         }
698                         
699                         if (m->control_pressed) {  for (int j = 0; j < outputNames.size(); j++) {       m->mothurRemove(outputNames[j]);        }  return 0;    }                               
700                         
701                         if (hasGroup) {
702                                 if (nameFileNames.size() != 0) { //you provided a namefile and we don't need to create one
703                                         nameFile = nameFileNames[s];
704                                 }else { nameFile = getNamesFile(fastaFileNames[s]); }
705                                 
706                                 //Parse sequences by group
707                 vector<string> groups;
708                 map<string, string> uniqueNames;
709                 if (hasCount) {
710                     cparser = new SequenceCountParser(nameFile, fastaFileNames[s]);
711                     groups = cparser->getNamesOfGroups();
712                     uniqueNames = cparser->getAllSeqsMap();
713                 }else{
714                     sparser = new SequenceParser(groupFile, fastaFileNames[s], nameFile);
715                     groups = sparser->getNamesOfGroups();
716                     uniqueNames = sparser->getAllSeqsMap();
717                 }
718                                         
719                                 if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) {        m->mothurRemove(outputNames[j]);        }  return 0; }
720                                                                 
721                                 //clears files
722                                 ofstream out, out1, out2;
723                                 m->openOutputFile(outputFileName, out); out.close(); 
724                                 m->openOutputFile(accnosFileName, out1); out1.close();
725                                 if (chimealns) { m->openOutputFile(alnsFileName, out2); out2.close(); }
726                                 int totalSeqs = 0;
727                                 
728                                 if(processors == 1)     {       totalSeqs = driverGroups(outputFileName, newFasta, accnosFileName, alnsFileName, newCountFile, 0, groups.size(), groups);
729                     
730                     if (hasCount && dups) {
731                         CountTable c; c.readTable(nameFile);
732                         if (!m->isBlank(newCountFile)) {
733                             ifstream in2;
734                             m->openInputFile(newCountFile, in2);
735                             
736                             string name, group;
737                             while (!in2.eof()) {
738                                 in2 >> name >> group; m->gobble(in2);
739                                 c.setAbund(name, group, 0);
740                             }
741                             in2.close();
742                         }
743                         m->mothurRemove(newCountFile);
744                         c.printTable(newCountFile);
745                     }
746
747                 }else                           {       totalSeqs = createProcessesGroups(outputFileName, newFasta, accnosFileName, alnsFileName, newCountFile, groups, nameFile, groupFile, fastaFileNames[s]);                        }
748
749                                 if (m->control_pressed) {  for (int j = 0; j < outputNames.size(); j++) {       m->mothurRemove(outputNames[j]);        }  return 0;    }                               
750                
751                 
752                 if (!dups) { 
753                     int totalChimeras = deconvoluteResults(uniqueNames, outputFileName, accnosFileName, alnsFileName);
754                                 
755                     m->mothurOutEndLine(); m->mothurOut("It took " + toString(time(NULL) - start) + " secs to check " + toString(totalSeqs) + " sequences. " + toString(totalChimeras) + " chimeras were found.");      m->mothurOutEndLine();
756                     m->mothurOut("The number of sequences checked may be larger than the number of unique sequences because some sequences are found in several samples."); m->mothurOutEndLine(); 
757                                 }else {
758                     
759                     if (hasCount) {
760                         set<string> doNotRemove;
761                         CountTable c; c.readTable(newCountFile);
762                         vector<string> namesInTable = c.getNamesOfSeqs();
763                         for (int i = 0; i < namesInTable.size(); i++) {
764                             int temp = c.getNumSeqs(namesInTable[i]);
765                             if (temp == 0) {  c.remove(namesInTable[i]);  }
766                             else { doNotRemove.insert((namesInTable[i])); }
767                         }
768                         //remove names we want to keep from accnos file.
769                         set<string> accnosNames = m->readAccnos(accnosFileName);
770                         ofstream out2;
771                         m->openOutputFile(accnosFileName, out2);
772                         for (set<string>::iterator it = accnosNames.begin(); it != accnosNames.end(); it++) {
773                             if (doNotRemove.count(*it) == 0) {  out2 << (*it) << endl; }
774                         }
775                         out2.close();
776                         c.printTable(newCountFile);
777                         outputNames.push_back(newCountFile); outputTypes["count"].push_back(newCountFile);
778                     }
779                 }
780                 
781                 if (hasCount) { delete cparser; }
782                 else { delete sparser; }
783                 
784                                 if (m->control_pressed) {  for (int j = 0; j < outputNames.size(); j++) {       m->mothurRemove(outputNames[j]);        }  return 0;    }                               
785                                         
786                         }else{
787                                 if (m->control_pressed) {  for (int j = 0; j < outputNames.size(); j++) {       m->mothurRemove(outputNames[j]);        }  return 0;    }
788                         
789                                 int numSeqs = 0;
790                                 int numChimeras = 0;
791
792                                 if(processors == 1){ numSeqs = driver(outputFileName, fastaFileNames[s], accnosFileName, alnsFileName, numChimeras); }
793                                 else{   numSeqs = createProcesses(outputFileName, fastaFileNames[s], accnosFileName, alnsFileName, numChimeras); }
794                                 
795                                 //add headings
796                                 ofstream out;
797                                 m->openOutputFile(outputFileName+".temp", out); 
798                                 out << "Score\tQuery\tParentA\tParentB\tIdQM\tIdQA\tIdQB\tIdAB\tIdQT\tLY\tLN\tLA\tRY\tRN\tRA\tDiv\tYN\n";
799                                 out.close();
800                                 
801                                 m->appendFiles(outputFileName, outputFileName+".temp");
802                                 m->mothurRemove(outputFileName); rename((outputFileName+".temp").c_str(), outputFileName.c_str());
803                                 
804                                 if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) {        m->mothurRemove(outputNames[j]);        } return 0; }
805                         
806                                 //remove file made for uchime
807                                 if (templatefile == "self") {  m->mothurRemove(fastaFileNames[s]); }
808                         
809                                 m->mothurOutEndLine(); m->mothurOut("It took " + toString(time(NULL) - start) + " secs to check " + toString(numSeqs) + " sequences. " + toString(numChimeras) + " chimeras were found.");      m->mothurOutEndLine();
810                         }
811                         
812                         outputNames.push_back(outputFileName); outputTypes["chimera"].push_back(outputFileName);
813                         outputNames.push_back(accnosFileName); outputTypes["accnos"].push_back(accnosFileName);
814                         if (chimealns) { outputNames.push_back(alnsFileName); outputTypes["alns"].push_back(alnsFileName); }
815                 }
816         
817                 //set accnos file as new current accnosfile
818                 string current = "";
819                 itTypes = outputTypes.find("accnos");
820                 if (itTypes != outputTypes.end()) {
821                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setAccnosFile(current); }
822                 }
823         
824         itTypes = outputTypes.find("count");
825                 if (itTypes != outputTypes.end()) {
826                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setCountTableFile(current); }
827                 }
828                 
829                 m->mothurOutEndLine();
830                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
831                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }       
832                 m->mothurOutEndLine();
833                 
834                 return 0;
835                 
836         }
837         catch(exception& e) {
838                 m->errorOut(e, "ChimeraUchimeCommand", "execute");
839                 exit(1);
840         }
841 }
842 //**********************************************************************************************************************
843 int ChimeraUchimeCommand::deconvoluteResults(map<string, string>& uniqueNames, string outputFileName, string accnosFileName, string alnsFileName){
844         try {
845                 map<string, string>::iterator itUnique;
846                 int total = 0;
847                 
848                 ofstream out2;
849                 m->openOutputFile(accnosFileName+".temp", out2);
850                 
851                 string name;
852                 set<string> namesInFile; //this is so if a sequence is found to be chimera in several samples we dont write it to the results file more than once
853                 set<string>::iterator itNames;
854                 set<string> chimerasInFile;
855                 set<string>::iterator itChimeras;
856
857         if (!m->isBlank(accnosFileName)) {
858             //edit accnos file
859             ifstream in2;
860             m->openInputFile(accnosFileName, in2);
861             
862             while (!in2.eof()) {
863                 if (m->control_pressed) { in2.close(); out2.close(); m->mothurRemove(outputFileName); m->mothurRemove((accnosFileName+".temp")); return 0; }
864                 
865                 in2 >> name; m->gobble(in2);
866                 
867                 //find unique name
868                 itUnique = uniqueNames.find(name);
869                 
870                 if (itUnique == uniqueNames.end()) { m->mothurOut("[ERROR]: trouble parsing accnos results. Cannot find " + name + "."); m->mothurOutEndLine(); m->control_pressed = true; }
871                 else {
872                     itChimeras = chimerasInFile.find((itUnique->second));
873                     
874                     if (itChimeras == chimerasInFile.end()) {
875                         out2 << itUnique->second << endl;
876                         chimerasInFile.insert((itUnique->second));
877                         total++;
878                     }
879                 }
880             }
881             in2.close();
882         }
883                 out2.close();
884                 
885                 m->mothurRemove(accnosFileName);
886                 rename((accnosFileName+".temp").c_str(), accnosFileName.c_str());
887                 
888                 
889                 
890                 //edit chimera file
891                 ifstream in; 
892                 m->openInputFile(outputFileName, in);
893                 
894                 ofstream out;
895                 m->openOutputFile(outputFileName+".temp", out); out.setf(ios::fixed, ios::floatfield); out.setf(ios::showpoint);
896                 out << "Score\tQuery\tParentA\tParentB\tIdQM\tIdQA\tIdQB\tIdAB\tIdQT\tLY\tLN\tLA\tRY\tRN\tRA\tDiv\tYN\n";
897                 
898                 float temp1;
899                 string parent1, parent2, temp2, temp3, temp4, temp5, temp6, temp7, temp8, temp9, temp10, temp11, temp12, temp13, flag;
900                 name = "";
901                 namesInFile.clear();    
902                 //assumptions - in file each read will always look like - if uchime source is updated, revisit this code.
903                 /*                                                                              1       2       3       4       5       6       7       8       9       10      11      12      13      14      15
904                  0.000000       F11Fcsw_33372/ab=18/            *       *       *       *       *       *       *       *       *       *       *       *       *       *       N
905                  0.018300       F11Fcsw_14980/ab=16/            F11Fcsw_1915/ab=35/     F11Fcsw_6032/ab=42/     79.9    78.7    78.2    78.7    79.2    3       0       5       11      10      20      1.46    N
906                 */
907                 
908                 while (!in.eof()) {
909                         
910                         if (m->control_pressed) { in.close(); out.close(); m->mothurRemove((outputFileName+".temp")); return 0; }
911                         
912                         bool print = false;
913                         in >> temp1;    m->gobble(in);
914                         in >> name;             m->gobble(in);
915                         in >> parent1;  m->gobble(in);
916                         in >> parent2;  m->gobble(in);
917                         in >> temp2 >> temp3 >> temp4 >> temp5 >> temp6 >> temp7 >> temp8 >> temp9 >> temp10 >> temp11 >> temp12 >> temp13 >> flag;
918                         m->gobble(in);
919                         
920                         //parse name - name will look like U68590/ab=1/
921                         string restOfName = "";
922                         int pos = name.find_first_of('/');
923                         if (pos != string::npos) {
924                                 restOfName = name.substr(pos);
925                                 name = name.substr(0, pos);
926                         }
927                         
928                         //find unique name
929                         itUnique = uniqueNames.find(name);
930                         
931                         if (itUnique == uniqueNames.end()) { m->mothurOut("[ERROR]: trouble parsing chimera results. Cannot find "+ name + "."); m->mothurOutEndLine(); m->control_pressed = true; }
932                         else {
933                                 name = itUnique->second;
934                                 //is this name already in the file
935                                 itNames = namesInFile.find((name));
936                                 
937                                 if (itNames == namesInFile.end()) { //no not in file
938                                         if (flag == "N") { //are you really a no??
939                                                 //is this sequence really not chimeric??
940                                                 itChimeras = chimerasInFile.find(name);
941                                                 
942                                                 //then you really are a no so print, otherwise skip
943                                                 if (itChimeras == chimerasInFile.end()) { print = true; }
944                                         }else{ print = true; }
945                                 }
946                         }
947                         
948                         if (print) {
949                                 out << temp1 << '\t' << name << restOfName << '\t';
950                                 namesInFile.insert(name);
951                                 
952                                 //parse parent1 names
953                                 if (parent1 != "*") {
954                                         restOfName = "";
955                                         pos = parent1.find_first_of('/');
956                                         if (pos != string::npos) {
957                                                 restOfName = parent1.substr(pos);
958                                                 parent1 = parent1.substr(0, pos);
959                                         }
960                                         
961                                         itUnique = uniqueNames.find(parent1);
962                                         if (itUnique == uniqueNames.end()) { m->mothurOut("[ERROR]: trouble parsing chimera results. Cannot find parentA "+ parent1 + "."); m->mothurOutEndLine(); m->control_pressed = true; }
963                                         else {  out << itUnique->second << restOfName << '\t';  }
964                                 }else { out << parent1 << '\t'; }
965                                 
966                                 //parse parent2 names
967                                 if (parent2 != "*") {
968                                         restOfName = "";
969                                         pos = parent2.find_first_of('/');
970                                         if (pos != string::npos) {
971                                                 restOfName = parent2.substr(pos);
972                                                 parent2 = parent2.substr(0, pos);
973                                         }
974                                         
975                                         itUnique = uniqueNames.find(parent2);
976                                         if (itUnique == uniqueNames.end()) { m->mothurOut("[ERROR]: trouble parsing chimera results. Cannot find parentB "+ parent2 + "."); m->mothurOutEndLine(); m->control_pressed = true; }
977                                         else {  out << itUnique->second << restOfName << '\t';  }
978                                 }else { out << parent2 << '\t'; }
979                                 
980                                 out << temp2 << '\t' << temp3 << '\t' << temp4 << '\t' << temp5 << '\t' << temp6 << '\t' << temp7 << '\t' << temp8 << '\t' << temp9 << '\t' << temp10 << '\t' << temp11 << '\t' << temp12 << temp13 << '\t' << flag << endl;    
981                         }
982                 }
983                 in.close();
984                 out.close();
985                 
986                 m->mothurRemove(outputFileName);
987                 rename((outputFileName+".temp").c_str(), outputFileName.c_str());
988                 
989                                 
990                 //edit anls file
991                 //assumptions - in file each read will always look like - if uchime source is updated, revisit this code.
992                 /*
993                  ------------------------------------------------------------------------
994                  Query   (  179 nt) F21Fcsw_11639/ab=591/
995                  ParentA (  179 nt) F11Fcsw_6529/ab=1625/
996                  ParentB (  181 nt) F21Fcsw_12128/ab=1827/
997                  
998                  A     1 AAGgAAGAtTAATACaagATGgCaTCatgAGtccgCATgTtcAcatGATTAAAG--gTaTtcCGGTagacGATGGGGATG 78
999                  Q     1 AAGTAAGACTAATACCCAATGACGTCTCTAGAAGACATCTGAAAGAGATTAAAG--ATTTATCGGTGATGGATGGGGATG 78
1000                  B     1 AAGgAAGAtTAATcCaggATGggaTCatgAGttcACATgTccgcatGATTAAAGgtATTTtcCGGTagacGATGGGGATG 80
1001                  Diffs      N    N    A N?N   N N  NNN  N?NB   N ?NaNNN          B B NN    NNNN          
1002                  Votes      0    0    + 000   0 0  000  000+   0 00!000            + 00    0000          
1003                  Model   AAAAAAAAAAAAAAAAAAAAAAxBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB
1004                  
1005                  A    79 CGTtccATTAGaTaGTaGGCGGGGTAACGGCCCACCtAGtCttCGATggaTAGGGGTTCTGAGAGGAAGGTCCCCCACAT 158
1006                  Q    79 CGTCTGATTAGCTTGTTGGCGGGGTAACGGCCCACCAAGGCAACGATCAGTAGGGGTTCTGAGAGGAAGGTCCCCCACAT 158
1007                  B    81 CGTtccATTAGaTaGTaGGCGGGGTAACGGCCCACCtAGtCAACGATggaTAGGGGTTCTGAGAGGAAGGTCCCCCACAT 160
1008                  Diffs      NNN     N N  N                   N  N BB    NNN                              
1009                  Votes      000     0 0  0                   0  0 ++    000                              
1010                  Model   BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB
1011                  
1012                  A   159 TGGAACTGAGACACGGTCCAA 179
1013                  Q   159 TGGAACTGAGACACGGTCCAA 179
1014                  B   161 TGGAACTGAGACACGGTCCAA 181
1015                  Diffs                        
1016                  Votes                        
1017                  Model   BBBBBBBBBBBBBBBBBBBBB
1018                  
1019                  Ids.  QA 76.6%, QB 77.7%, AB 93.7%, QModel 78.9%, Div. +1.5%
1020                  Diffs Left 7: N 0, A 6, Y 1 (14.3%); Right 35: N 1, A 30, Y 4 (11.4%), Score 0.0047
1021                 */
1022                 if (chimealns) {
1023                         ifstream in3; 
1024                         m->openInputFile(alnsFileName, in3);
1025                 
1026                         ofstream out3;
1027                         m->openOutputFile(alnsFileName+".temp", out3); out3.setf(ios::fixed, ios::floatfield); out3.setf(ios::showpoint);
1028                 
1029                         name = "";
1030                         namesInFile.clear();
1031                         string line = "";
1032                         
1033                         while (!in3.eof()) {
1034                                 if (m->control_pressed) { in3.close(); out3.close(); m->mothurRemove(outputFileName); m->mothurRemove((accnosFileName)); m->mothurRemove((alnsFileName+".temp")); return 0; }
1035                                 
1036                                 line = "";
1037                                 line = m->getline(in3); 
1038                                 string temp = "";
1039                                 
1040                                 if (line != "") {
1041                                         istringstream iss(line);
1042                                         iss >> temp;
1043                                         
1044                                         //are you a name line
1045                                         if ((temp == "Query") || (temp == "ParentA") || (temp == "ParentB")) {
1046                                                 int spot = 0;
1047                                                 for (int i = 0; i < line.length(); i++) {
1048                                                         spot = i;
1049                                                         if (line[i] == ')') { break; }
1050                                                         else { out3 << line[i]; }
1051                                                 }
1052                                                 
1053                                                 if (spot == (line.length() - 1)) { m->mothurOut("[ERROR]: could not line sequence name in line " + line + "."); m->mothurOutEndLine(); m->control_pressed = true; }
1054                                                 else if ((spot+2) > (line.length() - 1)) { m->mothurOut("[ERROR]: could not line sequence name in line " + line + "."); m->mothurOutEndLine(); m->control_pressed = true; }
1055                                                 else {
1056                                                         out << line[spot] << line[spot+1];
1057                                                         
1058                                                         name = line.substr(spot+2);
1059                                                         
1060                                                         //parse name - name will either look like U68590/ab=1/ or U68590
1061                                                         string restOfName = "";
1062                                                         int pos = name.find_first_of('/');
1063                                                         if (pos != string::npos) {
1064                                                                 restOfName = name.substr(pos);
1065                                                                 name = name.substr(0, pos);
1066                                                         }
1067                                                         
1068                                                         //find unique name
1069                                                         itUnique = uniqueNames.find(name);
1070                                                         
1071                                                         if (itUnique == uniqueNames.end()) { m->mothurOut("[ERROR]: trouble parsing alns results. Cannot find "+ name + "."); m->mothurOutEndLine();m->control_pressed = true;  }
1072                                                         else {
1073                                                                 //only limit repeats on query names
1074                                                                 if (temp == "Query") {
1075                                                                         itNames = namesInFile.find((itUnique->second));
1076                                                                         
1077                                                                         if (itNames == namesInFile.end()) {
1078                                                                                 out << itUnique->second << restOfName << endl;
1079                                                                                 namesInFile.insert((itUnique->second));
1080                                                                         }
1081                                                                 }else { out << itUnique->second << restOfName << endl;  }
1082                                                         }
1083                                                         
1084                                                 }
1085                                                 
1086                                         }else { //not need to alter line
1087                                                 out3 << line << endl;
1088                                         }
1089                                 }else { out3 << endl; }
1090                         }
1091                         in3.close();
1092                         out3.close();
1093                         
1094                         m->mothurRemove(alnsFileName);
1095                         rename((alnsFileName+".temp").c_str(), alnsFileName.c_str());
1096                 }
1097                 
1098                 return total;
1099         }
1100         catch(exception& e) {
1101                 m->errorOut(e, "ChimeraUchimeCommand", "deconvoluteResults");
1102                 exit(1);
1103         }
1104 }       
1105 //**********************************************************************************************************************
1106 int ChimeraUchimeCommand::printFile(vector<seqPriorityNode>& nameMapCount, string filename){
1107         try {
1108                 
1109                 sort(nameMapCount.begin(), nameMapCount.end(), compareSeqPriorityNodes);
1110                 
1111                 ofstream out;
1112                 m->openOutputFile(filename, out);
1113                 
1114                 //print new file in order of
1115                 for (int i = 0; i < nameMapCount.size(); i++) {
1116                         out << ">" << nameMapCount[i].name  << "/ab=" << nameMapCount[i].numIdentical << "/" << endl << nameMapCount[i].seq << endl;
1117                 }
1118                 out.close();
1119                 
1120                 return 0;
1121         }
1122         catch(exception& e) {
1123                 m->errorOut(e, "ChimeraUchimeCommand", "printFile");
1124                 exit(1);
1125         }
1126 }       
1127 //**********************************************************************************************************************
1128 int ChimeraUchimeCommand::readFasta(string filename, map<string, string>& seqs){
1129         try {
1130                 //create input file for uchime
1131                 //read through fastafile and store info
1132                 ifstream in;
1133                 m->openInputFile(filename, in);
1134                 
1135                 while (!in.eof()) {
1136                         
1137                         if (m->control_pressed) { in.close(); return 0; }
1138                         
1139                         Sequence seq(in); m->gobble(in);
1140                         seqs[seq.getName()] = seq.getAligned();
1141                 }
1142                 in.close();
1143                 
1144                 return 0;
1145         }
1146         catch(exception& e) {
1147                 m->errorOut(e, "ChimeraUchimeCommand", "readFasta");
1148                 exit(1);
1149         }
1150 }       
1151 //**********************************************************************************************************************
1152
1153 string ChimeraUchimeCommand::getNamesFile(string& inputFile){
1154         try {
1155                 string nameFile = "";
1156                 
1157                 m->mothurOutEndLine(); m->mothurOut("No namesfile given, running unique.seqs command to generate one."); m->mothurOutEndLine(); m->mothurOutEndLine();
1158                 
1159                 //use unique.seqs to create new name and fastafile
1160                 string inputString = "fasta=" + inputFile;
1161                 m->mothurOut("/******************************************/"); m->mothurOutEndLine(); 
1162                 m->mothurOut("Running command: unique.seqs(" + inputString + ")"); m->mothurOutEndLine(); 
1163                 m->mothurCalling = true;
1164         
1165                 Command* uniqueCommand = new DeconvoluteCommand(inputString);
1166                 uniqueCommand->execute();
1167                 
1168                 map<string, vector<string> > filenames = uniqueCommand->getOutputFiles();
1169                 
1170                 delete uniqueCommand;
1171                 m->mothurCalling = false;
1172                 m->mothurOut("/******************************************/"); m->mothurOutEndLine(); 
1173                 
1174                 nameFile = filenames["name"][0];
1175                 inputFile = filenames["fasta"][0];
1176                 
1177                 return nameFile;
1178         }
1179         catch(exception& e) {
1180                 m->errorOut(e, "ChimeraUchimeCommand", "getNamesFile");
1181                 exit(1);
1182         }
1183 }
1184 //**********************************************************************************************************************
1185 int ChimeraUchimeCommand::driverGroups(string outputFName, string filename, string accnos, string alns, string countlist, int start, int end, vector<string> groups){
1186         try {
1187                 
1188                 int totalSeqs = 0;
1189                 int numChimeras = 0;
1190         
1191         
1192         ofstream outCountList;
1193         if (hasCount && dups) { m->openOutputFile(countlist, outCountList); }
1194         
1195                 for (int i = start; i < end; i++) {
1196                         int start = time(NULL);  if (m->control_pressed) {  outCountList.close(); m->mothurRemove(countlist); return 0; }
1197             
1198                         int error;
1199             if (hasCount) { error = cparser->getSeqs(groups[i], filename, true); if ((error == 1) || m->control_pressed) {  return 0; } }
1200             else { error = sparser->getSeqs(groups[i], filename, true); if ((error == 1) || m->control_pressed) {  return 0; } }
1201                         
1202                         int numSeqs = driver((outputFName + groups[i]), filename, (accnos+groups[i]), (alns+ groups[i]), numChimeras);
1203                         totalSeqs += numSeqs;
1204                         
1205                         if (m->control_pressed) { return 0; }
1206                         
1207                         //remove file made for uchime
1208                         if (!m->debug) {  m->mothurRemove(filename);  }
1209             else { m->mothurOut("[DEBUG]: saving file: " + filename + ".\n"); }
1210                         
1211             //if we provided a count file with group info and set dereplicate=t, then we want to create a *.pick.count_table
1212             //This table will zero out group counts for seqs determined to be chimeric by that group.
1213             if (dups) {
1214                 if (!m->isBlank(accnos+groups[i])) {
1215                     ifstream in;
1216                     m->openInputFile(accnos+groups[i], in);
1217                     string name;
1218                     if (hasCount) {
1219                         while (!in.eof()) {
1220                             in >> name; m->gobble(in);
1221                             outCountList << name << '\t' << groups[i] << endl;
1222                         }
1223                         in.close();
1224                     }else {
1225                         map<string, string> thisnamemap = sparser->getNameMap(groups[i]);
1226                         map<string, string>::iterator itN;
1227                         ofstream out;
1228                         m->openOutputFile(accnos+groups[i]+".temp", out);
1229                         while (!in.eof()) {
1230                             in >> name; m->gobble(in); 
1231                             itN = thisnamemap.find(name);
1232                             if (itN != thisnamemap.end()) {
1233                                 vector<string> tempNames; m->splitAtComma(itN->second, tempNames); 
1234                                 for (int j = 0; j < tempNames.size(); j++) { out << tempNames[j] << endl; }
1235                                 
1236                             }else { m->mothurOut("[ERROR]: parsing cannot find " + name + ".\n"); m->control_pressed = true; }
1237                         }
1238                         out.close();
1239                         in.close();
1240                         m->renameFile(accnos+groups[i]+".temp", accnos+groups[i]);
1241                     }
1242                    
1243                 }
1244             }
1245             
1246                         //append files
1247                         m->appendFiles((outputFName+groups[i]), outputFName); m->mothurRemove((outputFName+groups[i]));
1248                         m->appendFiles((accnos+groups[i]), accnos); m->mothurRemove((accnos+groups[i]));
1249                         if (chimealns) { m->appendFiles((alns+groups[i]), alns); m->mothurRemove((alns+groups[i])); }
1250                         
1251                         m->mothurOutEndLine(); m->mothurOut("It took " + toString(time(NULL) - start) + " secs to check " + toString(numSeqs) + " sequences from group " + groups[i] + ".");    m->mothurOutEndLine();                                  
1252                 }
1253
1254         if (hasCount && dups) { outCountList.close(); }
1255         
1256         return totalSeqs;
1257                 
1258         }
1259         catch(exception& e) {
1260                 m->errorOut(e, "ChimeraUchimeCommand", "driverGroups");
1261                 exit(1);
1262         }
1263 }       
1264 //**********************************************************************************************************************
1265
1266 int ChimeraUchimeCommand::driver(string outputFName, string filename, string accnos, string alns, int& numChimeras){
1267         try {
1268                 
1269                 outputFName = m->getFullPathName(outputFName);
1270                 filename = m->getFullPathName(filename);
1271                 alns = m->getFullPathName(alns);
1272                 
1273                 //to allow for spaces in the path
1274                 outputFName = "\"" + outputFName + "\"";
1275                 filename = "\"" + filename + "\"";
1276                 alns = "\"" + alns + "\"";
1277                                 
1278                 vector<char*> cPara;
1279                 
1280                 string uchimeCommand = uchimeLocation;
1281         uchimeCommand = "\"" + uchimeCommand + "\" ";
1282         
1283         char* tempUchime;
1284                 tempUchime= new char[uchimeCommand.length()+1]; 
1285                 *tempUchime = '\0';
1286                 strncat(tempUchime, uchimeCommand.c_str(), uchimeCommand.length());
1287                 cPara.push_back(tempUchime);
1288                 
1289         //are you using a reference file
1290                 if (templatefile != "self") {
1291             string outputFileName = filename.substr(1, filename.length()-2) + ".uchime_formatted";
1292             prepFile(filename.substr(1, filename.length()-2), outputFileName);
1293             filename = outputFileName;
1294             filename = "\"" + filename + "\"";
1295                         //add reference file
1296                         char* tempRef = new char[5]; 
1297                         //strcpy(tempRef, "--db"); 
1298                         *tempRef = '\0'; strncat(tempRef, "--db", 4);
1299                         cPara.push_back(tempRef);  
1300                         char* tempR = new char[templatefile.length()+1];
1301                         //strcpy(tempR, templatefile.c_str());
1302                         *tempR = '\0'; strncat(tempR, templatefile.c_str(), templatefile.length());
1303                         cPara.push_back(tempR);
1304                 }
1305                 
1306                 char* tempIn = new char[8]; 
1307                 *tempIn = '\0'; strncat(tempIn, "--input", 7);
1308                 //strcpy(tempIn, "--input"); 
1309                 cPara.push_back(tempIn);
1310                 char* temp = new char[filename.length()+1];
1311                 *temp = '\0'; strncat(temp, filename.c_str(), filename.length());
1312                 //strcpy(temp, filename.c_str());
1313                 cPara.push_back(temp);
1314                 
1315                 char* tempO = new char[12]; 
1316                 *tempO = '\0'; strncat(tempO, "--uchimeout", 11);
1317                 //strcpy(tempO, "--uchimeout"); 
1318                 cPara.push_back(tempO);
1319                 char* tempout = new char[outputFName.length()+1];
1320                 //strcpy(tempout, outputFName.c_str());
1321                 *tempout = '\0'; strncat(tempout, outputFName.c_str(), outputFName.length());
1322                 cPara.push_back(tempout);
1323                 
1324                 if (chimealns) {
1325                         char* tempA = new char[13]; 
1326                         *tempA = '\0'; strncat(tempA, "--uchimealns", 12);
1327                         //strcpy(tempA, "--uchimealns"); 
1328                         cPara.push_back(tempA);
1329                         char* tempa = new char[alns.length()+1];
1330                         //strcpy(tempa, alns.c_str());
1331                         *tempa = '\0'; strncat(tempa, alns.c_str(), alns.length());
1332                         cPara.push_back(tempa);
1333                 }
1334         
1335         if (strand != "") {
1336                         char* tempA = new char[9]; 
1337                         *tempA = '\0'; strncat(tempA, "--strand", 8);
1338                         cPara.push_back(tempA);
1339                         char* tempa = new char[strand.length()+1];
1340                         *tempa = '\0'; strncat(tempa, strand.c_str(), strand.length());
1341                         cPara.push_back(tempa);
1342                 }
1343                 
1344                 if (useAbskew) {
1345                         char* tempskew = new char[9];
1346                         *tempskew = '\0'; strncat(tempskew, "--abskew", 8);
1347                         //strcpy(tempskew, "--abskew"); 
1348                         cPara.push_back(tempskew);
1349                         char* tempSkew = new char[abskew.length()+1];
1350                         //strcpy(tempSkew, abskew.c_str());
1351                         *tempSkew = '\0'; strncat(tempSkew, abskew.c_str(), abskew.length());
1352                         cPara.push_back(tempSkew);
1353                 }
1354                 
1355                 if (useMinH) {
1356                         char* tempminh = new char[7]; 
1357                         *tempminh = '\0'; strncat(tempminh, "--minh", 6);
1358                         //strcpy(tempminh, "--minh"); 
1359                         cPara.push_back(tempminh);
1360                         char* tempMinH = new char[minh.length()+1];
1361                         *tempMinH = '\0'; strncat(tempMinH, minh.c_str(), minh.length());
1362                         //strcpy(tempMinH, minh.c_str());
1363                         cPara.push_back(tempMinH);
1364                 }
1365                 
1366                 if (useMindiv) {
1367                         char* tempmindiv = new char[9]; 
1368                         *tempmindiv = '\0'; strncat(tempmindiv, "--mindiv", 8);
1369                         //strcpy(tempmindiv, "--mindiv"); 
1370                         cPara.push_back(tempmindiv);
1371                         char* tempMindiv = new char[mindiv.length()+1];
1372                         *tempMindiv = '\0'; strncat(tempMindiv, mindiv.c_str(), mindiv.length());
1373                         //strcpy(tempMindiv, mindiv.c_str());
1374                         cPara.push_back(tempMindiv);
1375                 }
1376                 
1377                 if (useXn) {
1378                         char* tempxn = new char[5]; 
1379                         //strcpy(tempxn, "--xn"); 
1380                         *tempxn = '\0'; strncat(tempxn, "--xn", 4);
1381                         cPara.push_back(tempxn);
1382                         char* tempXn = new char[xn.length()+1];
1383                         //strcpy(tempXn, xn.c_str());
1384                         *tempXn = '\0'; strncat(tempXn, xn.c_str(), xn.length());
1385                         cPara.push_back(tempXn);
1386                 }
1387                 
1388                 if (useDn) {
1389                         char* tempdn = new char[5]; 
1390                         //strcpy(tempdn, "--dn"); 
1391                         *tempdn = '\0'; strncat(tempdn, "--dn", 4);
1392                         cPara.push_back(tempdn);
1393                         char* tempDn = new char[dn.length()+1];
1394                         *tempDn = '\0'; strncat(tempDn, dn.c_str(), dn.length());
1395                         //strcpy(tempDn, dn.c_str());
1396                         cPara.push_back(tempDn);
1397                 }
1398                 
1399                 if (useXa) {
1400                         char* tempxa = new char[5]; 
1401                         //strcpy(tempxa, "--xa"); 
1402                         *tempxa = '\0'; strncat(tempxa, "--xa", 4);
1403                         cPara.push_back(tempxa);
1404                         char* tempXa = new char[xa.length()+1];
1405                         *tempXa = '\0'; strncat(tempXa, xa.c_str(), xa.length());
1406                         //strcpy(tempXa, xa.c_str());
1407                         cPara.push_back(tempXa);
1408                 }
1409                 
1410                 if (useChunks) {
1411                         char* tempchunks = new char[9]; 
1412                         //strcpy(tempchunks, "--chunks"); 
1413                         *tempchunks = '\0'; strncat(tempchunks, "--chunks", 8);
1414                         cPara.push_back(tempchunks);
1415                         char* tempChunks = new char[chunks.length()+1];
1416                         *tempChunks = '\0'; strncat(tempChunks, chunks.c_str(), chunks.length());
1417                         //strcpy(tempChunks, chunks.c_str());
1418                         cPara.push_back(tempChunks);
1419                 }
1420                 
1421                 if (useMinchunk) {
1422                         char* tempminchunk = new char[11]; 
1423                         //strcpy(tempminchunk, "--minchunk"); 
1424                         *tempminchunk = '\0'; strncat(tempminchunk, "--minchunk", 10);
1425                         cPara.push_back(tempminchunk);
1426                         char* tempMinchunk = new char[minchunk.length()+1];
1427                         *tempMinchunk = '\0'; strncat(tempMinchunk, minchunk.c_str(), minchunk.length());
1428                         //strcpy(tempMinchunk, minchunk.c_str());
1429                         cPara.push_back(tempMinchunk);
1430                 }
1431                 
1432                 if (useIdsmoothwindow) {
1433                         char* tempidsmoothwindow = new char[17]; 
1434                         *tempidsmoothwindow = '\0'; strncat(tempidsmoothwindow, "--idsmoothwindow", 16);
1435                         //strcpy(tempidsmoothwindow, "--idsmoothwindow"); 
1436                         cPara.push_back(tempidsmoothwindow);
1437                         char* tempIdsmoothwindow = new char[idsmoothwindow.length()+1];
1438                         *tempIdsmoothwindow = '\0'; strncat(tempIdsmoothwindow, idsmoothwindow.c_str(), idsmoothwindow.length());
1439                         //strcpy(tempIdsmoothwindow, idsmoothwindow.c_str());
1440                         cPara.push_back(tempIdsmoothwindow);
1441                 }
1442                 
1443                 /*if (useMinsmoothid) {
1444                         char* tempminsmoothid = new char[14]; 
1445                         //strcpy(tempminsmoothid, "--minsmoothid"); 
1446                         *tempminsmoothid = '\0'; strncat(tempminsmoothid, "--minsmoothid", 13);
1447                         cPara.push_back(tempminsmoothid);
1448                         char* tempMinsmoothid = new char[minsmoothid.length()+1];
1449                         *tempMinsmoothid = '\0'; strncat(tempMinsmoothid, minsmoothid.c_str(), minsmoothid.length());
1450                         //strcpy(tempMinsmoothid, minsmoothid.c_str());
1451                         cPara.push_back(tempMinsmoothid);
1452                 }*/
1453                 
1454                 if (useMaxp) {
1455                         char* tempmaxp = new char[7]; 
1456                         //strcpy(tempmaxp, "--maxp"); 
1457                         *tempmaxp = '\0'; strncat(tempmaxp, "--maxp", 6);
1458                         cPara.push_back(tempmaxp);
1459                         char* tempMaxp = new char[maxp.length()+1];
1460                         *tempMaxp = '\0'; strncat(tempMaxp, maxp.c_str(), maxp.length());
1461                         //strcpy(tempMaxp, maxp.c_str());
1462                         cPara.push_back(tempMaxp);
1463                 }
1464                 
1465                 if (!skipgaps) {
1466                         char* tempskipgaps = new char[13]; 
1467                         //strcpy(tempskipgaps, "--[no]skipgaps");
1468                         *tempskipgaps = '\0'; strncat(tempskipgaps, "--noskipgaps", 12);
1469                         cPara.push_back(tempskipgaps);
1470                 }
1471                 
1472                 if (!skipgaps2) {
1473                         char* tempskipgaps2 = new char[14]; 
1474                         //strcpy(tempskipgaps2, "--[no]skipgaps2"); 
1475                         *tempskipgaps2 = '\0'; strncat(tempskipgaps2, "--noskipgaps2", 13);
1476                         cPara.push_back(tempskipgaps2);
1477                 }
1478                 
1479                 if (useMinlen) {
1480                         char* tempminlen = new char[9]; 
1481                         *tempminlen = '\0'; strncat(tempminlen, "--minlen", 8);
1482                         //strcpy(tempminlen, "--minlen"); 
1483                         cPara.push_back(tempminlen);
1484                         char* tempMinlen = new char[minlen.length()+1];
1485                         //strcpy(tempMinlen, minlen.c_str());
1486                         *tempMinlen = '\0'; strncat(tempMinlen, minlen.c_str(), minlen.length());
1487                         cPara.push_back(tempMinlen);
1488                 }
1489                 
1490                 if (useMaxlen) {
1491                         char* tempmaxlen = new char[9]; 
1492                         //strcpy(tempmaxlen, "--maxlen"); 
1493                         *tempmaxlen = '\0'; strncat(tempmaxlen, "--maxlen", 8);
1494                         cPara.push_back(tempmaxlen);
1495                         char* tempMaxlen = new char[maxlen.length()+1];
1496                         *tempMaxlen = '\0'; strncat(tempMaxlen, maxlen.c_str(), maxlen.length());
1497                         //strcpy(tempMaxlen, maxlen.c_str());
1498                         cPara.push_back(tempMaxlen);
1499                 }
1500                 
1501                 if (ucl) {
1502                         char* tempucl = new char[5]; 
1503                         strcpy(tempucl, "--ucl"); 
1504                         cPara.push_back(tempucl);
1505                 }
1506                 
1507                 if (useQueryfract) {
1508                         char* tempqueryfract = new char[13]; 
1509                         *tempqueryfract = '\0'; strncat(tempqueryfract, "--queryfract", 12);
1510                         //strcpy(tempqueryfract, "--queryfract"); 
1511                         cPara.push_back(tempqueryfract);
1512                         char* tempQueryfract = new char[queryfract.length()+1];
1513                         *tempQueryfract = '\0'; strncat(tempQueryfract, queryfract.c_str(), queryfract.length());
1514                         //strcpy(tempQueryfract, queryfract.c_str());
1515                         cPara.push_back(tempQueryfract);
1516                 }
1517                 
1518                 
1519                 char** uchimeParameters;
1520                 uchimeParameters = new char*[cPara.size()];
1521                 string commandString = "";
1522                 for (int i = 0; i < cPara.size(); i++) {  uchimeParameters[i] = cPara[i];  commandString += toString(cPara[i]) + " "; } 
1523                 //int numArgs = cPara.size();
1524                 
1525                 //uchime_main(numArgs, uchimeParameters); 
1526                 //cout << "commandString = " << commandString << endl;
1527 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
1528 #else
1529                 commandString = "\"" + commandString + "\"";
1530 #endif
1531         if (m->debug) { m->mothurOut("[DEBUG]: uchime command = " + commandString + ".\n"); }
1532                 system(commandString.c_str());
1533                 
1534                 //free memory
1535                 for(int i = 0; i < cPara.size(); i++)  {  delete cPara[i];  }
1536                 delete[] uchimeParameters; 
1537                 
1538                 //remove "" from filenames
1539                 outputFName = outputFName.substr(1, outputFName.length()-2);
1540                 filename = filename.substr(1, filename.length()-2);
1541                 alns = alns.substr(1, alns.length()-2);
1542                 
1543                 if (m->control_pressed) { return 0; }
1544                 
1545                 //create accnos file from uchime results
1546                 ifstream in; 
1547                 m->openInputFile(outputFName, in);
1548                 
1549                 ofstream out;
1550                 m->openOutputFile(accnos, out);
1551                 
1552                 int num = 0;
1553                 numChimeras = 0;
1554                 while(!in.eof()) {
1555                         
1556                         if (m->control_pressed) { break; }
1557                         
1558                         string name = "";
1559                         string chimeraFlag = "";
1560                         //in >> chimeraFlag >> name;
1561                         
1562             string line = m->getline(in);
1563             vector<string> pieces = m->splitWhiteSpace(line);
1564             if (pieces.size() > 2) { 
1565                 name = pieces[1];
1566                 //fix name if needed
1567                 if (templatefile == "self") { 
1568                     name = name.substr(0, name.length()-1); //rip off last /
1569                     name = name.substr(0, name.find_last_of('/'));
1570                 }
1571                 
1572                 chimeraFlag = pieces[pieces.size()-1];
1573                         }
1574                         //for (int i = 0; i < 15; i++) {  in >> chimeraFlag; }
1575                         m->gobble(in);
1576                         
1577                         if (chimeraFlag == "Y") {  out << name << endl; numChimeras++; }
1578                         num++;
1579                 }
1580                 in.close();
1581                 out.close();
1582                 
1583         //if (templatefile != "self") {  m->mothurRemove(filename); }
1584         
1585                 return num;
1586         }
1587         catch(exception& e) {
1588                 m->errorOut(e, "ChimeraUchimeCommand", "driver");
1589                 exit(1);
1590         }
1591 }
1592 /**************************************************************************************************/
1593 //uchime can't handle some of the things allowed in mothurs fasta files. This functions "cleans up" the file.
1594 int ChimeraUchimeCommand::prepFile(string filename, string output) {
1595         try {
1596         
1597         ifstream in;
1598         m->openInputFile(filename, in);
1599         
1600         ofstream out;
1601         m->openOutputFile(output, out);
1602         
1603         while (!in.eof()) {
1604             if (m->control_pressed) { break;  }
1605             
1606             Sequence seq(in); m->gobble(in);
1607             
1608             if (seq.getName() != "") { seq.printSequence(out); }
1609         }
1610         in.close();
1611         out.close();
1612         
1613         return 0;
1614     }
1615         catch(exception& e) {
1616                 m->errorOut(e, "ChimeraUchimeCommand", "prepFile");
1617                 exit(1);
1618         }
1619 }
1620 /**************************************************************************************************/
1621
1622 int ChimeraUchimeCommand::createProcesses(string outputFileName, string filename, string accnos, string alns, int& numChimeras) {
1623         try {
1624                 
1625                 processIDS.clear();
1626                 int process = 1;
1627                 int num = 0;
1628                 vector<string> files;
1629                 
1630 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)          
1631                 //break up file into multiple files
1632                 m->divideFile(filename, processors, files);
1633                 
1634                 if (m->control_pressed) {  return 0;  }
1635                                 
1636                 //loop through and create all the processes you want
1637                 while (process != processors) {
1638                         int pid = fork();
1639                         
1640                         if (pid > 0) {
1641                                 processIDS.push_back(pid);  //create map from line number to pid so you can append files in correct order later
1642                                 process++;
1643                         }else if (pid == 0){
1644                                 num = driver(outputFileName + toString(getpid()) + ".temp", files[process], accnos + toString(getpid()) + ".temp", alns + toString(getpid()) + ".temp", numChimeras);
1645                                 
1646                                 //pass numSeqs to parent
1647                                 ofstream out;
1648                                 string tempFile = outputFileName + toString(getpid()) + ".num.temp";
1649                                 m->openOutputFile(tempFile, out);
1650                                 out << num << endl;
1651                                 out << numChimeras << endl;
1652                                 out.close();
1653                                 
1654                                 exit(0);
1655                         }else { 
1656                                 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine(); 
1657                                 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
1658                                 exit(0);
1659                         }
1660                 }
1661                 
1662                 //do my part
1663                 num = driver(outputFileName, files[0], accnos, alns, numChimeras);
1664                 
1665                 //force parent to wait until all the processes are done
1666                 for (int i=0;i<processIDS.size();i++) { 
1667                         int temp = processIDS[i];
1668                         wait(&temp);
1669                 }
1670                 
1671                 for (int i = 0; i < processIDS.size(); i++) {
1672                         ifstream in;
1673                         string tempFile =  outputFileName + toString(processIDS[i]) + ".num.temp";
1674                         m->openInputFile(tempFile, in);
1675                         if (!in.eof()) { 
1676                                 int tempNum = 0; 
1677                                 in >> tempNum; m->gobble(in);
1678                                 num += tempNum; 
1679                                 in >> tempNum;
1680                                 numChimeras += tempNum;
1681                         }
1682                         in.close(); m->mothurRemove(tempFile);
1683                 }
1684 #else
1685                 //////////////////////////////////////////////////////////////////////////////////////////////////////
1686                 //Windows version shared memory, so be careful when passing variables through the preClusterData struct. 
1687                 //Above fork() will clone, so memory is separate, but that's not the case with windows, 
1688                 //////////////////////////////////////////////////////////////////////////////////////////////////////
1689                 
1690                 //divide file
1691                 int count = 0;
1692                 int spot = 0;
1693                 map<int, ofstream*> filehandles;
1694                 map<int, ofstream*>::iterator it3;
1695                 
1696                 ofstream* temp;
1697                 for (int i = 0; i < processors; i++) {
1698                         temp = new ofstream;
1699                         filehandles[i] = temp;
1700                         m->openOutputFile(filename+toString(i)+".temp", *(temp));
1701                         files.push_back(filename+toString(i)+".temp");
1702                 }
1703                 
1704                 ifstream in;
1705                 m->openInputFile(filename, in);
1706                 
1707                 while(!in.eof()) {
1708                         
1709                         if (m->control_pressed) { in.close(); for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) { (*(it3->second)).close(); delete it3->second; } return 0; }
1710                         
1711                         Sequence tempSeq(in); m->gobble(in); 
1712                         
1713                         if (tempSeq.getName() != "") {
1714                                 tempSeq.printSequence(*(filehandles[spot])); 
1715                                 spot++; count++;
1716                                 if (spot == processors) { spot = 0; }
1717                         }
1718                 }
1719                 in.close();
1720                 
1721                 //delete memory
1722                 for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) {
1723                         (*(it3->second)).close();
1724                         delete it3->second;
1725                 }
1726                 
1727                 //sanity check for number of processors
1728                 if (count < processors) { processors = count; }
1729                 
1730                 vector<uchimeData*> pDataArray; 
1731                 DWORD   dwThreadIdArray[processors-1];
1732                 HANDLE  hThreadArray[processors-1]; 
1733                 vector<string> dummy; //used so that we can use the same struct for MyUchimeSeqsThreadFunction and MyUchimeThreadFunction
1734                 
1735                 //Create processor worker threads.
1736                 for( int i=1; i<processors; i++ ){
1737                         // Allocate memory for thread data.
1738                         string extension = toString(i) + ".temp";
1739                         
1740                         uchimeData* tempUchime = new uchimeData(outputFileName+extension, uchimeLocation, templatefile, files[i], "", "", "", accnos+extension, alns+extension, "", dummy, m, 0, 0,  i);
1741                         tempUchime->setBooleans(dups, useAbskew, chimealns, useMinH, useMindiv, useXn, useDn, useXa, useChunks, useMinchunk, useIdsmoothwindow, useMinsmoothid, useMaxp, skipgaps, skipgaps2, useMinlen, useMaxlen, ucl, useQueryfract, hasCount);
1742                         tempUchime->setVariables(abskew, minh, mindiv, xn, dn, xa, chunks, minchunk, idsmoothwindow, minsmoothid, maxp, minlen, maxlen, queryfract, strand);
1743                         
1744                         pDataArray.push_back(tempUchime);
1745                         processIDS.push_back(i);
1746                         
1747                         //MySeqSumThreadFunction is in header. It must be global or static to work with the threads.
1748                         //default security attributes, thread function name, argument to thread function, use default creation flags, returns the thread identifier
1749                         hThreadArray[i-1] = CreateThread(NULL, 0, MyUchimeSeqsThreadFunction, pDataArray[i-1], 0, &dwThreadIdArray[i-1]);   
1750                 }
1751                 
1752                 
1753                 //using the main process as a worker saves time and memory
1754                 num = driver(outputFileName, files[0], accnos, alns, numChimeras);
1755                 
1756                 //Wait until all threads have terminated.
1757                 WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE);
1758                 
1759                 //Close all thread handles and free memory allocations.
1760                 for(int i=0; i < pDataArray.size(); i++){
1761                         num += pDataArray[i]->count;
1762                         numChimeras += pDataArray[i]->numChimeras;
1763                         CloseHandle(hThreadArray[i]);
1764                         delete pDataArray[i];
1765                 }
1766 #endif          
1767                 
1768                 //append output files
1769                 for(int i=0;i<processIDS.size();i++){
1770                         m->appendFiles((outputFileName + toString(processIDS[i]) + ".temp"), outputFileName);
1771                         m->mothurRemove((outputFileName + toString(processIDS[i]) + ".temp"));
1772                         
1773                         m->appendFiles((accnos + toString(processIDS[i]) + ".temp"), accnos);
1774                         m->mothurRemove((accnos + toString(processIDS[i]) + ".temp"));
1775                         
1776                         if (chimealns) {
1777                                 m->appendFiles((alns + toString(processIDS[i]) + ".temp"), alns);
1778                                 m->mothurRemove((alns + toString(processIDS[i]) + ".temp"));
1779                         }
1780                 }
1781                 
1782                 //get rid of the file pieces.
1783                 for (int i = 0; i < files.size(); i++) { m->mothurRemove(files[i]); }
1784                 return num;     
1785         }
1786         catch(exception& e) {
1787                 m->errorOut(e, "ChimeraUchimeCommand", "createProcesses");
1788                 exit(1);
1789         }
1790 }
1791 /**************************************************************************************************/
1792
1793 int ChimeraUchimeCommand::createProcessesGroups(string outputFName, string filename, string accnos, string alns, string newCountFile, vector<string> groups, string nameFile, string groupFile, string fastaFile) {
1794         try {
1795                 
1796                 processIDS.clear();
1797                 int process = 1;
1798                 int num = 0;
1799         
1800         CountTable newCount;
1801         if (hasCount && dups) { newCount.readTable(nameFile); }
1802                 
1803                 //sanity check
1804                 if (groups.size() < processors) { processors = groups.size(); }
1805                 
1806                 //divide the groups between the processors
1807                 vector<linePair> lines;
1808                 int numGroupsPerProcessor = groups.size() / processors;
1809                 for (int i = 0; i < processors; i++) {
1810                         int startIndex =  i * numGroupsPerProcessor;
1811                         int endIndex = (i+1) * numGroupsPerProcessor;
1812                         if(i == (processors - 1)){      endIndex = groups.size();       }
1813                         lines.push_back(linePair(startIndex, endIndex));
1814                 }
1815                 
1816 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)          
1817                                 
1818                 //loop through and create all the processes you want
1819                 while (process != processors) {
1820                         int pid = fork();
1821                         
1822                         if (pid > 0) {
1823                                 processIDS.push_back(pid);  //create map from line number to pid so you can append files in correct order later
1824                                 process++;
1825                         }else if (pid == 0){
1826                                 num = driverGroups(outputFName + toString(getpid()) + ".temp", filename + toString(getpid()) + ".temp", accnos + toString(getpid()) + ".temp", alns + toString(getpid()) + ".temp", accnos + ".byCount." + toString(getpid()) + ".temp", lines[process].start, lines[process].end, groups);
1827                                 
1828                                 //pass numSeqs to parent
1829                                 ofstream out;
1830                                 string tempFile = outputFName + toString(getpid()) + ".num.temp";
1831                                 m->openOutputFile(tempFile, out);
1832                                 out << num << endl;
1833                                 out.close();
1834                                 
1835                                 exit(0);
1836                         }else { 
1837                                 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine(); 
1838                                 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
1839                                 exit(0);
1840                         }
1841                 }
1842                 
1843                 //do my part
1844                 num = driverGroups(outputFName, filename, accnos, alns, accnos + ".byCount", lines[0].start, lines[0].end, groups);
1845                 
1846                 //force parent to wait until all the processes are done
1847                 for (int i=0;i<processIDS.size();i++) { 
1848                         int temp = processIDS[i];
1849                         wait(&temp);
1850                 }
1851         
1852                 for (int i = 0; i < processIDS.size(); i++) {
1853                         ifstream in;
1854                         string tempFile =  outputFName + toString(processIDS[i]) + ".num.temp";
1855                         m->openInputFile(tempFile, in);
1856                         if (!in.eof()) { int tempNum = 0; in >> tempNum; num += tempNum; }
1857                         in.close(); m->mothurRemove(tempFile);
1858         }
1859         
1860 #else
1861                 //////////////////////////////////////////////////////////////////////////////////////////////////////
1862                 //Windows version shared memory, so be careful when passing variables through the uchimeData struct. 
1863                 //Above fork() will clone, so memory is separate, but that's not the case with windows, 
1864                 //////////////////////////////////////////////////////////////////////////////////////////////////////
1865                 
1866                 vector<uchimeData*> pDataArray; 
1867                 DWORD   dwThreadIdArray[processors-1];
1868                 HANDLE  hThreadArray[processors-1]; 
1869                 
1870                 //Create processor worker threads.
1871                 for( int i=1; i<processors; i++ ){
1872                         // Allocate memory for thread data.
1873                         string extension = toString(i) + ".temp";
1874                         
1875                         uchimeData* tempUchime = new uchimeData(outputFName+extension, uchimeLocation, templatefile, filename+extension, fastaFile, nameFile, groupFile, accnos+extension, alns+extension, accnos+".byCount."+extension, groups, m, lines[i].start, lines[i].end,  i);
1876                         tempUchime->setBooleans(dups, useAbskew, chimealns, useMinH, useMindiv, useXn, useDn, useXa, useChunks, useMinchunk, useIdsmoothwindow, useMinsmoothid, useMaxp, skipgaps, skipgaps2, useMinlen, useMaxlen, ucl, useQueryfract, hasCount);
1877                         tempUchime->setVariables(abskew, minh, mindiv, xn, dn, xa, chunks, minchunk, idsmoothwindow, minsmoothid, maxp, minlen, maxlen, queryfract, strand);
1878                         
1879                         pDataArray.push_back(tempUchime);
1880                         processIDS.push_back(i);
1881                         
1882                         //MyUchimeThreadFunction is in header. It must be global or static to work with the threads.
1883                         //default security attributes, thread function name, argument to thread function, use default creation flags, returns the thread identifier
1884                         hThreadArray[i-1] = CreateThread(NULL, 0, MyUchimeThreadFunction, pDataArray[i-1], 0, &dwThreadIdArray[i-1]);   
1885                 }
1886                 
1887                 
1888                 //using the main process as a worker saves time and memory
1889                 num = driverGroups(outputFName, filename, accnos, alns, accnos + ".byCount", lines[0].start, lines[0].end, groups);
1890                 
1891                 //Wait until all threads have terminated.
1892                 WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE);
1893                 
1894                 //Close all thread handles and free memory allocations.
1895                 for(int i=0; i < pDataArray.size(); i++){
1896                         num += pDataArray[i]->count;
1897                         CloseHandle(hThreadArray[i]);
1898                         delete pDataArray[i];
1899                 }
1900         
1901         
1902 #endif          
1903       
1904         //read my own
1905         if (hasCount && dups) {
1906             if (!m->isBlank(accnos + ".byCount")) {
1907                 ifstream in2;
1908                 m->openInputFile(accnos + ".byCount", in2);
1909                 
1910                 string name, group;
1911                 while (!in2.eof()) {
1912                     in2 >> name >> group; m->gobble(in2);
1913                     newCount.setAbund(name, group, 0);
1914                 }
1915                 in2.close();
1916             }
1917             m->mothurRemove(accnos + ".byCount");
1918         }
1919        
1920                 //append output files
1921                 for(int i=0;i<processIDS.size();i++){
1922                         m->appendFiles((outputFName + toString(processIDS[i]) + ".temp"), outputFName);
1923                         m->mothurRemove((outputFName + toString(processIDS[i]) + ".temp"));
1924                         
1925                         m->appendFiles((accnos + toString(processIDS[i]) + ".temp"), accnos);
1926                         m->mothurRemove((accnos + toString(processIDS[i]) + ".temp"));
1927                         
1928                         if (chimealns) {
1929                                 m->appendFiles((alns + toString(processIDS[i]) + ".temp"), alns);
1930                                 m->mothurRemove((alns + toString(processIDS[i]) + ".temp"));
1931                         }
1932             
1933             if (hasCount && dups) {
1934                 if (!m->isBlank(accnos + ".byCount." + toString(processIDS[i]) + ".temp")) {
1935                     ifstream in2;
1936                     m->openInputFile(accnos + ".byCount." + toString(processIDS[i]) + ".temp", in2);
1937                     
1938                     string name, group;
1939                     while (!in2.eof()) {
1940                         in2 >> name >> group; m->gobble(in2);
1941                         newCount.setAbund(name, group, 0);
1942                     }
1943                     in2.close();
1944                 }
1945                 m->mothurRemove(accnos + ".byCount." + toString(processIDS[i]) + ".temp");
1946             }
1947
1948                 }
1949         
1950         //print new *.pick.count_table
1951         if (hasCount && dups) {  newCount.printTable(newCountFile);   }
1952                 
1953                 return num;     
1954                 
1955         }
1956         catch(exception& e) {
1957                 m->errorOut(e, "ChimeraUchimeCommand", "createProcessesGroups");
1958                 exit(1);
1959         }
1960 }
1961 /**************************************************************************************************/
1962