]> git.donarmstrong.com Git - mothur.git/blob - chimerauchimecommand.cpp
added dups parameter to chimera.uchime. working on make.contigs command.
[mothur.git] / chimerauchimecommand.cpp
1 /*
2  *  chimerauchimecommand.cpp
3  *  Mothur
4  *
5  *  Created by westcott on 5/13/11.
6  *  Copyright 2011 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "chimerauchimecommand.h"
11 #include "deconvolutecommand.h"
12 //#include "uc.h"
13 #include "sequence.hpp"
14 #include "referencedb.h"
15 #include "systemcommand.h"
16
17 //**********************************************************************************************************************
18 vector<string> ChimeraUchimeCommand::setParameters(){   
19         try {
20                 CommandParameter ptemplate("reference", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(ptemplate);
21                 CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pfasta);
22                 CommandParameter pname("name", "InputTypes", "", "", "NameCount", "none", "none",false,false); parameters.push_back(pname);
23         CommandParameter pcount("count", "InputTypes", "", "", "NameCount-CountGroup", "none", "none",false,false); parameters.push_back(pcount);
24                 CommandParameter pgroup("group", "InputTypes", "", "", "CountGroup", "none", "none",false,false); parameters.push_back(pgroup);
25                 CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors);
26                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
27                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
28                 CommandParameter pabskew("abskew", "Number", "", "1.9", "", "", "",false,false); parameters.push_back(pabskew);
29                 CommandParameter pchimealns("chimealns", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pchimealns);
30                 CommandParameter pminh("minh", "Number", "", "0.3", "", "", "",false,false); parameters.push_back(pminh);
31                 CommandParameter pmindiv("mindiv", "Number", "", "0.5", "", "", "",false,false); parameters.push_back(pmindiv);
32                 CommandParameter pxn("xn", "Number", "", "8.0", "", "", "",false,false); parameters.push_back(pxn);
33                 CommandParameter pdn("dn", "Number", "", "1.4", "", "", "",false,false); parameters.push_back(pdn);
34                 CommandParameter pxa("xa", "Number", "", "1", "", "", "",false,false); parameters.push_back(pxa);
35                 CommandParameter pchunks("chunks", "Number", "", "4", "", "", "",false,false); parameters.push_back(pchunks);
36                 CommandParameter pminchunk("minchunk", "Number", "", "64", "", "", "",false,false); parameters.push_back(pminchunk);
37                 CommandParameter pidsmoothwindow("idsmoothwindow", "Number", "", "32", "", "", "",false,false); parameters.push_back(pidsmoothwindow);
38         CommandParameter pdups("dups", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(pdups);
39
40                 //CommandParameter pminsmoothid("minsmoothid", "Number", "", "0.95", "", "", "",false,false); parameters.push_back(pminsmoothid);
41                 CommandParameter pmaxp("maxp", "Number", "", "2", "", "", "",false,false); parameters.push_back(pmaxp);
42                 CommandParameter pskipgaps("skipgaps", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(pskipgaps);
43                 CommandParameter pskipgaps2("skipgaps2", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(pskipgaps2);
44                 CommandParameter pminlen("minlen", "Number", "", "10", "", "", "",false,false); parameters.push_back(pminlen);
45                 CommandParameter pmaxlen("maxlen", "Number", "", "10000", "", "", "",false,false); parameters.push_back(pmaxlen);
46                 CommandParameter pucl("ucl", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pucl);
47                 CommandParameter pqueryfract("queryfract", "Number", "", "0.5", "", "", "",false,false); parameters.push_back(pqueryfract);
48
49                 vector<string> myArray;
50                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
51                 return myArray;
52         }
53         catch(exception& e) {
54                 m->errorOut(e, "ChimeraUchimeCommand", "setParameters");
55                 exit(1);
56         }
57 }
58 //**********************************************************************************************************************
59 string ChimeraUchimeCommand::getHelpString(){   
60         try {
61                 string helpString = "";
62                 helpString += "The chimera.uchime command reads a fastafile and referencefile and outputs potentially chimeric sequences.\n";
63                 helpString += "This command is a wrapper for uchime written by Robert C. Edgar.\n";
64                 helpString += "The chimera.uchime command parameters are fasta, name, count, reference, processors, dups, abskew, chimealns, minh, mindiv, xn, dn, xa, chunks, minchunk, idsmoothwindow, minsmoothid, maxp, skipgaps, skipgaps2, minlen, maxlen, ucl and queryfact.\n";
65                 helpString += "The fasta parameter allows you to enter the fasta file containing your potentially chimeric sequences, and is required, unless you have a valid current fasta file. \n";
66                 helpString += "The name parameter allows you to provide a name file, if you are using template=self. \n";
67         helpString += "The count parameter allows you to provide a count file, if you are using template=self. \n";
68                 helpString += "You may enter multiple fasta files by separating their names with dashes. ie. fasta=abrecovery.fasta-amazon.fasta \n";
69                 helpString += "The group parameter allows you to provide a group file. The group file can be used with a namesfile and reference=self. When checking sequences, only sequences from the same group as the query sequence will be used as the reference. \n";
70         helpString += "If the dups parameter is true, then if one group finds the seqeunce to be chimeric, then all groups find it to be chimeric, default=t.\n";
71                 helpString += "The reference parameter allows you to enter a reference file containing known non-chimeric sequences, and is required. You may also set template=self, in this case the abundant sequences will be used as potential parents. \n";
72                 helpString += "The processors parameter allows you to specify how many processors you would like to use.  The default is 1. \n";
73                 helpString += "The abskew parameter can only be used with template=self. Minimum abundance skew. Default 1.9. Abundance skew is: min [ abund(parent1), abund(parent2) ] / abund(query).\n";
74                 helpString += "The chimealns parameter allows you to indicate you would like a file containing multiple alignments of query sequences to parents in human readable format. Alignments show columns with differences that support or contradict a chimeric model.\n";
75                 helpString += "The minh parameter - mininum score to report chimera. Default 0.3. Values from 0.1 to 5 might be reasonable. Lower values increase sensitivity but may report more false positives. If you decrease xn you may need to increase minh, and vice versa.\n";
76                 helpString += "The mindiv parameter - minimum divergence ratio, default 0.5. Div ratio is 100%% - %%identity between query sequence and the closest candidate for being a parent. If you don't care about very close chimeras, then you could increase mindiv to, say, 1.0 or 2.0, and also decrease minh, say to 0.1, to increase sensitivity. How well this works will depend on your data. Best is to tune parameters on a good benchmark.\n";
77                 helpString += "The xn parameter - weight of a no vote. Default 8.0. Decreasing this weight to around 3 or 4 may give better performance on denoised data.\n";
78                 helpString += "The dn parameter - pseudo-count prior on number of no votes. Default 1.4. Probably no good reason to change this unless you can retune to a good benchmark for your data. Reasonable values are probably in the range from 0.2 to 2.\n";
79                 helpString += "The xa parameter - weight of an abstain vote. Default 1. So far, results do not seem to be very sensitive to this parameter, but if you have a good training set might be worth trying. Reasonable values might range from 0.1 to 2.\n";
80                 helpString += "The chunks parameter is the number of chunks to extract from the query sequence when searching for parents. Default 4.\n";
81                 helpString += "The minchunk parameter is the minimum length of a chunk. Default 64.\n";
82                 helpString += "The idsmoothwindow parameter is the length of id smoothing window. Default 32.\n";
83                 //helpString += "The minsmoothid parameter - minimum factional identity over smoothed window of candidate parent. Default 0.95.\n";
84                 helpString += "The maxp parameter - maximum number of candidate parents to consider. Default 2. In tests so far, increasing maxp gives only a very small improvement in sensivity but tends to increase the error rate quite a bit.\n";
85                 helpString += "The skipgaps parameter controls how gapped columns affect counting of diffs. If skipgaps is set to T, columns containing gaps do not found as diffs. Default = T.\n";
86                 helpString += "The skipgaps2 parameter controls how gapped columns affect counting of diffs. If skipgaps2 is set to T, if column is immediately adjacent to a column containing a gap, it is not counted as a diff. Default = T.\n";
87                 helpString += "The minlen parameter is the minimum unaligned sequence length. Defaults 10. Applies to both query and reference sequences.\n";
88                 helpString += "The maxlen parameter is the maximum unaligned sequence length. Defaults 10000. Applies to both query and reference sequences.\n";
89                 helpString += "The ucl parameter - use local-X alignments. Default is global-X or false. On tests so far, global-X is always better; this option is retained because it just might work well on some future type of data.\n";
90                 helpString += "The queryfract parameter - minimum fraction of the query sequence that must be covered by a local-X alignment. Default 0.5. Applies only when ucl is true.\n";
91 #ifdef USE_MPI
92                 helpString += "When using MPI, the processors parameter is set to the number of MPI processes running. \n";
93 #endif
94                 helpString += "The chimera.uchime command should be in the following format: \n";
95                 helpString += "chimera.uchime(fasta=yourFastaFile, reference=yourTemplate) \n";
96                 helpString += "Example: chimera.uchime(fasta=AD.align, reference=silva.gold.align) \n";
97                 helpString += "Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFastaFile).\n";       
98                 return helpString;
99         }
100         catch(exception& e) {
101                 m->errorOut(e, "ChimeraUchimeCommand", "getHelpString");
102                 exit(1);
103         }
104 }
105 //**********************************************************************************************************************
106 string ChimeraUchimeCommand::getOutputFileNameTag(string type, string inputName=""){    
107         try {
108         string outputFileName = "";
109                 map<string, vector<string> >::iterator it;
110         
111         //is this a type this command creates
112         it = outputTypes.find(type);
113         if (it == outputTypes.end()) {  m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n"); }
114         else {
115             if (type == "chimera") {  outputFileName =  "uchime.chimeras"; }
116             else if (type == "accnos") {  outputFileName =  "uchime.accnos"; }
117             else if (type == "alns") {  outputFileName =  "uchime.alns"; }
118             else { m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n"); m->control_pressed = true;  }
119         }
120         return outputFileName;
121         }
122         catch(exception& e) {
123                 m->errorOut(e, "ChimeraUchimeCommand", "getOutputFileNameTag");
124                 exit(1);
125         }
126 }
127 //**********************************************************************************************************************
128 ChimeraUchimeCommand::ChimeraUchimeCommand(){   
129         try {
130                 abort = true; calledHelp = true;
131                 setParameters();
132                 vector<string> tempOutNames;
133                 outputTypes["chimera"] = tempOutNames;
134                 outputTypes["accnos"] = tempOutNames;
135                 outputTypes["alns"] = tempOutNames;
136         }
137         catch(exception& e) {
138                 m->errorOut(e, "ChimeraUchimeCommand", "ChimeraUchimeCommand");
139                 exit(1);
140         }
141 }
142 //***************************************************************************************************************
143 ChimeraUchimeCommand::ChimeraUchimeCommand(string option)  {
144         try {
145                 abort = false; calledHelp = false; hasName=false; hasCount=false;
146                 ReferenceDB* rdb = ReferenceDB::getInstance();
147                 
148                 //allow user to run help
149                 if(option == "help") { help(); abort = true; calledHelp = true; }
150                 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
151                 
152                 else {
153                         vector<string> myArray = setParameters();
154                         
155                         OptionParser parser(option);
156                         map<string,string> parameters = parser.getParameters();
157                         
158                         ValidParameters validParameter("chimera.uchime");
159                         map<string,string>::iterator it;
160                         
161                         //check to make sure all parameters are valid for command
162                         for (it = parameters.begin(); it != parameters.end(); it++) { 
163                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
164                         }
165                         
166                         vector<string> tempOutNames;
167                         outputTypes["chimera"] = tempOutNames;
168                         outputTypes["accnos"] = tempOutNames;
169                         outputTypes["alns"] = tempOutNames;
170                         
171                         //if the user changes the input directory command factory will send this info to us in the output parameter 
172                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
173                         if (inputDir == "not found"){   inputDir = "";          }
174                         
175                         //check for required parameters
176                         fastafile = validParameter.validFile(parameters, "fasta", false);
177                         if (fastafile == "not found") {                                 
178                                 //if there is a current fasta file, use it
179                                 string filename = m->getFastaFile(); 
180                                 if (filename != "") { fastaFileNames.push_back(filename); m->mothurOut("Using " + filename + " as input file for the fasta parameter."); m->mothurOutEndLine(); }
181                                 else {  m->mothurOut("You have no current fastafile and the fasta parameter is required."); m->mothurOutEndLine(); abort = true; }
182                         }else { 
183                                 m->splitAtDash(fastafile, fastaFileNames);
184                                 
185                                 //go through files and make sure they are good, if not, then disregard them
186                                 for (int i = 0; i < fastaFileNames.size(); i++) {
187                                         
188                                         bool ignore = false;
189                                         if (fastaFileNames[i] == "current") { 
190                                                 fastaFileNames[i] = m->getFastaFile(); 
191                                                 if (fastaFileNames[i] != "") {  m->mothurOut("Using " + fastaFileNames[i] + " as input file for the fasta parameter where you had given current."); m->mothurOutEndLine(); }
192                                                 else {  
193                                                         m->mothurOut("You have no current fastafile, ignoring current."); m->mothurOutEndLine(); ignore=true; 
194                                                         //erase from file list
195                                                         fastaFileNames.erase(fastaFileNames.begin()+i);
196                                                         i--;
197                                                 }
198                                         }
199                                         
200                                         if (!ignore) {
201                                                 
202                                                 if (inputDir != "") {
203                                                         string path = m->hasPath(fastaFileNames[i]);
204                                                         //if the user has not given a path then, add inputdir. else leave path alone.
205                                                         if (path == "") {       fastaFileNames[i] = inputDir + fastaFileNames[i];               }
206                                                 }
207                                                 
208                                                 int ableToOpen;
209                                                 ifstream in;
210                                                 
211                                                 ableToOpen = m->openInputFile(fastaFileNames[i], in, "noerror");
212                                                 
213                                                 //if you can't open it, try default location
214                                                 if (ableToOpen == 1) {
215                                                         if (m->getDefaultPath() != "") { //default path is set
216                                                                 string tryPath = m->getDefaultPath() + m->getSimpleName(fastaFileNames[i]);
217                                                                 m->mothurOut("Unable to open " + fastaFileNames[i] + ". Trying default " + tryPath); m->mothurOutEndLine();
218                                                                 ifstream in2;
219                                                                 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
220                                                                 in2.close();
221                                                                 fastaFileNames[i] = tryPath;
222                                                         }
223                                                 }
224                                                 
225                                                 if (ableToOpen == 1) {
226                                                         if (m->getOutputDir() != "") { //default path is set
227                                                                 string tryPath = m->getOutputDir() + m->getSimpleName(fastaFileNames[i]);
228                                                                 m->mothurOut("Unable to open " + fastaFileNames[i] + ". Trying output directory " + tryPath); m->mothurOutEndLine();
229                                                                 ifstream in2;
230                                                                 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
231                                                                 in2.close();
232                                                                 fastaFileNames[i] = tryPath;
233                                                         }
234                                                 }
235                                                 
236                                                 in.close();
237                                                 
238                                                 if (ableToOpen == 1) { 
239                                                         m->mothurOut("Unable to open " + fastaFileNames[i] + ". It will be disregarded."); m->mothurOutEndLine(); 
240                                                         //erase from file list
241                                                         fastaFileNames.erase(fastaFileNames.begin()+i);
242                                                         i--;
243                                                 }else {
244                                                         m->setFastaFile(fastaFileNames[i]);
245                                                 }
246                                         }
247                                 }
248                                 
249                                 //make sure there is at least one valid file left
250                                 if (fastaFileNames.size() == 0) { m->mothurOut("[ERROR]: no valid files."); m->mothurOutEndLine(); abort = true; }
251                         }
252                         
253                         
254                         //check for required parameters
255                         namefile = validParameter.validFile(parameters, "name", false);
256                         if (namefile == "not found") { namefile = "";   }
257                         else { 
258                                 m->splitAtDash(namefile, nameFileNames);
259                                 
260                                 //go through files and make sure they are good, if not, then disregard them
261                                 for (int i = 0; i < nameFileNames.size(); i++) {
262                                         
263                                         bool ignore = false;
264                                         if (nameFileNames[i] == "current") { 
265                                                 nameFileNames[i] = m->getNameFile(); 
266                                                 if (nameFileNames[i] != "") {  m->mothurOut("Using " + nameFileNames[i] + " as input file for the name parameter where you had given current."); m->mothurOutEndLine(); }
267                                                 else {  
268                                                         m->mothurOut("You have no current namefile, ignoring current."); m->mothurOutEndLine(); ignore=true; 
269                                                         //erase from file list
270                                                         nameFileNames.erase(nameFileNames.begin()+i);
271                                                         i--;
272                                                 }
273                                         }
274                                         
275                                         if (!ignore) {
276                                                 
277                                                 if (inputDir != "") {
278                                                         string path = m->hasPath(nameFileNames[i]);
279                                                         //if the user has not given a path then, add inputdir. else leave path alone.
280                                                         if (path == "") {       nameFileNames[i] = inputDir + nameFileNames[i];         }
281                                                 }
282                                                 
283                                                 int ableToOpen;
284                                                 ifstream in;
285                                                 
286                                                 ableToOpen = m->openInputFile(nameFileNames[i], in, "noerror");
287                                                 
288                                                 //if you can't open it, try default location
289                                                 if (ableToOpen == 1) {
290                                                         if (m->getDefaultPath() != "") { //default path is set
291                                                                 string tryPath = m->getDefaultPath() + m->getSimpleName(nameFileNames[i]);
292                                                                 m->mothurOut("Unable to open " + nameFileNames[i] + ". Trying default " + tryPath); m->mothurOutEndLine();
293                                                                 ifstream in2;
294                                                                 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
295                                                                 in2.close();
296                                                                 nameFileNames[i] = tryPath;
297                                                         }
298                                                 }
299                                                 
300                                                 if (ableToOpen == 1) {
301                                                         if (m->getOutputDir() != "") { //default path is set
302                                                                 string tryPath = m->getOutputDir() + m->getSimpleName(nameFileNames[i]);
303                                                                 m->mothurOut("Unable to open " + nameFileNames[i] + ". Trying output directory " + tryPath); m->mothurOutEndLine();
304                                                                 ifstream in2;
305                                                                 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
306                                                                 in2.close();
307                                                                 nameFileNames[i] = tryPath;
308                                                         }
309                                                 }
310                                                 
311                                                 in.close();
312                                                 
313                                                 if (ableToOpen == 1) { 
314                                                         m->mothurOut("Unable to open " + nameFileNames[i] + ". It will be disregarded."); m->mothurOutEndLine(); 
315                                                         //erase from file list
316                                                         nameFileNames.erase(nameFileNames.begin()+i);
317                                                         i--;
318                                                 }else {
319                                                         m->setNameFile(nameFileNames[i]);
320                                                 }
321                                         }
322                                 }
323                         }
324             
325             if (nameFileNames.size() != 0) { hasName = true; }
326             
327             //check for required parameters
328             vector<string> countfileNames;
329                         countfile = validParameter.validFile(parameters, "count", false);
330                         if (countfile == "not found") { 
331                 countfile = "";  
332                         }else { 
333                                 m->splitAtDash(countfile, countfileNames);
334                                 
335                                 //go through files and make sure they are good, if not, then disregard them
336                                 for (int i = 0; i < countfileNames.size(); i++) {
337                                         
338                                         bool ignore = false;
339                                         if (countfileNames[i] == "current") { 
340                                                 countfileNames[i] = m->getCountTableFile(); 
341                                                 if (nameFileNames[i] != "") {  m->mothurOut("Using " + countfileNames[i] + " as input file for the count parameter where you had given current."); m->mothurOutEndLine(); }
342                                                 else {  
343                                                         m->mothurOut("You have no current count file, ignoring current."); m->mothurOutEndLine(); ignore=true; 
344                                                         //erase from file list
345                                                         countfileNames.erase(countfileNames.begin()+i);
346                                                         i--;
347                                                 }
348                                         }
349                                         
350                                         if (!ignore) {
351                                                 
352                                                 if (inputDir != "") {
353                                                         string path = m->hasPath(countfileNames[i]);
354                                                         //if the user has not given a path then, add inputdir. else leave path alone.
355                                                         if (path == "") {       countfileNames[i] = inputDir + countfileNames[i];               }
356                                                 }
357                                                 
358                                                 int ableToOpen;
359                                                 ifstream in;
360                                                 
361                                                 ableToOpen = m->openInputFile(countfileNames[i], in, "noerror");
362                                                 
363                                                 //if you can't open it, try default location
364                                                 if (ableToOpen == 1) {
365                                                         if (m->getDefaultPath() != "") { //default path is set
366                                                                 string tryPath = m->getDefaultPath() + m->getSimpleName(countfileNames[i]);
367                                                                 m->mothurOut("Unable to open " + countfileNames[i] + ". Trying default " + tryPath); m->mothurOutEndLine();
368                                                                 ifstream in2;
369                                                                 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
370                                                                 in2.close();
371                                                                 countfileNames[i] = tryPath;
372                                                         }
373                                                 }
374                                                 
375                                                 if (ableToOpen == 1) {
376                                                         if (m->getOutputDir() != "") { //default path is set
377                                                                 string tryPath = m->getOutputDir() + m->getSimpleName(countfileNames[i]);
378                                                                 m->mothurOut("Unable to open " + countfileNames[i] + ". Trying output directory " + tryPath); m->mothurOutEndLine();
379                                                                 ifstream in2;
380                                                                 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
381                                                                 in2.close();
382                                                                 countfileNames[i] = tryPath;
383                                                         }
384                                                 }
385                                                 
386                                                 in.close();
387                                                 
388                                                 if (ableToOpen == 1) { 
389                                                         m->mothurOut("Unable to open " + countfileNames[i] + ". It will be disregarded."); m->mothurOutEndLine(); 
390                                                         //erase from file list
391                                                         countfileNames.erase(countfileNames.begin()+i);
392                                                         i--;
393                                                 }else {
394                                                         m->setCountTableFile(countfileNames[i]);
395                                                 }
396                                         }
397                                 }
398                         }
399             
400             if (countfileNames.size() != 0) { hasCount = true; }
401             
402                         //make sure there is at least one valid file left
403             if (hasName && hasCount) { m->mothurOut("[ERROR]: You must enter ONLY ONE of the following: count or name."); m->mothurOutEndLine(); abort = true; }
404             
405             if (!hasName && hasCount) { nameFileNames = countfileNames; }
406             
407                         if ((hasCount || hasName) && (nameFileNames.size() != fastaFileNames.size())) { m->mothurOut("[ERROR]: The number of name or count files does not match the number of fastafiles, please correct."); m->mothurOutEndLine(); abort=true; }
408                         
409                         bool hasGroup = true;
410                         groupfile = validParameter.validFile(parameters, "group", false);
411                         if (groupfile == "not found") { groupfile = "";  hasGroup = false; }
412                         else { 
413                                 m->splitAtDash(groupfile, groupFileNames);
414                                 
415                                 //go through files and make sure they are good, if not, then disregard them
416                                 for (int i = 0; i < groupFileNames.size(); i++) {
417                                         
418                                         bool ignore = false;
419                                         if (groupFileNames[i] == "current") { 
420                                                 groupFileNames[i] = m->getGroupFile(); 
421                                                 if (groupFileNames[i] != "") {  m->mothurOut("Using " + groupFileNames[i] + " as input file for the group parameter where you had given current."); m->mothurOutEndLine(); }
422                                                 else {  
423                                                         m->mothurOut("You have no current namefile, ignoring current."); m->mothurOutEndLine(); ignore=true; 
424                                                         //erase from file list
425                                                         groupFileNames.erase(groupFileNames.begin()+i);
426                                                         i--;
427                                                 }
428                                         }
429                                         
430                                         if (!ignore) {
431                                                 
432                                                 if (inputDir != "") {
433                                                         string path = m->hasPath(groupFileNames[i]);
434                                                         //if the user has not given a path then, add inputdir. else leave path alone.
435                                                         if (path == "") {       groupFileNames[i] = inputDir + groupFileNames[i];               }
436                                                 }
437                                                 
438                                                 int ableToOpen;
439                                                 ifstream in;
440                                                 
441                                                 ableToOpen = m->openInputFile(groupFileNames[i], in, "noerror");
442                                                 
443                                                 //if you can't open it, try default location
444                                                 if (ableToOpen == 1) {
445                                                         if (m->getDefaultPath() != "") { //default path is set
446                                                                 string tryPath = m->getDefaultPath() + m->getSimpleName(groupFileNames[i]);
447                                                                 m->mothurOut("Unable to open " + groupFileNames[i] + ". Trying default " + tryPath); m->mothurOutEndLine();
448                                                                 ifstream in2;
449                                                                 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
450                                                                 in2.close();
451                                                                 groupFileNames[i] = tryPath;
452                                                         }
453                                                 }
454                                                 
455                                                 if (ableToOpen == 1) {
456                                                         if (m->getOutputDir() != "") { //default path is set
457                                                                 string tryPath = m->getOutputDir() + m->getSimpleName(groupFileNames[i]);
458                                                                 m->mothurOut("Unable to open " + groupFileNames[i] + ". Trying output directory " + tryPath); m->mothurOutEndLine();
459                                                                 ifstream in2;
460                                                                 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
461                                                                 in2.close();
462                                                                 groupFileNames[i] = tryPath;
463                                                         }
464                                                 }
465                                                 
466                                                 in.close();
467                                                 
468                                                 if (ableToOpen == 1) { 
469                                                         m->mothurOut("Unable to open " + groupFileNames[i] + ". It will be disregarded."); m->mothurOutEndLine(); 
470                                                         //erase from file list
471                                                         groupFileNames.erase(groupFileNames.begin()+i);
472                                                         i--;
473                                                 }else {
474                                                         m->setGroupFile(groupFileNames[i]);
475                                                 }
476                                         }
477                                 }
478                                 
479                                 //make sure there is at least one valid file left
480                                 if (groupFileNames.size() == 0) { m->mothurOut("[ERROR]: no valid group files."); m->mothurOutEndLine(); abort = true; }
481                         }
482                         
483                         if (hasGroup && (groupFileNames.size() != fastaFileNames.size())) { m->mothurOut("[ERROR]: The number of groupfiles does not match the number of fastafiles, please correct."); m->mothurOutEndLine(); abort=true; }
484                         
485             if (hasGroup && hasCount) { m->mothurOut("[ERROR]: You must enter ONLY ONE of the following: count or group."); m->mothurOutEndLine(); abort = true; }                      
486                         //if the user changes the output directory command factory will send this info to us in the output parameter 
487                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = ""; }
488                         
489                         
490                         //if the user changes the output directory command factory will send this info to us in the output parameter 
491                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = ""; }
492                         
493                         string path;
494                         it = parameters.find("reference");
495                         //user has given a template file
496                         if(it != parameters.end()){ 
497                                 if (it->second == "self") { templatefile = "self"; }
498                                 else {
499                                         path = m->hasPath(it->second);
500                                         //if the user has not given a path then, add inputdir. else leave path alone.
501                                         if (path == "") {       parameters["reference"] = inputDir + it->second;                }
502                                         
503                                         templatefile = validParameter.validFile(parameters, "reference", true);
504                                         if (templatefile == "not open") { abort = true; }
505                                         else if (templatefile == "not found") { //check for saved reference sequences
506                                                 if (rdb->getSavedReference() != "") {
507                                                         templatefile = rdb->getSavedReference();
508                                                         m->mothurOutEndLine();  m->mothurOut("Using sequences from " + rdb->getSavedReference() + "."); m->mothurOutEndLine();
509                                                 }else {
510                                                         m->mothurOut("[ERROR]: You don't have any saved reference sequences and the reference parameter is a required."); 
511                                                         m->mothurOutEndLine();
512                                                         abort = true; 
513                                                 }
514                                         }
515                                 }
516                         }else if (hasName) {  templatefile = "self"; }
517             else if (hasCount) {  templatefile = "self"; }
518                         else { 
519                                 if (rdb->getSavedReference() != "") {
520                                         templatefile = rdb->getSavedReference();
521                                         m->mothurOutEndLine();  m->mothurOut("Using sequences from " + rdb->getSavedReference() + "."); m->mothurOutEndLine();
522                                 }else {
523                                         m->mothurOut("[ERROR]: You don't have any saved reference sequences and the reference parameter is a required."); 
524                                         m->mothurOutEndLine();
525                                         templatefile = ""; abort = true; 
526                                 } 
527                         }
528                                 
529                         string temp = validParameter.validFile(parameters, "processors", false);        if (temp == "not found"){       temp = m->getProcessors();      }
530                         m->setProcessors(temp);
531                         m->mothurConvert(temp, processors);
532                         
533                         abskew = validParameter.validFile(parameters, "abskew", false); if (abskew == "not found"){     useAbskew = false;  abskew = "1.9";     }else{  useAbskew = true;  }
534                         if (useAbskew && templatefile != "self") { m->mothurOut("The abskew parameter is only valid with template=self, ignoring."); m->mothurOutEndLine(); useAbskew = false; }
535                         
536                         temp = validParameter.validFile(parameters, "chimealns", false);                        if (temp == "not found") { temp = "f"; }
537                         chimealns = m->isTrue(temp); 
538                         
539                         minh = validParameter.validFile(parameters, "minh", false);                                             if (minh == "not found")                        { useMinH = false; minh = "0.3";                                        }       else{ useMinH = true;                   }
540                         mindiv = validParameter.validFile(parameters, "mindiv", false);                                 if (mindiv == "not found")                      { useMindiv = false; mindiv = "0.5";                            }       else{ useMindiv = true;                 }
541                         xn = validParameter.validFile(parameters, "xn", false);                                                 if (xn == "not found")                          { useXn = false; xn = "8.0";                                            }       else{ useXn = true;                             }
542                         dn = validParameter.validFile(parameters, "dn", false);                                                 if (dn == "not found")                          { useDn = false; dn = "1.4";                                            }       else{ useDn = true;                             }
543                         xa = validParameter.validFile(parameters, "xa", false);                                                 if (xa == "not found")                          { useXa = false; xa = "1";                                                      }       else{ useXa = true;                             }
544                         chunks = validParameter.validFile(parameters, "chunks", false);                                 if (chunks == "not found")                      { useChunks = false; chunks = "4";                                      }       else{ useChunks = true;                 }
545                         minchunk = validParameter.validFile(parameters, "minchunk", false);                             if (minchunk == "not found")            { useMinchunk = false; minchunk = "64";                         }       else{ useMinchunk = true;               }
546                         idsmoothwindow = validParameter.validFile(parameters, "idsmoothwindow", false); if (idsmoothwindow == "not found")      { useIdsmoothwindow = false; idsmoothwindow = "32";     }       else{ useIdsmoothwindow = true; }
547                         //minsmoothid = validParameter.validFile(parameters, "minsmoothid", false);             if (minsmoothid == "not found")         { useMinsmoothid = false; minsmoothid = "0.95";         }       else{ useMinsmoothid = true;    }
548                         maxp = validParameter.validFile(parameters, "maxp", false);                                             if (maxp == "not found")                        { useMaxp = false; maxp = "2";                                          }       else{ useMaxp = true;                   }
549                         minlen = validParameter.validFile(parameters, "minlen", false);                                 if (minlen == "not found")                      { useMinlen = false; minlen = "10";                                     }       else{ useMinlen = true;                 }
550                         maxlen = validParameter.validFile(parameters, "maxlen", false);                                 if (maxlen == "not found")                      { useMaxlen = false; maxlen = "10000";                          }       else{ useMaxlen = true;                 }
551                         
552                         temp = validParameter.validFile(parameters, "ucl", false);                                              if (temp == "not found") { temp = "f"; }
553                         ucl = m->isTrue(temp);
554                         
555                         queryfract = validParameter.validFile(parameters, "queryfract", false);                 if (queryfract == "not found")          { useQueryfract = false; queryfract = "0.5";            }       else{ useQueryfract = true;             }
556                         if (!ucl && useQueryfract) { m->mothurOut("queryfact may only be used when ucl=t, ignoring."); m->mothurOutEndLine(); useQueryfract = false; }
557                         
558                         temp = validParameter.validFile(parameters, "skipgaps", false);                                 if (temp == "not found") { temp = "t"; }
559                         skipgaps = m->isTrue(temp); 
560
561                         temp = validParameter.validFile(parameters, "skipgaps2", false);                                if (temp == "not found") { temp = "t"; }
562                         skipgaps2 = m->isTrue(temp); 
563             
564             string usedDups = "true";
565                         temp = validParameter.validFile(parameters, "dups", false);     
566                         if (temp == "not found") { 
567                                 if (groupfile != "")    {  temp = "true";                                       }
568                                 else                    {  temp = "false"; usedDups = "";       }
569                         }
570                         dups = m->isTrue(temp);
571
572                         
573                         if (hasName && (templatefile != "self")) { m->mothurOut("You have provided a namefile and the reference parameter is not set to self. I am not sure what reference you are trying to use, aborting."); m->mothurOutEndLine(); abort=true; }
574                         if (hasGroup && (templatefile != "self")) { m->mothurOut("You have provided a group file and the reference parameter is not set to self. I am not sure what reference you are trying to use, aborting."); m->mothurOutEndLine(); abort=true; }
575                         
576                         //look for uchime exe
577                         path = m->argv;
578                         string tempPath = path;
579                         for (int i = 0; i < path.length(); i++) { tempPath[i] = tolower(path[i]); }
580                         path = path.substr(0, (tempPath.find_last_of('m')));
581                         
582                         string uchimeCommand;
583 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
584                         uchimeCommand = path + "uchime";        //      format the database, -o option gives us the ability
585             if (m->debug) { 
586                 m->mothurOut("[DEBUG]: Uchime location using \"which uchime\" = "); 
587                 Command* newCommand = new SystemCommand("which uchime"); m->mothurOutEndLine();
588                 newCommand->execute();
589                 delete newCommand;
590                 m->mothurOut("[DEBUG]: Mothur's location using \"which mothur\" = "); 
591                 newCommand = new SystemCommand("which mothur"); m->mothurOutEndLine();
592                 newCommand->execute();
593                 delete newCommand;
594             }
595 #else
596                         uchimeCommand = path + "uchime.exe";
597 #endif
598         
599                         //test to make sure uchime exists
600                         ifstream in;
601                         uchimeCommand = m->getFullPathName(uchimeCommand);
602                         int ableToOpen = m->openInputFile(uchimeCommand, in, "no error"); in.close();
603                         if(ableToOpen == 1) {   
604                 m->mothurOut(uchimeCommand + " file does not exist. Checking path... \n");
605                 //check to see if uchime is in the path??
606                 
607                 string uLocation = m->findProgramPath("uchime");
608                 
609                 
610                 ifstream in2;
611 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
612                 ableToOpen = m->openInputFile(uLocation, in2, "no error"); in2.close();
613 #else
614                 ableToOpen = m->openInputFile((uLocation + ".exe"), in2, "no error"); in2.close();
615 #endif
616
617                 if(ableToOpen == 1) { m->mothurOut("[ERROR]: " + uLocation + " file does not exist. mothur requires the uchime executable."); m->mothurOutEndLine(); abort = true; } 
618                 else {  m->mothurOut("Found uchime in your path, using " + uLocation + "\n");uchimeLocation = uLocation; }
619             }else {  uchimeLocation = uchimeCommand; }
620             
621             uchimeLocation = m->getFullPathName(uchimeLocation);
622         }
623         }
624         catch(exception& e) {
625                 m->errorOut(e, "ChimeraSlayerCommand", "ChimeraSlayerCommand");
626                 exit(1);
627         }
628 }
629 //***************************************************************************************************************
630
631 int ChimeraUchimeCommand::execute(){
632         try{
633         
634         if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
635                 
636                 m->mothurOut("\nuchime by Robert C. Edgar\nhttp://drive5.com/uchime\nThis code is donated to the public domain.\n\n");
637                 
638                 for (int s = 0; s < fastaFileNames.size(); s++) {
639                         
640                         m->mothurOut("Checking sequences from " + fastaFileNames[s] + " ..." ); m->mothurOutEndLine();
641                         
642                         int start = time(NULL); 
643                         string nameFile = "";
644                         if (outputDir == "") { outputDir = m->hasPath(fastaFileNames[s]);  }//if user entered a file with a path then preserve it                               
645                         string outputFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + getOutputFileNameTag("chimera");
646                         string accnosFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + getOutputFileNameTag("accnos");
647                         string alnsFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s]))  + getOutputFileNameTag("alns");
648                         string newFasta = m->getRootName(fastaFileNames[s]) + "temp";
649                                 
650                         //you provided a groupfile
651                         string groupFile = "";
652             bool hasGroup = false;
653                         if (groupFileNames.size() != 0) { groupFile = groupFileNames[s]; hasGroup = true; }
654             else if (hasCount) {
655                 CountTable ct;
656                 if (ct.testGroups(nameFileNames[s])) { hasGroup = true; }
657             }
658                         
659                         if ((templatefile == "self") && (!hasGroup)) { //you want to run uchime with a template=self and no groups
660
661                                 if (processors != 1) { m->mothurOut("When using template=self, mothur can only use 1 processor, continuing."); m->mothurOutEndLine(); processors = 1; }
662                                 if (nameFileNames.size() != 0) { //you provided a namefile and we don't need to create one
663                                         nameFile = nameFileNames[s];
664                                 }else { nameFile = getNamesFile(fastaFileNames[s]); }
665                                                                                 
666                                 map<string, string> seqs;  
667                                 readFasta(fastaFileNames[s], seqs);  if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) {   m->mothurRemove(outputNames[j]);        }  return 0; }
668
669                                 //read namefile
670                                 vector<seqPriorityNode> nameMapCount;
671                 int error;
672                 if (hasCount) {
673                     CountTable ct;
674                     ct.readTable(nameFile);
675                     for(map<string, string>::iterator it = seqs.begin(); it != seqs.end(); it++) {
676                         int num = ct.getNumSeqs(it->first);
677                         if (num == 0) { error = 1; }
678                         else {
679                             seqPriorityNode temp(num, it->second, it->first);
680                             nameMapCount.push_back(temp);
681                         }
682                     }
683                 }else {
684                     error = m->readNames(nameFile, nameMapCount, seqs); if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) {        m->mothurRemove(outputNames[j]);        }  return 0; }
685                 }
686                                 if (error == 1) { for (int j = 0; j < outputNames.size(); j++) {        m->mothurRemove(outputNames[j]);        }  return 0; }
687                                 if (seqs.size() != nameMapCount.size()) { m->mothurOut( "The number of sequences in your fastafile does not match the number of sequences in your namefile, aborting."); m->mothurOutEndLine(); for (int j = 0; j < outputNames.size(); j++) {  m->mothurRemove(outputNames[j]);        }  return 0; }
688                                 
689                                 printFile(nameMapCount, newFasta);
690                                 fastaFileNames[s] = newFasta;
691                         }
692                         
693                         if (m->control_pressed) {  for (int j = 0; j < outputNames.size(); j++) {       m->mothurRemove(outputNames[j]);        }  return 0;    }                               
694                         
695                         if (hasGroup) {
696                                 if (nameFileNames.size() != 0) { //you provided a namefile and we don't need to create one
697                                         nameFile = nameFileNames[s];
698                                 }else { nameFile = getNamesFile(fastaFileNames[s]); }
699                                 
700                                 //Parse sequences by group
701                 vector<string> groups;
702                 map<string, string> uniqueNames;
703                 if (hasCount) {
704                     cparser = new SequenceCountParser(nameFile, fastaFileNames[s]);
705                     groups = cparser->getNamesOfGroups();
706                     uniqueNames = cparser->getAllSeqsMap();
707                 }else{
708                     sparser = new SequenceParser(groupFile, fastaFileNames[s], nameFile);
709                     groups = sparser->getNamesOfGroups();
710                     uniqueNames = sparser->getAllSeqsMap();
711                 }
712                                         
713                                 if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) {        m->mothurRemove(outputNames[j]);        }  return 0; }
714                                                                 
715                                 //clears files
716                                 ofstream out, out1, out2;
717                                 m->openOutputFile(outputFileName, out); out.close(); 
718                                 m->openOutputFile(accnosFileName, out1); out1.close();
719                                 if (chimealns) { m->openOutputFile(alnsFileName, out2); out2.close(); }
720                                 int totalSeqs = 0;
721                                 
722                                 if(processors == 1)     {       totalSeqs = driverGroups(outputFileName, newFasta, accnosFileName, alnsFileName, 0, groups.size(), groups);     }
723                                 else                            {       totalSeqs = createProcessesGroups(outputFileName, newFasta, accnosFileName, alnsFileName, groups, nameFile, groupFile, fastaFileNames[s]);                      }
724
725                                 if (m->control_pressed) {  for (int j = 0; j < outputNames.size(); j++) {       m->mothurRemove(outputNames[j]);        }  return 0;    }                               
726                 if (hasCount) { delete cparser; }
727                 else { delete sparser; }
728                 
729                 if (dups) { 
730                     int totalChimeras = deconvoluteResults(uniqueNames, outputFileName, accnosFileName, alnsFileName);
731                                 
732                     m->mothurOutEndLine(); m->mothurOut("It took " + toString(time(NULL) - start) + " secs to check " + toString(totalSeqs) + " sequences. " + toString(totalChimeras) + " chimeras were found.");      m->mothurOutEndLine();
733                     m->mothurOut("The number of sequences checked may be larger than the number of unique sequences because some sequences are found in several samples."); m->mothurOutEndLine(); 
734                                 }
735                 
736                                 if (m->control_pressed) {  for (int j = 0; j < outputNames.size(); j++) {       m->mothurRemove(outputNames[j]);        }  return 0;    }                               
737                                         
738                         }else{
739                                 if (m->control_pressed) {  for (int j = 0; j < outputNames.size(); j++) {       m->mothurRemove(outputNames[j]);        }  return 0;    }
740                         
741                                 int numSeqs = 0;
742                                 int numChimeras = 0;
743
744                                 if(processors == 1){ numSeqs = driver(outputFileName, fastaFileNames[s], accnosFileName, alnsFileName, numChimeras); }
745                                 else{   numSeqs = createProcesses(outputFileName, fastaFileNames[s], accnosFileName, alnsFileName, numChimeras); }
746                                 
747                                 //add headings
748                                 ofstream out;
749                                 m->openOutputFile(outputFileName+".temp", out); 
750                                 out << "Score\tQuery\tParentA\tParentB\tIdQM\tIdQA\tIdQB\tIdAB\tIdQT\tLY\tLN\tLA\tRY\tRN\tRA\tDiv\tYN\n";
751                                 out.close();
752                                 
753                                 m->appendFiles(outputFileName, outputFileName+".temp");
754                                 m->mothurRemove(outputFileName); rename((outputFileName+".temp").c_str(), outputFileName.c_str());
755                                 
756                                 if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) {        m->mothurRemove(outputNames[j]);        } return 0; }
757                         
758                                 //remove file made for uchime
759                                 if (templatefile == "self") {  m->mothurRemove(fastaFileNames[s]); }
760                         
761                                 m->mothurOutEndLine(); m->mothurOut("It took " + toString(time(NULL) - start) + " secs to check " + toString(numSeqs) + " sequences. " + toString(numChimeras) + " chimeras were found.");      m->mothurOutEndLine();
762                         }
763                         
764                         outputNames.push_back(outputFileName); outputTypes["chimera"].push_back(outputFileName);
765                         outputNames.push_back(accnosFileName); outputTypes["accnos"].push_back(accnosFileName);
766                         if (chimealns) { outputNames.push_back(alnsFileName); outputTypes["alns"].push_back(alnsFileName); }
767                 }
768         
769                 //set accnos file as new current accnosfile
770                 string current = "";
771                 itTypes = outputTypes.find("accnos");
772                 if (itTypes != outputTypes.end()) {
773                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setAccnosFile(current); }
774                 }
775                 
776                 m->mothurOutEndLine();
777                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
778                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }       
779                 m->mothurOutEndLine();
780                 
781                 return 0;
782                 
783         }
784         catch(exception& e) {
785                 m->errorOut(e, "ChimeraUchimeCommand", "execute");
786                 exit(1);
787         }
788 }
789 //**********************************************************************************************************************
790 int ChimeraUchimeCommand::deconvoluteResults(map<string, string>& uniqueNames, string outputFileName, string accnosFileName, string alnsFileName){
791         try {
792                 map<string, string>::iterator itUnique;
793                 int total = 0;
794                 
795                 //edit accnos file
796                 ifstream in2; 
797                 m->openInputFile(accnosFileName, in2);
798                 
799                 ofstream out2;
800                 m->openOutputFile(accnosFileName+".temp", out2);
801                 
802                 string name;
803                 set<string> namesInFile; //this is so if a sequence is found to be chimera in several samples we dont write it to the results file more than once
804                 set<string>::iterator itNames;
805                 set<string> chimerasInFile;
806                 set<string>::iterator itChimeras;
807
808                 
809                 while (!in2.eof()) {
810                         if (m->control_pressed) { in2.close(); out2.close(); m->mothurRemove(outputFileName); m->mothurRemove((accnosFileName+".temp")); return 0; }
811                         
812                         in2 >> name; m->gobble(in2);
813                         
814                         //find unique name
815                         itUnique = uniqueNames.find(name);
816                         
817                         if (itUnique == uniqueNames.end()) { m->mothurOut("[ERROR]: trouble parsing accnos results. Cannot find " + name + "."); m->mothurOutEndLine(); m->control_pressed = true; }
818                         else {
819                                 itChimeras = chimerasInFile.find((itUnique->second));
820                                 
821                                 if (itChimeras == chimerasInFile.end()) {
822                                         out2 << itUnique->second << endl;
823                                         chimerasInFile.insert((itUnique->second));
824                                         total++;
825                                 }
826                         }
827                 }
828                 in2.close();
829                 out2.close();
830                 
831                 m->mothurRemove(accnosFileName);
832                 rename((accnosFileName+".temp").c_str(), accnosFileName.c_str());
833                 
834                 
835                 
836                 //edit chimera file
837                 ifstream in; 
838                 m->openInputFile(outputFileName, in);
839                 
840                 ofstream out;
841                 m->openOutputFile(outputFileName+".temp", out); out.setf(ios::fixed, ios::floatfield); out.setf(ios::showpoint);
842                 out << "Score\tQuery\tParentA\tParentB\tIdQM\tIdQA\tIdQB\tIdAB\tIdQT\tLY\tLN\tLA\tRY\tRN\tRA\tDiv\tYN\n";
843                 
844                 float temp1;
845                 string parent1, parent2, temp2, temp3, temp4, temp5, temp6, temp7, temp8, temp9, temp10, temp11, temp12, temp13, flag;
846                 name = "";
847                 namesInFile.clear();    
848                 //assumptions - in file each read will always look like - if uchime source is updated, revisit this code.
849                 /*                                                                              1       2       3       4       5       6       7       8       9       10      11      12      13      14      15
850                  0.000000       F11Fcsw_33372/ab=18/            *       *       *       *       *       *       *       *       *       *       *       *       *       *       N
851                  0.018300       F11Fcsw_14980/ab=16/            F11Fcsw_1915/ab=35/     F11Fcsw_6032/ab=42/     79.9    78.7    78.2    78.7    79.2    3       0       5       11      10      20      1.46    N
852                 */
853                 
854                 while (!in.eof()) {
855                         
856                         if (m->control_pressed) { in.close(); out.close(); m->mothurRemove((outputFileName+".temp")); return 0; }
857                         
858                         bool print = false;
859                         in >> temp1;    m->gobble(in);
860                         in >> name;             m->gobble(in);
861                         in >> parent1;  m->gobble(in);
862                         in >> parent2;  m->gobble(in);
863                         in >> temp2 >> temp3 >> temp4 >> temp5 >> temp6 >> temp7 >> temp8 >> temp9 >> temp10 >> temp11 >> temp12 >> temp13 >> flag;
864                         m->gobble(in);
865                         
866                         //parse name - name will look like U68590/ab=1/
867                         string restOfName = "";
868                         int pos = name.find_first_of('/');
869                         if (pos != string::npos) {
870                                 restOfName = name.substr(pos);
871                                 name = name.substr(0, pos);
872                         }
873                         
874                         //find unique name
875                         itUnique = uniqueNames.find(name);
876                         
877                         if (itUnique == uniqueNames.end()) { m->mothurOut("[ERROR]: trouble parsing chimera results. Cannot find "+ name + "."); m->mothurOutEndLine(); m->control_pressed = true; }
878                         else {
879                                 name = itUnique->second;
880                                 //is this name already in the file
881                                 itNames = namesInFile.find((name));
882                                 
883                                 if (itNames == namesInFile.end()) { //no not in file
884                                         if (flag == "N") { //are you really a no??
885                                                 //is this sequence really not chimeric??
886                                                 itChimeras = chimerasInFile.find(name);
887                                                 
888                                                 //then you really are a no so print, otherwise skip
889                                                 if (itChimeras == chimerasInFile.end()) { print = true; }
890                                         }else{ print = true; }
891                                 }
892                         }
893                         
894                         if (print) {
895                                 out << temp1 << '\t' << name << restOfName << '\t';
896                                 namesInFile.insert(name);
897                                 
898                                 //parse parent1 names
899                                 if (parent1 != "*") {
900                                         restOfName = "";
901                                         pos = parent1.find_first_of('/');
902                                         if (pos != string::npos) {
903                                                 restOfName = parent1.substr(pos);
904                                                 parent1 = parent1.substr(0, pos);
905                                         }
906                                         
907                                         itUnique = uniqueNames.find(parent1);
908                                         if (itUnique == uniqueNames.end()) { m->mothurOut("[ERROR]: trouble parsing chimera results. Cannot find parentA "+ parent1 + "."); m->mothurOutEndLine(); m->control_pressed = true; }
909                                         else {  out << itUnique->second << restOfName << '\t';  }
910                                 }else { out << parent1 << '\t'; }
911                                 
912                                 //parse parent2 names
913                                 if (parent2 != "*") {
914                                         restOfName = "";
915                                         pos = parent2.find_first_of('/');
916                                         if (pos != string::npos) {
917                                                 restOfName = parent2.substr(pos);
918                                                 parent2 = parent2.substr(0, pos);
919                                         }
920                                         
921                                         itUnique = uniqueNames.find(parent2);
922                                         if (itUnique == uniqueNames.end()) { m->mothurOut("[ERROR]: trouble parsing chimera results. Cannot find parentB "+ parent2 + "."); m->mothurOutEndLine(); m->control_pressed = true; }
923                                         else {  out << itUnique->second << restOfName << '\t';  }
924                                 }else { out << parent2 << '\t'; }
925                                 
926                                 out << temp2 << '\t' << temp3 << '\t' << temp4 << '\t' << temp5 << '\t' << temp6 << '\t' << temp7 << '\t' << temp8 << '\t' << temp9 << '\t' << temp10 << '\t' << temp11 << '\t' << temp12 << temp13 << '\t' << flag << endl;    
927                         }
928                 }
929                 in.close();
930                 out.close();
931                 
932                 m->mothurRemove(outputFileName);
933                 rename((outputFileName+".temp").c_str(), outputFileName.c_str());
934                 
935                                 
936                 //edit anls file
937                 //assumptions - in file each read will always look like - if uchime source is updated, revisit this code.
938                 /*
939                  ------------------------------------------------------------------------
940                  Query   (  179 nt) F21Fcsw_11639/ab=591/
941                  ParentA (  179 nt) F11Fcsw_6529/ab=1625/
942                  ParentB (  181 nt) F21Fcsw_12128/ab=1827/
943                  
944                  A     1 AAGgAAGAtTAATACaagATGgCaTCatgAGtccgCATgTtcAcatGATTAAAG--gTaTtcCGGTagacGATGGGGATG 78
945                  Q     1 AAGTAAGACTAATACCCAATGACGTCTCTAGAAGACATCTGAAAGAGATTAAAG--ATTTATCGGTGATGGATGGGGATG 78
946                  B     1 AAGgAAGAtTAATcCaggATGggaTCatgAGttcACATgTccgcatGATTAAAGgtATTTtcCGGTagacGATGGGGATG 80
947                  Diffs      N    N    A N?N   N N  NNN  N?NB   N ?NaNNN          B B NN    NNNN          
948                  Votes      0    0    + 000   0 0  000  000+   0 00!000            + 00    0000          
949                  Model   AAAAAAAAAAAAAAAAAAAAAAxBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB
950                  
951                  A    79 CGTtccATTAGaTaGTaGGCGGGGTAACGGCCCACCtAGtCttCGATggaTAGGGGTTCTGAGAGGAAGGTCCCCCACAT 158
952                  Q    79 CGTCTGATTAGCTTGTTGGCGGGGTAACGGCCCACCAAGGCAACGATCAGTAGGGGTTCTGAGAGGAAGGTCCCCCACAT 158
953                  B    81 CGTtccATTAGaTaGTaGGCGGGGTAACGGCCCACCtAGtCAACGATggaTAGGGGTTCTGAGAGGAAGGTCCCCCACAT 160
954                  Diffs      NNN     N N  N                   N  N BB    NNN                              
955                  Votes      000     0 0  0                   0  0 ++    000                              
956                  Model   BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB
957                  
958                  A   159 TGGAACTGAGACACGGTCCAA 179
959                  Q   159 TGGAACTGAGACACGGTCCAA 179
960                  B   161 TGGAACTGAGACACGGTCCAA 181
961                  Diffs                        
962                  Votes                        
963                  Model   BBBBBBBBBBBBBBBBBBBBB
964                  
965                  Ids.  QA 76.6%, QB 77.7%, AB 93.7%, QModel 78.9%, Div. +1.5%
966                  Diffs Left 7: N 0, A 6, Y 1 (14.3%); Right 35: N 1, A 30, Y 4 (11.4%), Score 0.0047
967                 */
968                 if (chimealns) {
969                         ifstream in3; 
970                         m->openInputFile(alnsFileName, in3);
971                 
972                         ofstream out3;
973                         m->openOutputFile(alnsFileName+".temp", out3); out3.setf(ios::fixed, ios::floatfield); out3.setf(ios::showpoint);
974                 
975                         name = "";
976                         namesInFile.clear();
977                         string line = "";
978                         
979                         while (!in3.eof()) {
980                                 if (m->control_pressed) { in3.close(); out3.close(); m->mothurRemove(outputFileName); m->mothurRemove((accnosFileName)); m->mothurRemove((alnsFileName+".temp")); return 0; }
981                                 
982                                 line = "";
983                                 line = m->getline(in3); 
984                                 string temp = "";
985                                 
986                                 if (line != "") {
987                                         istringstream iss(line);
988                                         iss >> temp;
989                                         
990                                         //are you a name line
991                                         if ((temp == "Query") || (temp == "ParentA") || (temp == "ParentB")) {
992                                                 int spot = 0;
993                                                 for (int i = 0; i < line.length(); i++) {
994                                                         spot = i;
995                                                         if (line[i] == ')') { break; }
996                                                         else { out3 << line[i]; }
997                                                 }
998                                                 
999                                                 if (spot == (line.length() - 1)) { m->mothurOut("[ERROR]: could not line sequence name in line " + line + "."); m->mothurOutEndLine(); m->control_pressed = true; }
1000                                                 else if ((spot+2) > (line.length() - 1)) { m->mothurOut("[ERROR]: could not line sequence name in line " + line + "."); m->mothurOutEndLine(); m->control_pressed = true; }
1001                                                 else {
1002                                                         out << line[spot] << line[spot+1];
1003                                                         
1004                                                         name = line.substr(spot+2);
1005                                                         
1006                                                         //parse name - name will either look like U68590/ab=1/ or U68590
1007                                                         string restOfName = "";
1008                                                         int pos = name.find_first_of('/');
1009                                                         if (pos != string::npos) {
1010                                                                 restOfName = name.substr(pos);
1011                                                                 name = name.substr(0, pos);
1012                                                         }
1013                                                         
1014                                                         //find unique name
1015                                                         itUnique = uniqueNames.find(name);
1016                                                         
1017                                                         if (itUnique == uniqueNames.end()) { m->mothurOut("[ERROR]: trouble parsing alns results. Cannot find "+ name + "."); m->mothurOutEndLine();m->control_pressed = true;  }
1018                                                         else {
1019                                                                 //only limit repeats on query names
1020                                                                 if (temp == "Query") {
1021                                                                         itNames = namesInFile.find((itUnique->second));
1022                                                                         
1023                                                                         if (itNames == namesInFile.end()) {
1024                                                                                 out << itUnique->second << restOfName << endl;
1025                                                                                 namesInFile.insert((itUnique->second));
1026                                                                         }
1027                                                                 }else { out << itUnique->second << restOfName << endl;  }
1028                                                         }
1029                                                         
1030                                                 }
1031                                                 
1032                                         }else { //not need to alter line
1033                                                 out3 << line << endl;
1034                                         }
1035                                 }else { out3 << endl; }
1036                         }
1037                         in3.close();
1038                         out3.close();
1039                         
1040                         m->mothurRemove(alnsFileName);
1041                         rename((alnsFileName+".temp").c_str(), alnsFileName.c_str());
1042                 }
1043                 
1044                 return total;
1045         }
1046         catch(exception& e) {
1047                 m->errorOut(e, "ChimeraUchimeCommand", "deconvoluteResults");
1048                 exit(1);
1049         }
1050 }       
1051 //**********************************************************************************************************************
1052 int ChimeraUchimeCommand::printFile(vector<seqPriorityNode>& nameMapCount, string filename){
1053         try {
1054                 
1055                 sort(nameMapCount.begin(), nameMapCount.end(), compareSeqPriorityNodes);
1056                 
1057                 ofstream out;
1058                 m->openOutputFile(filename, out);
1059                 
1060                 //print new file in order of
1061                 for (int i = 0; i < nameMapCount.size(); i++) {
1062                         out << ">" << nameMapCount[i].name  << "/ab=" << nameMapCount[i].numIdentical << "/" << endl << nameMapCount[i].seq << endl;
1063                 }
1064                 out.close();
1065                 
1066                 return 0;
1067         }
1068         catch(exception& e) {
1069                 m->errorOut(e, "ChimeraUchimeCommand", "printFile");
1070                 exit(1);
1071         }
1072 }       
1073 //**********************************************************************************************************************
1074 int ChimeraUchimeCommand::readFasta(string filename, map<string, string>& seqs){
1075         try {
1076                 //create input file for uchime
1077                 //read through fastafile and store info
1078                 ifstream in;
1079                 m->openInputFile(filename, in);
1080                 
1081                 while (!in.eof()) {
1082                         
1083                         if (m->control_pressed) { in.close(); return 0; }
1084                         
1085                         Sequence seq(in); m->gobble(in);
1086                         seqs[seq.getName()] = seq.getAligned();
1087                 }
1088                 in.close();
1089                 
1090                 return 0;
1091         }
1092         catch(exception& e) {
1093                 m->errorOut(e, "ChimeraUchimeCommand", "readFasta");
1094                 exit(1);
1095         }
1096 }       
1097 //**********************************************************************************************************************
1098
1099 string ChimeraUchimeCommand::getNamesFile(string& inputFile){
1100         try {
1101                 string nameFile = "";
1102                 
1103                 m->mothurOutEndLine(); m->mothurOut("No namesfile given, running unique.seqs command to generate one."); m->mothurOutEndLine(); m->mothurOutEndLine();
1104                 
1105                 //use unique.seqs to create new name and fastafile
1106                 string inputString = "fasta=" + inputFile;
1107                 m->mothurOut("/******************************************/"); m->mothurOutEndLine(); 
1108                 m->mothurOut("Running command: unique.seqs(" + inputString + ")"); m->mothurOutEndLine(); 
1109                 m->mothurCalling = true;
1110         
1111                 Command* uniqueCommand = new DeconvoluteCommand(inputString);
1112                 uniqueCommand->execute();
1113                 
1114                 map<string, vector<string> > filenames = uniqueCommand->getOutputFiles();
1115                 
1116                 delete uniqueCommand;
1117                 m->mothurCalling = false;
1118                 m->mothurOut("/******************************************/"); m->mothurOutEndLine(); 
1119                 
1120                 nameFile = filenames["name"][0];
1121                 inputFile = filenames["fasta"][0];
1122                 
1123                 return nameFile;
1124         }
1125         catch(exception& e) {
1126                 m->errorOut(e, "ChimeraUchimeCommand", "getNamesFile");
1127                 exit(1);
1128         }
1129 }
1130 //**********************************************************************************************************************
1131 int ChimeraUchimeCommand::driverGroups(string outputFName, string filename, string accnos, string alns, int start, int end, vector<string> groups){
1132         try {
1133                 
1134                 int totalSeqs = 0;
1135                 int numChimeras = 0;
1136                 
1137                 for (int i = start; i < end; i++) {
1138                         int start = time(NULL);  if (m->control_pressed) {  return 0; }
1139             
1140                         int error;
1141             if (hasCount) { error = cparser->getSeqs(groups[i], filename, true); if ((error == 1) || m->control_pressed) {  return 0; } }
1142             else { error = sparser->getSeqs(groups[i], filename, true); if ((error == 1) || m->control_pressed) {  return 0; } }
1143                         
1144                         int numSeqs = driver((outputFName + groups[i]), filename, (accnos+ groups[i]), (alns+ groups[i]), numChimeras);
1145                         totalSeqs += numSeqs;
1146                         
1147                         if (m->control_pressed) { return 0; }
1148                         
1149                         //remove file made for uchime
1150                         if (!m->debug) {  m->mothurRemove(filename);  }
1151             else { m->mothurOut("[DEBUG]: saving file: " + filename + ".\n"); }
1152                         
1153                         //append files
1154                         m->appendFiles((outputFName+groups[i]), outputFName); m->mothurRemove((outputFName+groups[i]));
1155                         m->appendFiles((accnos+groups[i]), accnos); m->mothurRemove((accnos+groups[i]));
1156                         if (chimealns) { m->appendFiles((alns+groups[i]), alns); m->mothurRemove((alns+groups[i])); }
1157                         
1158                         m->mothurOutEndLine(); m->mothurOut("It took " + toString(time(NULL) - start) + " secs to check " + toString(numSeqs) + " sequences from group " + groups[i] + ".");    m->mothurOutEndLine();                                  
1159                 }       
1160                 return totalSeqs;
1161                 
1162         }
1163         catch(exception& e) {
1164                 m->errorOut(e, "ChimeraUchimeCommand", "driverGroups");
1165                 exit(1);
1166         }
1167 }       
1168 //**********************************************************************************************************************
1169
1170 int ChimeraUchimeCommand::driver(string outputFName, string filename, string accnos, string alns, int& numChimeras){
1171         try {
1172                 
1173                 outputFName = m->getFullPathName(outputFName);
1174                 filename = m->getFullPathName(filename);
1175                 alns = m->getFullPathName(alns);
1176                 
1177                 //to allow for spaces in the path
1178                 outputFName = "\"" + outputFName + "\"";
1179                 filename = "\"" + filename + "\"";
1180                 alns = "\"" + alns + "\"";
1181                                 
1182                 vector<char*> cPara;
1183                 
1184                 string uchimeCommand = uchimeLocation;
1185         uchimeCommand = "\"" + uchimeCommand + "\" ";
1186         
1187         char* tempUchime;
1188                 tempUchime= new char[uchimeCommand.length()+1]; 
1189                 *tempUchime = '\0';
1190                 strncat(tempUchime, uchimeCommand.c_str(), uchimeCommand.length());
1191                 cPara.push_back(tempUchime);
1192                 
1193         //are you using a reference file
1194                 if (templatefile != "self") {
1195             string outputFileName = filename.substr(1, filename.length()-2) + ".uchime_formatted";
1196             prepFile(filename.substr(1, filename.length()-2), outputFileName);
1197             filename = outputFileName;
1198             filename = "\"" + filename + "\"";
1199                         //add reference file
1200                         char* tempRef = new char[5]; 
1201                         //strcpy(tempRef, "--db"); 
1202                         *tempRef = '\0'; strncat(tempRef, "--db", 4);
1203                         cPara.push_back(tempRef);  
1204                         char* tempR = new char[templatefile.length()+1];
1205                         //strcpy(tempR, templatefile.c_str());
1206                         *tempR = '\0'; strncat(tempR, templatefile.c_str(), templatefile.length());
1207                         cPara.push_back(tempR);
1208                 }
1209                 
1210                 char* tempIn = new char[8]; 
1211                 *tempIn = '\0'; strncat(tempIn, "--input", 7);
1212                 //strcpy(tempIn, "--input"); 
1213                 cPara.push_back(tempIn);
1214                 char* temp = new char[filename.length()+1];
1215                 *temp = '\0'; strncat(temp, filename.c_str(), filename.length());
1216                 //strcpy(temp, filename.c_str());
1217                 cPara.push_back(temp);
1218                 
1219                 char* tempO = new char[12]; 
1220                 *tempO = '\0'; strncat(tempO, "--uchimeout", 11);
1221                 //strcpy(tempO, "--uchimeout"); 
1222                 cPara.push_back(tempO);
1223                 char* tempout = new char[outputFName.length()+1];
1224                 //strcpy(tempout, outputFName.c_str());
1225                 *tempout = '\0'; strncat(tempout, outputFName.c_str(), outputFName.length());
1226                 cPara.push_back(tempout);
1227                 
1228                 if (chimealns) {
1229                         char* tempA = new char[13]; 
1230                         *tempA = '\0'; strncat(tempA, "--uchimealns", 12);
1231                         //strcpy(tempA, "--uchimealns"); 
1232                         cPara.push_back(tempA);
1233                         char* tempa = new char[alns.length()+1];
1234                         //strcpy(tempa, alns.c_str());
1235                         *tempa = '\0'; strncat(tempa, alns.c_str(), alns.length());
1236                         cPara.push_back(tempa);
1237                 }
1238                 
1239                 if (useAbskew) {
1240                         char* tempskew = new char[9];
1241                         *tempskew = '\0'; strncat(tempskew, "--abskew", 8);
1242                         //strcpy(tempskew, "--abskew"); 
1243                         cPara.push_back(tempskew);
1244                         char* tempSkew = new char[abskew.length()+1];
1245                         //strcpy(tempSkew, abskew.c_str());
1246                         *tempSkew = '\0'; strncat(tempSkew, abskew.c_str(), abskew.length());
1247                         cPara.push_back(tempSkew);
1248                 }
1249                 
1250                 if (useMinH) {
1251                         char* tempminh = new char[7]; 
1252                         *tempminh = '\0'; strncat(tempminh, "--minh", 6);
1253                         //strcpy(tempminh, "--minh"); 
1254                         cPara.push_back(tempminh);
1255                         char* tempMinH = new char[minh.length()+1];
1256                         *tempMinH = '\0'; strncat(tempMinH, minh.c_str(), minh.length());
1257                         //strcpy(tempMinH, minh.c_str());
1258                         cPara.push_back(tempMinH);
1259                 }
1260                 
1261                 if (useMindiv) {
1262                         char* tempmindiv = new char[9]; 
1263                         *tempmindiv = '\0'; strncat(tempmindiv, "--mindiv", 8);
1264                         //strcpy(tempmindiv, "--mindiv"); 
1265                         cPara.push_back(tempmindiv);
1266                         char* tempMindiv = new char[mindiv.length()+1];
1267                         *tempMindiv = '\0'; strncat(tempMindiv, mindiv.c_str(), mindiv.length());
1268                         //strcpy(tempMindiv, mindiv.c_str());
1269                         cPara.push_back(tempMindiv);
1270                 }
1271                 
1272                 if (useXn) {
1273                         char* tempxn = new char[5]; 
1274                         //strcpy(tempxn, "--xn"); 
1275                         *tempxn = '\0'; strncat(tempxn, "--xn", 4);
1276                         cPara.push_back(tempxn);
1277                         char* tempXn = new char[xn.length()+1];
1278                         //strcpy(tempXn, xn.c_str());
1279                         *tempXn = '\0'; strncat(tempXn, xn.c_str(), xn.length());
1280                         cPara.push_back(tempXn);
1281                 }
1282                 
1283                 if (useDn) {
1284                         char* tempdn = new char[5]; 
1285                         //strcpy(tempdn, "--dn"); 
1286                         *tempdn = '\0'; strncat(tempdn, "--dn", 4);
1287                         cPara.push_back(tempdn);
1288                         char* tempDn = new char[dn.length()+1];
1289                         *tempDn = '\0'; strncat(tempDn, dn.c_str(), dn.length());
1290                         //strcpy(tempDn, dn.c_str());
1291                         cPara.push_back(tempDn);
1292                 }
1293                 
1294                 if (useXa) {
1295                         char* tempxa = new char[5]; 
1296                         //strcpy(tempxa, "--xa"); 
1297                         *tempxa = '\0'; strncat(tempxa, "--xa", 4);
1298                         cPara.push_back(tempxa);
1299                         char* tempXa = new char[xa.length()+1];
1300                         *tempXa = '\0'; strncat(tempXa, xa.c_str(), xa.length());
1301                         //strcpy(tempXa, xa.c_str());
1302                         cPara.push_back(tempXa);
1303                 }
1304                 
1305                 if (useChunks) {
1306                         char* tempchunks = new char[9]; 
1307                         //strcpy(tempchunks, "--chunks"); 
1308                         *tempchunks = '\0'; strncat(tempchunks, "--chunks", 8);
1309                         cPara.push_back(tempchunks);
1310                         char* tempChunks = new char[chunks.length()+1];
1311                         *tempChunks = '\0'; strncat(tempChunks, chunks.c_str(), chunks.length());
1312                         //strcpy(tempChunks, chunks.c_str());
1313                         cPara.push_back(tempChunks);
1314                 }
1315                 
1316                 if (useMinchunk) {
1317                         char* tempminchunk = new char[11]; 
1318                         //strcpy(tempminchunk, "--minchunk"); 
1319                         *tempminchunk = '\0'; strncat(tempminchunk, "--minchunk", 10);
1320                         cPara.push_back(tempminchunk);
1321                         char* tempMinchunk = new char[minchunk.length()+1];
1322                         *tempMinchunk = '\0'; strncat(tempMinchunk, minchunk.c_str(), minchunk.length());
1323                         //strcpy(tempMinchunk, minchunk.c_str());
1324                         cPara.push_back(tempMinchunk);
1325                 }
1326                 
1327                 if (useIdsmoothwindow) {
1328                         char* tempidsmoothwindow = new char[17]; 
1329                         *tempidsmoothwindow = '\0'; strncat(tempidsmoothwindow, "--idsmoothwindow", 16);
1330                         //strcpy(tempidsmoothwindow, "--idsmoothwindow"); 
1331                         cPara.push_back(tempidsmoothwindow);
1332                         char* tempIdsmoothwindow = new char[idsmoothwindow.length()+1];
1333                         *tempIdsmoothwindow = '\0'; strncat(tempIdsmoothwindow, idsmoothwindow.c_str(), idsmoothwindow.length());
1334                         //strcpy(tempIdsmoothwindow, idsmoothwindow.c_str());
1335                         cPara.push_back(tempIdsmoothwindow);
1336                 }
1337                 
1338                 /*if (useMinsmoothid) {
1339                         char* tempminsmoothid = new char[14]; 
1340                         //strcpy(tempminsmoothid, "--minsmoothid"); 
1341                         *tempminsmoothid = '\0'; strncat(tempminsmoothid, "--minsmoothid", 13);
1342                         cPara.push_back(tempminsmoothid);
1343                         char* tempMinsmoothid = new char[minsmoothid.length()+1];
1344                         *tempMinsmoothid = '\0'; strncat(tempMinsmoothid, minsmoothid.c_str(), minsmoothid.length());
1345                         //strcpy(tempMinsmoothid, minsmoothid.c_str());
1346                         cPara.push_back(tempMinsmoothid);
1347                 }*/
1348                 
1349                 if (useMaxp) {
1350                         char* tempmaxp = new char[7]; 
1351                         //strcpy(tempmaxp, "--maxp"); 
1352                         *tempmaxp = '\0'; strncat(tempmaxp, "--maxp", 6);
1353                         cPara.push_back(tempmaxp);
1354                         char* tempMaxp = new char[maxp.length()+1];
1355                         *tempMaxp = '\0'; strncat(tempMaxp, maxp.c_str(), maxp.length());
1356                         //strcpy(tempMaxp, maxp.c_str());
1357                         cPara.push_back(tempMaxp);
1358                 }
1359                 
1360                 if (!skipgaps) {
1361                         char* tempskipgaps = new char[13]; 
1362                         //strcpy(tempskipgaps, "--[no]skipgaps");
1363                         *tempskipgaps = '\0'; strncat(tempskipgaps, "--noskipgaps", 12);
1364                         cPara.push_back(tempskipgaps);
1365                 }
1366                 
1367                 if (!skipgaps2) {
1368                         char* tempskipgaps2 = new char[14]; 
1369                         //strcpy(tempskipgaps2, "--[no]skipgaps2"); 
1370                         *tempskipgaps2 = '\0'; strncat(tempskipgaps2, "--noskipgaps2", 13);
1371                         cPara.push_back(tempskipgaps2);
1372                 }
1373                 
1374                 if (useMinlen) {
1375                         char* tempminlen = new char[9]; 
1376                         *tempminlen = '\0'; strncat(tempminlen, "--minlen", 8);
1377                         //strcpy(tempminlen, "--minlen"); 
1378                         cPara.push_back(tempminlen);
1379                         char* tempMinlen = new char[minlen.length()+1];
1380                         //strcpy(tempMinlen, minlen.c_str());
1381                         *tempMinlen = '\0'; strncat(tempMinlen, minlen.c_str(), minlen.length());
1382                         cPara.push_back(tempMinlen);
1383                 }
1384                 
1385                 if (useMaxlen) {
1386                         char* tempmaxlen = new char[9]; 
1387                         //strcpy(tempmaxlen, "--maxlen"); 
1388                         *tempmaxlen = '\0'; strncat(tempmaxlen, "--maxlen", 8);
1389                         cPara.push_back(tempmaxlen);
1390                         char* tempMaxlen = new char[maxlen.length()+1];
1391                         *tempMaxlen = '\0'; strncat(tempMaxlen, maxlen.c_str(), maxlen.length());
1392                         //strcpy(tempMaxlen, maxlen.c_str());
1393                         cPara.push_back(tempMaxlen);
1394                 }
1395                 
1396                 if (ucl) {
1397                         char* tempucl = new char[5]; 
1398                         strcpy(tempucl, "--ucl"); 
1399                         cPara.push_back(tempucl);
1400                 }
1401                 
1402                 if (useQueryfract) {
1403                         char* tempqueryfract = new char[13]; 
1404                         *tempqueryfract = '\0'; strncat(tempqueryfract, "--queryfract", 12);
1405                         //strcpy(tempqueryfract, "--queryfract"); 
1406                         cPara.push_back(tempqueryfract);
1407                         char* tempQueryfract = new char[queryfract.length()+1];
1408                         *tempQueryfract = '\0'; strncat(tempQueryfract, queryfract.c_str(), queryfract.length());
1409                         //strcpy(tempQueryfract, queryfract.c_str());
1410                         cPara.push_back(tempQueryfract);
1411                 }
1412                 
1413                 
1414                 char** uchimeParameters;
1415                 uchimeParameters = new char*[cPara.size()];
1416                 string commandString = "";
1417                 for (int i = 0; i < cPara.size(); i++) {  uchimeParameters[i] = cPara[i];  commandString += toString(cPara[i]) + " "; } 
1418                 //int numArgs = cPara.size();
1419                 
1420                 //uchime_main(numArgs, uchimeParameters); 
1421                 //cout << "commandString = " << commandString << endl;
1422 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
1423 #else
1424                 commandString = "\"" + commandString + "\"";
1425 #endif
1426         if (m->debug) { m->mothurOut("[DEBUG]: uchime command = " + commandString + ".\n"); }
1427                 system(commandString.c_str());
1428                 
1429                 //free memory
1430                 for(int i = 0; i < cPara.size(); i++)  {  delete cPara[i];  }
1431                 delete[] uchimeParameters; 
1432                 
1433                 //remove "" from filenames
1434                 outputFName = outputFName.substr(1, outputFName.length()-2);
1435                 filename = filename.substr(1, filename.length()-2);
1436                 alns = alns.substr(1, alns.length()-2);
1437                 
1438                 if (m->control_pressed) { return 0; }
1439                 
1440                 //create accnos file from uchime results
1441                 ifstream in; 
1442                 m->openInputFile(outputFName, in);
1443                 
1444                 ofstream out;
1445                 m->openOutputFile(accnos, out);
1446                 
1447                 int num = 0;
1448                 numChimeras = 0;
1449                 while(!in.eof()) {
1450                         
1451                         if (m->control_pressed) { break; }
1452                         
1453                         string name = "";
1454                         string chimeraFlag = "";
1455                         in >> chimeraFlag >> name;
1456                         
1457                         //fix name if needed
1458                         if (templatefile == "self") { 
1459                                 name = name.substr(0, name.length()-1); //rip off last /
1460                                 name = name.substr(0, name.find_last_of('/'));
1461                         }
1462                         
1463                         for (int i = 0; i < 15; i++) {  in >> chimeraFlag; }
1464                         m->gobble(in);
1465                         
1466                         if (chimeraFlag == "Y") {  out << name << endl; numChimeras++; }
1467                         num++;
1468                 }
1469                 in.close();
1470                 out.close();
1471                 
1472         //if (templatefile != "self") {  m->mothurRemove(filename); }
1473         
1474                 return num;
1475         }
1476         catch(exception& e) {
1477                 m->errorOut(e, "ChimeraUchimeCommand", "driver");
1478                 exit(1);
1479         }
1480 }
1481 /**************************************************************************************************/
1482 //uchime can't handle some of the things allowed in mothurs fasta files. This functions "cleans up" the file.
1483 int ChimeraUchimeCommand::prepFile(string filename, string output) {
1484         try {
1485         
1486         ifstream in;
1487         m->openInputFile(filename, in);
1488         
1489         ofstream out;
1490         m->openOutputFile(output, out);
1491         
1492         while (!in.eof()) {
1493             if (m->control_pressed) { break;  }
1494             
1495             Sequence seq(in); m->gobble(in);
1496             
1497             if (seq.getName() != "") { seq.printSequence(out); }
1498         }
1499         in.close();
1500         out.close();
1501         
1502         return 0;
1503     }
1504         catch(exception& e) {
1505                 m->errorOut(e, "ChimeraUchimeCommand", "prepFile");
1506                 exit(1);
1507         }
1508 }
1509 /**************************************************************************************************/
1510
1511 int ChimeraUchimeCommand::createProcesses(string outputFileName, string filename, string accnos, string alns, int& numChimeras) {
1512         try {
1513                 
1514                 processIDS.clear();
1515                 int process = 1;
1516                 int num = 0;
1517                 vector<string> files;
1518                 
1519 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)          
1520                 //break up file into multiple files
1521                 m->divideFile(filename, processors, files);
1522                 
1523                 if (m->control_pressed) {  return 0;  }
1524                                 
1525                 //loop through and create all the processes you want
1526                 while (process != processors) {
1527                         int pid = fork();
1528                         
1529                         if (pid > 0) {
1530                                 processIDS.push_back(pid);  //create map from line number to pid so you can append files in correct order later
1531                                 process++;
1532                         }else if (pid == 0){
1533                                 num = driver(outputFileName + toString(getpid()) + ".temp", files[process], accnos + toString(getpid()) + ".temp", alns + toString(getpid()) + ".temp", numChimeras);
1534                                 
1535                                 //pass numSeqs to parent
1536                                 ofstream out;
1537                                 string tempFile = outputFileName + toString(getpid()) + ".num.temp";
1538                                 m->openOutputFile(tempFile, out);
1539                                 out << num << endl;
1540                                 out << numChimeras << endl;
1541                                 out.close();
1542                                 
1543                                 exit(0);
1544                         }else { 
1545                                 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine(); 
1546                                 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
1547                                 exit(0);
1548                         }
1549                 }
1550                 
1551                 //do my part
1552                 num = driver(outputFileName, files[0], accnos, alns, numChimeras);
1553                 
1554                 //force parent to wait until all the processes are done
1555                 for (int i=0;i<processIDS.size();i++) { 
1556                         int temp = processIDS[i];
1557                         wait(&temp);
1558                 }
1559                 
1560                 for (int i = 0; i < processIDS.size(); i++) {
1561                         ifstream in;
1562                         string tempFile =  outputFileName + toString(processIDS[i]) + ".num.temp";
1563                         m->openInputFile(tempFile, in);
1564                         if (!in.eof()) { 
1565                                 int tempNum = 0; 
1566                                 in >> tempNum; m->gobble(in);
1567                                 num += tempNum; 
1568                                 in >> tempNum;
1569                                 numChimeras += tempNum;
1570                         }
1571                         in.close(); m->mothurRemove(tempFile);
1572                 }
1573 #else
1574                 //////////////////////////////////////////////////////////////////////////////////////////////////////
1575                 //Windows version shared memory, so be careful when passing variables through the preClusterData struct. 
1576                 //Above fork() will clone, so memory is separate, but that's not the case with windows, 
1577                 //////////////////////////////////////////////////////////////////////////////////////////////////////
1578                 
1579                 //divide file
1580                 int count = 0;
1581                 int spot = 0;
1582                 map<int, ofstream*> filehandles;
1583                 map<int, ofstream*>::iterator it3;
1584                 
1585                 ofstream* temp;
1586                 for (int i = 0; i < processors; i++) {
1587                         temp = new ofstream;
1588                         filehandles[i] = temp;
1589                         m->openOutputFile(filename+toString(i)+".temp", *(temp));
1590                         files.push_back(filename+toString(i)+".temp");
1591                 }
1592                 
1593                 ifstream in;
1594                 m->openInputFile(filename, in);
1595                 
1596                 while(!in.eof()) {
1597                         
1598                         if (m->control_pressed) { in.close(); for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) { (*(it3->second)).close(); delete it3->second; } return 0; }
1599                         
1600                         Sequence tempSeq(in); m->gobble(in); 
1601                         
1602                         if (tempSeq.getName() != "") {
1603                                 tempSeq.printSequence(*(filehandles[spot])); 
1604                                 spot++; count++;
1605                                 if (spot == processors) { spot = 0; }
1606                         }
1607                 }
1608                 in.close();
1609                 
1610                 //delete memory
1611                 for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) {
1612                         (*(it3->second)).close();
1613                         delete it3->second;
1614                 }
1615                 
1616                 //sanity check for number of processors
1617                 if (count < processors) { processors = count; }
1618                 
1619                 vector<uchimeData*> pDataArray; 
1620                 DWORD   dwThreadIdArray[processors-1];
1621                 HANDLE  hThreadArray[processors-1]; 
1622                 vector<string> dummy; //used so that we can use the same struct for MyUchimeSeqsThreadFunction and MyUchimeThreadFunction
1623                 
1624                 //Create processor worker threads.
1625                 for( int i=1; i<processors; i++ ){
1626                         // Allocate memory for thread data.
1627                         string extension = toString(i) + ".temp";
1628                         
1629                         uchimeData* tempUchime = new uchimeData(outputFileName+extension, uchimeLocation, templatefile, files[i], "", "", "", accnos+extension, alns+extension, dummy, m, 0, 0,  i);
1630                         tempUchime->setBooleans(useAbskew, chimealns, useMinH, useMindiv, useXn, useDn, useXa, useChunks, useMinchunk, useIdsmoothwindow, useMinsmoothid, useMaxp, skipgaps, skipgaps2, useMinlen, useMaxlen, ucl, useQueryfract, hasCount);
1631                         tempUchime->setVariables(abskew, minh, mindiv, xn, dn, xa, chunks, minchunk, idsmoothwindow, minsmoothid, maxp, minlen, maxlen, queryfract);
1632                         
1633                         pDataArray.push_back(tempUchime);
1634                         processIDS.push_back(i);
1635                         
1636                         //MySeqSumThreadFunction is in header. It must be global or static to work with the threads.
1637                         //default security attributes, thread function name, argument to thread function, use default creation flags, returns the thread identifier
1638                         hThreadArray[i-1] = CreateThread(NULL, 0, MyUchimeSeqsThreadFunction, pDataArray[i-1], 0, &dwThreadIdArray[i-1]);   
1639                 }
1640                 
1641                 
1642                 //using the main process as a worker saves time and memory
1643                 num = driver(outputFileName, files[0], accnos, alns, numChimeras);
1644                 
1645                 //Wait until all threads have terminated.
1646                 WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE);
1647                 
1648                 //Close all thread handles and free memory allocations.
1649                 for(int i=0; i < pDataArray.size(); i++){
1650                         num += pDataArray[i]->count;
1651                         numChimeras += pDataArray[i]->numChimeras;
1652                         CloseHandle(hThreadArray[i]);
1653                         delete pDataArray[i];
1654                 }
1655 #endif          
1656                 
1657                 //append output files
1658                 for(int i=0;i<processIDS.size();i++){
1659                         m->appendFiles((outputFileName + toString(processIDS[i]) + ".temp"), outputFileName);
1660                         m->mothurRemove((outputFileName + toString(processIDS[i]) + ".temp"));
1661                         
1662                         m->appendFiles((accnos + toString(processIDS[i]) + ".temp"), accnos);
1663                         m->mothurRemove((accnos + toString(processIDS[i]) + ".temp"));
1664                         
1665                         if (chimealns) {
1666                                 m->appendFiles((alns + toString(processIDS[i]) + ".temp"), alns);
1667                                 m->mothurRemove((alns + toString(processIDS[i]) + ".temp"));
1668                         }
1669                 }
1670                 
1671                 //get rid of the file pieces.
1672                 for (int i = 0; i < files.size(); i++) { m->mothurRemove(files[i]); }
1673                 return num;     
1674         }
1675         catch(exception& e) {
1676                 m->errorOut(e, "ChimeraUchimeCommand", "createProcesses");
1677                 exit(1);
1678         }
1679 }
1680 /**************************************************************************************************/
1681
1682 int ChimeraUchimeCommand::createProcessesGroups(string outputFName, string filename, string accnos, string alns, vector<string> groups, string nameFile, string groupFile, string fastaFile) {
1683         try {
1684                 
1685                 processIDS.clear();
1686                 int process = 1;
1687                 int num = 0;
1688                 
1689                 //sanity check
1690                 if (groups.size() < processors) { processors = groups.size(); }
1691                 
1692                 //divide the groups between the processors
1693                 vector<linePair> lines;
1694                 int numGroupsPerProcessor = groups.size() / processors;
1695                 for (int i = 0; i < processors; i++) {
1696                         int startIndex =  i * numGroupsPerProcessor;
1697                         int endIndex = (i+1) * numGroupsPerProcessor;
1698                         if(i == (processors - 1)){      endIndex = groups.size();       }
1699                         lines.push_back(linePair(startIndex, endIndex));
1700                 }
1701                 
1702 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)          
1703                                 
1704                 //loop through and create all the processes you want
1705                 while (process != processors) {
1706                         int pid = fork();
1707                         
1708                         if (pid > 0) {
1709                                 processIDS.push_back(pid);  //create map from line number to pid so you can append files in correct order later
1710                                 process++;
1711                         }else if (pid == 0){
1712                                 num = driverGroups(outputFName + toString(getpid()) + ".temp", filename + toString(getpid()) + ".temp", accnos + toString(getpid()) + ".temp", alns + toString(getpid()) + ".temp", lines[process].start, lines[process].end, groups);
1713                                 
1714                                 //pass numSeqs to parent
1715                                 ofstream out;
1716                                 string tempFile = outputFName + toString(getpid()) + ".num.temp";
1717                                 m->openOutputFile(tempFile, out);
1718                                 out << num << endl;
1719                                 out.close();
1720                                 
1721                                 exit(0);
1722                         }else { 
1723                                 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine(); 
1724                                 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
1725                                 exit(0);
1726                         }
1727                 }
1728                 
1729                 //do my part
1730                 num = driverGroups(outputFName, filename, accnos, alns, lines[0].start, lines[0].end, groups);
1731                 
1732                 //force parent to wait until all the processes are done
1733                 for (int i=0;i<processIDS.size();i++) { 
1734                         int temp = processIDS[i];
1735                         wait(&temp);
1736                 }
1737                 
1738                 for (int i = 0; i < processIDS.size(); i++) {
1739                         ifstream in;
1740                         string tempFile =  outputFName + toString(processIDS[i]) + ".num.temp";
1741                         m->openInputFile(tempFile, in);
1742                         if (!in.eof()) { int tempNum = 0; in >> tempNum; num += tempNum; }
1743                         in.close(); m->mothurRemove(tempFile);
1744                 }
1745                                 
1746 #else
1747                 //////////////////////////////////////////////////////////////////////////////////////////////////////
1748                 //Windows version shared memory, so be careful when passing variables through the uchimeData struct. 
1749                 //Above fork() will clone, so memory is separate, but that's not the case with windows, 
1750                 //////////////////////////////////////////////////////////////////////////////////////////////////////
1751                 
1752                 vector<uchimeData*> pDataArray; 
1753                 DWORD   dwThreadIdArray[processors-1];
1754                 HANDLE  hThreadArray[processors-1]; 
1755                 
1756                 //Create processor worker threads.
1757                 for( int i=1; i<processors; i++ ){
1758                         // Allocate memory for thread data.
1759                         string extension = toString(i) + ".temp";
1760                         
1761                         uchimeData* tempUchime = new uchimeData(outputFName+extension, uchimeLocation, templatefile, filename+extension, fastaFile, nameFile, groupFile, accnos+extension, alns+extension, groups, m, lines[i].start, lines[i].end,  i);
1762                         tempUchime->setBooleans(useAbskew, chimealns, useMinH, useMindiv, useXn, useDn, useXa, useChunks, useMinchunk, useIdsmoothwindow, useMinsmoothid, useMaxp, skipgaps, skipgaps2, useMinlen, useMaxlen, ucl, useQueryfract, hasCount);
1763                         tempUchime->setVariables(abskew, minh, mindiv, xn, dn, xa, chunks, minchunk, idsmoothwindow, minsmoothid, maxp, minlen, maxlen, queryfract);
1764                         
1765                         pDataArray.push_back(tempUchime);
1766                         processIDS.push_back(i);
1767                         
1768                         //MyUchimeThreadFunction is in header. It must be global or static to work with the threads.
1769                         //default security attributes, thread function name, argument to thread function, use default creation flags, returns the thread identifier
1770                         hThreadArray[i-1] = CreateThread(NULL, 0, MyUchimeThreadFunction, pDataArray[i-1], 0, &dwThreadIdArray[i-1]);   
1771                 }
1772                 
1773                 
1774                 //using the main process as a worker saves time and memory
1775                 num = driverGroups(outputFName, filename, accnos, alns, lines[0].start, lines[0].end, groups);
1776                 
1777                 //Wait until all threads have terminated.
1778                 WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE);
1779                 
1780                 //Close all thread handles and free memory allocations.
1781                 for(int i=0; i < pDataArray.size(); i++){
1782                         num += pDataArray[i]->count;
1783                         CloseHandle(hThreadArray[i]);
1784                         delete pDataArray[i];
1785                 }
1786 #endif          
1787                 
1788                                 
1789                 //append output files
1790                 for(int i=0;i<processIDS.size();i++){
1791                         m->appendFiles((outputFName + toString(processIDS[i]) + ".temp"), outputFName);
1792                         m->mothurRemove((outputFName + toString(processIDS[i]) + ".temp"));
1793                         
1794                         m->appendFiles((accnos + toString(processIDS[i]) + ".temp"), accnos);
1795                         m->mothurRemove((accnos + toString(processIDS[i]) + ".temp"));
1796                         
1797                         if (chimealns) {
1798                                 m->appendFiles((alns + toString(processIDS[i]) + ".temp"), alns);
1799                                 m->mothurRemove((alns + toString(processIDS[i]) + ".temp"));
1800                         }
1801                 }
1802                 
1803                 return num;     
1804                 
1805         }
1806         catch(exception& e) {
1807                 m->errorOut(e, "ChimeraUchimeCommand", "createProcessesGroups");
1808                 exit(1);
1809         }
1810 }
1811 /**************************************************************************************************/
1812