]> git.donarmstrong.com Git - mothur.git/blob - chimerauchimecommand.cpp
added topdown parameter to pre.cluster. added more debugging output to bayesian...
[mothur.git] / chimerauchimecommand.cpp
1 /*
2  *  chimerauchimecommand.cpp
3  *  Mothur
4  *
5  *  Created by westcott on 5/13/11.
6  *  Copyright 2011 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "chimerauchimecommand.h"
11 #include "deconvolutecommand.h"
12 //#include "uc.h"
13 #include "sequence.hpp"
14 #include "referencedb.h"
15 #include "systemcommand.h"
16
17 //**********************************************************************************************************************
18 vector<string> ChimeraUchimeCommand::setParameters(){   
19         try {
20                 CommandParameter ptemplate("reference", "InputTypes", "", "", "none", "none", "none","",false,true,true); parameters.push_back(ptemplate);
21                 CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none","chimera-accnos",false,true,true); parameters.push_back(pfasta);
22                 CommandParameter pname("name", "InputTypes", "", "", "NameCount", "none", "none","",false,false,true); parameters.push_back(pname);
23         CommandParameter pcount("count", "InputTypes", "", "", "NameCount-CountGroup", "none", "none","",false,false,true); parameters.push_back(pcount);
24                 CommandParameter pgroup("group", "InputTypes", "", "", "CountGroup", "none", "none","",false,false,true); parameters.push_back(pgroup);
25                 CommandParameter pprocessors("processors", "Number", "", "1", "", "", "","",false,false,true); parameters.push_back(pprocessors);
26         CommandParameter pstrand("strand", "String", "", "", "", "", "","",false,false); parameters.push_back(pstrand);
27                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
28                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
29                 CommandParameter pabskew("abskew", "Number", "", "1.9", "", "", "","",false,false); parameters.push_back(pabskew);
30                 CommandParameter pchimealns("chimealns", "Boolean", "", "F", "", "", "","alns",false,false); parameters.push_back(pchimealns);
31                 CommandParameter pminh("minh", "Number", "", "0.3", "", "", "","",false,false); parameters.push_back(pminh);
32                 CommandParameter pmindiv("mindiv", "Number", "", "0.5", "", "", "","",false,false); parameters.push_back(pmindiv);
33                 CommandParameter pxn("xn", "Number", "", "8.0", "", "", "","",false,false); parameters.push_back(pxn);
34                 CommandParameter pdn("dn", "Number", "", "1.4", "", "", "","",false,false); parameters.push_back(pdn);
35                 CommandParameter pxa("xa", "Number", "", "1", "", "", "","",false,false); parameters.push_back(pxa);
36                 CommandParameter pchunks("chunks", "Number", "", "4", "", "", "","",false,false); parameters.push_back(pchunks);
37                 CommandParameter pminchunk("minchunk", "Number", "", "64", "", "", "","",false,false); parameters.push_back(pminchunk);
38                 CommandParameter pidsmoothwindow("idsmoothwindow", "Number", "", "32", "", "", "","",false,false); parameters.push_back(pidsmoothwindow);
39         CommandParameter pdups("dereplicate", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(pdups);
40
41                 //CommandParameter pminsmoothid("minsmoothid", "Number", "", "0.95", "", "", "",false,false); parameters.push_back(pminsmoothid);
42                 CommandParameter pmaxp("maxp", "Number", "", "2", "", "", "","",false,false); parameters.push_back(pmaxp);
43                 CommandParameter pskipgaps("skipgaps", "Boolean", "", "T", "", "", "","",false,false); parameters.push_back(pskipgaps);
44                 CommandParameter pskipgaps2("skipgaps2", "Boolean", "", "T", "", "", "","",false,false); parameters.push_back(pskipgaps2);
45                 CommandParameter pminlen("minlen", "Number", "", "10", "", "", "","",false,false); parameters.push_back(pminlen);
46                 CommandParameter pmaxlen("maxlen", "Number", "", "10000", "", "", "","",false,false); parameters.push_back(pmaxlen);
47                 CommandParameter pucl("ucl", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(pucl);
48                 CommandParameter pqueryfract("queryfract", "Number", "", "0.5", "", "", "","",false,false); parameters.push_back(pqueryfract);
49
50                 vector<string> myArray;
51                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
52                 return myArray;
53         }
54         catch(exception& e) {
55                 m->errorOut(e, "ChimeraUchimeCommand", "setParameters");
56                 exit(1);
57         }
58 }
59 //**********************************************************************************************************************
60 string ChimeraUchimeCommand::getHelpString(){   
61         try {
62                 string helpString = "";
63                 helpString += "The chimera.uchime command reads a fastafile and referencefile and outputs potentially chimeric sequences.\n";
64                 helpString += "This command is a wrapper for uchime written by Robert C. Edgar.\n";
65                 helpString += "The chimera.uchime command parameters are fasta, name, count, reference, processors, dereplicate, abskew, chimealns, minh, mindiv, xn, dn, xa, chunks, minchunk, idsmoothwindow, minsmoothid, maxp, skipgaps, skipgaps2, minlen, maxlen, ucl, strand and queryfact.\n";
66                 helpString += "The fasta parameter allows you to enter the fasta file containing your potentially chimeric sequences, and is required, unless you have a valid current fasta file. \n";
67                 helpString += "The name parameter allows you to provide a name file, if you are using template=self. \n";
68         helpString += "The count parameter allows you to provide a count file, if you are using template=self. \n";
69                 helpString += "You may enter multiple fasta files by separating their names with dashes. ie. fasta=abrecovery.fasta-amazon.fasta \n";
70                 helpString += "The group parameter allows you to provide a group file. The group file can be used with a namesfile and reference=self. When checking sequences, only sequences from the same group as the query sequence will be used as the reference. \n";
71         helpString += "If the dereplicate parameter is false, then if one group finds the seqeunce to be chimeric, then all groups find it to be chimeric, default=f.\n";
72                 helpString += "The reference parameter allows you to enter a reference file containing known non-chimeric sequences, and is required. You may also set template=self, in this case the abundant sequences will be used as potential parents. \n";
73                 helpString += "The processors parameter allows you to specify how many processors you would like to use.  The default is 1. \n";
74                 helpString += "The abskew parameter can only be used with template=self. Minimum abundance skew. Default 1.9. Abundance skew is: min [ abund(parent1), abund(parent2) ] / abund(query).\n";
75                 helpString += "The chimealns parameter allows you to indicate you would like a file containing multiple alignments of query sequences to parents in human readable format. Alignments show columns with differences that support or contradict a chimeric model.\n";
76                 helpString += "The minh parameter - mininum score to report chimera. Default 0.3. Values from 0.1 to 5 might be reasonable. Lower values increase sensitivity but may report more false positives. If you decrease xn you may need to increase minh, and vice versa.\n";
77                 helpString += "The mindiv parameter - minimum divergence ratio, default 0.5. Div ratio is 100%% - %%identity between query sequence and the closest candidate for being a parent. If you don't care about very close chimeras, then you could increase mindiv to, say, 1.0 or 2.0, and also decrease minh, say to 0.1, to increase sensitivity. How well this works will depend on your data. Best is to tune parameters on a good benchmark.\n";
78                 helpString += "The xn parameter - weight of a no vote. Default 8.0. Decreasing this weight to around 3 or 4 may give better performance on denoised data.\n";
79                 helpString += "The dn parameter - pseudo-count prior on number of no votes. Default 1.4. Probably no good reason to change this unless you can retune to a good benchmark for your data. Reasonable values are probably in the range from 0.2 to 2.\n";
80                 helpString += "The xa parameter - weight of an abstain vote. Default 1. So far, results do not seem to be very sensitive to this parameter, but if you have a good training set might be worth trying. Reasonable values might range from 0.1 to 2.\n";
81                 helpString += "The chunks parameter is the number of chunks to extract from the query sequence when searching for parents. Default 4.\n";
82                 helpString += "The minchunk parameter is the minimum length of a chunk. Default 64.\n";
83                 helpString += "The idsmoothwindow parameter is the length of id smoothing window. Default 32.\n";
84                 //helpString += "The minsmoothid parameter - minimum factional identity over smoothed window of candidate parent. Default 0.95.\n";
85                 helpString += "The maxp parameter - maximum number of candidate parents to consider. Default 2. In tests so far, increasing maxp gives only a very small improvement in sensivity but tends to increase the error rate quite a bit.\n";
86                 helpString += "The skipgaps parameter controls how gapped columns affect counting of diffs. If skipgaps is set to T, columns containing gaps do not found as diffs. Default = T.\n";
87                 helpString += "The skipgaps2 parameter controls how gapped columns affect counting of diffs. If skipgaps2 is set to T, if column is immediately adjacent to a column containing a gap, it is not counted as a diff. Default = T.\n";
88                 helpString += "The minlen parameter is the minimum unaligned sequence length. Defaults 10. Applies to both query and reference sequences.\n";
89                 helpString += "The maxlen parameter is the maximum unaligned sequence length. Defaults 10000. Applies to both query and reference sequences.\n";
90                 helpString += "The ucl parameter - use local-X alignments. Default is global-X or false. On tests so far, global-X is always better; this option is retained because it just might work well on some future type of data.\n";
91                 helpString += "The queryfract parameter - minimum fraction of the query sequence that must be covered by a local-X alignment. Default 0.5. Applies only when ucl is true.\n";
92 #ifdef USE_MPI
93                 helpString += "When using MPI, the processors parameter is set to the number of MPI processes running. \n";
94 #endif
95                 helpString += "The chimera.uchime command should be in the following format: \n";
96                 helpString += "chimera.uchime(fasta=yourFastaFile, reference=yourTemplate) \n";
97                 helpString += "Example: chimera.uchime(fasta=AD.align, reference=silva.gold.align) \n";
98                 helpString += "Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFastaFile).\n";       
99                 return helpString;
100         }
101         catch(exception& e) {
102                 m->errorOut(e, "ChimeraUchimeCommand", "getHelpString");
103                 exit(1);
104         }
105 }
106 //**********************************************************************************************************************
107 string ChimeraUchimeCommand::getOutputPattern(string type) {
108     try {
109         string pattern = "";
110         
111         if (type == "chimera") {  pattern = "[filename],uchime.chimeras"; } 
112         else if (type == "accnos") {  pattern = "[filename],uchime.accnos"; } 
113         else if (type == "alns") {  pattern = "[filename],uchime.alns"; } 
114         else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true;  }
115         
116         return pattern;
117     }
118     catch(exception& e) {
119         m->errorOut(e, "ChimeraUchimeCommand", "getOutputPattern");
120         exit(1);
121     }
122 }
123 //**********************************************************************************************************************
124 ChimeraUchimeCommand::ChimeraUchimeCommand(){   
125         try {
126                 abort = true; calledHelp = true;
127                 setParameters();
128                 vector<string> tempOutNames;
129                 outputTypes["chimera"] = tempOutNames;
130                 outputTypes["accnos"] = tempOutNames;
131                 outputTypes["alns"] = tempOutNames;
132         }
133         catch(exception& e) {
134                 m->errorOut(e, "ChimeraUchimeCommand", "ChimeraUchimeCommand");
135                 exit(1);
136         }
137 }
138 //***************************************************************************************************************
139 ChimeraUchimeCommand::ChimeraUchimeCommand(string option)  {
140         try {
141                 abort = false; calledHelp = false; hasName=false; hasCount=false;
142                 ReferenceDB* rdb = ReferenceDB::getInstance();
143                 
144                 //allow user to run help
145                 if(option == "help") { help(); abort = true; calledHelp = true; }
146                 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
147                 
148                 else {
149                         vector<string> myArray = setParameters();
150                         
151                         OptionParser parser(option);
152                         map<string,string> parameters = parser.getParameters();
153                         
154                         ValidParameters validParameter("chimera.uchime");
155                         map<string,string>::iterator it;
156                         
157                         //check to make sure all parameters are valid for command
158                         for (it = parameters.begin(); it != parameters.end(); it++) { 
159                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
160                         }
161                         
162                         vector<string> tempOutNames;
163                         outputTypes["chimera"] = tempOutNames;
164                         outputTypes["accnos"] = tempOutNames;
165                         outputTypes["alns"] = tempOutNames;
166                         
167                         //if the user changes the input directory command factory will send this info to us in the output parameter 
168                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
169                         if (inputDir == "not found"){   inputDir = "";          }
170                         
171                         //check for required parameters
172                         fastafile = validParameter.validFile(parameters, "fasta", false);
173                         if (fastafile == "not found") {                                 
174                                 //if there is a current fasta file, use it
175                                 string filename = m->getFastaFile(); 
176                                 if (filename != "") { fastaFileNames.push_back(filename); m->mothurOut("Using " + filename + " as input file for the fasta parameter."); m->mothurOutEndLine(); }
177                                 else {  m->mothurOut("You have no current fastafile and the fasta parameter is required."); m->mothurOutEndLine(); abort = true; }
178                         }else { 
179                                 m->splitAtDash(fastafile, fastaFileNames);
180                                 
181                                 //go through files and make sure they are good, if not, then disregard them
182                                 for (int i = 0; i < fastaFileNames.size(); i++) {
183                                         
184                                         bool ignore = false;
185                                         if (fastaFileNames[i] == "current") { 
186                                                 fastaFileNames[i] = m->getFastaFile(); 
187                                                 if (fastaFileNames[i] != "") {  m->mothurOut("Using " + fastaFileNames[i] + " as input file for the fasta parameter where you had given current."); m->mothurOutEndLine(); }
188                                                 else {  
189                                                         m->mothurOut("You have no current fastafile, ignoring current."); m->mothurOutEndLine(); ignore=true; 
190                                                         //erase from file list
191                                                         fastaFileNames.erase(fastaFileNames.begin()+i);
192                                                         i--;
193                                                 }
194                                         }
195                                         
196                                         if (!ignore) {
197                                                 
198                                                 if (inputDir != "") {
199                                                         string path = m->hasPath(fastaFileNames[i]);
200                                                         //if the user has not given a path then, add inputdir. else leave path alone.
201                                                         if (path == "") {       fastaFileNames[i] = inputDir + fastaFileNames[i];               }
202                                                 }
203                                                 
204                                                 int ableToOpen;
205                                                 ifstream in;
206                                                 
207                                                 ableToOpen = m->openInputFile(fastaFileNames[i], in, "noerror");
208                                                 
209                                                 //if you can't open it, try default location
210                                                 if (ableToOpen == 1) {
211                                                         if (m->getDefaultPath() != "") { //default path is set
212                                                                 string tryPath = m->getDefaultPath() + m->getSimpleName(fastaFileNames[i]);
213                                                                 m->mothurOut("Unable to open " + fastaFileNames[i] + ". Trying default " + tryPath); m->mothurOutEndLine();
214                                                                 ifstream in2;
215                                                                 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
216                                                                 in2.close();
217                                                                 fastaFileNames[i] = tryPath;
218                                                         }
219                                                 }
220                                                 
221                                                 if (ableToOpen == 1) {
222                                                         if (m->getOutputDir() != "") { //default path is set
223                                                                 string tryPath = m->getOutputDir() + m->getSimpleName(fastaFileNames[i]);
224                                                                 m->mothurOut("Unable to open " + fastaFileNames[i] + ". Trying output directory " + tryPath); m->mothurOutEndLine();
225                                                                 ifstream in2;
226                                                                 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
227                                                                 in2.close();
228                                                                 fastaFileNames[i] = tryPath;
229                                                         }
230                                                 }
231                                                 
232                                                 in.close();
233                                                 
234                                                 if (ableToOpen == 1) { 
235                                                         m->mothurOut("Unable to open " + fastaFileNames[i] + ". It will be disregarded."); m->mothurOutEndLine(); 
236                                                         //erase from file list
237                                                         fastaFileNames.erase(fastaFileNames.begin()+i);
238                                                         i--;
239                                                 }else {
240                                                         m->setFastaFile(fastaFileNames[i]);
241                                                 }
242                                         }
243                                 }
244                                 
245                                 //make sure there is at least one valid file left
246                                 if (fastaFileNames.size() == 0) { m->mothurOut("[ERROR]: no valid files."); m->mothurOutEndLine(); abort = true; }
247                         }
248                         
249                         
250                         //check for required parameters
251                         namefile = validParameter.validFile(parameters, "name", false);
252                         if (namefile == "not found") { namefile = "";   }
253                         else { 
254                                 m->splitAtDash(namefile, nameFileNames);
255                                 
256                                 //go through files and make sure they are good, if not, then disregard them
257                                 for (int i = 0; i < nameFileNames.size(); i++) {
258                                         
259                                         bool ignore = false;
260                                         if (nameFileNames[i] == "current") { 
261                                                 nameFileNames[i] = m->getNameFile(); 
262                                                 if (nameFileNames[i] != "") {  m->mothurOut("Using " + nameFileNames[i] + " as input file for the name parameter where you had given current."); m->mothurOutEndLine(); }
263                                                 else {  
264                                                         m->mothurOut("You have no current namefile, ignoring current."); m->mothurOutEndLine(); ignore=true; 
265                                                         //erase from file list
266                                                         nameFileNames.erase(nameFileNames.begin()+i);
267                                                         i--;
268                                                 }
269                                         }
270                                         
271                                         if (!ignore) {
272                                                 
273                                                 if (inputDir != "") {
274                                                         string path = m->hasPath(nameFileNames[i]);
275                                                         //if the user has not given a path then, add inputdir. else leave path alone.
276                                                         if (path == "") {       nameFileNames[i] = inputDir + nameFileNames[i];         }
277                                                 }
278                                                 
279                                                 int ableToOpen;
280                                                 ifstream in;
281                                                 
282                                                 ableToOpen = m->openInputFile(nameFileNames[i], in, "noerror");
283                                                 
284                                                 //if you can't open it, try default location
285                                                 if (ableToOpen == 1) {
286                                                         if (m->getDefaultPath() != "") { //default path is set
287                                                                 string tryPath = m->getDefaultPath() + m->getSimpleName(nameFileNames[i]);
288                                                                 m->mothurOut("Unable to open " + nameFileNames[i] + ". Trying default " + tryPath); m->mothurOutEndLine();
289                                                                 ifstream in2;
290                                                                 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
291                                                                 in2.close();
292                                                                 nameFileNames[i] = tryPath;
293                                                         }
294                                                 }
295                                                 
296                                                 if (ableToOpen == 1) {
297                                                         if (m->getOutputDir() != "") { //default path is set
298                                                                 string tryPath = m->getOutputDir() + m->getSimpleName(nameFileNames[i]);
299                                                                 m->mothurOut("Unable to open " + nameFileNames[i] + ". Trying output directory " + tryPath); m->mothurOutEndLine();
300                                                                 ifstream in2;
301                                                                 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
302                                                                 in2.close();
303                                                                 nameFileNames[i] = tryPath;
304                                                         }
305                                                 }
306                                                 
307                                                 in.close();
308                                                 
309                                                 if (ableToOpen == 1) { 
310                                                         m->mothurOut("Unable to open " + nameFileNames[i] + ". It will be disregarded."); m->mothurOutEndLine(); 
311                                                         //erase from file list
312                                                         nameFileNames.erase(nameFileNames.begin()+i);
313                                                         i--;
314                                                 }else {
315                                                         m->setNameFile(nameFileNames[i]);
316                                                 }
317                                         }
318                                 }
319                         }
320             
321             if (nameFileNames.size() != 0) { hasName = true; }
322             
323             //check for required parameters
324             vector<string> countfileNames;
325                         countfile = validParameter.validFile(parameters, "count", false);
326                         if (countfile == "not found") { 
327                 countfile = "";  
328                         }else { 
329                                 m->splitAtDash(countfile, countfileNames);
330                                 
331                                 //go through files and make sure they are good, if not, then disregard them
332                                 for (int i = 0; i < countfileNames.size(); i++) {
333                                         
334                                         bool ignore = false;
335                                         if (countfileNames[i] == "current") { 
336                                                 countfileNames[i] = m->getCountTableFile(); 
337                                                 if (nameFileNames[i] != "") {  m->mothurOut("Using " + countfileNames[i] + " as input file for the count parameter where you had given current."); m->mothurOutEndLine(); }
338                                                 else {  
339                                                         m->mothurOut("You have no current count file, ignoring current."); m->mothurOutEndLine(); ignore=true; 
340                                                         //erase from file list
341                                                         countfileNames.erase(countfileNames.begin()+i);
342                                                         i--;
343                                                 }
344                                         }
345                                         
346                                         if (!ignore) {
347                                                 
348                                                 if (inputDir != "") {
349                                                         string path = m->hasPath(countfileNames[i]);
350                                                         //if the user has not given a path then, add inputdir. else leave path alone.
351                                                         if (path == "") {       countfileNames[i] = inputDir + countfileNames[i];               }
352                                                 }
353                                                 
354                                                 int ableToOpen;
355                                                 ifstream in;
356                                                 
357                                                 ableToOpen = m->openInputFile(countfileNames[i], in, "noerror");
358                                                 
359                                                 //if you can't open it, try default location
360                                                 if (ableToOpen == 1) {
361                                                         if (m->getDefaultPath() != "") { //default path is set
362                                                                 string tryPath = m->getDefaultPath() + m->getSimpleName(countfileNames[i]);
363                                                                 m->mothurOut("Unable to open " + countfileNames[i] + ". Trying default " + tryPath); m->mothurOutEndLine();
364                                                                 ifstream in2;
365                                                                 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
366                                                                 in2.close();
367                                                                 countfileNames[i] = tryPath;
368                                                         }
369                                                 }
370                                                 
371                                                 if (ableToOpen == 1) {
372                                                         if (m->getOutputDir() != "") { //default path is set
373                                                                 string tryPath = m->getOutputDir() + m->getSimpleName(countfileNames[i]);
374                                                                 m->mothurOut("Unable to open " + countfileNames[i] + ". Trying output directory " + tryPath); m->mothurOutEndLine();
375                                                                 ifstream in2;
376                                                                 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
377                                                                 in2.close();
378                                                                 countfileNames[i] = tryPath;
379                                                         }
380                                                 }
381                                                 
382                                                 in.close();
383                                                 
384                                                 if (ableToOpen == 1) { 
385                                                         m->mothurOut("Unable to open " + countfileNames[i] + ". It will be disregarded."); m->mothurOutEndLine(); 
386                                                         //erase from file list
387                                                         countfileNames.erase(countfileNames.begin()+i);
388                                                         i--;
389                                                 }else {
390                                                         m->setCountTableFile(countfileNames[i]);
391                                                 }
392                                         }
393                                 }
394                         }
395             
396             if (countfileNames.size() != 0) { hasCount = true; }
397             
398                         //make sure there is at least one valid file left
399             if (hasName && hasCount) { m->mothurOut("[ERROR]: You must enter ONLY ONE of the following: count or name."); m->mothurOutEndLine(); abort = true; }
400             
401             if (!hasName && hasCount) { nameFileNames = countfileNames; }
402             
403                         if ((hasCount || hasName) && (nameFileNames.size() != fastaFileNames.size())) { m->mothurOut("[ERROR]: The number of name or count files does not match the number of fastafiles, please correct."); m->mothurOutEndLine(); abort=true; }
404                         
405                         bool hasGroup = true;
406                         groupfile = validParameter.validFile(parameters, "group", false);
407                         if (groupfile == "not found") { groupfile = "";  hasGroup = false; }
408                         else { 
409                                 m->splitAtDash(groupfile, groupFileNames);
410                                 
411                                 //go through files and make sure they are good, if not, then disregard them
412                                 for (int i = 0; i < groupFileNames.size(); i++) {
413                                         
414                                         bool ignore = false;
415                                         if (groupFileNames[i] == "current") { 
416                                                 groupFileNames[i] = m->getGroupFile(); 
417                                                 if (groupFileNames[i] != "") {  m->mothurOut("Using " + groupFileNames[i] + " as input file for the group parameter where you had given current."); m->mothurOutEndLine(); }
418                                                 else {  
419                                                         m->mothurOut("You have no current namefile, ignoring current."); m->mothurOutEndLine(); ignore=true; 
420                                                         //erase from file list
421                                                         groupFileNames.erase(groupFileNames.begin()+i);
422                                                         i--;
423                                                 }
424                                         }
425                                         
426                                         if (!ignore) {
427                                                 
428                                                 if (inputDir != "") {
429                                                         string path = m->hasPath(groupFileNames[i]);
430                                                         //if the user has not given a path then, add inputdir. else leave path alone.
431                                                         if (path == "") {       groupFileNames[i] = inputDir + groupFileNames[i];               }
432                                                 }
433                                                 
434                                                 int ableToOpen;
435                                                 ifstream in;
436                                                 
437                                                 ableToOpen = m->openInputFile(groupFileNames[i], in, "noerror");
438                                                 
439                                                 //if you can't open it, try default location
440                                                 if (ableToOpen == 1) {
441                                                         if (m->getDefaultPath() != "") { //default path is set
442                                                                 string tryPath = m->getDefaultPath() + m->getSimpleName(groupFileNames[i]);
443                                                                 m->mothurOut("Unable to open " + groupFileNames[i] + ". Trying default " + tryPath); m->mothurOutEndLine();
444                                                                 ifstream in2;
445                                                                 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
446                                                                 in2.close();
447                                                                 groupFileNames[i] = tryPath;
448                                                         }
449                                                 }
450                                                 
451                                                 if (ableToOpen == 1) {
452                                                         if (m->getOutputDir() != "") { //default path is set
453                                                                 string tryPath = m->getOutputDir() + m->getSimpleName(groupFileNames[i]);
454                                                                 m->mothurOut("Unable to open " + groupFileNames[i] + ". Trying output directory " + tryPath); m->mothurOutEndLine();
455                                                                 ifstream in2;
456                                                                 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
457                                                                 in2.close();
458                                                                 groupFileNames[i] = tryPath;
459                                                         }
460                                                 }
461                                                 
462                                                 in.close();
463                                                 
464                                                 if (ableToOpen == 1) { 
465                                                         m->mothurOut("Unable to open " + groupFileNames[i] + ". It will be disregarded."); m->mothurOutEndLine(); 
466                                                         //erase from file list
467                                                         groupFileNames.erase(groupFileNames.begin()+i);
468                                                         i--;
469                                                 }else {
470                                                         m->setGroupFile(groupFileNames[i]);
471                                                 }
472                                         }
473                                 }
474                                 
475                                 //make sure there is at least one valid file left
476                                 if (groupFileNames.size() == 0) { m->mothurOut("[ERROR]: no valid group files."); m->mothurOutEndLine(); abort = true; }
477                         }
478                         
479                         if (hasGroup && (groupFileNames.size() != fastaFileNames.size())) { m->mothurOut("[ERROR]: The number of groupfiles does not match the number of fastafiles, please correct."); m->mothurOutEndLine(); abort=true; }
480                         
481             if (hasGroup && hasCount) { m->mothurOut("[ERROR]: You must enter ONLY ONE of the following: count or group."); m->mothurOutEndLine(); abort = true; }                      
482                         //if the user changes the output directory command factory will send this info to us in the output parameter 
483                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = ""; }
484                         
485                         
486                         //if the user changes the output directory command factory will send this info to us in the output parameter 
487                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = ""; }
488                         
489                         string path;
490                         it = parameters.find("reference");
491                         //user has given a template file
492                         if(it != parameters.end()){ 
493                                 if (it->second == "self") { templatefile = "self"; }
494                                 else {
495                                         path = m->hasPath(it->second);
496                                         //if the user has not given a path then, add inputdir. else leave path alone.
497                                         if (path == "") {       parameters["reference"] = inputDir + it->second;                }
498                                         
499                                         templatefile = validParameter.validFile(parameters, "reference", true);
500                                         if (templatefile == "not open") { abort = true; }
501                                         else if (templatefile == "not found") { //check for saved reference sequences
502                                                 if (rdb->getSavedReference() != "") {
503                                                         templatefile = rdb->getSavedReference();
504                                                         m->mothurOutEndLine();  m->mothurOut("Using sequences from " + rdb->getSavedReference() + "."); m->mothurOutEndLine();
505                                                 }else {
506                                                         m->mothurOut("[ERROR]: You don't have any saved reference sequences and the reference parameter is a required."); 
507                                                         m->mothurOutEndLine();
508                                                         abort = true; 
509                                                 }
510                                         }
511                                 }
512                         }else if (hasName) {  templatefile = "self"; }
513             else if (hasCount) {  templatefile = "self"; }
514                         else { 
515                                 if (rdb->getSavedReference() != "") {
516                                         templatefile = rdb->getSavedReference();
517                                         m->mothurOutEndLine();  m->mothurOut("Using sequences from " + rdb->getSavedReference() + "."); m->mothurOutEndLine();
518                                 }else {
519                                         m->mothurOut("[ERROR]: You don't have any saved reference sequences and the reference parameter is a required."); 
520                                         m->mothurOutEndLine();
521                                         templatefile = ""; abort = true; 
522                                 } 
523                         }
524                                 
525                         string temp = validParameter.validFile(parameters, "processors", false);        if (temp == "not found"){       temp = m->getProcessors();      }
526                         m->setProcessors(temp);
527                         m->mothurConvert(temp, processors);
528                         
529                         abskew = validParameter.validFile(parameters, "abskew", false); if (abskew == "not found"){     useAbskew = false;  abskew = "1.9";     }else{  useAbskew = true;  }
530                         if (useAbskew && templatefile != "self") { m->mothurOut("The abskew parameter is only valid with template=self, ignoring."); m->mothurOutEndLine(); useAbskew = false; }
531                         
532                         temp = validParameter.validFile(parameters, "chimealns", false);                        if (temp == "not found") { temp = "f"; }
533                         chimealns = m->isTrue(temp); 
534                         
535                         minh = validParameter.validFile(parameters, "minh", false);                                             if (minh == "not found")                        { useMinH = false; minh = "0.3";                                        }       else{ useMinH = true;                   }
536                         mindiv = validParameter.validFile(parameters, "mindiv", false);                                 if (mindiv == "not found")                      { useMindiv = false; mindiv = "0.5";                            }       else{ useMindiv = true;                 }
537                         xn = validParameter.validFile(parameters, "xn", false);                                                 if (xn == "not found")                          { useXn = false; xn = "8.0";                                            }       else{ useXn = true;                             }
538                         dn = validParameter.validFile(parameters, "dn", false);                                                 if (dn == "not found")                          { useDn = false; dn = "1.4";                                            }       else{ useDn = true;                             }
539                         xa = validParameter.validFile(parameters, "xa", false);                                                 if (xa == "not found")                          { useXa = false; xa = "1";                                                      }       else{ useXa = true;                             }
540                         chunks = validParameter.validFile(parameters, "chunks", false);                                 if (chunks == "not found")                      { useChunks = false; chunks = "4";                                      }       else{ useChunks = true;                 }
541                         minchunk = validParameter.validFile(parameters, "minchunk", false);                             if (minchunk == "not found")            { useMinchunk = false; minchunk = "64";                         }       else{ useMinchunk = true;               }
542                         idsmoothwindow = validParameter.validFile(parameters, "idsmoothwindow", false); if (idsmoothwindow == "not found")      { useIdsmoothwindow = false; idsmoothwindow = "32";     }       else{ useIdsmoothwindow = true; }
543                         //minsmoothid = validParameter.validFile(parameters, "minsmoothid", false);             if (minsmoothid == "not found")         { useMinsmoothid = false; minsmoothid = "0.95";         }       else{ useMinsmoothid = true;    }
544                         maxp = validParameter.validFile(parameters, "maxp", false);                                             if (maxp == "not found")                        { useMaxp = false; maxp = "2";                                          }       else{ useMaxp = true;                   }
545                         minlen = validParameter.validFile(parameters, "minlen", false);                                 if (minlen == "not found")                      { useMinlen = false; minlen = "10";                                     }       else{ useMinlen = true;                 }
546                         maxlen = validParameter.validFile(parameters, "maxlen", false);                                 if (maxlen == "not found")                      { useMaxlen = false; maxlen = "10000";                          }       else{ useMaxlen = true;                 }
547             
548             strand = validParameter.validFile(parameters, "strand", false);     if (strand == "not found")      {  strand = ""; }
549                         
550                         temp = validParameter.validFile(parameters, "ucl", false);                                              if (temp == "not found") { temp = "f"; }
551                         ucl = m->isTrue(temp);
552                         
553                         queryfract = validParameter.validFile(parameters, "queryfract", false);                 if (queryfract == "not found")          { useQueryfract = false; queryfract = "0.5";            }       else{ useQueryfract = true;             }
554                         if (!ucl && useQueryfract) { m->mothurOut("queryfact may only be used when ucl=t, ignoring."); m->mothurOutEndLine(); useQueryfract = false; }
555                         
556                         temp = validParameter.validFile(parameters, "skipgaps", false);                                 if (temp == "not found") { temp = "t"; }
557                         skipgaps = m->isTrue(temp); 
558
559                         temp = validParameter.validFile(parameters, "skipgaps2", false);                                if (temp == "not found") { temp = "t"; }
560                         skipgaps2 = m->isTrue(temp); 
561             
562             
563                         temp = validParameter.validFile(parameters, "dereplicate", false);      
564                         if (temp == "not found") { 
565                                 if (groupfile != "")    {  temp = "false";                                      }
566                                 else                    {  temp = "true";       }
567                         }
568                         dups = m->isTrue(temp);
569
570                         
571                         if (hasName && (templatefile != "self")) { m->mothurOut("You have provided a namefile and the reference parameter is not set to self. I am not sure what reference you are trying to use, aborting."); m->mothurOutEndLine(); abort=true; }
572                         if (hasGroup && (templatefile != "self")) { m->mothurOut("You have provided a group file and the reference parameter is not set to self. I am not sure what reference you are trying to use, aborting."); m->mothurOutEndLine(); abort=true; }
573                         
574                         //look for uchime exe
575                         path = m->argv;
576                         string tempPath = path;
577                         for (int i = 0; i < path.length(); i++) { tempPath[i] = tolower(path[i]); }
578                         path = path.substr(0, (tempPath.find_last_of('m')));
579                         
580                         string uchimeCommand;
581 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
582                         uchimeCommand = path + "uchime";        //      format the database, -o option gives us the ability
583             if (m->debug) { 
584                 m->mothurOut("[DEBUG]: Uchime location using \"which uchime\" = "); 
585                 Command* newCommand = new SystemCommand("which uchime"); m->mothurOutEndLine();
586                 newCommand->execute();
587                 delete newCommand;
588                 m->mothurOut("[DEBUG]: Mothur's location using \"which mothur\" = "); 
589                 newCommand = new SystemCommand("which mothur"); m->mothurOutEndLine();
590                 newCommand->execute();
591                 delete newCommand;
592             }
593 #else
594                         uchimeCommand = path + "uchime.exe";
595 #endif
596         
597                         //test to make sure uchime exists
598                         ifstream in;
599                         uchimeCommand = m->getFullPathName(uchimeCommand);
600                         int ableToOpen = m->openInputFile(uchimeCommand, in, "no error"); in.close();
601                         if(ableToOpen == 1) {   
602                 m->mothurOut(uchimeCommand + " file does not exist. Checking path... \n");
603                 //check to see if uchime is in the path??
604                 
605                 string uLocation = m->findProgramPath("uchime");
606                 
607                 
608                 ifstream in2;
609 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
610                 ableToOpen = m->openInputFile(uLocation, in2, "no error"); in2.close();
611 #else
612                 ableToOpen = m->openInputFile((uLocation + ".exe"), in2, "no error"); in2.close();
613 #endif
614
615                 if(ableToOpen == 1) { m->mothurOut("[ERROR]: " + uLocation + " file does not exist. mothur requires the uchime executable."); m->mothurOutEndLine(); abort = true; } 
616                 else {  m->mothurOut("Found uchime in your path, using " + uLocation + "\n");uchimeLocation = uLocation; }
617             }else {  uchimeLocation = uchimeCommand; }
618             
619             uchimeLocation = m->getFullPathName(uchimeLocation);
620         }
621         }
622         catch(exception& e) {
623                 m->errorOut(e, "ChimeraSlayerCommand", "ChimeraSlayerCommand");
624                 exit(1);
625         }
626 }
627 //***************************************************************************************************************
628
629 int ChimeraUchimeCommand::execute(){
630         try{
631         
632         if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
633                 
634                 m->mothurOut("\nuchime by Robert C. Edgar\nhttp://drive5.com/uchime\nThis code is donated to the public domain.\n\n");
635                 
636                 for (int s = 0; s < fastaFileNames.size(); s++) {
637                         
638                         m->mothurOut("Checking sequences from " + fastaFileNames[s] + " ..." ); m->mothurOutEndLine();
639                         
640                         int start = time(NULL); 
641                         string nameFile = "";
642                         if (outputDir == "") { outputDir = m->hasPath(fastaFileNames[s]);  }//if user entered a file with a path then preserve it                               
643                         map<string, string> variables; 
644             variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s]));
645                         string outputFileName = getOutputFileName("chimera", variables);
646                         string accnosFileName = getOutputFileName("accnos", variables);
647                         string alnsFileName = getOutputFileName("alns", variables);
648                         string newFasta = m->getRootName(fastaFileNames[s]) + "temp";
649                                 
650                         //you provided a groupfile
651                         string groupFile = "";
652             bool hasGroup = false;
653                         if (groupFileNames.size() != 0) { groupFile = groupFileNames[s]; hasGroup = true; }
654             else if (hasCount) {
655                 CountTable ct;
656                 if (ct.testGroups(nameFileNames[s])) { hasGroup = true; }
657             }
658                         
659                         if ((templatefile == "self") && (!hasGroup)) { //you want to run uchime with a template=self and no groups
660
661                                 if (processors != 1) { m->mothurOut("When using template=self, mothur can only use 1 processor, continuing."); m->mothurOutEndLine(); processors = 1; }
662                                 if (nameFileNames.size() != 0) { //you provided a namefile and we don't need to create one
663                                         nameFile = nameFileNames[s];
664                                 }else { nameFile = getNamesFile(fastaFileNames[s]); }
665                                                                                 
666                                 map<string, string> seqs;  
667                                 readFasta(fastaFileNames[s], seqs);  if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) {   m->mothurRemove(outputNames[j]);        }  return 0; }
668
669                                 //read namefile
670                                 vector<seqPriorityNode> nameMapCount;
671                 int error;
672                 if (hasCount) {
673                     CountTable ct;
674                     ct.readTable(nameFile);
675                     for(map<string, string>::iterator it = seqs.begin(); it != seqs.end(); it++) {
676                         int num = ct.getNumSeqs(it->first);
677                         if (num == 0) { error = 1; }
678                         else {
679                             seqPriorityNode temp(num, it->second, it->first);
680                             nameMapCount.push_back(temp);
681                         }
682                     }
683                 }else {
684                     error = m->readNames(nameFile, nameMapCount, seqs); if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) {        m->mothurRemove(outputNames[j]);        }  return 0; }
685                 }
686                                 if (error == 1) { for (int j = 0; j < outputNames.size(); j++) {        m->mothurRemove(outputNames[j]);        }  return 0; }
687                                 if (seqs.size() != nameMapCount.size()) { m->mothurOut( "The number of sequences in your fastafile does not match the number of sequences in your namefile, aborting."); m->mothurOutEndLine(); for (int j = 0; j < outputNames.size(); j++) {  m->mothurRemove(outputNames[j]);        }  return 0; }
688                                 
689                                 printFile(nameMapCount, newFasta);
690                                 fastaFileNames[s] = newFasta;
691                         }
692                         
693                         if (m->control_pressed) {  for (int j = 0; j < outputNames.size(); j++) {       m->mothurRemove(outputNames[j]);        }  return 0;    }                               
694                         
695                         if (hasGroup) {
696                                 if (nameFileNames.size() != 0) { //you provided a namefile and we don't need to create one
697                                         nameFile = nameFileNames[s];
698                                 }else { nameFile = getNamesFile(fastaFileNames[s]); }
699                                 
700                                 //Parse sequences by group
701                 vector<string> groups;
702                 map<string, string> uniqueNames;
703                 if (hasCount) {
704                     cparser = new SequenceCountParser(nameFile, fastaFileNames[s]);
705                     groups = cparser->getNamesOfGroups();
706                     uniqueNames = cparser->getAllSeqsMap();
707                 }else{
708                     sparser = new SequenceParser(groupFile, fastaFileNames[s], nameFile);
709                     groups = sparser->getNamesOfGroups();
710                     uniqueNames = sparser->getAllSeqsMap();
711                 }
712                                         
713                                 if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) {        m->mothurRemove(outputNames[j]);        }  return 0; }
714                                                                 
715                                 //clears files
716                                 ofstream out, out1, out2;
717                                 m->openOutputFile(outputFileName, out); out.close(); 
718                                 m->openOutputFile(accnosFileName, out1); out1.close();
719                                 if (chimealns) { m->openOutputFile(alnsFileName, out2); out2.close(); }
720                                 int totalSeqs = 0;
721                                 
722                                 if(processors == 1)     {       totalSeqs = driverGroups(outputFileName, newFasta, accnosFileName, alnsFileName, 0, groups.size(), groups);     }
723                                 else                            {       totalSeqs = createProcessesGroups(outputFileName, newFasta, accnosFileName, alnsFileName, groups, nameFile, groupFile, fastaFileNames[s]);                      }
724
725                                 if (m->control_pressed) {  for (int j = 0; j < outputNames.size(); j++) {       m->mothurRemove(outputNames[j]);        }  return 0;    }                               
726                 if (hasCount) { delete cparser; }
727                 else { delete sparser; }
728                 
729                 if (!dups) { 
730                     int totalChimeras = deconvoluteResults(uniqueNames, outputFileName, accnosFileName, alnsFileName);
731                                 
732                     m->mothurOutEndLine(); m->mothurOut("It took " + toString(time(NULL) - start) + " secs to check " + toString(totalSeqs) + " sequences. " + toString(totalChimeras) + " chimeras were found.");      m->mothurOutEndLine();
733                     m->mothurOut("The number of sequences checked may be larger than the number of unique sequences because some sequences are found in several samples."); m->mothurOutEndLine(); 
734                                 }
735                 
736                                 if (m->control_pressed) {  for (int j = 0; j < outputNames.size(); j++) {       m->mothurRemove(outputNames[j]);        }  return 0;    }                               
737                                         
738                         }else{
739                                 if (m->control_pressed) {  for (int j = 0; j < outputNames.size(); j++) {       m->mothurRemove(outputNames[j]);        }  return 0;    }
740                         
741                                 int numSeqs = 0;
742                                 int numChimeras = 0;
743
744                                 if(processors == 1){ numSeqs = driver(outputFileName, fastaFileNames[s], accnosFileName, alnsFileName, numChimeras); }
745                                 else{   numSeqs = createProcesses(outputFileName, fastaFileNames[s], accnosFileName, alnsFileName, numChimeras); }
746                                 
747                                 //add headings
748                                 ofstream out;
749                                 m->openOutputFile(outputFileName+".temp", out); 
750                                 out << "Score\tQuery\tParentA\tParentB\tIdQM\tIdQA\tIdQB\tIdAB\tIdQT\tLY\tLN\tLA\tRY\tRN\tRA\tDiv\tYN\n";
751                                 out.close();
752                                 
753                                 m->appendFiles(outputFileName, outputFileName+".temp");
754                                 m->mothurRemove(outputFileName); rename((outputFileName+".temp").c_str(), outputFileName.c_str());
755                                 
756                                 if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) {        m->mothurRemove(outputNames[j]);        } return 0; }
757                         
758                                 //remove file made for uchime
759                                 if (templatefile == "self") {  m->mothurRemove(fastaFileNames[s]); }
760                         
761                                 m->mothurOutEndLine(); m->mothurOut("It took " + toString(time(NULL) - start) + " secs to check " + toString(numSeqs) + " sequences. " + toString(numChimeras) + " chimeras were found.");      m->mothurOutEndLine();
762                         }
763                         
764                         outputNames.push_back(outputFileName); outputTypes["chimera"].push_back(outputFileName);
765                         outputNames.push_back(accnosFileName); outputTypes["accnos"].push_back(accnosFileName);
766                         if (chimealns) { outputNames.push_back(alnsFileName); outputTypes["alns"].push_back(alnsFileName); }
767                 }
768         
769                 //set accnos file as new current accnosfile
770                 string current = "";
771                 itTypes = outputTypes.find("accnos");
772                 if (itTypes != outputTypes.end()) {
773                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setAccnosFile(current); }
774                 }
775                 
776                 m->mothurOutEndLine();
777                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
778                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }       
779                 m->mothurOutEndLine();
780                 
781                 return 0;
782                 
783         }
784         catch(exception& e) {
785                 m->errorOut(e, "ChimeraUchimeCommand", "execute");
786                 exit(1);
787         }
788 }
789 //**********************************************************************************************************************
790 int ChimeraUchimeCommand::deconvoluteResults(map<string, string>& uniqueNames, string outputFileName, string accnosFileName, string alnsFileName){
791         try {
792                 map<string, string>::iterator itUnique;
793                 int total = 0;
794                 
795                 //edit accnos file
796                 ifstream in2; 
797                 m->openInputFile(accnosFileName, in2);
798                 
799                 ofstream out2;
800                 m->openOutputFile(accnosFileName+".temp", out2);
801                 
802                 string name;
803                 set<string> namesInFile; //this is so if a sequence is found to be chimera in several samples we dont write it to the results file more than once
804                 set<string>::iterator itNames;
805                 set<string> chimerasInFile;
806                 set<string>::iterator itChimeras;
807
808                 
809                 while (!in2.eof()) {
810                         if (m->control_pressed) { in2.close(); out2.close(); m->mothurRemove(outputFileName); m->mothurRemove((accnosFileName+".temp")); return 0; }
811                         
812                         in2 >> name; m->gobble(in2);
813                         
814                         //find unique name
815                         itUnique = uniqueNames.find(name);
816                         
817                         if (itUnique == uniqueNames.end()) { m->mothurOut("[ERROR]: trouble parsing accnos results. Cannot find " + name + "."); m->mothurOutEndLine(); m->control_pressed = true; }
818                         else {
819                                 itChimeras = chimerasInFile.find((itUnique->second));
820                                 
821                                 if (itChimeras == chimerasInFile.end()) {
822                                         out2 << itUnique->second << endl;
823                                         chimerasInFile.insert((itUnique->second));
824                                         total++;
825                                 }
826                         }
827                 }
828                 in2.close();
829                 out2.close();
830                 
831                 m->mothurRemove(accnosFileName);
832                 rename((accnosFileName+".temp").c_str(), accnosFileName.c_str());
833                 
834                 
835                 
836                 //edit chimera file
837                 ifstream in; 
838                 m->openInputFile(outputFileName, in);
839                 
840                 ofstream out;
841                 m->openOutputFile(outputFileName+".temp", out); out.setf(ios::fixed, ios::floatfield); out.setf(ios::showpoint);
842                 out << "Score\tQuery\tParentA\tParentB\tIdQM\tIdQA\tIdQB\tIdAB\tIdQT\tLY\tLN\tLA\tRY\tRN\tRA\tDiv\tYN\n";
843                 
844                 float temp1;
845                 string parent1, parent2, temp2, temp3, temp4, temp5, temp6, temp7, temp8, temp9, temp10, temp11, temp12, temp13, flag;
846                 name = "";
847                 namesInFile.clear();    
848                 //assumptions - in file each read will always look like - if uchime source is updated, revisit this code.
849                 /*                                                                              1       2       3       4       5       6       7       8       9       10      11      12      13      14      15
850                  0.000000       F11Fcsw_33372/ab=18/            *       *       *       *       *       *       *       *       *       *       *       *       *       *       N
851                  0.018300       F11Fcsw_14980/ab=16/            F11Fcsw_1915/ab=35/     F11Fcsw_6032/ab=42/     79.9    78.7    78.2    78.7    79.2    3       0       5       11      10      20      1.46    N
852                 */
853                 
854                 while (!in.eof()) {
855                         
856                         if (m->control_pressed) { in.close(); out.close(); m->mothurRemove((outputFileName+".temp")); return 0; }
857                         
858                         bool print = false;
859                         in >> temp1;    m->gobble(in);
860                         in >> name;             m->gobble(in);
861                         in >> parent1;  m->gobble(in);
862                         in >> parent2;  m->gobble(in);
863                         in >> temp2 >> temp3 >> temp4 >> temp5 >> temp6 >> temp7 >> temp8 >> temp9 >> temp10 >> temp11 >> temp12 >> temp13 >> flag;
864                         m->gobble(in);
865                         
866                         //parse name - name will look like U68590/ab=1/
867                         string restOfName = "";
868                         int pos = name.find_first_of('/');
869                         if (pos != string::npos) {
870                                 restOfName = name.substr(pos);
871                                 name = name.substr(0, pos);
872                         }
873                         
874                         //find unique name
875                         itUnique = uniqueNames.find(name);
876                         
877                         if (itUnique == uniqueNames.end()) { m->mothurOut("[ERROR]: trouble parsing chimera results. Cannot find "+ name + "."); m->mothurOutEndLine(); m->control_pressed = true; }
878                         else {
879                                 name = itUnique->second;
880                                 //is this name already in the file
881                                 itNames = namesInFile.find((name));
882                                 
883                                 if (itNames == namesInFile.end()) { //no not in file
884                                         if (flag == "N") { //are you really a no??
885                                                 //is this sequence really not chimeric??
886                                                 itChimeras = chimerasInFile.find(name);
887                                                 
888                                                 //then you really are a no so print, otherwise skip
889                                                 if (itChimeras == chimerasInFile.end()) { print = true; }
890                                         }else{ print = true; }
891                                 }
892                         }
893                         
894                         if (print) {
895                                 out << temp1 << '\t' << name << restOfName << '\t';
896                                 namesInFile.insert(name);
897                                 
898                                 //parse parent1 names
899                                 if (parent1 != "*") {
900                                         restOfName = "";
901                                         pos = parent1.find_first_of('/');
902                                         if (pos != string::npos) {
903                                                 restOfName = parent1.substr(pos);
904                                                 parent1 = parent1.substr(0, pos);
905                                         }
906                                         
907                                         itUnique = uniqueNames.find(parent1);
908                                         if (itUnique == uniqueNames.end()) { m->mothurOut("[ERROR]: trouble parsing chimera results. Cannot find parentA "+ parent1 + "."); m->mothurOutEndLine(); m->control_pressed = true; }
909                                         else {  out << itUnique->second << restOfName << '\t';  }
910                                 }else { out << parent1 << '\t'; }
911                                 
912                                 //parse parent2 names
913                                 if (parent2 != "*") {
914                                         restOfName = "";
915                                         pos = parent2.find_first_of('/');
916                                         if (pos != string::npos) {
917                                                 restOfName = parent2.substr(pos);
918                                                 parent2 = parent2.substr(0, pos);
919                                         }
920                                         
921                                         itUnique = uniqueNames.find(parent2);
922                                         if (itUnique == uniqueNames.end()) { m->mothurOut("[ERROR]: trouble parsing chimera results. Cannot find parentB "+ parent2 + "."); m->mothurOutEndLine(); m->control_pressed = true; }
923                                         else {  out << itUnique->second << restOfName << '\t';  }
924                                 }else { out << parent2 << '\t'; }
925                                 
926                                 out << temp2 << '\t' << temp3 << '\t' << temp4 << '\t' << temp5 << '\t' << temp6 << '\t' << temp7 << '\t' << temp8 << '\t' << temp9 << '\t' << temp10 << '\t' << temp11 << '\t' << temp12 << temp13 << '\t' << flag << endl;    
927                         }
928                 }
929                 in.close();
930                 out.close();
931                 
932                 m->mothurRemove(outputFileName);
933                 rename((outputFileName+".temp").c_str(), outputFileName.c_str());
934                 
935                                 
936                 //edit anls file
937                 //assumptions - in file each read will always look like - if uchime source is updated, revisit this code.
938                 /*
939                  ------------------------------------------------------------------------
940                  Query   (  179 nt) F21Fcsw_11639/ab=591/
941                  ParentA (  179 nt) F11Fcsw_6529/ab=1625/
942                  ParentB (  181 nt) F21Fcsw_12128/ab=1827/
943                  
944                  A     1 AAGgAAGAtTAATACaagATGgCaTCatgAGtccgCATgTtcAcatGATTAAAG--gTaTtcCGGTagacGATGGGGATG 78
945                  Q     1 AAGTAAGACTAATACCCAATGACGTCTCTAGAAGACATCTGAAAGAGATTAAAG--ATTTATCGGTGATGGATGGGGATG 78
946                  B     1 AAGgAAGAtTAATcCaggATGggaTCatgAGttcACATgTccgcatGATTAAAGgtATTTtcCGGTagacGATGGGGATG 80
947                  Diffs      N    N    A N?N   N N  NNN  N?NB   N ?NaNNN          B B NN    NNNN          
948                  Votes      0    0    + 000   0 0  000  000+   0 00!000            + 00    0000          
949                  Model   AAAAAAAAAAAAAAAAAAAAAAxBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB
950                  
951                  A    79 CGTtccATTAGaTaGTaGGCGGGGTAACGGCCCACCtAGtCttCGATggaTAGGGGTTCTGAGAGGAAGGTCCCCCACAT 158
952                  Q    79 CGTCTGATTAGCTTGTTGGCGGGGTAACGGCCCACCAAGGCAACGATCAGTAGGGGTTCTGAGAGGAAGGTCCCCCACAT 158
953                  B    81 CGTtccATTAGaTaGTaGGCGGGGTAACGGCCCACCtAGtCAACGATggaTAGGGGTTCTGAGAGGAAGGTCCCCCACAT 160
954                  Diffs      NNN     N N  N                   N  N BB    NNN                              
955                  Votes      000     0 0  0                   0  0 ++    000                              
956                  Model   BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB
957                  
958                  A   159 TGGAACTGAGACACGGTCCAA 179
959                  Q   159 TGGAACTGAGACACGGTCCAA 179
960                  B   161 TGGAACTGAGACACGGTCCAA 181
961                  Diffs                        
962                  Votes                        
963                  Model   BBBBBBBBBBBBBBBBBBBBB
964                  
965                  Ids.  QA 76.6%, QB 77.7%, AB 93.7%, QModel 78.9%, Div. +1.5%
966                  Diffs Left 7: N 0, A 6, Y 1 (14.3%); Right 35: N 1, A 30, Y 4 (11.4%), Score 0.0047
967                 */
968                 if (chimealns) {
969                         ifstream in3; 
970                         m->openInputFile(alnsFileName, in3);
971                 
972                         ofstream out3;
973                         m->openOutputFile(alnsFileName+".temp", out3); out3.setf(ios::fixed, ios::floatfield); out3.setf(ios::showpoint);
974                 
975                         name = "";
976                         namesInFile.clear();
977                         string line = "";
978                         
979                         while (!in3.eof()) {
980                                 if (m->control_pressed) { in3.close(); out3.close(); m->mothurRemove(outputFileName); m->mothurRemove((accnosFileName)); m->mothurRemove((alnsFileName+".temp")); return 0; }
981                                 
982                                 line = "";
983                                 line = m->getline(in3); 
984                                 string temp = "";
985                                 
986                                 if (line != "") {
987                                         istringstream iss(line);
988                                         iss >> temp;
989                                         
990                                         //are you a name line
991                                         if ((temp == "Query") || (temp == "ParentA") || (temp == "ParentB")) {
992                                                 int spot = 0;
993                                                 for (int i = 0; i < line.length(); i++) {
994                                                         spot = i;
995                                                         if (line[i] == ')') { break; }
996                                                         else { out3 << line[i]; }
997                                                 }
998                                                 
999                                                 if (spot == (line.length() - 1)) { m->mothurOut("[ERROR]: could not line sequence name in line " + line + "."); m->mothurOutEndLine(); m->control_pressed = true; }
1000                                                 else if ((spot+2) > (line.length() - 1)) { m->mothurOut("[ERROR]: could not line sequence name in line " + line + "."); m->mothurOutEndLine(); m->control_pressed = true; }
1001                                                 else {
1002                                                         out << line[spot] << line[spot+1];
1003                                                         
1004                                                         name = line.substr(spot+2);
1005                                                         
1006                                                         //parse name - name will either look like U68590/ab=1/ or U68590
1007                                                         string restOfName = "";
1008                                                         int pos = name.find_first_of('/');
1009                                                         if (pos != string::npos) {
1010                                                                 restOfName = name.substr(pos);
1011                                                                 name = name.substr(0, pos);
1012                                                         }
1013                                                         
1014                                                         //find unique name
1015                                                         itUnique = uniqueNames.find(name);
1016                                                         
1017                                                         if (itUnique == uniqueNames.end()) { m->mothurOut("[ERROR]: trouble parsing alns results. Cannot find "+ name + "."); m->mothurOutEndLine();m->control_pressed = true;  }
1018                                                         else {
1019                                                                 //only limit repeats on query names
1020                                                                 if (temp == "Query") {
1021                                                                         itNames = namesInFile.find((itUnique->second));
1022                                                                         
1023                                                                         if (itNames == namesInFile.end()) {
1024                                                                                 out << itUnique->second << restOfName << endl;
1025                                                                                 namesInFile.insert((itUnique->second));
1026                                                                         }
1027                                                                 }else { out << itUnique->second << restOfName << endl;  }
1028                                                         }
1029                                                         
1030                                                 }
1031                                                 
1032                                         }else { //not need to alter line
1033                                                 out3 << line << endl;
1034                                         }
1035                                 }else { out3 << endl; }
1036                         }
1037                         in3.close();
1038                         out3.close();
1039                         
1040                         m->mothurRemove(alnsFileName);
1041                         rename((alnsFileName+".temp").c_str(), alnsFileName.c_str());
1042                 }
1043                 
1044                 return total;
1045         }
1046         catch(exception& e) {
1047                 m->errorOut(e, "ChimeraUchimeCommand", "deconvoluteResults");
1048                 exit(1);
1049         }
1050 }       
1051 //**********************************************************************************************************************
1052 int ChimeraUchimeCommand::printFile(vector<seqPriorityNode>& nameMapCount, string filename){
1053         try {
1054                 
1055                 sort(nameMapCount.begin(), nameMapCount.end(), compareSeqPriorityNodes);
1056                 
1057                 ofstream out;
1058                 m->openOutputFile(filename, out);
1059                 
1060                 //print new file in order of
1061                 for (int i = 0; i < nameMapCount.size(); i++) {
1062                         out << ">" << nameMapCount[i].name  << "/ab=" << nameMapCount[i].numIdentical << "/" << endl << nameMapCount[i].seq << endl;
1063                 }
1064                 out.close();
1065                 
1066                 return 0;
1067         }
1068         catch(exception& e) {
1069                 m->errorOut(e, "ChimeraUchimeCommand", "printFile");
1070                 exit(1);
1071         }
1072 }       
1073 //**********************************************************************************************************************
1074 int ChimeraUchimeCommand::readFasta(string filename, map<string, string>& seqs){
1075         try {
1076                 //create input file for uchime
1077                 //read through fastafile and store info
1078                 ifstream in;
1079                 m->openInputFile(filename, in);
1080                 
1081                 while (!in.eof()) {
1082                         
1083                         if (m->control_pressed) { in.close(); return 0; }
1084                         
1085                         Sequence seq(in); m->gobble(in);
1086                         seqs[seq.getName()] = seq.getAligned();
1087                 }
1088                 in.close();
1089                 
1090                 return 0;
1091         }
1092         catch(exception& e) {
1093                 m->errorOut(e, "ChimeraUchimeCommand", "readFasta");
1094                 exit(1);
1095         }
1096 }       
1097 //**********************************************************************************************************************
1098
1099 string ChimeraUchimeCommand::getNamesFile(string& inputFile){
1100         try {
1101                 string nameFile = "";
1102                 
1103                 m->mothurOutEndLine(); m->mothurOut("No namesfile given, running unique.seqs command to generate one."); m->mothurOutEndLine(); m->mothurOutEndLine();
1104                 
1105                 //use unique.seqs to create new name and fastafile
1106                 string inputString = "fasta=" + inputFile;
1107                 m->mothurOut("/******************************************/"); m->mothurOutEndLine(); 
1108                 m->mothurOut("Running command: unique.seqs(" + inputString + ")"); m->mothurOutEndLine(); 
1109                 m->mothurCalling = true;
1110         
1111                 Command* uniqueCommand = new DeconvoluteCommand(inputString);
1112                 uniqueCommand->execute();
1113                 
1114                 map<string, vector<string> > filenames = uniqueCommand->getOutputFiles();
1115                 
1116                 delete uniqueCommand;
1117                 m->mothurCalling = false;
1118                 m->mothurOut("/******************************************/"); m->mothurOutEndLine(); 
1119                 
1120                 nameFile = filenames["name"][0];
1121                 inputFile = filenames["fasta"][0];
1122                 
1123                 return nameFile;
1124         }
1125         catch(exception& e) {
1126                 m->errorOut(e, "ChimeraUchimeCommand", "getNamesFile");
1127                 exit(1);
1128         }
1129 }
1130 //**********************************************************************************************************************
1131 int ChimeraUchimeCommand::driverGroups(string outputFName, string filename, string accnos, string alns, int start, int end, vector<string> groups){
1132         try {
1133                 
1134                 int totalSeqs = 0;
1135                 int numChimeras = 0;
1136                 
1137                 for (int i = start; i < end; i++) {
1138                         int start = time(NULL);  if (m->control_pressed) {  return 0; }
1139             
1140                         int error;
1141             if (hasCount) { error = cparser->getSeqs(groups[i], filename, true); if ((error == 1) || m->control_pressed) {  return 0; } }
1142             else { error = sparser->getSeqs(groups[i], filename, true); if ((error == 1) || m->control_pressed) {  return 0; } }
1143                         
1144                         int numSeqs = driver((outputFName + groups[i]), filename, (accnos+ groups[i]), (alns+ groups[i]), numChimeras);
1145                         totalSeqs += numSeqs;
1146                         
1147                         if (m->control_pressed) { return 0; }
1148                         
1149                         //remove file made for uchime
1150                         if (!m->debug) {  m->mothurRemove(filename);  }
1151             else { m->mothurOut("[DEBUG]: saving file: " + filename + ".\n"); }
1152                         
1153                         //append files
1154                         m->appendFiles((outputFName+groups[i]), outputFName); m->mothurRemove((outputFName+groups[i]));
1155                         m->appendFiles((accnos+groups[i]), accnos); m->mothurRemove((accnos+groups[i]));
1156                         if (chimealns) { m->appendFiles((alns+groups[i]), alns); m->mothurRemove((alns+groups[i])); }
1157                         
1158                         m->mothurOutEndLine(); m->mothurOut("It took " + toString(time(NULL) - start) + " secs to check " + toString(numSeqs) + " sequences from group " + groups[i] + ".");    m->mothurOutEndLine();                                  
1159                 }       
1160                 return totalSeqs;
1161                 
1162         }
1163         catch(exception& e) {
1164                 m->errorOut(e, "ChimeraUchimeCommand", "driverGroups");
1165                 exit(1);
1166         }
1167 }       
1168 //**********************************************************************************************************************
1169
1170 int ChimeraUchimeCommand::driver(string outputFName, string filename, string accnos, string alns, int& numChimeras){
1171         try {
1172                 
1173                 outputFName = m->getFullPathName(outputFName);
1174                 filename = m->getFullPathName(filename);
1175                 alns = m->getFullPathName(alns);
1176                 
1177                 //to allow for spaces in the path
1178                 outputFName = "\"" + outputFName + "\"";
1179                 filename = "\"" + filename + "\"";
1180                 alns = "\"" + alns + "\"";
1181                                 
1182                 vector<char*> cPara;
1183                 
1184                 string uchimeCommand = uchimeLocation;
1185         uchimeCommand = "\"" + uchimeCommand + "\" ";
1186         
1187         char* tempUchime;
1188                 tempUchime= new char[uchimeCommand.length()+1]; 
1189                 *tempUchime = '\0';
1190                 strncat(tempUchime, uchimeCommand.c_str(), uchimeCommand.length());
1191                 cPara.push_back(tempUchime);
1192                 
1193         //are you using a reference file
1194                 if (templatefile != "self") {
1195             string outputFileName = filename.substr(1, filename.length()-2) + ".uchime_formatted";
1196             prepFile(filename.substr(1, filename.length()-2), outputFileName);
1197             filename = outputFileName;
1198             filename = "\"" + filename + "\"";
1199                         //add reference file
1200                         char* tempRef = new char[5]; 
1201                         //strcpy(tempRef, "--db"); 
1202                         *tempRef = '\0'; strncat(tempRef, "--db", 4);
1203                         cPara.push_back(tempRef);  
1204                         char* tempR = new char[templatefile.length()+1];
1205                         //strcpy(tempR, templatefile.c_str());
1206                         *tempR = '\0'; strncat(tempR, templatefile.c_str(), templatefile.length());
1207                         cPara.push_back(tempR);
1208                 }
1209                 
1210                 char* tempIn = new char[8]; 
1211                 *tempIn = '\0'; strncat(tempIn, "--input", 7);
1212                 //strcpy(tempIn, "--input"); 
1213                 cPara.push_back(tempIn);
1214                 char* temp = new char[filename.length()+1];
1215                 *temp = '\0'; strncat(temp, filename.c_str(), filename.length());
1216                 //strcpy(temp, filename.c_str());
1217                 cPara.push_back(temp);
1218                 
1219                 char* tempO = new char[12]; 
1220                 *tempO = '\0'; strncat(tempO, "--uchimeout", 11);
1221                 //strcpy(tempO, "--uchimeout"); 
1222                 cPara.push_back(tempO);
1223                 char* tempout = new char[outputFName.length()+1];
1224                 //strcpy(tempout, outputFName.c_str());
1225                 *tempout = '\0'; strncat(tempout, outputFName.c_str(), outputFName.length());
1226                 cPara.push_back(tempout);
1227                 
1228                 if (chimealns) {
1229                         char* tempA = new char[13]; 
1230                         *tempA = '\0'; strncat(tempA, "--uchimealns", 12);
1231                         //strcpy(tempA, "--uchimealns"); 
1232                         cPara.push_back(tempA);
1233                         char* tempa = new char[alns.length()+1];
1234                         //strcpy(tempa, alns.c_str());
1235                         *tempa = '\0'; strncat(tempa, alns.c_str(), alns.length());
1236                         cPara.push_back(tempa);
1237                 }
1238         
1239         if (strand != "") {
1240                         char* tempA = new char[9]; 
1241                         *tempA = '\0'; strncat(tempA, "--strand", 8);
1242                         cPara.push_back(tempA);
1243                         char* tempa = new char[strand.length()+1];
1244                         *tempa = '\0'; strncat(tempa, strand.c_str(), strand.length());
1245                         cPara.push_back(tempa);
1246                 }
1247                 
1248                 if (useAbskew) {
1249                         char* tempskew = new char[9];
1250                         *tempskew = '\0'; strncat(tempskew, "--abskew", 8);
1251                         //strcpy(tempskew, "--abskew"); 
1252                         cPara.push_back(tempskew);
1253                         char* tempSkew = new char[abskew.length()+1];
1254                         //strcpy(tempSkew, abskew.c_str());
1255                         *tempSkew = '\0'; strncat(tempSkew, abskew.c_str(), abskew.length());
1256                         cPara.push_back(tempSkew);
1257                 }
1258                 
1259                 if (useMinH) {
1260                         char* tempminh = new char[7]; 
1261                         *tempminh = '\0'; strncat(tempminh, "--minh", 6);
1262                         //strcpy(tempminh, "--minh"); 
1263                         cPara.push_back(tempminh);
1264                         char* tempMinH = new char[minh.length()+1];
1265                         *tempMinH = '\0'; strncat(tempMinH, minh.c_str(), minh.length());
1266                         //strcpy(tempMinH, minh.c_str());
1267                         cPara.push_back(tempMinH);
1268                 }
1269                 
1270                 if (useMindiv) {
1271                         char* tempmindiv = new char[9]; 
1272                         *tempmindiv = '\0'; strncat(tempmindiv, "--mindiv", 8);
1273                         //strcpy(tempmindiv, "--mindiv"); 
1274                         cPara.push_back(tempmindiv);
1275                         char* tempMindiv = new char[mindiv.length()+1];
1276                         *tempMindiv = '\0'; strncat(tempMindiv, mindiv.c_str(), mindiv.length());
1277                         //strcpy(tempMindiv, mindiv.c_str());
1278                         cPara.push_back(tempMindiv);
1279                 }
1280                 
1281                 if (useXn) {
1282                         char* tempxn = new char[5]; 
1283                         //strcpy(tempxn, "--xn"); 
1284                         *tempxn = '\0'; strncat(tempxn, "--xn", 4);
1285                         cPara.push_back(tempxn);
1286                         char* tempXn = new char[xn.length()+1];
1287                         //strcpy(tempXn, xn.c_str());
1288                         *tempXn = '\0'; strncat(tempXn, xn.c_str(), xn.length());
1289                         cPara.push_back(tempXn);
1290                 }
1291                 
1292                 if (useDn) {
1293                         char* tempdn = new char[5]; 
1294                         //strcpy(tempdn, "--dn"); 
1295                         *tempdn = '\0'; strncat(tempdn, "--dn", 4);
1296                         cPara.push_back(tempdn);
1297                         char* tempDn = new char[dn.length()+1];
1298                         *tempDn = '\0'; strncat(tempDn, dn.c_str(), dn.length());
1299                         //strcpy(tempDn, dn.c_str());
1300                         cPara.push_back(tempDn);
1301                 }
1302                 
1303                 if (useXa) {
1304                         char* tempxa = new char[5]; 
1305                         //strcpy(tempxa, "--xa"); 
1306                         *tempxa = '\0'; strncat(tempxa, "--xa", 4);
1307                         cPara.push_back(tempxa);
1308                         char* tempXa = new char[xa.length()+1];
1309                         *tempXa = '\0'; strncat(tempXa, xa.c_str(), xa.length());
1310                         //strcpy(tempXa, xa.c_str());
1311                         cPara.push_back(tempXa);
1312                 }
1313                 
1314                 if (useChunks) {
1315                         char* tempchunks = new char[9]; 
1316                         //strcpy(tempchunks, "--chunks"); 
1317                         *tempchunks = '\0'; strncat(tempchunks, "--chunks", 8);
1318                         cPara.push_back(tempchunks);
1319                         char* tempChunks = new char[chunks.length()+1];
1320                         *tempChunks = '\0'; strncat(tempChunks, chunks.c_str(), chunks.length());
1321                         //strcpy(tempChunks, chunks.c_str());
1322                         cPara.push_back(tempChunks);
1323                 }
1324                 
1325                 if (useMinchunk) {
1326                         char* tempminchunk = new char[11]; 
1327                         //strcpy(tempminchunk, "--minchunk"); 
1328                         *tempminchunk = '\0'; strncat(tempminchunk, "--minchunk", 10);
1329                         cPara.push_back(tempminchunk);
1330                         char* tempMinchunk = new char[minchunk.length()+1];
1331                         *tempMinchunk = '\0'; strncat(tempMinchunk, minchunk.c_str(), minchunk.length());
1332                         //strcpy(tempMinchunk, minchunk.c_str());
1333                         cPara.push_back(tempMinchunk);
1334                 }
1335                 
1336                 if (useIdsmoothwindow) {
1337                         char* tempidsmoothwindow = new char[17]; 
1338                         *tempidsmoothwindow = '\0'; strncat(tempidsmoothwindow, "--idsmoothwindow", 16);
1339                         //strcpy(tempidsmoothwindow, "--idsmoothwindow"); 
1340                         cPara.push_back(tempidsmoothwindow);
1341                         char* tempIdsmoothwindow = new char[idsmoothwindow.length()+1];
1342                         *tempIdsmoothwindow = '\0'; strncat(tempIdsmoothwindow, idsmoothwindow.c_str(), idsmoothwindow.length());
1343                         //strcpy(tempIdsmoothwindow, idsmoothwindow.c_str());
1344                         cPara.push_back(tempIdsmoothwindow);
1345                 }
1346                 
1347                 /*if (useMinsmoothid) {
1348                         char* tempminsmoothid = new char[14]; 
1349                         //strcpy(tempminsmoothid, "--minsmoothid"); 
1350                         *tempminsmoothid = '\0'; strncat(tempminsmoothid, "--minsmoothid", 13);
1351                         cPara.push_back(tempminsmoothid);
1352                         char* tempMinsmoothid = new char[minsmoothid.length()+1];
1353                         *tempMinsmoothid = '\0'; strncat(tempMinsmoothid, minsmoothid.c_str(), minsmoothid.length());
1354                         //strcpy(tempMinsmoothid, minsmoothid.c_str());
1355                         cPara.push_back(tempMinsmoothid);
1356                 }*/
1357                 
1358                 if (useMaxp) {
1359                         char* tempmaxp = new char[7]; 
1360                         //strcpy(tempmaxp, "--maxp"); 
1361                         *tempmaxp = '\0'; strncat(tempmaxp, "--maxp", 6);
1362                         cPara.push_back(tempmaxp);
1363                         char* tempMaxp = new char[maxp.length()+1];
1364                         *tempMaxp = '\0'; strncat(tempMaxp, maxp.c_str(), maxp.length());
1365                         //strcpy(tempMaxp, maxp.c_str());
1366                         cPara.push_back(tempMaxp);
1367                 }
1368                 
1369                 if (!skipgaps) {
1370                         char* tempskipgaps = new char[13]; 
1371                         //strcpy(tempskipgaps, "--[no]skipgaps");
1372                         *tempskipgaps = '\0'; strncat(tempskipgaps, "--noskipgaps", 12);
1373                         cPara.push_back(tempskipgaps);
1374                 }
1375                 
1376                 if (!skipgaps2) {
1377                         char* tempskipgaps2 = new char[14]; 
1378                         //strcpy(tempskipgaps2, "--[no]skipgaps2"); 
1379                         *tempskipgaps2 = '\0'; strncat(tempskipgaps2, "--noskipgaps2", 13);
1380                         cPara.push_back(tempskipgaps2);
1381                 }
1382                 
1383                 if (useMinlen) {
1384                         char* tempminlen = new char[9]; 
1385                         *tempminlen = '\0'; strncat(tempminlen, "--minlen", 8);
1386                         //strcpy(tempminlen, "--minlen"); 
1387                         cPara.push_back(tempminlen);
1388                         char* tempMinlen = new char[minlen.length()+1];
1389                         //strcpy(tempMinlen, minlen.c_str());
1390                         *tempMinlen = '\0'; strncat(tempMinlen, minlen.c_str(), minlen.length());
1391                         cPara.push_back(tempMinlen);
1392                 }
1393                 
1394                 if (useMaxlen) {
1395                         char* tempmaxlen = new char[9]; 
1396                         //strcpy(tempmaxlen, "--maxlen"); 
1397                         *tempmaxlen = '\0'; strncat(tempmaxlen, "--maxlen", 8);
1398                         cPara.push_back(tempmaxlen);
1399                         char* tempMaxlen = new char[maxlen.length()+1];
1400                         *tempMaxlen = '\0'; strncat(tempMaxlen, maxlen.c_str(), maxlen.length());
1401                         //strcpy(tempMaxlen, maxlen.c_str());
1402                         cPara.push_back(tempMaxlen);
1403                 }
1404                 
1405                 if (ucl) {
1406                         char* tempucl = new char[5]; 
1407                         strcpy(tempucl, "--ucl"); 
1408                         cPara.push_back(tempucl);
1409                 }
1410                 
1411                 if (useQueryfract) {
1412                         char* tempqueryfract = new char[13]; 
1413                         *tempqueryfract = '\0'; strncat(tempqueryfract, "--queryfract", 12);
1414                         //strcpy(tempqueryfract, "--queryfract"); 
1415                         cPara.push_back(tempqueryfract);
1416                         char* tempQueryfract = new char[queryfract.length()+1];
1417                         *tempQueryfract = '\0'; strncat(tempQueryfract, queryfract.c_str(), queryfract.length());
1418                         //strcpy(tempQueryfract, queryfract.c_str());
1419                         cPara.push_back(tempQueryfract);
1420                 }
1421                 
1422                 
1423                 char** uchimeParameters;
1424                 uchimeParameters = new char*[cPara.size()];
1425                 string commandString = "";
1426                 for (int i = 0; i < cPara.size(); i++) {  uchimeParameters[i] = cPara[i];  commandString += toString(cPara[i]) + " "; } 
1427                 //int numArgs = cPara.size();
1428                 
1429                 //uchime_main(numArgs, uchimeParameters); 
1430                 //cout << "commandString = " << commandString << endl;
1431 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
1432 #else
1433                 commandString = "\"" + commandString + "\"";
1434 #endif
1435         if (m->debug) { m->mothurOut("[DEBUG]: uchime command = " + commandString + ".\n"); }
1436                 system(commandString.c_str());
1437                 
1438                 //free memory
1439                 for(int i = 0; i < cPara.size(); i++)  {  delete cPara[i];  }
1440                 delete[] uchimeParameters; 
1441                 
1442                 //remove "" from filenames
1443                 outputFName = outputFName.substr(1, outputFName.length()-2);
1444                 filename = filename.substr(1, filename.length()-2);
1445                 alns = alns.substr(1, alns.length()-2);
1446                 
1447                 if (m->control_pressed) { return 0; }
1448                 
1449                 //create accnos file from uchime results
1450                 ifstream in; 
1451                 m->openInputFile(outputFName, in);
1452                 
1453                 ofstream out;
1454                 m->openOutputFile(accnos, out);
1455                 
1456                 int num = 0;
1457                 numChimeras = 0;
1458                 while(!in.eof()) {
1459                         
1460                         if (m->control_pressed) { break; }
1461                         
1462                         string name = "";
1463                         string chimeraFlag = "";
1464                         //in >> chimeraFlag >> name;
1465                         
1466             string line = m->getline(in);
1467             vector<string> pieces = m->splitWhiteSpace(line);
1468             if (pieces.size() > 2) { 
1469                 name = pieces[1];
1470                 //fix name if needed
1471                 if (templatefile == "self") { 
1472                     name = name.substr(0, name.length()-1); //rip off last /
1473                     name = name.substr(0, name.find_last_of('/'));
1474                 }
1475                 
1476                 chimeraFlag = pieces[pieces.size()-1];
1477                         }
1478                         //for (int i = 0; i < 15; i++) {  in >> chimeraFlag; }
1479                         m->gobble(in);
1480                         
1481                         if (chimeraFlag == "Y") {  out << name << endl; numChimeras++; }
1482                         num++;
1483                 }
1484                 in.close();
1485                 out.close();
1486                 
1487         //if (templatefile != "self") {  m->mothurRemove(filename); }
1488         
1489                 return num;
1490         }
1491         catch(exception& e) {
1492                 m->errorOut(e, "ChimeraUchimeCommand", "driver");
1493                 exit(1);
1494         }
1495 }
1496 /**************************************************************************************************/
1497 //uchime can't handle some of the things allowed in mothurs fasta files. This functions "cleans up" the file.
1498 int ChimeraUchimeCommand::prepFile(string filename, string output) {
1499         try {
1500         
1501         ifstream in;
1502         m->openInputFile(filename, in);
1503         
1504         ofstream out;
1505         m->openOutputFile(output, out);
1506         
1507         while (!in.eof()) {
1508             if (m->control_pressed) { break;  }
1509             
1510             Sequence seq(in); m->gobble(in);
1511             
1512             if (seq.getName() != "") { seq.printSequence(out); }
1513         }
1514         in.close();
1515         out.close();
1516         
1517         return 0;
1518     }
1519         catch(exception& e) {
1520                 m->errorOut(e, "ChimeraUchimeCommand", "prepFile");
1521                 exit(1);
1522         }
1523 }
1524 /**************************************************************************************************/
1525
1526 int ChimeraUchimeCommand::createProcesses(string outputFileName, string filename, string accnos, string alns, int& numChimeras) {
1527         try {
1528                 
1529                 processIDS.clear();
1530                 int process = 1;
1531                 int num = 0;
1532                 vector<string> files;
1533                 
1534 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)          
1535                 //break up file into multiple files
1536                 m->divideFile(filename, processors, files);
1537                 
1538                 if (m->control_pressed) {  return 0;  }
1539                                 
1540                 //loop through and create all the processes you want
1541                 while (process != processors) {
1542                         int pid = fork();
1543                         
1544                         if (pid > 0) {
1545                                 processIDS.push_back(pid);  //create map from line number to pid so you can append files in correct order later
1546                                 process++;
1547                         }else if (pid == 0){
1548                                 num = driver(outputFileName + toString(getpid()) + ".temp", files[process], accnos + toString(getpid()) + ".temp", alns + toString(getpid()) + ".temp", numChimeras);
1549                                 
1550                                 //pass numSeqs to parent
1551                                 ofstream out;
1552                                 string tempFile = outputFileName + toString(getpid()) + ".num.temp";
1553                                 m->openOutputFile(tempFile, out);
1554                                 out << num << endl;
1555                                 out << numChimeras << endl;
1556                                 out.close();
1557                                 
1558                                 exit(0);
1559                         }else { 
1560                                 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine(); 
1561                                 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
1562                                 exit(0);
1563                         }
1564                 }
1565                 
1566                 //do my part
1567                 num = driver(outputFileName, files[0], accnos, alns, numChimeras);
1568                 
1569                 //force parent to wait until all the processes are done
1570                 for (int i=0;i<processIDS.size();i++) { 
1571                         int temp = processIDS[i];
1572                         wait(&temp);
1573                 }
1574                 
1575                 for (int i = 0; i < processIDS.size(); i++) {
1576                         ifstream in;
1577                         string tempFile =  outputFileName + toString(processIDS[i]) + ".num.temp";
1578                         m->openInputFile(tempFile, in);
1579                         if (!in.eof()) { 
1580                                 int tempNum = 0; 
1581                                 in >> tempNum; m->gobble(in);
1582                                 num += tempNum; 
1583                                 in >> tempNum;
1584                                 numChimeras += tempNum;
1585                         }
1586                         in.close(); m->mothurRemove(tempFile);
1587                 }
1588 #else
1589                 //////////////////////////////////////////////////////////////////////////////////////////////////////
1590                 //Windows version shared memory, so be careful when passing variables through the preClusterData struct. 
1591                 //Above fork() will clone, so memory is separate, but that's not the case with windows, 
1592                 //////////////////////////////////////////////////////////////////////////////////////////////////////
1593                 
1594                 //divide file
1595                 int count = 0;
1596                 int spot = 0;
1597                 map<int, ofstream*> filehandles;
1598                 map<int, ofstream*>::iterator it3;
1599                 
1600                 ofstream* temp;
1601                 for (int i = 0; i < processors; i++) {
1602                         temp = new ofstream;
1603                         filehandles[i] = temp;
1604                         m->openOutputFile(filename+toString(i)+".temp", *(temp));
1605                         files.push_back(filename+toString(i)+".temp");
1606                 }
1607                 
1608                 ifstream in;
1609                 m->openInputFile(filename, in);
1610                 
1611                 while(!in.eof()) {
1612                         
1613                         if (m->control_pressed) { in.close(); for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) { (*(it3->second)).close(); delete it3->second; } return 0; }
1614                         
1615                         Sequence tempSeq(in); m->gobble(in); 
1616                         
1617                         if (tempSeq.getName() != "") {
1618                                 tempSeq.printSequence(*(filehandles[spot])); 
1619                                 spot++; count++;
1620                                 if (spot == processors) { spot = 0; }
1621                         }
1622                 }
1623                 in.close();
1624                 
1625                 //delete memory
1626                 for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) {
1627                         (*(it3->second)).close();
1628                         delete it3->second;
1629                 }
1630                 
1631                 //sanity check for number of processors
1632                 if (count < processors) { processors = count; }
1633                 
1634                 vector<uchimeData*> pDataArray; 
1635                 DWORD   dwThreadIdArray[processors-1];
1636                 HANDLE  hThreadArray[processors-1]; 
1637                 vector<string> dummy; //used so that we can use the same struct for MyUchimeSeqsThreadFunction and MyUchimeThreadFunction
1638                 
1639                 //Create processor worker threads.
1640                 for( int i=1; i<processors; i++ ){
1641                         // Allocate memory for thread data.
1642                         string extension = toString(i) + ".temp";
1643                         
1644                         uchimeData* tempUchime = new uchimeData(outputFileName+extension, uchimeLocation, templatefile, files[i], "", "", "", accnos+extension, alns+extension, dummy, m, 0, 0,  i);
1645                         tempUchime->setBooleans(useAbskew, chimealns, useMinH, useMindiv, useXn, useDn, useXa, useChunks, useMinchunk, useIdsmoothwindow, useMinsmoothid, useMaxp, skipgaps, skipgaps2, useMinlen, useMaxlen, ucl, useQueryfract, hasCount);
1646                         tempUchime->setVariables(abskew, minh, mindiv, xn, dn, xa, chunks, minchunk, idsmoothwindow, minsmoothid, maxp, minlen, maxlen, queryfract, strand);
1647                         
1648                         pDataArray.push_back(tempUchime);
1649                         processIDS.push_back(i);
1650                         
1651                         //MySeqSumThreadFunction is in header. It must be global or static to work with the threads.
1652                         //default security attributes, thread function name, argument to thread function, use default creation flags, returns the thread identifier
1653                         hThreadArray[i-1] = CreateThread(NULL, 0, MyUchimeSeqsThreadFunction, pDataArray[i-1], 0, &dwThreadIdArray[i-1]);   
1654                 }
1655                 
1656                 
1657                 //using the main process as a worker saves time and memory
1658                 num = driver(outputFileName, files[0], accnos, alns, numChimeras);
1659                 
1660                 //Wait until all threads have terminated.
1661                 WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE);
1662                 
1663                 //Close all thread handles and free memory allocations.
1664                 for(int i=0; i < pDataArray.size(); i++){
1665                         num += pDataArray[i]->count;
1666                         numChimeras += pDataArray[i]->numChimeras;
1667                         CloseHandle(hThreadArray[i]);
1668                         delete pDataArray[i];
1669                 }
1670 #endif          
1671                 
1672                 //append output files
1673                 for(int i=0;i<processIDS.size();i++){
1674                         m->appendFiles((outputFileName + toString(processIDS[i]) + ".temp"), outputFileName);
1675                         m->mothurRemove((outputFileName + toString(processIDS[i]) + ".temp"));
1676                         
1677                         m->appendFiles((accnos + toString(processIDS[i]) + ".temp"), accnos);
1678                         m->mothurRemove((accnos + toString(processIDS[i]) + ".temp"));
1679                         
1680                         if (chimealns) {
1681                                 m->appendFiles((alns + toString(processIDS[i]) + ".temp"), alns);
1682                                 m->mothurRemove((alns + toString(processIDS[i]) + ".temp"));
1683                         }
1684                 }
1685                 
1686                 //get rid of the file pieces.
1687                 for (int i = 0; i < files.size(); i++) { m->mothurRemove(files[i]); }
1688                 return num;     
1689         }
1690         catch(exception& e) {
1691                 m->errorOut(e, "ChimeraUchimeCommand", "createProcesses");
1692                 exit(1);
1693         }
1694 }
1695 /**************************************************************************************************/
1696
1697 int ChimeraUchimeCommand::createProcessesGroups(string outputFName, string filename, string accnos, string alns, vector<string> groups, string nameFile, string groupFile, string fastaFile) {
1698         try {
1699                 
1700                 processIDS.clear();
1701                 int process = 1;
1702                 int num = 0;
1703                 
1704                 //sanity check
1705                 if (groups.size() < processors) { processors = groups.size(); }
1706                 
1707                 //divide the groups between the processors
1708                 vector<linePair> lines;
1709                 int numGroupsPerProcessor = groups.size() / processors;
1710                 for (int i = 0; i < processors; i++) {
1711                         int startIndex =  i * numGroupsPerProcessor;
1712                         int endIndex = (i+1) * numGroupsPerProcessor;
1713                         if(i == (processors - 1)){      endIndex = groups.size();       }
1714                         lines.push_back(linePair(startIndex, endIndex));
1715                 }
1716                 
1717 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)          
1718                                 
1719                 //loop through and create all the processes you want
1720                 while (process != processors) {
1721                         int pid = fork();
1722                         
1723                         if (pid > 0) {
1724                                 processIDS.push_back(pid);  //create map from line number to pid so you can append files in correct order later
1725                                 process++;
1726                         }else if (pid == 0){
1727                                 num = driverGroups(outputFName + toString(getpid()) + ".temp", filename + toString(getpid()) + ".temp", accnos + toString(getpid()) + ".temp", alns + toString(getpid()) + ".temp", lines[process].start, lines[process].end, groups);
1728                                 
1729                                 //pass numSeqs to parent
1730                                 ofstream out;
1731                                 string tempFile = outputFName + toString(getpid()) + ".num.temp";
1732                                 m->openOutputFile(tempFile, out);
1733                                 out << num << endl;
1734                                 out.close();
1735                                 
1736                                 exit(0);
1737                         }else { 
1738                                 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine(); 
1739                                 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
1740                                 exit(0);
1741                         }
1742                 }
1743                 
1744                 //do my part
1745                 num = driverGroups(outputFName, filename, accnos, alns, lines[0].start, lines[0].end, groups);
1746                 
1747                 //force parent to wait until all the processes are done
1748                 for (int i=0;i<processIDS.size();i++) { 
1749                         int temp = processIDS[i];
1750                         wait(&temp);
1751                 }
1752                 
1753                 for (int i = 0; i < processIDS.size(); i++) {
1754                         ifstream in;
1755                         string tempFile =  outputFName + toString(processIDS[i]) + ".num.temp";
1756                         m->openInputFile(tempFile, in);
1757                         if (!in.eof()) { int tempNum = 0; in >> tempNum; num += tempNum; }
1758                         in.close(); m->mothurRemove(tempFile);
1759                 }
1760                                 
1761 #else
1762                 //////////////////////////////////////////////////////////////////////////////////////////////////////
1763                 //Windows version shared memory, so be careful when passing variables through the uchimeData struct. 
1764                 //Above fork() will clone, so memory is separate, but that's not the case with windows, 
1765                 //////////////////////////////////////////////////////////////////////////////////////////////////////
1766                 
1767                 vector<uchimeData*> pDataArray; 
1768                 DWORD   dwThreadIdArray[processors-1];
1769                 HANDLE  hThreadArray[processors-1]; 
1770                 
1771                 //Create processor worker threads.
1772                 for( int i=1; i<processors; i++ ){
1773                         // Allocate memory for thread data.
1774                         string extension = toString(i) + ".temp";
1775                         
1776                         uchimeData* tempUchime = new uchimeData(outputFName+extension, uchimeLocation, templatefile, filename+extension, fastaFile, nameFile, groupFile, accnos+extension, alns+extension, groups, m, lines[i].start, lines[i].end,  i);
1777                         tempUchime->setBooleans(useAbskew, chimealns, useMinH, useMindiv, useXn, useDn, useXa, useChunks, useMinchunk, useIdsmoothwindow, useMinsmoothid, useMaxp, skipgaps, skipgaps2, useMinlen, useMaxlen, ucl, useQueryfract, hasCount);
1778                         tempUchime->setVariables(abskew, minh, mindiv, xn, dn, xa, chunks, minchunk, idsmoothwindow, minsmoothid, maxp, minlen, maxlen, queryfract, strand);
1779                         
1780                         pDataArray.push_back(tempUchime);
1781                         processIDS.push_back(i);
1782                         
1783                         //MyUchimeThreadFunction is in header. It must be global or static to work with the threads.
1784                         //default security attributes, thread function name, argument to thread function, use default creation flags, returns the thread identifier
1785                         hThreadArray[i-1] = CreateThread(NULL, 0, MyUchimeThreadFunction, pDataArray[i-1], 0, &dwThreadIdArray[i-1]);   
1786                 }
1787                 
1788                 
1789                 //using the main process as a worker saves time and memory
1790                 num = driverGroups(outputFName, filename, accnos, alns, lines[0].start, lines[0].end, groups);
1791                 
1792                 //Wait until all threads have terminated.
1793                 WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE);
1794                 
1795                 //Close all thread handles and free memory allocations.
1796                 for(int i=0; i < pDataArray.size(); i++){
1797                         num += pDataArray[i]->count;
1798                         CloseHandle(hThreadArray[i]);
1799                         delete pDataArray[i];
1800                 }
1801 #endif          
1802                 
1803                                 
1804                 //append output files
1805                 for(int i=0;i<processIDS.size();i++){
1806                         m->appendFiles((outputFName + toString(processIDS[i]) + ".temp"), outputFName);
1807                         m->mothurRemove((outputFName + toString(processIDS[i]) + ".temp"));
1808                         
1809                         m->appendFiles((accnos + toString(processIDS[i]) + ".temp"), accnos);
1810                         m->mothurRemove((accnos + toString(processIDS[i]) + ".temp"));
1811                         
1812                         if (chimealns) {
1813                                 m->appendFiles((alns + toString(processIDS[i]) + ".temp"), alns);
1814                                 m->mothurRemove((alns + toString(processIDS[i]) + ".temp"));
1815                         }
1816                 }
1817                 
1818                 return num;     
1819                 
1820         }
1821         catch(exception& e) {
1822                 m->errorOut(e, "ChimeraUchimeCommand", "createProcessesGroups");
1823                 exit(1);
1824         }
1825 }
1826 /**************************************************************************************************/
1827