]> git.donarmstrong.com Git - mothur.git/blob - chimerauchimecommand.cpp
added dereplicate parameter to chimera.slayer and chimera.persues. added minnumsample...
[mothur.git] / chimerauchimecommand.cpp
1 /*
2  *  chimerauchimecommand.cpp
3  *  Mothur
4  *
5  *  Created by westcott on 5/13/11.
6  *  Copyright 2011 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "chimerauchimecommand.h"
11 #include "deconvolutecommand.h"
12 //#include "uc.h"
13 #include "sequence.hpp"
14 #include "referencedb.h"
15 #include "systemcommand.h"
16
17 //**********************************************************************************************************************
18 vector<string> ChimeraUchimeCommand::setParameters(){   
19         try {
20                 CommandParameter ptemplate("reference", "InputTypes", "", "", "none", "none", "none","",false,true,true); parameters.push_back(ptemplate);
21                 CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none","chimera-accnos",false,true,true); parameters.push_back(pfasta);
22                 CommandParameter pname("name", "InputTypes", "", "", "NameCount", "none", "none","",false,false,true); parameters.push_back(pname);
23         CommandParameter pcount("count", "InputTypes", "", "", "NameCount-CountGroup", "none", "none","",false,false,true); parameters.push_back(pcount);
24                 CommandParameter pgroup("group", "InputTypes", "", "", "CountGroup", "none", "none","",false,false,true); parameters.push_back(pgroup);
25                 CommandParameter pprocessors("processors", "Number", "", "1", "", "", "","",false,false,true); parameters.push_back(pprocessors);
26                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
27                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
28                 CommandParameter pabskew("abskew", "Number", "", "1.9", "", "", "","",false,false); parameters.push_back(pabskew);
29                 CommandParameter pchimealns("chimealns", "Boolean", "", "F", "", "", "","alns",false,false); parameters.push_back(pchimealns);
30                 CommandParameter pminh("minh", "Number", "", "0.3", "", "", "","",false,false); parameters.push_back(pminh);
31                 CommandParameter pmindiv("mindiv", "Number", "", "0.5", "", "", "","",false,false); parameters.push_back(pmindiv);
32                 CommandParameter pxn("xn", "Number", "", "8.0", "", "", "","",false,false); parameters.push_back(pxn);
33                 CommandParameter pdn("dn", "Number", "", "1.4", "", "", "","",false,false); parameters.push_back(pdn);
34                 CommandParameter pxa("xa", "Number", "", "1", "", "", "","",false,false); parameters.push_back(pxa);
35                 CommandParameter pchunks("chunks", "Number", "", "4", "", "", "","",false,false); parameters.push_back(pchunks);
36                 CommandParameter pminchunk("minchunk", "Number", "", "64", "", "", "","",false,false); parameters.push_back(pminchunk);
37                 CommandParameter pidsmoothwindow("idsmoothwindow", "Number", "", "32", "", "", "","",false,false); parameters.push_back(pidsmoothwindow);
38         CommandParameter pdups("dereplicate", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(pdups);
39
40                 //CommandParameter pminsmoothid("minsmoothid", "Number", "", "0.95", "", "", "",false,false); parameters.push_back(pminsmoothid);
41                 CommandParameter pmaxp("maxp", "Number", "", "2", "", "", "","",false,false); parameters.push_back(pmaxp);
42                 CommandParameter pskipgaps("skipgaps", "Boolean", "", "T", "", "", "","",false,false); parameters.push_back(pskipgaps);
43                 CommandParameter pskipgaps2("skipgaps2", "Boolean", "", "T", "", "", "","",false,false); parameters.push_back(pskipgaps2);
44                 CommandParameter pminlen("minlen", "Number", "", "10", "", "", "","",false,false); parameters.push_back(pminlen);
45                 CommandParameter pmaxlen("maxlen", "Number", "", "10000", "", "", "","",false,false); parameters.push_back(pmaxlen);
46                 CommandParameter pucl("ucl", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(pucl);
47                 CommandParameter pqueryfract("queryfract", "Number", "", "0.5", "", "", "","",false,false); parameters.push_back(pqueryfract);
48
49                 vector<string> myArray;
50                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
51                 return myArray;
52         }
53         catch(exception& e) {
54                 m->errorOut(e, "ChimeraUchimeCommand", "setParameters");
55                 exit(1);
56         }
57 }
58 //**********************************************************************************************************************
59 string ChimeraUchimeCommand::getHelpString(){   
60         try {
61                 string helpString = "";
62                 helpString += "The chimera.uchime command reads a fastafile and referencefile and outputs potentially chimeric sequences.\n";
63                 helpString += "This command is a wrapper for uchime written by Robert C. Edgar.\n";
64                 helpString += "The chimera.uchime command parameters are fasta, name, count, reference, processors, dereplicate, abskew, chimealns, minh, mindiv, xn, dn, xa, chunks, minchunk, idsmoothwindow, minsmoothid, maxp, skipgaps, skipgaps2, minlen, maxlen, ucl and queryfact.\n";
65                 helpString += "The fasta parameter allows you to enter the fasta file containing your potentially chimeric sequences, and is required, unless you have a valid current fasta file. \n";
66                 helpString += "The name parameter allows you to provide a name file, if you are using template=self. \n";
67         helpString += "The count parameter allows you to provide a count file, if you are using template=self. \n";
68                 helpString += "You may enter multiple fasta files by separating their names with dashes. ie. fasta=abrecovery.fasta-amazon.fasta \n";
69                 helpString += "The group parameter allows you to provide a group file. The group file can be used with a namesfile and reference=self. When checking sequences, only sequences from the same group as the query sequence will be used as the reference. \n";
70         helpString += "If the dereplicate parameter is false, then if one group finds the seqeunce to be chimeric, then all groups find it to be chimeric, default=f.\n";
71                 helpString += "The reference parameter allows you to enter a reference file containing known non-chimeric sequences, and is required. You may also set template=self, in this case the abundant sequences will be used as potential parents. \n";
72                 helpString += "The processors parameter allows you to specify how many processors you would like to use.  The default is 1. \n";
73                 helpString += "The abskew parameter can only be used with template=self. Minimum abundance skew. Default 1.9. Abundance skew is: min [ abund(parent1), abund(parent2) ] / abund(query).\n";
74                 helpString += "The chimealns parameter allows you to indicate you would like a file containing multiple alignments of query sequences to parents in human readable format. Alignments show columns with differences that support or contradict a chimeric model.\n";
75                 helpString += "The minh parameter - mininum score to report chimera. Default 0.3. Values from 0.1 to 5 might be reasonable. Lower values increase sensitivity but may report more false positives. If you decrease xn you may need to increase minh, and vice versa.\n";
76                 helpString += "The mindiv parameter - minimum divergence ratio, default 0.5. Div ratio is 100%% - %%identity between query sequence and the closest candidate for being a parent. If you don't care about very close chimeras, then you could increase mindiv to, say, 1.0 or 2.0, and also decrease minh, say to 0.1, to increase sensitivity. How well this works will depend on your data. Best is to tune parameters on a good benchmark.\n";
77                 helpString += "The xn parameter - weight of a no vote. Default 8.0. Decreasing this weight to around 3 or 4 may give better performance on denoised data.\n";
78                 helpString += "The dn parameter - pseudo-count prior on number of no votes. Default 1.4. Probably no good reason to change this unless you can retune to a good benchmark for your data. Reasonable values are probably in the range from 0.2 to 2.\n";
79                 helpString += "The xa parameter - weight of an abstain vote. Default 1. So far, results do not seem to be very sensitive to this parameter, but if you have a good training set might be worth trying. Reasonable values might range from 0.1 to 2.\n";
80                 helpString += "The chunks parameter is the number of chunks to extract from the query sequence when searching for parents. Default 4.\n";
81                 helpString += "The minchunk parameter is the minimum length of a chunk. Default 64.\n";
82                 helpString += "The idsmoothwindow parameter is the length of id smoothing window. Default 32.\n";
83                 //helpString += "The minsmoothid parameter - minimum factional identity over smoothed window of candidate parent. Default 0.95.\n";
84                 helpString += "The maxp parameter - maximum number of candidate parents to consider. Default 2. In tests so far, increasing maxp gives only a very small improvement in sensivity but tends to increase the error rate quite a bit.\n";
85                 helpString += "The skipgaps parameter controls how gapped columns affect counting of diffs. If skipgaps is set to T, columns containing gaps do not found as diffs. Default = T.\n";
86                 helpString += "The skipgaps2 parameter controls how gapped columns affect counting of diffs. If skipgaps2 is set to T, if column is immediately adjacent to a column containing a gap, it is not counted as a diff. Default = T.\n";
87                 helpString += "The minlen parameter is the minimum unaligned sequence length. Defaults 10. Applies to both query and reference sequences.\n";
88                 helpString += "The maxlen parameter is the maximum unaligned sequence length. Defaults 10000. Applies to both query and reference sequences.\n";
89                 helpString += "The ucl parameter - use local-X alignments. Default is global-X or false. On tests so far, global-X is always better; this option is retained because it just might work well on some future type of data.\n";
90                 helpString += "The queryfract parameter - minimum fraction of the query sequence that must be covered by a local-X alignment. Default 0.5. Applies only when ucl is true.\n";
91 #ifdef USE_MPI
92                 helpString += "When using MPI, the processors parameter is set to the number of MPI processes running. \n";
93 #endif
94                 helpString += "The chimera.uchime command should be in the following format: \n";
95                 helpString += "chimera.uchime(fasta=yourFastaFile, reference=yourTemplate) \n";
96                 helpString += "Example: chimera.uchime(fasta=AD.align, reference=silva.gold.align) \n";
97                 helpString += "Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFastaFile).\n";       
98                 return helpString;
99         }
100         catch(exception& e) {
101                 m->errorOut(e, "ChimeraUchimeCommand", "getHelpString");
102                 exit(1);
103         }
104 }
105 //**********************************************************************************************************************
106 string ChimeraUchimeCommand::getOutputPattern(string type) {
107     try {
108         string pattern = "";
109         
110         if (type == "chimera") {  pattern = "[filename],uchime.chimeras"; } 
111         else if (type == "accnos") {  pattern = "[filename],uchime.accnos"; } 
112         else if (type == "alns") {  pattern = "[filename],uchime.alns"; } 
113         else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true;  }
114         
115         return pattern;
116     }
117     catch(exception& e) {
118         m->errorOut(e, "ChimeraUchimeCommand", "getOutputPattern");
119         exit(1);
120     }
121 }
122 //**********************************************************************************************************************
123 ChimeraUchimeCommand::ChimeraUchimeCommand(){   
124         try {
125                 abort = true; calledHelp = true;
126                 setParameters();
127                 vector<string> tempOutNames;
128                 outputTypes["chimera"] = tempOutNames;
129                 outputTypes["accnos"] = tempOutNames;
130                 outputTypes["alns"] = tempOutNames;
131         }
132         catch(exception& e) {
133                 m->errorOut(e, "ChimeraUchimeCommand", "ChimeraUchimeCommand");
134                 exit(1);
135         }
136 }
137 //***************************************************************************************************************
138 ChimeraUchimeCommand::ChimeraUchimeCommand(string option)  {
139         try {
140                 abort = false; calledHelp = false; hasName=false; hasCount=false;
141                 ReferenceDB* rdb = ReferenceDB::getInstance();
142                 
143                 //allow user to run help
144                 if(option == "help") { help(); abort = true; calledHelp = true; }
145                 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
146                 
147                 else {
148                         vector<string> myArray = setParameters();
149                         
150                         OptionParser parser(option);
151                         map<string,string> parameters = parser.getParameters();
152                         
153                         ValidParameters validParameter("chimera.uchime");
154                         map<string,string>::iterator it;
155                         
156                         //check to make sure all parameters are valid for command
157                         for (it = parameters.begin(); it != parameters.end(); it++) { 
158                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
159                         }
160                         
161                         vector<string> tempOutNames;
162                         outputTypes["chimera"] = tempOutNames;
163                         outputTypes["accnos"] = tempOutNames;
164                         outputTypes["alns"] = tempOutNames;
165                         
166                         //if the user changes the input directory command factory will send this info to us in the output parameter 
167                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
168                         if (inputDir == "not found"){   inputDir = "";          }
169                         
170                         //check for required parameters
171                         fastafile = validParameter.validFile(parameters, "fasta", false);
172                         if (fastafile == "not found") {                                 
173                                 //if there is a current fasta file, use it
174                                 string filename = m->getFastaFile(); 
175                                 if (filename != "") { fastaFileNames.push_back(filename); m->mothurOut("Using " + filename + " as input file for the fasta parameter."); m->mothurOutEndLine(); }
176                                 else {  m->mothurOut("You have no current fastafile and the fasta parameter is required."); m->mothurOutEndLine(); abort = true; }
177                         }else { 
178                                 m->splitAtDash(fastafile, fastaFileNames);
179                                 
180                                 //go through files and make sure they are good, if not, then disregard them
181                                 for (int i = 0; i < fastaFileNames.size(); i++) {
182                                         
183                                         bool ignore = false;
184                                         if (fastaFileNames[i] == "current") { 
185                                                 fastaFileNames[i] = m->getFastaFile(); 
186                                                 if (fastaFileNames[i] != "") {  m->mothurOut("Using " + fastaFileNames[i] + " as input file for the fasta parameter where you had given current."); m->mothurOutEndLine(); }
187                                                 else {  
188                                                         m->mothurOut("You have no current fastafile, ignoring current."); m->mothurOutEndLine(); ignore=true; 
189                                                         //erase from file list
190                                                         fastaFileNames.erase(fastaFileNames.begin()+i);
191                                                         i--;
192                                                 }
193                                         }
194                                         
195                                         if (!ignore) {
196                                                 
197                                                 if (inputDir != "") {
198                                                         string path = m->hasPath(fastaFileNames[i]);
199                                                         //if the user has not given a path then, add inputdir. else leave path alone.
200                                                         if (path == "") {       fastaFileNames[i] = inputDir + fastaFileNames[i];               }
201                                                 }
202                                                 
203                                                 int ableToOpen;
204                                                 ifstream in;
205                                                 
206                                                 ableToOpen = m->openInputFile(fastaFileNames[i], in, "noerror");
207                                                 
208                                                 //if you can't open it, try default location
209                                                 if (ableToOpen == 1) {
210                                                         if (m->getDefaultPath() != "") { //default path is set
211                                                                 string tryPath = m->getDefaultPath() + m->getSimpleName(fastaFileNames[i]);
212                                                                 m->mothurOut("Unable to open " + fastaFileNames[i] + ". Trying default " + tryPath); m->mothurOutEndLine();
213                                                                 ifstream in2;
214                                                                 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
215                                                                 in2.close();
216                                                                 fastaFileNames[i] = tryPath;
217                                                         }
218                                                 }
219                                                 
220                                                 if (ableToOpen == 1) {
221                                                         if (m->getOutputDir() != "") { //default path is set
222                                                                 string tryPath = m->getOutputDir() + m->getSimpleName(fastaFileNames[i]);
223                                                                 m->mothurOut("Unable to open " + fastaFileNames[i] + ". Trying output directory " + tryPath); m->mothurOutEndLine();
224                                                                 ifstream in2;
225                                                                 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
226                                                                 in2.close();
227                                                                 fastaFileNames[i] = tryPath;
228                                                         }
229                                                 }
230                                                 
231                                                 in.close();
232                                                 
233                                                 if (ableToOpen == 1) { 
234                                                         m->mothurOut("Unable to open " + fastaFileNames[i] + ". It will be disregarded."); m->mothurOutEndLine(); 
235                                                         //erase from file list
236                                                         fastaFileNames.erase(fastaFileNames.begin()+i);
237                                                         i--;
238                                                 }else {
239                                                         m->setFastaFile(fastaFileNames[i]);
240                                                 }
241                                         }
242                                 }
243                                 
244                                 //make sure there is at least one valid file left
245                                 if (fastaFileNames.size() == 0) { m->mothurOut("[ERROR]: no valid files."); m->mothurOutEndLine(); abort = true; }
246                         }
247                         
248                         
249                         //check for required parameters
250                         namefile = validParameter.validFile(parameters, "name", false);
251                         if (namefile == "not found") { namefile = "";   }
252                         else { 
253                                 m->splitAtDash(namefile, nameFileNames);
254                                 
255                                 //go through files and make sure they are good, if not, then disregard them
256                                 for (int i = 0; i < nameFileNames.size(); i++) {
257                                         
258                                         bool ignore = false;
259                                         if (nameFileNames[i] == "current") { 
260                                                 nameFileNames[i] = m->getNameFile(); 
261                                                 if (nameFileNames[i] != "") {  m->mothurOut("Using " + nameFileNames[i] + " as input file for the name parameter where you had given current."); m->mothurOutEndLine(); }
262                                                 else {  
263                                                         m->mothurOut("You have no current namefile, ignoring current."); m->mothurOutEndLine(); ignore=true; 
264                                                         //erase from file list
265                                                         nameFileNames.erase(nameFileNames.begin()+i);
266                                                         i--;
267                                                 }
268                                         }
269                                         
270                                         if (!ignore) {
271                                                 
272                                                 if (inputDir != "") {
273                                                         string path = m->hasPath(nameFileNames[i]);
274                                                         //if the user has not given a path then, add inputdir. else leave path alone.
275                                                         if (path == "") {       nameFileNames[i] = inputDir + nameFileNames[i];         }
276                                                 }
277                                                 
278                                                 int ableToOpen;
279                                                 ifstream in;
280                                                 
281                                                 ableToOpen = m->openInputFile(nameFileNames[i], in, "noerror");
282                                                 
283                                                 //if you can't open it, try default location
284                                                 if (ableToOpen == 1) {
285                                                         if (m->getDefaultPath() != "") { //default path is set
286                                                                 string tryPath = m->getDefaultPath() + m->getSimpleName(nameFileNames[i]);
287                                                                 m->mothurOut("Unable to open " + nameFileNames[i] + ". Trying default " + tryPath); m->mothurOutEndLine();
288                                                                 ifstream in2;
289                                                                 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
290                                                                 in2.close();
291                                                                 nameFileNames[i] = tryPath;
292                                                         }
293                                                 }
294                                                 
295                                                 if (ableToOpen == 1) {
296                                                         if (m->getOutputDir() != "") { //default path is set
297                                                                 string tryPath = m->getOutputDir() + m->getSimpleName(nameFileNames[i]);
298                                                                 m->mothurOut("Unable to open " + nameFileNames[i] + ". Trying output directory " + tryPath); m->mothurOutEndLine();
299                                                                 ifstream in2;
300                                                                 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
301                                                                 in2.close();
302                                                                 nameFileNames[i] = tryPath;
303                                                         }
304                                                 }
305                                                 
306                                                 in.close();
307                                                 
308                                                 if (ableToOpen == 1) { 
309                                                         m->mothurOut("Unable to open " + nameFileNames[i] + ". It will be disregarded."); m->mothurOutEndLine(); 
310                                                         //erase from file list
311                                                         nameFileNames.erase(nameFileNames.begin()+i);
312                                                         i--;
313                                                 }else {
314                                                         m->setNameFile(nameFileNames[i]);
315                                                 }
316                                         }
317                                 }
318                         }
319             
320             if (nameFileNames.size() != 0) { hasName = true; }
321             
322             //check for required parameters
323             vector<string> countfileNames;
324                         countfile = validParameter.validFile(parameters, "count", false);
325                         if (countfile == "not found") { 
326                 countfile = "";  
327                         }else { 
328                                 m->splitAtDash(countfile, countfileNames);
329                                 
330                                 //go through files and make sure they are good, if not, then disregard them
331                                 for (int i = 0; i < countfileNames.size(); i++) {
332                                         
333                                         bool ignore = false;
334                                         if (countfileNames[i] == "current") { 
335                                                 countfileNames[i] = m->getCountTableFile(); 
336                                                 if (nameFileNames[i] != "") {  m->mothurOut("Using " + countfileNames[i] + " as input file for the count parameter where you had given current."); m->mothurOutEndLine(); }
337                                                 else {  
338                                                         m->mothurOut("You have no current count file, ignoring current."); m->mothurOutEndLine(); ignore=true; 
339                                                         //erase from file list
340                                                         countfileNames.erase(countfileNames.begin()+i);
341                                                         i--;
342                                                 }
343                                         }
344                                         
345                                         if (!ignore) {
346                                                 
347                                                 if (inputDir != "") {
348                                                         string path = m->hasPath(countfileNames[i]);
349                                                         //if the user has not given a path then, add inputdir. else leave path alone.
350                                                         if (path == "") {       countfileNames[i] = inputDir + countfileNames[i];               }
351                                                 }
352                                                 
353                                                 int ableToOpen;
354                                                 ifstream in;
355                                                 
356                                                 ableToOpen = m->openInputFile(countfileNames[i], in, "noerror");
357                                                 
358                                                 //if you can't open it, try default location
359                                                 if (ableToOpen == 1) {
360                                                         if (m->getDefaultPath() != "") { //default path is set
361                                                                 string tryPath = m->getDefaultPath() + m->getSimpleName(countfileNames[i]);
362                                                                 m->mothurOut("Unable to open " + countfileNames[i] + ". Trying default " + tryPath); m->mothurOutEndLine();
363                                                                 ifstream in2;
364                                                                 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
365                                                                 in2.close();
366                                                                 countfileNames[i] = tryPath;
367                                                         }
368                                                 }
369                                                 
370                                                 if (ableToOpen == 1) {
371                                                         if (m->getOutputDir() != "") { //default path is set
372                                                                 string tryPath = m->getOutputDir() + m->getSimpleName(countfileNames[i]);
373                                                                 m->mothurOut("Unable to open " + countfileNames[i] + ". Trying output directory " + tryPath); m->mothurOutEndLine();
374                                                                 ifstream in2;
375                                                                 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
376                                                                 in2.close();
377                                                                 countfileNames[i] = tryPath;
378                                                         }
379                                                 }
380                                                 
381                                                 in.close();
382                                                 
383                                                 if (ableToOpen == 1) { 
384                                                         m->mothurOut("Unable to open " + countfileNames[i] + ". It will be disregarded."); m->mothurOutEndLine(); 
385                                                         //erase from file list
386                                                         countfileNames.erase(countfileNames.begin()+i);
387                                                         i--;
388                                                 }else {
389                                                         m->setCountTableFile(countfileNames[i]);
390                                                 }
391                                         }
392                                 }
393                         }
394             
395             if (countfileNames.size() != 0) { hasCount = true; }
396             
397                         //make sure there is at least one valid file left
398             if (hasName && hasCount) { m->mothurOut("[ERROR]: You must enter ONLY ONE of the following: count or name."); m->mothurOutEndLine(); abort = true; }
399             
400             if (!hasName && hasCount) { nameFileNames = countfileNames; }
401             
402                         if ((hasCount || hasName) && (nameFileNames.size() != fastaFileNames.size())) { m->mothurOut("[ERROR]: The number of name or count files does not match the number of fastafiles, please correct."); m->mothurOutEndLine(); abort=true; }
403                         
404                         bool hasGroup = true;
405                         groupfile = validParameter.validFile(parameters, "group", false);
406                         if (groupfile == "not found") { groupfile = "";  hasGroup = false; }
407                         else { 
408                                 m->splitAtDash(groupfile, groupFileNames);
409                                 
410                                 //go through files and make sure they are good, if not, then disregard them
411                                 for (int i = 0; i < groupFileNames.size(); i++) {
412                                         
413                                         bool ignore = false;
414                                         if (groupFileNames[i] == "current") { 
415                                                 groupFileNames[i] = m->getGroupFile(); 
416                                                 if (groupFileNames[i] != "") {  m->mothurOut("Using " + groupFileNames[i] + " as input file for the group parameter where you had given current."); m->mothurOutEndLine(); }
417                                                 else {  
418                                                         m->mothurOut("You have no current namefile, ignoring current."); m->mothurOutEndLine(); ignore=true; 
419                                                         //erase from file list
420                                                         groupFileNames.erase(groupFileNames.begin()+i);
421                                                         i--;
422                                                 }
423                                         }
424                                         
425                                         if (!ignore) {
426                                                 
427                                                 if (inputDir != "") {
428                                                         string path = m->hasPath(groupFileNames[i]);
429                                                         //if the user has not given a path then, add inputdir. else leave path alone.
430                                                         if (path == "") {       groupFileNames[i] = inputDir + groupFileNames[i];               }
431                                                 }
432                                                 
433                                                 int ableToOpen;
434                                                 ifstream in;
435                                                 
436                                                 ableToOpen = m->openInputFile(groupFileNames[i], in, "noerror");
437                                                 
438                                                 //if you can't open it, try default location
439                                                 if (ableToOpen == 1) {
440                                                         if (m->getDefaultPath() != "") { //default path is set
441                                                                 string tryPath = m->getDefaultPath() + m->getSimpleName(groupFileNames[i]);
442                                                                 m->mothurOut("Unable to open " + groupFileNames[i] + ". Trying default " + tryPath); m->mothurOutEndLine();
443                                                                 ifstream in2;
444                                                                 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
445                                                                 in2.close();
446                                                                 groupFileNames[i] = tryPath;
447                                                         }
448                                                 }
449                                                 
450                                                 if (ableToOpen == 1) {
451                                                         if (m->getOutputDir() != "") { //default path is set
452                                                                 string tryPath = m->getOutputDir() + m->getSimpleName(groupFileNames[i]);
453                                                                 m->mothurOut("Unable to open " + groupFileNames[i] + ". Trying output directory " + tryPath); m->mothurOutEndLine();
454                                                                 ifstream in2;
455                                                                 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
456                                                                 in2.close();
457                                                                 groupFileNames[i] = tryPath;
458                                                         }
459                                                 }
460                                                 
461                                                 in.close();
462                                                 
463                                                 if (ableToOpen == 1) { 
464                                                         m->mothurOut("Unable to open " + groupFileNames[i] + ". It will be disregarded."); m->mothurOutEndLine(); 
465                                                         //erase from file list
466                                                         groupFileNames.erase(groupFileNames.begin()+i);
467                                                         i--;
468                                                 }else {
469                                                         m->setGroupFile(groupFileNames[i]);
470                                                 }
471                                         }
472                                 }
473                                 
474                                 //make sure there is at least one valid file left
475                                 if (groupFileNames.size() == 0) { m->mothurOut("[ERROR]: no valid group files."); m->mothurOutEndLine(); abort = true; }
476                         }
477                         
478                         if (hasGroup && (groupFileNames.size() != fastaFileNames.size())) { m->mothurOut("[ERROR]: The number of groupfiles does not match the number of fastafiles, please correct."); m->mothurOutEndLine(); abort=true; }
479                         
480             if (hasGroup && hasCount) { m->mothurOut("[ERROR]: You must enter ONLY ONE of the following: count or group."); m->mothurOutEndLine(); abort = true; }                      
481                         //if the user changes the output directory command factory will send this info to us in the output parameter 
482                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = ""; }
483                         
484                         
485                         //if the user changes the output directory command factory will send this info to us in the output parameter 
486                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = ""; }
487                         
488                         string path;
489                         it = parameters.find("reference");
490                         //user has given a template file
491                         if(it != parameters.end()){ 
492                                 if (it->second == "self") { templatefile = "self"; }
493                                 else {
494                                         path = m->hasPath(it->second);
495                                         //if the user has not given a path then, add inputdir. else leave path alone.
496                                         if (path == "") {       parameters["reference"] = inputDir + it->second;                }
497                                         
498                                         templatefile = validParameter.validFile(parameters, "reference", true);
499                                         if (templatefile == "not open") { abort = true; }
500                                         else if (templatefile == "not found") { //check for saved reference sequences
501                                                 if (rdb->getSavedReference() != "") {
502                                                         templatefile = rdb->getSavedReference();
503                                                         m->mothurOutEndLine();  m->mothurOut("Using sequences from " + rdb->getSavedReference() + "."); m->mothurOutEndLine();
504                                                 }else {
505                                                         m->mothurOut("[ERROR]: You don't have any saved reference sequences and the reference parameter is a required."); 
506                                                         m->mothurOutEndLine();
507                                                         abort = true; 
508                                                 }
509                                         }
510                                 }
511                         }else if (hasName) {  templatefile = "self"; }
512             else if (hasCount) {  templatefile = "self"; }
513                         else { 
514                                 if (rdb->getSavedReference() != "") {
515                                         templatefile = rdb->getSavedReference();
516                                         m->mothurOutEndLine();  m->mothurOut("Using sequences from " + rdb->getSavedReference() + "."); m->mothurOutEndLine();
517                                 }else {
518                                         m->mothurOut("[ERROR]: You don't have any saved reference sequences and the reference parameter is a required."); 
519                                         m->mothurOutEndLine();
520                                         templatefile = ""; abort = true; 
521                                 } 
522                         }
523                                 
524                         string temp = validParameter.validFile(parameters, "processors", false);        if (temp == "not found"){       temp = m->getProcessors();      }
525                         m->setProcessors(temp);
526                         m->mothurConvert(temp, processors);
527                         
528                         abskew = validParameter.validFile(parameters, "abskew", false); if (abskew == "not found"){     useAbskew = false;  abskew = "1.9";     }else{  useAbskew = true;  }
529                         if (useAbskew && templatefile != "self") { m->mothurOut("The abskew parameter is only valid with template=self, ignoring."); m->mothurOutEndLine(); useAbskew = false; }
530                         
531                         temp = validParameter.validFile(parameters, "chimealns", false);                        if (temp == "not found") { temp = "f"; }
532                         chimealns = m->isTrue(temp); 
533                         
534                         minh = validParameter.validFile(parameters, "minh", false);                                             if (minh == "not found")                        { useMinH = false; minh = "0.3";                                        }       else{ useMinH = true;                   }
535                         mindiv = validParameter.validFile(parameters, "mindiv", false);                                 if (mindiv == "not found")                      { useMindiv = false; mindiv = "0.5";                            }       else{ useMindiv = true;                 }
536                         xn = validParameter.validFile(parameters, "xn", false);                                                 if (xn == "not found")                          { useXn = false; xn = "8.0";                                            }       else{ useXn = true;                             }
537                         dn = validParameter.validFile(parameters, "dn", false);                                                 if (dn == "not found")                          { useDn = false; dn = "1.4";                                            }       else{ useDn = true;                             }
538                         xa = validParameter.validFile(parameters, "xa", false);                                                 if (xa == "not found")                          { useXa = false; xa = "1";                                                      }       else{ useXa = true;                             }
539                         chunks = validParameter.validFile(parameters, "chunks", false);                                 if (chunks == "not found")                      { useChunks = false; chunks = "4";                                      }       else{ useChunks = true;                 }
540                         minchunk = validParameter.validFile(parameters, "minchunk", false);                             if (minchunk == "not found")            { useMinchunk = false; minchunk = "64";                         }       else{ useMinchunk = true;               }
541                         idsmoothwindow = validParameter.validFile(parameters, "idsmoothwindow", false); if (idsmoothwindow == "not found")      { useIdsmoothwindow = false; idsmoothwindow = "32";     }       else{ useIdsmoothwindow = true; }
542                         //minsmoothid = validParameter.validFile(parameters, "minsmoothid", false);             if (minsmoothid == "not found")         { useMinsmoothid = false; minsmoothid = "0.95";         }       else{ useMinsmoothid = true;    }
543                         maxp = validParameter.validFile(parameters, "maxp", false);                                             if (maxp == "not found")                        { useMaxp = false; maxp = "2";                                          }       else{ useMaxp = true;                   }
544                         minlen = validParameter.validFile(parameters, "minlen", false);                                 if (minlen == "not found")                      { useMinlen = false; minlen = "10";                                     }       else{ useMinlen = true;                 }
545                         maxlen = validParameter.validFile(parameters, "maxlen", false);                                 if (maxlen == "not found")                      { useMaxlen = false; maxlen = "10000";                          }       else{ useMaxlen = true;                 }
546                         
547                         temp = validParameter.validFile(parameters, "ucl", false);                                              if (temp == "not found") { temp = "f"; }
548                         ucl = m->isTrue(temp);
549                         
550                         queryfract = validParameter.validFile(parameters, "queryfract", false);                 if (queryfract == "not found")          { useQueryfract = false; queryfract = "0.5";            }       else{ useQueryfract = true;             }
551                         if (!ucl && useQueryfract) { m->mothurOut("queryfact may only be used when ucl=t, ignoring."); m->mothurOutEndLine(); useQueryfract = false; }
552                         
553                         temp = validParameter.validFile(parameters, "skipgaps", false);                                 if (temp == "not found") { temp = "t"; }
554                         skipgaps = m->isTrue(temp); 
555
556                         temp = validParameter.validFile(parameters, "skipgaps2", false);                                if (temp == "not found") { temp = "t"; }
557                         skipgaps2 = m->isTrue(temp); 
558             
559             
560                         temp = validParameter.validFile(parameters, "dereplicate", false);      
561                         if (temp == "not found") { 
562                                 if (groupfile != "")    {  temp = "false";                                      }
563                                 else                    {  temp = "true";       }
564                         }
565                         dups = m->isTrue(temp);
566
567                         
568                         if (hasName && (templatefile != "self")) { m->mothurOut("You have provided a namefile and the reference parameter is not set to self. I am not sure what reference you are trying to use, aborting."); m->mothurOutEndLine(); abort=true; }
569                         if (hasGroup && (templatefile != "self")) { m->mothurOut("You have provided a group file and the reference parameter is not set to self. I am not sure what reference you are trying to use, aborting."); m->mothurOutEndLine(); abort=true; }
570                         
571                         //look for uchime exe
572                         path = m->argv;
573                         string tempPath = path;
574                         for (int i = 0; i < path.length(); i++) { tempPath[i] = tolower(path[i]); }
575                         path = path.substr(0, (tempPath.find_last_of('m')));
576                         
577                         string uchimeCommand;
578 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
579                         uchimeCommand = path + "uchime";        //      format the database, -o option gives us the ability
580             if (m->debug) { 
581                 m->mothurOut("[DEBUG]: Uchime location using \"which uchime\" = "); 
582                 Command* newCommand = new SystemCommand("which uchime"); m->mothurOutEndLine();
583                 newCommand->execute();
584                 delete newCommand;
585                 m->mothurOut("[DEBUG]: Mothur's location using \"which mothur\" = "); 
586                 newCommand = new SystemCommand("which mothur"); m->mothurOutEndLine();
587                 newCommand->execute();
588                 delete newCommand;
589             }
590 #else
591                         uchimeCommand = path + "uchime.exe";
592 #endif
593         
594                         //test to make sure uchime exists
595                         ifstream in;
596                         uchimeCommand = m->getFullPathName(uchimeCommand);
597                         int ableToOpen = m->openInputFile(uchimeCommand, in, "no error"); in.close();
598                         if(ableToOpen == 1) {   
599                 m->mothurOut(uchimeCommand + " file does not exist. Checking path... \n");
600                 //check to see if uchime is in the path??
601                 
602                 string uLocation = m->findProgramPath("uchime");
603                 
604                 
605                 ifstream in2;
606 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
607                 ableToOpen = m->openInputFile(uLocation, in2, "no error"); in2.close();
608 #else
609                 ableToOpen = m->openInputFile((uLocation + ".exe"), in2, "no error"); in2.close();
610 #endif
611
612                 if(ableToOpen == 1) { m->mothurOut("[ERROR]: " + uLocation + " file does not exist. mothur requires the uchime executable."); m->mothurOutEndLine(); abort = true; } 
613                 else {  m->mothurOut("Found uchime in your path, using " + uLocation + "\n");uchimeLocation = uLocation; }
614             }else {  uchimeLocation = uchimeCommand; }
615             
616             uchimeLocation = m->getFullPathName(uchimeLocation);
617         }
618         }
619         catch(exception& e) {
620                 m->errorOut(e, "ChimeraSlayerCommand", "ChimeraSlayerCommand");
621                 exit(1);
622         }
623 }
624 //***************************************************************************************************************
625
626 int ChimeraUchimeCommand::execute(){
627         try{
628         
629         if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
630                 
631                 m->mothurOut("\nuchime by Robert C. Edgar\nhttp://drive5.com/uchime\nThis code is donated to the public domain.\n\n");
632                 
633                 for (int s = 0; s < fastaFileNames.size(); s++) {
634                         
635                         m->mothurOut("Checking sequences from " + fastaFileNames[s] + " ..." ); m->mothurOutEndLine();
636                         
637                         int start = time(NULL); 
638                         string nameFile = "";
639                         if (outputDir == "") { outputDir = m->hasPath(fastaFileNames[s]);  }//if user entered a file with a path then preserve it                               
640                         map<string, string> variables; 
641             variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s]));
642                         string outputFileName = getOutputFileName("chimera", variables);
643                         string accnosFileName = getOutputFileName("accnos", variables);
644                         string alnsFileName = getOutputFileName("alns", variables);
645                         string newFasta = m->getRootName(fastaFileNames[s]) + "temp";
646                                 
647                         //you provided a groupfile
648                         string groupFile = "";
649             bool hasGroup = false;
650                         if (groupFileNames.size() != 0) { groupFile = groupFileNames[s]; hasGroup = true; }
651             else if (hasCount) {
652                 CountTable ct;
653                 if (ct.testGroups(nameFileNames[s])) { hasGroup = true; }
654             }
655                         
656                         if ((templatefile == "self") && (!hasGroup)) { //you want to run uchime with a template=self and no groups
657
658                                 if (processors != 1) { m->mothurOut("When using template=self, mothur can only use 1 processor, continuing."); m->mothurOutEndLine(); processors = 1; }
659                                 if (nameFileNames.size() != 0) { //you provided a namefile and we don't need to create one
660                                         nameFile = nameFileNames[s];
661                                 }else { nameFile = getNamesFile(fastaFileNames[s]); }
662                                                                                 
663                                 map<string, string> seqs;  
664                                 readFasta(fastaFileNames[s], seqs);  if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) {   m->mothurRemove(outputNames[j]);        }  return 0; }
665
666                                 //read namefile
667                                 vector<seqPriorityNode> nameMapCount;
668                 int error;
669                 if (hasCount) {
670                     CountTable ct;
671                     ct.readTable(nameFile);
672                     for(map<string, string>::iterator it = seqs.begin(); it != seqs.end(); it++) {
673                         int num = ct.getNumSeqs(it->first);
674                         if (num == 0) { error = 1; }
675                         else {
676                             seqPriorityNode temp(num, it->second, it->first);
677                             nameMapCount.push_back(temp);
678                         }
679                     }
680                 }else {
681                     error = m->readNames(nameFile, nameMapCount, seqs); if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) {        m->mothurRemove(outputNames[j]);        }  return 0; }
682                 }
683                                 if (error == 1) { for (int j = 0; j < outputNames.size(); j++) {        m->mothurRemove(outputNames[j]);        }  return 0; }
684                                 if (seqs.size() != nameMapCount.size()) { m->mothurOut( "The number of sequences in your fastafile does not match the number of sequences in your namefile, aborting."); m->mothurOutEndLine(); for (int j = 0; j < outputNames.size(); j++) {  m->mothurRemove(outputNames[j]);        }  return 0; }
685                                 
686                                 printFile(nameMapCount, newFasta);
687                                 fastaFileNames[s] = newFasta;
688                         }
689                         
690                         if (m->control_pressed) {  for (int j = 0; j < outputNames.size(); j++) {       m->mothurRemove(outputNames[j]);        }  return 0;    }                               
691                         
692                         if (hasGroup) {
693                                 if (nameFileNames.size() != 0) { //you provided a namefile and we don't need to create one
694                                         nameFile = nameFileNames[s];
695                                 }else { nameFile = getNamesFile(fastaFileNames[s]); }
696                                 
697                                 //Parse sequences by group
698                 vector<string> groups;
699                 map<string, string> uniqueNames;
700                 if (hasCount) {
701                     cparser = new SequenceCountParser(nameFile, fastaFileNames[s]);
702                     groups = cparser->getNamesOfGroups();
703                     uniqueNames = cparser->getAllSeqsMap();
704                 }else{
705                     sparser = new SequenceParser(groupFile, fastaFileNames[s], nameFile);
706                     groups = sparser->getNamesOfGroups();
707                     uniqueNames = sparser->getAllSeqsMap();
708                 }
709                                         
710                                 if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) {        m->mothurRemove(outputNames[j]);        }  return 0; }
711                                                                 
712                                 //clears files
713                                 ofstream out, out1, out2;
714                                 m->openOutputFile(outputFileName, out); out.close(); 
715                                 m->openOutputFile(accnosFileName, out1); out1.close();
716                                 if (chimealns) { m->openOutputFile(alnsFileName, out2); out2.close(); }
717                                 int totalSeqs = 0;
718                                 
719                                 if(processors == 1)     {       totalSeqs = driverGroups(outputFileName, newFasta, accnosFileName, alnsFileName, 0, groups.size(), groups);     }
720                                 else                            {       totalSeqs = createProcessesGroups(outputFileName, newFasta, accnosFileName, alnsFileName, groups, nameFile, groupFile, fastaFileNames[s]);                      }
721
722                                 if (m->control_pressed) {  for (int j = 0; j < outputNames.size(); j++) {       m->mothurRemove(outputNames[j]);        }  return 0;    }                               
723                 if (hasCount) { delete cparser; }
724                 else { delete sparser; }
725                 
726                 if (!dups) { 
727                     int totalChimeras = deconvoluteResults(uniqueNames, outputFileName, accnosFileName, alnsFileName);
728                                 
729                     m->mothurOutEndLine(); m->mothurOut("It took " + toString(time(NULL) - start) + " secs to check " + toString(totalSeqs) + " sequences. " + toString(totalChimeras) + " chimeras were found.");      m->mothurOutEndLine();
730                     m->mothurOut("The number of sequences checked may be larger than the number of unique sequences because some sequences are found in several samples."); m->mothurOutEndLine(); 
731                                 }
732                 
733                                 if (m->control_pressed) {  for (int j = 0; j < outputNames.size(); j++) {       m->mothurRemove(outputNames[j]);        }  return 0;    }                               
734                                         
735                         }else{
736                                 if (m->control_pressed) {  for (int j = 0; j < outputNames.size(); j++) {       m->mothurRemove(outputNames[j]);        }  return 0;    }
737                         
738                                 int numSeqs = 0;
739                                 int numChimeras = 0;
740
741                                 if(processors == 1){ numSeqs = driver(outputFileName, fastaFileNames[s], accnosFileName, alnsFileName, numChimeras); }
742                                 else{   numSeqs = createProcesses(outputFileName, fastaFileNames[s], accnosFileName, alnsFileName, numChimeras); }
743                                 
744                                 //add headings
745                                 ofstream out;
746                                 m->openOutputFile(outputFileName+".temp", out); 
747                                 out << "Score\tQuery\tParentA\tParentB\tIdQM\tIdQA\tIdQB\tIdAB\tIdQT\tLY\tLN\tLA\tRY\tRN\tRA\tDiv\tYN\n";
748                                 out.close();
749                                 
750                                 m->appendFiles(outputFileName, outputFileName+".temp");
751                                 m->mothurRemove(outputFileName); rename((outputFileName+".temp").c_str(), outputFileName.c_str());
752                                 
753                                 if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) {        m->mothurRemove(outputNames[j]);        } return 0; }
754                         
755                                 //remove file made for uchime
756                                 if (templatefile == "self") {  m->mothurRemove(fastaFileNames[s]); }
757                         
758                                 m->mothurOutEndLine(); m->mothurOut("It took " + toString(time(NULL) - start) + " secs to check " + toString(numSeqs) + " sequences. " + toString(numChimeras) + " chimeras were found.");      m->mothurOutEndLine();
759                         }
760                         
761                         outputNames.push_back(outputFileName); outputTypes["chimera"].push_back(outputFileName);
762                         outputNames.push_back(accnosFileName); outputTypes["accnos"].push_back(accnosFileName);
763                         if (chimealns) { outputNames.push_back(alnsFileName); outputTypes["alns"].push_back(alnsFileName); }
764                 }
765         
766                 //set accnos file as new current accnosfile
767                 string current = "";
768                 itTypes = outputTypes.find("accnos");
769                 if (itTypes != outputTypes.end()) {
770                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setAccnosFile(current); }
771                 }
772                 
773                 m->mothurOutEndLine();
774                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
775                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }       
776                 m->mothurOutEndLine();
777                 
778                 return 0;
779                 
780         }
781         catch(exception& e) {
782                 m->errorOut(e, "ChimeraUchimeCommand", "execute");
783                 exit(1);
784         }
785 }
786 //**********************************************************************************************************************
787 int ChimeraUchimeCommand::deconvoluteResults(map<string, string>& uniqueNames, string outputFileName, string accnosFileName, string alnsFileName){
788         try {
789                 map<string, string>::iterator itUnique;
790                 int total = 0;
791                 
792                 //edit accnos file
793                 ifstream in2; 
794                 m->openInputFile(accnosFileName, in2);
795                 
796                 ofstream out2;
797                 m->openOutputFile(accnosFileName+".temp", out2);
798                 
799                 string name;
800                 set<string> namesInFile; //this is so if a sequence is found to be chimera in several samples we dont write it to the results file more than once
801                 set<string>::iterator itNames;
802                 set<string> chimerasInFile;
803                 set<string>::iterator itChimeras;
804
805                 
806                 while (!in2.eof()) {
807                         if (m->control_pressed) { in2.close(); out2.close(); m->mothurRemove(outputFileName); m->mothurRemove((accnosFileName+".temp")); return 0; }
808                         
809                         in2 >> name; m->gobble(in2);
810                         
811                         //find unique name
812                         itUnique = uniqueNames.find(name);
813                         
814                         if (itUnique == uniqueNames.end()) { m->mothurOut("[ERROR]: trouble parsing accnos results. Cannot find " + name + "."); m->mothurOutEndLine(); m->control_pressed = true; }
815                         else {
816                                 itChimeras = chimerasInFile.find((itUnique->second));
817                                 
818                                 if (itChimeras == chimerasInFile.end()) {
819                                         out2 << itUnique->second << endl;
820                                         chimerasInFile.insert((itUnique->second));
821                                         total++;
822                                 }
823                         }
824                 }
825                 in2.close();
826                 out2.close();
827                 
828                 m->mothurRemove(accnosFileName);
829                 rename((accnosFileName+".temp").c_str(), accnosFileName.c_str());
830                 
831                 
832                 
833                 //edit chimera file
834                 ifstream in; 
835                 m->openInputFile(outputFileName, in);
836                 
837                 ofstream out;
838                 m->openOutputFile(outputFileName+".temp", out); out.setf(ios::fixed, ios::floatfield); out.setf(ios::showpoint);
839                 out << "Score\tQuery\tParentA\tParentB\tIdQM\tIdQA\tIdQB\tIdAB\tIdQT\tLY\tLN\tLA\tRY\tRN\tRA\tDiv\tYN\n";
840                 
841                 float temp1;
842                 string parent1, parent2, temp2, temp3, temp4, temp5, temp6, temp7, temp8, temp9, temp10, temp11, temp12, temp13, flag;
843                 name = "";
844                 namesInFile.clear();    
845                 //assumptions - in file each read will always look like - if uchime source is updated, revisit this code.
846                 /*                                                                              1       2       3       4       5       6       7       8       9       10      11      12      13      14      15
847                  0.000000       F11Fcsw_33372/ab=18/            *       *       *       *       *       *       *       *       *       *       *       *       *       *       N
848                  0.018300       F11Fcsw_14980/ab=16/            F11Fcsw_1915/ab=35/     F11Fcsw_6032/ab=42/     79.9    78.7    78.2    78.7    79.2    3       0       5       11      10      20      1.46    N
849                 */
850                 
851                 while (!in.eof()) {
852                         
853                         if (m->control_pressed) { in.close(); out.close(); m->mothurRemove((outputFileName+".temp")); return 0; }
854                         
855                         bool print = false;
856                         in >> temp1;    m->gobble(in);
857                         in >> name;             m->gobble(in);
858                         in >> parent1;  m->gobble(in);
859                         in >> parent2;  m->gobble(in);
860                         in >> temp2 >> temp3 >> temp4 >> temp5 >> temp6 >> temp7 >> temp8 >> temp9 >> temp10 >> temp11 >> temp12 >> temp13 >> flag;
861                         m->gobble(in);
862                         
863                         //parse name - name will look like U68590/ab=1/
864                         string restOfName = "";
865                         int pos = name.find_first_of('/');
866                         if (pos != string::npos) {
867                                 restOfName = name.substr(pos);
868                                 name = name.substr(0, pos);
869                         }
870                         
871                         //find unique name
872                         itUnique = uniqueNames.find(name);
873                         
874                         if (itUnique == uniqueNames.end()) { m->mothurOut("[ERROR]: trouble parsing chimera results. Cannot find "+ name + "."); m->mothurOutEndLine(); m->control_pressed = true; }
875                         else {
876                                 name = itUnique->second;
877                                 //is this name already in the file
878                                 itNames = namesInFile.find((name));
879                                 
880                                 if (itNames == namesInFile.end()) { //no not in file
881                                         if (flag == "N") { //are you really a no??
882                                                 //is this sequence really not chimeric??
883                                                 itChimeras = chimerasInFile.find(name);
884                                                 
885                                                 //then you really are a no so print, otherwise skip
886                                                 if (itChimeras == chimerasInFile.end()) { print = true; }
887                                         }else{ print = true; }
888                                 }
889                         }
890                         
891                         if (print) {
892                                 out << temp1 << '\t' << name << restOfName << '\t';
893                                 namesInFile.insert(name);
894                                 
895                                 //parse parent1 names
896                                 if (parent1 != "*") {
897                                         restOfName = "";
898                                         pos = parent1.find_first_of('/');
899                                         if (pos != string::npos) {
900                                                 restOfName = parent1.substr(pos);
901                                                 parent1 = parent1.substr(0, pos);
902                                         }
903                                         
904                                         itUnique = uniqueNames.find(parent1);
905                                         if (itUnique == uniqueNames.end()) { m->mothurOut("[ERROR]: trouble parsing chimera results. Cannot find parentA "+ parent1 + "."); m->mothurOutEndLine(); m->control_pressed = true; }
906                                         else {  out << itUnique->second << restOfName << '\t';  }
907                                 }else { out << parent1 << '\t'; }
908                                 
909                                 //parse parent2 names
910                                 if (parent2 != "*") {
911                                         restOfName = "";
912                                         pos = parent2.find_first_of('/');
913                                         if (pos != string::npos) {
914                                                 restOfName = parent2.substr(pos);
915                                                 parent2 = parent2.substr(0, pos);
916                                         }
917                                         
918                                         itUnique = uniqueNames.find(parent2);
919                                         if (itUnique == uniqueNames.end()) { m->mothurOut("[ERROR]: trouble parsing chimera results. Cannot find parentB "+ parent2 + "."); m->mothurOutEndLine(); m->control_pressed = true; }
920                                         else {  out << itUnique->second << restOfName << '\t';  }
921                                 }else { out << parent2 << '\t'; }
922                                 
923                                 out << temp2 << '\t' << temp3 << '\t' << temp4 << '\t' << temp5 << '\t' << temp6 << '\t' << temp7 << '\t' << temp8 << '\t' << temp9 << '\t' << temp10 << '\t' << temp11 << '\t' << temp12 << temp13 << '\t' << flag << endl;    
924                         }
925                 }
926                 in.close();
927                 out.close();
928                 
929                 m->mothurRemove(outputFileName);
930                 rename((outputFileName+".temp").c_str(), outputFileName.c_str());
931                 
932                                 
933                 //edit anls file
934                 //assumptions - in file each read will always look like - if uchime source is updated, revisit this code.
935                 /*
936                  ------------------------------------------------------------------------
937                  Query   (  179 nt) F21Fcsw_11639/ab=591/
938                  ParentA (  179 nt) F11Fcsw_6529/ab=1625/
939                  ParentB (  181 nt) F21Fcsw_12128/ab=1827/
940                  
941                  A     1 AAGgAAGAtTAATACaagATGgCaTCatgAGtccgCATgTtcAcatGATTAAAG--gTaTtcCGGTagacGATGGGGATG 78
942                  Q     1 AAGTAAGACTAATACCCAATGACGTCTCTAGAAGACATCTGAAAGAGATTAAAG--ATTTATCGGTGATGGATGGGGATG 78
943                  B     1 AAGgAAGAtTAATcCaggATGggaTCatgAGttcACATgTccgcatGATTAAAGgtATTTtcCGGTagacGATGGGGATG 80
944                  Diffs      N    N    A N?N   N N  NNN  N?NB   N ?NaNNN          B B NN    NNNN          
945                  Votes      0    0    + 000   0 0  000  000+   0 00!000            + 00    0000          
946                  Model   AAAAAAAAAAAAAAAAAAAAAAxBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB
947                  
948                  A    79 CGTtccATTAGaTaGTaGGCGGGGTAACGGCCCACCtAGtCttCGATggaTAGGGGTTCTGAGAGGAAGGTCCCCCACAT 158
949                  Q    79 CGTCTGATTAGCTTGTTGGCGGGGTAACGGCCCACCAAGGCAACGATCAGTAGGGGTTCTGAGAGGAAGGTCCCCCACAT 158
950                  B    81 CGTtccATTAGaTaGTaGGCGGGGTAACGGCCCACCtAGtCAACGATggaTAGGGGTTCTGAGAGGAAGGTCCCCCACAT 160
951                  Diffs      NNN     N N  N                   N  N BB    NNN                              
952                  Votes      000     0 0  0                   0  0 ++    000                              
953                  Model   BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB
954                  
955                  A   159 TGGAACTGAGACACGGTCCAA 179
956                  Q   159 TGGAACTGAGACACGGTCCAA 179
957                  B   161 TGGAACTGAGACACGGTCCAA 181
958                  Diffs                        
959                  Votes                        
960                  Model   BBBBBBBBBBBBBBBBBBBBB
961                  
962                  Ids.  QA 76.6%, QB 77.7%, AB 93.7%, QModel 78.9%, Div. +1.5%
963                  Diffs Left 7: N 0, A 6, Y 1 (14.3%); Right 35: N 1, A 30, Y 4 (11.4%), Score 0.0047
964                 */
965                 if (chimealns) {
966                         ifstream in3; 
967                         m->openInputFile(alnsFileName, in3);
968                 
969                         ofstream out3;
970                         m->openOutputFile(alnsFileName+".temp", out3); out3.setf(ios::fixed, ios::floatfield); out3.setf(ios::showpoint);
971                 
972                         name = "";
973                         namesInFile.clear();
974                         string line = "";
975                         
976                         while (!in3.eof()) {
977                                 if (m->control_pressed) { in3.close(); out3.close(); m->mothurRemove(outputFileName); m->mothurRemove((accnosFileName)); m->mothurRemove((alnsFileName+".temp")); return 0; }
978                                 
979                                 line = "";
980                                 line = m->getline(in3); 
981                                 string temp = "";
982                                 
983                                 if (line != "") {
984                                         istringstream iss(line);
985                                         iss >> temp;
986                                         
987                                         //are you a name line
988                                         if ((temp == "Query") || (temp == "ParentA") || (temp == "ParentB")) {
989                                                 int spot = 0;
990                                                 for (int i = 0; i < line.length(); i++) {
991                                                         spot = i;
992                                                         if (line[i] == ')') { break; }
993                                                         else { out3 << line[i]; }
994                                                 }
995                                                 
996                                                 if (spot == (line.length() - 1)) { m->mothurOut("[ERROR]: could not line sequence name in line " + line + "."); m->mothurOutEndLine(); m->control_pressed = true; }
997                                                 else if ((spot+2) > (line.length() - 1)) { m->mothurOut("[ERROR]: could not line sequence name in line " + line + "."); m->mothurOutEndLine(); m->control_pressed = true; }
998                                                 else {
999                                                         out << line[spot] << line[spot+1];
1000                                                         
1001                                                         name = line.substr(spot+2);
1002                                                         
1003                                                         //parse name - name will either look like U68590/ab=1/ or U68590
1004                                                         string restOfName = "";
1005                                                         int pos = name.find_first_of('/');
1006                                                         if (pos != string::npos) {
1007                                                                 restOfName = name.substr(pos);
1008                                                                 name = name.substr(0, pos);
1009                                                         }
1010                                                         
1011                                                         //find unique name
1012                                                         itUnique = uniqueNames.find(name);
1013                                                         
1014                                                         if (itUnique == uniqueNames.end()) { m->mothurOut("[ERROR]: trouble parsing alns results. Cannot find "+ name + "."); m->mothurOutEndLine();m->control_pressed = true;  }
1015                                                         else {
1016                                                                 //only limit repeats on query names
1017                                                                 if (temp == "Query") {
1018                                                                         itNames = namesInFile.find((itUnique->second));
1019                                                                         
1020                                                                         if (itNames == namesInFile.end()) {
1021                                                                                 out << itUnique->second << restOfName << endl;
1022                                                                                 namesInFile.insert((itUnique->second));
1023                                                                         }
1024                                                                 }else { out << itUnique->second << restOfName << endl;  }
1025                                                         }
1026                                                         
1027                                                 }
1028                                                 
1029                                         }else { //not need to alter line
1030                                                 out3 << line << endl;
1031                                         }
1032                                 }else { out3 << endl; }
1033                         }
1034                         in3.close();
1035                         out3.close();
1036                         
1037                         m->mothurRemove(alnsFileName);
1038                         rename((alnsFileName+".temp").c_str(), alnsFileName.c_str());
1039                 }
1040                 
1041                 return total;
1042         }
1043         catch(exception& e) {
1044                 m->errorOut(e, "ChimeraUchimeCommand", "deconvoluteResults");
1045                 exit(1);
1046         }
1047 }       
1048 //**********************************************************************************************************************
1049 int ChimeraUchimeCommand::printFile(vector<seqPriorityNode>& nameMapCount, string filename){
1050         try {
1051                 
1052                 sort(nameMapCount.begin(), nameMapCount.end(), compareSeqPriorityNodes);
1053                 
1054                 ofstream out;
1055                 m->openOutputFile(filename, out);
1056                 
1057                 //print new file in order of
1058                 for (int i = 0; i < nameMapCount.size(); i++) {
1059                         out << ">" << nameMapCount[i].name  << "/ab=" << nameMapCount[i].numIdentical << "/" << endl << nameMapCount[i].seq << endl;
1060                 }
1061                 out.close();
1062                 
1063                 return 0;
1064         }
1065         catch(exception& e) {
1066                 m->errorOut(e, "ChimeraUchimeCommand", "printFile");
1067                 exit(1);
1068         }
1069 }       
1070 //**********************************************************************************************************************
1071 int ChimeraUchimeCommand::readFasta(string filename, map<string, string>& seqs){
1072         try {
1073                 //create input file for uchime
1074                 //read through fastafile and store info
1075                 ifstream in;
1076                 m->openInputFile(filename, in);
1077                 
1078                 while (!in.eof()) {
1079                         
1080                         if (m->control_pressed) { in.close(); return 0; }
1081                         
1082                         Sequence seq(in); m->gobble(in);
1083                         seqs[seq.getName()] = seq.getAligned();
1084                 }
1085                 in.close();
1086                 
1087                 return 0;
1088         }
1089         catch(exception& e) {
1090                 m->errorOut(e, "ChimeraUchimeCommand", "readFasta");
1091                 exit(1);
1092         }
1093 }       
1094 //**********************************************************************************************************************
1095
1096 string ChimeraUchimeCommand::getNamesFile(string& inputFile){
1097         try {
1098                 string nameFile = "";
1099                 
1100                 m->mothurOutEndLine(); m->mothurOut("No namesfile given, running unique.seqs command to generate one."); m->mothurOutEndLine(); m->mothurOutEndLine();
1101                 
1102                 //use unique.seqs to create new name and fastafile
1103                 string inputString = "fasta=" + inputFile;
1104                 m->mothurOut("/******************************************/"); m->mothurOutEndLine(); 
1105                 m->mothurOut("Running command: unique.seqs(" + inputString + ")"); m->mothurOutEndLine(); 
1106                 m->mothurCalling = true;
1107         
1108                 Command* uniqueCommand = new DeconvoluteCommand(inputString);
1109                 uniqueCommand->execute();
1110                 
1111                 map<string, vector<string> > filenames = uniqueCommand->getOutputFiles();
1112                 
1113                 delete uniqueCommand;
1114                 m->mothurCalling = false;
1115                 m->mothurOut("/******************************************/"); m->mothurOutEndLine(); 
1116                 
1117                 nameFile = filenames["name"][0];
1118                 inputFile = filenames["fasta"][0];
1119                 
1120                 return nameFile;
1121         }
1122         catch(exception& e) {
1123                 m->errorOut(e, "ChimeraUchimeCommand", "getNamesFile");
1124                 exit(1);
1125         }
1126 }
1127 //**********************************************************************************************************************
1128 int ChimeraUchimeCommand::driverGroups(string outputFName, string filename, string accnos, string alns, int start, int end, vector<string> groups){
1129         try {
1130                 
1131                 int totalSeqs = 0;
1132                 int numChimeras = 0;
1133                 
1134                 for (int i = start; i < end; i++) {
1135                         int start = time(NULL);  if (m->control_pressed) {  return 0; }
1136             
1137                         int error;
1138             if (hasCount) { error = cparser->getSeqs(groups[i], filename, true); if ((error == 1) || m->control_pressed) {  return 0; } }
1139             else { error = sparser->getSeqs(groups[i], filename, true); if ((error == 1) || m->control_pressed) {  return 0; } }
1140                         
1141                         int numSeqs = driver((outputFName + groups[i]), filename, (accnos+ groups[i]), (alns+ groups[i]), numChimeras);
1142                         totalSeqs += numSeqs;
1143                         
1144                         if (m->control_pressed) { return 0; }
1145                         
1146                         //remove file made for uchime
1147                         if (!m->debug) {  m->mothurRemove(filename);  }
1148             else { m->mothurOut("[DEBUG]: saving file: " + filename + ".\n"); }
1149                         
1150                         //append files
1151                         m->appendFiles((outputFName+groups[i]), outputFName); m->mothurRemove((outputFName+groups[i]));
1152                         m->appendFiles((accnos+groups[i]), accnos); m->mothurRemove((accnos+groups[i]));
1153                         if (chimealns) { m->appendFiles((alns+groups[i]), alns); m->mothurRemove((alns+groups[i])); }
1154                         
1155                         m->mothurOutEndLine(); m->mothurOut("It took " + toString(time(NULL) - start) + " secs to check " + toString(numSeqs) + " sequences from group " + groups[i] + ".");    m->mothurOutEndLine();                                  
1156                 }       
1157                 return totalSeqs;
1158                 
1159         }
1160         catch(exception& e) {
1161                 m->errorOut(e, "ChimeraUchimeCommand", "driverGroups");
1162                 exit(1);
1163         }
1164 }       
1165 //**********************************************************************************************************************
1166
1167 int ChimeraUchimeCommand::driver(string outputFName, string filename, string accnos, string alns, int& numChimeras){
1168         try {
1169                 
1170                 outputFName = m->getFullPathName(outputFName);
1171                 filename = m->getFullPathName(filename);
1172                 alns = m->getFullPathName(alns);
1173                 
1174                 //to allow for spaces in the path
1175                 outputFName = "\"" + outputFName + "\"";
1176                 filename = "\"" + filename + "\"";
1177                 alns = "\"" + alns + "\"";
1178                                 
1179                 vector<char*> cPara;
1180                 
1181                 string uchimeCommand = uchimeLocation;
1182         uchimeCommand = "\"" + uchimeCommand + "\" ";
1183         
1184         char* tempUchime;
1185                 tempUchime= new char[uchimeCommand.length()+1]; 
1186                 *tempUchime = '\0';
1187                 strncat(tempUchime, uchimeCommand.c_str(), uchimeCommand.length());
1188                 cPara.push_back(tempUchime);
1189                 
1190         //are you using a reference file
1191                 if (templatefile != "self") {
1192             string outputFileName = filename.substr(1, filename.length()-2) + ".uchime_formatted";
1193             prepFile(filename.substr(1, filename.length()-2), outputFileName);
1194             filename = outputFileName;
1195             filename = "\"" + filename + "\"";
1196                         //add reference file
1197                         char* tempRef = new char[5]; 
1198                         //strcpy(tempRef, "--db"); 
1199                         *tempRef = '\0'; strncat(tempRef, "--db", 4);
1200                         cPara.push_back(tempRef);  
1201                         char* tempR = new char[templatefile.length()+1];
1202                         //strcpy(tempR, templatefile.c_str());
1203                         *tempR = '\0'; strncat(tempR, templatefile.c_str(), templatefile.length());
1204                         cPara.push_back(tempR);
1205                 }
1206                 
1207                 char* tempIn = new char[8]; 
1208                 *tempIn = '\0'; strncat(tempIn, "--input", 7);
1209                 //strcpy(tempIn, "--input"); 
1210                 cPara.push_back(tempIn);
1211                 char* temp = new char[filename.length()+1];
1212                 *temp = '\0'; strncat(temp, filename.c_str(), filename.length());
1213                 //strcpy(temp, filename.c_str());
1214                 cPara.push_back(temp);
1215                 
1216                 char* tempO = new char[12]; 
1217                 *tempO = '\0'; strncat(tempO, "--uchimeout", 11);
1218                 //strcpy(tempO, "--uchimeout"); 
1219                 cPara.push_back(tempO);
1220                 char* tempout = new char[outputFName.length()+1];
1221                 //strcpy(tempout, outputFName.c_str());
1222                 *tempout = '\0'; strncat(tempout, outputFName.c_str(), outputFName.length());
1223                 cPara.push_back(tempout);
1224                 
1225                 if (chimealns) {
1226                         char* tempA = new char[13]; 
1227                         *tempA = '\0'; strncat(tempA, "--uchimealns", 12);
1228                         //strcpy(tempA, "--uchimealns"); 
1229                         cPara.push_back(tempA);
1230                         char* tempa = new char[alns.length()+1];
1231                         //strcpy(tempa, alns.c_str());
1232                         *tempa = '\0'; strncat(tempa, alns.c_str(), alns.length());
1233                         cPara.push_back(tempa);
1234                 }
1235                 
1236                 if (useAbskew) {
1237                         char* tempskew = new char[9];
1238                         *tempskew = '\0'; strncat(tempskew, "--abskew", 8);
1239                         //strcpy(tempskew, "--abskew"); 
1240                         cPara.push_back(tempskew);
1241                         char* tempSkew = new char[abskew.length()+1];
1242                         //strcpy(tempSkew, abskew.c_str());
1243                         *tempSkew = '\0'; strncat(tempSkew, abskew.c_str(), abskew.length());
1244                         cPara.push_back(tempSkew);
1245                 }
1246                 
1247                 if (useMinH) {
1248                         char* tempminh = new char[7]; 
1249                         *tempminh = '\0'; strncat(tempminh, "--minh", 6);
1250                         //strcpy(tempminh, "--minh"); 
1251                         cPara.push_back(tempminh);
1252                         char* tempMinH = new char[minh.length()+1];
1253                         *tempMinH = '\0'; strncat(tempMinH, minh.c_str(), minh.length());
1254                         //strcpy(tempMinH, minh.c_str());
1255                         cPara.push_back(tempMinH);
1256                 }
1257                 
1258                 if (useMindiv) {
1259                         char* tempmindiv = new char[9]; 
1260                         *tempmindiv = '\0'; strncat(tempmindiv, "--mindiv", 8);
1261                         //strcpy(tempmindiv, "--mindiv"); 
1262                         cPara.push_back(tempmindiv);
1263                         char* tempMindiv = new char[mindiv.length()+1];
1264                         *tempMindiv = '\0'; strncat(tempMindiv, mindiv.c_str(), mindiv.length());
1265                         //strcpy(tempMindiv, mindiv.c_str());
1266                         cPara.push_back(tempMindiv);
1267                 }
1268                 
1269                 if (useXn) {
1270                         char* tempxn = new char[5]; 
1271                         //strcpy(tempxn, "--xn"); 
1272                         *tempxn = '\0'; strncat(tempxn, "--xn", 4);
1273                         cPara.push_back(tempxn);
1274                         char* tempXn = new char[xn.length()+1];
1275                         //strcpy(tempXn, xn.c_str());
1276                         *tempXn = '\0'; strncat(tempXn, xn.c_str(), xn.length());
1277                         cPara.push_back(tempXn);
1278                 }
1279                 
1280                 if (useDn) {
1281                         char* tempdn = new char[5]; 
1282                         //strcpy(tempdn, "--dn"); 
1283                         *tempdn = '\0'; strncat(tempdn, "--dn", 4);
1284                         cPara.push_back(tempdn);
1285                         char* tempDn = new char[dn.length()+1];
1286                         *tempDn = '\0'; strncat(tempDn, dn.c_str(), dn.length());
1287                         //strcpy(tempDn, dn.c_str());
1288                         cPara.push_back(tempDn);
1289                 }
1290                 
1291                 if (useXa) {
1292                         char* tempxa = new char[5]; 
1293                         //strcpy(tempxa, "--xa"); 
1294                         *tempxa = '\0'; strncat(tempxa, "--xa", 4);
1295                         cPara.push_back(tempxa);
1296                         char* tempXa = new char[xa.length()+1];
1297                         *tempXa = '\0'; strncat(tempXa, xa.c_str(), xa.length());
1298                         //strcpy(tempXa, xa.c_str());
1299                         cPara.push_back(tempXa);
1300                 }
1301                 
1302                 if (useChunks) {
1303                         char* tempchunks = new char[9]; 
1304                         //strcpy(tempchunks, "--chunks"); 
1305                         *tempchunks = '\0'; strncat(tempchunks, "--chunks", 8);
1306                         cPara.push_back(tempchunks);
1307                         char* tempChunks = new char[chunks.length()+1];
1308                         *tempChunks = '\0'; strncat(tempChunks, chunks.c_str(), chunks.length());
1309                         //strcpy(tempChunks, chunks.c_str());
1310                         cPara.push_back(tempChunks);
1311                 }
1312                 
1313                 if (useMinchunk) {
1314                         char* tempminchunk = new char[11]; 
1315                         //strcpy(tempminchunk, "--minchunk"); 
1316                         *tempminchunk = '\0'; strncat(tempminchunk, "--minchunk", 10);
1317                         cPara.push_back(tempminchunk);
1318                         char* tempMinchunk = new char[minchunk.length()+1];
1319                         *tempMinchunk = '\0'; strncat(tempMinchunk, minchunk.c_str(), minchunk.length());
1320                         //strcpy(tempMinchunk, minchunk.c_str());
1321                         cPara.push_back(tempMinchunk);
1322                 }
1323                 
1324                 if (useIdsmoothwindow) {
1325                         char* tempidsmoothwindow = new char[17]; 
1326                         *tempidsmoothwindow = '\0'; strncat(tempidsmoothwindow, "--idsmoothwindow", 16);
1327                         //strcpy(tempidsmoothwindow, "--idsmoothwindow"); 
1328                         cPara.push_back(tempidsmoothwindow);
1329                         char* tempIdsmoothwindow = new char[idsmoothwindow.length()+1];
1330                         *tempIdsmoothwindow = '\0'; strncat(tempIdsmoothwindow, idsmoothwindow.c_str(), idsmoothwindow.length());
1331                         //strcpy(tempIdsmoothwindow, idsmoothwindow.c_str());
1332                         cPara.push_back(tempIdsmoothwindow);
1333                 }
1334                 
1335                 /*if (useMinsmoothid) {
1336                         char* tempminsmoothid = new char[14]; 
1337                         //strcpy(tempminsmoothid, "--minsmoothid"); 
1338                         *tempminsmoothid = '\0'; strncat(tempminsmoothid, "--minsmoothid", 13);
1339                         cPara.push_back(tempminsmoothid);
1340                         char* tempMinsmoothid = new char[minsmoothid.length()+1];
1341                         *tempMinsmoothid = '\0'; strncat(tempMinsmoothid, minsmoothid.c_str(), minsmoothid.length());
1342                         //strcpy(tempMinsmoothid, minsmoothid.c_str());
1343                         cPara.push_back(tempMinsmoothid);
1344                 }*/
1345                 
1346                 if (useMaxp) {
1347                         char* tempmaxp = new char[7]; 
1348                         //strcpy(tempmaxp, "--maxp"); 
1349                         *tempmaxp = '\0'; strncat(tempmaxp, "--maxp", 6);
1350                         cPara.push_back(tempmaxp);
1351                         char* tempMaxp = new char[maxp.length()+1];
1352                         *tempMaxp = '\0'; strncat(tempMaxp, maxp.c_str(), maxp.length());
1353                         //strcpy(tempMaxp, maxp.c_str());
1354                         cPara.push_back(tempMaxp);
1355                 }
1356                 
1357                 if (!skipgaps) {
1358                         char* tempskipgaps = new char[13]; 
1359                         //strcpy(tempskipgaps, "--[no]skipgaps");
1360                         *tempskipgaps = '\0'; strncat(tempskipgaps, "--noskipgaps", 12);
1361                         cPara.push_back(tempskipgaps);
1362                 }
1363                 
1364                 if (!skipgaps2) {
1365                         char* tempskipgaps2 = new char[14]; 
1366                         //strcpy(tempskipgaps2, "--[no]skipgaps2"); 
1367                         *tempskipgaps2 = '\0'; strncat(tempskipgaps2, "--noskipgaps2", 13);
1368                         cPara.push_back(tempskipgaps2);
1369                 }
1370                 
1371                 if (useMinlen) {
1372                         char* tempminlen = new char[9]; 
1373                         *tempminlen = '\0'; strncat(tempminlen, "--minlen", 8);
1374                         //strcpy(tempminlen, "--minlen"); 
1375                         cPara.push_back(tempminlen);
1376                         char* tempMinlen = new char[minlen.length()+1];
1377                         //strcpy(tempMinlen, minlen.c_str());
1378                         *tempMinlen = '\0'; strncat(tempMinlen, minlen.c_str(), minlen.length());
1379                         cPara.push_back(tempMinlen);
1380                 }
1381                 
1382                 if (useMaxlen) {
1383                         char* tempmaxlen = new char[9]; 
1384                         //strcpy(tempmaxlen, "--maxlen"); 
1385                         *tempmaxlen = '\0'; strncat(tempmaxlen, "--maxlen", 8);
1386                         cPara.push_back(tempmaxlen);
1387                         char* tempMaxlen = new char[maxlen.length()+1];
1388                         *tempMaxlen = '\0'; strncat(tempMaxlen, maxlen.c_str(), maxlen.length());
1389                         //strcpy(tempMaxlen, maxlen.c_str());
1390                         cPara.push_back(tempMaxlen);
1391                 }
1392                 
1393                 if (ucl) {
1394                         char* tempucl = new char[5]; 
1395                         strcpy(tempucl, "--ucl"); 
1396                         cPara.push_back(tempucl);
1397                 }
1398                 
1399                 if (useQueryfract) {
1400                         char* tempqueryfract = new char[13]; 
1401                         *tempqueryfract = '\0'; strncat(tempqueryfract, "--queryfract", 12);
1402                         //strcpy(tempqueryfract, "--queryfract"); 
1403                         cPara.push_back(tempqueryfract);
1404                         char* tempQueryfract = new char[queryfract.length()+1];
1405                         *tempQueryfract = '\0'; strncat(tempQueryfract, queryfract.c_str(), queryfract.length());
1406                         //strcpy(tempQueryfract, queryfract.c_str());
1407                         cPara.push_back(tempQueryfract);
1408                 }
1409                 
1410                 
1411                 char** uchimeParameters;
1412                 uchimeParameters = new char*[cPara.size()];
1413                 string commandString = "";
1414                 for (int i = 0; i < cPara.size(); i++) {  uchimeParameters[i] = cPara[i];  commandString += toString(cPara[i]) + " "; } 
1415                 //int numArgs = cPara.size();
1416                 
1417                 //uchime_main(numArgs, uchimeParameters); 
1418                 //cout << "commandString = " << commandString << endl;
1419 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
1420 #else
1421                 commandString = "\"" + commandString + "\"";
1422 #endif
1423         if (m->debug) { m->mothurOut("[DEBUG]: uchime command = " + commandString + ".\n"); }
1424                 system(commandString.c_str());
1425                 
1426                 //free memory
1427                 for(int i = 0; i < cPara.size(); i++)  {  delete cPara[i];  }
1428                 delete[] uchimeParameters; 
1429                 
1430                 //remove "" from filenames
1431                 outputFName = outputFName.substr(1, outputFName.length()-2);
1432                 filename = filename.substr(1, filename.length()-2);
1433                 alns = alns.substr(1, alns.length()-2);
1434                 
1435                 if (m->control_pressed) { return 0; }
1436                 
1437                 //create accnos file from uchime results
1438                 ifstream in; 
1439                 m->openInputFile(outputFName, in);
1440                 
1441                 ofstream out;
1442                 m->openOutputFile(accnos, out);
1443                 
1444                 int num = 0;
1445                 numChimeras = 0;
1446                 while(!in.eof()) {
1447                         
1448                         if (m->control_pressed) { break; }
1449                         
1450                         string name = "";
1451                         string chimeraFlag = "";
1452                         //in >> chimeraFlag >> name;
1453                         
1454             string line = m->getline(in);
1455             vector<string> pieces = m->splitWhiteSpace(line);
1456             if (pieces.size() > 2) { 
1457                 name = pieces[1];
1458                 //fix name if needed
1459                 if (templatefile == "self") { 
1460                     name = name.substr(0, name.length()-1); //rip off last /
1461                     name = name.substr(0, name.find_last_of('/'));
1462                 }
1463                 
1464                 chimeraFlag = pieces[pieces.size()-1];
1465                         }
1466                         //for (int i = 0; i < 15; i++) {  in >> chimeraFlag; }
1467                         m->gobble(in);
1468                         
1469                         if (chimeraFlag == "Y") {  out << name << endl; numChimeras++; }
1470                         num++;
1471                 }
1472                 in.close();
1473                 out.close();
1474                 
1475         //if (templatefile != "self") {  m->mothurRemove(filename); }
1476         
1477                 return num;
1478         }
1479         catch(exception& e) {
1480                 m->errorOut(e, "ChimeraUchimeCommand", "driver");
1481                 exit(1);
1482         }
1483 }
1484 /**************************************************************************************************/
1485 //uchime can't handle some of the things allowed in mothurs fasta files. This functions "cleans up" the file.
1486 int ChimeraUchimeCommand::prepFile(string filename, string output) {
1487         try {
1488         
1489         ifstream in;
1490         m->openInputFile(filename, in);
1491         
1492         ofstream out;
1493         m->openOutputFile(output, out);
1494         
1495         while (!in.eof()) {
1496             if (m->control_pressed) { break;  }
1497             
1498             Sequence seq(in); m->gobble(in);
1499             
1500             if (seq.getName() != "") { seq.printSequence(out); }
1501         }
1502         in.close();
1503         out.close();
1504         
1505         return 0;
1506     }
1507         catch(exception& e) {
1508                 m->errorOut(e, "ChimeraUchimeCommand", "prepFile");
1509                 exit(1);
1510         }
1511 }
1512 /**************************************************************************************************/
1513
1514 int ChimeraUchimeCommand::createProcesses(string outputFileName, string filename, string accnos, string alns, int& numChimeras) {
1515         try {
1516                 
1517                 processIDS.clear();
1518                 int process = 1;
1519                 int num = 0;
1520                 vector<string> files;
1521                 
1522 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)          
1523                 //break up file into multiple files
1524                 m->divideFile(filename, processors, files);
1525                 
1526                 if (m->control_pressed) {  return 0;  }
1527                                 
1528                 //loop through and create all the processes you want
1529                 while (process != processors) {
1530                         int pid = fork();
1531                         
1532                         if (pid > 0) {
1533                                 processIDS.push_back(pid);  //create map from line number to pid so you can append files in correct order later
1534                                 process++;
1535                         }else if (pid == 0){
1536                                 num = driver(outputFileName + toString(getpid()) + ".temp", files[process], accnos + toString(getpid()) + ".temp", alns + toString(getpid()) + ".temp", numChimeras);
1537                                 
1538                                 //pass numSeqs to parent
1539                                 ofstream out;
1540                                 string tempFile = outputFileName + toString(getpid()) + ".num.temp";
1541                                 m->openOutputFile(tempFile, out);
1542                                 out << num << endl;
1543                                 out << numChimeras << endl;
1544                                 out.close();
1545                                 
1546                                 exit(0);
1547                         }else { 
1548                                 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine(); 
1549                                 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
1550                                 exit(0);
1551                         }
1552                 }
1553                 
1554                 //do my part
1555                 num = driver(outputFileName, files[0], accnos, alns, numChimeras);
1556                 
1557                 //force parent to wait until all the processes are done
1558                 for (int i=0;i<processIDS.size();i++) { 
1559                         int temp = processIDS[i];
1560                         wait(&temp);
1561                 }
1562                 
1563                 for (int i = 0; i < processIDS.size(); i++) {
1564                         ifstream in;
1565                         string tempFile =  outputFileName + toString(processIDS[i]) + ".num.temp";
1566                         m->openInputFile(tempFile, in);
1567                         if (!in.eof()) { 
1568                                 int tempNum = 0; 
1569                                 in >> tempNum; m->gobble(in);
1570                                 num += tempNum; 
1571                                 in >> tempNum;
1572                                 numChimeras += tempNum;
1573                         }
1574                         in.close(); m->mothurRemove(tempFile);
1575                 }
1576 #else
1577                 //////////////////////////////////////////////////////////////////////////////////////////////////////
1578                 //Windows version shared memory, so be careful when passing variables through the preClusterData struct. 
1579                 //Above fork() will clone, so memory is separate, but that's not the case with windows, 
1580                 //////////////////////////////////////////////////////////////////////////////////////////////////////
1581                 
1582                 //divide file
1583                 int count = 0;
1584                 int spot = 0;
1585                 map<int, ofstream*> filehandles;
1586                 map<int, ofstream*>::iterator it3;
1587                 
1588                 ofstream* temp;
1589                 for (int i = 0; i < processors; i++) {
1590                         temp = new ofstream;
1591                         filehandles[i] = temp;
1592                         m->openOutputFile(filename+toString(i)+".temp", *(temp));
1593                         files.push_back(filename+toString(i)+".temp");
1594                 }
1595                 
1596                 ifstream in;
1597                 m->openInputFile(filename, in);
1598                 
1599                 while(!in.eof()) {
1600                         
1601                         if (m->control_pressed) { in.close(); for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) { (*(it3->second)).close(); delete it3->second; } return 0; }
1602                         
1603                         Sequence tempSeq(in); m->gobble(in); 
1604                         
1605                         if (tempSeq.getName() != "") {
1606                                 tempSeq.printSequence(*(filehandles[spot])); 
1607                                 spot++; count++;
1608                                 if (spot == processors) { spot = 0; }
1609                         }
1610                 }
1611                 in.close();
1612                 
1613                 //delete memory
1614                 for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) {
1615                         (*(it3->second)).close();
1616                         delete it3->second;
1617                 }
1618                 
1619                 //sanity check for number of processors
1620                 if (count < processors) { processors = count; }
1621                 
1622                 vector<uchimeData*> pDataArray; 
1623                 DWORD   dwThreadIdArray[processors-1];
1624                 HANDLE  hThreadArray[processors-1]; 
1625                 vector<string> dummy; //used so that we can use the same struct for MyUchimeSeqsThreadFunction and MyUchimeThreadFunction
1626                 
1627                 //Create processor worker threads.
1628                 for( int i=1; i<processors; i++ ){
1629                         // Allocate memory for thread data.
1630                         string extension = toString(i) + ".temp";
1631                         
1632                         uchimeData* tempUchime = new uchimeData(outputFileName+extension, uchimeLocation, templatefile, files[i], "", "", "", accnos+extension, alns+extension, dummy, m, 0, 0,  i);
1633                         tempUchime->setBooleans(useAbskew, chimealns, useMinH, useMindiv, useXn, useDn, useXa, useChunks, useMinchunk, useIdsmoothwindow, useMinsmoothid, useMaxp, skipgaps, skipgaps2, useMinlen, useMaxlen, ucl, useQueryfract, hasCount);
1634                         tempUchime->setVariables(abskew, minh, mindiv, xn, dn, xa, chunks, minchunk, idsmoothwindow, minsmoothid, maxp, minlen, maxlen, queryfract);
1635                         
1636                         pDataArray.push_back(tempUchime);
1637                         processIDS.push_back(i);
1638                         
1639                         //MySeqSumThreadFunction is in header. It must be global or static to work with the threads.
1640                         //default security attributes, thread function name, argument to thread function, use default creation flags, returns the thread identifier
1641                         hThreadArray[i-1] = CreateThread(NULL, 0, MyUchimeSeqsThreadFunction, pDataArray[i-1], 0, &dwThreadIdArray[i-1]);   
1642                 }
1643                 
1644                 
1645                 //using the main process as a worker saves time and memory
1646                 num = driver(outputFileName, files[0], accnos, alns, numChimeras);
1647                 
1648                 //Wait until all threads have terminated.
1649                 WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE);
1650                 
1651                 //Close all thread handles and free memory allocations.
1652                 for(int i=0; i < pDataArray.size(); i++){
1653                         num += pDataArray[i]->count;
1654                         numChimeras += pDataArray[i]->numChimeras;
1655                         CloseHandle(hThreadArray[i]);
1656                         delete pDataArray[i];
1657                 }
1658 #endif          
1659                 
1660                 //append output files
1661                 for(int i=0;i<processIDS.size();i++){
1662                         m->appendFiles((outputFileName + toString(processIDS[i]) + ".temp"), outputFileName);
1663                         m->mothurRemove((outputFileName + toString(processIDS[i]) + ".temp"));
1664                         
1665                         m->appendFiles((accnos + toString(processIDS[i]) + ".temp"), accnos);
1666                         m->mothurRemove((accnos + toString(processIDS[i]) + ".temp"));
1667                         
1668                         if (chimealns) {
1669                                 m->appendFiles((alns + toString(processIDS[i]) + ".temp"), alns);
1670                                 m->mothurRemove((alns + toString(processIDS[i]) + ".temp"));
1671                         }
1672                 }
1673                 
1674                 //get rid of the file pieces.
1675                 for (int i = 0; i < files.size(); i++) { m->mothurRemove(files[i]); }
1676                 return num;     
1677         }
1678         catch(exception& e) {
1679                 m->errorOut(e, "ChimeraUchimeCommand", "createProcesses");
1680                 exit(1);
1681         }
1682 }
1683 /**************************************************************************************************/
1684
1685 int ChimeraUchimeCommand::createProcessesGroups(string outputFName, string filename, string accnos, string alns, vector<string> groups, string nameFile, string groupFile, string fastaFile) {
1686         try {
1687                 
1688                 processIDS.clear();
1689                 int process = 1;
1690                 int num = 0;
1691                 
1692                 //sanity check
1693                 if (groups.size() < processors) { processors = groups.size(); }
1694                 
1695                 //divide the groups between the processors
1696                 vector<linePair> lines;
1697                 int numGroupsPerProcessor = groups.size() / processors;
1698                 for (int i = 0; i < processors; i++) {
1699                         int startIndex =  i * numGroupsPerProcessor;
1700                         int endIndex = (i+1) * numGroupsPerProcessor;
1701                         if(i == (processors - 1)){      endIndex = groups.size();       }
1702                         lines.push_back(linePair(startIndex, endIndex));
1703                 }
1704                 
1705 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)          
1706                                 
1707                 //loop through and create all the processes you want
1708                 while (process != processors) {
1709                         int pid = fork();
1710                         
1711                         if (pid > 0) {
1712                                 processIDS.push_back(pid);  //create map from line number to pid so you can append files in correct order later
1713                                 process++;
1714                         }else if (pid == 0){
1715                                 num = driverGroups(outputFName + toString(getpid()) + ".temp", filename + toString(getpid()) + ".temp", accnos + toString(getpid()) + ".temp", alns + toString(getpid()) + ".temp", lines[process].start, lines[process].end, groups);
1716                                 
1717                                 //pass numSeqs to parent
1718                                 ofstream out;
1719                                 string tempFile = outputFName + toString(getpid()) + ".num.temp";
1720                                 m->openOutputFile(tempFile, out);
1721                                 out << num << endl;
1722                                 out.close();
1723                                 
1724                                 exit(0);
1725                         }else { 
1726                                 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine(); 
1727                                 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
1728                                 exit(0);
1729                         }
1730                 }
1731                 
1732                 //do my part
1733                 num = driverGroups(outputFName, filename, accnos, alns, lines[0].start, lines[0].end, groups);
1734                 
1735                 //force parent to wait until all the processes are done
1736                 for (int i=0;i<processIDS.size();i++) { 
1737                         int temp = processIDS[i];
1738                         wait(&temp);
1739                 }
1740                 
1741                 for (int i = 0; i < processIDS.size(); i++) {
1742                         ifstream in;
1743                         string tempFile =  outputFName + toString(processIDS[i]) + ".num.temp";
1744                         m->openInputFile(tempFile, in);
1745                         if (!in.eof()) { int tempNum = 0; in >> tempNum; num += tempNum; }
1746                         in.close(); m->mothurRemove(tempFile);
1747                 }
1748                                 
1749 #else
1750                 //////////////////////////////////////////////////////////////////////////////////////////////////////
1751                 //Windows version shared memory, so be careful when passing variables through the uchimeData struct. 
1752                 //Above fork() will clone, so memory is separate, but that's not the case with windows, 
1753                 //////////////////////////////////////////////////////////////////////////////////////////////////////
1754                 
1755                 vector<uchimeData*> pDataArray; 
1756                 DWORD   dwThreadIdArray[processors-1];
1757                 HANDLE  hThreadArray[processors-1]; 
1758                 
1759                 //Create processor worker threads.
1760                 for( int i=1; i<processors; i++ ){
1761                         // Allocate memory for thread data.
1762                         string extension = toString(i) + ".temp";
1763                         
1764                         uchimeData* tempUchime = new uchimeData(outputFName+extension, uchimeLocation, templatefile, filename+extension, fastaFile, nameFile, groupFile, accnos+extension, alns+extension, groups, m, lines[i].start, lines[i].end,  i);
1765                         tempUchime->setBooleans(useAbskew, chimealns, useMinH, useMindiv, useXn, useDn, useXa, useChunks, useMinchunk, useIdsmoothwindow, useMinsmoothid, useMaxp, skipgaps, skipgaps2, useMinlen, useMaxlen, ucl, useQueryfract, hasCount);
1766                         tempUchime->setVariables(abskew, minh, mindiv, xn, dn, xa, chunks, minchunk, idsmoothwindow, minsmoothid, maxp, minlen, maxlen, queryfract);
1767                         
1768                         pDataArray.push_back(tempUchime);
1769                         processIDS.push_back(i);
1770                         
1771                         //MyUchimeThreadFunction is in header. It must be global or static to work with the threads.
1772                         //default security attributes, thread function name, argument to thread function, use default creation flags, returns the thread identifier
1773                         hThreadArray[i-1] = CreateThread(NULL, 0, MyUchimeThreadFunction, pDataArray[i-1], 0, &dwThreadIdArray[i-1]);   
1774                 }
1775                 
1776                 
1777                 //using the main process as a worker saves time and memory
1778                 num = driverGroups(outputFName, filename, accnos, alns, lines[0].start, lines[0].end, groups);
1779                 
1780                 //Wait until all threads have terminated.
1781                 WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE);
1782                 
1783                 //Close all thread handles and free memory allocations.
1784                 for(int i=0; i < pDataArray.size(); i++){
1785                         num += pDataArray[i]->count;
1786                         CloseHandle(hThreadArray[i]);
1787                         delete pDataArray[i];
1788                 }
1789 #endif          
1790                 
1791                                 
1792                 //append output files
1793                 for(int i=0;i<processIDS.size();i++){
1794                         m->appendFiles((outputFName + toString(processIDS[i]) + ".temp"), outputFName);
1795                         m->mothurRemove((outputFName + toString(processIDS[i]) + ".temp"));
1796                         
1797                         m->appendFiles((accnos + toString(processIDS[i]) + ".temp"), accnos);
1798                         m->mothurRemove((accnos + toString(processIDS[i]) + ".temp"));
1799                         
1800                         if (chimealns) {
1801                                 m->appendFiles((alns + toString(processIDS[i]) + ".temp"), alns);
1802                                 m->mothurRemove((alns + toString(processIDS[i]) + ".temp"));
1803                         }
1804                 }
1805                 
1806                 return num;     
1807                 
1808         }
1809         catch(exception& e) {
1810                 m->errorOut(e, "ChimeraUchimeCommand", "createProcessesGroups");
1811                 exit(1);
1812         }
1813 }
1814 /**************************************************************************************************/
1815