]> git.donarmstrong.com Git - mothur.git/blob - chimerauchimecommand.cpp
1.22.0
[mothur.git] / chimerauchimecommand.cpp
1 /*
2  *  chimerauchimecommand.cpp
3  *  Mothur
4  *
5  *  Created by westcott on 5/13/11.
6  *  Copyright 2011 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "chimerauchimecommand.h"
11 #include "deconvolutecommand.h"
12 //#include "uc.h"
13 #include "sequence.hpp"
14 #include "referencedb.h"
15
16
17 //**********************************************************************************************************************
18 vector<string> ChimeraUchimeCommand::setParameters(){   
19         try {
20                 CommandParameter ptemplate("reference", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(ptemplate);
21                 CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pfasta);
22                 CommandParameter pname("name", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pname);
23                 CommandParameter pgroup("group", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pgroup);
24                 CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors);
25                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
26                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
27                 CommandParameter pabskew("abskew", "Number", "", "1.9", "", "", "",false,false); parameters.push_back(pabskew);
28                 CommandParameter pchimealns("chimealns", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pchimealns);
29                 CommandParameter pminh("minh", "Number", "", "0.3", "", "", "",false,false); parameters.push_back(pminh);
30                 CommandParameter pmindiv("mindiv", "Number", "", "0.5", "", "", "",false,false); parameters.push_back(pmindiv);
31                 CommandParameter pxn("xn", "Number", "", "8.0", "", "", "",false,false); parameters.push_back(pxn);
32                 CommandParameter pdn("dn", "Number", "", "1.4", "", "", "",false,false); parameters.push_back(pdn);
33                 CommandParameter pxa("xa", "Number", "", "1", "", "", "",false,false); parameters.push_back(pxa);
34                 CommandParameter pchunks("chunks", "Number", "", "4", "", "", "",false,false); parameters.push_back(pchunks);
35                 CommandParameter pminchunk("minchunk", "Number", "", "64", "", "", "",false,false); parameters.push_back(pminchunk);
36                 CommandParameter pidsmoothwindow("idsmoothwindow", "Number", "", "32", "", "", "",false,false); parameters.push_back(pidsmoothwindow);
37                 //CommandParameter pminsmoothid("minsmoothid", "Number", "", "0.95", "", "", "",false,false); parameters.push_back(pminsmoothid);
38                 CommandParameter pmaxp("maxp", "Number", "", "2", "", "", "",false,false); parameters.push_back(pmaxp);
39                 CommandParameter pskipgaps("skipgaps", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(pskipgaps);
40                 CommandParameter pskipgaps2("skipgaps2", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(pskipgaps2);
41                 CommandParameter pminlen("minlen", "Number", "", "10", "", "", "",false,false); parameters.push_back(pminlen);
42                 CommandParameter pmaxlen("maxlen", "Number", "", "10000", "", "", "",false,false); parameters.push_back(pmaxlen);
43                 CommandParameter pucl("ucl", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pucl);
44                 CommandParameter pqueryfract("queryfract", "Number", "", "0.5", "", "", "",false,false); parameters.push_back(pqueryfract);
45
46                 vector<string> myArray;
47                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
48                 return myArray;
49         }
50         catch(exception& e) {
51                 m->errorOut(e, "ChimeraUchimeCommand", "setParameters");
52                 exit(1);
53         }
54 }
55 //**********************************************************************************************************************
56 string ChimeraUchimeCommand::getHelpString(){   
57         try {
58                 string helpString = "";
59                 helpString += "The chimera.uchime command reads a fastafile and referencefile and outputs potentially chimeric sequences.\n";
60                 helpString += "This command is a wrapper for uchime written by Robert C. Edgar.\n";
61                 helpString += "The chimera.uchime command parameters are fasta, name, reference, processors, abskew, chimealns, minh, mindiv, xn, dn, xa, chunks, minchunk, idsmoothwindow, minsmoothid, maxp, skipgaps, skipgaps2, minlen, maxlen, ucl and queryfact.\n";
62                 helpString += "The fasta parameter allows you to enter the fasta file containing your potentially chimeric sequences, and is required, unless you have a valid current fasta file. \n";
63                 helpString += "The name parameter allows you to provide a name file, if you are using template=self. \n";
64                 helpString += "You may enter multiple fasta files by separating their names with dashes. ie. fasta=abrecovery.fasta-amazon.fasta \n";
65                 helpString += "The group parameter allows you to provide a group file. The group file can be used with a namesfile and reference=self. When checking sequences, only sequences from the same group as the query sequence will be used as the reference. \n";
66                 helpString += "The reference parameter allows you to enter a reference file containing known non-chimeric sequences, and is required. You may also set template=self, in this case the abundant sequences will be used as potential parents. \n";
67                 helpString += "The processors parameter allows you to specify how many processors you would like to use.  The default is 1. \n";
68                 helpString += "The abskew parameter can only be used with template=self. Minimum abundance skew. Default 1.9. Abundance skew is: min [ abund(parent1), abund(parent2) ] / abund(query).\n";
69                 helpString += "The chimealns parameter allows you to indicate you would like a file containing multiple alignments of query sequences to parents in human readable format. Alignments show columns with differences that support or contradict a chimeric model.\n";
70                 helpString += "The minh parameter - mininum score to report chimera. Default 0.3. Values from 0.1 to 5 might be reasonable. Lower values increase sensitivity but may report more false positives. If you decrease xn you may need to increase minh, and vice versa.\n";
71                 helpString += "The mindiv parameter - minimum divergence ratio, default 0.5. Div ratio is 100%% - %%identity between query sequence and the closest candidate for being a parent. If you don't care about very close chimeras, then you could increase mindiv to, say, 1.0 or 2.0, and also decrease minh, say to 0.1, to increase sensitivity. How well this works will depend on your data. Best is to tune parameters on a good benchmark.\n";
72                 helpString += "The xn parameter - weight of a no vote. Default 8.0. Decreasing this weight to around 3 or 4 may give better performance on denoised data.\n";
73                 helpString += "The dn parameter - pseudo-count prior on number of no votes. Default 1.4. Probably no good reason to change this unless you can retune to a good benchmark for your data. Reasonable values are probably in the range from 0.2 to 2.\n";
74                 helpString += "The xa parameter - weight of an abstain vote. Default 1. So far, results do not seem to be very sensitive to this parameter, but if you have a good training set might be worth trying. Reasonable values might range from 0.1 to 2.\n";
75                 helpString += "The chunks parameter is the number of chunks to extract from the query sequence when searching for parents. Default 4.\n";
76                 helpString += "The minchunk parameter is the minimum length of a chunk. Default 64.\n";
77                 helpString += "The idsmoothwindow parameter is the length of id smoothing window. Default 32.\n";
78                 //helpString += "The minsmoothid parameter - minimum factional identity over smoothed window of candidate parent. Default 0.95.\n";
79                 helpString += "The maxp parameter - maximum number of candidate parents to consider. Default 2. In tests so far, increasing maxp gives only a very small improvement in sensivity but tends to increase the error rate quite a bit.\n";
80                 helpString += "The skipgaps parameter controls how gapped columns affect counting of diffs. If skipgaps is set to T, columns containing gaps do not found as diffs. Default = T.\n";
81                 helpString += "The skipgaps2 parameter controls how gapped columns affect counting of diffs. If skipgaps2 is set to T, if column is immediately adjacent to a column containing a gap, it is not counted as a diff. Default = T.\n";
82                 helpString += "The minlen parameter is the minimum unaligned sequence length. Defaults 10. Applies to both query and reference sequences.\n";
83                 helpString += "The maxlen parameter is the maximum unaligned sequence length. Defaults 10000. Applies to both query and reference sequences.\n";
84                 helpString += "The ucl parameter - use local-X alignments. Default is global-X or false. On tests so far, global-X is always better; this option is retained because it just might work well on some future type of data.\n";
85                 helpString += "The queryfract parameter - minimum fraction of the query sequence that must be covered by a local-X alignment. Default 0.5. Applies only when ucl is true.\n";
86 #ifdef USE_MPI
87                 helpString += "When using MPI, the processors parameter is set to the number of MPI processes running. \n";
88 #endif
89                 helpString += "The chimera.uchime command should be in the following format: \n";
90                 helpString += "chimera.uchime(fasta=yourFastaFile, reference=yourTemplate) \n";
91                 helpString += "Example: chimera.uchime(fasta=AD.align, reference=silva.gold.align) \n";
92                 helpString += "Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFastaFile).\n";       
93                 return helpString;
94         }
95         catch(exception& e) {
96                 m->errorOut(e, "ChimeraUchimeCommand", "getHelpString");
97                 exit(1);
98         }
99 }
100 //**********************************************************************************************************************
101 ChimeraUchimeCommand::ChimeraUchimeCommand(){   
102         try {
103                 abort = true; calledHelp = true;
104                 setParameters();
105                 vector<string> tempOutNames;
106                 outputTypes["chimera"] = tempOutNames;
107                 outputTypes["accnos"] = tempOutNames;
108                 outputTypes["alns"] = tempOutNames;
109         }
110         catch(exception& e) {
111                 m->errorOut(e, "ChimeraUchimeCommand", "ChimeraUchimeCommand");
112                 exit(1);
113         }
114 }
115 //***************************************************************************************************************
116 ChimeraUchimeCommand::ChimeraUchimeCommand(string option)  {
117         try {
118                 abort = false; calledHelp = false; 
119                 ReferenceDB* rdb = ReferenceDB::getInstance();
120                 
121                 //allow user to run help
122                 if(option == "help") { help(); abort = true; calledHelp = true; }
123                 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
124                 
125                 else {
126                         vector<string> myArray = setParameters();
127                         
128                         OptionParser parser(option);
129                         map<string,string> parameters = parser.getParameters();
130                         
131                         ValidParameters validParameter("chimera.uchime");
132                         map<string,string>::iterator it;
133                         
134                         //check to make sure all parameters are valid for command
135                         for (it = parameters.begin(); it != parameters.end(); it++) { 
136                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
137                         }
138                         
139                         vector<string> tempOutNames;
140                         outputTypes["chimera"] = tempOutNames;
141                         outputTypes["accnos"] = tempOutNames;
142                         outputTypes["alns"] = tempOutNames;
143                         
144                         //if the user changes the input directory command factory will send this info to us in the output parameter 
145                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
146                         if (inputDir == "not found"){   inputDir = "";          }
147                         
148                         //check for required parameters
149                         fastafile = validParameter.validFile(parameters, "fasta", false);
150                         if (fastafile == "not found") {                                 
151                                 //if there is a current fasta file, use it
152                                 string filename = m->getFastaFile(); 
153                                 if (filename != "") { fastaFileNames.push_back(filename); m->mothurOut("Using " + filename + " as input file for the fasta parameter."); m->mothurOutEndLine(); }
154                                 else {  m->mothurOut("You have no current fastafile and the fasta parameter is required."); m->mothurOutEndLine(); abort = true; }
155                         }else { 
156                                 m->splitAtDash(fastafile, fastaFileNames);
157                                 
158                                 //go through files and make sure they are good, if not, then disregard them
159                                 for (int i = 0; i < fastaFileNames.size(); i++) {
160                                         
161                                         bool ignore = false;
162                                         if (fastaFileNames[i] == "current") { 
163                                                 fastaFileNames[i] = m->getFastaFile(); 
164                                                 if (fastaFileNames[i] != "") {  m->mothurOut("Using " + fastaFileNames[i] + " as input file for the fasta parameter where you had given current."); m->mothurOutEndLine(); }
165                                                 else {  
166                                                         m->mothurOut("You have no current fastafile, ignoring current."); m->mothurOutEndLine(); ignore=true; 
167                                                         //erase from file list
168                                                         fastaFileNames.erase(fastaFileNames.begin()+i);
169                                                         i--;
170                                                 }
171                                         }
172                                         
173                                         if (!ignore) {
174                                                 
175                                                 if (inputDir != "") {
176                                                         string path = m->hasPath(fastaFileNames[i]);
177                                                         //if the user has not given a path then, add inputdir. else leave path alone.
178                                                         if (path == "") {       fastaFileNames[i] = inputDir + fastaFileNames[i];               }
179                                                 }
180                                                 
181                                                 int ableToOpen;
182                                                 ifstream in;
183                                                 
184                                                 ableToOpen = m->openInputFile(fastaFileNames[i], in, "noerror");
185                                                 
186                                                 //if you can't open it, try default location
187                                                 if (ableToOpen == 1) {
188                                                         if (m->getDefaultPath() != "") { //default path is set
189                                                                 string tryPath = m->getDefaultPath() + m->getSimpleName(fastaFileNames[i]);
190                                                                 m->mothurOut("Unable to open " + fastaFileNames[i] + ". Trying default " + tryPath); m->mothurOutEndLine();
191                                                                 ifstream in2;
192                                                                 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
193                                                                 in2.close();
194                                                                 fastaFileNames[i] = tryPath;
195                                                         }
196                                                 }
197                                                 
198                                                 if (ableToOpen == 1) {
199                                                         if (m->getOutputDir() != "") { //default path is set
200                                                                 string tryPath = m->getOutputDir() + m->getSimpleName(fastaFileNames[i]);
201                                                                 m->mothurOut("Unable to open " + fastaFileNames[i] + ". Trying output directory " + tryPath); m->mothurOutEndLine();
202                                                                 ifstream in2;
203                                                                 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
204                                                                 in2.close();
205                                                                 fastaFileNames[i] = tryPath;
206                                                         }
207                                                 }
208                                                 
209                                                 in.close();
210                                                 
211                                                 if (ableToOpen == 1) { 
212                                                         m->mothurOut("Unable to open " + fastaFileNames[i] + ". It will be disregarded."); m->mothurOutEndLine(); 
213                                                         //erase from file list
214                                                         fastaFileNames.erase(fastaFileNames.begin()+i);
215                                                         i--;
216                                                 }else {
217                                                         m->setFastaFile(fastaFileNames[i]);
218                                                 }
219                                         }
220                                 }
221                                 
222                                 //make sure there is at least one valid file left
223                                 if (fastaFileNames.size() == 0) { m->mothurOut("[ERROR]: no valid files."); m->mothurOutEndLine(); abort = true; }
224                         }
225                         
226                         
227                         //check for required parameters
228                         bool hasName = true;
229                         namefile = validParameter.validFile(parameters, "name", false);
230                         if (namefile == "not found") { namefile = "";  hasName = false; }
231                         else { 
232                                 m->splitAtDash(namefile, nameFileNames);
233                                 
234                                 //go through files and make sure they are good, if not, then disregard them
235                                 for (int i = 0; i < nameFileNames.size(); i++) {
236                                         
237                                         bool ignore = false;
238                                         if (nameFileNames[i] == "current") { 
239                                                 nameFileNames[i] = m->getNameFile(); 
240                                                 if (nameFileNames[i] != "") {  m->mothurOut("Using " + nameFileNames[i] + " as input file for the name parameter where you had given current."); m->mothurOutEndLine(); }
241                                                 else {  
242                                                         m->mothurOut("You have no current namefile, ignoring current."); m->mothurOutEndLine(); ignore=true; 
243                                                         //erase from file list
244                                                         nameFileNames.erase(nameFileNames.begin()+i);
245                                                         i--;
246                                                 }
247                                         }
248                                         
249                                         if (!ignore) {
250                                                 
251                                                 if (inputDir != "") {
252                                                         string path = m->hasPath(nameFileNames[i]);
253                                                         //if the user has not given a path then, add inputdir. else leave path alone.
254                                                         if (path == "") {       nameFileNames[i] = inputDir + nameFileNames[i];         }
255                                                 }
256                                                 
257                                                 int ableToOpen;
258                                                 ifstream in;
259                                                 
260                                                 ableToOpen = m->openInputFile(nameFileNames[i], in, "noerror");
261                                                 
262                                                 //if you can't open it, try default location
263                                                 if (ableToOpen == 1) {
264                                                         if (m->getDefaultPath() != "") { //default path is set
265                                                                 string tryPath = m->getDefaultPath() + m->getSimpleName(nameFileNames[i]);
266                                                                 m->mothurOut("Unable to open " + nameFileNames[i] + ". Trying default " + tryPath); m->mothurOutEndLine();
267                                                                 ifstream in2;
268                                                                 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
269                                                                 in2.close();
270                                                                 nameFileNames[i] = tryPath;
271                                                         }
272                                                 }
273                                                 
274                                                 if (ableToOpen == 1) {
275                                                         if (m->getOutputDir() != "") { //default path is set
276                                                                 string tryPath = m->getOutputDir() + m->getSimpleName(nameFileNames[i]);
277                                                                 m->mothurOut("Unable to open " + nameFileNames[i] + ". Trying output directory " + tryPath); m->mothurOutEndLine();
278                                                                 ifstream in2;
279                                                                 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
280                                                                 in2.close();
281                                                                 nameFileNames[i] = tryPath;
282                                                         }
283                                                 }
284                                                 
285                                                 in.close();
286                                                 
287                                                 if (ableToOpen == 1) { 
288                                                         m->mothurOut("Unable to open " + nameFileNames[i] + ". It will be disregarded."); m->mothurOutEndLine(); 
289                                                         //erase from file list
290                                                         nameFileNames.erase(nameFileNames.begin()+i);
291                                                         i--;
292                                                 }else {
293                                                         m->setNameFile(nameFileNames[i]);
294                                                 }
295                                         }
296                                 }
297                                 
298                                 //make sure there is at least one valid file left
299                                 if (nameFileNames.size() == 0) { m->mothurOut("[ERROR]: no valid name files."); m->mothurOutEndLine(); abort = true; }
300                         }
301                         
302                         if (hasName && (nameFileNames.size() != fastaFileNames.size())) { m->mothurOut("[ERROR]: The number of namefiles does not match the number of fastafiles, please correct."); m->mothurOutEndLine(); abort=true; }
303                         
304                         bool hasGroup = true;
305                         groupfile = validParameter.validFile(parameters, "group", false);
306                         if (groupfile == "not found") { groupfile = "";  hasGroup = false; }
307                         else { 
308                                 m->splitAtDash(groupfile, groupFileNames);
309                                 
310                                 //go through files and make sure they are good, if not, then disregard them
311                                 for (int i = 0; i < groupFileNames.size(); i++) {
312                                         
313                                         bool ignore = false;
314                                         if (groupFileNames[i] == "current") { 
315                                                 groupFileNames[i] = m->getGroupFile(); 
316                                                 if (groupFileNames[i] != "") {  m->mothurOut("Using " + groupFileNames[i] + " as input file for the group parameter where you had given current."); m->mothurOutEndLine(); }
317                                                 else {  
318                                                         m->mothurOut("You have no current namefile, ignoring current."); m->mothurOutEndLine(); ignore=true; 
319                                                         //erase from file list
320                                                         groupFileNames.erase(groupFileNames.begin()+i);
321                                                         i--;
322                                                 }
323                                         }
324                                         
325                                         if (!ignore) {
326                                                 
327                                                 if (inputDir != "") {
328                                                         string path = m->hasPath(groupFileNames[i]);
329                                                         //if the user has not given a path then, add inputdir. else leave path alone.
330                                                         if (path == "") {       groupFileNames[i] = inputDir + groupFileNames[i];               }
331                                                 }
332                                                 
333                                                 int ableToOpen;
334                                                 ifstream in;
335                                                 
336                                                 ableToOpen = m->openInputFile(groupFileNames[i], in, "noerror");
337                                                 
338                                                 //if you can't open it, try default location
339                                                 if (ableToOpen == 1) {
340                                                         if (m->getDefaultPath() != "") { //default path is set
341                                                                 string tryPath = m->getDefaultPath() + m->getSimpleName(groupFileNames[i]);
342                                                                 m->mothurOut("Unable to open " + groupFileNames[i] + ". Trying default " + tryPath); m->mothurOutEndLine();
343                                                                 ifstream in2;
344                                                                 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
345                                                                 in2.close();
346                                                                 groupFileNames[i] = tryPath;
347                                                         }
348                                                 }
349                                                 
350                                                 if (ableToOpen == 1) {
351                                                         if (m->getOutputDir() != "") { //default path is set
352                                                                 string tryPath = m->getOutputDir() + m->getSimpleName(groupFileNames[i]);
353                                                                 m->mothurOut("Unable to open " + groupFileNames[i] + ". Trying output directory " + tryPath); m->mothurOutEndLine();
354                                                                 ifstream in2;
355                                                                 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
356                                                                 in2.close();
357                                                                 groupFileNames[i] = tryPath;
358                                                         }
359                                                 }
360                                                 
361                                                 in.close();
362                                                 
363                                                 if (ableToOpen == 1) { 
364                                                         m->mothurOut("Unable to open " + groupFileNames[i] + ". It will be disregarded."); m->mothurOutEndLine(); 
365                                                         //erase from file list
366                                                         groupFileNames.erase(groupFileNames.begin()+i);
367                                                         i--;
368                                                 }else {
369                                                         m->setGroupFile(groupFileNames[i]);
370                                                 }
371                                         }
372                                 }
373                                 
374                                 //make sure there is at least one valid file left
375                                 if (groupFileNames.size() == 0) { m->mothurOut("[ERROR]: no valid group files."); m->mothurOutEndLine(); abort = true; }
376                         }
377                         
378                         if (hasGroup && (groupFileNames.size() != fastaFileNames.size())) { m->mothurOut("[ERROR]: The number of groupfiles does not match the number of fastafiles, please correct."); m->mothurOutEndLine(); abort=true; }
379                         
380                         
381                         //if the user changes the output directory command factory will send this info to us in the output parameter 
382                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = ""; }
383                         
384                         string path;
385                         it = parameters.find("reference");
386                         //user has given a template file
387                         if(it != parameters.end()){ 
388                                 if (it->second == "self") { templatefile = "self"; }
389                                 else {
390                                         path = m->hasPath(it->second);
391                                         //if the user has not given a path then, add inputdir. else leave path alone.
392                                         if (path == "") {       parameters["reference"] = inputDir + it->second;                }
393                                         
394                                         templatefile = validParameter.validFile(parameters, "reference", true);
395                                         if (templatefile == "not open") { abort = true; }
396                                         else if (templatefile == "not found") { //check for saved reference sequences
397                                                 if (rdb->getSavedReference() != "") {
398                                                         templatefile = rdb->getSavedReference();
399                                                         m->mothurOutEndLine();  m->mothurOut("Using sequences from " + rdb->getSavedReference() + "."); m->mothurOutEndLine();
400                                                 }else {
401                                                         m->mothurOut("[ERROR]: You don't have any saved reference sequences and the reference parameter is a required."); 
402                                                         m->mothurOutEndLine();
403                                                         abort = true; 
404                                                 }
405                                         }
406                                 }
407                         }else if (hasName) {  templatefile = "self"; }
408                         else { 
409                                 if (rdb->getSavedReference() != "") {
410                                         templatefile = rdb->getSavedReference();
411                                         m->mothurOutEndLine();  m->mothurOut("Using sequences from " + rdb->getSavedReference() + "."); m->mothurOutEndLine();
412                                 }else {
413                                         m->mothurOut("[ERROR]: You don't have any saved reference sequences and the reference parameter is a required."); 
414                                         m->mothurOutEndLine();
415                                         templatefile = ""; abort = true; 
416                                 } 
417                         }
418                                 
419                         string temp = validParameter.validFile(parameters, "processors", false);        if (temp == "not found"){       temp = m->getProcessors();      }
420                         m->setProcessors(temp);
421                         convert(temp, processors);
422                         
423                         abskew = validParameter.validFile(parameters, "abskew", false); if (abskew == "not found"){     useAbskew = false;  abskew = "1.9";     }else{  useAbskew = true;  }
424                         if (useAbskew && templatefile != "self") { m->mothurOut("The abskew parameter is only valid with template=self, ignoring."); m->mothurOutEndLine(); useAbskew = false; }
425                         
426                         temp = validParameter.validFile(parameters, "chimealns", false);                        if (temp == "not found") { temp = "f"; }
427                         chimealns = m->isTrue(temp); 
428                         
429                         minh = validParameter.validFile(parameters, "minh", false);                                             if (minh == "not found")                        { useMinH = false; minh = "0.3";                                        }       else{ useMinH = true;                   }
430                         mindiv = validParameter.validFile(parameters, "mindiv", false);                                 if (mindiv == "not found")                      { useMindiv = false; mindiv = "0.5";                            }       else{ useMindiv = true;                 }
431                         xn = validParameter.validFile(parameters, "xn", false);                                                 if (xn == "not found")                          { useXn = false; xn = "8.0";                                            }       else{ useXn = true;                             }
432                         dn = validParameter.validFile(parameters, "dn", false);                                                 if (dn == "not found")                          { useDn = false; dn = "1.4";                                            }       else{ useDn = true;                             }
433                         xa = validParameter.validFile(parameters, "xa", false);                                                 if (xa == "not found")                          { useXa = false; xa = "1";                                                      }       else{ useXa = true;                             }
434                         chunks = validParameter.validFile(parameters, "chunks", false);                                 if (chunks == "not found")                      { useChunks = false; chunks = "4";                                      }       else{ useChunks = true;                 }
435                         minchunk = validParameter.validFile(parameters, "minchunk", false);                             if (minchunk == "not found")            { useMinchunk = false; minchunk = "64";                         }       else{ useMinchunk = true;               }
436                         idsmoothwindow = validParameter.validFile(parameters, "idsmoothwindow", false); if (idsmoothwindow == "not found")      { useIdsmoothwindow = false; idsmoothwindow = "32";     }       else{ useIdsmoothwindow = true; }
437                         //minsmoothid = validParameter.validFile(parameters, "minsmoothid", false);             if (minsmoothid == "not found")         { useMinsmoothid = false; minsmoothid = "0.95";         }       else{ useMinsmoothid = true;    }
438                         maxp = validParameter.validFile(parameters, "maxp", false);                                             if (maxp == "not found")                        { useMaxp = false; maxp = "2";                                          }       else{ useMaxp = true;                   }
439                         minlen = validParameter.validFile(parameters, "minlen", false);                                 if (minlen == "not found")                      { useMinlen = false; minlen = "10";                                     }       else{ useMinlen = true;                 }
440                         maxlen = validParameter.validFile(parameters, "maxlen", false);                                 if (maxlen == "not found")                      { useMaxlen = false; maxlen = "10000";                          }       else{ useMaxlen = true;                 }
441                         
442                         temp = validParameter.validFile(parameters, "ucl", false);                                              if (temp == "not found") { temp = "f"; }
443                         ucl = m->isTrue(temp);
444                         
445                         queryfract = validParameter.validFile(parameters, "queryfract", false);                 if (queryfract == "not found")          { useQueryfract = false; queryfract = "0.5";            }       else{ useQueryfract = true;             }
446                         if (!ucl && useQueryfract) { m->mothurOut("queryfact may only be used when ucl=t, ignoring."); m->mothurOutEndLine(); useQueryfract = false; }
447                         
448                         temp = validParameter.validFile(parameters, "skipgaps", false);                                 if (temp == "not found") { temp = "t"; }
449                         skipgaps = m->isTrue(temp); 
450
451                         temp = validParameter.validFile(parameters, "skipgaps2", false);                                if (temp == "not found") { temp = "t"; }
452                         skipgaps2 = m->isTrue(temp); 
453                         
454                         if (hasName && (templatefile != "self")) { m->mothurOut("You have provided a namefile and the reference parameter is not set to self. I am not sure what reference you are trying to use, aborting."); m->mothurOutEndLine(); abort=true; }
455                         if (hasGroup && (templatefile != "self")) { m->mothurOut("You have provided a group file and the reference parameter is not set to self. I am not sure what reference you are trying to use, aborting."); m->mothurOutEndLine(); abort=true; }
456                         
457                         //look for uchime exe
458                         path = m->argv;
459                         string tempPath = path;
460                         for (int i = 0; i < path.length(); i++) { tempPath[i] = tolower(path[i]); }
461                         path = path.substr(0, (tempPath.find_last_of('m')));
462                         
463                         string uchimeCommand;
464 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
465                         uchimeCommand = path + "uchime";        //      format the database, -o option gives us the ability
466 #else
467                         uchimeCommand = path + "uchime.exe";
468 #endif
469                         
470                         //test to make sure uchime exists
471                         ifstream in;
472                         uchimeCommand = m->getFullPathName(uchimeCommand);
473                         int ableToOpen = m->openInputFile(uchimeCommand, in, "no error"); in.close();
474                         if(ableToOpen == 1) {   m->mothurOut("[ERROR]: " + uchimeCommand + " file does not exist. mothur requires the uchime executable."); m->mothurOutEndLine(); abort = true; }
475                 }
476         }
477         catch(exception& e) {
478                 m->errorOut(e, "ChimeraSlayerCommand", "ChimeraSlayerCommand");
479                 exit(1);
480         }
481 }
482 //***************************************************************************************************************
483
484 int ChimeraUchimeCommand::execute(){
485         try{
486                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
487                 
488                 m->mothurOut("\nuchime by Robert C. Edgar\nhttp://drive5.com/uchime\nThis code is donated to the public domain.\n\n");
489                 
490                 for (int s = 0; s < fastaFileNames.size(); s++) {
491                         
492                         m->mothurOut("Checking sequences from " + fastaFileNames[s] + " ..." ); m->mothurOutEndLine();
493                         
494                         int start = time(NULL); 
495                         string nameFile = "";
496                         if (outputDir == "") { outputDir = m->hasPath(fastaFileNames[s]);  }//if user entered a file with a path then preserve it                               
497                         string outputFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + "uchime.chimera";
498                         string accnosFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s]))  + "uchime.accnos";
499                         string alnsFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s]))  + "uchime.alns";
500                         string newFasta = m->getRootName(fastaFileNames[s]) + "temp";
501                                 
502                         //you provided a groupfile
503                         string groupFile = "";
504                         if (groupFileNames.size() != 0) { groupFile = groupFileNames[s]; }
505                         
506                         if ((templatefile == "self") && (groupFile == "")) { //you want to run uchime with a reference template
507
508                                 if (processors != 1) { m->mothurOut("When using template=self, mothur can only use 1 processor, continuing."); m->mothurOutEndLine(); processors = 1; }
509                                 if (nameFileNames.size() != 0) { //you provided a namefile and we don't need to create one
510                                         nameFile = nameFileNames[s];
511                                 }else { nameFile = getNamesFile(fastaFileNames[s]); }
512                                                                         
513                                 map<string, string> seqs;  
514                                 readFasta(fastaFileNames[s], seqs);  if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) {   m->mothurRemove(outputNames[j]);        }  return 0; }
515
516                                 //read namefile
517                                 vector<seqPriorityNode> nameMapCount;
518                                 int error = m->readNames(nameFile, nameMapCount, seqs); if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) {        m->mothurRemove(outputNames[j]);        }  return 0; }
519                                 if (error == 1) { for (int j = 0; j < outputNames.size(); j++) {        m->mothurRemove(outputNames[j]);        }  return 0; }
520                                 if (seqs.size() != nameMapCount.size()) { m->mothurOut( "The number of sequences in your fastafile does not match the number of sequences in your namefile, aborting."); m->mothurOutEndLine(); for (int j = 0; j < outputNames.size(); j++) {  m->mothurRemove(outputNames[j]);        }  return 0; }
521                                 
522                                 printFile(nameMapCount, newFasta);
523                                 fastaFileNames[s] = newFasta;
524                         }
525                         
526                         if (m->control_pressed) {  for (int j = 0; j < outputNames.size(); j++) {       m->mothurRemove(outputNames[j]);        }  return 0;    }                               
527                         
528                         if (groupFile != "") {
529                                 if (nameFileNames.size() != 0) { //you provided a namefile and we don't need to create one
530                                         nameFile = nameFileNames[s];
531                                 }else { nameFile = getNamesFile(fastaFileNames[s]); }
532                                 
533                                 //Parse sequences by group
534                                 SequenceParser parser(groupFile, fastaFileNames[s], nameFile);
535                                 vector<string> groups = parser.getNamesOfGroups();
536                                         
537                                 if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) {        m->mothurRemove(outputNames[j]);        }  return 0; }
538                                                                 
539                                 //clears files
540                                 ofstream out, out1, out2;
541                                 m->openOutputFile(outputFileName, out); out.close(); 
542                                 m->openOutputFile(accnosFileName, out1); out1.close();
543                                 if (chimealns) { m->openOutputFile(alnsFileName, out2); out2.close(); }
544                                 int totalSeqs = 0;
545                                 
546         #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
547                                 if(processors == 1)     {       totalSeqs = driverGroups(parser, outputFileName, newFasta, accnosFileName, alnsFileName, 0, groups.size(), groups);     }
548                                 else                            {       totalSeqs = createProcessesGroups(parser, outputFileName, newFasta, accnosFileName, alnsFileName, groups);                      }
549         #else
550                                 totalSeqs = driverGroups(parser, outputFileName, newFasta, accnosFileName, alnsFileName, 0, groups.size(), groups);
551         #endif
552                                 if (m->control_pressed) {  for (int j = 0; j < outputNames.size(); j++) {       m->mothurRemove(outputNames[j]);        }  return 0;    }                               
553
554                                 int totalChimeras = deconvoluteResults(parser, outputFileName, accnosFileName, alnsFileName);
555                                 
556                                 m->mothurOutEndLine(); m->mothurOut("It took " + toString(time(NULL) - start) + " secs to check " + toString(totalSeqs) + " sequences. " + toString(totalChimeras) + " chimeras were found.");  m->mothurOutEndLine();
557                                 m->mothurOut("The number of sequences checked may be larger than the number of unique sequences because some sequences are found in several samples."); m->mothurOutEndLine(); 
558                                 
559                                 if (m->control_pressed) {  for (int j = 0; j < outputNames.size(); j++) {       m->mothurRemove(outputNames[j]);        }  return 0;    }                               
560                                         
561                         }else{
562                                 if (m->control_pressed) {  for (int j = 0; j < outputNames.size(); j++) {       m->mothurRemove(outputNames[j]);        }  return 0;    }
563                         
564                                 int numSeqs = 0;
565                                 int numChimeras = 0;
566         #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
567                                 if(processors == 1){ numSeqs = driver(outputFileName, fastaFileNames[s], accnosFileName, alnsFileName, numChimeras); }
568                                 else{   numSeqs = createProcesses(outputFileName, fastaFileNames[s], accnosFileName, alnsFileName, numChimeras); }
569         #else
570                                 numSeqs = driver(outputFileName, fastaFileNames[s], accnosFileName, alnsFileName, numChimeras);
571         #endif
572                                 if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) {        m->mothurRemove(outputNames[j]);        } return 0; }
573                         
574                                 //remove file made for uchime
575                                 if (templatefile == "self") {  m->mothurRemove(fastaFileNames[s]); }
576                         
577                                 m->mothurOutEndLine(); m->mothurOut("It took " + toString(time(NULL) - start) + " secs to check " + toString(numSeqs) + " sequences. " + toString(numChimeras) + " chimeras were found.");      m->mothurOutEndLine();
578                         }
579                         
580                         outputNames.push_back(outputFileName); outputTypes["chimera"].push_back(outputFileName);
581                         outputNames.push_back(accnosFileName); outputTypes["accnos"].push_back(accnosFileName);
582                         if (chimealns) { outputNames.push_back(alnsFileName); outputTypes["alns"].push_back(alnsFileName); }
583                 }
584         
585                 //set accnos file as new current accnosfile
586                 string current = "";
587                 itTypes = outputTypes.find("accnos");
588                 if (itTypes != outputTypes.end()) {
589                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setAccnosFile(current); }
590                 }
591                 
592                 m->mothurOutEndLine();
593                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
594                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }       
595                 m->mothurOutEndLine();
596                 
597                 return 0;
598                 
599         }
600         catch(exception& e) {
601                 m->errorOut(e, "ChimeraUchimeCommand", "execute");
602                 exit(1);
603         }
604 }
605 //**********************************************************************************************************************
606 int ChimeraUchimeCommand::deconvoluteResults(SequenceParser& parser, string outputFileName, string accnosFileName, string alnsFileName){
607         try {
608                 map<string, string> uniqueNames = parser.getAllSeqsMap();
609                 map<string, string>::iterator itUnique;
610                 int total = 0;
611                 
612                 //edit accnos file
613                 ifstream in2; 
614                 m->openInputFile(accnosFileName, in2);
615                 
616                 ofstream out2;
617                 m->openOutputFile(accnosFileName+".temp", out2);
618                 
619                 string name;
620                 set<string> namesInFile; //this is so if a sequence is found to be chimera in several samples we dont write it to the results file more than once
621                 set<string>::iterator itNames;
622                 set<string> chimerasInFile;
623                 set<string>::iterator itChimeras;
624
625                 
626                 while (!in2.eof()) {
627                         if (m->control_pressed) { in2.close(); out2.close(); m->mothurRemove(outputFileName); m->mothurRemove((accnosFileName+".temp")); return 0; }
628                         
629                         in2 >> name; m->gobble(in2);
630                         
631                         //find unique name
632                         itUnique = uniqueNames.find(name);
633                         
634                         if (itUnique == uniqueNames.end()) { m->mothurOut("[ERROR]: trouble parsing accnos results. Cannot find "+ name + "."); m->mothurOutEndLine(); m->control_pressed = true; }
635                         else {
636                                 itChimeras = chimerasInFile.find((itUnique->second));
637                                 
638                                 if (itChimeras == chimerasInFile.end()) {
639                                         out2 << itUnique->second << endl;
640                                         chimerasInFile.insert((itUnique->second));
641                                         total++;
642                                 }
643                         }
644                 }
645                 in2.close();
646                 out2.close();
647                 
648                 m->mothurRemove(accnosFileName);
649                 rename((accnosFileName+".temp").c_str(), accnosFileName.c_str());
650                 
651                 
652                 
653                 //edit chimera file
654                 ifstream in; 
655                 m->openInputFile(outputFileName, in);
656                 
657                 ofstream out;
658                 m->openOutputFile(outputFileName+".temp", out); out.setf(ios::fixed, ios::floatfield); out.setf(ios::showpoint);
659                 
660                 float temp1;
661                 string parent1, parent2, temp2, temp3, temp4, temp5, temp6, temp7, temp8, temp9, temp10, temp11, temp12, temp13, flag;
662                 name = "";
663                 namesInFile.clear();    
664                 //assumptions - in file each read will always look like - if uchime source is updated, revisit this code.
665                 /*                                                                              1       2       3       4       5       6       7       8       9       10      11      12      13      14      15
666                  0.000000       F11Fcsw_33372/ab=18/            *       *       *       *       *       *       *       *       *       *       *       *       *       *       N
667                  0.018300       F11Fcsw_14980/ab=16/            F11Fcsw_1915/ab=35/     F11Fcsw_6032/ab=42/     79.9    78.7    78.2    78.7    79.2    3       0       5       11      10      20      1.46    N
668                 */
669                 
670                 while (!in.eof()) {
671                         
672                         if (m->control_pressed) { in.close(); out.close(); m->mothurRemove((outputFileName+".temp")); return 0; }
673                         
674                         bool print = false;
675                         in >> temp1;    m->gobble(in);
676                         in >> name;             m->gobble(in);
677                         in >> parent1;  m->gobble(in);
678                         in >> parent2;  m->gobble(in);
679                         in >> temp2 >> temp3 >> temp4 >> temp5 >> temp6 >> temp7 >> temp8 >> temp9 >> temp10 >> temp11 >> temp12 >> temp13 >> flag;
680                         m->gobble(in);
681                         
682                         //parse name - name will look like U68590/ab=1/
683                         string restOfName = "";
684                         int pos = name.find_first_of('/');
685                         if (pos != string::npos) {
686                                 restOfName = name.substr(pos);
687                                 name = name.substr(0, pos);
688                         }
689                         
690                         //find unique name
691                         itUnique = uniqueNames.find(name);
692                         
693                         if (itUnique == uniqueNames.end()) { m->mothurOut("[ERROR]: trouble parsing chimera results. Cannot find "+ name + "."); m->mothurOutEndLine(); m->control_pressed = true; }
694                         else {
695                                 name = itUnique->second;
696                                 //is this name already in the file
697                                 itNames = namesInFile.find((name));
698                                 
699                                 if (itNames == namesInFile.end()) { //no not in file
700                                         if (flag == "N") { //are you really a no??
701                                                 //is this sequence really not chimeric??
702                                                 itChimeras = chimerasInFile.find(name);
703                                                 
704                                                 //then you really are a no so print, otherwise skip
705                                                 if (itChimeras == chimerasInFile.end()) { print = true; }
706                                         }else{ print = true; }
707                                 }
708                         }
709                         
710                         if (print) {
711                                 out << temp1 << '\t' << name << restOfName << '\t';
712                                 namesInFile.insert(name);
713                                 
714                                 //parse parent1 names
715                                 if (parent1 != "*") {
716                                         restOfName = "";
717                                         pos = parent1.find_first_of('/');
718                                         if (pos != string::npos) {
719                                                 restOfName = parent1.substr(pos);
720                                                 parent1 = parent1.substr(0, pos);
721                                         }
722                                         
723                                         itUnique = uniqueNames.find(parent1);
724                                         if (itUnique == uniqueNames.end()) { m->mothurOut("[ERROR]: trouble parsing chimera results. Cannot find parentA "+ parent1 + "."); m->mothurOutEndLine(); m->control_pressed = true; }
725                                         else {  out << itUnique->second << restOfName << '\t';  }
726                                 }else { out << parent1 << '\t'; }
727                                 
728                                 //parse parent2 names
729                                 if (parent2 != "*") {
730                                         restOfName = "";
731                                         pos = parent2.find_first_of('/');
732                                         if (pos != string::npos) {
733                                                 restOfName = parent2.substr(pos);
734                                                 parent2 = parent2.substr(0, pos);
735                                         }
736                                         
737                                         itUnique = uniqueNames.find(parent2);
738                                         if (itUnique == uniqueNames.end()) { m->mothurOut("[ERROR]: trouble parsing chimera results. Cannot find parentB "+ parent2 + "."); m->mothurOutEndLine(); m->control_pressed = true; }
739                                         else {  out << itUnique->second << restOfName << '\t';  }
740                                 }else { out << parent2 << '\t'; }
741                                 
742                                 out << temp2 << '\t' << temp3 << '\t' << temp4 << '\t' << temp5 << '\t' << temp6 << '\t' << temp7 << '\t' << temp8 << '\t' << temp9 << '\t' << temp10 << '\t' << temp11 << '\t' << temp12 << temp13 << '\t' << flag << endl;    
743                         }
744                 }
745                 in.close();
746                 out.close();
747                 
748                 m->mothurRemove(outputFileName);
749                 rename((outputFileName+".temp").c_str(), outputFileName.c_str());
750                 
751                                 
752                 //edit anls file
753                 //assumptions - in file each read will always look like - if uchime source is updated, revisit this code.
754                 /*
755                  ------------------------------------------------------------------------
756                  Query   (  179 nt) F21Fcsw_11639/ab=591/
757                  ParentA (  179 nt) F11Fcsw_6529/ab=1625/
758                  ParentB (  181 nt) F21Fcsw_12128/ab=1827/
759                  
760                  A     1 AAGgAAGAtTAATACaagATGgCaTCatgAGtccgCATgTtcAcatGATTAAAG--gTaTtcCGGTagacGATGGGGATG 78
761                  Q     1 AAGTAAGACTAATACCCAATGACGTCTCTAGAAGACATCTGAAAGAGATTAAAG--ATTTATCGGTGATGGATGGGGATG 78
762                  B     1 AAGgAAGAtTAATcCaggATGggaTCatgAGttcACATgTccgcatGATTAAAGgtATTTtcCGGTagacGATGGGGATG 80
763                  Diffs      N    N    A N?N   N N  NNN  N?NB   N ?NaNNN          B B NN    NNNN          
764                  Votes      0    0    + 000   0 0  000  000+   0 00!000            + 00    0000          
765                  Model   AAAAAAAAAAAAAAAAAAAAAAxBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB
766                  
767                  A    79 CGTtccATTAGaTaGTaGGCGGGGTAACGGCCCACCtAGtCttCGATggaTAGGGGTTCTGAGAGGAAGGTCCCCCACAT 158
768                  Q    79 CGTCTGATTAGCTTGTTGGCGGGGTAACGGCCCACCAAGGCAACGATCAGTAGGGGTTCTGAGAGGAAGGTCCCCCACAT 158
769                  B    81 CGTtccATTAGaTaGTaGGCGGGGTAACGGCCCACCtAGtCAACGATggaTAGGGGTTCTGAGAGGAAGGTCCCCCACAT 160
770                  Diffs      NNN     N N  N                   N  N BB    NNN                              
771                  Votes      000     0 0  0                   0  0 ++    000                              
772                  Model   BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB
773                  
774                  A   159 TGGAACTGAGACACGGTCCAA 179
775                  Q   159 TGGAACTGAGACACGGTCCAA 179
776                  B   161 TGGAACTGAGACACGGTCCAA 181
777                  Diffs                        
778                  Votes                        
779                  Model   BBBBBBBBBBBBBBBBBBBBB
780                  
781                  Ids.  QA 76.6%, QB 77.7%, AB 93.7%, QModel 78.9%, Div. +1.5%
782                  Diffs Left 7: N 0, A 6, Y 1 (14.3%); Right 35: N 1, A 30, Y 4 (11.4%), Score 0.0047
783                 */
784                 if (chimealns) {
785                         ifstream in3; 
786                         m->openInputFile(alnsFileName, in3);
787                 
788                         ofstream out3;
789                         m->openOutputFile(alnsFileName+".temp", out3); out3.setf(ios::fixed, ios::floatfield); out3.setf(ios::showpoint);
790                 
791                         name = "";
792                         namesInFile.clear();
793                         string line = "";
794                         
795                         while (!in3.eof()) {
796                                 if (m->control_pressed) { in3.close(); out3.close(); m->mothurRemove(outputFileName); m->mothurRemove((accnosFileName)); m->mothurRemove((alnsFileName+".temp")); return 0; }
797                                 
798                                 line = "";
799                                 line = m->getline(in3); 
800                                 string temp = "";
801                                 
802                                 if (line != "") {
803                                         istringstream iss(line);
804                                         iss >> temp;
805                                         
806                                         //are you a name line
807                                         if ((temp == "Query") || (temp == "ParentA") || (temp == "ParentB")) {
808                                                 int spot = 0;
809                                                 for (int i = 0; i < line.length(); i++) {
810                                                         spot = i;
811                                                         if (line[i] == ')') { break; }
812                                                         else { out3 << line[i]; }
813                                                 }
814                                                 
815                                                 if (spot == (line.length() - 1)) { m->mothurOut("[ERROR]: could not line sequence name in line " + line + "."); m->mothurOutEndLine(); m->control_pressed = true; }
816                                                 else if ((spot+2) > (line.length() - 1)) { m->mothurOut("[ERROR]: could not line sequence name in line " + line + "."); m->mothurOutEndLine(); m->control_pressed = true; }
817                                                 else {
818                                                         out << line[spot] << line[spot+1];
819                                                         
820                                                         name = line.substr(spot+2);
821                                                         
822                                                         //parse name - name will either look like U68590/ab=1/ or U68590
823                                                         string restOfName = "";
824                                                         int pos = name.find_first_of('/');
825                                                         if (pos != string::npos) {
826                                                                 restOfName = name.substr(pos);
827                                                                 name = name.substr(0, pos);
828                                                         }
829                                                         
830                                                         //find unique name
831                                                         itUnique = uniqueNames.find(name);
832                                                         
833                                                         if (itUnique == uniqueNames.end()) { m->mothurOut("[ERROR]: trouble parsing alns results. Cannot find "+ name + "."); m->mothurOutEndLine();m->control_pressed = true;  }
834                                                         else {
835                                                                 //only limit repeats on query names
836                                                                 if (temp == "Query") {
837                                                                         itNames = namesInFile.find((itUnique->second));
838                                                                         
839                                                                         if (itNames == namesInFile.end()) {
840                                                                                 out << itUnique->second << restOfName << endl;
841                                                                                 namesInFile.insert((itUnique->second));
842                                                                         }
843                                                                 }else { out << itUnique->second << restOfName << endl;  }
844                                                         }
845                                                         
846                                                 }
847                                                 
848                                         }else { //not need to alter line
849                                                 out3 << line << endl;
850                                         }
851                                 }else { out3 << endl; }
852                         }
853                         in3.close();
854                         out3.close();
855                         
856                         m->mothurRemove(alnsFileName);
857                         rename((alnsFileName+".temp").c_str(), alnsFileName.c_str());
858                 }
859                 
860                 return total;
861         }
862         catch(exception& e) {
863                 m->errorOut(e, "ChimeraUchimeCommand", "deconvoluteResults");
864                 exit(1);
865         }
866 }       
867 //**********************************************************************************************************************
868 int ChimeraUchimeCommand::printFile(vector<seqPriorityNode>& nameMapCount, string filename){
869         try {
870                 
871                 sort(nameMapCount.begin(), nameMapCount.end(), compareSeqPriorityNodes);
872                 
873                 ofstream out;
874                 m->openOutputFile(filename, out);
875                 
876                 //print new file in order of
877                 for (int i = 0; i < nameMapCount.size(); i++) {
878                         out << ">" << nameMapCount[i].name  << "/ab=" << nameMapCount[i].numIdentical << "/" << endl << nameMapCount[i].seq << endl;
879                 }
880                 out.close();
881                 
882                 return 0;
883         }
884         catch(exception& e) {
885                 m->errorOut(e, "ChimeraUchimeCommand", "printFile");
886                 exit(1);
887         }
888 }       
889 //**********************************************************************************************************************
890 int ChimeraUchimeCommand::readFasta(string filename, map<string, string>& seqs){
891         try {
892                 //create input file for uchime
893                 //read through fastafile and store info
894                 ifstream in;
895                 m->openInputFile(filename, in);
896                 
897                 while (!in.eof()) {
898                         
899                         if (m->control_pressed) { in.close(); return 0; }
900                         
901                         Sequence seq(in); m->gobble(in);
902                         seqs[seq.getName()] = seq.getAligned();
903                 }
904                 in.close();
905                 
906                 return 0;
907         }
908         catch(exception& e) {
909                 m->errorOut(e, "ChimeraUchimeCommand", "readFasta");
910                 exit(1);
911         }
912 }       
913 //**********************************************************************************************************************
914
915 string ChimeraUchimeCommand::getNamesFile(string& inputFile){
916         try {
917                 string nameFile = "";
918                 
919                 m->mothurOutEndLine(); m->mothurOut("No namesfile given, running unique.seqs command to generate one."); m->mothurOutEndLine(); m->mothurOutEndLine();
920                 
921                 //use unique.seqs to create new name and fastafile
922                 string inputString = "fasta=" + inputFile;
923                 m->mothurOut("/******************************************/"); m->mothurOutEndLine(); 
924                 m->mothurOut("Running command: unique.seqs(" + inputString + ")"); m->mothurOutEndLine(); 
925                 
926                 Command* uniqueCommand = new DeconvoluteCommand(inputString);
927                 uniqueCommand->execute();
928                 
929                 map<string, vector<string> > filenames = uniqueCommand->getOutputFiles();
930                 
931                 delete uniqueCommand;
932                 
933                 m->mothurOut("/******************************************/"); m->mothurOutEndLine(); 
934                 
935                 nameFile = filenames["name"][0];
936                 inputFile = filenames["fasta"][0];
937                 
938                 return nameFile;
939         }
940         catch(exception& e) {
941                 m->errorOut(e, "ChimeraUchimeCommand", "getNamesFile");
942                 exit(1);
943         }
944 }
945 //**********************************************************************************************************************
946 int ChimeraUchimeCommand::driverGroups(SequenceParser& parser, string outputFName, string filename, string accnos, string alns, int start, int end, vector<string> groups){
947         try {
948                 
949                 int totalSeqs = 0;
950                 int numChimeras = 0;
951                 
952                 for (int i = start; i < end; i++) {
953                         int start = time(NULL);  if (m->control_pressed) {  return 0; }
954                         
955                         int error = parser.getSeqs(groups[i], filename, true); if ((error == 1) || m->control_pressed) {  return 0; }
956                         
957                         int numSeqs = driver((outputFName + groups[i]), filename, (accnos+ groups[i]), (alns+ groups[i]), numChimeras);
958                         totalSeqs += numSeqs;
959                         
960                         if (m->control_pressed) { return 0; }
961                         
962                         //remove file made for uchime
963                         m->mothurRemove(filename);
964                         
965                         //append files
966                         m->appendFiles((outputFName+groups[i]), outputFName); m->mothurRemove((outputFName+groups[i]));
967                         m->appendFiles((accnos+groups[i]), accnos); m->mothurRemove((accnos+groups[i]));
968                         if (chimealns) { m->appendFiles((alns+groups[i]), alns); m->mothurRemove((alns+groups[i])); }
969                         
970                         m->mothurOutEndLine(); m->mothurOut("It took " + toString(time(NULL) - start) + " secs to check " + toString(numSeqs) + " sequences from group " + groups[i] + ".");    m->mothurOutEndLine();                                  
971                 }       
972                 
973                 return totalSeqs;
974                 
975         }
976         catch(exception& e) {
977                 m->errorOut(e, "ChimeraUchimeCommand", "driverGroups");
978                 exit(1);
979         }
980 }       
981 //**********************************************************************************************************************
982
983 int ChimeraUchimeCommand::driver(string outputFName, string filename, string accnos, string alns, int& numChimeras){
984         try {
985                 //to allow for spaces in the path
986                 outputFName = "\"" + outputFName + "\"";
987                 filename = "\"" + filename + "\"";
988                 alns = "\"" + alns + "\"";
989                                 
990                 vector<char*> cPara;
991         
992                 char* tempUchime;
993 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
994                 tempUchime= new char[10];  
995                 *tempUchime = '\0';
996                 strncat(tempUchime, "./uchime ", 9); 
997 #else
998                 tempUchime= new char[8]; 
999                 *tempUchime = '\0';
1000                 strncat(tempUchime, "uchime ", 7); 
1001 #endif
1002                 cPara.push_back(tempUchime);
1003                 
1004                 char* tempIn = new char[8]; 
1005                 *tempIn = '\0'; strncat(tempIn, "--input", 7);
1006                 //strcpy(tempIn, "--input"); 
1007                 cPara.push_back(tempIn);
1008                 char* temp = new char[filename.length()+1];
1009                 *temp = '\0'; strncat(temp, filename.c_str(), filename.length());
1010                 //strcpy(temp, filename.c_str());
1011                 cPara.push_back(temp);
1012                 
1013                 //are you using a reference file
1014                 if (templatefile != "self") {
1015                         //add reference file
1016                         char* tempRef = new char[5]; 
1017                         //strcpy(tempRef, "--db"); 
1018                         *tempRef = '\0'; strncat(tempRef, "--db", 4);
1019                         cPara.push_back(tempRef);  
1020                         char* tempR = new char[templatefile.length()+1];
1021                         //strcpy(tempR, templatefile.c_str());
1022                         *tempR = '\0'; strncat(tempR, templatefile.c_str(), templatefile.length());
1023                         cPara.push_back(tempR);
1024                 }
1025                 
1026                 char* tempO = new char[12]; 
1027                 *tempO = '\0'; strncat(tempO, "--uchimeout", 11);
1028                 //strcpy(tempO, "--uchimeout"); 
1029                 cPara.push_back(tempO);
1030                 char* tempout = new char[outputFName.length()+1];
1031                 //strcpy(tempout, outputFName.c_str());
1032                 *tempout = '\0'; strncat(tempout, outputFName.c_str(), outputFName.length());
1033                 cPara.push_back(tempout);
1034                 
1035                 if (chimealns) {
1036                         char* tempA = new char[13]; 
1037                         *tempA = '\0'; strncat(tempA, "--uchimealns", 12);
1038                         //strcpy(tempA, "--uchimealns"); 
1039                         cPara.push_back(tempA);
1040                         char* tempa = new char[alns.length()+1];
1041                         //strcpy(tempa, alns.c_str());
1042                         *tempa = '\0'; strncat(tempa, alns.c_str(), alns.length());
1043                         cPara.push_back(tempa);
1044                 }
1045                 
1046                 if (useAbskew) {
1047                         char* tempskew = new char[9];
1048                         *tempskew = '\0'; strncat(tempskew, "--abskew", 8);
1049                         //strcpy(tempskew, "--abskew"); 
1050                         cPara.push_back(tempskew);
1051                         char* tempSkew = new char[abskew.length()+1];
1052                         //strcpy(tempSkew, abskew.c_str());
1053                         *tempSkew = '\0'; strncat(tempSkew, abskew.c_str(), abskew.length());
1054                         cPara.push_back(tempSkew);
1055                 }
1056                 
1057                 if (useMinH) {
1058                         char* tempminh = new char[7]; 
1059                         *tempminh = '\0'; strncat(tempminh, "--minh", 6);
1060                         //strcpy(tempminh, "--minh"); 
1061                         cPara.push_back(tempminh);
1062                         char* tempMinH = new char[minh.length()+1];
1063                         *tempMinH = '\0'; strncat(tempMinH, minh.c_str(), minh.length());
1064                         //strcpy(tempMinH, minh.c_str());
1065                         cPara.push_back(tempMinH);
1066                 }
1067                 
1068                 if (useMindiv) {
1069                         char* tempmindiv = new char[9]; 
1070                         *tempmindiv = '\0'; strncat(tempmindiv, "--mindiv", 8);
1071                         //strcpy(tempmindiv, "--mindiv"); 
1072                         cPara.push_back(tempmindiv);
1073                         char* tempMindiv = new char[mindiv.length()+1];
1074                         *tempMindiv = '\0'; strncat(tempMindiv, mindiv.c_str(), mindiv.length());
1075                         //strcpy(tempMindiv, mindiv.c_str());
1076                         cPara.push_back(tempMindiv);
1077                 }
1078                 
1079                 if (useXn) {
1080                         char* tempxn = new char[5]; 
1081                         //strcpy(tempxn, "--xn"); 
1082                         *tempxn = '\0'; strncat(tempxn, "--xn", 4);
1083                         cPara.push_back(tempxn);
1084                         char* tempXn = new char[xn.length()+1];
1085                         //strcpy(tempXn, xn.c_str());
1086                         *tempXn = '\0'; strncat(tempXn, xn.c_str(), xn.length());
1087                         cPara.push_back(tempXn);
1088                 }
1089                 
1090                 if (useDn) {
1091                         char* tempdn = new char[5]; 
1092                         //strcpy(tempdn, "--dn"); 
1093                         *tempdn = '\0'; strncat(tempdn, "--dn", 4);
1094                         cPara.push_back(tempdn);
1095                         char* tempDn = new char[dn.length()+1];
1096                         *tempDn = '\0'; strncat(tempDn, dn.c_str(), dn.length());
1097                         //strcpy(tempDn, dn.c_str());
1098                         cPara.push_back(tempDn);
1099                 }
1100                 
1101                 if (useXa) {
1102                         char* tempxa = new char[5]; 
1103                         //strcpy(tempxa, "--xa"); 
1104                         *tempxa = '\0'; strncat(tempxa, "--xa", 4);
1105                         cPara.push_back(tempxa);
1106                         char* tempXa = new char[xa.length()+1];
1107                         *tempXa = '\0'; strncat(tempXa, xa.c_str(), xa.length());
1108                         //strcpy(tempXa, xa.c_str());
1109                         cPara.push_back(tempXa);
1110                 }
1111                 
1112                 if (useChunks) {
1113                         char* tempchunks = new char[9]; 
1114                         //strcpy(tempchunks, "--chunks"); 
1115                         *tempchunks = '\0'; strncat(tempchunks, "--chunks", 8);
1116                         cPara.push_back(tempchunks);
1117                         char* tempChunks = new char[chunks.length()+1];
1118                         *tempChunks = '\0'; strncat(tempChunks, chunks.c_str(), chunks.length());
1119                         //strcpy(tempChunks, chunks.c_str());
1120                         cPara.push_back(tempChunks);
1121                 }
1122                 
1123                 if (useMinchunk) {
1124                         char* tempminchunk = new char[11]; 
1125                         //strcpy(tempminchunk, "--minchunk"); 
1126                         *tempminchunk = '\0'; strncat(tempminchunk, "--minchunk", 10);
1127                         cPara.push_back(tempminchunk);
1128                         char* tempMinchunk = new char[minchunk.length()+1];
1129                         *tempMinchunk = '\0'; strncat(tempMinchunk, minchunk.c_str(), minchunk.length());
1130                         //strcpy(tempMinchunk, minchunk.c_str());
1131                         cPara.push_back(tempMinchunk);
1132                 }
1133                 
1134                 if (useIdsmoothwindow) {
1135                         char* tempidsmoothwindow = new char[17]; 
1136                         *tempidsmoothwindow = '\0'; strncat(tempidsmoothwindow, "--idsmoothwindow", 16);
1137                         //strcpy(tempidsmoothwindow, "--idsmoothwindow"); 
1138                         cPara.push_back(tempidsmoothwindow);
1139                         char* tempIdsmoothwindow = new char[idsmoothwindow.length()+1];
1140                         *tempIdsmoothwindow = '\0'; strncat(tempIdsmoothwindow, idsmoothwindow.c_str(), idsmoothwindow.length());
1141                         //strcpy(tempIdsmoothwindow, idsmoothwindow.c_str());
1142                         cPara.push_back(tempIdsmoothwindow);
1143                 }
1144                 
1145                 /*if (useMinsmoothid) {
1146                         char* tempminsmoothid = new char[14]; 
1147                         //strcpy(tempminsmoothid, "--minsmoothid"); 
1148                         *tempminsmoothid = '\0'; strncat(tempminsmoothid, "--minsmoothid", 13);
1149                         cPara.push_back(tempminsmoothid);
1150                         char* tempMinsmoothid = new char[minsmoothid.length()+1];
1151                         *tempMinsmoothid = '\0'; strncat(tempMinsmoothid, minsmoothid.c_str(), minsmoothid.length());
1152                         //strcpy(tempMinsmoothid, minsmoothid.c_str());
1153                         cPara.push_back(tempMinsmoothid);
1154                 }*/
1155                 
1156                 if (useMaxp) {
1157                         char* tempmaxp = new char[7]; 
1158                         //strcpy(tempmaxp, "--maxp"); 
1159                         *tempmaxp = '\0'; strncat(tempmaxp, "--maxp", 6);
1160                         cPara.push_back(tempmaxp);
1161                         char* tempMaxp = new char[maxp.length()+1];
1162                         *tempMaxp = '\0'; strncat(tempMaxp, maxp.c_str(), maxp.length());
1163                         //strcpy(tempMaxp, maxp.c_str());
1164                         cPara.push_back(tempMaxp);
1165                 }
1166                 
1167                 if (!skipgaps) {
1168                         char* tempskipgaps = new char[13]; 
1169                         //strcpy(tempskipgaps, "--[no]skipgaps");
1170                         *tempskipgaps = '\0'; strncat(tempskipgaps, "--noskipgaps", 12);
1171                         cPara.push_back(tempskipgaps);
1172                 }
1173                 
1174                 if (!skipgaps2) {
1175                         char* tempskipgaps2 = new char[14]; 
1176                         //strcpy(tempskipgaps2, "--[no]skipgaps2"); 
1177                         *tempskipgaps2 = '\0'; strncat(tempskipgaps2, "--noskipgaps2", 13);
1178                         cPara.push_back(tempskipgaps2);
1179                 }
1180                 
1181                 if (useMinlen) {
1182                         char* tempminlen = new char[9]; 
1183                         *tempminlen = '\0'; strncat(tempminlen, "--minlen", 8);
1184                         //strcpy(tempminlen, "--minlen"); 
1185                         cPara.push_back(tempminlen);
1186                         char* tempMinlen = new char[minlen.length()+1];
1187                         //strcpy(tempMinlen, minlen.c_str());
1188                         *tempMinlen = '\0'; strncat(tempMinlen, minlen.c_str(), minlen.length());
1189                         cPara.push_back(tempMinlen);
1190                 }
1191                 
1192                 if (useMaxlen) {
1193                         char* tempmaxlen = new char[9]; 
1194                         //strcpy(tempmaxlen, "--maxlen"); 
1195                         *tempmaxlen = '\0'; strncat(tempmaxlen, "--maxlen", 8);
1196                         cPara.push_back(tempmaxlen);
1197                         char* tempMaxlen = new char[maxlen.length()+1];
1198                         *tempMaxlen = '\0'; strncat(tempMaxlen, maxlen.c_str(), maxlen.length());
1199                         //strcpy(tempMaxlen, maxlen.c_str());
1200                         cPara.push_back(tempMaxlen);
1201                 }
1202                 
1203                 if (ucl) {
1204                         char* tempucl = new char[5]; 
1205                         strcpy(tempucl, "--ucl"); 
1206                         cPara.push_back(tempucl);
1207                 }
1208                 
1209                 if (useQueryfract) {
1210                         char* tempqueryfract = new char[13]; 
1211                         *tempqueryfract = '\0'; strncat(tempqueryfract, "--queryfract", 12);
1212                         //strcpy(tempqueryfract, "--queryfract"); 
1213                         cPara.push_back(tempqueryfract);
1214                         char* tempQueryfract = new char[queryfract.length()+1];
1215                         *tempQueryfract = '\0'; strncat(tempQueryfract, queryfract.c_str(), queryfract.length());
1216                         //strcpy(tempQueryfract, queryfract.c_str());
1217                         cPara.push_back(tempQueryfract);
1218                 }
1219                 
1220                 
1221                 char** uchimeParameters;
1222                 uchimeParameters = new char*[cPara.size()];
1223                 string commandString = "";
1224                 for (int i = 0; i < cPara.size(); i++) {  uchimeParameters[i] = cPara[i];  commandString += toString(cPara[i]) + " "; } 
1225                 //int numArgs = cPara.size();
1226                 
1227                 //uchime_main(numArgs, uchimeParameters); 
1228                 //cout << "commandString = " << commandString << endl;
1229                 system(commandString.c_str());
1230                 
1231                 //free memory
1232                 for(int i = 0; i < cPara.size(); i++)  {  delete cPara[i];  }
1233                 delete[] uchimeParameters; 
1234                 
1235                 //remove "" from filenames
1236                 outputFName = outputFName.substr(1, outputFName.length()-2);
1237                 filename = filename.substr(1, filename.length()-2);
1238                 alns = alns.substr(1, alns.length()-2);
1239                 
1240                 if (m->control_pressed) { return 0; }
1241                 
1242                 //create accnos file from uchime results
1243                 ifstream in; 
1244                 m->openInputFile(outputFName, in);
1245                 
1246                 ofstream out;
1247                 m->openOutputFile(accnos, out);
1248                 
1249                 int num = 0;
1250                 numChimeras = 0;
1251                 while(!in.eof()) {
1252                         
1253                         if (m->control_pressed) { break; }
1254                         
1255                         string name = "";
1256                         string chimeraFlag = "";
1257                         in >> chimeraFlag >> name;
1258                         
1259                         //fix name if needed
1260                         if (templatefile == "self") { 
1261                                 name = name.substr(0, name.length()-1); //rip off last /
1262                                 name = name.substr(0, name.find_last_of('/'));
1263                         }
1264                         
1265                         for (int i = 0; i < 15; i++) {  in >> chimeraFlag; }
1266                         m->gobble(in);
1267                         
1268                         if (chimeraFlag == "Y") {  out << name << endl; numChimeras++; }
1269                         num++;
1270                 }
1271                 in.close();
1272                 out.close();
1273                 
1274                 return num;
1275         }
1276         catch(exception& e) {
1277                 m->errorOut(e, "ChimeraUchimeCommand", "driver");
1278                 exit(1);
1279         }
1280 }
1281 /**************************************************************************************************/
1282
1283 int ChimeraUchimeCommand::createProcesses(string outputFileName, string filename, string accnos, string alns, int& numChimeras) {
1284         try {
1285                 
1286                 processIDS.clear();
1287                 int process = 1;
1288                 int num = 0;
1289 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)           
1290                 //break up file into multiple files
1291                 vector<string> files;
1292                 m->divideFile(filename, processors, files);
1293                 
1294                 if (m->control_pressed) {  return 0;  }
1295                                 
1296                 //loop through and create all the processes you want
1297                 while (process != processors) {
1298                         int pid = fork();
1299                         
1300                         if (pid > 0) {
1301                                 processIDS.push_back(pid);  //create map from line number to pid so you can append files in correct order later
1302                                 process++;
1303                         }else if (pid == 0){
1304                                 num = driver(outputFileName + toString(getpid()) + ".temp", files[process], accnos + toString(getpid()) + ".temp", alns + toString(getpid()) + ".temp", numChimeras);
1305                                 
1306                                 //pass numSeqs to parent
1307                                 ofstream out;
1308                                 string tempFile = outputFileName + toString(getpid()) + ".num.temp";
1309                                 m->openOutputFile(tempFile, out);
1310                                 out << num << endl;
1311                                 out << numChimeras << endl;
1312                                 out.close();
1313                                 
1314                                 exit(0);
1315                         }else { 
1316                                 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine(); 
1317                                 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
1318                                 exit(0);
1319                         }
1320                 }
1321                 
1322                 //do my part
1323                 num = driver(outputFileName, files[0], accnos, alns, numChimeras);
1324                 
1325                 //force parent to wait until all the processes are done
1326                 for (int i=0;i<processIDS.size();i++) { 
1327                         int temp = processIDS[i];
1328                         wait(&temp);
1329                 }
1330                 
1331                 for (int i = 0; i < processIDS.size(); i++) {
1332                         ifstream in;
1333                         string tempFile =  outputFileName + toString(processIDS[i]) + ".num.temp";
1334                         m->openInputFile(tempFile, in);
1335                         if (!in.eof()) { 
1336                                 int tempNum = 0; 
1337                                 in >> tempNum; m->gobble(in);
1338                                 num += tempNum; 
1339                                 in >> tempNum;
1340                                 numChimeras += tempNum;
1341                         }
1342                         in.close(); m->mothurRemove(tempFile);
1343                 }
1344                 
1345                 
1346                 //append output files
1347                 for(int i=0;i<processIDS[i];i++){
1348                         m->appendFiles((outputFileName + toString(processIDS[i]) + ".temp"), outputFileName);
1349                         m->mothurRemove((outputFileName + toString(processIDS[i]) + ".temp"));
1350                         
1351                         m->appendFiles((accnos + toString(processIDS[i]) + ".temp"), accnos);
1352                         m->mothurRemove((accnos + toString(processIDS[i]) + ".temp"));
1353                         
1354                         if (chimealns) {
1355                                 m->appendFiles((alns + toString(processIDS[i]) + ".temp"), alns);
1356                                 m->mothurRemove((alns + toString(processIDS[i]) + ".temp"));
1357                         }
1358                 }
1359                 
1360                 //get rid of the file pieces.
1361                 for (int i = 0; i < files.size(); i++) { m->mothurRemove(files[i]); }
1362 #endif          
1363                 return num;     
1364         }
1365         catch(exception& e) {
1366                 m->errorOut(e, "ChimeraUchimeCommand", "createProcesses");
1367                 exit(1);
1368         }
1369 }
1370 /**************************************************************************************************/
1371
1372 int ChimeraUchimeCommand::createProcessesGroups(SequenceParser& parser, string outputFName, string filename, string accnos, string alns, vector<string> groups) {
1373         try {
1374                 
1375                 processIDS.clear();
1376                 int process = 1;
1377                 int num = 0;
1378                 
1379                 //sanity check
1380                 if (groups.size() < processors) { processors = groups.size(); }
1381                 
1382                 //divide the groups between the processors
1383                 vector<linePair> lines;
1384                 int numGroupsPerProcessor = groups.size() / processors;
1385                 for (int i = 0; i < processors; i++) {
1386                         int startIndex =  i * numGroupsPerProcessor;
1387                         int endIndex = (i+1) * numGroupsPerProcessor;
1388                         if(i == (processors - 1)){      endIndex = groups.size();       }
1389                         lines.push_back(linePair(startIndex, endIndex));
1390                 }
1391                 
1392 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)           
1393                                 
1394                 //loop through and create all the processes you want
1395                 while (process != processors) {
1396                         int pid = fork();
1397                         
1398                         if (pid > 0) {
1399                                 processIDS.push_back(pid);  //create map from line number to pid so you can append files in correct order later
1400                                 process++;
1401                         }else if (pid == 0){
1402                                 num = driverGroups(parser, outputFName + toString(getpid()) + ".temp", filename + toString(getpid()) + ".temp", accnos + toString(getpid()) + ".temp", alns + toString(getpid()) + ".temp", lines[process].start, lines[process].end, groups);
1403                                 
1404                                 //pass numSeqs to parent
1405                                 ofstream out;
1406                                 string tempFile = outputFName + toString(getpid()) + ".num.temp";
1407                                 m->openOutputFile(tempFile, out);
1408                                 out << num << endl;
1409                                 out.close();
1410                                 
1411                                 exit(0);
1412                         }else { 
1413                                 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine(); 
1414                                 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
1415                                 exit(0);
1416                         }
1417                 }
1418                 
1419                 //do my part
1420                 num = driverGroups(parser, outputFName, filename, accnos, alns, lines[0].start, lines[0].end, groups);
1421                 
1422                 //force parent to wait until all the processes are done
1423                 for (int i=0;i<processIDS.size();i++) { 
1424                         int temp = processIDS[i];
1425                         wait(&temp);
1426                 }
1427 #endif          
1428                 
1429                 for (int i = 0; i < processIDS.size(); i++) {
1430                         ifstream in;
1431                         string tempFile =  outputFName + toString(processIDS[i]) + ".num.temp";
1432                         m->openInputFile(tempFile, in);
1433                         if (!in.eof()) { int tempNum = 0; in >> tempNum; num += tempNum; }
1434                         in.close(); m->mothurRemove(tempFile);
1435                 }
1436                 
1437                 
1438                 //append output files
1439                 for(int i=0;i<processIDS[i];i++){
1440                         m->appendFiles((outputFName + toString(processIDS[i]) + ".temp"), outputFName);
1441                         m->mothurRemove((outputFName + toString(processIDS[i]) + ".temp"));
1442                         
1443                         m->appendFiles((accnos + toString(processIDS[i]) + ".temp"), accnos);
1444                         m->mothurRemove((accnos + toString(processIDS[i]) + ".temp"));
1445                         
1446                         if (chimealns) {
1447                                 m->appendFiles((alns + toString(processIDS[i]) + ".temp"), alns);
1448                                 m->mothurRemove((alns + toString(processIDS[i]) + ".temp"));
1449                         }
1450                 }
1451                 
1452                 return num;     
1453                 
1454         }
1455         catch(exception& e) {
1456                 m->errorOut(e, "ChimeraUchimeCommand", "createProcessesGroups");
1457                 exit(1);
1458         }
1459 }
1460 /**************************************************************************************************/
1461