]> git.donarmstrong.com Git - mothur.git/blob - chimerauchimecommand.cpp
changes while testing.
[mothur.git] / chimerauchimecommand.cpp
1 /*
2  *  chimerauchimecommand.cpp
3  *  Mothur
4  *
5  *  Created by westcott on 5/13/11.
6  *  Copyright 2011 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "chimerauchimecommand.h"
11 #include "deconvolutecommand.h"
12 //#include "uc.h"
13 #include "sequence.hpp"
14 #include "referencedb.h"
15
16
17 //**********************************************************************************************************************
18 vector<string> ChimeraUchimeCommand::setParameters(){   
19         try {
20                 CommandParameter ptemplate("reference", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(ptemplate);
21                 CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pfasta);
22                 CommandParameter pname("name", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pname);
23                 CommandParameter pgroup("group", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pgroup);
24                 CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors);
25                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
26                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
27                 CommandParameter pabskew("abskew", "Number", "", "1.9", "", "", "",false,false); parameters.push_back(pabskew);
28                 CommandParameter pchimealns("chimealns", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pchimealns);
29                 CommandParameter pminh("minh", "Number", "", "0.3", "", "", "",false,false); parameters.push_back(pminh);
30                 CommandParameter pmindiv("mindiv", "Number", "", "0.5", "", "", "",false,false); parameters.push_back(pmindiv);
31                 CommandParameter pxn("xn", "Number", "", "8.0", "", "", "",false,false); parameters.push_back(pxn);
32                 CommandParameter pdn("dn", "Number", "", "1.4", "", "", "",false,false); parameters.push_back(pdn);
33                 CommandParameter pxa("xa", "Number", "", "1", "", "", "",false,false); parameters.push_back(pxa);
34                 CommandParameter pchunks("chunks", "Number", "", "4", "", "", "",false,false); parameters.push_back(pchunks);
35                 CommandParameter pminchunk("minchunk", "Number", "", "64", "", "", "",false,false); parameters.push_back(pminchunk);
36                 CommandParameter pidsmoothwindow("idsmoothwindow", "Number", "", "32", "", "", "",false,false); parameters.push_back(pidsmoothwindow);
37                 //CommandParameter pminsmoothid("minsmoothid", "Number", "", "0.95", "", "", "",false,false); parameters.push_back(pminsmoothid);
38                 CommandParameter pmaxp("maxp", "Number", "", "2", "", "", "",false,false); parameters.push_back(pmaxp);
39                 CommandParameter pskipgaps("skipgaps", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(pskipgaps);
40                 CommandParameter pskipgaps2("skipgaps2", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(pskipgaps2);
41                 CommandParameter pminlen("minlen", "Number", "", "10", "", "", "",false,false); parameters.push_back(pminlen);
42                 CommandParameter pmaxlen("maxlen", "Number", "", "10000", "", "", "",false,false); parameters.push_back(pmaxlen);
43                 CommandParameter pucl("ucl", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pucl);
44                 CommandParameter pqueryfract("queryfract", "Number", "", "0.5", "", "", "",false,false); parameters.push_back(pqueryfract);
45
46                 vector<string> myArray;
47                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
48                 return myArray;
49         }
50         catch(exception& e) {
51                 m->errorOut(e, "ChimeraUchimeCommand", "setParameters");
52                 exit(1);
53         }
54 }
55 //**********************************************************************************************************************
56 string ChimeraUchimeCommand::getHelpString(){   
57         try {
58                 string helpString = "";
59                 helpString += "The chimera.uchime command reads a fastafile and referencefile and outputs potentially chimeric sequences.\n";
60                 helpString += "This command is a wrapper for uchime written by Robert C. Edgar.\n";
61                 helpString += "The chimera.uchime command parameters are fasta, name, reference, processors, abskew, chimealns, minh, mindiv, xn, dn, xa, chunks, minchunk, idsmoothwindow, minsmoothid, maxp, skipgaps, skipgaps2, minlen, maxlen, ucl and queryfact.\n";
62                 helpString += "The fasta parameter allows you to enter the fasta file containing your potentially chimeric sequences, and is required, unless you have a valid current fasta file. \n";
63                 helpString += "The name parameter allows you to provide a name file, if you are using template=self. \n";
64                 helpString += "You may enter multiple fasta files by separating their names with dashes. ie. fasta=abrecovery.fasta-amazon.fasta \n";
65                 helpString += "The group parameter allows you to provide a group file. The group file can be used with a namesfile and reference=self. When checking sequences, only sequences from the same group as the query sequence will be used as the reference. \n";
66                 helpString += "The reference parameter allows you to enter a reference file containing known non-chimeric sequences, and is required. You may also set template=self, in this case the abundant sequences will be used as potential parents. \n";
67                 helpString += "The processors parameter allows you to specify how many processors you would like to use.  The default is 1. \n";
68                 helpString += "The abskew parameter can only be used with template=self. Minimum abundance skew. Default 1.9. Abundance skew is: min [ abund(parent1), abund(parent2) ] / abund(query).\n";
69                 helpString += "The chimealns parameter allows you to indicate you would like a file containing multiple alignments of query sequences to parents in human readable format. Alignments show columns with differences that support or contradict a chimeric model.\n";
70                 helpString += "The minh parameter - mininum score to report chimera. Default 0.3. Values from 0.1 to 5 might be reasonable. Lower values increase sensitivity but may report more false positives. If you decrease xn you may need to increase minh, and vice versa.\n";
71                 helpString += "The mindiv parameter - minimum divergence ratio, default 0.5. Div ratio is 100%% - %%identity between query sequence and the closest candidate for being a parent. If you don't care about very close chimeras, then you could increase mindiv to, say, 1.0 or 2.0, and also decrease minh, say to 0.1, to increase sensitivity. How well this works will depend on your data. Best is to tune parameters on a good benchmark.\n";
72                 helpString += "The xn parameter - weight of a no vote. Default 8.0. Decreasing this weight to around 3 or 4 may give better performance on denoised data.\n";
73                 helpString += "The dn parameter - pseudo-count prior on number of no votes. Default 1.4. Probably no good reason to change this unless you can retune to a good benchmark for your data. Reasonable values are probably in the range from 0.2 to 2.\n";
74                 helpString += "The xa parameter - weight of an abstain vote. Default 1. So far, results do not seem to be very sensitive to this parameter, but if you have a good training set might be worth trying. Reasonable values might range from 0.1 to 2.\n";
75                 helpString += "The chunks parameter is the number of chunks to extract from the query sequence when searching for parents. Default 4.\n";
76                 helpString += "The minchunk parameter is the minimum length of a chunk. Default 64.\n";
77                 helpString += "The idsmoothwindow parameter is the length of id smoothing window. Default 32.\n";
78                 //helpString += "The minsmoothid parameter - minimum factional identity over smoothed window of candidate parent. Default 0.95.\n";
79                 helpString += "The maxp parameter - maximum number of candidate parents to consider. Default 2. In tests so far, increasing maxp gives only a very small improvement in sensivity but tends to increase the error rate quite a bit.\n";
80                 helpString += "The skipgaps parameter controls how gapped columns affect counting of diffs. If skipgaps is set to T, columns containing gaps do not found as diffs. Default = T.\n";
81                 helpString += "The skipgaps2 parameter controls how gapped columns affect counting of diffs. If skipgaps2 is set to T, if column is immediately adjacent to a column containing a gap, it is not counted as a diff. Default = T.\n";
82                 helpString += "The minlen parameter is the minimum unaligned sequence length. Defaults 10. Applies to both query and reference sequences.\n";
83                 helpString += "The maxlen parameter is the maximum unaligned sequence length. Defaults 10000. Applies to both query and reference sequences.\n";
84                 helpString += "The ucl parameter - use local-X alignments. Default is global-X or false. On tests so far, global-X is always better; this option is retained because it just might work well on some future type of data.\n";
85                 helpString += "The queryfract parameter - minimum fraction of the query sequence that must be covered by a local-X alignment. Default 0.5. Applies only when ucl is true.\n";
86 #ifdef USE_MPI
87                 helpString += "When using MPI, the processors parameter is set to the number of MPI processes running. \n";
88 #endif
89                 helpString += "The chimera.uchime command should be in the following format: \n";
90                 helpString += "chimera.uchime(fasta=yourFastaFile, reference=yourTemplate) \n";
91                 helpString += "Example: chimera.uchime(fasta=AD.align, reference=silva.gold.align) \n";
92                 helpString += "Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFastaFile).\n";       
93                 return helpString;
94         }
95         catch(exception& e) {
96                 m->errorOut(e, "ChimeraUchimeCommand", "getHelpString");
97                 exit(1);
98         }
99 }
100 //**********************************************************************************************************************
101 ChimeraUchimeCommand::ChimeraUchimeCommand(){   
102         try {
103                 abort = true; calledHelp = true;
104                 setParameters();
105                 vector<string> tempOutNames;
106                 outputTypes["chimera"] = tempOutNames;
107                 outputTypes["accnos"] = tempOutNames;
108                 outputTypes["alns"] = tempOutNames;
109         }
110         catch(exception& e) {
111                 m->errorOut(e, "ChimeraUchimeCommand", "ChimeraUchimeCommand");
112                 exit(1);
113         }
114 }
115 //***************************************************************************************************************
116 ChimeraUchimeCommand::ChimeraUchimeCommand(string option)  {
117         try {
118                 abort = false; calledHelp = false; 
119                 ReferenceDB* rdb = ReferenceDB::getInstance();
120                 
121                 //allow user to run help
122                 if(option == "help") { help(); abort = true; calledHelp = true; }
123                 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
124                 
125                 else {
126                         vector<string> myArray = setParameters();
127                         
128                         OptionParser parser(option);
129                         map<string,string> parameters = parser.getParameters();
130                         
131                         ValidParameters validParameter("chimera.uchime");
132                         map<string,string>::iterator it;
133                         
134                         //check to make sure all parameters are valid for command
135                         for (it = parameters.begin(); it != parameters.end(); it++) { 
136                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
137                         }
138                         
139                         vector<string> tempOutNames;
140                         outputTypes["chimera"] = tempOutNames;
141                         outputTypes["accnos"] = tempOutNames;
142                         outputTypes["alns"] = tempOutNames;
143                         
144                         //if the user changes the input directory command factory will send this info to us in the output parameter 
145                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
146                         if (inputDir == "not found"){   inputDir = "";          }
147                         
148                         //check for required parameters
149                         fastafile = validParameter.validFile(parameters, "fasta", false);
150                         if (fastafile == "not found") {                                 
151                                 //if there is a current fasta file, use it
152                                 string filename = m->getFastaFile(); 
153                                 if (filename != "") { fastaFileNames.push_back(filename); m->mothurOut("Using " + filename + " as input file for the fasta parameter."); m->mothurOutEndLine(); }
154                                 else {  m->mothurOut("You have no current fastafile and the fasta parameter is required."); m->mothurOutEndLine(); abort = true; }
155                         }else { 
156                                 m->splitAtDash(fastafile, fastaFileNames);
157                                 
158                                 //go through files and make sure they are good, if not, then disregard them
159                                 for (int i = 0; i < fastaFileNames.size(); i++) {
160                                         
161                                         bool ignore = false;
162                                         if (fastaFileNames[i] == "current") { 
163                                                 fastaFileNames[i] = m->getFastaFile(); 
164                                                 if (fastaFileNames[i] != "") {  m->mothurOut("Using " + fastaFileNames[i] + " as input file for the fasta parameter where you had given current."); m->mothurOutEndLine(); }
165                                                 else {  
166                                                         m->mothurOut("You have no current fastafile, ignoring current."); m->mothurOutEndLine(); ignore=true; 
167                                                         //erase from file list
168                                                         fastaFileNames.erase(fastaFileNames.begin()+i);
169                                                         i--;
170                                                 }
171                                         }
172                                         
173                                         if (!ignore) {
174                                                 
175                                                 if (inputDir != "") {
176                                                         string path = m->hasPath(fastaFileNames[i]);
177                                                         //if the user has not given a path then, add inputdir. else leave path alone.
178                                                         if (path == "") {       fastaFileNames[i] = inputDir + fastaFileNames[i];               }
179                                                 }
180                                                 
181                                                 int ableToOpen;
182                                                 ifstream in;
183                                                 
184                                                 ableToOpen = m->openInputFile(fastaFileNames[i], in, "noerror");
185                                                 
186                                                 //if you can't open it, try default location
187                                                 if (ableToOpen == 1) {
188                                                         if (m->getDefaultPath() != "") { //default path is set
189                                                                 string tryPath = m->getDefaultPath() + m->getSimpleName(fastaFileNames[i]);
190                                                                 m->mothurOut("Unable to open " + fastaFileNames[i] + ". Trying default " + tryPath); m->mothurOutEndLine();
191                                                                 ifstream in2;
192                                                                 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
193                                                                 in2.close();
194                                                                 fastaFileNames[i] = tryPath;
195                                                         }
196                                                 }
197                                                 
198                                                 if (ableToOpen == 1) {
199                                                         if (m->getOutputDir() != "") { //default path is set
200                                                                 string tryPath = m->getOutputDir() + m->getSimpleName(fastaFileNames[i]);
201                                                                 m->mothurOut("Unable to open " + fastaFileNames[i] + ". Trying output directory " + tryPath); m->mothurOutEndLine();
202                                                                 ifstream in2;
203                                                                 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
204                                                                 in2.close();
205                                                                 fastaFileNames[i] = tryPath;
206                                                         }
207                                                 }
208                                                 
209                                                 in.close();
210                                                 
211                                                 if (ableToOpen == 1) { 
212                                                         m->mothurOut("Unable to open " + fastaFileNames[i] + ". It will be disregarded."); m->mothurOutEndLine(); 
213                                                         //erase from file list
214                                                         fastaFileNames.erase(fastaFileNames.begin()+i);
215                                                         i--;
216                                                 }else {
217                                                         m->setFastaFile(fastaFileNames[i]);
218                                                 }
219                                         }
220                                 }
221                                 
222                                 //make sure there is at least one valid file left
223                                 if (fastaFileNames.size() == 0) { m->mothurOut("[ERROR]: no valid files."); m->mothurOutEndLine(); abort = true; }
224                         }
225                         
226                         
227                         //check for required parameters
228                         bool hasName = true;
229                         namefile = validParameter.validFile(parameters, "name", false);
230                         if (namefile == "not found") { namefile = "";  hasName = false; }
231                         else { 
232                                 m->splitAtDash(namefile, nameFileNames);
233                                 
234                                 //go through files and make sure they are good, if not, then disregard them
235                                 for (int i = 0; i < nameFileNames.size(); i++) {
236                                         
237                                         bool ignore = false;
238                                         if (nameFileNames[i] == "current") { 
239                                                 nameFileNames[i] = m->getNameFile(); 
240                                                 if (nameFileNames[i] != "") {  m->mothurOut("Using " + nameFileNames[i] + " as input file for the name parameter where you had given current."); m->mothurOutEndLine(); }
241                                                 else {  
242                                                         m->mothurOut("You have no current namefile, ignoring current."); m->mothurOutEndLine(); ignore=true; 
243                                                         //erase from file list
244                                                         nameFileNames.erase(nameFileNames.begin()+i);
245                                                         i--;
246                                                 }
247                                         }
248                                         
249                                         if (!ignore) {
250                                                 
251                                                 if (inputDir != "") {
252                                                         string path = m->hasPath(nameFileNames[i]);
253                                                         //if the user has not given a path then, add inputdir. else leave path alone.
254                                                         if (path == "") {       nameFileNames[i] = inputDir + nameFileNames[i];         }
255                                                 }
256                                                 
257                                                 int ableToOpen;
258                                                 ifstream in;
259                                                 
260                                                 ableToOpen = m->openInputFile(nameFileNames[i], in, "noerror");
261                                                 
262                                                 //if you can't open it, try default location
263                                                 if (ableToOpen == 1) {
264                                                         if (m->getDefaultPath() != "") { //default path is set
265                                                                 string tryPath = m->getDefaultPath() + m->getSimpleName(nameFileNames[i]);
266                                                                 m->mothurOut("Unable to open " + nameFileNames[i] + ". Trying default " + tryPath); m->mothurOutEndLine();
267                                                                 ifstream in2;
268                                                                 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
269                                                                 in2.close();
270                                                                 nameFileNames[i] = tryPath;
271                                                         }
272                                                 }
273                                                 
274                                                 if (ableToOpen == 1) {
275                                                         if (m->getOutputDir() != "") { //default path is set
276                                                                 string tryPath = m->getOutputDir() + m->getSimpleName(nameFileNames[i]);
277                                                                 m->mothurOut("Unable to open " + nameFileNames[i] + ". Trying output directory " + tryPath); m->mothurOutEndLine();
278                                                                 ifstream in2;
279                                                                 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
280                                                                 in2.close();
281                                                                 nameFileNames[i] = tryPath;
282                                                         }
283                                                 }
284                                                 
285                                                 in.close();
286                                                 
287                                                 if (ableToOpen == 1) { 
288                                                         m->mothurOut("Unable to open " + nameFileNames[i] + ". It will be disregarded."); m->mothurOutEndLine(); 
289                                                         //erase from file list
290                                                         nameFileNames.erase(nameFileNames.begin()+i);
291                                                         i--;
292                                                 }else {
293                                                         m->setNameFile(nameFileNames[i]);
294                                                 }
295                                         }
296                                 }
297                                 
298                                 //make sure there is at least one valid file left
299                                 if (nameFileNames.size() == 0) { m->mothurOut("[ERROR]: no valid name files."); m->mothurOutEndLine(); abort = true; }
300                         }
301                         
302                         if (hasName && (nameFileNames.size() != fastaFileNames.size())) { m->mothurOut("[ERROR]: The number of namefiles does not match the number of fastafiles, please correct."); m->mothurOutEndLine(); abort=true; }
303                         
304                         bool hasGroup = true;
305                         groupfile = validParameter.validFile(parameters, "group", false);
306                         if (groupfile == "not found") { groupfile = "";  hasGroup = false; }
307                         else { 
308                                 m->splitAtDash(groupfile, groupFileNames);
309                                 
310                                 //go through files and make sure they are good, if not, then disregard them
311                                 for (int i = 0; i < groupFileNames.size(); i++) {
312                                         
313                                         bool ignore = false;
314                                         if (groupFileNames[i] == "current") { 
315                                                 groupFileNames[i] = m->getGroupFile(); 
316                                                 if (groupFileNames[i] != "") {  m->mothurOut("Using " + groupFileNames[i] + " as input file for the group parameter where you had given current."); m->mothurOutEndLine(); }
317                                                 else {  
318                                                         m->mothurOut("You have no current namefile, ignoring current."); m->mothurOutEndLine(); ignore=true; 
319                                                         //erase from file list
320                                                         groupFileNames.erase(groupFileNames.begin()+i);
321                                                         i--;
322                                                 }
323                                         }
324                                         
325                                         if (!ignore) {
326                                                 
327                                                 if (inputDir != "") {
328                                                         string path = m->hasPath(groupFileNames[i]);
329                                                         //if the user has not given a path then, add inputdir. else leave path alone.
330                                                         if (path == "") {       groupFileNames[i] = inputDir + groupFileNames[i];               }
331                                                 }
332                                                 
333                                                 int ableToOpen;
334                                                 ifstream in;
335                                                 
336                                                 ableToOpen = m->openInputFile(groupFileNames[i], in, "noerror");
337                                                 
338                                                 //if you can't open it, try default location
339                                                 if (ableToOpen == 1) {
340                                                         if (m->getDefaultPath() != "") { //default path is set
341                                                                 string tryPath = m->getDefaultPath() + m->getSimpleName(groupFileNames[i]);
342                                                                 m->mothurOut("Unable to open " + groupFileNames[i] + ". Trying default " + tryPath); m->mothurOutEndLine();
343                                                                 ifstream in2;
344                                                                 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
345                                                                 in2.close();
346                                                                 groupFileNames[i] = tryPath;
347                                                         }
348                                                 }
349                                                 
350                                                 if (ableToOpen == 1) {
351                                                         if (m->getOutputDir() != "") { //default path is set
352                                                                 string tryPath = m->getOutputDir() + m->getSimpleName(groupFileNames[i]);
353                                                                 m->mothurOut("Unable to open " + groupFileNames[i] + ". Trying output directory " + tryPath); m->mothurOutEndLine();
354                                                                 ifstream in2;
355                                                                 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
356                                                                 in2.close();
357                                                                 groupFileNames[i] = tryPath;
358                                                         }
359                                                 }
360                                                 
361                                                 in.close();
362                                                 
363                                                 if (ableToOpen == 1) { 
364                                                         m->mothurOut("Unable to open " + groupFileNames[i] + ". It will be disregarded."); m->mothurOutEndLine(); 
365                                                         //erase from file list
366                                                         groupFileNames.erase(groupFileNames.begin()+i);
367                                                         i--;
368                                                 }else {
369                                                         m->setGroupFile(groupFileNames[i]);
370                                                 }
371                                         }
372                                 }
373                                 
374                                 //make sure there is at least one valid file left
375                                 if (groupFileNames.size() == 0) { m->mothurOut("[ERROR]: no valid group files."); m->mothurOutEndLine(); abort = true; }
376                         }
377                         
378                         if (hasGroup && (groupFileNames.size() != fastaFileNames.size())) { m->mothurOut("[ERROR]: The number of groupfiles does not match the number of fastafiles, please correct."); m->mothurOutEndLine(); abort=true; }
379                         
380                         
381                         //if the user changes the output directory command factory will send this info to us in the output parameter 
382                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = ""; }
383                         
384                         string path;
385                         it = parameters.find("reference");
386                         //user has given a template file
387                         if(it != parameters.end()){ 
388                                 if (it->second == "self") { templatefile = "self"; }
389                                 else {
390                                         path = m->hasPath(it->second);
391                                         //if the user has not given a path then, add inputdir. else leave path alone.
392                                         if (path == "") {       parameters["reference"] = inputDir + it->second;                }
393                                         
394                                         templatefile = validParameter.validFile(parameters, "reference", true);
395                                         if (templatefile == "not open") { abort = true; }
396                                         else if (templatefile == "not found") { //check for saved reference sequences
397                                                 if (rdb->getSavedReference() != "") {
398                                                         templatefile = rdb->getSavedReference();
399                                                         m->mothurOutEndLine();  m->mothurOut("Using sequences from " + rdb->getSavedReference() + "."); m->mothurOutEndLine();
400                                                 }else {
401                                                         m->mothurOut("[ERROR]: You don't have any saved reference sequences and the reference parameter is a required."); 
402                                                         m->mothurOutEndLine();
403                                                         abort = true; 
404                                                 }
405                                         }
406                                 }
407                         }else if (hasName) {  templatefile = "self"; }
408                         else { 
409                                 if (rdb->getSavedReference() != "") {
410                                         templatefile = rdb->getSavedReference();
411                                         m->mothurOutEndLine();  m->mothurOut("Using sequences from " + rdb->getSavedReference() + "."); m->mothurOutEndLine();
412                                 }else {
413                                         m->mothurOut("[ERROR]: You don't have any saved reference sequences and the reference parameter is a required."); 
414                                         m->mothurOutEndLine();
415                                         templatefile = ""; abort = true; 
416                                 } 
417                         }
418                                 
419                         string temp = validParameter.validFile(parameters, "processors", false);        if (temp == "not found"){       temp = m->getProcessors();      }
420                         m->setProcessors(temp);
421                         m->mothurConvert(temp, processors);
422                         
423                         abskew = validParameter.validFile(parameters, "abskew", false); if (abskew == "not found"){     useAbskew = false;  abskew = "1.9";     }else{  useAbskew = true;  }
424                         if (useAbskew && templatefile != "self") { m->mothurOut("The abskew parameter is only valid with template=self, ignoring."); m->mothurOutEndLine(); useAbskew = false; }
425                         
426                         temp = validParameter.validFile(parameters, "chimealns", false);                        if (temp == "not found") { temp = "f"; }
427                         chimealns = m->isTrue(temp); 
428                         
429                         minh = validParameter.validFile(parameters, "minh", false);                                             if (minh == "not found")                        { useMinH = false; minh = "0.3";                                        }       else{ useMinH = true;                   }
430                         mindiv = validParameter.validFile(parameters, "mindiv", false);                                 if (mindiv == "not found")                      { useMindiv = false; mindiv = "0.5";                            }       else{ useMindiv = true;                 }
431                         xn = validParameter.validFile(parameters, "xn", false);                                                 if (xn == "not found")                          { useXn = false; xn = "8.0";                                            }       else{ useXn = true;                             }
432                         dn = validParameter.validFile(parameters, "dn", false);                                                 if (dn == "not found")                          { useDn = false; dn = "1.4";                                            }       else{ useDn = true;                             }
433                         xa = validParameter.validFile(parameters, "xa", false);                                                 if (xa == "not found")                          { useXa = false; xa = "1";                                                      }       else{ useXa = true;                             }
434                         chunks = validParameter.validFile(parameters, "chunks", false);                                 if (chunks == "not found")                      { useChunks = false; chunks = "4";                                      }       else{ useChunks = true;                 }
435                         minchunk = validParameter.validFile(parameters, "minchunk", false);                             if (minchunk == "not found")            { useMinchunk = false; minchunk = "64";                         }       else{ useMinchunk = true;               }
436                         idsmoothwindow = validParameter.validFile(parameters, "idsmoothwindow", false); if (idsmoothwindow == "not found")      { useIdsmoothwindow = false; idsmoothwindow = "32";     }       else{ useIdsmoothwindow = true; }
437                         //minsmoothid = validParameter.validFile(parameters, "minsmoothid", false);             if (minsmoothid == "not found")         { useMinsmoothid = false; minsmoothid = "0.95";         }       else{ useMinsmoothid = true;    }
438                         maxp = validParameter.validFile(parameters, "maxp", false);                                             if (maxp == "not found")                        { useMaxp = false; maxp = "2";                                          }       else{ useMaxp = true;                   }
439                         minlen = validParameter.validFile(parameters, "minlen", false);                                 if (minlen == "not found")                      { useMinlen = false; minlen = "10";                                     }       else{ useMinlen = true;                 }
440                         maxlen = validParameter.validFile(parameters, "maxlen", false);                                 if (maxlen == "not found")                      { useMaxlen = false; maxlen = "10000";                          }       else{ useMaxlen = true;                 }
441                         
442                         temp = validParameter.validFile(parameters, "ucl", false);                                              if (temp == "not found") { temp = "f"; }
443                         ucl = m->isTrue(temp);
444                         
445                         queryfract = validParameter.validFile(parameters, "queryfract", false);                 if (queryfract == "not found")          { useQueryfract = false; queryfract = "0.5";            }       else{ useQueryfract = true;             }
446                         if (!ucl && useQueryfract) { m->mothurOut("queryfact may only be used when ucl=t, ignoring."); m->mothurOutEndLine(); useQueryfract = false; }
447                         
448                         temp = validParameter.validFile(parameters, "skipgaps", false);                                 if (temp == "not found") { temp = "t"; }
449                         skipgaps = m->isTrue(temp); 
450
451                         temp = validParameter.validFile(parameters, "skipgaps2", false);                                if (temp == "not found") { temp = "t"; }
452                         skipgaps2 = m->isTrue(temp); 
453                         
454                         if (hasName && (templatefile != "self")) { m->mothurOut("You have provided a namefile and the reference parameter is not set to self. I am not sure what reference you are trying to use, aborting."); m->mothurOutEndLine(); abort=true; }
455                         if (hasGroup && (templatefile != "self")) { m->mothurOut("You have provided a group file and the reference parameter is not set to self. I am not sure what reference you are trying to use, aborting."); m->mothurOutEndLine(); abort=true; }
456                         
457                         //look for uchime exe
458                         path = m->argv;
459                         string tempPath = path;
460                         for (int i = 0; i < path.length(); i++) { tempPath[i] = tolower(path[i]); }
461                         path = path.substr(0, (tempPath.find_last_of('m')));
462                         
463                         string uchimeCommand;
464 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
465                         uchimeCommand = path + "uchime";        //      format the database, -o option gives us the ability
466 #else
467                         uchimeCommand = path + "uchime.exe";
468 #endif
469                         
470                         //test to make sure uchime exists
471                         ifstream in;
472                         uchimeCommand = m->getFullPathName(uchimeCommand);
473                         int ableToOpen = m->openInputFile(uchimeCommand, in, "no error"); in.close();
474                         if(ableToOpen == 1) {   m->mothurOut("[ERROR]: " + uchimeCommand + " file does not exist. mothur requires the uchime executable."); m->mothurOutEndLine(); abort = true; }
475                 }
476         }
477         catch(exception& e) {
478                 m->errorOut(e, "ChimeraSlayerCommand", "ChimeraSlayerCommand");
479                 exit(1);
480         }
481 }
482 //***************************************************************************************************************
483
484 int ChimeraUchimeCommand::execute(){
485         try{
486                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
487                 
488                 m->mothurOut("\nuchime by Robert C. Edgar\nhttp://drive5.com/uchime\nThis code is donated to the public domain.\n\n");
489                 
490                 for (int s = 0; s < fastaFileNames.size(); s++) {
491                         
492                         m->mothurOut("Checking sequences from " + fastaFileNames[s] + " ..." ); m->mothurOutEndLine();
493                         
494                         int start = time(NULL); 
495                         string nameFile = "";
496                         if (outputDir == "") { outputDir = m->hasPath(fastaFileNames[s]);  }//if user entered a file with a path then preserve it                               
497                         string outputFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + "uchime.chimera";
498                         string accnosFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s]))  + "uchime.accnos";
499                         string alnsFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s]))  + "uchime.alns";
500                         string newFasta = m->getRootName(fastaFileNames[s]) + "temp";
501                                 
502                         //you provided a groupfile
503                         string groupFile = "";
504                         if (groupFileNames.size() != 0) { groupFile = groupFileNames[s]; }
505                         
506                         if ((templatefile == "self") && (groupFile == "")) { //you want to run uchime with a reference template
507
508                                 if (processors != 1) { m->mothurOut("When using template=self, mothur can only use 1 processor, continuing."); m->mothurOutEndLine(); processors = 1; }
509                                 if (nameFileNames.size() != 0) { //you provided a namefile and we don't need to create one
510                                         nameFile = nameFileNames[s];
511                                 }else { nameFile = getNamesFile(fastaFileNames[s]); }
512                                                                                 
513                                 map<string, string> seqs;  
514                                 readFasta(fastaFileNames[s], seqs);  if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) {   m->mothurRemove(outputNames[j]);        }  return 0; }
515
516                                 //read namefile
517                                 vector<seqPriorityNode> nameMapCount;
518                                 int error = m->readNames(nameFile, nameMapCount, seqs); if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) {        m->mothurRemove(outputNames[j]);        }  return 0; }
519                                 if (error == 1) { for (int j = 0; j < outputNames.size(); j++) {        m->mothurRemove(outputNames[j]);        }  return 0; }
520                                 if (seqs.size() != nameMapCount.size()) { m->mothurOut( "The number of sequences in your fastafile does not match the number of sequences in your namefile, aborting."); m->mothurOutEndLine(); for (int j = 0; j < outputNames.size(); j++) {  m->mothurRemove(outputNames[j]);        }  return 0; }
521                                 
522                                 printFile(nameMapCount, newFasta);
523                                 fastaFileNames[s] = newFasta;
524                         }
525                         
526                         if (m->control_pressed) {  for (int j = 0; j < outputNames.size(); j++) {       m->mothurRemove(outputNames[j]);        }  return 0;    }                               
527                         
528                         if (groupFile != "") {
529                                 if (nameFileNames.size() != 0) { //you provided a namefile and we don't need to create one
530                                         nameFile = nameFileNames[s];
531                                 }else { nameFile = getNamesFile(fastaFileNames[s]); }
532                                 
533                                 //Parse sequences by group
534                                 SequenceParser parser(groupFile, fastaFileNames[s], nameFile);
535                                 vector<string> groups = parser.getNamesOfGroups();
536                                         
537                                 if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) {        m->mothurRemove(outputNames[j]);        }  return 0; }
538                                                                 
539                                 //clears files
540                                 ofstream out, out1, out2;
541                                 m->openOutputFile(outputFileName, out); out.close(); 
542                                 m->openOutputFile(accnosFileName, out1); out1.close();
543                                 if (chimealns) { m->openOutputFile(alnsFileName, out2); out2.close(); }
544                                 int totalSeqs = 0;
545                                 
546                                 if(processors == 1)     {       totalSeqs = driverGroups(parser, outputFileName, newFasta, accnosFileName, alnsFileName, 0, groups.size(), groups);     }
547                                 else                            {       totalSeqs = createProcessesGroups(parser, outputFileName, newFasta, accnosFileName, alnsFileName, groups, nameFile, groupFile, fastaFileNames[s]);                      }
548
549                                 if (m->control_pressed) {  for (int j = 0; j < outputNames.size(); j++) {       m->mothurRemove(outputNames[j]);        }  return 0;    }                               
550
551                                 int totalChimeras = deconvoluteResults(parser, outputFileName, accnosFileName, alnsFileName);
552                                 
553                                 m->mothurOutEndLine(); m->mothurOut("It took " + toString(time(NULL) - start) + " secs to check " + toString(totalSeqs) + " sequences. " + toString(totalChimeras) + " chimeras were found.");  m->mothurOutEndLine();
554                                 m->mothurOut("The number of sequences checked may be larger than the number of unique sequences because some sequences are found in several samples."); m->mothurOutEndLine(); 
555                                 
556                                 if (m->control_pressed) {  for (int j = 0; j < outputNames.size(); j++) {       m->mothurRemove(outputNames[j]);        }  return 0;    }                               
557                                         
558                         }else{
559                                 if (m->control_pressed) {  for (int j = 0; j < outputNames.size(); j++) {       m->mothurRemove(outputNames[j]);        }  return 0;    }
560                         
561                                 int numSeqs = 0;
562                                 int numChimeras = 0;
563
564                                 if(processors == 1){ numSeqs = driver(outputFileName, fastaFileNames[s], accnosFileName, alnsFileName, numChimeras); }
565                                 else{   numSeqs = createProcesses(outputFileName, fastaFileNames[s], accnosFileName, alnsFileName, numChimeras); }
566                                 
567                                 //add headings
568                                 ofstream out;
569                                 m->openOutputFile(outputFileName+".temp", out); 
570                                 out << "Score\tQuery\tParentA\tParentB\tIdQM\tIdQA\tIdQB\tIdAB\tIdQT\tLY\tLN\tLA\tRY\tRN\tRA\tDiv\tYN\n";
571                                 out.close();
572                                 
573                                 m->appendFiles(outputFileName, outputFileName+".temp");
574                                 m->mothurRemove(outputFileName); rename((outputFileName+".temp").c_str(), outputFileName.c_str());
575                                 
576                                 if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) {        m->mothurRemove(outputNames[j]);        } return 0; }
577                         
578                                 //remove file made for uchime
579                                 if (templatefile == "self") {  m->mothurRemove(fastaFileNames[s]); }
580                         
581                                 m->mothurOutEndLine(); m->mothurOut("It took " + toString(time(NULL) - start) + " secs to check " + toString(numSeqs) + " sequences. " + toString(numChimeras) + " chimeras were found.");      m->mothurOutEndLine();
582                         }
583                         
584                         outputNames.push_back(outputFileName); outputTypes["chimera"].push_back(outputFileName);
585                         outputNames.push_back(accnosFileName); outputTypes["accnos"].push_back(accnosFileName);
586                         if (chimealns) { outputNames.push_back(alnsFileName); outputTypes["alns"].push_back(alnsFileName); }
587                 }
588         
589                 //set accnos file as new current accnosfile
590                 string current = "";
591                 itTypes = outputTypes.find("accnos");
592                 if (itTypes != outputTypes.end()) {
593                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setAccnosFile(current); }
594                 }
595                 
596                 m->mothurOutEndLine();
597                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
598                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }       
599                 m->mothurOutEndLine();
600                 
601                 return 0;
602                 
603         }
604         catch(exception& e) {
605                 m->errorOut(e, "ChimeraUchimeCommand", "execute");
606                 exit(1);
607         }
608 }
609 //**********************************************************************************************************************
610 int ChimeraUchimeCommand::deconvoluteResults(SequenceParser& parser, string outputFileName, string accnosFileName, string alnsFileName){
611         try {
612                 map<string, string> uniqueNames = parser.getAllSeqsMap();
613                 map<string, string>::iterator itUnique;
614                 int total = 0;
615                 
616                 //edit accnos file
617                 ifstream in2; 
618                 m->openInputFile(accnosFileName, in2);
619                 
620                 ofstream out2;
621                 m->openOutputFile(accnosFileName+".temp", out2);
622                 
623                 string name;
624                 set<string> namesInFile; //this is so if a sequence is found to be chimera in several samples we dont write it to the results file more than once
625                 set<string>::iterator itNames;
626                 set<string> chimerasInFile;
627                 set<string>::iterator itChimeras;
628
629                 
630                 while (!in2.eof()) {
631                         if (m->control_pressed) { in2.close(); out2.close(); m->mothurRemove(outputFileName); m->mothurRemove((accnosFileName+".temp")); return 0; }
632                         
633                         in2 >> name; m->gobble(in2);
634                         
635                         //find unique name
636                         itUnique = uniqueNames.find(name);
637                         
638                         if (itUnique == uniqueNames.end()) { m->mothurOut("[ERROR]: trouble parsing accnos results. Cannot find "+ name + "."); m->mothurOutEndLine(); m->control_pressed = true; }
639                         else {
640                                 itChimeras = chimerasInFile.find((itUnique->second));
641                                 
642                                 if (itChimeras == chimerasInFile.end()) {
643                                         out2 << itUnique->second << endl;
644                                         chimerasInFile.insert((itUnique->second));
645                                         total++;
646                                 }
647                         }
648                 }
649                 in2.close();
650                 out2.close();
651                 
652                 m->mothurRemove(accnosFileName);
653                 rename((accnosFileName+".temp").c_str(), accnosFileName.c_str());
654                 
655                 
656                 
657                 //edit chimera file
658                 ifstream in; 
659                 m->openInputFile(outputFileName, in);
660                 
661                 ofstream out;
662                 m->openOutputFile(outputFileName+".temp", out); out.setf(ios::fixed, ios::floatfield); out.setf(ios::showpoint);
663                 out << "Score\tQuery\tParentA\tParentB\tIdQM\tIdQA\tIdQB\tIdAB\tIdQT\tLY\tLN\tLA\tRY\tRN\tRA\tDiv\tYN\n";
664                 
665                 float temp1;
666                 string parent1, parent2, temp2, temp3, temp4, temp5, temp6, temp7, temp8, temp9, temp10, temp11, temp12, temp13, flag;
667                 name = "";
668                 namesInFile.clear();    
669                 //assumptions - in file each read will always look like - if uchime source is updated, revisit this code.
670                 /*                                                                              1       2       3       4       5       6       7       8       9       10      11      12      13      14      15
671                  0.000000       F11Fcsw_33372/ab=18/            *       *       *       *       *       *       *       *       *       *       *       *       *       *       N
672                  0.018300       F11Fcsw_14980/ab=16/            F11Fcsw_1915/ab=35/     F11Fcsw_6032/ab=42/     79.9    78.7    78.2    78.7    79.2    3       0       5       11      10      20      1.46    N
673                 */
674                 
675                 while (!in.eof()) {
676                         
677                         if (m->control_pressed) { in.close(); out.close(); m->mothurRemove((outputFileName+".temp")); return 0; }
678                         
679                         bool print = false;
680                         in >> temp1;    m->gobble(in);
681                         in >> name;             m->gobble(in);
682                         in >> parent1;  m->gobble(in);
683                         in >> parent2;  m->gobble(in);
684                         in >> temp2 >> temp3 >> temp4 >> temp5 >> temp6 >> temp7 >> temp8 >> temp9 >> temp10 >> temp11 >> temp12 >> temp13 >> flag;
685                         m->gobble(in);
686                         
687                         //parse name - name will look like U68590/ab=1/
688                         string restOfName = "";
689                         int pos = name.find_first_of('/');
690                         if (pos != string::npos) {
691                                 restOfName = name.substr(pos);
692                                 name = name.substr(0, pos);
693                         }
694                         
695                         //find unique name
696                         itUnique = uniqueNames.find(name);
697                         
698                         if (itUnique == uniqueNames.end()) { m->mothurOut("[ERROR]: trouble parsing chimera results. Cannot find "+ name + "."); m->mothurOutEndLine(); m->control_pressed = true; }
699                         else {
700                                 name = itUnique->second;
701                                 //is this name already in the file
702                                 itNames = namesInFile.find((name));
703                                 
704                                 if (itNames == namesInFile.end()) { //no not in file
705                                         if (flag == "N") { //are you really a no??
706                                                 //is this sequence really not chimeric??
707                                                 itChimeras = chimerasInFile.find(name);
708                                                 
709                                                 //then you really are a no so print, otherwise skip
710                                                 if (itChimeras == chimerasInFile.end()) { print = true; }
711                                         }else{ print = true; }
712                                 }
713                         }
714                         
715                         if (print) {
716                                 out << temp1 << '\t' << name << restOfName << '\t';
717                                 namesInFile.insert(name);
718                                 
719                                 //parse parent1 names
720                                 if (parent1 != "*") {
721                                         restOfName = "";
722                                         pos = parent1.find_first_of('/');
723                                         if (pos != string::npos) {
724                                                 restOfName = parent1.substr(pos);
725                                                 parent1 = parent1.substr(0, pos);
726                                         }
727                                         
728                                         itUnique = uniqueNames.find(parent1);
729                                         if (itUnique == uniqueNames.end()) { m->mothurOut("[ERROR]: trouble parsing chimera results. Cannot find parentA "+ parent1 + "."); m->mothurOutEndLine(); m->control_pressed = true; }
730                                         else {  out << itUnique->second << restOfName << '\t';  }
731                                 }else { out << parent1 << '\t'; }
732                                 
733                                 //parse parent2 names
734                                 if (parent2 != "*") {
735                                         restOfName = "";
736                                         pos = parent2.find_first_of('/');
737                                         if (pos != string::npos) {
738                                                 restOfName = parent2.substr(pos);
739                                                 parent2 = parent2.substr(0, pos);
740                                         }
741                                         
742                                         itUnique = uniqueNames.find(parent2);
743                                         if (itUnique == uniqueNames.end()) { m->mothurOut("[ERROR]: trouble parsing chimera results. Cannot find parentB "+ parent2 + "."); m->mothurOutEndLine(); m->control_pressed = true; }
744                                         else {  out << itUnique->second << restOfName << '\t';  }
745                                 }else { out << parent2 << '\t'; }
746                                 
747                                 out << temp2 << '\t' << temp3 << '\t' << temp4 << '\t' << temp5 << '\t' << temp6 << '\t' << temp7 << '\t' << temp8 << '\t' << temp9 << '\t' << temp10 << '\t' << temp11 << '\t' << temp12 << temp13 << '\t' << flag << endl;    
748                         }
749                 }
750                 in.close();
751                 out.close();
752                 
753                 m->mothurRemove(outputFileName);
754                 rename((outputFileName+".temp").c_str(), outputFileName.c_str());
755                 
756                                 
757                 //edit anls file
758                 //assumptions - in file each read will always look like - if uchime source is updated, revisit this code.
759                 /*
760                  ------------------------------------------------------------------------
761                  Query   (  179 nt) F21Fcsw_11639/ab=591/
762                  ParentA (  179 nt) F11Fcsw_6529/ab=1625/
763                  ParentB (  181 nt) F21Fcsw_12128/ab=1827/
764                  
765                  A     1 AAGgAAGAtTAATACaagATGgCaTCatgAGtccgCATgTtcAcatGATTAAAG--gTaTtcCGGTagacGATGGGGATG 78
766                  Q     1 AAGTAAGACTAATACCCAATGACGTCTCTAGAAGACATCTGAAAGAGATTAAAG--ATTTATCGGTGATGGATGGGGATG 78
767                  B     1 AAGgAAGAtTAATcCaggATGggaTCatgAGttcACATgTccgcatGATTAAAGgtATTTtcCGGTagacGATGGGGATG 80
768                  Diffs      N    N    A N?N   N N  NNN  N?NB   N ?NaNNN          B B NN    NNNN          
769                  Votes      0    0    + 000   0 0  000  000+   0 00!000            + 00    0000          
770                  Model   AAAAAAAAAAAAAAAAAAAAAAxBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB
771                  
772                  A    79 CGTtccATTAGaTaGTaGGCGGGGTAACGGCCCACCtAGtCttCGATggaTAGGGGTTCTGAGAGGAAGGTCCCCCACAT 158
773                  Q    79 CGTCTGATTAGCTTGTTGGCGGGGTAACGGCCCACCAAGGCAACGATCAGTAGGGGTTCTGAGAGGAAGGTCCCCCACAT 158
774                  B    81 CGTtccATTAGaTaGTaGGCGGGGTAACGGCCCACCtAGtCAACGATggaTAGGGGTTCTGAGAGGAAGGTCCCCCACAT 160
775                  Diffs      NNN     N N  N                   N  N BB    NNN                              
776                  Votes      000     0 0  0                   0  0 ++    000                              
777                  Model   BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB
778                  
779                  A   159 TGGAACTGAGACACGGTCCAA 179
780                  Q   159 TGGAACTGAGACACGGTCCAA 179
781                  B   161 TGGAACTGAGACACGGTCCAA 181
782                  Diffs                        
783                  Votes                        
784                  Model   BBBBBBBBBBBBBBBBBBBBB
785                  
786                  Ids.  QA 76.6%, QB 77.7%, AB 93.7%, QModel 78.9%, Div. +1.5%
787                  Diffs Left 7: N 0, A 6, Y 1 (14.3%); Right 35: N 1, A 30, Y 4 (11.4%), Score 0.0047
788                 */
789                 if (chimealns) {
790                         ifstream in3; 
791                         m->openInputFile(alnsFileName, in3);
792                 
793                         ofstream out3;
794                         m->openOutputFile(alnsFileName+".temp", out3); out3.setf(ios::fixed, ios::floatfield); out3.setf(ios::showpoint);
795                 
796                         name = "";
797                         namesInFile.clear();
798                         string line = "";
799                         
800                         while (!in3.eof()) {
801                                 if (m->control_pressed) { in3.close(); out3.close(); m->mothurRemove(outputFileName); m->mothurRemove((accnosFileName)); m->mothurRemove((alnsFileName+".temp")); return 0; }
802                                 
803                                 line = "";
804                                 line = m->getline(in3); 
805                                 string temp = "";
806                                 
807                                 if (line != "") {
808                                         istringstream iss(line);
809                                         iss >> temp;
810                                         
811                                         //are you a name line
812                                         if ((temp == "Query") || (temp == "ParentA") || (temp == "ParentB")) {
813                                                 int spot = 0;
814                                                 for (int i = 0; i < line.length(); i++) {
815                                                         spot = i;
816                                                         if (line[i] == ')') { break; }
817                                                         else { out3 << line[i]; }
818                                                 }
819                                                 
820                                                 if (spot == (line.length() - 1)) { m->mothurOut("[ERROR]: could not line sequence name in line " + line + "."); m->mothurOutEndLine(); m->control_pressed = true; }
821                                                 else if ((spot+2) > (line.length() - 1)) { m->mothurOut("[ERROR]: could not line sequence name in line " + line + "."); m->mothurOutEndLine(); m->control_pressed = true; }
822                                                 else {
823                                                         out << line[spot] << line[spot+1];
824                                                         
825                                                         name = line.substr(spot+2);
826                                                         
827                                                         //parse name - name will either look like U68590/ab=1/ or U68590
828                                                         string restOfName = "";
829                                                         int pos = name.find_first_of('/');
830                                                         if (pos != string::npos) {
831                                                                 restOfName = name.substr(pos);
832                                                                 name = name.substr(0, pos);
833                                                         }
834                                                         
835                                                         //find unique name
836                                                         itUnique = uniqueNames.find(name);
837                                                         
838                                                         if (itUnique == uniqueNames.end()) { m->mothurOut("[ERROR]: trouble parsing alns results. Cannot find "+ name + "."); m->mothurOutEndLine();m->control_pressed = true;  }
839                                                         else {
840                                                                 //only limit repeats on query names
841                                                                 if (temp == "Query") {
842                                                                         itNames = namesInFile.find((itUnique->second));
843                                                                         
844                                                                         if (itNames == namesInFile.end()) {
845                                                                                 out << itUnique->second << restOfName << endl;
846                                                                                 namesInFile.insert((itUnique->second));
847                                                                         }
848                                                                 }else { out << itUnique->second << restOfName << endl;  }
849                                                         }
850                                                         
851                                                 }
852                                                 
853                                         }else { //not need to alter line
854                                                 out3 << line << endl;
855                                         }
856                                 }else { out3 << endl; }
857                         }
858                         in3.close();
859                         out3.close();
860                         
861                         m->mothurRemove(alnsFileName);
862                         rename((alnsFileName+".temp").c_str(), alnsFileName.c_str());
863                 }
864                 
865                 return total;
866         }
867         catch(exception& e) {
868                 m->errorOut(e, "ChimeraUchimeCommand", "deconvoluteResults");
869                 exit(1);
870         }
871 }       
872 //**********************************************************************************************************************
873 int ChimeraUchimeCommand::printFile(vector<seqPriorityNode>& nameMapCount, string filename){
874         try {
875                 
876                 sort(nameMapCount.begin(), nameMapCount.end(), compareSeqPriorityNodes);
877                 
878                 ofstream out;
879                 m->openOutputFile(filename, out);
880                 
881                 //print new file in order of
882                 for (int i = 0; i < nameMapCount.size(); i++) {
883                         out << ">" << nameMapCount[i].name  << "/ab=" << nameMapCount[i].numIdentical << "/" << endl << nameMapCount[i].seq << endl;
884                 }
885                 out.close();
886                 
887                 return 0;
888         }
889         catch(exception& e) {
890                 m->errorOut(e, "ChimeraUchimeCommand", "printFile");
891                 exit(1);
892         }
893 }       
894 //**********************************************************************************************************************
895 int ChimeraUchimeCommand::readFasta(string filename, map<string, string>& seqs){
896         try {
897                 //create input file for uchime
898                 //read through fastafile and store info
899                 ifstream in;
900                 m->openInputFile(filename, in);
901                 
902                 while (!in.eof()) {
903                         
904                         if (m->control_pressed) { in.close(); return 0; }
905                         
906                         Sequence seq(in); m->gobble(in);
907                         seqs[seq.getName()] = seq.getAligned();
908                 }
909                 in.close();
910                 
911                 return 0;
912         }
913         catch(exception& e) {
914                 m->errorOut(e, "ChimeraUchimeCommand", "readFasta");
915                 exit(1);
916         }
917 }       
918 //**********************************************************************************************************************
919
920 string ChimeraUchimeCommand::getNamesFile(string& inputFile){
921         try {
922                 string nameFile = "";
923                 
924                 m->mothurOutEndLine(); m->mothurOut("No namesfile given, running unique.seqs command to generate one."); m->mothurOutEndLine(); m->mothurOutEndLine();
925                 
926                 //use unique.seqs to create new name and fastafile
927                 string inputString = "fasta=" + inputFile;
928                 m->mothurOut("/******************************************/"); m->mothurOutEndLine(); 
929                 m->mothurOut("Running command: unique.seqs(" + inputString + ")"); m->mothurOutEndLine(); 
930                 m->mothurCalling = true;
931         
932                 Command* uniqueCommand = new DeconvoluteCommand(inputString);
933                 uniqueCommand->execute();
934                 
935                 map<string, vector<string> > filenames = uniqueCommand->getOutputFiles();
936                 
937                 delete uniqueCommand;
938                 m->mothurCalling = false;
939                 m->mothurOut("/******************************************/"); m->mothurOutEndLine(); 
940                 
941                 nameFile = filenames["name"][0];
942                 inputFile = filenames["fasta"][0];
943                 
944                 return nameFile;
945         }
946         catch(exception& e) {
947                 m->errorOut(e, "ChimeraUchimeCommand", "getNamesFile");
948                 exit(1);
949         }
950 }
951 //**********************************************************************************************************************
952 int ChimeraUchimeCommand::driverGroups(SequenceParser& parser, string outputFName, string filename, string accnos, string alns, int start, int end, vector<string> groups){
953         try {
954                 
955                 int totalSeqs = 0;
956                 int numChimeras = 0;
957                 
958                 for (int i = start; i < end; i++) {
959                         int start = time(NULL);  if (m->control_pressed) {  return 0; }
960                         
961                         int error = parser.getSeqs(groups[i], filename, true); if ((error == 1) || m->control_pressed) {  return 0; }
962                         
963                         int numSeqs = driver((outputFName + groups[i]), filename, (accnos+ groups[i]), (alns+ groups[i]), numChimeras);
964                         totalSeqs += numSeqs;
965                         
966                         if (m->control_pressed) { return 0; }
967                         
968                         //remove file made for uchime
969                         m->mothurRemove(filename);
970                         
971                         //append files
972                         m->appendFiles((outputFName+groups[i]), outputFName); m->mothurRemove((outputFName+groups[i]));
973                         m->appendFiles((accnos+groups[i]), accnos); m->mothurRemove((accnos+groups[i]));
974                         if (chimealns) { m->appendFiles((alns+groups[i]), alns); m->mothurRemove((alns+groups[i])); }
975                         
976                         m->mothurOutEndLine(); m->mothurOut("It took " + toString(time(NULL) - start) + " secs to check " + toString(numSeqs) + " sequences from group " + groups[i] + ".");    m->mothurOutEndLine();                                  
977                 }       
978                 
979                 return totalSeqs;
980                 
981         }
982         catch(exception& e) {
983                 m->errorOut(e, "ChimeraUchimeCommand", "driverGroups");
984                 exit(1);
985         }
986 }       
987 //**********************************************************************************************************************
988
989 int ChimeraUchimeCommand::driver(string outputFName, string filename, string accnos, string alns, int& numChimeras){
990         try {
991                 
992                 outputFName = m->getFullPathName(outputFName);
993                 filename = m->getFullPathName(filename);
994                 alns = m->getFullPathName(alns);
995                 
996                 //to allow for spaces in the path
997                 outputFName = "\"" + outputFName + "\"";
998                 filename = "\"" + filename + "\"";
999                 alns = "\"" + alns + "\"";
1000                                 
1001                 vector<char*> cPara;
1002                 
1003                 string path = m->argv;
1004                 string tempPath = path;
1005                 for (int i = 0; i < path.length(); i++) { tempPath[i] = tolower(path[i]); }
1006                 path = path.substr(0, (tempPath.find_last_of('m')));
1007                 
1008                 string uchimeCommand = path;
1009 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
1010                 uchimeCommand += "uchime ";
1011 #else
1012                 uchimeCommand += "uchime";
1013                 uchimeCommand = "\"" + uchimeCommand + "\"";
1014 #endif
1015                 
1016                 char* tempUchime;
1017                 tempUchime= new char[uchimeCommand.length()+1]; 
1018                 *tempUchime = '\0';
1019                 strncat(tempUchime, uchimeCommand.c_str(), uchimeCommand.length());
1020                 cPara.push_back(tempUchime);
1021                 
1022                 char* tempIn = new char[8]; 
1023                 *tempIn = '\0'; strncat(tempIn, "--input", 7);
1024                 //strcpy(tempIn, "--input"); 
1025                 cPara.push_back(tempIn);
1026                 char* temp = new char[filename.length()+1];
1027                 *temp = '\0'; strncat(temp, filename.c_str(), filename.length());
1028                 //strcpy(temp, filename.c_str());
1029                 cPara.push_back(temp);
1030                 
1031                 //are you using a reference file
1032                 if (templatefile != "self") {
1033                         //add reference file
1034                         char* tempRef = new char[5]; 
1035                         //strcpy(tempRef, "--db"); 
1036                         *tempRef = '\0'; strncat(tempRef, "--db", 4);
1037                         cPara.push_back(tempRef);  
1038                         char* tempR = new char[templatefile.length()+1];
1039                         //strcpy(tempR, templatefile.c_str());
1040                         *tempR = '\0'; strncat(tempR, templatefile.c_str(), templatefile.length());
1041                         cPara.push_back(tempR);
1042                 }
1043                 
1044                 char* tempO = new char[12]; 
1045                 *tempO = '\0'; strncat(tempO, "--uchimeout", 11);
1046                 //strcpy(tempO, "--uchimeout"); 
1047                 cPara.push_back(tempO);
1048                 char* tempout = new char[outputFName.length()+1];
1049                 //strcpy(tempout, outputFName.c_str());
1050                 *tempout = '\0'; strncat(tempout, outputFName.c_str(), outputFName.length());
1051                 cPara.push_back(tempout);
1052                 
1053                 if (chimealns) {
1054                         char* tempA = new char[13]; 
1055                         *tempA = '\0'; strncat(tempA, "--uchimealns", 12);
1056                         //strcpy(tempA, "--uchimealns"); 
1057                         cPara.push_back(tempA);
1058                         char* tempa = new char[alns.length()+1];
1059                         //strcpy(tempa, alns.c_str());
1060                         *tempa = '\0'; strncat(tempa, alns.c_str(), alns.length());
1061                         cPara.push_back(tempa);
1062                 }
1063                 
1064                 if (useAbskew) {
1065                         char* tempskew = new char[9];
1066                         *tempskew = '\0'; strncat(tempskew, "--abskew", 8);
1067                         //strcpy(tempskew, "--abskew"); 
1068                         cPara.push_back(tempskew);
1069                         char* tempSkew = new char[abskew.length()+1];
1070                         //strcpy(tempSkew, abskew.c_str());
1071                         *tempSkew = '\0'; strncat(tempSkew, abskew.c_str(), abskew.length());
1072                         cPara.push_back(tempSkew);
1073                 }
1074                 
1075                 if (useMinH) {
1076                         char* tempminh = new char[7]; 
1077                         *tempminh = '\0'; strncat(tempminh, "--minh", 6);
1078                         //strcpy(tempminh, "--minh"); 
1079                         cPara.push_back(tempminh);
1080                         char* tempMinH = new char[minh.length()+1];
1081                         *tempMinH = '\0'; strncat(tempMinH, minh.c_str(), minh.length());
1082                         //strcpy(tempMinH, minh.c_str());
1083                         cPara.push_back(tempMinH);
1084                 }
1085                 
1086                 if (useMindiv) {
1087                         char* tempmindiv = new char[9]; 
1088                         *tempmindiv = '\0'; strncat(tempmindiv, "--mindiv", 8);
1089                         //strcpy(tempmindiv, "--mindiv"); 
1090                         cPara.push_back(tempmindiv);
1091                         char* tempMindiv = new char[mindiv.length()+1];
1092                         *tempMindiv = '\0'; strncat(tempMindiv, mindiv.c_str(), mindiv.length());
1093                         //strcpy(tempMindiv, mindiv.c_str());
1094                         cPara.push_back(tempMindiv);
1095                 }
1096                 
1097                 if (useXn) {
1098                         char* tempxn = new char[5]; 
1099                         //strcpy(tempxn, "--xn"); 
1100                         *tempxn = '\0'; strncat(tempxn, "--xn", 4);
1101                         cPara.push_back(tempxn);
1102                         char* tempXn = new char[xn.length()+1];
1103                         //strcpy(tempXn, xn.c_str());
1104                         *tempXn = '\0'; strncat(tempXn, xn.c_str(), xn.length());
1105                         cPara.push_back(tempXn);
1106                 }
1107                 
1108                 if (useDn) {
1109                         char* tempdn = new char[5]; 
1110                         //strcpy(tempdn, "--dn"); 
1111                         *tempdn = '\0'; strncat(tempdn, "--dn", 4);
1112                         cPara.push_back(tempdn);
1113                         char* tempDn = new char[dn.length()+1];
1114                         *tempDn = '\0'; strncat(tempDn, dn.c_str(), dn.length());
1115                         //strcpy(tempDn, dn.c_str());
1116                         cPara.push_back(tempDn);
1117                 }
1118                 
1119                 if (useXa) {
1120                         char* tempxa = new char[5]; 
1121                         //strcpy(tempxa, "--xa"); 
1122                         *tempxa = '\0'; strncat(tempxa, "--xa", 4);
1123                         cPara.push_back(tempxa);
1124                         char* tempXa = new char[xa.length()+1];
1125                         *tempXa = '\0'; strncat(tempXa, xa.c_str(), xa.length());
1126                         //strcpy(tempXa, xa.c_str());
1127                         cPara.push_back(tempXa);
1128                 }
1129                 
1130                 if (useChunks) {
1131                         char* tempchunks = new char[9]; 
1132                         //strcpy(tempchunks, "--chunks"); 
1133                         *tempchunks = '\0'; strncat(tempchunks, "--chunks", 8);
1134                         cPara.push_back(tempchunks);
1135                         char* tempChunks = new char[chunks.length()+1];
1136                         *tempChunks = '\0'; strncat(tempChunks, chunks.c_str(), chunks.length());
1137                         //strcpy(tempChunks, chunks.c_str());
1138                         cPara.push_back(tempChunks);
1139                 }
1140                 
1141                 if (useMinchunk) {
1142                         char* tempminchunk = new char[11]; 
1143                         //strcpy(tempminchunk, "--minchunk"); 
1144                         *tempminchunk = '\0'; strncat(tempminchunk, "--minchunk", 10);
1145                         cPara.push_back(tempminchunk);
1146                         char* tempMinchunk = new char[minchunk.length()+1];
1147                         *tempMinchunk = '\0'; strncat(tempMinchunk, minchunk.c_str(), minchunk.length());
1148                         //strcpy(tempMinchunk, minchunk.c_str());
1149                         cPara.push_back(tempMinchunk);
1150                 }
1151                 
1152                 if (useIdsmoothwindow) {
1153                         char* tempidsmoothwindow = new char[17]; 
1154                         *tempidsmoothwindow = '\0'; strncat(tempidsmoothwindow, "--idsmoothwindow", 16);
1155                         //strcpy(tempidsmoothwindow, "--idsmoothwindow"); 
1156                         cPara.push_back(tempidsmoothwindow);
1157                         char* tempIdsmoothwindow = new char[idsmoothwindow.length()+1];
1158                         *tempIdsmoothwindow = '\0'; strncat(tempIdsmoothwindow, idsmoothwindow.c_str(), idsmoothwindow.length());
1159                         //strcpy(tempIdsmoothwindow, idsmoothwindow.c_str());
1160                         cPara.push_back(tempIdsmoothwindow);
1161                 }
1162                 
1163                 /*if (useMinsmoothid) {
1164                         char* tempminsmoothid = new char[14]; 
1165                         //strcpy(tempminsmoothid, "--minsmoothid"); 
1166                         *tempminsmoothid = '\0'; strncat(tempminsmoothid, "--minsmoothid", 13);
1167                         cPara.push_back(tempminsmoothid);
1168                         char* tempMinsmoothid = new char[minsmoothid.length()+1];
1169                         *tempMinsmoothid = '\0'; strncat(tempMinsmoothid, minsmoothid.c_str(), minsmoothid.length());
1170                         //strcpy(tempMinsmoothid, minsmoothid.c_str());
1171                         cPara.push_back(tempMinsmoothid);
1172                 }*/
1173                 
1174                 if (useMaxp) {
1175                         char* tempmaxp = new char[7]; 
1176                         //strcpy(tempmaxp, "--maxp"); 
1177                         *tempmaxp = '\0'; strncat(tempmaxp, "--maxp", 6);
1178                         cPara.push_back(tempmaxp);
1179                         char* tempMaxp = new char[maxp.length()+1];
1180                         *tempMaxp = '\0'; strncat(tempMaxp, maxp.c_str(), maxp.length());
1181                         //strcpy(tempMaxp, maxp.c_str());
1182                         cPara.push_back(tempMaxp);
1183                 }
1184                 
1185                 if (!skipgaps) {
1186                         char* tempskipgaps = new char[13]; 
1187                         //strcpy(tempskipgaps, "--[no]skipgaps");
1188                         *tempskipgaps = '\0'; strncat(tempskipgaps, "--noskipgaps", 12);
1189                         cPara.push_back(tempskipgaps);
1190                 }
1191                 
1192                 if (!skipgaps2) {
1193                         char* tempskipgaps2 = new char[14]; 
1194                         //strcpy(tempskipgaps2, "--[no]skipgaps2"); 
1195                         *tempskipgaps2 = '\0'; strncat(tempskipgaps2, "--noskipgaps2", 13);
1196                         cPara.push_back(tempskipgaps2);
1197                 }
1198                 
1199                 if (useMinlen) {
1200                         char* tempminlen = new char[9]; 
1201                         *tempminlen = '\0'; strncat(tempminlen, "--minlen", 8);
1202                         //strcpy(tempminlen, "--minlen"); 
1203                         cPara.push_back(tempminlen);
1204                         char* tempMinlen = new char[minlen.length()+1];
1205                         //strcpy(tempMinlen, minlen.c_str());
1206                         *tempMinlen = '\0'; strncat(tempMinlen, minlen.c_str(), minlen.length());
1207                         cPara.push_back(tempMinlen);
1208                 }
1209                 
1210                 if (useMaxlen) {
1211                         char* tempmaxlen = new char[9]; 
1212                         //strcpy(tempmaxlen, "--maxlen"); 
1213                         *tempmaxlen = '\0'; strncat(tempmaxlen, "--maxlen", 8);
1214                         cPara.push_back(tempmaxlen);
1215                         char* tempMaxlen = new char[maxlen.length()+1];
1216                         *tempMaxlen = '\0'; strncat(tempMaxlen, maxlen.c_str(), maxlen.length());
1217                         //strcpy(tempMaxlen, maxlen.c_str());
1218                         cPara.push_back(tempMaxlen);
1219                 }
1220                 
1221                 if (ucl) {
1222                         char* tempucl = new char[5]; 
1223                         strcpy(tempucl, "--ucl"); 
1224                         cPara.push_back(tempucl);
1225                 }
1226                 
1227                 if (useQueryfract) {
1228                         char* tempqueryfract = new char[13]; 
1229                         *tempqueryfract = '\0'; strncat(tempqueryfract, "--queryfract", 12);
1230                         //strcpy(tempqueryfract, "--queryfract"); 
1231                         cPara.push_back(tempqueryfract);
1232                         char* tempQueryfract = new char[queryfract.length()+1];
1233                         *tempQueryfract = '\0'; strncat(tempQueryfract, queryfract.c_str(), queryfract.length());
1234                         //strcpy(tempQueryfract, queryfract.c_str());
1235                         cPara.push_back(tempQueryfract);
1236                 }
1237                 
1238                 
1239                 char** uchimeParameters;
1240                 uchimeParameters = new char*[cPara.size()];
1241                 string commandString = "";
1242                 for (int i = 0; i < cPara.size(); i++) {  uchimeParameters[i] = cPara[i];  commandString += toString(cPara[i]) + " "; } 
1243                 //int numArgs = cPara.size();
1244                 
1245                 //uchime_main(numArgs, uchimeParameters); 
1246                 //cout << "commandString = " << commandString << endl;
1247 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
1248 #else
1249                 commandString = "\"" + commandString + "\"";
1250 #endif
1251                 system(commandString.c_str());
1252                 
1253                 //free memory
1254                 for(int i = 0; i < cPara.size(); i++)  {  delete cPara[i];  }
1255                 delete[] uchimeParameters; 
1256                 
1257                 //remove "" from filenames
1258                 outputFName = outputFName.substr(1, outputFName.length()-2);
1259                 filename = filename.substr(1, filename.length()-2);
1260                 alns = alns.substr(1, alns.length()-2);
1261                 
1262                 if (m->control_pressed) { return 0; }
1263                 
1264                 //create accnos file from uchime results
1265                 ifstream in; 
1266                 m->openInputFile(outputFName, in);
1267                 
1268                 ofstream out;
1269                 m->openOutputFile(accnos, out);
1270                 
1271                 int num = 0;
1272                 numChimeras = 0;
1273                 while(!in.eof()) {
1274                         
1275                         if (m->control_pressed) { break; }
1276                         
1277                         string name = "";
1278                         string chimeraFlag = "";
1279                         in >> chimeraFlag >> name;
1280                         
1281                         //fix name if needed
1282                         if (templatefile == "self") { 
1283                                 name = name.substr(0, name.length()-1); //rip off last /
1284                                 name = name.substr(0, name.find_last_of('/'));
1285                         }
1286                         
1287                         for (int i = 0; i < 15; i++) {  in >> chimeraFlag; }
1288                         m->gobble(in);
1289                         
1290                         if (chimeraFlag == "Y") {  out << name << endl; numChimeras++; }
1291                         num++;
1292                 }
1293                 in.close();
1294                 out.close();
1295                 
1296                 return num;
1297         }
1298         catch(exception& e) {
1299                 m->errorOut(e, "ChimeraUchimeCommand", "driver");
1300                 exit(1);
1301         }
1302 }
1303 /**************************************************************************************************/
1304
1305 int ChimeraUchimeCommand::createProcesses(string outputFileName, string filename, string accnos, string alns, int& numChimeras) {
1306         try {
1307                 
1308                 processIDS.clear();
1309                 int process = 1;
1310                 int num = 0;
1311                 vector<string> files;
1312                 
1313 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)          
1314                 //break up file into multiple files
1315                 m->divideFile(filename, processors, files);
1316                 
1317                 if (m->control_pressed) {  return 0;  }
1318                                 
1319                 //loop through and create all the processes you want
1320                 while (process != processors) {
1321                         int pid = fork();
1322                         
1323                         if (pid > 0) {
1324                                 processIDS.push_back(pid);  //create map from line number to pid so you can append files in correct order later
1325                                 process++;
1326                         }else if (pid == 0){
1327                                 num = driver(outputFileName + toString(getpid()) + ".temp", files[process], accnos + toString(getpid()) + ".temp", alns + toString(getpid()) + ".temp", numChimeras);
1328                                 
1329                                 //pass numSeqs to parent
1330                                 ofstream out;
1331                                 string tempFile = outputFileName + toString(getpid()) + ".num.temp";
1332                                 m->openOutputFile(tempFile, out);
1333                                 out << num << endl;
1334                                 out << numChimeras << endl;
1335                                 out.close();
1336                                 
1337                                 exit(0);
1338                         }else { 
1339                                 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine(); 
1340                                 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
1341                                 exit(0);
1342                         }
1343                 }
1344                 
1345                 //do my part
1346                 num = driver(outputFileName, files[0], accnos, alns, numChimeras);
1347                 
1348                 //force parent to wait until all the processes are done
1349                 for (int i=0;i<processIDS.size();i++) { 
1350                         int temp = processIDS[i];
1351                         wait(&temp);
1352                 }
1353                 
1354                 for (int i = 0; i < processIDS.size(); i++) {
1355                         ifstream in;
1356                         string tempFile =  outputFileName + toString(processIDS[i]) + ".num.temp";
1357                         m->openInputFile(tempFile, in);
1358                         if (!in.eof()) { 
1359                                 int tempNum = 0; 
1360                                 in >> tempNum; m->gobble(in);
1361                                 num += tempNum; 
1362                                 in >> tempNum;
1363                                 numChimeras += tempNum;
1364                         }
1365                         in.close(); m->mothurRemove(tempFile);
1366                 }
1367 #else
1368                 //////////////////////////////////////////////////////////////////////////////////////////////////////
1369                 //Windows version shared memory, so be careful when passing variables through the preClusterData struct. 
1370                 //Above fork() will clone, so memory is separate, but that's not the case with windows, 
1371                 //////////////////////////////////////////////////////////////////////////////////////////////////////
1372                 
1373                 //divide file
1374                 int count = 0;
1375                 int spot = 0;
1376                 map<int, ofstream*> filehandles;
1377                 map<int, ofstream*>::iterator it3;
1378                 
1379                 ofstream* temp;
1380                 for (int i = 0; i < processors; i++) {
1381                         temp = new ofstream;
1382                         filehandles[i] = temp;
1383                         m->openOutputFile(filename+toString(i)+".temp", *(temp));
1384                         files.push_back(filename+toString(i)+".temp");
1385                 }
1386                 
1387                 ifstream in;
1388                 m->openInputFile(filename, in);
1389                 
1390                 while(!in.eof()) {
1391                         
1392                         if (m->control_pressed) { in.close(); for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) { (*(it3->second)).close(); delete it3->second; } return 0; }
1393                         
1394                         Sequence tempSeq(in); m->gobble(in); 
1395                         
1396                         if (tempSeq.getName() != "") {
1397                                 tempSeq.printSequence(*(filehandles[spot])); 
1398                                 spot++; count++;
1399                                 if (spot == processors) { spot = 0; }
1400                         }
1401                 }
1402                 in.close();
1403                 
1404                 //delete memory
1405                 for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) {
1406                         (*(it3->second)).close();
1407                         delete it3->second;
1408                 }
1409                 
1410                 //sanity check for number of processors
1411                 if (count < processors) { processors = count; }
1412                 
1413                 vector<uchimeData*> pDataArray; 
1414                 DWORD   dwThreadIdArray[processors-1];
1415                 HANDLE  hThreadArray[processors-1]; 
1416                 vector<string> dummy; //used so that we can use the same struct for MyUchimeSeqsThreadFunction and MyUchimeThreadFunction
1417                 
1418                 //Create processor worker threads.
1419                 for( int i=1; i<processors; i++ ){
1420                         // Allocate memory for thread data.
1421                         string extension = toString(i) + ".temp";
1422                         
1423                         uchimeData* tempUchime = new uchimeData(outputFileName+extension, templatefile, files[i], "", "", "", accnos+extension, alns+extension, dummy, m, 0, 0,  i);
1424                         tempUchime->setBooleans(useAbskew, chimealns, useMinH, useMindiv, useXn, useDn, useXa, useChunks, useMinchunk, useIdsmoothwindow, useMinsmoothid, useMaxp, skipgaps, skipgaps2, useMinlen, useMaxlen, ucl, useQueryfract);
1425                         tempUchime->setVariables(abskew, minh, mindiv, xn, dn, xa, chunks, minchunk, idsmoothwindow, minsmoothid, maxp, minlen, maxlen, queryfract);
1426                         
1427                         pDataArray.push_back(tempUchime);
1428                         processIDS.push_back(i);
1429                         
1430                         //MySeqSumThreadFunction is in header. It must be global or static to work with the threads.
1431                         //default security attributes, thread function name, argument to thread function, use default creation flags, returns the thread identifier
1432                         hThreadArray[i-1] = CreateThread(NULL, 0, MyUchimeSeqsThreadFunction, pDataArray[i-1], 0, &dwThreadIdArray[i-1]);   
1433                 }
1434                 
1435                 
1436                 //using the main process as a worker saves time and memory
1437                 num = driver(outputFileName, files[0], accnos, alns, numChimeras);
1438                 
1439                 //Wait until all threads have terminated.
1440                 WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE);
1441                 
1442                 //Close all thread handles and free memory allocations.
1443                 for(int i=0; i < pDataArray.size(); i++){
1444                         num += pDataArray[i]->count;
1445                         numChimeras += pDataArray[i]->numChimeras;
1446                         CloseHandle(hThreadArray[i]);
1447                         delete pDataArray[i];
1448                 }
1449 #endif          
1450                 
1451                 //append output files
1452                 for(int i=0;i<processIDS.size();i++){
1453                         m->appendFiles((outputFileName + toString(processIDS[i]) + ".temp"), outputFileName);
1454                         m->mothurRemove((outputFileName + toString(processIDS[i]) + ".temp"));
1455                         
1456                         m->appendFiles((accnos + toString(processIDS[i]) + ".temp"), accnos);
1457                         m->mothurRemove((accnos + toString(processIDS[i]) + ".temp"));
1458                         
1459                         if (chimealns) {
1460                                 m->appendFiles((alns + toString(processIDS[i]) + ".temp"), alns);
1461                                 m->mothurRemove((alns + toString(processIDS[i]) + ".temp"));
1462                         }
1463                 }
1464                 
1465                 //get rid of the file pieces.
1466                 for (int i = 0; i < files.size(); i++) { m->mothurRemove(files[i]); }
1467                 return num;     
1468         }
1469         catch(exception& e) {
1470                 m->errorOut(e, "ChimeraUchimeCommand", "createProcesses");
1471                 exit(1);
1472         }
1473 }
1474 /**************************************************************************************************/
1475
1476 int ChimeraUchimeCommand::createProcessesGroups(SequenceParser& parser, string outputFName, string filename, string accnos, string alns, vector<string> groups, string nameFile, string groupFile, string fastaFile) {
1477         try {
1478                 
1479                 processIDS.clear();
1480                 int process = 1;
1481                 int num = 0;
1482                 
1483                 //sanity check
1484                 if (groups.size() < processors) { processors = groups.size(); }
1485                 
1486                 //divide the groups between the processors
1487                 vector<linePair> lines;
1488                 int numGroupsPerProcessor = groups.size() / processors;
1489                 for (int i = 0; i < processors; i++) {
1490                         int startIndex =  i * numGroupsPerProcessor;
1491                         int endIndex = (i+1) * numGroupsPerProcessor;
1492                         if(i == (processors - 1)){      endIndex = groups.size();       }
1493                         lines.push_back(linePair(startIndex, endIndex));
1494                 }
1495                 
1496 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)          
1497                                 
1498                 //loop through and create all the processes you want
1499                 while (process != processors) {
1500                         int pid = fork();
1501                         
1502                         if (pid > 0) {
1503                                 processIDS.push_back(pid);  //create map from line number to pid so you can append files in correct order later
1504                                 process++;
1505                         }else if (pid == 0){
1506                                 num = driverGroups(parser, outputFName + toString(getpid()) + ".temp", filename + toString(getpid()) + ".temp", accnos + toString(getpid()) + ".temp", alns + toString(getpid()) + ".temp", lines[process].start, lines[process].end, groups);
1507                                 
1508                                 //pass numSeqs to parent
1509                                 ofstream out;
1510                                 string tempFile = outputFName + toString(getpid()) + ".num.temp";
1511                                 m->openOutputFile(tempFile, out);
1512                                 out << num << endl;
1513                                 out.close();
1514                                 
1515                                 exit(0);
1516                         }else { 
1517                                 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine(); 
1518                                 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
1519                                 exit(0);
1520                         }
1521                 }
1522                 
1523                 //do my part
1524                 num = driverGroups(parser, outputFName, filename, accnos, alns, lines[0].start, lines[0].end, groups);
1525                 
1526                 //force parent to wait until all the processes are done
1527                 for (int i=0;i<processIDS.size();i++) { 
1528                         int temp = processIDS[i];
1529                         wait(&temp);
1530                 }
1531                 
1532                 for (int i = 0; i < processIDS.size(); i++) {
1533                         ifstream in;
1534                         string tempFile =  outputFName + toString(processIDS[i]) + ".num.temp";
1535                         m->openInputFile(tempFile, in);
1536                         if (!in.eof()) { int tempNum = 0; in >> tempNum; num += tempNum; }
1537                         in.close(); m->mothurRemove(tempFile);
1538                 }
1539                                 
1540 #else
1541                 //////////////////////////////////////////////////////////////////////////////////////////////////////
1542                 //Windows version shared memory, so be careful when passing variables through the uchimeData struct. 
1543                 //Above fork() will clone, so memory is separate, but that's not the case with windows, 
1544                 //////////////////////////////////////////////////////////////////////////////////////////////////////
1545                 
1546                 vector<uchimeData*> pDataArray; 
1547                 DWORD   dwThreadIdArray[processors-1];
1548                 HANDLE  hThreadArray[processors-1]; 
1549                 
1550                 //Create processor worker threads.
1551                 for( int i=1; i<processors; i++ ){
1552                         // Allocate memory for thread data.
1553                         string extension = toString(i) + ".temp";
1554                         
1555                         uchimeData* tempUchime = new uchimeData(outputFName+extension, templatefile, filename+extension, fastaFile, nameFile, groupFile, accnos+extension, alns+extension, groups, m, lines[i].start, lines[i].end,  i);
1556                         tempUchime->setBooleans(useAbskew, chimealns, useMinH, useMindiv, useXn, useDn, useXa, useChunks, useMinchunk, useIdsmoothwindow, useMinsmoothid, useMaxp, skipgaps, skipgaps2, useMinlen, useMaxlen, ucl, useQueryfract);
1557                         tempUchime->setVariables(abskew, minh, mindiv, xn, dn, xa, chunks, minchunk, idsmoothwindow, minsmoothid, maxp, minlen, maxlen, queryfract);
1558                         
1559                         pDataArray.push_back(tempUchime);
1560                         processIDS.push_back(i);
1561                         
1562                         //MyUchimeThreadFunction is in header. It must be global or static to work with the threads.
1563                         //default security attributes, thread function name, argument to thread function, use default creation flags, returns the thread identifier
1564                         hThreadArray[i-1] = CreateThread(NULL, 0, MyUchimeThreadFunction, pDataArray[i-1], 0, &dwThreadIdArray[i-1]);   
1565                 }
1566                 
1567                 
1568                 //using the main process as a worker saves time and memory
1569                 num = driverGroups(parser, outputFName, filename, accnos, alns, lines[0].start, lines[0].end, groups);
1570                 
1571                 //Wait until all threads have terminated.
1572                 WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE);
1573                 
1574                 //Close all thread handles and free memory allocations.
1575                 for(int i=0; i < pDataArray.size(); i++){
1576                         num += pDataArray[i]->count;
1577                         CloseHandle(hThreadArray[i]);
1578                         delete pDataArray[i];
1579                 }
1580 #endif          
1581                 
1582                                 
1583                 //append output files
1584                 for(int i=0;i<processIDS.size();i++){
1585                         m->appendFiles((outputFName + toString(processIDS[i]) + ".temp"), outputFName);
1586                         m->mothurRemove((outputFName + toString(processIDS[i]) + ".temp"));
1587                         
1588                         m->appendFiles((accnos + toString(processIDS[i]) + ".temp"), accnos);
1589                         m->mothurRemove((accnos + toString(processIDS[i]) + ".temp"));
1590                         
1591                         if (chimealns) {
1592                                 m->appendFiles((alns + toString(processIDS[i]) + ".temp"), alns);
1593                                 m->mothurRemove((alns + toString(processIDS[i]) + ".temp"));
1594                         }
1595                 }
1596                 
1597                 return num;     
1598                 
1599         }
1600         catch(exception& e) {
1601                 m->errorOut(e, "ChimeraUchimeCommand", "createProcessesGroups");
1602                 exit(1);
1603         }
1604 }
1605 /**************************************************************************************************/
1606