]> git.donarmstrong.com Git - mothur.git/blob - chimerauchimecommand.cpp
Merge remote-tracking branch 'mothur/master'
[mothur.git] / chimerauchimecommand.cpp
1 /*
2  *  chimerauchimecommand.cpp
3  *  Mothur
4  *
5  *  Created by westcott on 5/13/11.
6  *  Copyright 2011 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "chimerauchimecommand.h"
11 #include "deconvolutecommand.h"
12 //#include "uc.h"
13 #include "sequence.hpp"
14 #include "referencedb.h"
15 #include "systemcommand.h"
16
17 //**********************************************************************************************************************
18 vector<string> ChimeraUchimeCommand::setParameters(){   
19         try {
20                 CommandParameter ptemplate("reference", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(ptemplate);
21                 CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pfasta);
22                 CommandParameter pname("name", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pname);
23                 CommandParameter pgroup("group", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pgroup);
24                 CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors);
25                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
26                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
27                 CommandParameter pabskew("abskew", "Number", "", "1.9", "", "", "",false,false); parameters.push_back(pabskew);
28                 CommandParameter pchimealns("chimealns", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pchimealns);
29                 CommandParameter pminh("minh", "Number", "", "0.3", "", "", "",false,false); parameters.push_back(pminh);
30                 CommandParameter pmindiv("mindiv", "Number", "", "0.5", "", "", "",false,false); parameters.push_back(pmindiv);
31                 CommandParameter pxn("xn", "Number", "", "8.0", "", "", "",false,false); parameters.push_back(pxn);
32                 CommandParameter pdn("dn", "Number", "", "1.4", "", "", "",false,false); parameters.push_back(pdn);
33                 CommandParameter pxa("xa", "Number", "", "1", "", "", "",false,false); parameters.push_back(pxa);
34                 CommandParameter pchunks("chunks", "Number", "", "4", "", "", "",false,false); parameters.push_back(pchunks);
35                 CommandParameter pminchunk("minchunk", "Number", "", "64", "", "", "",false,false); parameters.push_back(pminchunk);
36                 CommandParameter pidsmoothwindow("idsmoothwindow", "Number", "", "32", "", "", "",false,false); parameters.push_back(pidsmoothwindow);
37                 //CommandParameter pminsmoothid("minsmoothid", "Number", "", "0.95", "", "", "",false,false); parameters.push_back(pminsmoothid);
38                 CommandParameter pmaxp("maxp", "Number", "", "2", "", "", "",false,false); parameters.push_back(pmaxp);
39                 CommandParameter pskipgaps("skipgaps", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(pskipgaps);
40                 CommandParameter pskipgaps2("skipgaps2", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(pskipgaps2);
41                 CommandParameter pminlen("minlen", "Number", "", "10", "", "", "",false,false); parameters.push_back(pminlen);
42                 CommandParameter pmaxlen("maxlen", "Number", "", "10000", "", "", "",false,false); parameters.push_back(pmaxlen);
43                 CommandParameter pucl("ucl", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pucl);
44                 CommandParameter pqueryfract("queryfract", "Number", "", "0.5", "", "", "",false,false); parameters.push_back(pqueryfract);
45
46                 vector<string> myArray;
47                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
48                 return myArray;
49         }
50         catch(exception& e) {
51                 m->errorOut(e, "ChimeraUchimeCommand", "setParameters");
52                 exit(1);
53         }
54 }
55 //**********************************************************************************************************************
56 string ChimeraUchimeCommand::getHelpString(){   
57         try {
58                 string helpString = "";
59                 helpString += "The chimera.uchime command reads a fastafile and referencefile and outputs potentially chimeric sequences.\n";
60                 helpString += "This command is a wrapper for uchime written by Robert C. Edgar.\n";
61                 helpString += "The chimera.uchime command parameters are fasta, name, reference, processors, abskew, chimealns, minh, mindiv, xn, dn, xa, chunks, minchunk, idsmoothwindow, minsmoothid, maxp, skipgaps, skipgaps2, minlen, maxlen, ucl and queryfact.\n";
62                 helpString += "The fasta parameter allows you to enter the fasta file containing your potentially chimeric sequences, and is required, unless you have a valid current fasta file. \n";
63                 helpString += "The name parameter allows you to provide a name file, if you are using template=self. \n";
64                 helpString += "You may enter multiple fasta files by separating their names with dashes. ie. fasta=abrecovery.fasta-amazon.fasta \n";
65                 helpString += "The group parameter allows you to provide a group file. The group file can be used with a namesfile and reference=self. When checking sequences, only sequences from the same group as the query sequence will be used as the reference. \n";
66                 helpString += "The reference parameter allows you to enter a reference file containing known non-chimeric sequences, and is required. You may also set template=self, in this case the abundant sequences will be used as potential parents. \n";
67                 helpString += "The processors parameter allows you to specify how many processors you would like to use.  The default is 1. \n";
68                 helpString += "The abskew parameter can only be used with template=self. Minimum abundance skew. Default 1.9. Abundance skew is: min [ abund(parent1), abund(parent2) ] / abund(query).\n";
69                 helpString += "The chimealns parameter allows you to indicate you would like a file containing multiple alignments of query sequences to parents in human readable format. Alignments show columns with differences that support or contradict a chimeric model.\n";
70                 helpString += "The minh parameter - mininum score to report chimera. Default 0.3. Values from 0.1 to 5 might be reasonable. Lower values increase sensitivity but may report more false positives. If you decrease xn you may need to increase minh, and vice versa.\n";
71                 helpString += "The mindiv parameter - minimum divergence ratio, default 0.5. Div ratio is 100%% - %%identity between query sequence and the closest candidate for being a parent. If you don't care about very close chimeras, then you could increase mindiv to, say, 1.0 or 2.0, and also decrease minh, say to 0.1, to increase sensitivity. How well this works will depend on your data. Best is to tune parameters on a good benchmark.\n";
72                 helpString += "The xn parameter - weight of a no vote. Default 8.0. Decreasing this weight to around 3 or 4 may give better performance on denoised data.\n";
73                 helpString += "The dn parameter - pseudo-count prior on number of no votes. Default 1.4. Probably no good reason to change this unless you can retune to a good benchmark for your data. Reasonable values are probably in the range from 0.2 to 2.\n";
74                 helpString += "The xa parameter - weight of an abstain vote. Default 1. So far, results do not seem to be very sensitive to this parameter, but if you have a good training set might be worth trying. Reasonable values might range from 0.1 to 2.\n";
75                 helpString += "The chunks parameter is the number of chunks to extract from the query sequence when searching for parents. Default 4.\n";
76                 helpString += "The minchunk parameter is the minimum length of a chunk. Default 64.\n";
77                 helpString += "The idsmoothwindow parameter is the length of id smoothing window. Default 32.\n";
78                 //helpString += "The minsmoothid parameter - minimum factional identity over smoothed window of candidate parent. Default 0.95.\n";
79                 helpString += "The maxp parameter - maximum number of candidate parents to consider. Default 2. In tests so far, increasing maxp gives only a very small improvement in sensivity but tends to increase the error rate quite a bit.\n";
80                 helpString += "The skipgaps parameter controls how gapped columns affect counting of diffs. If skipgaps is set to T, columns containing gaps do not found as diffs. Default = T.\n";
81                 helpString += "The skipgaps2 parameter controls how gapped columns affect counting of diffs. If skipgaps2 is set to T, if column is immediately adjacent to a column containing a gap, it is not counted as a diff. Default = T.\n";
82                 helpString += "The minlen parameter is the minimum unaligned sequence length. Defaults 10. Applies to both query and reference sequences.\n";
83                 helpString += "The maxlen parameter is the maximum unaligned sequence length. Defaults 10000. Applies to both query and reference sequences.\n";
84                 helpString += "The ucl parameter - use local-X alignments. Default is global-X or false. On tests so far, global-X is always better; this option is retained because it just might work well on some future type of data.\n";
85                 helpString += "The queryfract parameter - minimum fraction of the query sequence that must be covered by a local-X alignment. Default 0.5. Applies only when ucl is true.\n";
86 #ifdef USE_MPI
87                 helpString += "When using MPI, the processors parameter is set to the number of MPI processes running. \n";
88 #endif
89                 helpString += "The chimera.uchime command should be in the following format: \n";
90                 helpString += "chimera.uchime(fasta=yourFastaFile, reference=yourTemplate) \n";
91                 helpString += "Example: chimera.uchime(fasta=AD.align, reference=silva.gold.align) \n";
92                 helpString += "Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFastaFile).\n";       
93                 return helpString;
94         }
95         catch(exception& e) {
96                 m->errorOut(e, "ChimeraUchimeCommand", "getHelpString");
97                 exit(1);
98         }
99 }
100 //**********************************************************************************************************************
101 ChimeraUchimeCommand::ChimeraUchimeCommand(){   
102         try {
103                 abort = true; calledHelp = true;
104                 setParameters();
105                 vector<string> tempOutNames;
106                 outputTypes["chimera"] = tempOutNames;
107                 outputTypes["accnos"] = tempOutNames;
108                 outputTypes["alns"] = tempOutNames;
109         }
110         catch(exception& e) {
111                 m->errorOut(e, "ChimeraUchimeCommand", "ChimeraUchimeCommand");
112                 exit(1);
113         }
114 }
115 //***************************************************************************************************************
116 ChimeraUchimeCommand::ChimeraUchimeCommand(string option)  {
117         try {
118                 abort = false; calledHelp = false; 
119                 ReferenceDB* rdb = ReferenceDB::getInstance();
120                 
121                 //allow user to run help
122                 if(option == "help") { help(); abort = true; calledHelp = true; }
123                 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
124                 
125                 else {
126                         vector<string> myArray = setParameters();
127                         
128                         OptionParser parser(option);
129                         map<string,string> parameters = parser.getParameters();
130                         
131                         ValidParameters validParameter("chimera.uchime");
132                         map<string,string>::iterator it;
133                         
134                         //check to make sure all parameters are valid for command
135                         for (it = parameters.begin(); it != parameters.end(); it++) { 
136                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
137                         }
138                         
139                         vector<string> tempOutNames;
140                         outputTypes["chimera"] = tempOutNames;
141                         outputTypes["accnos"] = tempOutNames;
142                         outputTypes["alns"] = tempOutNames;
143                         
144                         //if the user changes the input directory command factory will send this info to us in the output parameter 
145                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
146                         if (inputDir == "not found"){   inputDir = "";          }
147                         
148                         //check for required parameters
149                         fastafile = validParameter.validFile(parameters, "fasta", false);
150                         if (fastafile == "not found") {                                 
151                                 //if there is a current fasta file, use it
152                                 string filename = m->getFastaFile(); 
153                                 if (filename != "") { fastaFileNames.push_back(filename); m->mothurOut("Using " + filename + " as input file for the fasta parameter."); m->mothurOutEndLine(); }
154                                 else {  m->mothurOut("You have no current fastafile and the fasta parameter is required."); m->mothurOutEndLine(); abort = true; }
155                         }else { 
156                                 m->splitAtDash(fastafile, fastaFileNames);
157                                 
158                                 //go through files and make sure they are good, if not, then disregard them
159                                 for (int i = 0; i < fastaFileNames.size(); i++) {
160                                         
161                                         bool ignore = false;
162                                         if (fastaFileNames[i] == "current") { 
163                                                 fastaFileNames[i] = m->getFastaFile(); 
164                                                 if (fastaFileNames[i] != "") {  m->mothurOut("Using " + fastaFileNames[i] + " as input file for the fasta parameter where you had given current."); m->mothurOutEndLine(); }
165                                                 else {  
166                                                         m->mothurOut("You have no current fastafile, ignoring current."); m->mothurOutEndLine(); ignore=true; 
167                                                         //erase from file list
168                                                         fastaFileNames.erase(fastaFileNames.begin()+i);
169                                                         i--;
170                                                 }
171                                         }
172                                         
173                                         if (!ignore) {
174                                                 
175                                                 if (inputDir != "") {
176                                                         string path = m->hasPath(fastaFileNames[i]);
177                                                         //if the user has not given a path then, add inputdir. else leave path alone.
178                                                         if (path == "") {       fastaFileNames[i] = inputDir + fastaFileNames[i];               }
179                                                 }
180                                                 
181                                                 int ableToOpen;
182                                                 ifstream in;
183                                                 
184                                                 ableToOpen = m->openInputFile(fastaFileNames[i], in, "noerror");
185                                                 
186                                                 //if you can't open it, try default location
187                                                 if (ableToOpen == 1) {
188                                                         if (m->getDefaultPath() != "") { //default path is set
189                                                                 string tryPath = m->getDefaultPath() + m->getSimpleName(fastaFileNames[i]);
190                                                                 m->mothurOut("Unable to open " + fastaFileNames[i] + ". Trying default " + tryPath); m->mothurOutEndLine();
191                                                                 ifstream in2;
192                                                                 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
193                                                                 in2.close();
194                                                                 fastaFileNames[i] = tryPath;
195                                                         }
196                                                 }
197                                                 
198                                                 if (ableToOpen == 1) {
199                                                         if (m->getOutputDir() != "") { //default path is set
200                                                                 string tryPath = m->getOutputDir() + m->getSimpleName(fastaFileNames[i]);
201                                                                 m->mothurOut("Unable to open " + fastaFileNames[i] + ". Trying output directory " + tryPath); m->mothurOutEndLine();
202                                                                 ifstream in2;
203                                                                 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
204                                                                 in2.close();
205                                                                 fastaFileNames[i] = tryPath;
206                                                         }
207                                                 }
208                                                 
209                                                 in.close();
210                                                 
211                                                 if (ableToOpen == 1) { 
212                                                         m->mothurOut("Unable to open " + fastaFileNames[i] + ". It will be disregarded."); m->mothurOutEndLine(); 
213                                                         //erase from file list
214                                                         fastaFileNames.erase(fastaFileNames.begin()+i);
215                                                         i--;
216                                                 }else {
217                                                         m->setFastaFile(fastaFileNames[i]);
218                                                 }
219                                         }
220                                 }
221                                 
222                                 //make sure there is at least one valid file left
223                                 if (fastaFileNames.size() == 0) { m->mothurOut("[ERROR]: no valid files."); m->mothurOutEndLine(); abort = true; }
224                         }
225                         
226                         
227                         //check for required parameters
228                         bool hasName = true;
229                         namefile = validParameter.validFile(parameters, "name", false);
230                         if (namefile == "not found") { namefile = "";  hasName = false; }
231                         else { 
232                                 m->splitAtDash(namefile, nameFileNames);
233                                 
234                                 //go through files and make sure they are good, if not, then disregard them
235                                 for (int i = 0; i < nameFileNames.size(); i++) {
236                                         
237                                         bool ignore = false;
238                                         if (nameFileNames[i] == "current") { 
239                                                 nameFileNames[i] = m->getNameFile(); 
240                                                 if (nameFileNames[i] != "") {  m->mothurOut("Using " + nameFileNames[i] + " as input file for the name parameter where you had given current."); m->mothurOutEndLine(); }
241                                                 else {  
242                                                         m->mothurOut("You have no current namefile, ignoring current."); m->mothurOutEndLine(); ignore=true; 
243                                                         //erase from file list
244                                                         nameFileNames.erase(nameFileNames.begin()+i);
245                                                         i--;
246                                                 }
247                                         }
248                                         
249                                         if (!ignore) {
250                                                 
251                                                 if (inputDir != "") {
252                                                         string path = m->hasPath(nameFileNames[i]);
253                                                         //if the user has not given a path then, add inputdir. else leave path alone.
254                                                         if (path == "") {       nameFileNames[i] = inputDir + nameFileNames[i];         }
255                                                 }
256                                                 
257                                                 int ableToOpen;
258                                                 ifstream in;
259                                                 
260                                                 ableToOpen = m->openInputFile(nameFileNames[i], in, "noerror");
261                                                 
262                                                 //if you can't open it, try default location
263                                                 if (ableToOpen == 1) {
264                                                         if (m->getDefaultPath() != "") { //default path is set
265                                                                 string tryPath = m->getDefaultPath() + m->getSimpleName(nameFileNames[i]);
266                                                                 m->mothurOut("Unable to open " + nameFileNames[i] + ". Trying default " + tryPath); m->mothurOutEndLine();
267                                                                 ifstream in2;
268                                                                 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
269                                                                 in2.close();
270                                                                 nameFileNames[i] = tryPath;
271                                                         }
272                                                 }
273                                                 
274                                                 if (ableToOpen == 1) {
275                                                         if (m->getOutputDir() != "") { //default path is set
276                                                                 string tryPath = m->getOutputDir() + m->getSimpleName(nameFileNames[i]);
277                                                                 m->mothurOut("Unable to open " + nameFileNames[i] + ". Trying output directory " + tryPath); m->mothurOutEndLine();
278                                                                 ifstream in2;
279                                                                 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
280                                                                 in2.close();
281                                                                 nameFileNames[i] = tryPath;
282                                                         }
283                                                 }
284                                                 
285                                                 in.close();
286                                                 
287                                                 if (ableToOpen == 1) { 
288                                                         m->mothurOut("Unable to open " + nameFileNames[i] + ". It will be disregarded."); m->mothurOutEndLine(); 
289                                                         //erase from file list
290                                                         nameFileNames.erase(nameFileNames.begin()+i);
291                                                         i--;
292                                                 }else {
293                                                         m->setNameFile(nameFileNames[i]);
294                                                 }
295                                         }
296                                 }
297                                 
298                                 //make sure there is at least one valid file left
299                                 if (nameFileNames.size() == 0) { m->mothurOut("[ERROR]: no valid name files."); m->mothurOutEndLine(); abort = true; }
300                         }
301                         
302                         if (hasName && (nameFileNames.size() != fastaFileNames.size())) { m->mothurOut("[ERROR]: The number of namefiles does not match the number of fastafiles, please correct."); m->mothurOutEndLine(); abort=true; }
303                         
304                         bool hasGroup = true;
305                         groupfile = validParameter.validFile(parameters, "group", false);
306                         if (groupfile == "not found") { groupfile = "";  hasGroup = false; }
307                         else { 
308                                 m->splitAtDash(groupfile, groupFileNames);
309                                 
310                                 //go through files and make sure they are good, if not, then disregard them
311                                 for (int i = 0; i < groupFileNames.size(); i++) {
312                                         
313                                         bool ignore = false;
314                                         if (groupFileNames[i] == "current") { 
315                                                 groupFileNames[i] = m->getGroupFile(); 
316                                                 if (groupFileNames[i] != "") {  m->mothurOut("Using " + groupFileNames[i] + " as input file for the group parameter where you had given current."); m->mothurOutEndLine(); }
317                                                 else {  
318                                                         m->mothurOut("You have no current namefile, ignoring current."); m->mothurOutEndLine(); ignore=true; 
319                                                         //erase from file list
320                                                         groupFileNames.erase(groupFileNames.begin()+i);
321                                                         i--;
322                                                 }
323                                         }
324                                         
325                                         if (!ignore) {
326                                                 
327                                                 if (inputDir != "") {
328                                                         string path = m->hasPath(groupFileNames[i]);
329                                                         //if the user has not given a path then, add inputdir. else leave path alone.
330                                                         if (path == "") {       groupFileNames[i] = inputDir + groupFileNames[i];               }
331                                                 }
332                                                 
333                                                 int ableToOpen;
334                                                 ifstream in;
335                                                 
336                                                 ableToOpen = m->openInputFile(groupFileNames[i], in, "noerror");
337                                                 
338                                                 //if you can't open it, try default location
339                                                 if (ableToOpen == 1) {
340                                                         if (m->getDefaultPath() != "") { //default path is set
341                                                                 string tryPath = m->getDefaultPath() + m->getSimpleName(groupFileNames[i]);
342                                                                 m->mothurOut("Unable to open " + groupFileNames[i] + ". Trying default " + tryPath); m->mothurOutEndLine();
343                                                                 ifstream in2;
344                                                                 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
345                                                                 in2.close();
346                                                                 groupFileNames[i] = tryPath;
347                                                         }
348                                                 }
349                                                 
350                                                 if (ableToOpen == 1) {
351                                                         if (m->getOutputDir() != "") { //default path is set
352                                                                 string tryPath = m->getOutputDir() + m->getSimpleName(groupFileNames[i]);
353                                                                 m->mothurOut("Unable to open " + groupFileNames[i] + ". Trying output directory " + tryPath); m->mothurOutEndLine();
354                                                                 ifstream in2;
355                                                                 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
356                                                                 in2.close();
357                                                                 groupFileNames[i] = tryPath;
358                                                         }
359                                                 }
360                                                 
361                                                 in.close();
362                                                 
363                                                 if (ableToOpen == 1) { 
364                                                         m->mothurOut("Unable to open " + groupFileNames[i] + ". It will be disregarded."); m->mothurOutEndLine(); 
365                                                         //erase from file list
366                                                         groupFileNames.erase(groupFileNames.begin()+i);
367                                                         i--;
368                                                 }else {
369                                                         m->setGroupFile(groupFileNames[i]);
370                                                 }
371                                         }
372                                 }
373                                 
374                                 //make sure there is at least one valid file left
375                                 if (groupFileNames.size() == 0) { m->mothurOut("[ERROR]: no valid group files."); m->mothurOutEndLine(); abort = true; }
376                         }
377                         
378                         if (hasGroup && (groupFileNames.size() != fastaFileNames.size())) { m->mothurOut("[ERROR]: The number of groupfiles does not match the number of fastafiles, please correct."); m->mothurOutEndLine(); abort=true; }
379                         
380                         
381                         //if the user changes the output directory command factory will send this info to us in the output parameter 
382                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = ""; }
383                         
384                         string path;
385                         it = parameters.find("reference");
386                         //user has given a template file
387                         if(it != parameters.end()){ 
388                                 if (it->second == "self") { templatefile = "self"; }
389                                 else {
390                                         path = m->hasPath(it->second);
391                                         //if the user has not given a path then, add inputdir. else leave path alone.
392                                         if (path == "") {       parameters["reference"] = inputDir + it->second;                }
393                                         
394                                         templatefile = validParameter.validFile(parameters, "reference", true);
395                                         if (templatefile == "not open") { abort = true; }
396                                         else if (templatefile == "not found") { //check for saved reference sequences
397                                                 if (rdb->getSavedReference() != "") {
398                                                         templatefile = rdb->getSavedReference();
399                                                         m->mothurOutEndLine();  m->mothurOut("Using sequences from " + rdb->getSavedReference() + "."); m->mothurOutEndLine();
400                                                 }else {
401                                                         m->mothurOut("[ERROR]: You don't have any saved reference sequences and the reference parameter is a required."); 
402                                                         m->mothurOutEndLine();
403                                                         abort = true; 
404                                                 }
405                                         }
406                                 }
407                         }else if (hasName) {  templatefile = "self"; }
408                         else { 
409                                 if (rdb->getSavedReference() != "") {
410                                         templatefile = rdb->getSavedReference();
411                                         m->mothurOutEndLine();  m->mothurOut("Using sequences from " + rdb->getSavedReference() + "."); m->mothurOutEndLine();
412                                 }else {
413                                         m->mothurOut("[ERROR]: You don't have any saved reference sequences and the reference parameter is a required."); 
414                                         m->mothurOutEndLine();
415                                         templatefile = ""; abort = true; 
416                                 } 
417                         }
418                                 
419                         string temp = validParameter.validFile(parameters, "processors", false);        if (temp == "not found"){       temp = m->getProcessors();      }
420                         m->setProcessors(temp);
421                         m->mothurConvert(temp, processors);
422                         
423                         abskew = validParameter.validFile(parameters, "abskew", false); if (abskew == "not found"){     useAbskew = false;  abskew = "1.9";     }else{  useAbskew = true;  }
424                         if (useAbskew && templatefile != "self") { m->mothurOut("The abskew parameter is only valid with template=self, ignoring."); m->mothurOutEndLine(); useAbskew = false; }
425                         
426                         temp = validParameter.validFile(parameters, "chimealns", false);                        if (temp == "not found") { temp = "f"; }
427                         chimealns = m->isTrue(temp); 
428                         
429                         minh = validParameter.validFile(parameters, "minh", false);                                             if (minh == "not found")                        { useMinH = false; minh = "0.3";                                        }       else{ useMinH = true;                   }
430                         mindiv = validParameter.validFile(parameters, "mindiv", false);                                 if (mindiv == "not found")                      { useMindiv = false; mindiv = "0.5";                            }       else{ useMindiv = true;                 }
431                         xn = validParameter.validFile(parameters, "xn", false);                                                 if (xn == "not found")                          { useXn = false; xn = "8.0";                                            }       else{ useXn = true;                             }
432                         dn = validParameter.validFile(parameters, "dn", false);                                                 if (dn == "not found")                          { useDn = false; dn = "1.4";                                            }       else{ useDn = true;                             }
433                         xa = validParameter.validFile(parameters, "xa", false);                                                 if (xa == "not found")                          { useXa = false; xa = "1";                                                      }       else{ useXa = true;                             }
434                         chunks = validParameter.validFile(parameters, "chunks", false);                                 if (chunks == "not found")                      { useChunks = false; chunks = "4";                                      }       else{ useChunks = true;                 }
435                         minchunk = validParameter.validFile(parameters, "minchunk", false);                             if (minchunk == "not found")            { useMinchunk = false; minchunk = "64";                         }       else{ useMinchunk = true;               }
436                         idsmoothwindow = validParameter.validFile(parameters, "idsmoothwindow", false); if (idsmoothwindow == "not found")      { useIdsmoothwindow = false; idsmoothwindow = "32";     }       else{ useIdsmoothwindow = true; }
437                         //minsmoothid = validParameter.validFile(parameters, "minsmoothid", false);             if (minsmoothid == "not found")         { useMinsmoothid = false; minsmoothid = "0.95";         }       else{ useMinsmoothid = true;    }
438                         maxp = validParameter.validFile(parameters, "maxp", false);                                             if (maxp == "not found")                        { useMaxp = false; maxp = "2";                                          }       else{ useMaxp = true;                   }
439                         minlen = validParameter.validFile(parameters, "minlen", false);                                 if (minlen == "not found")                      { useMinlen = false; minlen = "10";                                     }       else{ useMinlen = true;                 }
440                         maxlen = validParameter.validFile(parameters, "maxlen", false);                                 if (maxlen == "not found")                      { useMaxlen = false; maxlen = "10000";                          }       else{ useMaxlen = true;                 }
441                         
442                         temp = validParameter.validFile(parameters, "ucl", false);                                              if (temp == "not found") { temp = "f"; }
443                         ucl = m->isTrue(temp);
444                         
445                         queryfract = validParameter.validFile(parameters, "queryfract", false);                 if (queryfract == "not found")          { useQueryfract = false; queryfract = "0.5";            }       else{ useQueryfract = true;             }
446                         if (!ucl && useQueryfract) { m->mothurOut("queryfact may only be used when ucl=t, ignoring."); m->mothurOutEndLine(); useQueryfract = false; }
447                         
448                         temp = validParameter.validFile(parameters, "skipgaps", false);                                 if (temp == "not found") { temp = "t"; }
449                         skipgaps = m->isTrue(temp); 
450
451                         temp = validParameter.validFile(parameters, "skipgaps2", false);                                if (temp == "not found") { temp = "t"; }
452                         skipgaps2 = m->isTrue(temp); 
453                         
454                         if (hasName && (templatefile != "self")) { m->mothurOut("You have provided a namefile and the reference parameter is not set to self. I am not sure what reference you are trying to use, aborting."); m->mothurOutEndLine(); abort=true; }
455                         if (hasGroup && (templatefile != "self")) { m->mothurOut("You have provided a group file and the reference parameter is not set to self. I am not sure what reference you are trying to use, aborting."); m->mothurOutEndLine(); abort=true; }
456                         
457                         //look for uchime exe
458                         path = m->argv;
459                         string tempPath = path;
460                         for (int i = 0; i < path.length(); i++) { tempPath[i] = tolower(path[i]); }
461                         path = path.substr(0, (tempPath.find_last_of('m')));
462                         
463                         string uchimeCommand;
464 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
465                         uchimeCommand = path + "uchime";        //      format the database, -o option gives us the ability
466             if (m->debug) { 
467                 m->mothurOut("[DEBUG]: Uchime location using \"which uchime\" = "); 
468                 Command* newCommand = new SystemCommand("which uchime"); m->mothurOutEndLine();
469                 newCommand->execute();
470                 delete newCommand;
471                 m->mothurOut("[DEBUG]: Mothur's location using \"which mothur\" = "); 
472                 newCommand = new SystemCommand("which mothur"); m->mothurOutEndLine();
473                 newCommand->execute();
474                 delete newCommand;
475             }
476 #else
477                         uchimeCommand = path + "uchime.exe";
478 #endif
479         
480                         //test to make sure uchime exists
481                         ifstream in;
482                         uchimeCommand = m->getFullPathName(uchimeCommand);
483                         int ableToOpen = m->openInputFile(uchimeCommand, in, "no error"); in.close();
484                         if(ableToOpen == 1) {   m->mothurOut("[ERROR]: " + uchimeCommand + " file does not exist. mothur requires the uchime executable."); m->mothurOutEndLine(); abort = true; }
485                 }
486         }
487         catch(exception& e) {
488                 m->errorOut(e, "ChimeraSlayerCommand", "ChimeraSlayerCommand");
489                 exit(1);
490         }
491 }
492 //***************************************************************************************************************
493
494 int ChimeraUchimeCommand::execute(){
495         try{
496                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
497                 
498                 m->mothurOut("\nuchime by Robert C. Edgar\nhttp://drive5.com/uchime\nThis code is donated to the public domain.\n\n");
499                 
500                 for (int s = 0; s < fastaFileNames.size(); s++) {
501                         
502                         m->mothurOut("Checking sequences from " + fastaFileNames[s] + " ..." ); m->mothurOutEndLine();
503                         
504                         int start = time(NULL); 
505                         string nameFile = "";
506                         if (outputDir == "") { outputDir = m->hasPath(fastaFileNames[s]);  }//if user entered a file with a path then preserve it                               
507                         string outputFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + "uchime.chimera";
508                         string accnosFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s]))  + "uchime.accnos";
509                         string alnsFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s]))  + "uchime.alns";
510                         string newFasta = m->getRootName(fastaFileNames[s]) + "temp";
511                                 
512                         //you provided a groupfile
513                         string groupFile = "";
514                         if (groupFileNames.size() != 0) { groupFile = groupFileNames[s]; }
515                         
516                         if ((templatefile == "self") && (groupFile == "")) { //you want to run uchime with a reference template
517
518                                 if (processors != 1) { m->mothurOut("When using template=self, mothur can only use 1 processor, continuing."); m->mothurOutEndLine(); processors = 1; }
519                                 if (nameFileNames.size() != 0) { //you provided a namefile and we don't need to create one
520                                         nameFile = nameFileNames[s];
521                                 }else { nameFile = getNamesFile(fastaFileNames[s]); }
522                                                                                 
523                                 map<string, string> seqs;  
524                                 readFasta(fastaFileNames[s], seqs);  if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) {   m->mothurRemove(outputNames[j]);        }  return 0; }
525
526                                 //read namefile
527                                 vector<seqPriorityNode> nameMapCount;
528                                 int error = m->readNames(nameFile, nameMapCount, seqs); if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) {        m->mothurRemove(outputNames[j]);        }  return 0; }
529                                 if (error == 1) { for (int j = 0; j < outputNames.size(); j++) {        m->mothurRemove(outputNames[j]);        }  return 0; }
530                                 if (seqs.size() != nameMapCount.size()) { m->mothurOut( "The number of sequences in your fastafile does not match the number of sequences in your namefile, aborting."); m->mothurOutEndLine(); for (int j = 0; j < outputNames.size(); j++) {  m->mothurRemove(outputNames[j]);        }  return 0; }
531                                 
532                                 printFile(nameMapCount, newFasta);
533                                 fastaFileNames[s] = newFasta;
534                         }
535                         
536                         if (m->control_pressed) {  for (int j = 0; j < outputNames.size(); j++) {       m->mothurRemove(outputNames[j]);        }  return 0;    }                               
537                         
538                         if (groupFile != "") {
539                                 if (nameFileNames.size() != 0) { //you provided a namefile and we don't need to create one
540                                         nameFile = nameFileNames[s];
541                                 }else { nameFile = getNamesFile(fastaFileNames[s]); }
542                                 
543                                 //Parse sequences by group
544                                 SequenceParser parser(groupFile, fastaFileNames[s], nameFile);
545                                 vector<string> groups = parser.getNamesOfGroups();
546                                         
547                                 if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) {        m->mothurRemove(outputNames[j]);        }  return 0; }
548                                                                 
549                                 //clears files
550                                 ofstream out, out1, out2;
551                                 m->openOutputFile(outputFileName, out); out.close(); 
552                                 m->openOutputFile(accnosFileName, out1); out1.close();
553                                 if (chimealns) { m->openOutputFile(alnsFileName, out2); out2.close(); }
554                                 int totalSeqs = 0;
555                                 
556                                 if(processors == 1)     {       totalSeqs = driverGroups(parser, outputFileName, newFasta, accnosFileName, alnsFileName, 0, groups.size(), groups);     }
557                                 else                            {       totalSeqs = createProcessesGroups(parser, outputFileName, newFasta, accnosFileName, alnsFileName, groups, nameFile, groupFile, fastaFileNames[s]);                      }
558
559                                 if (m->control_pressed) {  for (int j = 0; j < outputNames.size(); j++) {       m->mothurRemove(outputNames[j]);        }  return 0;    }                               
560
561                                 int totalChimeras = deconvoluteResults(parser, outputFileName, accnosFileName, alnsFileName);
562                                 
563                                 m->mothurOutEndLine(); m->mothurOut("It took " + toString(time(NULL) - start) + " secs to check " + toString(totalSeqs) + " sequences. " + toString(totalChimeras) + " chimeras were found.");  m->mothurOutEndLine();
564                                 m->mothurOut("The number of sequences checked may be larger than the number of unique sequences because some sequences are found in several samples."); m->mothurOutEndLine(); 
565                                 
566                                 if (m->control_pressed) {  for (int j = 0; j < outputNames.size(); j++) {       m->mothurRemove(outputNames[j]);        }  return 0;    }                               
567                                         
568                         }else{
569                                 if (m->control_pressed) {  for (int j = 0; j < outputNames.size(); j++) {       m->mothurRemove(outputNames[j]);        }  return 0;    }
570                         
571                                 int numSeqs = 0;
572                                 int numChimeras = 0;
573
574                                 if(processors == 1){ numSeqs = driver(outputFileName, fastaFileNames[s], accnosFileName, alnsFileName, numChimeras); }
575                                 else{   numSeqs = createProcesses(outputFileName, fastaFileNames[s], accnosFileName, alnsFileName, numChimeras); }
576                                 
577                                 //add headings
578                                 ofstream out;
579                                 m->openOutputFile(outputFileName+".temp", out); 
580                                 out << "Score\tQuery\tParentA\tParentB\tIdQM\tIdQA\tIdQB\tIdAB\tIdQT\tLY\tLN\tLA\tRY\tRN\tRA\tDiv\tYN\n";
581                                 out.close();
582                                 
583                                 m->appendFiles(outputFileName, outputFileName+".temp");
584                                 m->mothurRemove(outputFileName); rename((outputFileName+".temp").c_str(), outputFileName.c_str());
585                                 
586                                 if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) {        m->mothurRemove(outputNames[j]);        } return 0; }
587                         
588                                 //remove file made for uchime
589                                 if (templatefile == "self") {  m->mothurRemove(fastaFileNames[s]); }
590                         
591                                 m->mothurOutEndLine(); m->mothurOut("It took " + toString(time(NULL) - start) + " secs to check " + toString(numSeqs) + " sequences. " + toString(numChimeras) + " chimeras were found.");      m->mothurOutEndLine();
592                         }
593                         
594                         outputNames.push_back(outputFileName); outputTypes["chimera"].push_back(outputFileName);
595                         outputNames.push_back(accnosFileName); outputTypes["accnos"].push_back(accnosFileName);
596                         if (chimealns) { outputNames.push_back(alnsFileName); outputTypes["alns"].push_back(alnsFileName); }
597                 }
598         
599                 //set accnos file as new current accnosfile
600                 string current = "";
601                 itTypes = outputTypes.find("accnos");
602                 if (itTypes != outputTypes.end()) {
603                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setAccnosFile(current); }
604                 }
605                 
606                 m->mothurOutEndLine();
607                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
608                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }       
609                 m->mothurOutEndLine();
610                 
611                 return 0;
612                 
613         }
614         catch(exception& e) {
615                 m->errorOut(e, "ChimeraUchimeCommand", "execute");
616                 exit(1);
617         }
618 }
619 //**********************************************************************************************************************
620 int ChimeraUchimeCommand::deconvoluteResults(SequenceParser& parser, string outputFileName, string accnosFileName, string alnsFileName){
621         try {
622                 map<string, string> uniqueNames = parser.getAllSeqsMap();
623                 map<string, string>::iterator itUnique;
624                 int total = 0;
625                 
626                 //edit accnos file
627                 ifstream in2; 
628                 m->openInputFile(accnosFileName, in2);
629                 
630                 ofstream out2;
631                 m->openOutputFile(accnosFileName+".temp", out2);
632                 
633                 string name;
634                 set<string> namesInFile; //this is so if a sequence is found to be chimera in several samples we dont write it to the results file more than once
635                 set<string>::iterator itNames;
636                 set<string> chimerasInFile;
637                 set<string>::iterator itChimeras;
638
639                 
640                 while (!in2.eof()) {
641                         if (m->control_pressed) { in2.close(); out2.close(); m->mothurRemove(outputFileName); m->mothurRemove((accnosFileName+".temp")); return 0; }
642                         
643                         in2 >> name; m->gobble(in2);
644                         
645                         //find unique name
646                         itUnique = uniqueNames.find(name);
647                         
648                         if (itUnique == uniqueNames.end()) { m->mothurOut("[ERROR]: trouble parsing accnos results. Cannot find "+ name + "."); m->mothurOutEndLine(); m->control_pressed = true; }
649                         else {
650                                 itChimeras = chimerasInFile.find((itUnique->second));
651                                 
652                                 if (itChimeras == chimerasInFile.end()) {
653                                         out2 << itUnique->second << endl;
654                                         chimerasInFile.insert((itUnique->second));
655                                         total++;
656                                 }
657                         }
658                 }
659                 in2.close();
660                 out2.close();
661                 
662                 m->mothurRemove(accnosFileName);
663                 rename((accnosFileName+".temp").c_str(), accnosFileName.c_str());
664                 
665                 
666                 
667                 //edit chimera file
668                 ifstream in; 
669                 m->openInputFile(outputFileName, in);
670                 
671                 ofstream out;
672                 m->openOutputFile(outputFileName+".temp", out); out.setf(ios::fixed, ios::floatfield); out.setf(ios::showpoint);
673                 out << "Score\tQuery\tParentA\tParentB\tIdQM\tIdQA\tIdQB\tIdAB\tIdQT\tLY\tLN\tLA\tRY\tRN\tRA\tDiv\tYN\n";
674                 
675                 float temp1;
676                 string parent1, parent2, temp2, temp3, temp4, temp5, temp6, temp7, temp8, temp9, temp10, temp11, temp12, temp13, flag;
677                 name = "";
678                 namesInFile.clear();    
679                 //assumptions - in file each read will always look like - if uchime source is updated, revisit this code.
680                 /*                                                                              1       2       3       4       5       6       7       8       9       10      11      12      13      14      15
681                  0.000000       F11Fcsw_33372/ab=18/            *       *       *       *       *       *       *       *       *       *       *       *       *       *       N
682                  0.018300       F11Fcsw_14980/ab=16/            F11Fcsw_1915/ab=35/     F11Fcsw_6032/ab=42/     79.9    78.7    78.2    78.7    79.2    3       0       5       11      10      20      1.46    N
683                 */
684                 
685                 while (!in.eof()) {
686                         
687                         if (m->control_pressed) { in.close(); out.close(); m->mothurRemove((outputFileName+".temp")); return 0; }
688                         
689                         bool print = false;
690                         in >> temp1;    m->gobble(in);
691                         in >> name;             m->gobble(in);
692                         in >> parent1;  m->gobble(in);
693                         in >> parent2;  m->gobble(in);
694                         in >> temp2 >> temp3 >> temp4 >> temp5 >> temp6 >> temp7 >> temp8 >> temp9 >> temp10 >> temp11 >> temp12 >> temp13 >> flag;
695                         m->gobble(in);
696                         
697                         //parse name - name will look like U68590/ab=1/
698                         string restOfName = "";
699                         int pos = name.find_first_of('/');
700                         if (pos != string::npos) {
701                                 restOfName = name.substr(pos);
702                                 name = name.substr(0, pos);
703                         }
704                         
705                         //find unique name
706                         itUnique = uniqueNames.find(name);
707                         
708                         if (itUnique == uniqueNames.end()) { m->mothurOut("[ERROR]: trouble parsing chimera results. Cannot find "+ name + "."); m->mothurOutEndLine(); m->control_pressed = true; }
709                         else {
710                                 name = itUnique->second;
711                                 //is this name already in the file
712                                 itNames = namesInFile.find((name));
713                                 
714                                 if (itNames == namesInFile.end()) { //no not in file
715                                         if (flag == "N") { //are you really a no??
716                                                 //is this sequence really not chimeric??
717                                                 itChimeras = chimerasInFile.find(name);
718                                                 
719                                                 //then you really are a no so print, otherwise skip
720                                                 if (itChimeras == chimerasInFile.end()) { print = true; }
721                                         }else{ print = true; }
722                                 }
723                         }
724                         
725                         if (print) {
726                                 out << temp1 << '\t' << name << restOfName << '\t';
727                                 namesInFile.insert(name);
728                                 
729                                 //parse parent1 names
730                                 if (parent1 != "*") {
731                                         restOfName = "";
732                                         pos = parent1.find_first_of('/');
733                                         if (pos != string::npos) {
734                                                 restOfName = parent1.substr(pos);
735                                                 parent1 = parent1.substr(0, pos);
736                                         }
737                                         
738                                         itUnique = uniqueNames.find(parent1);
739                                         if (itUnique == uniqueNames.end()) { m->mothurOut("[ERROR]: trouble parsing chimera results. Cannot find parentA "+ parent1 + "."); m->mothurOutEndLine(); m->control_pressed = true; }
740                                         else {  out << itUnique->second << restOfName << '\t';  }
741                                 }else { out << parent1 << '\t'; }
742                                 
743                                 //parse parent2 names
744                                 if (parent2 != "*") {
745                                         restOfName = "";
746                                         pos = parent2.find_first_of('/');
747                                         if (pos != string::npos) {
748                                                 restOfName = parent2.substr(pos);
749                                                 parent2 = parent2.substr(0, pos);
750                                         }
751                                         
752                                         itUnique = uniqueNames.find(parent2);
753                                         if (itUnique == uniqueNames.end()) { m->mothurOut("[ERROR]: trouble parsing chimera results. Cannot find parentB "+ parent2 + "."); m->mothurOutEndLine(); m->control_pressed = true; }
754                                         else {  out << itUnique->second << restOfName << '\t';  }
755                                 }else { out << parent2 << '\t'; }
756                                 
757                                 out << temp2 << '\t' << temp3 << '\t' << temp4 << '\t' << temp5 << '\t' << temp6 << '\t' << temp7 << '\t' << temp8 << '\t' << temp9 << '\t' << temp10 << '\t' << temp11 << '\t' << temp12 << temp13 << '\t' << flag << endl;    
758                         }
759                 }
760                 in.close();
761                 out.close();
762                 
763                 m->mothurRemove(outputFileName);
764                 rename((outputFileName+".temp").c_str(), outputFileName.c_str());
765                 
766                                 
767                 //edit anls file
768                 //assumptions - in file each read will always look like - if uchime source is updated, revisit this code.
769                 /*
770                  ------------------------------------------------------------------------
771                  Query   (  179 nt) F21Fcsw_11639/ab=591/
772                  ParentA (  179 nt) F11Fcsw_6529/ab=1625/
773                  ParentB (  181 nt) F21Fcsw_12128/ab=1827/
774                  
775                  A     1 AAGgAAGAtTAATACaagATGgCaTCatgAGtccgCATgTtcAcatGATTAAAG--gTaTtcCGGTagacGATGGGGATG 78
776                  Q     1 AAGTAAGACTAATACCCAATGACGTCTCTAGAAGACATCTGAAAGAGATTAAAG--ATTTATCGGTGATGGATGGGGATG 78
777                  B     1 AAGgAAGAtTAATcCaggATGggaTCatgAGttcACATgTccgcatGATTAAAGgtATTTtcCGGTagacGATGGGGATG 80
778                  Diffs      N    N    A N?N   N N  NNN  N?NB   N ?NaNNN          B B NN    NNNN          
779                  Votes      0    0    + 000   0 0  000  000+   0 00!000            + 00    0000          
780                  Model   AAAAAAAAAAAAAAAAAAAAAAxBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB
781                  
782                  A    79 CGTtccATTAGaTaGTaGGCGGGGTAACGGCCCACCtAGtCttCGATggaTAGGGGTTCTGAGAGGAAGGTCCCCCACAT 158
783                  Q    79 CGTCTGATTAGCTTGTTGGCGGGGTAACGGCCCACCAAGGCAACGATCAGTAGGGGTTCTGAGAGGAAGGTCCCCCACAT 158
784                  B    81 CGTtccATTAGaTaGTaGGCGGGGTAACGGCCCACCtAGtCAACGATggaTAGGGGTTCTGAGAGGAAGGTCCCCCACAT 160
785                  Diffs      NNN     N N  N                   N  N BB    NNN                              
786                  Votes      000     0 0  0                   0  0 ++    000                              
787                  Model   BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB
788                  
789                  A   159 TGGAACTGAGACACGGTCCAA 179
790                  Q   159 TGGAACTGAGACACGGTCCAA 179
791                  B   161 TGGAACTGAGACACGGTCCAA 181
792                  Diffs                        
793                  Votes                        
794                  Model   BBBBBBBBBBBBBBBBBBBBB
795                  
796                  Ids.  QA 76.6%, QB 77.7%, AB 93.7%, QModel 78.9%, Div. +1.5%
797                  Diffs Left 7: N 0, A 6, Y 1 (14.3%); Right 35: N 1, A 30, Y 4 (11.4%), Score 0.0047
798                 */
799                 if (chimealns) {
800                         ifstream in3; 
801                         m->openInputFile(alnsFileName, in3);
802                 
803                         ofstream out3;
804                         m->openOutputFile(alnsFileName+".temp", out3); out3.setf(ios::fixed, ios::floatfield); out3.setf(ios::showpoint);
805                 
806                         name = "";
807                         namesInFile.clear();
808                         string line = "";
809                         
810                         while (!in3.eof()) {
811                                 if (m->control_pressed) { in3.close(); out3.close(); m->mothurRemove(outputFileName); m->mothurRemove((accnosFileName)); m->mothurRemove((alnsFileName+".temp")); return 0; }
812                                 
813                                 line = "";
814                                 line = m->getline(in3); 
815                                 string temp = "";
816                                 
817                                 if (line != "") {
818                                         istringstream iss(line);
819                                         iss >> temp;
820                                         
821                                         //are you a name line
822                                         if ((temp == "Query") || (temp == "ParentA") || (temp == "ParentB")) {
823                                                 int spot = 0;
824                                                 for (int i = 0; i < line.length(); i++) {
825                                                         spot = i;
826                                                         if (line[i] == ')') { break; }
827                                                         else { out3 << line[i]; }
828                                                 }
829                                                 
830                                                 if (spot == (line.length() - 1)) { m->mothurOut("[ERROR]: could not line sequence name in line " + line + "."); m->mothurOutEndLine(); m->control_pressed = true; }
831                                                 else if ((spot+2) > (line.length() - 1)) { m->mothurOut("[ERROR]: could not line sequence name in line " + line + "."); m->mothurOutEndLine(); m->control_pressed = true; }
832                                                 else {
833                                                         out << line[spot] << line[spot+1];
834                                                         
835                                                         name = line.substr(spot+2);
836                                                         
837                                                         //parse name - name will either look like U68590/ab=1/ or U68590
838                                                         string restOfName = "";
839                                                         int pos = name.find_first_of('/');
840                                                         if (pos != string::npos) {
841                                                                 restOfName = name.substr(pos);
842                                                                 name = name.substr(0, pos);
843                                                         }
844                                                         
845                                                         //find unique name
846                                                         itUnique = uniqueNames.find(name);
847                                                         
848                                                         if (itUnique == uniqueNames.end()) { m->mothurOut("[ERROR]: trouble parsing alns results. Cannot find "+ name + "."); m->mothurOutEndLine();m->control_pressed = true;  }
849                                                         else {
850                                                                 //only limit repeats on query names
851                                                                 if (temp == "Query") {
852                                                                         itNames = namesInFile.find((itUnique->second));
853                                                                         
854                                                                         if (itNames == namesInFile.end()) {
855                                                                                 out << itUnique->second << restOfName << endl;
856                                                                                 namesInFile.insert((itUnique->second));
857                                                                         }
858                                                                 }else { out << itUnique->second << restOfName << endl;  }
859                                                         }
860                                                         
861                                                 }
862                                                 
863                                         }else { //not need to alter line
864                                                 out3 << line << endl;
865                                         }
866                                 }else { out3 << endl; }
867                         }
868                         in3.close();
869                         out3.close();
870                         
871                         m->mothurRemove(alnsFileName);
872                         rename((alnsFileName+".temp").c_str(), alnsFileName.c_str());
873                 }
874                 
875                 return total;
876         }
877         catch(exception& e) {
878                 m->errorOut(e, "ChimeraUchimeCommand", "deconvoluteResults");
879                 exit(1);
880         }
881 }       
882 //**********************************************************************************************************************
883 int ChimeraUchimeCommand::printFile(vector<seqPriorityNode>& nameMapCount, string filename){
884         try {
885                 
886                 sort(nameMapCount.begin(), nameMapCount.end(), compareSeqPriorityNodes);
887                 
888                 ofstream out;
889                 m->openOutputFile(filename, out);
890                 
891                 //print new file in order of
892                 for (int i = 0; i < nameMapCount.size(); i++) {
893                         out << ">" << nameMapCount[i].name  << "/ab=" << nameMapCount[i].numIdentical << "/" << endl << nameMapCount[i].seq << endl;
894                 }
895                 out.close();
896                 
897                 return 0;
898         }
899         catch(exception& e) {
900                 m->errorOut(e, "ChimeraUchimeCommand", "printFile");
901                 exit(1);
902         }
903 }       
904 //**********************************************************************************************************************
905 int ChimeraUchimeCommand::readFasta(string filename, map<string, string>& seqs){
906         try {
907                 //create input file for uchime
908                 //read through fastafile and store info
909                 ifstream in;
910                 m->openInputFile(filename, in);
911                 
912                 while (!in.eof()) {
913                         
914                         if (m->control_pressed) { in.close(); return 0; }
915                         
916                         Sequence seq(in); m->gobble(in);
917                         seqs[seq.getName()] = seq.getAligned();
918                 }
919                 in.close();
920                 
921                 return 0;
922         }
923         catch(exception& e) {
924                 m->errorOut(e, "ChimeraUchimeCommand", "readFasta");
925                 exit(1);
926         }
927 }       
928 //**********************************************************************************************************************
929
930 string ChimeraUchimeCommand::getNamesFile(string& inputFile){
931         try {
932                 string nameFile = "";
933                 
934                 m->mothurOutEndLine(); m->mothurOut("No namesfile given, running unique.seqs command to generate one."); m->mothurOutEndLine(); m->mothurOutEndLine();
935                 
936                 //use unique.seqs to create new name and fastafile
937                 string inputString = "fasta=" + inputFile;
938                 m->mothurOut("/******************************************/"); m->mothurOutEndLine(); 
939                 m->mothurOut("Running command: unique.seqs(" + inputString + ")"); m->mothurOutEndLine(); 
940                 m->mothurCalling = true;
941         
942                 Command* uniqueCommand = new DeconvoluteCommand(inputString);
943                 uniqueCommand->execute();
944                 
945                 map<string, vector<string> > filenames = uniqueCommand->getOutputFiles();
946                 
947                 delete uniqueCommand;
948                 m->mothurCalling = false;
949                 m->mothurOut("/******************************************/"); m->mothurOutEndLine(); 
950                 
951                 nameFile = filenames["name"][0];
952                 inputFile = filenames["fasta"][0];
953                 
954                 return nameFile;
955         }
956         catch(exception& e) {
957                 m->errorOut(e, "ChimeraUchimeCommand", "getNamesFile");
958                 exit(1);
959         }
960 }
961 //**********************************************************************************************************************
962 int ChimeraUchimeCommand::driverGroups(SequenceParser& parser, string outputFName, string filename, string accnos, string alns, int start, int end, vector<string> groups){
963         try {
964                 
965                 int totalSeqs = 0;
966                 int numChimeras = 0;
967                 
968                 for (int i = start; i < end; i++) {
969                         int start = time(NULL);  if (m->control_pressed) {  return 0; }
970                         
971                         int error = parser.getSeqs(groups[i], filename, true); if ((error == 1) || m->control_pressed) {  return 0; }
972                         
973                         int numSeqs = driver((outputFName + groups[i]), filename, (accnos+ groups[i]), (alns+ groups[i]), numChimeras);
974                         totalSeqs += numSeqs;
975                         
976                         if (m->control_pressed) { return 0; }
977                         
978                         //remove file made for uchime
979                         m->mothurRemove(filename);
980                         
981                         //append files
982                         m->appendFiles((outputFName+groups[i]), outputFName); m->mothurRemove((outputFName+groups[i]));
983                         m->appendFiles((accnos+groups[i]), accnos); m->mothurRemove((accnos+groups[i]));
984                         if (chimealns) { m->appendFiles((alns+groups[i]), alns); m->mothurRemove((alns+groups[i])); }
985                         
986                         m->mothurOutEndLine(); m->mothurOut("It took " + toString(time(NULL) - start) + " secs to check " + toString(numSeqs) + " sequences from group " + groups[i] + ".");    m->mothurOutEndLine();                                  
987                 }       
988                 
989                 return totalSeqs;
990                 
991         }
992         catch(exception& e) {
993                 m->errorOut(e, "ChimeraUchimeCommand", "driverGroups");
994                 exit(1);
995         }
996 }       
997 //**********************************************************************************************************************
998
999 int ChimeraUchimeCommand::driver(string outputFName, string filename, string accnos, string alns, int& numChimeras){
1000         try {
1001                 
1002                 outputFName = m->getFullPathName(outputFName);
1003                 filename = m->getFullPathName(filename);
1004                 alns = m->getFullPathName(alns);
1005                 
1006                 //to allow for spaces in the path
1007                 outputFName = "\"" + outputFName + "\"";
1008                 filename = "\"" + filename + "\"";
1009                 alns = "\"" + alns + "\"";
1010                                 
1011                 vector<char*> cPara;
1012                 
1013                 string path = m->argv;
1014                 string tempPath = path;
1015                 for (int i = 0; i < path.length(); i++) { tempPath[i] = tolower(path[i]); }
1016                 path = path.substr(0, (tempPath.find_last_of('m')));
1017                 
1018                 string uchimeCommand = path;
1019 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
1020                 uchimeCommand += "uchime ";
1021 #else
1022                 uchimeCommand += "uchime";
1023                 uchimeCommand = "\"" + uchimeCommand + "\"";
1024 #endif
1025                 
1026                 char* tempUchime;
1027                 tempUchime= new char[uchimeCommand.length()+1]; 
1028                 *tempUchime = '\0';
1029                 strncat(tempUchime, uchimeCommand.c_str(), uchimeCommand.length());
1030                 cPara.push_back(tempUchime);
1031                 
1032                 char* tempIn = new char[8]; 
1033                 *tempIn = '\0'; strncat(tempIn, "--input", 7);
1034                 //strcpy(tempIn, "--input"); 
1035                 cPara.push_back(tempIn);
1036                 char* temp = new char[filename.length()+1];
1037                 *temp = '\0'; strncat(temp, filename.c_str(), filename.length());
1038                 //strcpy(temp, filename.c_str());
1039                 cPara.push_back(temp);
1040                 
1041                 //are you using a reference file
1042                 if (templatefile != "self") {
1043                         //add reference file
1044                         char* tempRef = new char[5]; 
1045                         //strcpy(tempRef, "--db"); 
1046                         *tempRef = '\0'; strncat(tempRef, "--db", 4);
1047                         cPara.push_back(tempRef);  
1048                         char* tempR = new char[templatefile.length()+1];
1049                         //strcpy(tempR, templatefile.c_str());
1050                         *tempR = '\0'; strncat(tempR, templatefile.c_str(), templatefile.length());
1051                         cPara.push_back(tempR);
1052                 }
1053                 
1054                 char* tempO = new char[12]; 
1055                 *tempO = '\0'; strncat(tempO, "--uchimeout", 11);
1056                 //strcpy(tempO, "--uchimeout"); 
1057                 cPara.push_back(tempO);
1058                 char* tempout = new char[outputFName.length()+1];
1059                 //strcpy(tempout, outputFName.c_str());
1060                 *tempout = '\0'; strncat(tempout, outputFName.c_str(), outputFName.length());
1061                 cPara.push_back(tempout);
1062                 
1063                 if (chimealns) {
1064                         char* tempA = new char[13]; 
1065                         *tempA = '\0'; strncat(tempA, "--uchimealns", 12);
1066                         //strcpy(tempA, "--uchimealns"); 
1067                         cPara.push_back(tempA);
1068                         char* tempa = new char[alns.length()+1];
1069                         //strcpy(tempa, alns.c_str());
1070                         *tempa = '\0'; strncat(tempa, alns.c_str(), alns.length());
1071                         cPara.push_back(tempa);
1072                 }
1073                 
1074                 if (useAbskew) {
1075                         char* tempskew = new char[9];
1076                         *tempskew = '\0'; strncat(tempskew, "--abskew", 8);
1077                         //strcpy(tempskew, "--abskew"); 
1078                         cPara.push_back(tempskew);
1079                         char* tempSkew = new char[abskew.length()+1];
1080                         //strcpy(tempSkew, abskew.c_str());
1081                         *tempSkew = '\0'; strncat(tempSkew, abskew.c_str(), abskew.length());
1082                         cPara.push_back(tempSkew);
1083                 }
1084                 
1085                 if (useMinH) {
1086                         char* tempminh = new char[7]; 
1087                         *tempminh = '\0'; strncat(tempminh, "--minh", 6);
1088                         //strcpy(tempminh, "--minh"); 
1089                         cPara.push_back(tempminh);
1090                         char* tempMinH = new char[minh.length()+1];
1091                         *tempMinH = '\0'; strncat(tempMinH, minh.c_str(), minh.length());
1092                         //strcpy(tempMinH, minh.c_str());
1093                         cPara.push_back(tempMinH);
1094                 }
1095                 
1096                 if (useMindiv) {
1097                         char* tempmindiv = new char[9]; 
1098                         *tempmindiv = '\0'; strncat(tempmindiv, "--mindiv", 8);
1099                         //strcpy(tempmindiv, "--mindiv"); 
1100                         cPara.push_back(tempmindiv);
1101                         char* tempMindiv = new char[mindiv.length()+1];
1102                         *tempMindiv = '\0'; strncat(tempMindiv, mindiv.c_str(), mindiv.length());
1103                         //strcpy(tempMindiv, mindiv.c_str());
1104                         cPara.push_back(tempMindiv);
1105                 }
1106                 
1107                 if (useXn) {
1108                         char* tempxn = new char[5]; 
1109                         //strcpy(tempxn, "--xn"); 
1110                         *tempxn = '\0'; strncat(tempxn, "--xn", 4);
1111                         cPara.push_back(tempxn);
1112                         char* tempXn = new char[xn.length()+1];
1113                         //strcpy(tempXn, xn.c_str());
1114                         *tempXn = '\0'; strncat(tempXn, xn.c_str(), xn.length());
1115                         cPara.push_back(tempXn);
1116                 }
1117                 
1118                 if (useDn) {
1119                         char* tempdn = new char[5]; 
1120                         //strcpy(tempdn, "--dn"); 
1121                         *tempdn = '\0'; strncat(tempdn, "--dn", 4);
1122                         cPara.push_back(tempdn);
1123                         char* tempDn = new char[dn.length()+1];
1124                         *tempDn = '\0'; strncat(tempDn, dn.c_str(), dn.length());
1125                         //strcpy(tempDn, dn.c_str());
1126                         cPara.push_back(tempDn);
1127                 }
1128                 
1129                 if (useXa) {
1130                         char* tempxa = new char[5]; 
1131                         //strcpy(tempxa, "--xa"); 
1132                         *tempxa = '\0'; strncat(tempxa, "--xa", 4);
1133                         cPara.push_back(tempxa);
1134                         char* tempXa = new char[xa.length()+1];
1135                         *tempXa = '\0'; strncat(tempXa, xa.c_str(), xa.length());
1136                         //strcpy(tempXa, xa.c_str());
1137                         cPara.push_back(tempXa);
1138                 }
1139                 
1140                 if (useChunks) {
1141                         char* tempchunks = new char[9]; 
1142                         //strcpy(tempchunks, "--chunks"); 
1143                         *tempchunks = '\0'; strncat(tempchunks, "--chunks", 8);
1144                         cPara.push_back(tempchunks);
1145                         char* tempChunks = new char[chunks.length()+1];
1146                         *tempChunks = '\0'; strncat(tempChunks, chunks.c_str(), chunks.length());
1147                         //strcpy(tempChunks, chunks.c_str());
1148                         cPara.push_back(tempChunks);
1149                 }
1150                 
1151                 if (useMinchunk) {
1152                         char* tempminchunk = new char[11]; 
1153                         //strcpy(tempminchunk, "--minchunk"); 
1154                         *tempminchunk = '\0'; strncat(tempminchunk, "--minchunk", 10);
1155                         cPara.push_back(tempminchunk);
1156                         char* tempMinchunk = new char[minchunk.length()+1];
1157                         *tempMinchunk = '\0'; strncat(tempMinchunk, minchunk.c_str(), minchunk.length());
1158                         //strcpy(tempMinchunk, minchunk.c_str());
1159                         cPara.push_back(tempMinchunk);
1160                 }
1161                 
1162                 if (useIdsmoothwindow) {
1163                         char* tempidsmoothwindow = new char[17]; 
1164                         *tempidsmoothwindow = '\0'; strncat(tempidsmoothwindow, "--idsmoothwindow", 16);
1165                         //strcpy(tempidsmoothwindow, "--idsmoothwindow"); 
1166                         cPara.push_back(tempidsmoothwindow);
1167                         char* tempIdsmoothwindow = new char[idsmoothwindow.length()+1];
1168                         *tempIdsmoothwindow = '\0'; strncat(tempIdsmoothwindow, idsmoothwindow.c_str(), idsmoothwindow.length());
1169                         //strcpy(tempIdsmoothwindow, idsmoothwindow.c_str());
1170                         cPara.push_back(tempIdsmoothwindow);
1171                 }
1172                 
1173                 /*if (useMinsmoothid) {
1174                         char* tempminsmoothid = new char[14]; 
1175                         //strcpy(tempminsmoothid, "--minsmoothid"); 
1176                         *tempminsmoothid = '\0'; strncat(tempminsmoothid, "--minsmoothid", 13);
1177                         cPara.push_back(tempminsmoothid);
1178                         char* tempMinsmoothid = new char[minsmoothid.length()+1];
1179                         *tempMinsmoothid = '\0'; strncat(tempMinsmoothid, minsmoothid.c_str(), minsmoothid.length());
1180                         //strcpy(tempMinsmoothid, minsmoothid.c_str());
1181                         cPara.push_back(tempMinsmoothid);
1182                 }*/
1183                 
1184                 if (useMaxp) {
1185                         char* tempmaxp = new char[7]; 
1186                         //strcpy(tempmaxp, "--maxp"); 
1187                         *tempmaxp = '\0'; strncat(tempmaxp, "--maxp", 6);
1188                         cPara.push_back(tempmaxp);
1189                         char* tempMaxp = new char[maxp.length()+1];
1190                         *tempMaxp = '\0'; strncat(tempMaxp, maxp.c_str(), maxp.length());
1191                         //strcpy(tempMaxp, maxp.c_str());
1192                         cPara.push_back(tempMaxp);
1193                 }
1194                 
1195                 if (!skipgaps) {
1196                         char* tempskipgaps = new char[13]; 
1197                         //strcpy(tempskipgaps, "--[no]skipgaps");
1198                         *tempskipgaps = '\0'; strncat(tempskipgaps, "--noskipgaps", 12);
1199                         cPara.push_back(tempskipgaps);
1200                 }
1201                 
1202                 if (!skipgaps2) {
1203                         char* tempskipgaps2 = new char[14]; 
1204                         //strcpy(tempskipgaps2, "--[no]skipgaps2"); 
1205                         *tempskipgaps2 = '\0'; strncat(tempskipgaps2, "--noskipgaps2", 13);
1206                         cPara.push_back(tempskipgaps2);
1207                 }
1208                 
1209                 if (useMinlen) {
1210                         char* tempminlen = new char[9]; 
1211                         *tempminlen = '\0'; strncat(tempminlen, "--minlen", 8);
1212                         //strcpy(tempminlen, "--minlen"); 
1213                         cPara.push_back(tempminlen);
1214                         char* tempMinlen = new char[minlen.length()+1];
1215                         //strcpy(tempMinlen, minlen.c_str());
1216                         *tempMinlen = '\0'; strncat(tempMinlen, minlen.c_str(), minlen.length());
1217                         cPara.push_back(tempMinlen);
1218                 }
1219                 
1220                 if (useMaxlen) {
1221                         char* tempmaxlen = new char[9]; 
1222                         //strcpy(tempmaxlen, "--maxlen"); 
1223                         *tempmaxlen = '\0'; strncat(tempmaxlen, "--maxlen", 8);
1224                         cPara.push_back(tempmaxlen);
1225                         char* tempMaxlen = new char[maxlen.length()+1];
1226                         *tempMaxlen = '\0'; strncat(tempMaxlen, maxlen.c_str(), maxlen.length());
1227                         //strcpy(tempMaxlen, maxlen.c_str());
1228                         cPara.push_back(tempMaxlen);
1229                 }
1230                 
1231                 if (ucl) {
1232                         char* tempucl = new char[5]; 
1233                         strcpy(tempucl, "--ucl"); 
1234                         cPara.push_back(tempucl);
1235                 }
1236                 
1237                 if (useQueryfract) {
1238                         char* tempqueryfract = new char[13]; 
1239                         *tempqueryfract = '\0'; strncat(tempqueryfract, "--queryfract", 12);
1240                         //strcpy(tempqueryfract, "--queryfract"); 
1241                         cPara.push_back(tempqueryfract);
1242                         char* tempQueryfract = new char[queryfract.length()+1];
1243                         *tempQueryfract = '\0'; strncat(tempQueryfract, queryfract.c_str(), queryfract.length());
1244                         //strcpy(tempQueryfract, queryfract.c_str());
1245                         cPara.push_back(tempQueryfract);
1246                 }
1247                 
1248                 
1249                 char** uchimeParameters;
1250                 uchimeParameters = new char*[cPara.size()];
1251                 string commandString = "";
1252                 for (int i = 0; i < cPara.size(); i++) {  uchimeParameters[i] = cPara[i];  commandString += toString(cPara[i]) + " "; } 
1253                 //int numArgs = cPara.size();
1254                 
1255                 //uchime_main(numArgs, uchimeParameters); 
1256                 //cout << "commandString = " << commandString << endl;
1257 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
1258 #else
1259                 commandString = "\"" + commandString + "\"";
1260 #endif
1261                 system(commandString.c_str());
1262                 
1263                 //free memory
1264                 for(int i = 0; i < cPara.size(); i++)  {  delete cPara[i];  }
1265                 delete[] uchimeParameters; 
1266                 
1267                 //remove "" from filenames
1268                 outputFName = outputFName.substr(1, outputFName.length()-2);
1269                 filename = filename.substr(1, filename.length()-2);
1270                 alns = alns.substr(1, alns.length()-2);
1271                 
1272                 if (m->control_pressed) { return 0; }
1273                 
1274                 //create accnos file from uchime results
1275                 ifstream in; 
1276                 m->openInputFile(outputFName, in);
1277                 
1278                 ofstream out;
1279                 m->openOutputFile(accnos, out);
1280                 
1281                 int num = 0;
1282                 numChimeras = 0;
1283                 while(!in.eof()) {
1284                         
1285                         if (m->control_pressed) { break; }
1286                         
1287                         string name = "";
1288                         string chimeraFlag = "";
1289                         in >> chimeraFlag >> name;
1290                         
1291                         //fix name if needed
1292                         if (templatefile == "self") { 
1293                                 name = name.substr(0, name.length()-1); //rip off last /
1294                                 name = name.substr(0, name.find_last_of('/'));
1295                         }
1296                         
1297                         for (int i = 0; i < 15; i++) {  in >> chimeraFlag; }
1298                         m->gobble(in);
1299                         
1300                         if (chimeraFlag == "Y") {  out << name << endl; numChimeras++; }
1301                         num++;
1302                 }
1303                 in.close();
1304                 out.close();
1305                 
1306                 return num;
1307         }
1308         catch(exception& e) {
1309                 m->errorOut(e, "ChimeraUchimeCommand", "driver");
1310                 exit(1);
1311         }
1312 }
1313 /**************************************************************************************************/
1314
1315 int ChimeraUchimeCommand::createProcesses(string outputFileName, string filename, string accnos, string alns, int& numChimeras) {
1316         try {
1317                 
1318                 processIDS.clear();
1319                 int process = 1;
1320                 int num = 0;
1321                 vector<string> files;
1322                 
1323 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)          
1324                 //break up file into multiple files
1325                 m->divideFile(filename, processors, files);
1326                 
1327                 if (m->control_pressed) {  return 0;  }
1328                                 
1329                 //loop through and create all the processes you want
1330                 while (process != processors) {
1331                         int pid = fork();
1332                         
1333                         if (pid > 0) {
1334                                 processIDS.push_back(pid);  //create map from line number to pid so you can append files in correct order later
1335                                 process++;
1336                         }else if (pid == 0){
1337                                 num = driver(outputFileName + toString(getpid()) + ".temp", files[process], accnos + toString(getpid()) + ".temp", alns + toString(getpid()) + ".temp", numChimeras);
1338                                 
1339                                 //pass numSeqs to parent
1340                                 ofstream out;
1341                                 string tempFile = outputFileName + toString(getpid()) + ".num.temp";
1342                                 m->openOutputFile(tempFile, out);
1343                                 out << num << endl;
1344                                 out << numChimeras << endl;
1345                                 out.close();
1346                                 
1347                                 exit(0);
1348                         }else { 
1349                                 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine(); 
1350                                 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
1351                                 exit(0);
1352                         }
1353                 }
1354                 
1355                 //do my part
1356                 num = driver(outputFileName, files[0], accnos, alns, numChimeras);
1357                 
1358                 //force parent to wait until all the processes are done
1359                 for (int i=0;i<processIDS.size();i++) { 
1360                         int temp = processIDS[i];
1361                         wait(&temp);
1362                 }
1363                 
1364                 for (int i = 0; i < processIDS.size(); i++) {
1365                         ifstream in;
1366                         string tempFile =  outputFileName + toString(processIDS[i]) + ".num.temp";
1367                         m->openInputFile(tempFile, in);
1368                         if (!in.eof()) { 
1369                                 int tempNum = 0; 
1370                                 in >> tempNum; m->gobble(in);
1371                                 num += tempNum; 
1372                                 in >> tempNum;
1373                                 numChimeras += tempNum;
1374                         }
1375                         in.close(); m->mothurRemove(tempFile);
1376                 }
1377 #else
1378                 //////////////////////////////////////////////////////////////////////////////////////////////////////
1379                 //Windows version shared memory, so be careful when passing variables through the preClusterData struct. 
1380                 //Above fork() will clone, so memory is separate, but that's not the case with windows, 
1381                 //////////////////////////////////////////////////////////////////////////////////////////////////////
1382                 
1383                 //divide file
1384                 int count = 0;
1385                 int spot = 0;
1386                 map<int, ofstream*> filehandles;
1387                 map<int, ofstream*>::iterator it3;
1388                 
1389                 ofstream* temp;
1390                 for (int i = 0; i < processors; i++) {
1391                         temp = new ofstream;
1392                         filehandles[i] = temp;
1393                         m->openOutputFile(filename+toString(i)+".temp", *(temp));
1394                         files.push_back(filename+toString(i)+".temp");
1395                 }
1396                 
1397                 ifstream in;
1398                 m->openInputFile(filename, in);
1399                 
1400                 while(!in.eof()) {
1401                         
1402                         if (m->control_pressed) { in.close(); for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) { (*(it3->second)).close(); delete it3->second; } return 0; }
1403                         
1404                         Sequence tempSeq(in); m->gobble(in); 
1405                         
1406                         if (tempSeq.getName() != "") {
1407                                 tempSeq.printSequence(*(filehandles[spot])); 
1408                                 spot++; count++;
1409                                 if (spot == processors) { spot = 0; }
1410                         }
1411                 }
1412                 in.close();
1413                 
1414                 //delete memory
1415                 for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) {
1416                         (*(it3->second)).close();
1417                         delete it3->second;
1418                 }
1419                 
1420                 //sanity check for number of processors
1421                 if (count < processors) { processors = count; }
1422                 
1423                 vector<uchimeData*> pDataArray; 
1424                 DWORD   dwThreadIdArray[processors-1];
1425                 HANDLE  hThreadArray[processors-1]; 
1426                 vector<string> dummy; //used so that we can use the same struct for MyUchimeSeqsThreadFunction and MyUchimeThreadFunction
1427                 
1428                 //Create processor worker threads.
1429                 for( int i=1; i<processors; i++ ){
1430                         // Allocate memory for thread data.
1431                         string extension = toString(i) + ".temp";
1432                         
1433                         uchimeData* tempUchime = new uchimeData(outputFileName+extension, templatefile, files[i], "", "", "", accnos+extension, alns+extension, dummy, m, 0, 0,  i);
1434                         tempUchime->setBooleans(useAbskew, chimealns, useMinH, useMindiv, useXn, useDn, useXa, useChunks, useMinchunk, useIdsmoothwindow, useMinsmoothid, useMaxp, skipgaps, skipgaps2, useMinlen, useMaxlen, ucl, useQueryfract);
1435                         tempUchime->setVariables(abskew, minh, mindiv, xn, dn, xa, chunks, minchunk, idsmoothwindow, minsmoothid, maxp, minlen, maxlen, queryfract);
1436                         
1437                         pDataArray.push_back(tempUchime);
1438                         processIDS.push_back(i);
1439                         
1440                         //MySeqSumThreadFunction is in header. It must be global or static to work with the threads.
1441                         //default security attributes, thread function name, argument to thread function, use default creation flags, returns the thread identifier
1442                         hThreadArray[i-1] = CreateThread(NULL, 0, MyUchimeSeqsThreadFunction, pDataArray[i-1], 0, &dwThreadIdArray[i-1]);   
1443                 }
1444                 
1445                 
1446                 //using the main process as a worker saves time and memory
1447                 num = driver(outputFileName, files[0], accnos, alns, numChimeras);
1448                 
1449                 //Wait until all threads have terminated.
1450                 WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE);
1451                 
1452                 //Close all thread handles and free memory allocations.
1453                 for(int i=0; i < pDataArray.size(); i++){
1454                         num += pDataArray[i]->count;
1455                         numChimeras += pDataArray[i]->numChimeras;
1456                         CloseHandle(hThreadArray[i]);
1457                         delete pDataArray[i];
1458                 }
1459 #endif          
1460                 
1461                 //append output files
1462                 for(int i=0;i<processIDS.size();i++){
1463                         m->appendFiles((outputFileName + toString(processIDS[i]) + ".temp"), outputFileName);
1464                         m->mothurRemove((outputFileName + toString(processIDS[i]) + ".temp"));
1465                         
1466                         m->appendFiles((accnos + toString(processIDS[i]) + ".temp"), accnos);
1467                         m->mothurRemove((accnos + toString(processIDS[i]) + ".temp"));
1468                         
1469                         if (chimealns) {
1470                                 m->appendFiles((alns + toString(processIDS[i]) + ".temp"), alns);
1471                                 m->mothurRemove((alns + toString(processIDS[i]) + ".temp"));
1472                         }
1473                 }
1474                 
1475                 //get rid of the file pieces.
1476                 for (int i = 0; i < files.size(); i++) { m->mothurRemove(files[i]); }
1477                 return num;     
1478         }
1479         catch(exception& e) {
1480                 m->errorOut(e, "ChimeraUchimeCommand", "createProcesses");
1481                 exit(1);
1482         }
1483 }
1484 /**************************************************************************************************/
1485
1486 int ChimeraUchimeCommand::createProcessesGroups(SequenceParser& parser, string outputFName, string filename, string accnos, string alns, vector<string> groups, string nameFile, string groupFile, string fastaFile) {
1487         try {
1488                 
1489                 processIDS.clear();
1490                 int process = 1;
1491                 int num = 0;
1492                 
1493                 //sanity check
1494                 if (groups.size() < processors) { processors = groups.size(); }
1495                 
1496                 //divide the groups between the processors
1497                 vector<linePair> lines;
1498                 int numGroupsPerProcessor = groups.size() / processors;
1499                 for (int i = 0; i < processors; i++) {
1500                         int startIndex =  i * numGroupsPerProcessor;
1501                         int endIndex = (i+1) * numGroupsPerProcessor;
1502                         if(i == (processors - 1)){      endIndex = groups.size();       }
1503                         lines.push_back(linePair(startIndex, endIndex));
1504                 }
1505                 
1506 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)          
1507                                 
1508                 //loop through and create all the processes you want
1509                 while (process != processors) {
1510                         int pid = fork();
1511                         
1512                         if (pid > 0) {
1513                                 processIDS.push_back(pid);  //create map from line number to pid so you can append files in correct order later
1514                                 process++;
1515                         }else if (pid == 0){
1516                                 num = driverGroups(parser, outputFName + toString(getpid()) + ".temp", filename + toString(getpid()) + ".temp", accnos + toString(getpid()) + ".temp", alns + toString(getpid()) + ".temp", lines[process].start, lines[process].end, groups);
1517                                 
1518                                 //pass numSeqs to parent
1519                                 ofstream out;
1520                                 string tempFile = outputFName + toString(getpid()) + ".num.temp";
1521                                 m->openOutputFile(tempFile, out);
1522                                 out << num << endl;
1523                                 out.close();
1524                                 
1525                                 exit(0);
1526                         }else { 
1527                                 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine(); 
1528                                 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
1529                                 exit(0);
1530                         }
1531                 }
1532                 
1533                 //do my part
1534                 num = driverGroups(parser, outputFName, filename, accnos, alns, lines[0].start, lines[0].end, groups);
1535                 
1536                 //force parent to wait until all the processes are done
1537                 for (int i=0;i<processIDS.size();i++) { 
1538                         int temp = processIDS[i];
1539                         wait(&temp);
1540                 }
1541                 
1542                 for (int i = 0; i < processIDS.size(); i++) {
1543                         ifstream in;
1544                         string tempFile =  outputFName + toString(processIDS[i]) + ".num.temp";
1545                         m->openInputFile(tempFile, in);
1546                         if (!in.eof()) { int tempNum = 0; in >> tempNum; num += tempNum; }
1547                         in.close(); m->mothurRemove(tempFile);
1548                 }
1549                                 
1550 #else
1551                 //////////////////////////////////////////////////////////////////////////////////////////////////////
1552                 //Windows version shared memory, so be careful when passing variables through the uchimeData struct. 
1553                 //Above fork() will clone, so memory is separate, but that's not the case with windows, 
1554                 //////////////////////////////////////////////////////////////////////////////////////////////////////
1555                 
1556                 vector<uchimeData*> pDataArray; 
1557                 DWORD   dwThreadIdArray[processors-1];
1558                 HANDLE  hThreadArray[processors-1]; 
1559                 
1560                 //Create processor worker threads.
1561                 for( int i=1; i<processors; i++ ){
1562                         // Allocate memory for thread data.
1563                         string extension = toString(i) + ".temp";
1564                         
1565                         uchimeData* tempUchime = new uchimeData(outputFName+extension, templatefile, filename+extension, fastaFile, nameFile, groupFile, accnos+extension, alns+extension, groups, m, lines[i].start, lines[i].end,  i);
1566                         tempUchime->setBooleans(useAbskew, chimealns, useMinH, useMindiv, useXn, useDn, useXa, useChunks, useMinchunk, useIdsmoothwindow, useMinsmoothid, useMaxp, skipgaps, skipgaps2, useMinlen, useMaxlen, ucl, useQueryfract);
1567                         tempUchime->setVariables(abskew, minh, mindiv, xn, dn, xa, chunks, minchunk, idsmoothwindow, minsmoothid, maxp, minlen, maxlen, queryfract);
1568                         
1569                         pDataArray.push_back(tempUchime);
1570                         processIDS.push_back(i);
1571                         
1572                         //MyUchimeThreadFunction is in header. It must be global or static to work with the threads.
1573                         //default security attributes, thread function name, argument to thread function, use default creation flags, returns the thread identifier
1574                         hThreadArray[i-1] = CreateThread(NULL, 0, MyUchimeThreadFunction, pDataArray[i-1], 0, &dwThreadIdArray[i-1]);   
1575                 }
1576                 
1577                 
1578                 //using the main process as a worker saves time and memory
1579                 num = driverGroups(parser, outputFName, filename, accnos, alns, lines[0].start, lines[0].end, groups);
1580                 
1581                 //Wait until all threads have terminated.
1582                 WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE);
1583                 
1584                 //Close all thread handles and free memory allocations.
1585                 for(int i=0; i < pDataArray.size(); i++){
1586                         num += pDataArray[i]->count;
1587                         CloseHandle(hThreadArray[i]);
1588                         delete pDataArray[i];
1589                 }
1590 #endif          
1591                 
1592                                 
1593                 //append output files
1594                 for(int i=0;i<processIDS.size();i++){
1595                         m->appendFiles((outputFName + toString(processIDS[i]) + ".temp"), outputFName);
1596                         m->mothurRemove((outputFName + toString(processIDS[i]) + ".temp"));
1597                         
1598                         m->appendFiles((accnos + toString(processIDS[i]) + ".temp"), accnos);
1599                         m->mothurRemove((accnos + toString(processIDS[i]) + ".temp"));
1600                         
1601                         if (chimealns) {
1602                                 m->appendFiles((alns + toString(processIDS[i]) + ".temp"), alns);
1603                                 m->mothurRemove((alns + toString(processIDS[i]) + ".temp"));
1604                         }
1605                 }
1606                 
1607                 return num;     
1608                 
1609         }
1610         catch(exception& e) {
1611                 m->errorOut(e, "ChimeraUchimeCommand", "createProcessesGroups");
1612                 exit(1);
1613         }
1614 }
1615 /**************************************************************************************************/
1616