]> git.donarmstrong.com Git - mothur.git/blob - chimerauchimecommand.cpp
fixed bug with dist.shared subsampling. added mode parameter to dist.shared so...
[mothur.git] / chimerauchimecommand.cpp
1 /*
2  *  chimerauchimecommand.cpp
3  *  Mothur
4  *
5  *  Created by westcott on 5/13/11.
6  *  Copyright 2011 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "chimerauchimecommand.h"
11 #include "deconvolutecommand.h"
12 //#include "uc.h"
13 #include "sequence.hpp"
14 #include "referencedb.h"
15 #include "systemcommand.h"
16
17 //**********************************************************************************************************************
18 vector<string> ChimeraUchimeCommand::setParameters(){   
19         try {
20                 CommandParameter ptemplate("reference", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(ptemplate);
21                 CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pfasta);
22                 CommandParameter pname("name", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pname);
23                 CommandParameter pgroup("group", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pgroup);
24                 CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors);
25                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
26                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
27                 CommandParameter pabskew("abskew", "Number", "", "1.9", "", "", "",false,false); parameters.push_back(pabskew);
28                 CommandParameter pchimealns("chimealns", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pchimealns);
29                 CommandParameter pminh("minh", "Number", "", "0.3", "", "", "",false,false); parameters.push_back(pminh);
30                 CommandParameter pmindiv("mindiv", "Number", "", "0.5", "", "", "",false,false); parameters.push_back(pmindiv);
31                 CommandParameter pxn("xn", "Number", "", "8.0", "", "", "",false,false); parameters.push_back(pxn);
32                 CommandParameter pdn("dn", "Number", "", "1.4", "", "", "",false,false); parameters.push_back(pdn);
33                 CommandParameter pxa("xa", "Number", "", "1", "", "", "",false,false); parameters.push_back(pxa);
34                 CommandParameter pchunks("chunks", "Number", "", "4", "", "", "",false,false); parameters.push_back(pchunks);
35                 CommandParameter pminchunk("minchunk", "Number", "", "64", "", "", "",false,false); parameters.push_back(pminchunk);
36                 CommandParameter pidsmoothwindow("idsmoothwindow", "Number", "", "32", "", "", "",false,false); parameters.push_back(pidsmoothwindow);
37                 //CommandParameter pminsmoothid("minsmoothid", "Number", "", "0.95", "", "", "",false,false); parameters.push_back(pminsmoothid);
38                 CommandParameter pmaxp("maxp", "Number", "", "2", "", "", "",false,false); parameters.push_back(pmaxp);
39                 CommandParameter pskipgaps("skipgaps", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(pskipgaps);
40                 CommandParameter pskipgaps2("skipgaps2", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(pskipgaps2);
41                 CommandParameter pminlen("minlen", "Number", "", "10", "", "", "",false,false); parameters.push_back(pminlen);
42                 CommandParameter pmaxlen("maxlen", "Number", "", "10000", "", "", "",false,false); parameters.push_back(pmaxlen);
43                 CommandParameter pucl("ucl", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pucl);
44                 CommandParameter pqueryfract("queryfract", "Number", "", "0.5", "", "", "",false,false); parameters.push_back(pqueryfract);
45
46                 vector<string> myArray;
47                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
48                 return myArray;
49         }
50         catch(exception& e) {
51                 m->errorOut(e, "ChimeraUchimeCommand", "setParameters");
52                 exit(1);
53         }
54 }
55 //**********************************************************************************************************************
56 string ChimeraUchimeCommand::getHelpString(){   
57         try {
58                 string helpString = "";
59                 helpString += "The chimera.uchime command reads a fastafile and referencefile and outputs potentially chimeric sequences.\n";
60                 helpString += "This command is a wrapper for uchime written by Robert C. Edgar.\n";
61                 helpString += "The chimera.uchime command parameters are fasta, name, reference, processors, abskew, chimealns, minh, mindiv, xn, dn, xa, chunks, minchunk, idsmoothwindow, minsmoothid, maxp, skipgaps, skipgaps2, minlen, maxlen, ucl and queryfact.\n";
62                 helpString += "The fasta parameter allows you to enter the fasta file containing your potentially chimeric sequences, and is required, unless you have a valid current fasta file. \n";
63                 helpString += "The name parameter allows you to provide a name file, if you are using template=self. \n";
64                 helpString += "You may enter multiple fasta files by separating their names with dashes. ie. fasta=abrecovery.fasta-amazon.fasta \n";
65                 helpString += "The group parameter allows you to provide a group file. The group file can be used with a namesfile and reference=self. When checking sequences, only sequences from the same group as the query sequence will be used as the reference. \n";
66                 helpString += "The reference parameter allows you to enter a reference file containing known non-chimeric sequences, and is required. You may also set template=self, in this case the abundant sequences will be used as potential parents. \n";
67                 helpString += "The processors parameter allows you to specify how many processors you would like to use.  The default is 1. \n";
68                 helpString += "The abskew parameter can only be used with template=self. Minimum abundance skew. Default 1.9. Abundance skew is: min [ abund(parent1), abund(parent2) ] / abund(query).\n";
69                 helpString += "The chimealns parameter allows you to indicate you would like a file containing multiple alignments of query sequences to parents in human readable format. Alignments show columns with differences that support or contradict a chimeric model.\n";
70                 helpString += "The minh parameter - mininum score to report chimera. Default 0.3. Values from 0.1 to 5 might be reasonable. Lower values increase sensitivity but may report more false positives. If you decrease xn you may need to increase minh, and vice versa.\n";
71                 helpString += "The mindiv parameter - minimum divergence ratio, default 0.5. Div ratio is 100%% - %%identity between query sequence and the closest candidate for being a parent. If you don't care about very close chimeras, then you could increase mindiv to, say, 1.0 or 2.0, and also decrease minh, say to 0.1, to increase sensitivity. How well this works will depend on your data. Best is to tune parameters on a good benchmark.\n";
72                 helpString += "The xn parameter - weight of a no vote. Default 8.0. Decreasing this weight to around 3 or 4 may give better performance on denoised data.\n";
73                 helpString += "The dn parameter - pseudo-count prior on number of no votes. Default 1.4. Probably no good reason to change this unless you can retune to a good benchmark for your data. Reasonable values are probably in the range from 0.2 to 2.\n";
74                 helpString += "The xa parameter - weight of an abstain vote. Default 1. So far, results do not seem to be very sensitive to this parameter, but if you have a good training set might be worth trying. Reasonable values might range from 0.1 to 2.\n";
75                 helpString += "The chunks parameter is the number of chunks to extract from the query sequence when searching for parents. Default 4.\n";
76                 helpString += "The minchunk parameter is the minimum length of a chunk. Default 64.\n";
77                 helpString += "The idsmoothwindow parameter is the length of id smoothing window. Default 32.\n";
78                 //helpString += "The minsmoothid parameter - minimum factional identity over smoothed window of candidate parent. Default 0.95.\n";
79                 helpString += "The maxp parameter - maximum number of candidate parents to consider. Default 2. In tests so far, increasing maxp gives only a very small improvement in sensivity but tends to increase the error rate quite a bit.\n";
80                 helpString += "The skipgaps parameter controls how gapped columns affect counting of diffs. If skipgaps is set to T, columns containing gaps do not found as diffs. Default = T.\n";
81                 helpString += "The skipgaps2 parameter controls how gapped columns affect counting of diffs. If skipgaps2 is set to T, if column is immediately adjacent to a column containing a gap, it is not counted as a diff. Default = T.\n";
82                 helpString += "The minlen parameter is the minimum unaligned sequence length. Defaults 10. Applies to both query and reference sequences.\n";
83                 helpString += "The maxlen parameter is the maximum unaligned sequence length. Defaults 10000. Applies to both query and reference sequences.\n";
84                 helpString += "The ucl parameter - use local-X alignments. Default is global-X or false. On tests so far, global-X is always better; this option is retained because it just might work well on some future type of data.\n";
85                 helpString += "The queryfract parameter - minimum fraction of the query sequence that must be covered by a local-X alignment. Default 0.5. Applies only when ucl is true.\n";
86 #ifdef USE_MPI
87                 helpString += "When using MPI, the processors parameter is set to the number of MPI processes running. \n";
88 #endif
89                 helpString += "The chimera.uchime command should be in the following format: \n";
90                 helpString += "chimera.uchime(fasta=yourFastaFile, reference=yourTemplate) \n";
91                 helpString += "Example: chimera.uchime(fasta=AD.align, reference=silva.gold.align) \n";
92                 helpString += "Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFastaFile).\n";       
93                 return helpString;
94         }
95         catch(exception& e) {
96                 m->errorOut(e, "ChimeraUchimeCommand", "getHelpString");
97                 exit(1);
98         }
99 }
100 //**********************************************************************************************************************
101 ChimeraUchimeCommand::ChimeraUchimeCommand(){   
102         try {
103                 abort = true; calledHelp = true;
104                 setParameters();
105                 vector<string> tempOutNames;
106                 outputTypes["chimera"] = tempOutNames;
107                 outputTypes["accnos"] = tempOutNames;
108                 outputTypes["alns"] = tempOutNames;
109         }
110         catch(exception& e) {
111                 m->errorOut(e, "ChimeraUchimeCommand", "ChimeraUchimeCommand");
112                 exit(1);
113         }
114 }
115 //***************************************************************************************************************
116 ChimeraUchimeCommand::ChimeraUchimeCommand(string option)  {
117         try {
118                 abort = false; calledHelp = false; 
119                 ReferenceDB* rdb = ReferenceDB::getInstance();
120                 
121                 //allow user to run help
122                 if(option == "help") { help(); abort = true; calledHelp = true; }
123                 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
124                 
125                 else {
126                         vector<string> myArray = setParameters();
127                         
128                         OptionParser parser(option);
129                         map<string,string> parameters = parser.getParameters();
130                         
131                         ValidParameters validParameter("chimera.uchime");
132                         map<string,string>::iterator it;
133                         
134                         //check to make sure all parameters are valid for command
135                         for (it = parameters.begin(); it != parameters.end(); it++) { 
136                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
137                         }
138                         
139                         vector<string> tempOutNames;
140                         outputTypes["chimera"] = tempOutNames;
141                         outputTypes["accnos"] = tempOutNames;
142                         outputTypes["alns"] = tempOutNames;
143                         
144                         //if the user changes the input directory command factory will send this info to us in the output parameter 
145                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
146                         if (inputDir == "not found"){   inputDir = "";          }
147                         
148                         //check for required parameters
149                         fastafile = validParameter.validFile(parameters, "fasta", false);
150                         if (fastafile == "not found") {                                 
151                                 //if there is a current fasta file, use it
152                                 string filename = m->getFastaFile(); 
153                                 if (filename != "") { fastaFileNames.push_back(filename); m->mothurOut("Using " + filename + " as input file for the fasta parameter."); m->mothurOutEndLine(); }
154                                 else {  m->mothurOut("You have no current fastafile and the fasta parameter is required."); m->mothurOutEndLine(); abort = true; }
155                         }else { 
156                                 m->splitAtDash(fastafile, fastaFileNames);
157                                 
158                                 //go through files and make sure they are good, if not, then disregard them
159                                 for (int i = 0; i < fastaFileNames.size(); i++) {
160                                         
161                                         bool ignore = false;
162                                         if (fastaFileNames[i] == "current") { 
163                                                 fastaFileNames[i] = m->getFastaFile(); 
164                                                 if (fastaFileNames[i] != "") {  m->mothurOut("Using " + fastaFileNames[i] + " as input file for the fasta parameter where you had given current."); m->mothurOutEndLine(); }
165                                                 else {  
166                                                         m->mothurOut("You have no current fastafile, ignoring current."); m->mothurOutEndLine(); ignore=true; 
167                                                         //erase from file list
168                                                         fastaFileNames.erase(fastaFileNames.begin()+i);
169                                                         i--;
170                                                 }
171                                         }
172                                         
173                                         if (!ignore) {
174                                                 
175                                                 if (inputDir != "") {
176                                                         string path = m->hasPath(fastaFileNames[i]);
177                                                         //if the user has not given a path then, add inputdir. else leave path alone.
178                                                         if (path == "") {       fastaFileNames[i] = inputDir + fastaFileNames[i];               }
179                                                 }
180                                                 
181                                                 int ableToOpen;
182                                                 ifstream in;
183                                                 
184                                                 ableToOpen = m->openInputFile(fastaFileNames[i], in, "noerror");
185                                                 
186                                                 //if you can't open it, try default location
187                                                 if (ableToOpen == 1) {
188                                                         if (m->getDefaultPath() != "") { //default path is set
189                                                                 string tryPath = m->getDefaultPath() + m->getSimpleName(fastaFileNames[i]);
190                                                                 m->mothurOut("Unable to open " + fastaFileNames[i] + ". Trying default " + tryPath); m->mothurOutEndLine();
191                                                                 ifstream in2;
192                                                                 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
193                                                                 in2.close();
194                                                                 fastaFileNames[i] = tryPath;
195                                                         }
196                                                 }
197                                                 
198                                                 if (ableToOpen == 1) {
199                                                         if (m->getOutputDir() != "") { //default path is set
200                                                                 string tryPath = m->getOutputDir() + m->getSimpleName(fastaFileNames[i]);
201                                                                 m->mothurOut("Unable to open " + fastaFileNames[i] + ". Trying output directory " + tryPath); m->mothurOutEndLine();
202                                                                 ifstream in2;
203                                                                 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
204                                                                 in2.close();
205                                                                 fastaFileNames[i] = tryPath;
206                                                         }
207                                                 }
208                                                 
209                                                 in.close();
210                                                 
211                                                 if (ableToOpen == 1) { 
212                                                         m->mothurOut("Unable to open " + fastaFileNames[i] + ". It will be disregarded."); m->mothurOutEndLine(); 
213                                                         //erase from file list
214                                                         fastaFileNames.erase(fastaFileNames.begin()+i);
215                                                         i--;
216                                                 }else {
217                                                         m->setFastaFile(fastaFileNames[i]);
218                                                 }
219                                         }
220                                 }
221                                 
222                                 //make sure there is at least one valid file left
223                                 if (fastaFileNames.size() == 0) { m->mothurOut("[ERROR]: no valid files."); m->mothurOutEndLine(); abort = true; }
224                         }
225                         
226                         
227                         //check for required parameters
228                         bool hasName = true;
229                         namefile = validParameter.validFile(parameters, "name", false);
230                         if (namefile == "not found") { namefile = "";  hasName = false; }
231                         else { 
232                                 m->splitAtDash(namefile, nameFileNames);
233                                 
234                                 //go through files and make sure they are good, if not, then disregard them
235                                 for (int i = 0; i < nameFileNames.size(); i++) {
236                                         
237                                         bool ignore = false;
238                                         if (nameFileNames[i] == "current") { 
239                                                 nameFileNames[i] = m->getNameFile(); 
240                                                 if (nameFileNames[i] != "") {  m->mothurOut("Using " + nameFileNames[i] + " as input file for the name parameter where you had given current."); m->mothurOutEndLine(); }
241                                                 else {  
242                                                         m->mothurOut("You have no current namefile, ignoring current."); m->mothurOutEndLine(); ignore=true; 
243                                                         //erase from file list
244                                                         nameFileNames.erase(nameFileNames.begin()+i);
245                                                         i--;
246                                                 }
247                                         }
248                                         
249                                         if (!ignore) {
250                                                 
251                                                 if (inputDir != "") {
252                                                         string path = m->hasPath(nameFileNames[i]);
253                                                         //if the user has not given a path then, add inputdir. else leave path alone.
254                                                         if (path == "") {       nameFileNames[i] = inputDir + nameFileNames[i];         }
255                                                 }
256                                                 
257                                                 int ableToOpen;
258                                                 ifstream in;
259                                                 
260                                                 ableToOpen = m->openInputFile(nameFileNames[i], in, "noerror");
261                                                 
262                                                 //if you can't open it, try default location
263                                                 if (ableToOpen == 1) {
264                                                         if (m->getDefaultPath() != "") { //default path is set
265                                                                 string tryPath = m->getDefaultPath() + m->getSimpleName(nameFileNames[i]);
266                                                                 m->mothurOut("Unable to open " + nameFileNames[i] + ". Trying default " + tryPath); m->mothurOutEndLine();
267                                                                 ifstream in2;
268                                                                 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
269                                                                 in2.close();
270                                                                 nameFileNames[i] = tryPath;
271                                                         }
272                                                 }
273                                                 
274                                                 if (ableToOpen == 1) {
275                                                         if (m->getOutputDir() != "") { //default path is set
276                                                                 string tryPath = m->getOutputDir() + m->getSimpleName(nameFileNames[i]);
277                                                                 m->mothurOut("Unable to open " + nameFileNames[i] + ". Trying output directory " + tryPath); m->mothurOutEndLine();
278                                                                 ifstream in2;
279                                                                 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
280                                                                 in2.close();
281                                                                 nameFileNames[i] = tryPath;
282                                                         }
283                                                 }
284                                                 
285                                                 in.close();
286                                                 
287                                                 if (ableToOpen == 1) { 
288                                                         m->mothurOut("Unable to open " + nameFileNames[i] + ". It will be disregarded."); m->mothurOutEndLine(); 
289                                                         //erase from file list
290                                                         nameFileNames.erase(nameFileNames.begin()+i);
291                                                         i--;
292                                                 }else {
293                                                         m->setNameFile(nameFileNames[i]);
294                                                 }
295                                         }
296                                 }
297                                 
298                                 //make sure there is at least one valid file left
299                                 if (nameFileNames.size() == 0) { m->mothurOut("[ERROR]: no valid name files."); m->mothurOutEndLine(); abort = true; }
300                         }
301                         
302                         if (hasName && (nameFileNames.size() != fastaFileNames.size())) { m->mothurOut("[ERROR]: The number of namefiles does not match the number of fastafiles, please correct."); m->mothurOutEndLine(); abort=true; }
303                         
304                         bool hasGroup = true;
305                         groupfile = validParameter.validFile(parameters, "group", false);
306                         if (groupfile == "not found") { groupfile = "";  hasGroup = false; }
307                         else { 
308                                 m->splitAtDash(groupfile, groupFileNames);
309                                 
310                                 //go through files and make sure they are good, if not, then disregard them
311                                 for (int i = 0; i < groupFileNames.size(); i++) {
312                                         
313                                         bool ignore = false;
314                                         if (groupFileNames[i] == "current") { 
315                                                 groupFileNames[i] = m->getGroupFile(); 
316                                                 if (groupFileNames[i] != "") {  m->mothurOut("Using " + groupFileNames[i] + " as input file for the group parameter where you had given current."); m->mothurOutEndLine(); }
317                                                 else {  
318                                                         m->mothurOut("You have no current namefile, ignoring current."); m->mothurOutEndLine(); ignore=true; 
319                                                         //erase from file list
320                                                         groupFileNames.erase(groupFileNames.begin()+i);
321                                                         i--;
322                                                 }
323                                         }
324                                         
325                                         if (!ignore) {
326                                                 
327                                                 if (inputDir != "") {
328                                                         string path = m->hasPath(groupFileNames[i]);
329                                                         //if the user has not given a path then, add inputdir. else leave path alone.
330                                                         if (path == "") {       groupFileNames[i] = inputDir + groupFileNames[i];               }
331                                                 }
332                                                 
333                                                 int ableToOpen;
334                                                 ifstream in;
335                                                 
336                                                 ableToOpen = m->openInputFile(groupFileNames[i], in, "noerror");
337                                                 
338                                                 //if you can't open it, try default location
339                                                 if (ableToOpen == 1) {
340                                                         if (m->getDefaultPath() != "") { //default path is set
341                                                                 string tryPath = m->getDefaultPath() + m->getSimpleName(groupFileNames[i]);
342                                                                 m->mothurOut("Unable to open " + groupFileNames[i] + ". Trying default " + tryPath); m->mothurOutEndLine();
343                                                                 ifstream in2;
344                                                                 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
345                                                                 in2.close();
346                                                                 groupFileNames[i] = tryPath;
347                                                         }
348                                                 }
349                                                 
350                                                 if (ableToOpen == 1) {
351                                                         if (m->getOutputDir() != "") { //default path is set
352                                                                 string tryPath = m->getOutputDir() + m->getSimpleName(groupFileNames[i]);
353                                                                 m->mothurOut("Unable to open " + groupFileNames[i] + ". Trying output directory " + tryPath); m->mothurOutEndLine();
354                                                                 ifstream in2;
355                                                                 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
356                                                                 in2.close();
357                                                                 groupFileNames[i] = tryPath;
358                                                         }
359                                                 }
360                                                 
361                                                 in.close();
362                                                 
363                                                 if (ableToOpen == 1) { 
364                                                         m->mothurOut("Unable to open " + groupFileNames[i] + ". It will be disregarded."); m->mothurOutEndLine(); 
365                                                         //erase from file list
366                                                         groupFileNames.erase(groupFileNames.begin()+i);
367                                                         i--;
368                                                 }else {
369                                                         m->setGroupFile(groupFileNames[i]);
370                                                 }
371                                         }
372                                 }
373                                 
374                                 //make sure there is at least one valid file left
375                                 if (groupFileNames.size() == 0) { m->mothurOut("[ERROR]: no valid group files."); m->mothurOutEndLine(); abort = true; }
376                         }
377                         
378                         if (hasGroup && (groupFileNames.size() != fastaFileNames.size())) { m->mothurOut("[ERROR]: The number of groupfiles does not match the number of fastafiles, please correct."); m->mothurOutEndLine(); abort=true; }
379                         
380                         
381                         //if the user changes the output directory command factory will send this info to us in the output parameter 
382                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = ""; }
383                         
384                         string path;
385                         it = parameters.find("reference");
386                         //user has given a template file
387                         if(it != parameters.end()){ 
388                                 if (it->second == "self") { templatefile = "self"; }
389                                 else {
390                                         path = m->hasPath(it->second);
391                                         //if the user has not given a path then, add inputdir. else leave path alone.
392                                         if (path == "") {       parameters["reference"] = inputDir + it->second;                }
393                                         
394                                         templatefile = validParameter.validFile(parameters, "reference", true);
395                                         if (templatefile == "not open") { abort = true; }
396                                         else if (templatefile == "not found") { //check for saved reference sequences
397                                                 if (rdb->getSavedReference() != "") {
398                                                         templatefile = rdb->getSavedReference();
399                                                         m->mothurOutEndLine();  m->mothurOut("Using sequences from " + rdb->getSavedReference() + "."); m->mothurOutEndLine();
400                                                 }else {
401                                                         m->mothurOut("[ERROR]: You don't have any saved reference sequences and the reference parameter is a required."); 
402                                                         m->mothurOutEndLine();
403                                                         abort = true; 
404                                                 }
405                                         }
406                                 }
407                         }else if (hasName) {  templatefile = "self"; }
408                         else { 
409                                 if (rdb->getSavedReference() != "") {
410                                         templatefile = rdb->getSavedReference();
411                                         m->mothurOutEndLine();  m->mothurOut("Using sequences from " + rdb->getSavedReference() + "."); m->mothurOutEndLine();
412                                 }else {
413                                         m->mothurOut("[ERROR]: You don't have any saved reference sequences and the reference parameter is a required."); 
414                                         m->mothurOutEndLine();
415                                         templatefile = ""; abort = true; 
416                                 } 
417                         }
418                                 
419                         string temp = validParameter.validFile(parameters, "processors", false);        if (temp == "not found"){       temp = m->getProcessors();      }
420                         m->setProcessors(temp);
421                         m->mothurConvert(temp, processors);
422                         
423                         abskew = validParameter.validFile(parameters, "abskew", false); if (abskew == "not found"){     useAbskew = false;  abskew = "1.9";     }else{  useAbskew = true;  }
424                         if (useAbskew && templatefile != "self") { m->mothurOut("The abskew parameter is only valid with template=self, ignoring."); m->mothurOutEndLine(); useAbskew = false; }
425                         
426                         temp = validParameter.validFile(parameters, "chimealns", false);                        if (temp == "not found") { temp = "f"; }
427                         chimealns = m->isTrue(temp); 
428                         
429                         minh = validParameter.validFile(parameters, "minh", false);                                             if (minh == "not found")                        { useMinH = false; minh = "0.3";                                        }       else{ useMinH = true;                   }
430                         mindiv = validParameter.validFile(parameters, "mindiv", false);                                 if (mindiv == "not found")                      { useMindiv = false; mindiv = "0.5";                            }       else{ useMindiv = true;                 }
431                         xn = validParameter.validFile(parameters, "xn", false);                                                 if (xn == "not found")                          { useXn = false; xn = "8.0";                                            }       else{ useXn = true;                             }
432                         dn = validParameter.validFile(parameters, "dn", false);                                                 if (dn == "not found")                          { useDn = false; dn = "1.4";                                            }       else{ useDn = true;                             }
433                         xa = validParameter.validFile(parameters, "xa", false);                                                 if (xa == "not found")                          { useXa = false; xa = "1";                                                      }       else{ useXa = true;                             }
434                         chunks = validParameter.validFile(parameters, "chunks", false);                                 if (chunks == "not found")                      { useChunks = false; chunks = "4";                                      }       else{ useChunks = true;                 }
435                         minchunk = validParameter.validFile(parameters, "minchunk", false);                             if (minchunk == "not found")            { useMinchunk = false; minchunk = "64";                         }       else{ useMinchunk = true;               }
436                         idsmoothwindow = validParameter.validFile(parameters, "idsmoothwindow", false); if (idsmoothwindow == "not found")      { useIdsmoothwindow = false; idsmoothwindow = "32";     }       else{ useIdsmoothwindow = true; }
437                         //minsmoothid = validParameter.validFile(parameters, "minsmoothid", false);             if (minsmoothid == "not found")         { useMinsmoothid = false; minsmoothid = "0.95";         }       else{ useMinsmoothid = true;    }
438                         maxp = validParameter.validFile(parameters, "maxp", false);                                             if (maxp == "not found")                        { useMaxp = false; maxp = "2";                                          }       else{ useMaxp = true;                   }
439                         minlen = validParameter.validFile(parameters, "minlen", false);                                 if (minlen == "not found")                      { useMinlen = false; minlen = "10";                                     }       else{ useMinlen = true;                 }
440                         maxlen = validParameter.validFile(parameters, "maxlen", false);                                 if (maxlen == "not found")                      { useMaxlen = false; maxlen = "10000";                          }       else{ useMaxlen = true;                 }
441                         
442                         temp = validParameter.validFile(parameters, "ucl", false);                                              if (temp == "not found") { temp = "f"; }
443                         ucl = m->isTrue(temp);
444                         
445                         queryfract = validParameter.validFile(parameters, "queryfract", false);                 if (queryfract == "not found")          { useQueryfract = false; queryfract = "0.5";            }       else{ useQueryfract = true;             }
446                         if (!ucl && useQueryfract) { m->mothurOut("queryfact may only be used when ucl=t, ignoring."); m->mothurOutEndLine(); useQueryfract = false; }
447                         
448                         temp = validParameter.validFile(parameters, "skipgaps", false);                                 if (temp == "not found") { temp = "t"; }
449                         skipgaps = m->isTrue(temp); 
450
451                         temp = validParameter.validFile(parameters, "skipgaps2", false);                                if (temp == "not found") { temp = "t"; }
452                         skipgaps2 = m->isTrue(temp); 
453                         
454                         if (hasName && (templatefile != "self")) { m->mothurOut("You have provided a namefile and the reference parameter is not set to self. I am not sure what reference you are trying to use, aborting."); m->mothurOutEndLine(); abort=true; }
455                         if (hasGroup && (templatefile != "self")) { m->mothurOut("You have provided a group file and the reference parameter is not set to self. I am not sure what reference you are trying to use, aborting."); m->mothurOutEndLine(); abort=true; }
456                         
457                         //look for uchime exe
458                         path = m->argv;
459                         string tempPath = path;
460                         for (int i = 0; i < path.length(); i++) { tempPath[i] = tolower(path[i]); }
461                         path = path.substr(0, (tempPath.find_last_of('m')));
462                         
463                         string uchimeCommand;
464 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
465                         uchimeCommand = path + "uchime";        //      format the database, -o option gives us the ability
466             if (m->debug) { 
467                 m->mothurOut("[DEBUG]: Uchime location using \"which uchime\" = "); 
468                 Command* newCommand = new SystemCommand("which uchime"); m->mothurOutEndLine();
469                 newCommand->execute();
470                 delete newCommand;
471                 m->mothurOut("[DEBUG]: Mothur's location using \"which mothur\" = "); 
472                 newCommand = new SystemCommand("which mothur"); m->mothurOutEndLine();
473                 newCommand->execute();
474                 delete newCommand;
475             }
476 #else
477                         uchimeCommand = path + "uchime.exe";
478 #endif
479         
480                         //test to make sure uchime exists
481                         ifstream in;
482                         uchimeCommand = m->getFullPathName(uchimeCommand);
483                         int ableToOpen = m->openInputFile(uchimeCommand, in, "no error"); in.close();
484                         if(ableToOpen == 1) {   
485                 m->mothurOut(uchimeCommand + " file does not exist. Checking path... \n");
486                 //check to see if uchime is in the path??
487                 
488                 string uLocation = m->findProgramPath("uchime");
489                 
490                 
491                 ifstream in2;
492 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
493                 ableToOpen = m->openInputFile(uLocation, in2, "no error"); in2.close();
494 #else
495                 ableToOpen = m->openInputFile((uLocation + ".exe"), in2, "no error"); in2.close();
496 #endif
497
498                 if(ableToOpen == 1) { m->mothurOut("[ERROR]: " + uLocation + " file does not exist. mothur requires the uchime executable."); m->mothurOutEndLine(); abort = true; } 
499                 else {  m->mothurOut("Found uchime in your path, using " + uLocation + "\n");uchimeLocation = uLocation; }
500             }else {  uchimeLocation = uchimeCommand; }
501             
502             uchimeLocation = m->getFullPathName(uchimeLocation);
503         }
504         }
505         catch(exception& e) {
506                 m->errorOut(e, "ChimeraSlayerCommand", "ChimeraSlayerCommand");
507                 exit(1);
508         }
509 }
510 //***************************************************************************************************************
511
512 int ChimeraUchimeCommand::execute(){
513         try{
514                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
515                 
516                 m->mothurOut("\nuchime by Robert C. Edgar\nhttp://drive5.com/uchime\nThis code is donated to the public domain.\n\n");
517                 
518                 for (int s = 0; s < fastaFileNames.size(); s++) {
519                         
520                         m->mothurOut("Checking sequences from " + fastaFileNames[s] + " ..." ); m->mothurOutEndLine();
521                         
522                         int start = time(NULL); 
523                         string nameFile = "";
524                         if (outputDir == "") { outputDir = m->hasPath(fastaFileNames[s]);  }//if user entered a file with a path then preserve it                               
525                         string outputFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + "uchime.chimera";
526                         string accnosFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s]))  + "uchime.accnos";
527                         string alnsFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s]))  + "uchime.alns";
528                         string newFasta = m->getRootName(fastaFileNames[s]) + "temp";
529                                 
530                         //you provided a groupfile
531                         string groupFile = "";
532                         if (groupFileNames.size() != 0) { groupFile = groupFileNames[s]; }
533                         
534                         if ((templatefile == "self") && (groupFile == "")) { //you want to run uchime with a reference template
535
536                                 if (processors != 1) { m->mothurOut("When using template=self, mothur can only use 1 processor, continuing."); m->mothurOutEndLine(); processors = 1; }
537                                 if (nameFileNames.size() != 0) { //you provided a namefile and we don't need to create one
538                                         nameFile = nameFileNames[s];
539                                 }else { nameFile = getNamesFile(fastaFileNames[s]); }
540                                                                                 
541                                 map<string, string> seqs;  
542                                 readFasta(fastaFileNames[s], seqs);  if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) {   m->mothurRemove(outputNames[j]);        }  return 0; }
543
544                                 //read namefile
545                                 vector<seqPriorityNode> nameMapCount;
546                                 int error = m->readNames(nameFile, nameMapCount, seqs); if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) {        m->mothurRemove(outputNames[j]);        }  return 0; }
547                                 if (error == 1) { for (int j = 0; j < outputNames.size(); j++) {        m->mothurRemove(outputNames[j]);        }  return 0; }
548                                 if (seqs.size() != nameMapCount.size()) { m->mothurOut( "The number of sequences in your fastafile does not match the number of sequences in your namefile, aborting."); m->mothurOutEndLine(); for (int j = 0; j < outputNames.size(); j++) {  m->mothurRemove(outputNames[j]);        }  return 0; }
549                                 
550                                 printFile(nameMapCount, newFasta);
551                                 fastaFileNames[s] = newFasta;
552                         }
553                         
554                         if (m->control_pressed) {  for (int j = 0; j < outputNames.size(); j++) {       m->mothurRemove(outputNames[j]);        }  return 0;    }                               
555                         
556                         if (groupFile != "") {
557                                 if (nameFileNames.size() != 0) { //you provided a namefile and we don't need to create one
558                                         nameFile = nameFileNames[s];
559                                 }else { nameFile = getNamesFile(fastaFileNames[s]); }
560                                 
561                                 //Parse sequences by group
562                                 SequenceParser parser(groupFile, fastaFileNames[s], nameFile);
563                                 vector<string> groups = parser.getNamesOfGroups();
564                                         
565                                 if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) {        m->mothurRemove(outputNames[j]);        }  return 0; }
566                                                                 
567                                 //clears files
568                                 ofstream out, out1, out2;
569                                 m->openOutputFile(outputFileName, out); out.close(); 
570                                 m->openOutputFile(accnosFileName, out1); out1.close();
571                                 if (chimealns) { m->openOutputFile(alnsFileName, out2); out2.close(); }
572                                 int totalSeqs = 0;
573                                 
574                                 if(processors == 1)     {       totalSeqs = driverGroups(parser, outputFileName, newFasta, accnosFileName, alnsFileName, 0, groups.size(), groups);     }
575                                 else                            {       totalSeqs = createProcessesGroups(parser, outputFileName, newFasta, accnosFileName, alnsFileName, groups, nameFile, groupFile, fastaFileNames[s]);                      }
576
577                                 if (m->control_pressed) {  for (int j = 0; j < outputNames.size(); j++) {       m->mothurRemove(outputNames[j]);        }  return 0;    }                               
578
579                                 int totalChimeras = deconvoluteResults(parser, outputFileName, accnosFileName, alnsFileName);
580                                 
581                                 m->mothurOutEndLine(); m->mothurOut("It took " + toString(time(NULL) - start) + " secs to check " + toString(totalSeqs) + " sequences. " + toString(totalChimeras) + " chimeras were found.");  m->mothurOutEndLine();
582                                 m->mothurOut("The number of sequences checked may be larger than the number of unique sequences because some sequences are found in several samples."); m->mothurOutEndLine(); 
583                                 
584                                 if (m->control_pressed) {  for (int j = 0; j < outputNames.size(); j++) {       m->mothurRemove(outputNames[j]);        }  return 0;    }                               
585                                         
586                         }else{
587                                 if (m->control_pressed) {  for (int j = 0; j < outputNames.size(); j++) {       m->mothurRemove(outputNames[j]);        }  return 0;    }
588                         
589                                 int numSeqs = 0;
590                                 int numChimeras = 0;
591
592                                 if(processors == 1){ numSeqs = driver(outputFileName, fastaFileNames[s], accnosFileName, alnsFileName, numChimeras); }
593                                 else{   numSeqs = createProcesses(outputFileName, fastaFileNames[s], accnosFileName, alnsFileName, numChimeras); }
594                                 
595                                 //add headings
596                                 ofstream out;
597                                 m->openOutputFile(outputFileName+".temp", out); 
598                                 out << "Score\tQuery\tParentA\tParentB\tIdQM\tIdQA\tIdQB\tIdAB\tIdQT\tLY\tLN\tLA\tRY\tRN\tRA\tDiv\tYN\n";
599                                 out.close();
600                                 
601                                 m->appendFiles(outputFileName, outputFileName+".temp");
602                                 m->mothurRemove(outputFileName); rename((outputFileName+".temp").c_str(), outputFileName.c_str());
603                                 
604                                 if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) {        m->mothurRemove(outputNames[j]);        } return 0; }
605                         
606                                 //remove file made for uchime
607                                 if (templatefile == "self") {  m->mothurRemove(fastaFileNames[s]); }
608                         
609                                 m->mothurOutEndLine(); m->mothurOut("It took " + toString(time(NULL) - start) + " secs to check " + toString(numSeqs) + " sequences. " + toString(numChimeras) + " chimeras were found.");      m->mothurOutEndLine();
610                         }
611                         
612                         outputNames.push_back(outputFileName); outputTypes["chimera"].push_back(outputFileName);
613                         outputNames.push_back(accnosFileName); outputTypes["accnos"].push_back(accnosFileName);
614                         if (chimealns) { outputNames.push_back(alnsFileName); outputTypes["alns"].push_back(alnsFileName); }
615                 }
616         
617                 //set accnos file as new current accnosfile
618                 string current = "";
619                 itTypes = outputTypes.find("accnos");
620                 if (itTypes != outputTypes.end()) {
621                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setAccnosFile(current); }
622                 }
623                 
624                 m->mothurOutEndLine();
625                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
626                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }       
627                 m->mothurOutEndLine();
628                 
629                 return 0;
630                 
631         }
632         catch(exception& e) {
633                 m->errorOut(e, "ChimeraUchimeCommand", "execute");
634                 exit(1);
635         }
636 }
637 //**********************************************************************************************************************
638 int ChimeraUchimeCommand::deconvoluteResults(SequenceParser& parser, string outputFileName, string accnosFileName, string alnsFileName){
639         try {
640                 map<string, string> uniqueNames = parser.getAllSeqsMap();
641                 map<string, string>::iterator itUnique;
642                 int total = 0;
643                 
644                 //edit accnos file
645                 ifstream in2; 
646                 m->openInputFile(accnosFileName, in2);
647                 
648                 ofstream out2;
649                 m->openOutputFile(accnosFileName+".temp", out2);
650                 
651                 string name;
652                 set<string> namesInFile; //this is so if a sequence is found to be chimera in several samples we dont write it to the results file more than once
653                 set<string>::iterator itNames;
654                 set<string> chimerasInFile;
655                 set<string>::iterator itChimeras;
656
657                 
658                 while (!in2.eof()) {
659                         if (m->control_pressed) { in2.close(); out2.close(); m->mothurRemove(outputFileName); m->mothurRemove((accnosFileName+".temp")); return 0; }
660                         
661                         in2 >> name; m->gobble(in2);
662                         
663                         //find unique name
664                         itUnique = uniqueNames.find(name);
665                         
666                         if (itUnique == uniqueNames.end()) { m->mothurOut("[ERROR]: trouble parsing accnos results. Cannot find "+ name + "."); m->mothurOutEndLine(); m->control_pressed = true; }
667                         else {
668                                 itChimeras = chimerasInFile.find((itUnique->second));
669                                 
670                                 if (itChimeras == chimerasInFile.end()) {
671                                         out2 << itUnique->second << endl;
672                                         chimerasInFile.insert((itUnique->second));
673                                         total++;
674                                 }
675                         }
676                 }
677                 in2.close();
678                 out2.close();
679                 
680                 m->mothurRemove(accnosFileName);
681                 rename((accnosFileName+".temp").c_str(), accnosFileName.c_str());
682                 
683                 
684                 
685                 //edit chimera file
686                 ifstream in; 
687                 m->openInputFile(outputFileName, in);
688                 
689                 ofstream out;
690                 m->openOutputFile(outputFileName+".temp", out); out.setf(ios::fixed, ios::floatfield); out.setf(ios::showpoint);
691                 out << "Score\tQuery\tParentA\tParentB\tIdQM\tIdQA\tIdQB\tIdAB\tIdQT\tLY\tLN\tLA\tRY\tRN\tRA\tDiv\tYN\n";
692                 
693                 float temp1;
694                 string parent1, parent2, temp2, temp3, temp4, temp5, temp6, temp7, temp8, temp9, temp10, temp11, temp12, temp13, flag;
695                 name = "";
696                 namesInFile.clear();    
697                 //assumptions - in file each read will always look like - if uchime source is updated, revisit this code.
698                 /*                                                                              1       2       3       4       5       6       7       8       9       10      11      12      13      14      15
699                  0.000000       F11Fcsw_33372/ab=18/            *       *       *       *       *       *       *       *       *       *       *       *       *       *       N
700                  0.018300       F11Fcsw_14980/ab=16/            F11Fcsw_1915/ab=35/     F11Fcsw_6032/ab=42/     79.9    78.7    78.2    78.7    79.2    3       0       5       11      10      20      1.46    N
701                 */
702                 
703                 while (!in.eof()) {
704                         
705                         if (m->control_pressed) { in.close(); out.close(); m->mothurRemove((outputFileName+".temp")); return 0; }
706                         
707                         bool print = false;
708                         in >> temp1;    m->gobble(in);
709                         in >> name;             m->gobble(in);
710                         in >> parent1;  m->gobble(in);
711                         in >> parent2;  m->gobble(in);
712                         in >> temp2 >> temp3 >> temp4 >> temp5 >> temp6 >> temp7 >> temp8 >> temp9 >> temp10 >> temp11 >> temp12 >> temp13 >> flag;
713                         m->gobble(in);
714                         
715                         //parse name - name will look like U68590/ab=1/
716                         string restOfName = "";
717                         int pos = name.find_first_of('/');
718                         if (pos != string::npos) {
719                                 restOfName = name.substr(pos);
720                                 name = name.substr(0, pos);
721                         }
722                         
723                         //find unique name
724                         itUnique = uniqueNames.find(name);
725                         
726                         if (itUnique == uniqueNames.end()) { m->mothurOut("[ERROR]: trouble parsing chimera results. Cannot find "+ name + "."); m->mothurOutEndLine(); m->control_pressed = true; }
727                         else {
728                                 name = itUnique->second;
729                                 //is this name already in the file
730                                 itNames = namesInFile.find((name));
731                                 
732                                 if (itNames == namesInFile.end()) { //no not in file
733                                         if (flag == "N") { //are you really a no??
734                                                 //is this sequence really not chimeric??
735                                                 itChimeras = chimerasInFile.find(name);
736                                                 
737                                                 //then you really are a no so print, otherwise skip
738                                                 if (itChimeras == chimerasInFile.end()) { print = true; }
739                                         }else{ print = true; }
740                                 }
741                         }
742                         
743                         if (print) {
744                                 out << temp1 << '\t' << name << restOfName << '\t';
745                                 namesInFile.insert(name);
746                                 
747                                 //parse parent1 names
748                                 if (parent1 != "*") {
749                                         restOfName = "";
750                                         pos = parent1.find_first_of('/');
751                                         if (pos != string::npos) {
752                                                 restOfName = parent1.substr(pos);
753                                                 parent1 = parent1.substr(0, pos);
754                                         }
755                                         
756                                         itUnique = uniqueNames.find(parent1);
757                                         if (itUnique == uniqueNames.end()) { m->mothurOut("[ERROR]: trouble parsing chimera results. Cannot find parentA "+ parent1 + "."); m->mothurOutEndLine(); m->control_pressed = true; }
758                                         else {  out << itUnique->second << restOfName << '\t';  }
759                                 }else { out << parent1 << '\t'; }
760                                 
761                                 //parse parent2 names
762                                 if (parent2 != "*") {
763                                         restOfName = "";
764                                         pos = parent2.find_first_of('/');
765                                         if (pos != string::npos) {
766                                                 restOfName = parent2.substr(pos);
767                                                 parent2 = parent2.substr(0, pos);
768                                         }
769                                         
770                                         itUnique = uniqueNames.find(parent2);
771                                         if (itUnique == uniqueNames.end()) { m->mothurOut("[ERROR]: trouble parsing chimera results. Cannot find parentB "+ parent2 + "."); m->mothurOutEndLine(); m->control_pressed = true; }
772                                         else {  out << itUnique->second << restOfName << '\t';  }
773                                 }else { out << parent2 << '\t'; }
774                                 
775                                 out << temp2 << '\t' << temp3 << '\t' << temp4 << '\t' << temp5 << '\t' << temp6 << '\t' << temp7 << '\t' << temp8 << '\t' << temp9 << '\t' << temp10 << '\t' << temp11 << '\t' << temp12 << temp13 << '\t' << flag << endl;    
776                         }
777                 }
778                 in.close();
779                 out.close();
780                 
781                 m->mothurRemove(outputFileName);
782                 rename((outputFileName+".temp").c_str(), outputFileName.c_str());
783                 
784                                 
785                 //edit anls file
786                 //assumptions - in file each read will always look like - if uchime source is updated, revisit this code.
787                 /*
788                  ------------------------------------------------------------------------
789                  Query   (  179 nt) F21Fcsw_11639/ab=591/
790                  ParentA (  179 nt) F11Fcsw_6529/ab=1625/
791                  ParentB (  181 nt) F21Fcsw_12128/ab=1827/
792                  
793                  A     1 AAGgAAGAtTAATACaagATGgCaTCatgAGtccgCATgTtcAcatGATTAAAG--gTaTtcCGGTagacGATGGGGATG 78
794                  Q     1 AAGTAAGACTAATACCCAATGACGTCTCTAGAAGACATCTGAAAGAGATTAAAG--ATTTATCGGTGATGGATGGGGATG 78
795                  B     1 AAGgAAGAtTAATcCaggATGggaTCatgAGttcACATgTccgcatGATTAAAGgtATTTtcCGGTagacGATGGGGATG 80
796                  Diffs      N    N    A N?N   N N  NNN  N?NB   N ?NaNNN          B B NN    NNNN          
797                  Votes      0    0    + 000   0 0  000  000+   0 00!000            + 00    0000          
798                  Model   AAAAAAAAAAAAAAAAAAAAAAxBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB
799                  
800                  A    79 CGTtccATTAGaTaGTaGGCGGGGTAACGGCCCACCtAGtCttCGATggaTAGGGGTTCTGAGAGGAAGGTCCCCCACAT 158
801                  Q    79 CGTCTGATTAGCTTGTTGGCGGGGTAACGGCCCACCAAGGCAACGATCAGTAGGGGTTCTGAGAGGAAGGTCCCCCACAT 158
802                  B    81 CGTtccATTAGaTaGTaGGCGGGGTAACGGCCCACCtAGtCAACGATggaTAGGGGTTCTGAGAGGAAGGTCCCCCACAT 160
803                  Diffs      NNN     N N  N                   N  N BB    NNN                              
804                  Votes      000     0 0  0                   0  0 ++    000                              
805                  Model   BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB
806                  
807                  A   159 TGGAACTGAGACACGGTCCAA 179
808                  Q   159 TGGAACTGAGACACGGTCCAA 179
809                  B   161 TGGAACTGAGACACGGTCCAA 181
810                  Diffs                        
811                  Votes                        
812                  Model   BBBBBBBBBBBBBBBBBBBBB
813                  
814                  Ids.  QA 76.6%, QB 77.7%, AB 93.7%, QModel 78.9%, Div. +1.5%
815                  Diffs Left 7: N 0, A 6, Y 1 (14.3%); Right 35: N 1, A 30, Y 4 (11.4%), Score 0.0047
816                 */
817                 if (chimealns) {
818                         ifstream in3; 
819                         m->openInputFile(alnsFileName, in3);
820                 
821                         ofstream out3;
822                         m->openOutputFile(alnsFileName+".temp", out3); out3.setf(ios::fixed, ios::floatfield); out3.setf(ios::showpoint);
823                 
824                         name = "";
825                         namesInFile.clear();
826                         string line = "";
827                         
828                         while (!in3.eof()) {
829                                 if (m->control_pressed) { in3.close(); out3.close(); m->mothurRemove(outputFileName); m->mothurRemove((accnosFileName)); m->mothurRemove((alnsFileName+".temp")); return 0; }
830                                 
831                                 line = "";
832                                 line = m->getline(in3); 
833                                 string temp = "";
834                                 
835                                 if (line != "") {
836                                         istringstream iss(line);
837                                         iss >> temp;
838                                         
839                                         //are you a name line
840                                         if ((temp == "Query") || (temp == "ParentA") || (temp == "ParentB")) {
841                                                 int spot = 0;
842                                                 for (int i = 0; i < line.length(); i++) {
843                                                         spot = i;
844                                                         if (line[i] == ')') { break; }
845                                                         else { out3 << line[i]; }
846                                                 }
847                                                 
848                                                 if (spot == (line.length() - 1)) { m->mothurOut("[ERROR]: could not line sequence name in line " + line + "."); m->mothurOutEndLine(); m->control_pressed = true; }
849                                                 else if ((spot+2) > (line.length() - 1)) { m->mothurOut("[ERROR]: could not line sequence name in line " + line + "."); m->mothurOutEndLine(); m->control_pressed = true; }
850                                                 else {
851                                                         out << line[spot] << line[spot+1];
852                                                         
853                                                         name = line.substr(spot+2);
854                                                         
855                                                         //parse name - name will either look like U68590/ab=1/ or U68590
856                                                         string restOfName = "";
857                                                         int pos = name.find_first_of('/');
858                                                         if (pos != string::npos) {
859                                                                 restOfName = name.substr(pos);
860                                                                 name = name.substr(0, pos);
861                                                         }
862                                                         
863                                                         //find unique name
864                                                         itUnique = uniqueNames.find(name);
865                                                         
866                                                         if (itUnique == uniqueNames.end()) { m->mothurOut("[ERROR]: trouble parsing alns results. Cannot find "+ name + "."); m->mothurOutEndLine();m->control_pressed = true;  }
867                                                         else {
868                                                                 //only limit repeats on query names
869                                                                 if (temp == "Query") {
870                                                                         itNames = namesInFile.find((itUnique->second));
871                                                                         
872                                                                         if (itNames == namesInFile.end()) {
873                                                                                 out << itUnique->second << restOfName << endl;
874                                                                                 namesInFile.insert((itUnique->second));
875                                                                         }
876                                                                 }else { out << itUnique->second << restOfName << endl;  }
877                                                         }
878                                                         
879                                                 }
880                                                 
881                                         }else { //not need to alter line
882                                                 out3 << line << endl;
883                                         }
884                                 }else { out3 << endl; }
885                         }
886                         in3.close();
887                         out3.close();
888                         
889                         m->mothurRemove(alnsFileName);
890                         rename((alnsFileName+".temp").c_str(), alnsFileName.c_str());
891                 }
892                 
893                 return total;
894         }
895         catch(exception& e) {
896                 m->errorOut(e, "ChimeraUchimeCommand", "deconvoluteResults");
897                 exit(1);
898         }
899 }       
900 //**********************************************************************************************************************
901 int ChimeraUchimeCommand::printFile(vector<seqPriorityNode>& nameMapCount, string filename){
902         try {
903                 
904                 sort(nameMapCount.begin(), nameMapCount.end(), compareSeqPriorityNodes);
905                 
906                 ofstream out;
907                 m->openOutputFile(filename, out);
908                 
909                 //print new file in order of
910                 for (int i = 0; i < nameMapCount.size(); i++) {
911                         out << ">" << nameMapCount[i].name  << "/ab=" << nameMapCount[i].numIdentical << "/" << endl << nameMapCount[i].seq << endl;
912                 }
913                 out.close();
914                 
915                 return 0;
916         }
917         catch(exception& e) {
918                 m->errorOut(e, "ChimeraUchimeCommand", "printFile");
919                 exit(1);
920         }
921 }       
922 //**********************************************************************************************************************
923 int ChimeraUchimeCommand::readFasta(string filename, map<string, string>& seqs){
924         try {
925                 //create input file for uchime
926                 //read through fastafile and store info
927                 ifstream in;
928                 m->openInputFile(filename, in);
929                 
930                 while (!in.eof()) {
931                         
932                         if (m->control_pressed) { in.close(); return 0; }
933                         
934                         Sequence seq(in); m->gobble(in);
935                         seqs[seq.getName()] = seq.getAligned();
936                 }
937                 in.close();
938                 
939                 return 0;
940         }
941         catch(exception& e) {
942                 m->errorOut(e, "ChimeraUchimeCommand", "readFasta");
943                 exit(1);
944         }
945 }       
946 //**********************************************************************************************************************
947
948 string ChimeraUchimeCommand::getNamesFile(string& inputFile){
949         try {
950                 string nameFile = "";
951                 
952                 m->mothurOutEndLine(); m->mothurOut("No namesfile given, running unique.seqs command to generate one."); m->mothurOutEndLine(); m->mothurOutEndLine();
953                 
954                 //use unique.seqs to create new name and fastafile
955                 string inputString = "fasta=" + inputFile;
956                 m->mothurOut("/******************************************/"); m->mothurOutEndLine(); 
957                 m->mothurOut("Running command: unique.seqs(" + inputString + ")"); m->mothurOutEndLine(); 
958                 m->mothurCalling = true;
959         
960                 Command* uniqueCommand = new DeconvoluteCommand(inputString);
961                 uniqueCommand->execute();
962                 
963                 map<string, vector<string> > filenames = uniqueCommand->getOutputFiles();
964                 
965                 delete uniqueCommand;
966                 m->mothurCalling = false;
967                 m->mothurOut("/******************************************/"); m->mothurOutEndLine(); 
968                 
969                 nameFile = filenames["name"][0];
970                 inputFile = filenames["fasta"][0];
971                 
972                 return nameFile;
973         }
974         catch(exception& e) {
975                 m->errorOut(e, "ChimeraUchimeCommand", "getNamesFile");
976                 exit(1);
977         }
978 }
979 //**********************************************************************************************************************
980 int ChimeraUchimeCommand::driverGroups(SequenceParser& parser, string outputFName, string filename, string accnos, string alns, int start, int end, vector<string> groups){
981         try {
982                 
983                 int totalSeqs = 0;
984                 int numChimeras = 0;
985                 
986                 for (int i = start; i < end; i++) {
987                         int start = time(NULL);  if (m->control_pressed) {  return 0; }
988                         
989                         int error = parser.getSeqs(groups[i], filename, true); if ((error == 1) || m->control_pressed) {  return 0; }
990                         
991                         int numSeqs = driver((outputFName + groups[i]), filename, (accnos+ groups[i]), (alns+ groups[i]), numChimeras);
992                         totalSeqs += numSeqs;
993                         
994                         if (m->control_pressed) { return 0; }
995                         
996                         //remove file made for uchime
997                         if (!m->debug) {  m->mothurRemove(filename);  }
998             else { m->mothurOut("[DEBUG]: saving file: " + filename + ".\n"); }
999                         
1000                         //append files
1001                         m->appendFiles((outputFName+groups[i]), outputFName); m->mothurRemove((outputFName+groups[i]));
1002                         m->appendFiles((accnos+groups[i]), accnos); m->mothurRemove((accnos+groups[i]));
1003                         if (chimealns) { m->appendFiles((alns+groups[i]), alns); m->mothurRemove((alns+groups[i])); }
1004                         
1005                         m->mothurOutEndLine(); m->mothurOut("It took " + toString(time(NULL) - start) + " secs to check " + toString(numSeqs) + " sequences from group " + groups[i] + ".");    m->mothurOutEndLine();                                  
1006                 }       
1007                 
1008                 return totalSeqs;
1009                 
1010         }
1011         catch(exception& e) {
1012                 m->errorOut(e, "ChimeraUchimeCommand", "driverGroups");
1013                 exit(1);
1014         }
1015 }       
1016 //**********************************************************************************************************************
1017
1018 int ChimeraUchimeCommand::driver(string outputFName, string filename, string accnos, string alns, int& numChimeras){
1019         try {
1020                 
1021                 outputFName = m->getFullPathName(outputFName);
1022                 filename = m->getFullPathName(filename);
1023                 alns = m->getFullPathName(alns);
1024                 
1025                 //to allow for spaces in the path
1026                 outputFName = "\"" + outputFName + "\"";
1027                 filename = "\"" + filename + "\"";
1028                 alns = "\"" + alns + "\"";
1029                                 
1030                 vector<char*> cPara;
1031                 
1032                 string uchimeCommand = uchimeLocation;
1033 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
1034                 uchimeCommand += " ";
1035 #else
1036                 uchimeCommand = "\"" + uchimeCommand + "\"";
1037 #endif
1038                 
1039                 char* tempUchime;
1040                 tempUchime= new char[uchimeCommand.length()+1]; 
1041                 *tempUchime = '\0';
1042                 strncat(tempUchime, uchimeCommand.c_str(), uchimeCommand.length());
1043                 cPara.push_back(tempUchime);
1044                 
1045                 char* tempIn = new char[8]; 
1046                 *tempIn = '\0'; strncat(tempIn, "--input", 7);
1047                 //strcpy(tempIn, "--input"); 
1048                 cPara.push_back(tempIn);
1049                 char* temp = new char[filename.length()+1];
1050                 *temp = '\0'; strncat(temp, filename.c_str(), filename.length());
1051                 //strcpy(temp, filename.c_str());
1052                 cPara.push_back(temp);
1053                 
1054                 //are you using a reference file
1055                 if (templatefile != "self") {
1056                         //add reference file
1057                         char* tempRef = new char[5]; 
1058                         //strcpy(tempRef, "--db"); 
1059                         *tempRef = '\0'; strncat(tempRef, "--db", 4);
1060                         cPara.push_back(tempRef);  
1061                         char* tempR = new char[templatefile.length()+1];
1062                         //strcpy(tempR, templatefile.c_str());
1063                         *tempR = '\0'; strncat(tempR, templatefile.c_str(), templatefile.length());
1064                         cPara.push_back(tempR);
1065                 }
1066                 
1067                 char* tempO = new char[12]; 
1068                 *tempO = '\0'; strncat(tempO, "--uchimeout", 11);
1069                 //strcpy(tempO, "--uchimeout"); 
1070                 cPara.push_back(tempO);
1071                 char* tempout = new char[outputFName.length()+1];
1072                 //strcpy(tempout, outputFName.c_str());
1073                 *tempout = '\0'; strncat(tempout, outputFName.c_str(), outputFName.length());
1074                 cPara.push_back(tempout);
1075                 
1076                 if (chimealns) {
1077                         char* tempA = new char[13]; 
1078                         *tempA = '\0'; strncat(tempA, "--uchimealns", 12);
1079                         //strcpy(tempA, "--uchimealns"); 
1080                         cPara.push_back(tempA);
1081                         char* tempa = new char[alns.length()+1];
1082                         //strcpy(tempa, alns.c_str());
1083                         *tempa = '\0'; strncat(tempa, alns.c_str(), alns.length());
1084                         cPara.push_back(tempa);
1085                 }
1086                 
1087                 if (useAbskew) {
1088                         char* tempskew = new char[9];
1089                         *tempskew = '\0'; strncat(tempskew, "--abskew", 8);
1090                         //strcpy(tempskew, "--abskew"); 
1091                         cPara.push_back(tempskew);
1092                         char* tempSkew = new char[abskew.length()+1];
1093                         //strcpy(tempSkew, abskew.c_str());
1094                         *tempSkew = '\0'; strncat(tempSkew, abskew.c_str(), abskew.length());
1095                         cPara.push_back(tempSkew);
1096                 }
1097                 
1098                 if (useMinH) {
1099                         char* tempminh = new char[7]; 
1100                         *tempminh = '\0'; strncat(tempminh, "--minh", 6);
1101                         //strcpy(tempminh, "--minh"); 
1102                         cPara.push_back(tempminh);
1103                         char* tempMinH = new char[minh.length()+1];
1104                         *tempMinH = '\0'; strncat(tempMinH, minh.c_str(), minh.length());
1105                         //strcpy(tempMinH, minh.c_str());
1106                         cPara.push_back(tempMinH);
1107                 }
1108                 
1109                 if (useMindiv) {
1110                         char* tempmindiv = new char[9]; 
1111                         *tempmindiv = '\0'; strncat(tempmindiv, "--mindiv", 8);
1112                         //strcpy(tempmindiv, "--mindiv"); 
1113                         cPara.push_back(tempmindiv);
1114                         char* tempMindiv = new char[mindiv.length()+1];
1115                         *tempMindiv = '\0'; strncat(tempMindiv, mindiv.c_str(), mindiv.length());
1116                         //strcpy(tempMindiv, mindiv.c_str());
1117                         cPara.push_back(tempMindiv);
1118                 }
1119                 
1120                 if (useXn) {
1121                         char* tempxn = new char[5]; 
1122                         //strcpy(tempxn, "--xn"); 
1123                         *tempxn = '\0'; strncat(tempxn, "--xn", 4);
1124                         cPara.push_back(tempxn);
1125                         char* tempXn = new char[xn.length()+1];
1126                         //strcpy(tempXn, xn.c_str());
1127                         *tempXn = '\0'; strncat(tempXn, xn.c_str(), xn.length());
1128                         cPara.push_back(tempXn);
1129                 }
1130                 
1131                 if (useDn) {
1132                         char* tempdn = new char[5]; 
1133                         //strcpy(tempdn, "--dn"); 
1134                         *tempdn = '\0'; strncat(tempdn, "--dn", 4);
1135                         cPara.push_back(tempdn);
1136                         char* tempDn = new char[dn.length()+1];
1137                         *tempDn = '\0'; strncat(tempDn, dn.c_str(), dn.length());
1138                         //strcpy(tempDn, dn.c_str());
1139                         cPara.push_back(tempDn);
1140                 }
1141                 
1142                 if (useXa) {
1143                         char* tempxa = new char[5]; 
1144                         //strcpy(tempxa, "--xa"); 
1145                         *tempxa = '\0'; strncat(tempxa, "--xa", 4);
1146                         cPara.push_back(tempxa);
1147                         char* tempXa = new char[xa.length()+1];
1148                         *tempXa = '\0'; strncat(tempXa, xa.c_str(), xa.length());
1149                         //strcpy(tempXa, xa.c_str());
1150                         cPara.push_back(tempXa);
1151                 }
1152                 
1153                 if (useChunks) {
1154                         char* tempchunks = new char[9]; 
1155                         //strcpy(tempchunks, "--chunks"); 
1156                         *tempchunks = '\0'; strncat(tempchunks, "--chunks", 8);
1157                         cPara.push_back(tempchunks);
1158                         char* tempChunks = new char[chunks.length()+1];
1159                         *tempChunks = '\0'; strncat(tempChunks, chunks.c_str(), chunks.length());
1160                         //strcpy(tempChunks, chunks.c_str());
1161                         cPara.push_back(tempChunks);
1162                 }
1163                 
1164                 if (useMinchunk) {
1165                         char* tempminchunk = new char[11]; 
1166                         //strcpy(tempminchunk, "--minchunk"); 
1167                         *tempminchunk = '\0'; strncat(tempminchunk, "--minchunk", 10);
1168                         cPara.push_back(tempminchunk);
1169                         char* tempMinchunk = new char[minchunk.length()+1];
1170                         *tempMinchunk = '\0'; strncat(tempMinchunk, minchunk.c_str(), minchunk.length());
1171                         //strcpy(tempMinchunk, minchunk.c_str());
1172                         cPara.push_back(tempMinchunk);
1173                 }
1174                 
1175                 if (useIdsmoothwindow) {
1176                         char* tempidsmoothwindow = new char[17]; 
1177                         *tempidsmoothwindow = '\0'; strncat(tempidsmoothwindow, "--idsmoothwindow", 16);
1178                         //strcpy(tempidsmoothwindow, "--idsmoothwindow"); 
1179                         cPara.push_back(tempidsmoothwindow);
1180                         char* tempIdsmoothwindow = new char[idsmoothwindow.length()+1];
1181                         *tempIdsmoothwindow = '\0'; strncat(tempIdsmoothwindow, idsmoothwindow.c_str(), idsmoothwindow.length());
1182                         //strcpy(tempIdsmoothwindow, idsmoothwindow.c_str());
1183                         cPara.push_back(tempIdsmoothwindow);
1184                 }
1185                 
1186                 /*if (useMinsmoothid) {
1187                         char* tempminsmoothid = new char[14]; 
1188                         //strcpy(tempminsmoothid, "--minsmoothid"); 
1189                         *tempminsmoothid = '\0'; strncat(tempminsmoothid, "--minsmoothid", 13);
1190                         cPara.push_back(tempminsmoothid);
1191                         char* tempMinsmoothid = new char[minsmoothid.length()+1];
1192                         *tempMinsmoothid = '\0'; strncat(tempMinsmoothid, minsmoothid.c_str(), minsmoothid.length());
1193                         //strcpy(tempMinsmoothid, minsmoothid.c_str());
1194                         cPara.push_back(tempMinsmoothid);
1195                 }*/
1196                 
1197                 if (useMaxp) {
1198                         char* tempmaxp = new char[7]; 
1199                         //strcpy(tempmaxp, "--maxp"); 
1200                         *tempmaxp = '\0'; strncat(tempmaxp, "--maxp", 6);
1201                         cPara.push_back(tempmaxp);
1202                         char* tempMaxp = new char[maxp.length()+1];
1203                         *tempMaxp = '\0'; strncat(tempMaxp, maxp.c_str(), maxp.length());
1204                         //strcpy(tempMaxp, maxp.c_str());
1205                         cPara.push_back(tempMaxp);
1206                 }
1207                 
1208                 if (!skipgaps) {
1209                         char* tempskipgaps = new char[13]; 
1210                         //strcpy(tempskipgaps, "--[no]skipgaps");
1211                         *tempskipgaps = '\0'; strncat(tempskipgaps, "--noskipgaps", 12);
1212                         cPara.push_back(tempskipgaps);
1213                 }
1214                 
1215                 if (!skipgaps2) {
1216                         char* tempskipgaps2 = new char[14]; 
1217                         //strcpy(tempskipgaps2, "--[no]skipgaps2"); 
1218                         *tempskipgaps2 = '\0'; strncat(tempskipgaps2, "--noskipgaps2", 13);
1219                         cPara.push_back(tempskipgaps2);
1220                 }
1221                 
1222                 if (useMinlen) {
1223                         char* tempminlen = new char[9]; 
1224                         *tempminlen = '\0'; strncat(tempminlen, "--minlen", 8);
1225                         //strcpy(tempminlen, "--minlen"); 
1226                         cPara.push_back(tempminlen);
1227                         char* tempMinlen = new char[minlen.length()+1];
1228                         //strcpy(tempMinlen, minlen.c_str());
1229                         *tempMinlen = '\0'; strncat(tempMinlen, minlen.c_str(), minlen.length());
1230                         cPara.push_back(tempMinlen);
1231                 }
1232                 
1233                 if (useMaxlen) {
1234                         char* tempmaxlen = new char[9]; 
1235                         //strcpy(tempmaxlen, "--maxlen"); 
1236                         *tempmaxlen = '\0'; strncat(tempmaxlen, "--maxlen", 8);
1237                         cPara.push_back(tempmaxlen);
1238                         char* tempMaxlen = new char[maxlen.length()+1];
1239                         *tempMaxlen = '\0'; strncat(tempMaxlen, maxlen.c_str(), maxlen.length());
1240                         //strcpy(tempMaxlen, maxlen.c_str());
1241                         cPara.push_back(tempMaxlen);
1242                 }
1243                 
1244                 if (ucl) {
1245                         char* tempucl = new char[5]; 
1246                         strcpy(tempucl, "--ucl"); 
1247                         cPara.push_back(tempucl);
1248                 }
1249                 
1250                 if (useQueryfract) {
1251                         char* tempqueryfract = new char[13]; 
1252                         *tempqueryfract = '\0'; strncat(tempqueryfract, "--queryfract", 12);
1253                         //strcpy(tempqueryfract, "--queryfract"); 
1254                         cPara.push_back(tempqueryfract);
1255                         char* tempQueryfract = new char[queryfract.length()+1];
1256                         *tempQueryfract = '\0'; strncat(tempQueryfract, queryfract.c_str(), queryfract.length());
1257                         //strcpy(tempQueryfract, queryfract.c_str());
1258                         cPara.push_back(tempQueryfract);
1259                 }
1260                 
1261                 
1262                 char** uchimeParameters;
1263                 uchimeParameters = new char*[cPara.size()];
1264                 string commandString = "";
1265                 for (int i = 0; i < cPara.size(); i++) {  uchimeParameters[i] = cPara[i];  commandString += toString(cPara[i]) + " "; } 
1266                 //int numArgs = cPara.size();
1267                 
1268                 //uchime_main(numArgs, uchimeParameters); 
1269                 //cout << "commandString = " << commandString << endl;
1270 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
1271 #else
1272                 commandString = "\"" + commandString + "\"";
1273 #endif
1274         if (m->debug) { m->mothurOut("[DEBUG]: uchime command = " + commandString + ".\n"); }
1275                 system(commandString.c_str());
1276                 
1277                 //free memory
1278                 for(int i = 0; i < cPara.size(); i++)  {  delete cPara[i];  }
1279                 delete[] uchimeParameters; 
1280                 
1281                 //remove "" from filenames
1282                 outputFName = outputFName.substr(1, outputFName.length()-2);
1283                 filename = filename.substr(1, filename.length()-2);
1284                 alns = alns.substr(1, alns.length()-2);
1285                 
1286                 if (m->control_pressed) { return 0; }
1287                 
1288                 //create accnos file from uchime results
1289                 ifstream in; 
1290                 m->openInputFile(outputFName, in);
1291                 
1292                 ofstream out;
1293                 m->openOutputFile(accnos, out);
1294                 
1295                 int num = 0;
1296                 numChimeras = 0;
1297                 while(!in.eof()) {
1298                         
1299                         if (m->control_pressed) { break; }
1300                         
1301                         string name = "";
1302                         string chimeraFlag = "";
1303                         in >> chimeraFlag >> name;
1304                         
1305                         //fix name if needed
1306                         if (templatefile == "self") { 
1307                                 name = name.substr(0, name.length()-1); //rip off last /
1308                                 name = name.substr(0, name.find_last_of('/'));
1309                         }
1310                         
1311                         for (int i = 0; i < 15; i++) {  in >> chimeraFlag; }
1312                         m->gobble(in);
1313                         
1314                         if (chimeraFlag == "Y") {  out << name << endl; numChimeras++; }
1315                         num++;
1316                 }
1317                 in.close();
1318                 out.close();
1319                 
1320                 return num;
1321         }
1322         catch(exception& e) {
1323                 m->errorOut(e, "ChimeraUchimeCommand", "driver");
1324                 exit(1);
1325         }
1326 }
1327 /**************************************************************************************************/
1328
1329 int ChimeraUchimeCommand::createProcesses(string outputFileName, string filename, string accnos, string alns, int& numChimeras) {
1330         try {
1331                 
1332                 processIDS.clear();
1333                 int process = 1;
1334                 int num = 0;
1335                 vector<string> files;
1336                 
1337 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)          
1338                 //break up file into multiple files
1339                 m->divideFile(filename, processors, files);
1340                 
1341                 if (m->control_pressed) {  return 0;  }
1342                                 
1343                 //loop through and create all the processes you want
1344                 while (process != processors) {
1345                         int pid = fork();
1346                         
1347                         if (pid > 0) {
1348                                 processIDS.push_back(pid);  //create map from line number to pid so you can append files in correct order later
1349                                 process++;
1350                         }else if (pid == 0){
1351                                 num = driver(outputFileName + toString(getpid()) + ".temp", files[process], accnos + toString(getpid()) + ".temp", alns + toString(getpid()) + ".temp", numChimeras);
1352                                 
1353                                 //pass numSeqs to parent
1354                                 ofstream out;
1355                                 string tempFile = outputFileName + toString(getpid()) + ".num.temp";
1356                                 m->openOutputFile(tempFile, out);
1357                                 out << num << endl;
1358                                 out << numChimeras << endl;
1359                                 out.close();
1360                                 
1361                                 exit(0);
1362                         }else { 
1363                                 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine(); 
1364                                 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
1365                                 exit(0);
1366                         }
1367                 }
1368                 
1369                 //do my part
1370                 num = driver(outputFileName, files[0], accnos, alns, numChimeras);
1371                 
1372                 //force parent to wait until all the processes are done
1373                 for (int i=0;i<processIDS.size();i++) { 
1374                         int temp = processIDS[i];
1375                         wait(&temp);
1376                 }
1377                 
1378                 for (int i = 0; i < processIDS.size(); i++) {
1379                         ifstream in;
1380                         string tempFile =  outputFileName + toString(processIDS[i]) + ".num.temp";
1381                         m->openInputFile(tempFile, in);
1382                         if (!in.eof()) { 
1383                                 int tempNum = 0; 
1384                                 in >> tempNum; m->gobble(in);
1385                                 num += tempNum; 
1386                                 in >> tempNum;
1387                                 numChimeras += tempNum;
1388                         }
1389                         in.close(); m->mothurRemove(tempFile);
1390                 }
1391 #else
1392                 //////////////////////////////////////////////////////////////////////////////////////////////////////
1393                 //Windows version shared memory, so be careful when passing variables through the preClusterData struct. 
1394                 //Above fork() will clone, so memory is separate, but that's not the case with windows, 
1395                 //////////////////////////////////////////////////////////////////////////////////////////////////////
1396                 
1397                 //divide file
1398                 int count = 0;
1399                 int spot = 0;
1400                 map<int, ofstream*> filehandles;
1401                 map<int, ofstream*>::iterator it3;
1402                 
1403                 ofstream* temp;
1404                 for (int i = 0; i < processors; i++) {
1405                         temp = new ofstream;
1406                         filehandles[i] = temp;
1407                         m->openOutputFile(filename+toString(i)+".temp", *(temp));
1408                         files.push_back(filename+toString(i)+".temp");
1409                 }
1410                 
1411                 ifstream in;
1412                 m->openInputFile(filename, in);
1413                 
1414                 while(!in.eof()) {
1415                         
1416                         if (m->control_pressed) { in.close(); for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) { (*(it3->second)).close(); delete it3->second; } return 0; }
1417                         
1418                         Sequence tempSeq(in); m->gobble(in); 
1419                         
1420                         if (tempSeq.getName() != "") {
1421                                 tempSeq.printSequence(*(filehandles[spot])); 
1422                                 spot++; count++;
1423                                 if (spot == processors) { spot = 0; }
1424                         }
1425                 }
1426                 in.close();
1427                 
1428                 //delete memory
1429                 for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) {
1430                         (*(it3->second)).close();
1431                         delete it3->second;
1432                 }
1433                 
1434                 //sanity check for number of processors
1435                 if (count < processors) { processors = count; }
1436                 
1437                 vector<uchimeData*> pDataArray; 
1438                 DWORD   dwThreadIdArray[processors-1];
1439                 HANDLE  hThreadArray[processors-1]; 
1440                 vector<string> dummy; //used so that we can use the same struct for MyUchimeSeqsThreadFunction and MyUchimeThreadFunction
1441                 
1442                 //Create processor worker threads.
1443                 for( int i=1; i<processors; i++ ){
1444                         // Allocate memory for thread data.
1445                         string extension = toString(i) + ".temp";
1446                         
1447                         uchimeData* tempUchime = new uchimeData(outputFileName+extension, uchimeLocation, templatefile, files[i], "", "", "", accnos+extension, alns+extension, dummy, m, 0, 0,  i);
1448                         tempUchime->setBooleans(useAbskew, chimealns, useMinH, useMindiv, useXn, useDn, useXa, useChunks, useMinchunk, useIdsmoothwindow, useMinsmoothid, useMaxp, skipgaps, skipgaps2, useMinlen, useMaxlen, ucl, useQueryfract);
1449                         tempUchime->setVariables(abskew, minh, mindiv, xn, dn, xa, chunks, minchunk, idsmoothwindow, minsmoothid, maxp, minlen, maxlen, queryfract);
1450                         
1451                         pDataArray.push_back(tempUchime);
1452                         processIDS.push_back(i);
1453                         
1454                         //MySeqSumThreadFunction is in header. It must be global or static to work with the threads.
1455                         //default security attributes, thread function name, argument to thread function, use default creation flags, returns the thread identifier
1456                         hThreadArray[i-1] = CreateThread(NULL, 0, MyUchimeSeqsThreadFunction, pDataArray[i-1], 0, &dwThreadIdArray[i-1]);   
1457                 }
1458                 
1459                 
1460                 //using the main process as a worker saves time and memory
1461                 num = driver(outputFileName, files[0], accnos, alns, numChimeras);
1462                 
1463                 //Wait until all threads have terminated.
1464                 WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE);
1465                 
1466                 //Close all thread handles and free memory allocations.
1467                 for(int i=0; i < pDataArray.size(); i++){
1468                         num += pDataArray[i]->count;
1469                         numChimeras += pDataArray[i]->numChimeras;
1470                         CloseHandle(hThreadArray[i]);
1471                         delete pDataArray[i];
1472                 }
1473 #endif          
1474                 
1475                 //append output files
1476                 for(int i=0;i<processIDS.size();i++){
1477                         m->appendFiles((outputFileName + toString(processIDS[i]) + ".temp"), outputFileName);
1478                         m->mothurRemove((outputFileName + toString(processIDS[i]) + ".temp"));
1479                         
1480                         m->appendFiles((accnos + toString(processIDS[i]) + ".temp"), accnos);
1481                         m->mothurRemove((accnos + toString(processIDS[i]) + ".temp"));
1482                         
1483                         if (chimealns) {
1484                                 m->appendFiles((alns + toString(processIDS[i]) + ".temp"), alns);
1485                                 m->mothurRemove((alns + toString(processIDS[i]) + ".temp"));
1486                         }
1487                 }
1488                 
1489                 //get rid of the file pieces.
1490                 for (int i = 0; i < files.size(); i++) { m->mothurRemove(files[i]); }
1491                 return num;     
1492         }
1493         catch(exception& e) {
1494                 m->errorOut(e, "ChimeraUchimeCommand", "createProcesses");
1495                 exit(1);
1496         }
1497 }
1498 /**************************************************************************************************/
1499
1500 int ChimeraUchimeCommand::createProcessesGroups(SequenceParser& parser, string outputFName, string filename, string accnos, string alns, vector<string> groups, string nameFile, string groupFile, string fastaFile) {
1501         try {
1502                 
1503                 processIDS.clear();
1504                 int process = 1;
1505                 int num = 0;
1506                 
1507                 //sanity check
1508                 if (groups.size() < processors) { processors = groups.size(); }
1509                 
1510                 //divide the groups between the processors
1511                 vector<linePair> lines;
1512                 int numGroupsPerProcessor = groups.size() / processors;
1513                 for (int i = 0; i < processors; i++) {
1514                         int startIndex =  i * numGroupsPerProcessor;
1515                         int endIndex = (i+1) * numGroupsPerProcessor;
1516                         if(i == (processors - 1)){      endIndex = groups.size();       }
1517                         lines.push_back(linePair(startIndex, endIndex));
1518                 }
1519                 
1520 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)          
1521                                 
1522                 //loop through and create all the processes you want
1523                 while (process != processors) {
1524                         int pid = fork();
1525                         
1526                         if (pid > 0) {
1527                                 processIDS.push_back(pid);  //create map from line number to pid so you can append files in correct order later
1528                                 process++;
1529                         }else if (pid == 0){
1530                                 num = driverGroups(parser, outputFName + toString(getpid()) + ".temp", filename + toString(getpid()) + ".temp", accnos + toString(getpid()) + ".temp", alns + toString(getpid()) + ".temp", lines[process].start, lines[process].end, groups);
1531                                 
1532                                 //pass numSeqs to parent
1533                                 ofstream out;
1534                                 string tempFile = outputFName + toString(getpid()) + ".num.temp";
1535                                 m->openOutputFile(tempFile, out);
1536                                 out << num << endl;
1537                                 out.close();
1538                                 
1539                                 exit(0);
1540                         }else { 
1541                                 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine(); 
1542                                 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
1543                                 exit(0);
1544                         }
1545                 }
1546                 
1547                 //do my part
1548                 num = driverGroups(parser, outputFName, filename, accnos, alns, lines[0].start, lines[0].end, groups);
1549                 
1550                 //force parent to wait until all the processes are done
1551                 for (int i=0;i<processIDS.size();i++) { 
1552                         int temp = processIDS[i];
1553                         wait(&temp);
1554                 }
1555                 
1556                 for (int i = 0; i < processIDS.size(); i++) {
1557                         ifstream in;
1558                         string tempFile =  outputFName + toString(processIDS[i]) + ".num.temp";
1559                         m->openInputFile(tempFile, in);
1560                         if (!in.eof()) { int tempNum = 0; in >> tempNum; num += tempNum; }
1561                         in.close(); m->mothurRemove(tempFile);
1562                 }
1563                                 
1564 #else
1565                 //////////////////////////////////////////////////////////////////////////////////////////////////////
1566                 //Windows version shared memory, so be careful when passing variables through the uchimeData struct. 
1567                 //Above fork() will clone, so memory is separate, but that's not the case with windows, 
1568                 //////////////////////////////////////////////////////////////////////////////////////////////////////
1569                 
1570                 vector<uchimeData*> pDataArray; 
1571                 DWORD   dwThreadIdArray[processors-1];
1572                 HANDLE  hThreadArray[processors-1]; 
1573                 
1574                 //Create processor worker threads.
1575                 for( int i=1; i<processors; i++ ){
1576                         // Allocate memory for thread data.
1577                         string extension = toString(i) + ".temp";
1578                         
1579                         uchimeData* tempUchime = new uchimeData(outputFName+extension, uchimeLocation, templatefile, filename+extension, fastaFile, nameFile, groupFile, accnos+extension, alns+extension, groups, m, lines[i].start, lines[i].end,  i);
1580                         tempUchime->setBooleans(useAbskew, chimealns, useMinH, useMindiv, useXn, useDn, useXa, useChunks, useMinchunk, useIdsmoothwindow, useMinsmoothid, useMaxp, skipgaps, skipgaps2, useMinlen, useMaxlen, ucl, useQueryfract);
1581                         tempUchime->setVariables(abskew, minh, mindiv, xn, dn, xa, chunks, minchunk, idsmoothwindow, minsmoothid, maxp, minlen, maxlen, queryfract);
1582                         
1583                         pDataArray.push_back(tempUchime);
1584                         processIDS.push_back(i);
1585                         
1586                         //MyUchimeThreadFunction is in header. It must be global or static to work with the threads.
1587                         //default security attributes, thread function name, argument to thread function, use default creation flags, returns the thread identifier
1588                         hThreadArray[i-1] = CreateThread(NULL, 0, MyUchimeThreadFunction, pDataArray[i-1], 0, &dwThreadIdArray[i-1]);   
1589                 }
1590                 
1591                 
1592                 //using the main process as a worker saves time and memory
1593                 num = driverGroups(parser, outputFName, filename, accnos, alns, lines[0].start, lines[0].end, groups);
1594                 
1595                 //Wait until all threads have terminated.
1596                 WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE);
1597                 
1598                 //Close all thread handles and free memory allocations.
1599                 for(int i=0; i < pDataArray.size(); i++){
1600                         num += pDataArray[i]->count;
1601                         CloseHandle(hThreadArray[i]);
1602                         delete pDataArray[i];
1603                 }
1604 #endif          
1605                 
1606                                 
1607                 //append output files
1608                 for(int i=0;i<processIDS.size();i++){
1609                         m->appendFiles((outputFName + toString(processIDS[i]) + ".temp"), outputFName);
1610                         m->mothurRemove((outputFName + toString(processIDS[i]) + ".temp"));
1611                         
1612                         m->appendFiles((accnos + toString(processIDS[i]) + ".temp"), accnos);
1613                         m->mothurRemove((accnos + toString(processIDS[i]) + ".temp"));
1614                         
1615                         if (chimealns) {
1616                                 m->appendFiles((alns + toString(processIDS[i]) + ".temp"), alns);
1617                                 m->mothurRemove((alns + toString(processIDS[i]) + ".temp"));
1618                         }
1619                 }
1620                 
1621                 return num;     
1622                 
1623         }
1624         catch(exception& e) {
1625                 m->errorOut(e, "ChimeraUchimeCommand", "createProcessesGroups");
1626                 exit(1);
1627         }
1628 }
1629 /**************************************************************************************************/
1630