]> git.donarmstrong.com Git - mothur.git/blob - chimerauchimecommand.cpp
Parallelized chimera.uchime for Windows, both by group and with a template.
[mothur.git] / chimerauchimecommand.cpp
1 /*
2  *  chimerauchimecommand.cpp
3  *  Mothur
4  *
5  *  Created by westcott on 5/13/11.
6  *  Copyright 2011 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "chimerauchimecommand.h"
11 #include "deconvolutecommand.h"
12 //#include "uc.h"
13 #include "sequence.hpp"
14 #include "referencedb.h"
15
16
17 //**********************************************************************************************************************
18 vector<string> ChimeraUchimeCommand::setParameters(){   
19         try {
20                 CommandParameter ptemplate("reference", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(ptemplate);
21                 CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pfasta);
22                 CommandParameter pname("name", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pname);
23                 CommandParameter pgroup("group", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pgroup);
24                 CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors);
25                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
26                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
27                 CommandParameter pabskew("abskew", "Number", "", "1.9", "", "", "",false,false); parameters.push_back(pabskew);
28                 CommandParameter pchimealns("chimealns", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pchimealns);
29                 CommandParameter pminh("minh", "Number", "", "0.3", "", "", "",false,false); parameters.push_back(pminh);
30                 CommandParameter pmindiv("mindiv", "Number", "", "0.5", "", "", "",false,false); parameters.push_back(pmindiv);
31                 CommandParameter pxn("xn", "Number", "", "8.0", "", "", "",false,false); parameters.push_back(pxn);
32                 CommandParameter pdn("dn", "Number", "", "1.4", "", "", "",false,false); parameters.push_back(pdn);
33                 CommandParameter pxa("xa", "Number", "", "1", "", "", "",false,false); parameters.push_back(pxa);
34                 CommandParameter pchunks("chunks", "Number", "", "4", "", "", "",false,false); parameters.push_back(pchunks);
35                 CommandParameter pminchunk("minchunk", "Number", "", "64", "", "", "",false,false); parameters.push_back(pminchunk);
36                 CommandParameter pidsmoothwindow("idsmoothwindow", "Number", "", "32", "", "", "",false,false); parameters.push_back(pidsmoothwindow);
37                 //CommandParameter pminsmoothid("minsmoothid", "Number", "", "0.95", "", "", "",false,false); parameters.push_back(pminsmoothid);
38                 CommandParameter pmaxp("maxp", "Number", "", "2", "", "", "",false,false); parameters.push_back(pmaxp);
39                 CommandParameter pskipgaps("skipgaps", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(pskipgaps);
40                 CommandParameter pskipgaps2("skipgaps2", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(pskipgaps2);
41                 CommandParameter pminlen("minlen", "Number", "", "10", "", "", "",false,false); parameters.push_back(pminlen);
42                 CommandParameter pmaxlen("maxlen", "Number", "", "10000", "", "", "",false,false); parameters.push_back(pmaxlen);
43                 CommandParameter pucl("ucl", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pucl);
44                 CommandParameter pqueryfract("queryfract", "Number", "", "0.5", "", "", "",false,false); parameters.push_back(pqueryfract);
45
46                 vector<string> myArray;
47                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
48                 return myArray;
49         }
50         catch(exception& e) {
51                 m->errorOut(e, "ChimeraUchimeCommand", "setParameters");
52                 exit(1);
53         }
54 }
55 //**********************************************************************************************************************
56 string ChimeraUchimeCommand::getHelpString(){   
57         try {
58                 string helpString = "";
59                 helpString += "The chimera.uchime command reads a fastafile and referencefile and outputs potentially chimeric sequences.\n";
60                 helpString += "This command is a wrapper for uchime written by Robert C. Edgar.\n";
61                 helpString += "The chimera.uchime command parameters are fasta, name, reference, processors, abskew, chimealns, minh, mindiv, xn, dn, xa, chunks, minchunk, idsmoothwindow, minsmoothid, maxp, skipgaps, skipgaps2, minlen, maxlen, ucl and queryfact.\n";
62                 helpString += "The fasta parameter allows you to enter the fasta file containing your potentially chimeric sequences, and is required, unless you have a valid current fasta file. \n";
63                 helpString += "The name parameter allows you to provide a name file, if you are using template=self. \n";
64                 helpString += "You may enter multiple fasta files by separating their names with dashes. ie. fasta=abrecovery.fasta-amazon.fasta \n";
65                 helpString += "The group parameter allows you to provide a group file. The group file can be used with a namesfile and reference=self. When checking sequences, only sequences from the same group as the query sequence will be used as the reference. \n";
66                 helpString += "The reference parameter allows you to enter a reference file containing known non-chimeric sequences, and is required. You may also set template=self, in this case the abundant sequences will be used as potential parents. \n";
67                 helpString += "The processors parameter allows you to specify how many processors you would like to use.  The default is 1. \n";
68                 helpString += "The abskew parameter can only be used with template=self. Minimum abundance skew. Default 1.9. Abundance skew is: min [ abund(parent1), abund(parent2) ] / abund(query).\n";
69                 helpString += "The chimealns parameter allows you to indicate you would like a file containing multiple alignments of query sequences to parents in human readable format. Alignments show columns with differences that support or contradict a chimeric model.\n";
70                 helpString += "The minh parameter - mininum score to report chimera. Default 0.3. Values from 0.1 to 5 might be reasonable. Lower values increase sensitivity but may report more false positives. If you decrease xn you may need to increase minh, and vice versa.\n";
71                 helpString += "The mindiv parameter - minimum divergence ratio, default 0.5. Div ratio is 100%% - %%identity between query sequence and the closest candidate for being a parent. If you don't care about very close chimeras, then you could increase mindiv to, say, 1.0 or 2.0, and also decrease minh, say to 0.1, to increase sensitivity. How well this works will depend on your data. Best is to tune parameters on a good benchmark.\n";
72                 helpString += "The xn parameter - weight of a no vote. Default 8.0. Decreasing this weight to around 3 or 4 may give better performance on denoised data.\n";
73                 helpString += "The dn parameter - pseudo-count prior on number of no votes. Default 1.4. Probably no good reason to change this unless you can retune to a good benchmark for your data. Reasonable values are probably in the range from 0.2 to 2.\n";
74                 helpString += "The xa parameter - weight of an abstain vote. Default 1. So far, results do not seem to be very sensitive to this parameter, but if you have a good training set might be worth trying. Reasonable values might range from 0.1 to 2.\n";
75                 helpString += "The chunks parameter is the number of chunks to extract from the query sequence when searching for parents. Default 4.\n";
76                 helpString += "The minchunk parameter is the minimum length of a chunk. Default 64.\n";
77                 helpString += "The idsmoothwindow parameter is the length of id smoothing window. Default 32.\n";
78                 //helpString += "The minsmoothid parameter - minimum factional identity over smoothed window of candidate parent. Default 0.95.\n";
79                 helpString += "The maxp parameter - maximum number of candidate parents to consider. Default 2. In tests so far, increasing maxp gives only a very small improvement in sensivity but tends to increase the error rate quite a bit.\n";
80                 helpString += "The skipgaps parameter controls how gapped columns affect counting of diffs. If skipgaps is set to T, columns containing gaps do not found as diffs. Default = T.\n";
81                 helpString += "The skipgaps2 parameter controls how gapped columns affect counting of diffs. If skipgaps2 is set to T, if column is immediately adjacent to a column containing a gap, it is not counted as a diff. Default = T.\n";
82                 helpString += "The minlen parameter is the minimum unaligned sequence length. Defaults 10. Applies to both query and reference sequences.\n";
83                 helpString += "The maxlen parameter is the maximum unaligned sequence length. Defaults 10000. Applies to both query and reference sequences.\n";
84                 helpString += "The ucl parameter - use local-X alignments. Default is global-X or false. On tests so far, global-X is always better; this option is retained because it just might work well on some future type of data.\n";
85                 helpString += "The queryfract parameter - minimum fraction of the query sequence that must be covered by a local-X alignment. Default 0.5. Applies only when ucl is true.\n";
86 #ifdef USE_MPI
87                 helpString += "When using MPI, the processors parameter is set to the number of MPI processes running. \n";
88 #endif
89                 helpString += "The chimera.uchime command should be in the following format: \n";
90                 helpString += "chimera.uchime(fasta=yourFastaFile, reference=yourTemplate) \n";
91                 helpString += "Example: chimera.uchime(fasta=AD.align, reference=silva.gold.align) \n";
92                 helpString += "Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFastaFile).\n";       
93                 return helpString;
94         }
95         catch(exception& e) {
96                 m->errorOut(e, "ChimeraUchimeCommand", "getHelpString");
97                 exit(1);
98         }
99 }
100 //**********************************************************************************************************************
101 ChimeraUchimeCommand::ChimeraUchimeCommand(){   
102         try {
103                 abort = true; calledHelp = true;
104                 setParameters();
105                 vector<string> tempOutNames;
106                 outputTypes["chimera"] = tempOutNames;
107                 outputTypes["accnos"] = tempOutNames;
108                 outputTypes["alns"] = tempOutNames;
109         }
110         catch(exception& e) {
111                 m->errorOut(e, "ChimeraUchimeCommand", "ChimeraUchimeCommand");
112                 exit(1);
113         }
114 }
115 //***************************************************************************************************************
116 ChimeraUchimeCommand::ChimeraUchimeCommand(string option)  {
117         try {
118                 abort = false; calledHelp = false; 
119                 ReferenceDB* rdb = ReferenceDB::getInstance();
120                 
121                 //allow user to run help
122                 if(option == "help") { help(); abort = true; calledHelp = true; }
123                 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
124                 
125                 else {
126                         vector<string> myArray = setParameters();
127                         
128                         OptionParser parser(option);
129                         map<string,string> parameters = parser.getParameters();
130                         
131                         ValidParameters validParameter("chimera.uchime");
132                         map<string,string>::iterator it;
133                         
134                         //check to make sure all parameters are valid for command
135                         for (it = parameters.begin(); it != parameters.end(); it++) { 
136                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
137                         }
138                         
139                         vector<string> tempOutNames;
140                         outputTypes["chimera"] = tempOutNames;
141                         outputTypes["accnos"] = tempOutNames;
142                         outputTypes["alns"] = tempOutNames;
143                         
144                         //if the user changes the input directory command factory will send this info to us in the output parameter 
145                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
146                         if (inputDir == "not found"){   inputDir = "";          }
147                         
148                         //check for required parameters
149                         fastafile = validParameter.validFile(parameters, "fasta", false);
150                         if (fastafile == "not found") {                                 
151                                 //if there is a current fasta file, use it
152                                 string filename = m->getFastaFile(); 
153                                 if (filename != "") { fastaFileNames.push_back(filename); m->mothurOut("Using " + filename + " as input file for the fasta parameter."); m->mothurOutEndLine(); }
154                                 else {  m->mothurOut("You have no current fastafile and the fasta parameter is required."); m->mothurOutEndLine(); abort = true; }
155                         }else { 
156                                 m->splitAtDash(fastafile, fastaFileNames);
157                                 
158                                 //go through files and make sure they are good, if not, then disregard them
159                                 for (int i = 0; i < fastaFileNames.size(); i++) {
160                                         
161                                         bool ignore = false;
162                                         if (fastaFileNames[i] == "current") { 
163                                                 fastaFileNames[i] = m->getFastaFile(); 
164                                                 if (fastaFileNames[i] != "") {  m->mothurOut("Using " + fastaFileNames[i] + " as input file for the fasta parameter where you had given current."); m->mothurOutEndLine(); }
165                                                 else {  
166                                                         m->mothurOut("You have no current fastafile, ignoring current."); m->mothurOutEndLine(); ignore=true; 
167                                                         //erase from file list
168                                                         fastaFileNames.erase(fastaFileNames.begin()+i);
169                                                         i--;
170                                                 }
171                                         }
172                                         
173                                         if (!ignore) {
174                                                 
175                                                 if (inputDir != "") {
176                                                         string path = m->hasPath(fastaFileNames[i]);
177                                                         //if the user has not given a path then, add inputdir. else leave path alone.
178                                                         if (path == "") {       fastaFileNames[i] = inputDir + fastaFileNames[i];               }
179                                                 }
180                                                 
181                                                 int ableToOpen;
182                                                 ifstream in;
183                                                 
184                                                 ableToOpen = m->openInputFile(fastaFileNames[i], in, "noerror");
185                                                 
186                                                 //if you can't open it, try default location
187                                                 if (ableToOpen == 1) {
188                                                         if (m->getDefaultPath() != "") { //default path is set
189                                                                 string tryPath = m->getDefaultPath() + m->getSimpleName(fastaFileNames[i]);
190                                                                 m->mothurOut("Unable to open " + fastaFileNames[i] + ". Trying default " + tryPath); m->mothurOutEndLine();
191                                                                 ifstream in2;
192                                                                 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
193                                                                 in2.close();
194                                                                 fastaFileNames[i] = tryPath;
195                                                         }
196                                                 }
197                                                 
198                                                 if (ableToOpen == 1) {
199                                                         if (m->getOutputDir() != "") { //default path is set
200                                                                 string tryPath = m->getOutputDir() + m->getSimpleName(fastaFileNames[i]);
201                                                                 m->mothurOut("Unable to open " + fastaFileNames[i] + ". Trying output directory " + tryPath); m->mothurOutEndLine();
202                                                                 ifstream in2;
203                                                                 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
204                                                                 in2.close();
205                                                                 fastaFileNames[i] = tryPath;
206                                                         }
207                                                 }
208                                                 
209                                                 in.close();
210                                                 
211                                                 if (ableToOpen == 1) { 
212                                                         m->mothurOut("Unable to open " + fastaFileNames[i] + ". It will be disregarded."); m->mothurOutEndLine(); 
213                                                         //erase from file list
214                                                         fastaFileNames.erase(fastaFileNames.begin()+i);
215                                                         i--;
216                                                 }else {
217                                                         m->setFastaFile(fastaFileNames[i]);
218                                                 }
219                                         }
220                                 }
221                                 
222                                 //make sure there is at least one valid file left
223                                 if (fastaFileNames.size() == 0) { m->mothurOut("[ERROR]: no valid files."); m->mothurOutEndLine(); abort = true; }
224                         }
225                         
226                         
227                         //check for required parameters
228                         bool hasName = true;
229                         namefile = validParameter.validFile(parameters, "name", false);
230                         if (namefile == "not found") { namefile = "";  hasName = false; }
231                         else { 
232                                 m->splitAtDash(namefile, nameFileNames);
233                                 
234                                 //go through files and make sure they are good, if not, then disregard them
235                                 for (int i = 0; i < nameFileNames.size(); i++) {
236                                         
237                                         bool ignore = false;
238                                         if (nameFileNames[i] == "current") { 
239                                                 nameFileNames[i] = m->getNameFile(); 
240                                                 if (nameFileNames[i] != "") {  m->mothurOut("Using " + nameFileNames[i] + " as input file for the name parameter where you had given current."); m->mothurOutEndLine(); }
241                                                 else {  
242                                                         m->mothurOut("You have no current namefile, ignoring current."); m->mothurOutEndLine(); ignore=true; 
243                                                         //erase from file list
244                                                         nameFileNames.erase(nameFileNames.begin()+i);
245                                                         i--;
246                                                 }
247                                         }
248                                         
249                                         if (!ignore) {
250                                                 
251                                                 if (inputDir != "") {
252                                                         string path = m->hasPath(nameFileNames[i]);
253                                                         //if the user has not given a path then, add inputdir. else leave path alone.
254                                                         if (path == "") {       nameFileNames[i] = inputDir + nameFileNames[i];         }
255                                                 }
256                                                 
257                                                 int ableToOpen;
258                                                 ifstream in;
259                                                 
260                                                 ableToOpen = m->openInputFile(nameFileNames[i], in, "noerror");
261                                                 
262                                                 //if you can't open it, try default location
263                                                 if (ableToOpen == 1) {
264                                                         if (m->getDefaultPath() != "") { //default path is set
265                                                                 string tryPath = m->getDefaultPath() + m->getSimpleName(nameFileNames[i]);
266                                                                 m->mothurOut("Unable to open " + nameFileNames[i] + ". Trying default " + tryPath); m->mothurOutEndLine();
267                                                                 ifstream in2;
268                                                                 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
269                                                                 in2.close();
270                                                                 nameFileNames[i] = tryPath;
271                                                         }
272                                                 }
273                                                 
274                                                 if (ableToOpen == 1) {
275                                                         if (m->getOutputDir() != "") { //default path is set
276                                                                 string tryPath = m->getOutputDir() + m->getSimpleName(nameFileNames[i]);
277                                                                 m->mothurOut("Unable to open " + nameFileNames[i] + ". Trying output directory " + tryPath); m->mothurOutEndLine();
278                                                                 ifstream in2;
279                                                                 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
280                                                                 in2.close();
281                                                                 nameFileNames[i] = tryPath;
282                                                         }
283                                                 }
284                                                 
285                                                 in.close();
286                                                 
287                                                 if (ableToOpen == 1) { 
288                                                         m->mothurOut("Unable to open " + nameFileNames[i] + ". It will be disregarded."); m->mothurOutEndLine(); 
289                                                         //erase from file list
290                                                         nameFileNames.erase(nameFileNames.begin()+i);
291                                                         i--;
292                                                 }else {
293                                                         m->setNameFile(nameFileNames[i]);
294                                                 }
295                                         }
296                                 }
297                                 
298                                 //make sure there is at least one valid file left
299                                 if (nameFileNames.size() == 0) { m->mothurOut("[ERROR]: no valid name files."); m->mothurOutEndLine(); abort = true; }
300                         }
301                         
302                         if (hasName && (nameFileNames.size() != fastaFileNames.size())) { m->mothurOut("[ERROR]: The number of namefiles does not match the number of fastafiles, please correct."); m->mothurOutEndLine(); abort=true; }
303                         
304                         bool hasGroup = true;
305                         groupfile = validParameter.validFile(parameters, "group", false);
306                         if (groupfile == "not found") { groupfile = "";  hasGroup = false; }
307                         else { 
308                                 m->splitAtDash(groupfile, groupFileNames);
309                                 
310                                 //go through files and make sure they are good, if not, then disregard them
311                                 for (int i = 0; i < groupFileNames.size(); i++) {
312                                         
313                                         bool ignore = false;
314                                         if (groupFileNames[i] == "current") { 
315                                                 groupFileNames[i] = m->getGroupFile(); 
316                                                 if (groupFileNames[i] != "") {  m->mothurOut("Using " + groupFileNames[i] + " as input file for the group parameter where you had given current."); m->mothurOutEndLine(); }
317                                                 else {  
318                                                         m->mothurOut("You have no current namefile, ignoring current."); m->mothurOutEndLine(); ignore=true; 
319                                                         //erase from file list
320                                                         groupFileNames.erase(groupFileNames.begin()+i);
321                                                         i--;
322                                                 }
323                                         }
324                                         
325                                         if (!ignore) {
326                                                 
327                                                 if (inputDir != "") {
328                                                         string path = m->hasPath(groupFileNames[i]);
329                                                         //if the user has not given a path then, add inputdir. else leave path alone.
330                                                         if (path == "") {       groupFileNames[i] = inputDir + groupFileNames[i];               }
331                                                 }
332                                                 
333                                                 int ableToOpen;
334                                                 ifstream in;
335                                                 
336                                                 ableToOpen = m->openInputFile(groupFileNames[i], in, "noerror");
337                                                 
338                                                 //if you can't open it, try default location
339                                                 if (ableToOpen == 1) {
340                                                         if (m->getDefaultPath() != "") { //default path is set
341                                                                 string tryPath = m->getDefaultPath() + m->getSimpleName(groupFileNames[i]);
342                                                                 m->mothurOut("Unable to open " + groupFileNames[i] + ". Trying default " + tryPath); m->mothurOutEndLine();
343                                                                 ifstream in2;
344                                                                 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
345                                                                 in2.close();
346                                                                 groupFileNames[i] = tryPath;
347                                                         }
348                                                 }
349                                                 
350                                                 if (ableToOpen == 1) {
351                                                         if (m->getOutputDir() != "") { //default path is set
352                                                                 string tryPath = m->getOutputDir() + m->getSimpleName(groupFileNames[i]);
353                                                                 m->mothurOut("Unable to open " + groupFileNames[i] + ". Trying output directory " + tryPath); m->mothurOutEndLine();
354                                                                 ifstream in2;
355                                                                 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
356                                                                 in2.close();
357                                                                 groupFileNames[i] = tryPath;
358                                                         }
359                                                 }
360                                                 
361                                                 in.close();
362                                                 
363                                                 if (ableToOpen == 1) { 
364                                                         m->mothurOut("Unable to open " + groupFileNames[i] + ". It will be disregarded."); m->mothurOutEndLine(); 
365                                                         //erase from file list
366                                                         groupFileNames.erase(groupFileNames.begin()+i);
367                                                         i--;
368                                                 }else {
369                                                         m->setGroupFile(groupFileNames[i]);
370                                                 }
371                                         }
372                                 }
373                                 
374                                 //make sure there is at least one valid file left
375                                 if (groupFileNames.size() == 0) { m->mothurOut("[ERROR]: no valid group files."); m->mothurOutEndLine(); abort = true; }
376                         }
377                         
378                         if (hasGroup && (groupFileNames.size() != fastaFileNames.size())) { m->mothurOut("[ERROR]: The number of groupfiles does not match the number of fastafiles, please correct."); m->mothurOutEndLine(); abort=true; }
379                         
380                         
381                         //if the user changes the output directory command factory will send this info to us in the output parameter 
382                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = ""; }
383                         
384                         string path;
385                         it = parameters.find("reference");
386                         //user has given a template file
387                         if(it != parameters.end()){ 
388                                 if (it->second == "self") { templatefile = "self"; }
389                                 else {
390                                         path = m->hasPath(it->second);
391                                         //if the user has not given a path then, add inputdir. else leave path alone.
392                                         if (path == "") {       parameters["reference"] = inputDir + it->second;                }
393                                         
394                                         templatefile = validParameter.validFile(parameters, "reference", true);
395                                         if (templatefile == "not open") { abort = true; }
396                                         else if (templatefile == "not found") { //check for saved reference sequences
397                                                 if (rdb->getSavedReference() != "") {
398                                                         templatefile = rdb->getSavedReference();
399                                                         m->mothurOutEndLine();  m->mothurOut("Using sequences from " + rdb->getSavedReference() + "."); m->mothurOutEndLine();
400                                                 }else {
401                                                         m->mothurOut("[ERROR]: You don't have any saved reference sequences and the reference parameter is a required."); 
402                                                         m->mothurOutEndLine();
403                                                         abort = true; 
404                                                 }
405                                         }
406                                 }
407                         }else if (hasName) {  templatefile = "self"; }
408                         else { 
409                                 if (rdb->getSavedReference() != "") {
410                                         templatefile = rdb->getSavedReference();
411                                         m->mothurOutEndLine();  m->mothurOut("Using sequences from " + rdb->getSavedReference() + "."); m->mothurOutEndLine();
412                                 }else {
413                                         m->mothurOut("[ERROR]: You don't have any saved reference sequences and the reference parameter is a required."); 
414                                         m->mothurOutEndLine();
415                                         templatefile = ""; abort = true; 
416                                 } 
417                         }
418                                 
419                         string temp = validParameter.validFile(parameters, "processors", false);        if (temp == "not found"){       temp = m->getProcessors();      }
420                         m->setProcessors(temp);
421                         convert(temp, processors);
422                         
423                         abskew = validParameter.validFile(parameters, "abskew", false); if (abskew == "not found"){     useAbskew = false;  abskew = "1.9";     }else{  useAbskew = true;  }
424                         if (useAbskew && templatefile != "self") { m->mothurOut("The abskew parameter is only valid with template=self, ignoring."); m->mothurOutEndLine(); useAbskew = false; }
425                         
426                         temp = validParameter.validFile(parameters, "chimealns", false);                        if (temp == "not found") { temp = "f"; }
427                         chimealns = m->isTrue(temp); 
428                         
429                         minh = validParameter.validFile(parameters, "minh", false);                                             if (minh == "not found")                        { useMinH = false; minh = "0.3";                                        }       else{ useMinH = true;                   }
430                         mindiv = validParameter.validFile(parameters, "mindiv", false);                                 if (mindiv == "not found")                      { useMindiv = false; mindiv = "0.5";                            }       else{ useMindiv = true;                 }
431                         xn = validParameter.validFile(parameters, "xn", false);                                                 if (xn == "not found")                          { useXn = false; xn = "8.0";                                            }       else{ useXn = true;                             }
432                         dn = validParameter.validFile(parameters, "dn", false);                                                 if (dn == "not found")                          { useDn = false; dn = "1.4";                                            }       else{ useDn = true;                             }
433                         xa = validParameter.validFile(parameters, "xa", false);                                                 if (xa == "not found")                          { useXa = false; xa = "1";                                                      }       else{ useXa = true;                             }
434                         chunks = validParameter.validFile(parameters, "chunks", false);                                 if (chunks == "not found")                      { useChunks = false; chunks = "4";                                      }       else{ useChunks = true;                 }
435                         minchunk = validParameter.validFile(parameters, "minchunk", false);                             if (minchunk == "not found")            { useMinchunk = false; minchunk = "64";                         }       else{ useMinchunk = true;               }
436                         idsmoothwindow = validParameter.validFile(parameters, "idsmoothwindow", false); if (idsmoothwindow == "not found")      { useIdsmoothwindow = false; idsmoothwindow = "32";     }       else{ useIdsmoothwindow = true; }
437                         //minsmoothid = validParameter.validFile(parameters, "minsmoothid", false);             if (minsmoothid == "not found")         { useMinsmoothid = false; minsmoothid = "0.95";         }       else{ useMinsmoothid = true;    }
438                         maxp = validParameter.validFile(parameters, "maxp", false);                                             if (maxp == "not found")                        { useMaxp = false; maxp = "2";                                          }       else{ useMaxp = true;                   }
439                         minlen = validParameter.validFile(parameters, "minlen", false);                                 if (minlen == "not found")                      { useMinlen = false; minlen = "10";                                     }       else{ useMinlen = true;                 }
440                         maxlen = validParameter.validFile(parameters, "maxlen", false);                                 if (maxlen == "not found")                      { useMaxlen = false; maxlen = "10000";                          }       else{ useMaxlen = true;                 }
441                         
442                         temp = validParameter.validFile(parameters, "ucl", false);                                              if (temp == "not found") { temp = "f"; }
443                         ucl = m->isTrue(temp);
444                         
445                         queryfract = validParameter.validFile(parameters, "queryfract", false);                 if (queryfract == "not found")          { useQueryfract = false; queryfract = "0.5";            }       else{ useQueryfract = true;             }
446                         if (!ucl && useQueryfract) { m->mothurOut("queryfact may only be used when ucl=t, ignoring."); m->mothurOutEndLine(); useQueryfract = false; }
447                         
448                         temp = validParameter.validFile(parameters, "skipgaps", false);                                 if (temp == "not found") { temp = "t"; }
449                         skipgaps = m->isTrue(temp); 
450
451                         temp = validParameter.validFile(parameters, "skipgaps2", false);                                if (temp == "not found") { temp = "t"; }
452                         skipgaps2 = m->isTrue(temp); 
453                         
454                         if (hasName && (templatefile != "self")) { m->mothurOut("You have provided a namefile and the reference parameter is not set to self. I am not sure what reference you are trying to use, aborting."); m->mothurOutEndLine(); abort=true; }
455                         if (hasGroup && (templatefile != "self")) { m->mothurOut("You have provided a group file and the reference parameter is not set to self. I am not sure what reference you are trying to use, aborting."); m->mothurOutEndLine(); abort=true; }
456                         
457                         //look for uchime exe
458                         path = m->argv;
459                         string tempPath = path;
460                         for (int i = 0; i < path.length(); i++) { tempPath[i] = tolower(path[i]); }
461                         path = path.substr(0, (tempPath.find_last_of('m')));
462                         
463                         string uchimeCommand;
464 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
465                         uchimeCommand = path + "uchime";        //      format the database, -o option gives us the ability
466 #else
467                         uchimeCommand = path + "uchime.exe";
468 #endif
469                         
470                         //test to make sure uchime exists
471                         ifstream in;
472                         uchimeCommand = m->getFullPathName(uchimeCommand);
473                         int ableToOpen = m->openInputFile(uchimeCommand, in, "no error"); in.close();
474                         if(ableToOpen == 1) {   m->mothurOut("[ERROR]: " + uchimeCommand + " file does not exist. mothur requires the uchime executable."); m->mothurOutEndLine(); abort = true; }
475                 }
476         }
477         catch(exception& e) {
478                 m->errorOut(e, "ChimeraSlayerCommand", "ChimeraSlayerCommand");
479                 exit(1);
480         }
481 }
482 //***************************************************************************************************************
483
484 int ChimeraUchimeCommand::execute(){
485         try{
486                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
487                 
488                 m->mothurOut("\nuchime by Robert C. Edgar\nhttp://drive5.com/uchime\nThis code is donated to the public domain.\n\n");
489                 
490                 for (int s = 0; s < fastaFileNames.size(); s++) {
491                         
492                         m->mothurOut("Checking sequences from " + fastaFileNames[s] + " ..." ); m->mothurOutEndLine();
493                         
494                         int start = time(NULL); 
495                         string nameFile = "";
496                         if (outputDir == "") { outputDir = m->hasPath(fastaFileNames[s]);  }//if user entered a file with a path then preserve it                               
497                         string outputFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + "uchime.chimera";
498                         string accnosFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s]))  + "uchime.accnos";
499                         string alnsFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s]))  + "uchime.alns";
500                         string newFasta = m->getRootName(fastaFileNames[s]) + "temp";
501                                 
502                         //you provided a groupfile
503                         string groupFile = "";
504                         if (groupFileNames.size() != 0) { groupFile = groupFileNames[s]; }
505                         
506                         if ((templatefile == "self") && (groupFile == "")) { //you want to run uchime with a reference template
507
508                                 if (processors != 1) { m->mothurOut("When using template=self, mothur can only use 1 processor, continuing."); m->mothurOutEndLine(); processors = 1; }
509                                 if (nameFileNames.size() != 0) { //you provided a namefile and we don't need to create one
510                                         nameFile = nameFileNames[s];
511                                 }else { nameFile = getNamesFile(fastaFileNames[s]); }
512                                                                         
513                                 map<string, string> seqs;  
514                                 readFasta(fastaFileNames[s], seqs);  if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) {   m->mothurRemove(outputNames[j]);        }  return 0; }
515
516                                 //read namefile
517                                 vector<seqPriorityNode> nameMapCount;
518                                 int error = m->readNames(nameFile, nameMapCount, seqs); if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) {        m->mothurRemove(outputNames[j]);        }  return 0; }
519                                 if (error == 1) { for (int j = 0; j < outputNames.size(); j++) {        m->mothurRemove(outputNames[j]);        }  return 0; }
520                                 if (seqs.size() != nameMapCount.size()) { m->mothurOut( "The number of sequences in your fastafile does not match the number of sequences in your namefile, aborting."); m->mothurOutEndLine(); for (int j = 0; j < outputNames.size(); j++) {  m->mothurRemove(outputNames[j]);        }  return 0; }
521                                 
522                                 printFile(nameMapCount, newFasta);
523                                 fastaFileNames[s] = newFasta;
524                         }
525                         
526                         if (m->control_pressed) {  for (int j = 0; j < outputNames.size(); j++) {       m->mothurRemove(outputNames[j]);        }  return 0;    }                               
527                         
528                         if (groupFile != "") {
529                                 if (nameFileNames.size() != 0) { //you provided a namefile and we don't need to create one
530                                         nameFile = nameFileNames[s];
531                                 }else { nameFile = getNamesFile(fastaFileNames[s]); }
532                                 
533                                 //Parse sequences by group
534                                 SequenceParser parser(groupFile, fastaFileNames[s], nameFile);
535                                 vector<string> groups = parser.getNamesOfGroups();
536                                         
537                                 if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) {        m->mothurRemove(outputNames[j]);        }  return 0; }
538                                                                 
539                                 //clears files
540                                 ofstream out, out1, out2;
541                                 m->openOutputFile(outputFileName, out); out.close(); 
542                                 m->openOutputFile(accnosFileName, out1); out1.close();
543                                 if (chimealns) { m->openOutputFile(alnsFileName, out2); out2.close(); }
544                                 int totalSeqs = 0;
545                                 
546                                 if(processors == 1)     {       totalSeqs = driverGroups(parser, outputFileName, newFasta, accnosFileName, alnsFileName, 0, groups.size(), groups);     }
547                                 else                            {       totalSeqs = createProcessesGroups(parser, outputFileName, newFasta, accnosFileName, alnsFileName, groups);                      }
548
549                                 if (m->control_pressed) {  for (int j = 0; j < outputNames.size(); j++) {       m->mothurRemove(outputNames[j]);        }  return 0;    }                               
550
551                                 int totalChimeras = deconvoluteResults(parser, outputFileName, accnosFileName, alnsFileName);
552                                 
553                                 m->mothurOutEndLine(); m->mothurOut("It took " + toString(time(NULL) - start) + " secs to check " + toString(totalSeqs) + " sequences. " + toString(totalChimeras) + " chimeras were found.");  m->mothurOutEndLine();
554                                 m->mothurOut("The number of sequences checked may be larger than the number of unique sequences because some sequences are found in several samples."); m->mothurOutEndLine(); 
555                                 
556                                 if (m->control_pressed) {  for (int j = 0; j < outputNames.size(); j++) {       m->mothurRemove(outputNames[j]);        }  return 0;    }                               
557                                         
558                         }else{
559                                 if (m->control_pressed) {  for (int j = 0; j < outputNames.size(); j++) {       m->mothurRemove(outputNames[j]);        }  return 0;    }
560                         
561                                 int numSeqs = 0;
562                                 int numChimeras = 0;
563         //#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
564                                 if(processors == 1){ numSeqs = driver(outputFileName, fastaFileNames[s], accnosFileName, alnsFileName, numChimeras); }
565                                 else{   numSeqs = createProcesses(outputFileName, fastaFileNames[s], accnosFileName, alnsFileName, numChimeras); }
566         //#else
567         //                      numSeqs = driver(outputFileName, fastaFileNames[s], accnosFileName, alnsFileName, numChimeras);
568         //#endif
569                                 if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) {        m->mothurRemove(outputNames[j]);        } return 0; }
570                         
571                                 //remove file made for uchime
572                                 if (templatefile == "self") {  m->mothurRemove(fastaFileNames[s]); }
573                         
574                                 m->mothurOutEndLine(); m->mothurOut("It took " + toString(time(NULL) - start) + " secs to check " + toString(numSeqs) + " sequences. " + toString(numChimeras) + " chimeras were found.");      m->mothurOutEndLine();
575                         }
576                         
577                         outputNames.push_back(outputFileName); outputTypes["chimera"].push_back(outputFileName);
578                         outputNames.push_back(accnosFileName); outputTypes["accnos"].push_back(accnosFileName);
579                         if (chimealns) { outputNames.push_back(alnsFileName); outputTypes["alns"].push_back(alnsFileName); }
580                 }
581         
582                 //set accnos file as new current accnosfile
583                 string current = "";
584                 itTypes = outputTypes.find("accnos");
585                 if (itTypes != outputTypes.end()) {
586                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setAccnosFile(current); }
587                 }
588                 
589                 m->mothurOutEndLine();
590                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
591                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }       
592                 m->mothurOutEndLine();
593                 
594                 return 0;
595                 
596         }
597         catch(exception& e) {
598                 m->errorOut(e, "ChimeraUchimeCommand", "execute");
599                 exit(1);
600         }
601 }
602 //**********************************************************************************************************************
603 int ChimeraUchimeCommand::deconvoluteResults(SequenceParser& parser, string outputFileName, string accnosFileName, string alnsFileName){
604         try {
605                 map<string, string> uniqueNames = parser.getAllSeqsMap();
606                 map<string, string>::iterator itUnique;
607                 int total = 0;
608                 
609                 //edit accnos file
610                 ifstream in2; 
611                 m->openInputFile(accnosFileName, in2);
612                 
613                 ofstream out2;
614                 m->openOutputFile(accnosFileName+".temp", out2);
615                 
616                 string name;
617                 set<string> namesInFile; //this is so if a sequence is found to be chimera in several samples we dont write it to the results file more than once
618                 set<string>::iterator itNames;
619                 set<string> chimerasInFile;
620                 set<string>::iterator itChimeras;
621
622                 
623                 while (!in2.eof()) {
624                         if (m->control_pressed) { in2.close(); out2.close(); m->mothurRemove(outputFileName); m->mothurRemove((accnosFileName+".temp")); return 0; }
625                         
626                         in2 >> name; m->gobble(in2);
627                         
628                         //find unique name
629                         itUnique = uniqueNames.find(name);
630                         
631                         if (itUnique == uniqueNames.end()) { m->mothurOut("[ERROR]: trouble parsing accnos results. Cannot find "+ name + "."); m->mothurOutEndLine(); m->control_pressed = true; }
632                         else {
633                                 itChimeras = chimerasInFile.find((itUnique->second));
634                                 
635                                 if (itChimeras == chimerasInFile.end()) {
636                                         out2 << itUnique->second << endl;
637                                         chimerasInFile.insert((itUnique->second));
638                                         total++;
639                                 }
640                         }
641                 }
642                 in2.close();
643                 out2.close();
644                 
645                 m->mothurRemove(accnosFileName);
646                 rename((accnosFileName+".temp").c_str(), accnosFileName.c_str());
647                 
648                 
649                 
650                 //edit chimera file
651                 ifstream in; 
652                 m->openInputFile(outputFileName, in);
653                 
654                 ofstream out;
655                 m->openOutputFile(outputFileName+".temp", out); out.setf(ios::fixed, ios::floatfield); out.setf(ios::showpoint);
656                 
657                 float temp1;
658                 string parent1, parent2, temp2, temp3, temp4, temp5, temp6, temp7, temp8, temp9, temp10, temp11, temp12, temp13, flag;
659                 name = "";
660                 namesInFile.clear();    
661                 //assumptions - in file each read will always look like - if uchime source is updated, revisit this code.
662                 /*                                                                              1       2       3       4       5       6       7       8       9       10      11      12      13      14      15
663                  0.000000       F11Fcsw_33372/ab=18/            *       *       *       *       *       *       *       *       *       *       *       *       *       *       N
664                  0.018300       F11Fcsw_14980/ab=16/            F11Fcsw_1915/ab=35/     F11Fcsw_6032/ab=42/     79.9    78.7    78.2    78.7    79.2    3       0       5       11      10      20      1.46    N
665                 */
666                 
667                 while (!in.eof()) {
668                         
669                         if (m->control_pressed) { in.close(); out.close(); m->mothurRemove((outputFileName+".temp")); return 0; }
670                         
671                         bool print = false;
672                         in >> temp1;    m->gobble(in);
673                         in >> name;             m->gobble(in);
674                         in >> parent1;  m->gobble(in);
675                         in >> parent2;  m->gobble(in);
676                         in >> temp2 >> temp3 >> temp4 >> temp5 >> temp6 >> temp7 >> temp8 >> temp9 >> temp10 >> temp11 >> temp12 >> temp13 >> flag;
677                         m->gobble(in);
678                         
679                         //parse name - name will look like U68590/ab=1/
680                         string restOfName = "";
681                         int pos = name.find_first_of('/');
682                         if (pos != string::npos) {
683                                 restOfName = name.substr(pos);
684                                 name = name.substr(0, pos);
685                         }
686                         
687                         //find unique name
688                         itUnique = uniqueNames.find(name);
689                         
690                         if (itUnique == uniqueNames.end()) { m->mothurOut("[ERROR]: trouble parsing chimera results. Cannot find "+ name + "."); m->mothurOutEndLine(); m->control_pressed = true; }
691                         else {
692                                 name = itUnique->second;
693                                 //is this name already in the file
694                                 itNames = namesInFile.find((name));
695                                 
696                                 if (itNames == namesInFile.end()) { //no not in file
697                                         if (flag == "N") { //are you really a no??
698                                                 //is this sequence really not chimeric??
699                                                 itChimeras = chimerasInFile.find(name);
700                                                 
701                                                 //then you really are a no so print, otherwise skip
702                                                 if (itChimeras == chimerasInFile.end()) { print = true; }
703                                         }else{ print = true; }
704                                 }
705                         }
706                         
707                         if (print) {
708                                 out << temp1 << '\t' << name << restOfName << '\t';
709                                 namesInFile.insert(name);
710                                 
711                                 //parse parent1 names
712                                 if (parent1 != "*") {
713                                         restOfName = "";
714                                         pos = parent1.find_first_of('/');
715                                         if (pos != string::npos) {
716                                                 restOfName = parent1.substr(pos);
717                                                 parent1 = parent1.substr(0, pos);
718                                         }
719                                         
720                                         itUnique = uniqueNames.find(parent1);
721                                         if (itUnique == uniqueNames.end()) { m->mothurOut("[ERROR]: trouble parsing chimera results. Cannot find parentA "+ parent1 + "."); m->mothurOutEndLine(); m->control_pressed = true; }
722                                         else {  out << itUnique->second << restOfName << '\t';  }
723                                 }else { out << parent1 << '\t'; }
724                                 
725                                 //parse parent2 names
726                                 if (parent2 != "*") {
727                                         restOfName = "";
728                                         pos = parent2.find_first_of('/');
729                                         if (pos != string::npos) {
730                                                 restOfName = parent2.substr(pos);
731                                                 parent2 = parent2.substr(0, pos);
732                                         }
733                                         
734                                         itUnique = uniqueNames.find(parent2);
735                                         if (itUnique == uniqueNames.end()) { m->mothurOut("[ERROR]: trouble parsing chimera results. Cannot find parentB "+ parent2 + "."); m->mothurOutEndLine(); m->control_pressed = true; }
736                                         else {  out << itUnique->second << restOfName << '\t';  }
737                                 }else { out << parent2 << '\t'; }
738                                 
739                                 out << temp2 << '\t' << temp3 << '\t' << temp4 << '\t' << temp5 << '\t' << temp6 << '\t' << temp7 << '\t' << temp8 << '\t' << temp9 << '\t' << temp10 << '\t' << temp11 << '\t' << temp12 << temp13 << '\t' << flag << endl;    
740                         }
741                 }
742                 in.close();
743                 out.close();
744                 
745                 m->mothurRemove(outputFileName);
746                 rename((outputFileName+".temp").c_str(), outputFileName.c_str());
747                 
748                                 
749                 //edit anls file
750                 //assumptions - in file each read will always look like - if uchime source is updated, revisit this code.
751                 /*
752                  ------------------------------------------------------------------------
753                  Query   (  179 nt) F21Fcsw_11639/ab=591/
754                  ParentA (  179 nt) F11Fcsw_6529/ab=1625/
755                  ParentB (  181 nt) F21Fcsw_12128/ab=1827/
756                  
757                  A     1 AAGgAAGAtTAATACaagATGgCaTCatgAGtccgCATgTtcAcatGATTAAAG--gTaTtcCGGTagacGATGGGGATG 78
758                  Q     1 AAGTAAGACTAATACCCAATGACGTCTCTAGAAGACATCTGAAAGAGATTAAAG--ATTTATCGGTGATGGATGGGGATG 78
759                  B     1 AAGgAAGAtTAATcCaggATGggaTCatgAGttcACATgTccgcatGATTAAAGgtATTTtcCGGTagacGATGGGGATG 80
760                  Diffs      N    N    A N?N   N N  NNN  N?NB   N ?NaNNN          B B NN    NNNN          
761                  Votes      0    0    + 000   0 0  000  000+   0 00!000            + 00    0000          
762                  Model   AAAAAAAAAAAAAAAAAAAAAAxBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB
763                  
764                  A    79 CGTtccATTAGaTaGTaGGCGGGGTAACGGCCCACCtAGtCttCGATggaTAGGGGTTCTGAGAGGAAGGTCCCCCACAT 158
765                  Q    79 CGTCTGATTAGCTTGTTGGCGGGGTAACGGCCCACCAAGGCAACGATCAGTAGGGGTTCTGAGAGGAAGGTCCCCCACAT 158
766                  B    81 CGTtccATTAGaTaGTaGGCGGGGTAACGGCCCACCtAGtCAACGATggaTAGGGGTTCTGAGAGGAAGGTCCCCCACAT 160
767                  Diffs      NNN     N N  N                   N  N BB    NNN                              
768                  Votes      000     0 0  0                   0  0 ++    000                              
769                  Model   BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB
770                  
771                  A   159 TGGAACTGAGACACGGTCCAA 179
772                  Q   159 TGGAACTGAGACACGGTCCAA 179
773                  B   161 TGGAACTGAGACACGGTCCAA 181
774                  Diffs                        
775                  Votes                        
776                  Model   BBBBBBBBBBBBBBBBBBBBB
777                  
778                  Ids.  QA 76.6%, QB 77.7%, AB 93.7%, QModel 78.9%, Div. +1.5%
779                  Diffs Left 7: N 0, A 6, Y 1 (14.3%); Right 35: N 1, A 30, Y 4 (11.4%), Score 0.0047
780                 */
781                 if (chimealns) {
782                         ifstream in3; 
783                         m->openInputFile(alnsFileName, in3);
784                 
785                         ofstream out3;
786                         m->openOutputFile(alnsFileName+".temp", out3); out3.setf(ios::fixed, ios::floatfield); out3.setf(ios::showpoint);
787                 
788                         name = "";
789                         namesInFile.clear();
790                         string line = "";
791                         
792                         while (!in3.eof()) {
793                                 if (m->control_pressed) { in3.close(); out3.close(); m->mothurRemove(outputFileName); m->mothurRemove((accnosFileName)); m->mothurRemove((alnsFileName+".temp")); return 0; }
794                                 
795                                 line = "";
796                                 line = m->getline(in3); 
797                                 string temp = "";
798                                 
799                                 if (line != "") {
800                                         istringstream iss(line);
801                                         iss >> temp;
802                                         
803                                         //are you a name line
804                                         if ((temp == "Query") || (temp == "ParentA") || (temp == "ParentB")) {
805                                                 int spot = 0;
806                                                 for (int i = 0; i < line.length(); i++) {
807                                                         spot = i;
808                                                         if (line[i] == ')') { break; }
809                                                         else { out3 << line[i]; }
810                                                 }
811                                                 
812                                                 if (spot == (line.length() - 1)) { m->mothurOut("[ERROR]: could not line sequence name in line " + line + "."); m->mothurOutEndLine(); m->control_pressed = true; }
813                                                 else if ((spot+2) > (line.length() - 1)) { m->mothurOut("[ERROR]: could not line sequence name in line " + line + "."); m->mothurOutEndLine(); m->control_pressed = true; }
814                                                 else {
815                                                         out << line[spot] << line[spot+1];
816                                                         
817                                                         name = line.substr(spot+2);
818                                                         
819                                                         //parse name - name will either look like U68590/ab=1/ or U68590
820                                                         string restOfName = "";
821                                                         int pos = name.find_first_of('/');
822                                                         if (pos != string::npos) {
823                                                                 restOfName = name.substr(pos);
824                                                                 name = name.substr(0, pos);
825                                                         }
826                                                         
827                                                         //find unique name
828                                                         itUnique = uniqueNames.find(name);
829                                                         
830                                                         if (itUnique == uniqueNames.end()) { m->mothurOut("[ERROR]: trouble parsing alns results. Cannot find "+ name + "."); m->mothurOutEndLine();m->control_pressed = true;  }
831                                                         else {
832                                                                 //only limit repeats on query names
833                                                                 if (temp == "Query") {
834                                                                         itNames = namesInFile.find((itUnique->second));
835                                                                         
836                                                                         if (itNames == namesInFile.end()) {
837                                                                                 out << itUnique->second << restOfName << endl;
838                                                                                 namesInFile.insert((itUnique->second));
839                                                                         }
840                                                                 }else { out << itUnique->second << restOfName << endl;  }
841                                                         }
842                                                         
843                                                 }
844                                                 
845                                         }else { //not need to alter line
846                                                 out3 << line << endl;
847                                         }
848                                 }else { out3 << endl; }
849                         }
850                         in3.close();
851                         out3.close();
852                         
853                         m->mothurRemove(alnsFileName);
854                         rename((alnsFileName+".temp").c_str(), alnsFileName.c_str());
855                 }
856                 
857                 return total;
858         }
859         catch(exception& e) {
860                 m->errorOut(e, "ChimeraUchimeCommand", "deconvoluteResults");
861                 exit(1);
862         }
863 }       
864 //**********************************************************************************************************************
865 int ChimeraUchimeCommand::printFile(vector<seqPriorityNode>& nameMapCount, string filename){
866         try {
867                 
868                 sort(nameMapCount.begin(), nameMapCount.end(), compareSeqPriorityNodes);
869                 
870                 ofstream out;
871                 m->openOutputFile(filename, out);
872                 
873                 //print new file in order of
874                 for (int i = 0; i < nameMapCount.size(); i++) {
875                         out << ">" << nameMapCount[i].name  << "/ab=" << nameMapCount[i].numIdentical << "/" << endl << nameMapCount[i].seq << endl;
876                 }
877                 out.close();
878                 
879                 return 0;
880         }
881         catch(exception& e) {
882                 m->errorOut(e, "ChimeraUchimeCommand", "printFile");
883                 exit(1);
884         }
885 }       
886 //**********************************************************************************************************************
887 int ChimeraUchimeCommand::readFasta(string filename, map<string, string>& seqs){
888         try {
889                 //create input file for uchime
890                 //read through fastafile and store info
891                 ifstream in;
892                 m->openInputFile(filename, in);
893                 
894                 while (!in.eof()) {
895                         
896                         if (m->control_pressed) { in.close(); return 0; }
897                         
898                         Sequence seq(in); m->gobble(in);
899                         seqs[seq.getName()] = seq.getAligned();
900                 }
901                 in.close();
902                 
903                 return 0;
904         }
905         catch(exception& e) {
906                 m->errorOut(e, "ChimeraUchimeCommand", "readFasta");
907                 exit(1);
908         }
909 }       
910 //**********************************************************************************************************************
911
912 string ChimeraUchimeCommand::getNamesFile(string& inputFile){
913         try {
914                 string nameFile = "";
915                 
916                 m->mothurOutEndLine(); m->mothurOut("No namesfile given, running unique.seqs command to generate one."); m->mothurOutEndLine(); m->mothurOutEndLine();
917                 
918                 //use unique.seqs to create new name and fastafile
919                 string inputString = "fasta=" + inputFile;
920                 m->mothurOut("/******************************************/"); m->mothurOutEndLine(); 
921                 m->mothurOut("Running command: unique.seqs(" + inputString + ")"); m->mothurOutEndLine(); 
922                 
923                 Command* uniqueCommand = new DeconvoluteCommand(inputString);
924                 uniqueCommand->execute();
925                 
926                 map<string, vector<string> > filenames = uniqueCommand->getOutputFiles();
927                 
928                 delete uniqueCommand;
929                 
930                 m->mothurOut("/******************************************/"); m->mothurOutEndLine(); 
931                 
932                 nameFile = filenames["name"][0];
933                 inputFile = filenames["fasta"][0];
934                 
935                 return nameFile;
936         }
937         catch(exception& e) {
938                 m->errorOut(e, "ChimeraUchimeCommand", "getNamesFile");
939                 exit(1);
940         }
941 }
942 //**********************************************************************************************************************
943 int ChimeraUchimeCommand::driverGroups(SequenceParser& parser, string outputFName, string filename, string accnos, string alns, int start, int end, vector<string> groups){
944         try {
945                 
946                 int totalSeqs = 0;
947                 int numChimeras = 0;
948                 
949                 for (int i = start; i < end; i++) {
950                         int start = time(NULL);  if (m->control_pressed) {  return 0; }
951                         
952                         int error = parser.getSeqs(groups[i], filename, true); if ((error == 1) || m->control_pressed) {  return 0; }
953                         
954                         int numSeqs = driver((outputFName + groups[i]), filename, (accnos+ groups[i]), (alns+ groups[i]), numChimeras);
955                         totalSeqs += numSeqs;
956                         
957                         if (m->control_pressed) { return 0; }
958                         
959                         //remove file made for uchime
960                         m->mothurRemove(filename);
961                         
962                         //append files
963                         m->appendFiles((outputFName+groups[i]), outputFName); m->mothurRemove((outputFName+groups[i]));
964                         m->appendFiles((accnos+groups[i]), accnos); m->mothurRemove((accnos+groups[i]));
965                         if (chimealns) { m->appendFiles((alns+groups[i]), alns); m->mothurRemove((alns+groups[i])); }
966                         
967                         m->mothurOutEndLine(); m->mothurOut("It took " + toString(time(NULL) - start) + " secs to check " + toString(numSeqs) + " sequences from group " + groups[i] + ".");    m->mothurOutEndLine();                                  
968                 }       
969                 
970                 return totalSeqs;
971                 
972         }
973         catch(exception& e) {
974                 m->errorOut(e, "ChimeraUchimeCommand", "driverGroups");
975                 exit(1);
976         }
977 }       
978 //**********************************************************************************************************************
979
980 int ChimeraUchimeCommand::driver(string outputFName, string filename, string accnos, string alns, int& numChimeras){
981         try {
982                 //to allow for spaces in the path
983                 outputFName = "\"" + outputFName + "\"";
984                 filename = "\"" + filename + "\"";
985                 alns = "\"" + alns + "\"";
986                                 
987                 vector<char*> cPara;
988         
989                 char* tempUchime;
990 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
991                 tempUchime= new char[10];  
992                 *tempUchime = '\0';
993                 strncat(tempUchime, "./uchime ", 9); 
994 #else
995                 tempUchime= new char[8]; 
996                 *tempUchime = '\0';
997                 strncat(tempUchime, "uchime ", 7); 
998 #endif
999                 cPara.push_back(tempUchime);
1000                 
1001                 char* tempIn = new char[8]; 
1002                 *tempIn = '\0'; strncat(tempIn, "--input", 7);
1003                 //strcpy(tempIn, "--input"); 
1004                 cPara.push_back(tempIn);
1005                 char* temp = new char[filename.length()+1];
1006                 *temp = '\0'; strncat(temp, filename.c_str(), filename.length());
1007                 //strcpy(temp, filename.c_str());
1008                 cPara.push_back(temp);
1009                 
1010                 //are you using a reference file
1011                 if (templatefile != "self") {
1012                         //add reference file
1013                         char* tempRef = new char[5]; 
1014                         //strcpy(tempRef, "--db"); 
1015                         *tempRef = '\0'; strncat(tempRef, "--db", 4);
1016                         cPara.push_back(tempRef);  
1017                         char* tempR = new char[templatefile.length()+1];
1018                         //strcpy(tempR, templatefile.c_str());
1019                         *tempR = '\0'; strncat(tempR, templatefile.c_str(), templatefile.length());
1020                         cPara.push_back(tempR);
1021                 }
1022                 
1023                 char* tempO = new char[12]; 
1024                 *tempO = '\0'; strncat(tempO, "--uchimeout", 11);
1025                 //strcpy(tempO, "--uchimeout"); 
1026                 cPara.push_back(tempO);
1027                 char* tempout = new char[outputFName.length()+1];
1028                 //strcpy(tempout, outputFName.c_str());
1029                 *tempout = '\0'; strncat(tempout, outputFName.c_str(), outputFName.length());
1030                 cPara.push_back(tempout);
1031                 
1032                 if (chimealns) {
1033                         char* tempA = new char[13]; 
1034                         *tempA = '\0'; strncat(tempA, "--uchimealns", 12);
1035                         //strcpy(tempA, "--uchimealns"); 
1036                         cPara.push_back(tempA);
1037                         char* tempa = new char[alns.length()+1];
1038                         //strcpy(tempa, alns.c_str());
1039                         *tempa = '\0'; strncat(tempa, alns.c_str(), alns.length());
1040                         cPara.push_back(tempa);
1041                 }
1042                 
1043                 if (useAbskew) {
1044                         char* tempskew = new char[9];
1045                         *tempskew = '\0'; strncat(tempskew, "--abskew", 8);
1046                         //strcpy(tempskew, "--abskew"); 
1047                         cPara.push_back(tempskew);
1048                         char* tempSkew = new char[abskew.length()+1];
1049                         //strcpy(tempSkew, abskew.c_str());
1050                         *tempSkew = '\0'; strncat(tempSkew, abskew.c_str(), abskew.length());
1051                         cPara.push_back(tempSkew);
1052                 }
1053                 
1054                 if (useMinH) {
1055                         char* tempminh = new char[7]; 
1056                         *tempminh = '\0'; strncat(tempminh, "--minh", 6);
1057                         //strcpy(tempminh, "--minh"); 
1058                         cPara.push_back(tempminh);
1059                         char* tempMinH = new char[minh.length()+1];
1060                         *tempMinH = '\0'; strncat(tempMinH, minh.c_str(), minh.length());
1061                         //strcpy(tempMinH, minh.c_str());
1062                         cPara.push_back(tempMinH);
1063                 }
1064                 
1065                 if (useMindiv) {
1066                         char* tempmindiv = new char[9]; 
1067                         *tempmindiv = '\0'; strncat(tempmindiv, "--mindiv", 8);
1068                         //strcpy(tempmindiv, "--mindiv"); 
1069                         cPara.push_back(tempmindiv);
1070                         char* tempMindiv = new char[mindiv.length()+1];
1071                         *tempMindiv = '\0'; strncat(tempMindiv, mindiv.c_str(), mindiv.length());
1072                         //strcpy(tempMindiv, mindiv.c_str());
1073                         cPara.push_back(tempMindiv);
1074                 }
1075                 
1076                 if (useXn) {
1077                         char* tempxn = new char[5]; 
1078                         //strcpy(tempxn, "--xn"); 
1079                         *tempxn = '\0'; strncat(tempxn, "--xn", 4);
1080                         cPara.push_back(tempxn);
1081                         char* tempXn = new char[xn.length()+1];
1082                         //strcpy(tempXn, xn.c_str());
1083                         *tempXn = '\0'; strncat(tempXn, xn.c_str(), xn.length());
1084                         cPara.push_back(tempXn);
1085                 }
1086                 
1087                 if (useDn) {
1088                         char* tempdn = new char[5]; 
1089                         //strcpy(tempdn, "--dn"); 
1090                         *tempdn = '\0'; strncat(tempdn, "--dn", 4);
1091                         cPara.push_back(tempdn);
1092                         char* tempDn = new char[dn.length()+1];
1093                         *tempDn = '\0'; strncat(tempDn, dn.c_str(), dn.length());
1094                         //strcpy(tempDn, dn.c_str());
1095                         cPara.push_back(tempDn);
1096                 }
1097                 
1098                 if (useXa) {
1099                         char* tempxa = new char[5]; 
1100                         //strcpy(tempxa, "--xa"); 
1101                         *tempxa = '\0'; strncat(tempxa, "--xa", 4);
1102                         cPara.push_back(tempxa);
1103                         char* tempXa = new char[xa.length()+1];
1104                         *tempXa = '\0'; strncat(tempXa, xa.c_str(), xa.length());
1105                         //strcpy(tempXa, xa.c_str());
1106                         cPara.push_back(tempXa);
1107                 }
1108                 
1109                 if (useChunks) {
1110                         char* tempchunks = new char[9]; 
1111                         //strcpy(tempchunks, "--chunks"); 
1112                         *tempchunks = '\0'; strncat(tempchunks, "--chunks", 8);
1113                         cPara.push_back(tempchunks);
1114                         char* tempChunks = new char[chunks.length()+1];
1115                         *tempChunks = '\0'; strncat(tempChunks, chunks.c_str(), chunks.length());
1116                         //strcpy(tempChunks, chunks.c_str());
1117                         cPara.push_back(tempChunks);
1118                 }
1119                 
1120                 if (useMinchunk) {
1121                         char* tempminchunk = new char[11]; 
1122                         //strcpy(tempminchunk, "--minchunk"); 
1123                         *tempminchunk = '\0'; strncat(tempminchunk, "--minchunk", 10);
1124                         cPara.push_back(tempminchunk);
1125                         char* tempMinchunk = new char[minchunk.length()+1];
1126                         *tempMinchunk = '\0'; strncat(tempMinchunk, minchunk.c_str(), minchunk.length());
1127                         //strcpy(tempMinchunk, minchunk.c_str());
1128                         cPara.push_back(tempMinchunk);
1129                 }
1130                 
1131                 if (useIdsmoothwindow) {
1132                         char* tempidsmoothwindow = new char[17]; 
1133                         *tempidsmoothwindow = '\0'; strncat(tempidsmoothwindow, "--idsmoothwindow", 16);
1134                         //strcpy(tempidsmoothwindow, "--idsmoothwindow"); 
1135                         cPara.push_back(tempidsmoothwindow);
1136                         char* tempIdsmoothwindow = new char[idsmoothwindow.length()+1];
1137                         *tempIdsmoothwindow = '\0'; strncat(tempIdsmoothwindow, idsmoothwindow.c_str(), idsmoothwindow.length());
1138                         //strcpy(tempIdsmoothwindow, idsmoothwindow.c_str());
1139                         cPara.push_back(tempIdsmoothwindow);
1140                 }
1141                 
1142                 /*if (useMinsmoothid) {
1143                         char* tempminsmoothid = new char[14]; 
1144                         //strcpy(tempminsmoothid, "--minsmoothid"); 
1145                         *tempminsmoothid = '\0'; strncat(tempminsmoothid, "--minsmoothid", 13);
1146                         cPara.push_back(tempminsmoothid);
1147                         char* tempMinsmoothid = new char[minsmoothid.length()+1];
1148                         *tempMinsmoothid = '\0'; strncat(tempMinsmoothid, minsmoothid.c_str(), minsmoothid.length());
1149                         //strcpy(tempMinsmoothid, minsmoothid.c_str());
1150                         cPara.push_back(tempMinsmoothid);
1151                 }*/
1152                 
1153                 if (useMaxp) {
1154                         char* tempmaxp = new char[7]; 
1155                         //strcpy(tempmaxp, "--maxp"); 
1156                         *tempmaxp = '\0'; strncat(tempmaxp, "--maxp", 6);
1157                         cPara.push_back(tempmaxp);
1158                         char* tempMaxp = new char[maxp.length()+1];
1159                         *tempMaxp = '\0'; strncat(tempMaxp, maxp.c_str(), maxp.length());
1160                         //strcpy(tempMaxp, maxp.c_str());
1161                         cPara.push_back(tempMaxp);
1162                 }
1163                 
1164                 if (!skipgaps) {
1165                         char* tempskipgaps = new char[13]; 
1166                         //strcpy(tempskipgaps, "--[no]skipgaps");
1167                         *tempskipgaps = '\0'; strncat(tempskipgaps, "--noskipgaps", 12);
1168                         cPara.push_back(tempskipgaps);
1169                 }
1170                 
1171                 if (!skipgaps2) {
1172                         char* tempskipgaps2 = new char[14]; 
1173                         //strcpy(tempskipgaps2, "--[no]skipgaps2"); 
1174                         *tempskipgaps2 = '\0'; strncat(tempskipgaps2, "--noskipgaps2", 13);
1175                         cPara.push_back(tempskipgaps2);
1176                 }
1177                 
1178                 if (useMinlen) {
1179                         char* tempminlen = new char[9]; 
1180                         *tempminlen = '\0'; strncat(tempminlen, "--minlen", 8);
1181                         //strcpy(tempminlen, "--minlen"); 
1182                         cPara.push_back(tempminlen);
1183                         char* tempMinlen = new char[minlen.length()+1];
1184                         //strcpy(tempMinlen, minlen.c_str());
1185                         *tempMinlen = '\0'; strncat(tempMinlen, minlen.c_str(), minlen.length());
1186                         cPara.push_back(tempMinlen);
1187                 }
1188                 
1189                 if (useMaxlen) {
1190                         char* tempmaxlen = new char[9]; 
1191                         //strcpy(tempmaxlen, "--maxlen"); 
1192                         *tempmaxlen = '\0'; strncat(tempmaxlen, "--maxlen", 8);
1193                         cPara.push_back(tempmaxlen);
1194                         char* tempMaxlen = new char[maxlen.length()+1];
1195                         *tempMaxlen = '\0'; strncat(tempMaxlen, maxlen.c_str(), maxlen.length());
1196                         //strcpy(tempMaxlen, maxlen.c_str());
1197                         cPara.push_back(tempMaxlen);
1198                 }
1199                 
1200                 if (ucl) {
1201                         char* tempucl = new char[5]; 
1202                         strcpy(tempucl, "--ucl"); 
1203                         cPara.push_back(tempucl);
1204                 }
1205                 
1206                 if (useQueryfract) {
1207                         char* tempqueryfract = new char[13]; 
1208                         *tempqueryfract = '\0'; strncat(tempqueryfract, "--queryfract", 12);
1209                         //strcpy(tempqueryfract, "--queryfract"); 
1210                         cPara.push_back(tempqueryfract);
1211                         char* tempQueryfract = new char[queryfract.length()+1];
1212                         *tempQueryfract = '\0'; strncat(tempQueryfract, queryfract.c_str(), queryfract.length());
1213                         //strcpy(tempQueryfract, queryfract.c_str());
1214                         cPara.push_back(tempQueryfract);
1215                 }
1216                 
1217                 
1218                 char** uchimeParameters;
1219                 uchimeParameters = new char*[cPara.size()];
1220                 string commandString = "";
1221                 for (int i = 0; i < cPara.size(); i++) {  uchimeParameters[i] = cPara[i];  commandString += toString(cPara[i]) + " "; } 
1222                 //int numArgs = cPara.size();
1223                 
1224                 //uchime_main(numArgs, uchimeParameters); 
1225                 //cout << "commandString = " << commandString << endl;
1226                 system(commandString.c_str());
1227                 
1228                 //free memory
1229                 for(int i = 0; i < cPara.size(); i++)  {  delete cPara[i];  }
1230                 delete[] uchimeParameters; 
1231                 
1232                 //remove "" from filenames
1233                 outputFName = outputFName.substr(1, outputFName.length()-2);
1234                 filename = filename.substr(1, filename.length()-2);
1235                 alns = alns.substr(1, alns.length()-2);
1236                 
1237                 if (m->control_pressed) { return 0; }
1238                 
1239                 //create accnos file from uchime results
1240                 ifstream in; 
1241                 m->openInputFile(outputFName, in);
1242                 
1243                 ofstream out;
1244                 m->openOutputFile(accnos, out);
1245                 
1246                 int num = 0;
1247                 numChimeras = 0;
1248                 while(!in.eof()) {
1249                         
1250                         if (m->control_pressed) { break; }
1251                         
1252                         string name = "";
1253                         string chimeraFlag = "";
1254                         in >> chimeraFlag >> name;
1255                         
1256                         //fix name if needed
1257                         if (templatefile == "self") { 
1258                                 name = name.substr(0, name.length()-1); //rip off last /
1259                                 name = name.substr(0, name.find_last_of('/'));
1260                         }
1261                         
1262                         for (int i = 0; i < 15; i++) {  in >> chimeraFlag; }
1263                         m->gobble(in);
1264                         
1265                         if (chimeraFlag == "Y") {  out << name << endl; numChimeras++; }
1266                         num++;
1267                 }
1268                 in.close();
1269                 out.close();
1270                 
1271                 return num;
1272         }
1273         catch(exception& e) {
1274                 m->errorOut(e, "ChimeraUchimeCommand", "driver");
1275                 exit(1);
1276         }
1277 }
1278 /**************************************************************************************************/
1279
1280 int ChimeraUchimeCommand::createProcesses(string outputFileName, string filename, string accnos, string alns, int& numChimeras) {
1281         try {
1282                 
1283                 processIDS.clear();
1284                 int process = 1;
1285                 int num = 0;
1286                 vector<string> files;
1287                 
1288 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)           
1289                 //break up file into multiple files
1290                 m->divideFile(filename, processors, files);
1291                 
1292                 if (m->control_pressed) {  return 0;  }
1293                                 
1294                 //loop through and create all the processes you want
1295                 while (process != processors) {
1296                         int pid = fork();
1297                         
1298                         if (pid > 0) {
1299                                 processIDS.push_back(pid);  //create map from line number to pid so you can append files in correct order later
1300                                 process++;
1301                         }else if (pid == 0){
1302                                 num = driver(outputFileName + toString(getpid()) + ".temp", files[process], accnos + toString(getpid()) + ".temp", alns + toString(getpid()) + ".temp", numChimeras);
1303                                 
1304                                 //pass numSeqs to parent
1305                                 ofstream out;
1306                                 string tempFile = outputFileName + toString(getpid()) + ".num.temp";
1307                                 m->openOutputFile(tempFile, out);
1308                                 out << num << endl;
1309                                 out << numChimeras << endl;
1310                                 out.close();
1311                                 
1312                                 exit(0);
1313                         }else { 
1314                                 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine(); 
1315                                 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
1316                                 exit(0);
1317                         }
1318                 }
1319                 
1320                 //do my part
1321                 num = driver(outputFileName, files[0], accnos, alns, numChimeras);
1322                 
1323                 //force parent to wait until all the processes are done
1324                 for (int i=0;i<processIDS.size();i++) { 
1325                         int temp = processIDS[i];
1326                         wait(&temp);
1327                 }
1328                 
1329                 for (int i = 0; i < processIDS.size(); i++) {
1330                         ifstream in;
1331                         string tempFile =  outputFileName + toString(processIDS[i]) + ".num.temp";
1332                         m->openInputFile(tempFile, in);
1333                         if (!in.eof()) { 
1334                                 int tempNum = 0; 
1335                                 in >> tempNum; m->gobble(in);
1336                                 num += tempNum; 
1337                                 in >> tempNum;
1338                                 numChimeras += tempNum;
1339                         }
1340                         in.close(); m->mothurRemove(tempFile);
1341                 }
1342 #else
1343                 //////////////////////////////////////////////////////////////////////////////////////////////////////
1344                 //Windows version shared memory, so be careful when passing variables through the preClusterData struct. 
1345                 //Above fork() will clone, so memory is separate, but that's not the case with windows, 
1346                 //////////////////////////////////////////////////////////////////////////////////////////////////////
1347                 
1348                 //divide file
1349                 int count = 0;
1350                 int spot = 0;
1351                 map<int, ofstream*> filehandles;
1352                 map<int, ofstream*>::iterator it3;
1353                 
1354                 ofstream* temp;
1355                 for (int i = 0; i < processors; i++) {
1356                         temp = new ofstream;
1357                         filehandles[i] = temp;
1358                         m->openOutputFile(filename+toString(i)+".temp", *(temp));
1359                         files.push_back(filename+toString(i)+".temp");
1360                 }
1361                 
1362                 ifstream in;
1363                 m->openInputFile(filename, in);
1364                 
1365                 while(!in.eof()) {
1366                         
1367                         if (m->control_pressed) { in.close(); for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) { (*(it3->second)).close(); delete it3->second; } return 0; }
1368                         
1369                         Sequence tempSeq(in); m->gobble(in); 
1370                         
1371                         if (tempSeq.getName() != "") {
1372                                 tempSeq.printSequence(*(filehandles[spot])); 
1373                                 spot++; count++;
1374                                 if (spot == processors) { spot = 0; }
1375                         }
1376                 }
1377                 in.close();
1378                 
1379                 //delete memory
1380                 for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) {
1381                         (*(it3->second)).close();
1382                         delete it3->second;
1383                 }
1384                 
1385                 //sanity check for number of processors
1386                 if (count < processors) { processors = count; }
1387                 
1388                 vector<uchimeData*> pDataArray; 
1389                 DWORD   dwThreadIdArray[processors-1];
1390                 HANDLE  hThreadArray[processors-1]; 
1391                 vector<string> dummy; //used so that we can use the same struct for MyUchimeSeqsThreadFunction and MyUchimeThreadFunction
1392                 
1393                 //Create processor worker threads.
1394                 for( int i=1; i<processors; i++ ){
1395                         // Allocate memory for thread data.
1396                         string extension = toString(i) + ".temp";
1397                         
1398                         uchimeData* tempUchime = new uchimeData(outputFileName+extension, templatefile, files[i], "", "", "", accnos+extension, alns+extension, dummy, m, 0, 0,  i);
1399                         tempUchime->setBooleans(useAbskew, chimealns, useMinH, useMindiv, useXn, useDn, useXa, useChunks, useMinchunk, useIdsmoothwindow, useMinsmoothid, useMaxp, skipgaps, skipgaps2, useMinlen, useMaxlen, ucl, useQueryfract);
1400                         tempUchime->setVariables(abskew, minh, mindiv, xn, dn, xa, chunks, minchunk, idsmoothwindow, minsmoothid, maxp, minlen, maxlen, queryfract);
1401                         
1402                         pDataArray.push_back(tempUchime);
1403                         processIDS.push_back(i);
1404                         
1405                         //MySeqSumThreadFunction is in header. It must be global or static to work with the threads.
1406                         //default security attributes, thread function name, argument to thread function, use default creation flags, returns the thread identifier
1407                         hThreadArray[i-1] = CreateThread(NULL, 0, MyUchimeSeqsThreadFunction, pDataArray[i-1], 0, &dwThreadIdArray[i-1]);   
1408                 }
1409                 
1410                 
1411                 //using the main process as a worker saves time and memory
1412                 num = driver(outputFileName, files[0], accnos, alns, numChimeras);
1413                 
1414                 //Wait until all threads have terminated.
1415                 WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE);
1416                 
1417                 //Close all thread handles and free memory allocations.
1418                 for(int i=0; i < pDataArray.size(); i++){
1419                         num += pDataArray[i]->count;
1420                         numChimeras += pDataArray[i]->numChimeras;
1421                         CloseHandle(hThreadArray[i]);
1422                         delete pDataArray[i];
1423                 }
1424 #endif          
1425                 
1426                 //append output files
1427                 for(int i=0;i<processIDS.size();i++){
1428                         m->appendFiles((outputFileName + toString(processIDS[i]) + ".temp"), outputFileName);
1429                         m->mothurRemove((outputFileName + toString(processIDS[i]) + ".temp"));
1430                         
1431                         m->appendFiles((accnos + toString(processIDS[i]) + ".temp"), accnos);
1432                         m->mothurRemove((accnos + toString(processIDS[i]) + ".temp"));
1433                         
1434                         if (chimealns) {
1435                                 m->appendFiles((alns + toString(processIDS[i]) + ".temp"), alns);
1436                                 m->mothurRemove((alns + toString(processIDS[i]) + ".temp"));
1437                         }
1438                 }
1439                 
1440                 //get rid of the file pieces.
1441                 for (int i = 0; i < files.size(); i++) { m->mothurRemove(files[i]); }
1442                 return num;     
1443         }
1444         catch(exception& e) {
1445                 m->errorOut(e, "ChimeraUchimeCommand", "createProcesses");
1446                 exit(1);
1447         }
1448 }
1449 /**************************************************************************************************/
1450
1451 int ChimeraUchimeCommand::createProcessesGroups(SequenceParser& parser, string outputFName, string filename, string accnos, string alns, vector<string> groups) {
1452         try {
1453                 
1454                 processIDS.clear();
1455                 int process = 1;
1456                 int num = 0;
1457                 
1458                 //sanity check
1459                 if (groups.size() < processors) { processors = groups.size(); }
1460                 
1461                 //divide the groups between the processors
1462                 vector<linePair> lines;
1463                 int numGroupsPerProcessor = groups.size() / processors;
1464                 for (int i = 0; i < processors; i++) {
1465                         int startIndex =  i * numGroupsPerProcessor;
1466                         int endIndex = (i+1) * numGroupsPerProcessor;
1467                         if(i == (processors - 1)){      endIndex = groups.size();       }
1468                         lines.push_back(linePair(startIndex, endIndex));
1469                 }
1470                 
1471 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)           
1472                                 
1473                 //loop through and create all the processes you want
1474                 while (process != processors) {
1475                         int pid = fork();
1476                         
1477                         if (pid > 0) {
1478                                 processIDS.push_back(pid);  //create map from line number to pid so you can append files in correct order later
1479                                 process++;
1480                         }else if (pid == 0){
1481                                 num = driverGroups(parser, outputFName + toString(getpid()) + ".temp", filename + toString(getpid()) + ".temp", accnos + toString(getpid()) + ".temp", alns + toString(getpid()) + ".temp", lines[process].start, lines[process].end, groups);
1482                                 
1483                                 //pass numSeqs to parent
1484                                 ofstream out;
1485                                 string tempFile = outputFName + toString(getpid()) + ".num.temp";
1486                                 m->openOutputFile(tempFile, out);
1487                                 out << num << endl;
1488                                 out.close();
1489                                 
1490                                 exit(0);
1491                         }else { 
1492                                 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine(); 
1493                                 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
1494                                 exit(0);
1495                         }
1496                 }
1497                 
1498                 //do my part
1499                 num = driverGroups(parser, outputFName, filename, accnos, alns, lines[0].start, lines[0].end, groups);
1500                 
1501                 //force parent to wait until all the processes are done
1502                 for (int i=0;i<processIDS.size();i++) { 
1503                         int temp = processIDS[i];
1504                         wait(&temp);
1505                 }
1506                 
1507                 for (int i = 0; i < processIDS.size(); i++) {
1508                         ifstream in;
1509                         string tempFile =  outputFName + toString(processIDS[i]) + ".num.temp";
1510                         m->openInputFile(tempFile, in);
1511                         if (!in.eof()) { int tempNum = 0; in >> tempNum; num += tempNum; }
1512                         in.close(); m->mothurRemove(tempFile);
1513                 }
1514                                 
1515 #else
1516                 //////////////////////////////////////////////////////////////////////////////////////////////////////
1517                 //Windows version shared memory, so be careful when passing variables through the preClusterData struct. 
1518                 //Above fork() will clone, so memory is separate, but that's not the case with windows, 
1519                 //////////////////////////////////////////////////////////////////////////////////////////////////////
1520                 
1521                 vector<uchimeData*> pDataArray; 
1522                 DWORD   dwThreadIdArray[processors-1];
1523                 HANDLE  hThreadArray[processors-1]; 
1524                 
1525                 //Create processor worker threads.
1526                 for( int i=1; i<processors; i++ ){
1527                         // Allocate memory for thread data.
1528                         string extension = toString(i) + ".temp";
1529                         
1530                         uchimeData* tempUchime = new uchimeData(outputFName+extension, templatefile, filename+extension, fastafile, namefile, groupfile, accnos+extension, alns+extension, groups, m, lines[i].start, lines[i].end,  i);
1531                         tempUchime->setBooleans(useAbskew, chimealns, useMinH, useMindiv, useXn, useDn, useXa, useChunks, useMinchunk, useIdsmoothwindow, useMinsmoothid, useMaxp, skipgaps, skipgaps2, useMinlen, useMaxlen, ucl, useQueryfract);
1532                         tempUchime->setVariables(abskew, minh, mindiv, xn, dn, xa, chunks, minchunk, idsmoothwindow, minsmoothid, maxp, minlen, maxlen, queryfract);
1533                         
1534                         pDataArray.push_back(tempUchime);
1535                         processIDS.push_back(i);
1536                         
1537                         //MySeqSumThreadFunction is in header. It must be global or static to work with the threads.
1538                         //default security attributes, thread function name, argument to thread function, use default creation flags, returns the thread identifier
1539                         hThreadArray[i-1] = CreateThread(NULL, 0, MyUchimeThreadFunction, pDataArray[i-1], 0, &dwThreadIdArray[i-1]);   
1540                 }
1541                 
1542                 
1543                 //using the main process as a worker saves time and memory
1544                 num = driverGroups(parser, outputFName, filename, accnos, alns, lines[0].start, lines[0].end, groups);
1545                 
1546                 //Wait until all threads have terminated.
1547                 WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE);
1548                 
1549                 //Close all thread handles and free memory allocations.
1550                 for(int i=0; i < pDataArray.size(); i++){
1551                         num += pDataArray[i]->count;
1552                         CloseHandle(hThreadArray[i]);
1553                         delete pDataArray[i];
1554                 }
1555 #endif          
1556                 
1557                                 
1558                 //append output files
1559                 for(int i=0;i<processIDS.size();i++){
1560                         m->appendFiles((outputFName + toString(processIDS[i]) + ".temp"), outputFName);
1561                         m->mothurRemove((outputFName + toString(processIDS[i]) + ".temp"));
1562                         
1563                         m->appendFiles((accnos + toString(processIDS[i]) + ".temp"), accnos);
1564                         m->mothurRemove((accnos + toString(processIDS[i]) + ".temp"));
1565                         
1566                         if (chimealns) {
1567                                 m->appendFiles((alns + toString(processIDS[i]) + ".temp"), alns);
1568                                 m->mothurRemove((alns + toString(processIDS[i]) + ".temp"));
1569                         }
1570                 }
1571                 
1572                 return num;     
1573                 
1574         }
1575         catch(exception& e) {
1576                 m->errorOut(e, "ChimeraUchimeCommand", "createProcessesGroups");
1577                 exit(1);
1578         }
1579 }
1580 /**************************************************************************************************/
1581