]> git.donarmstrong.com Git - mothur.git/blob - chimerauchimecommand.cpp
added sff.multiple command. fixed issue with windows paralellization in chimera...
[mothur.git] / chimerauchimecommand.cpp
1 /*
2  *  chimerauchimecommand.cpp
3  *  Mothur
4  *
5  *  Created by westcott on 5/13/11.
6  *  Copyright 2011 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "chimerauchimecommand.h"
11 #include "deconvolutecommand.h"
12 //#include "uc.h"
13 #include "sequence.hpp"
14 #include "referencedb.h"
15 #include "systemcommand.h"
16
17 //**********************************************************************************************************************
18 vector<string> ChimeraUchimeCommand::setParameters(){   
19         try {
20                 CommandParameter ptemplate("reference", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(ptemplate);
21                 CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pfasta);
22                 CommandParameter pname("name", "InputTypes", "", "", "NameCount", "none", "none",false,false); parameters.push_back(pname);
23         CommandParameter pcount("count", "InputTypes", "", "", "NameCount-CountGroup", "none", "none",false,false); parameters.push_back(pcount);
24                 CommandParameter pgroup("group", "InputTypes", "", "", "CountGroup", "none", "none",false,false); parameters.push_back(pgroup);
25                 CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors);
26                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
27                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
28                 CommandParameter pabskew("abskew", "Number", "", "1.9", "", "", "",false,false); parameters.push_back(pabskew);
29                 CommandParameter pchimealns("chimealns", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pchimealns);
30                 CommandParameter pminh("minh", "Number", "", "0.3", "", "", "",false,false); parameters.push_back(pminh);
31                 CommandParameter pmindiv("mindiv", "Number", "", "0.5", "", "", "",false,false); parameters.push_back(pmindiv);
32                 CommandParameter pxn("xn", "Number", "", "8.0", "", "", "",false,false); parameters.push_back(pxn);
33                 CommandParameter pdn("dn", "Number", "", "1.4", "", "", "",false,false); parameters.push_back(pdn);
34                 CommandParameter pxa("xa", "Number", "", "1", "", "", "",false,false); parameters.push_back(pxa);
35                 CommandParameter pchunks("chunks", "Number", "", "4", "", "", "",false,false); parameters.push_back(pchunks);
36                 CommandParameter pminchunk("minchunk", "Number", "", "64", "", "", "",false,false); parameters.push_back(pminchunk);
37                 CommandParameter pidsmoothwindow("idsmoothwindow", "Number", "", "32", "", "", "",false,false); parameters.push_back(pidsmoothwindow);
38                 //CommandParameter pminsmoothid("minsmoothid", "Number", "", "0.95", "", "", "",false,false); parameters.push_back(pminsmoothid);
39                 CommandParameter pmaxp("maxp", "Number", "", "2", "", "", "",false,false); parameters.push_back(pmaxp);
40                 CommandParameter pskipgaps("skipgaps", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(pskipgaps);
41                 CommandParameter pskipgaps2("skipgaps2", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(pskipgaps2);
42                 CommandParameter pminlen("minlen", "Number", "", "10", "", "", "",false,false); parameters.push_back(pminlen);
43                 CommandParameter pmaxlen("maxlen", "Number", "", "10000", "", "", "",false,false); parameters.push_back(pmaxlen);
44                 CommandParameter pucl("ucl", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pucl);
45                 CommandParameter pqueryfract("queryfract", "Number", "", "0.5", "", "", "",false,false); parameters.push_back(pqueryfract);
46
47                 vector<string> myArray;
48                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
49                 return myArray;
50         }
51         catch(exception& e) {
52                 m->errorOut(e, "ChimeraUchimeCommand", "setParameters");
53                 exit(1);
54         }
55 }
56 //**********************************************************************************************************************
57 string ChimeraUchimeCommand::getHelpString(){   
58         try {
59                 string helpString = "";
60                 helpString += "The chimera.uchime command reads a fastafile and referencefile and outputs potentially chimeric sequences.\n";
61                 helpString += "This command is a wrapper for uchime written by Robert C. Edgar.\n";
62                 helpString += "The chimera.uchime command parameters are fasta, name, count, reference, processors, abskew, chimealns, minh, mindiv, xn, dn, xa, chunks, minchunk, idsmoothwindow, minsmoothid, maxp, skipgaps, skipgaps2, minlen, maxlen, ucl and queryfact.\n";
63                 helpString += "The fasta parameter allows you to enter the fasta file containing your potentially chimeric sequences, and is required, unless you have a valid current fasta file. \n";
64                 helpString += "The name parameter allows you to provide a name file, if you are using template=self. \n";
65         helpString += "The count parameter allows you to provide a count file, if you are using template=self. \n";
66                 helpString += "You may enter multiple fasta files by separating their names with dashes. ie. fasta=abrecovery.fasta-amazon.fasta \n";
67                 helpString += "The group parameter allows you to provide a group file. The group file can be used with a namesfile and reference=self. When checking sequences, only sequences from the same group as the query sequence will be used as the reference. \n";
68                 helpString += "The reference parameter allows you to enter a reference file containing known non-chimeric sequences, and is required. You may also set template=self, in this case the abundant sequences will be used as potential parents. \n";
69                 helpString += "The processors parameter allows you to specify how many processors you would like to use.  The default is 1. \n";
70                 helpString += "The abskew parameter can only be used with template=self. Minimum abundance skew. Default 1.9. Abundance skew is: min [ abund(parent1), abund(parent2) ] / abund(query).\n";
71                 helpString += "The chimealns parameter allows you to indicate you would like a file containing multiple alignments of query sequences to parents in human readable format. Alignments show columns with differences that support or contradict a chimeric model.\n";
72                 helpString += "The minh parameter - mininum score to report chimera. Default 0.3. Values from 0.1 to 5 might be reasonable. Lower values increase sensitivity but may report more false positives. If you decrease xn you may need to increase minh, and vice versa.\n";
73                 helpString += "The mindiv parameter - minimum divergence ratio, default 0.5. Div ratio is 100%% - %%identity between query sequence and the closest candidate for being a parent. If you don't care about very close chimeras, then you could increase mindiv to, say, 1.0 or 2.0, and also decrease minh, say to 0.1, to increase sensitivity. How well this works will depend on your data. Best is to tune parameters on a good benchmark.\n";
74                 helpString += "The xn parameter - weight of a no vote. Default 8.0. Decreasing this weight to around 3 or 4 may give better performance on denoised data.\n";
75                 helpString += "The dn parameter - pseudo-count prior on number of no votes. Default 1.4. Probably no good reason to change this unless you can retune to a good benchmark for your data. Reasonable values are probably in the range from 0.2 to 2.\n";
76                 helpString += "The xa parameter - weight of an abstain vote. Default 1. So far, results do not seem to be very sensitive to this parameter, but if you have a good training set might be worth trying. Reasonable values might range from 0.1 to 2.\n";
77                 helpString += "The chunks parameter is the number of chunks to extract from the query sequence when searching for parents. Default 4.\n";
78                 helpString += "The minchunk parameter is the minimum length of a chunk. Default 64.\n";
79                 helpString += "The idsmoothwindow parameter is the length of id smoothing window. Default 32.\n";
80                 //helpString += "The minsmoothid parameter - minimum factional identity over smoothed window of candidate parent. Default 0.95.\n";
81                 helpString += "The maxp parameter - maximum number of candidate parents to consider. Default 2. In tests so far, increasing maxp gives only a very small improvement in sensivity but tends to increase the error rate quite a bit.\n";
82                 helpString += "The skipgaps parameter controls how gapped columns affect counting of diffs. If skipgaps is set to T, columns containing gaps do not found as diffs. Default = T.\n";
83                 helpString += "The skipgaps2 parameter controls how gapped columns affect counting of diffs. If skipgaps2 is set to T, if column is immediately adjacent to a column containing a gap, it is not counted as a diff. Default = T.\n";
84                 helpString += "The minlen parameter is the minimum unaligned sequence length. Defaults 10. Applies to both query and reference sequences.\n";
85                 helpString += "The maxlen parameter is the maximum unaligned sequence length. Defaults 10000. Applies to both query and reference sequences.\n";
86                 helpString += "The ucl parameter - use local-X alignments. Default is global-X or false. On tests so far, global-X is always better; this option is retained because it just might work well on some future type of data.\n";
87                 helpString += "The queryfract parameter - minimum fraction of the query sequence that must be covered by a local-X alignment. Default 0.5. Applies only when ucl is true.\n";
88 #ifdef USE_MPI
89                 helpString += "When using MPI, the processors parameter is set to the number of MPI processes running. \n";
90 #endif
91                 helpString += "The chimera.uchime command should be in the following format: \n";
92                 helpString += "chimera.uchime(fasta=yourFastaFile, reference=yourTemplate) \n";
93                 helpString += "Example: chimera.uchime(fasta=AD.align, reference=silva.gold.align) \n";
94                 helpString += "Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFastaFile).\n";       
95                 return helpString;
96         }
97         catch(exception& e) {
98                 m->errorOut(e, "ChimeraUchimeCommand", "getHelpString");
99                 exit(1);
100         }
101 }
102 //**********************************************************************************************************************
103 string ChimeraUchimeCommand::getOutputFileNameTag(string type, string inputName=""){    
104         try {
105         string outputFileName = "";
106                 map<string, vector<string> >::iterator it;
107         
108         //is this a type this command creates
109         it = outputTypes.find(type);
110         if (it == outputTypes.end()) {  m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n"); }
111         else {
112             if (type == "chimera") {  outputFileName =  "uchime.chimeras"; }
113             else if (type == "accnos") {  outputFileName =  "uchime.accnos"; }
114             else if (type == "alns") {  outputFileName =  "uchime.alns"; }
115             else { m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n"); m->control_pressed = true;  }
116         }
117         return outputFileName;
118         }
119         catch(exception& e) {
120                 m->errorOut(e, "ChimeraUchimeCommand", "getOutputFileNameTag");
121                 exit(1);
122         }
123 }
124 //**********************************************************************************************************************
125 ChimeraUchimeCommand::ChimeraUchimeCommand(){   
126         try {
127                 abort = true; calledHelp = true;
128                 setParameters();
129                 vector<string> tempOutNames;
130                 outputTypes["chimera"] = tempOutNames;
131                 outputTypes["accnos"] = tempOutNames;
132                 outputTypes["alns"] = tempOutNames;
133         }
134         catch(exception& e) {
135                 m->errorOut(e, "ChimeraUchimeCommand", "ChimeraUchimeCommand");
136                 exit(1);
137         }
138 }
139 //***************************************************************************************************************
140 ChimeraUchimeCommand::ChimeraUchimeCommand(string option)  {
141         try {
142                 abort = false; calledHelp = false; hasName=false; hasCount=false;
143                 ReferenceDB* rdb = ReferenceDB::getInstance();
144                 
145                 //allow user to run help
146                 if(option == "help") { help(); abort = true; calledHelp = true; }
147                 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
148                 
149                 else {
150                         vector<string> myArray = setParameters();
151                         
152                         OptionParser parser(option);
153                         map<string,string> parameters = parser.getParameters();
154                         
155                         ValidParameters validParameter("chimera.uchime");
156                         map<string,string>::iterator it;
157                         
158                         //check to make sure all parameters are valid for command
159                         for (it = parameters.begin(); it != parameters.end(); it++) { 
160                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
161                         }
162                         
163                         vector<string> tempOutNames;
164                         outputTypes["chimera"] = tempOutNames;
165                         outputTypes["accnos"] = tempOutNames;
166                         outputTypes["alns"] = tempOutNames;
167                         
168                         //if the user changes the input directory command factory will send this info to us in the output parameter 
169                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
170                         if (inputDir == "not found"){   inputDir = "";          }
171                         
172                         //check for required parameters
173                         fastafile = validParameter.validFile(parameters, "fasta", false);
174                         if (fastafile == "not found") {                                 
175                                 //if there is a current fasta file, use it
176                                 string filename = m->getFastaFile(); 
177                                 if (filename != "") { fastaFileNames.push_back(filename); m->mothurOut("Using " + filename + " as input file for the fasta parameter."); m->mothurOutEndLine(); }
178                                 else {  m->mothurOut("You have no current fastafile and the fasta parameter is required."); m->mothurOutEndLine(); abort = true; }
179                         }else { 
180                                 m->splitAtDash(fastafile, fastaFileNames);
181                                 
182                                 //go through files and make sure they are good, if not, then disregard them
183                                 for (int i = 0; i < fastaFileNames.size(); i++) {
184                                         
185                                         bool ignore = false;
186                                         if (fastaFileNames[i] == "current") { 
187                                                 fastaFileNames[i] = m->getFastaFile(); 
188                                                 if (fastaFileNames[i] != "") {  m->mothurOut("Using " + fastaFileNames[i] + " as input file for the fasta parameter where you had given current."); m->mothurOutEndLine(); }
189                                                 else {  
190                                                         m->mothurOut("You have no current fastafile, ignoring current."); m->mothurOutEndLine(); ignore=true; 
191                                                         //erase from file list
192                                                         fastaFileNames.erase(fastaFileNames.begin()+i);
193                                                         i--;
194                                                 }
195                                         }
196                                         
197                                         if (!ignore) {
198                                                 
199                                                 if (inputDir != "") {
200                                                         string path = m->hasPath(fastaFileNames[i]);
201                                                         //if the user has not given a path then, add inputdir. else leave path alone.
202                                                         if (path == "") {       fastaFileNames[i] = inputDir + fastaFileNames[i];               }
203                                                 }
204                                                 
205                                                 int ableToOpen;
206                                                 ifstream in;
207                                                 
208                                                 ableToOpen = m->openInputFile(fastaFileNames[i], in, "noerror");
209                                                 
210                                                 //if you can't open it, try default location
211                                                 if (ableToOpen == 1) {
212                                                         if (m->getDefaultPath() != "") { //default path is set
213                                                                 string tryPath = m->getDefaultPath() + m->getSimpleName(fastaFileNames[i]);
214                                                                 m->mothurOut("Unable to open " + fastaFileNames[i] + ". Trying default " + tryPath); m->mothurOutEndLine();
215                                                                 ifstream in2;
216                                                                 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
217                                                                 in2.close();
218                                                                 fastaFileNames[i] = tryPath;
219                                                         }
220                                                 }
221                                                 
222                                                 if (ableToOpen == 1) {
223                                                         if (m->getOutputDir() != "") { //default path is set
224                                                                 string tryPath = m->getOutputDir() + m->getSimpleName(fastaFileNames[i]);
225                                                                 m->mothurOut("Unable to open " + fastaFileNames[i] + ". Trying output directory " + tryPath); m->mothurOutEndLine();
226                                                                 ifstream in2;
227                                                                 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
228                                                                 in2.close();
229                                                                 fastaFileNames[i] = tryPath;
230                                                         }
231                                                 }
232                                                 
233                                                 in.close();
234                                                 
235                                                 if (ableToOpen == 1) { 
236                                                         m->mothurOut("Unable to open " + fastaFileNames[i] + ". It will be disregarded."); m->mothurOutEndLine(); 
237                                                         //erase from file list
238                                                         fastaFileNames.erase(fastaFileNames.begin()+i);
239                                                         i--;
240                                                 }else {
241                                                         m->setFastaFile(fastaFileNames[i]);
242                                                 }
243                                         }
244                                 }
245                                 
246                                 //make sure there is at least one valid file left
247                                 if (fastaFileNames.size() == 0) { m->mothurOut("[ERROR]: no valid files."); m->mothurOutEndLine(); abort = true; }
248                         }
249                         
250                         
251                         //check for required parameters
252                         namefile = validParameter.validFile(parameters, "name", false);
253                         if (namefile == "not found") { namefile = "";   }
254                         else { 
255                                 m->splitAtDash(namefile, nameFileNames);
256                                 
257                                 //go through files and make sure they are good, if not, then disregard them
258                                 for (int i = 0; i < nameFileNames.size(); i++) {
259                                         
260                                         bool ignore = false;
261                                         if (nameFileNames[i] == "current") { 
262                                                 nameFileNames[i] = m->getNameFile(); 
263                                                 if (nameFileNames[i] != "") {  m->mothurOut("Using " + nameFileNames[i] + " as input file for the name parameter where you had given current."); m->mothurOutEndLine(); }
264                                                 else {  
265                                                         m->mothurOut("You have no current namefile, ignoring current."); m->mothurOutEndLine(); ignore=true; 
266                                                         //erase from file list
267                                                         nameFileNames.erase(nameFileNames.begin()+i);
268                                                         i--;
269                                                 }
270                                         }
271                                         
272                                         if (!ignore) {
273                                                 
274                                                 if (inputDir != "") {
275                                                         string path = m->hasPath(nameFileNames[i]);
276                                                         //if the user has not given a path then, add inputdir. else leave path alone.
277                                                         if (path == "") {       nameFileNames[i] = inputDir + nameFileNames[i];         }
278                                                 }
279                                                 
280                                                 int ableToOpen;
281                                                 ifstream in;
282                                                 
283                                                 ableToOpen = m->openInputFile(nameFileNames[i], in, "noerror");
284                                                 
285                                                 //if you can't open it, try default location
286                                                 if (ableToOpen == 1) {
287                                                         if (m->getDefaultPath() != "") { //default path is set
288                                                                 string tryPath = m->getDefaultPath() + m->getSimpleName(nameFileNames[i]);
289                                                                 m->mothurOut("Unable to open " + nameFileNames[i] + ". Trying default " + tryPath); m->mothurOutEndLine();
290                                                                 ifstream in2;
291                                                                 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
292                                                                 in2.close();
293                                                                 nameFileNames[i] = tryPath;
294                                                         }
295                                                 }
296                                                 
297                                                 if (ableToOpen == 1) {
298                                                         if (m->getOutputDir() != "") { //default path is set
299                                                                 string tryPath = m->getOutputDir() + m->getSimpleName(nameFileNames[i]);
300                                                                 m->mothurOut("Unable to open " + nameFileNames[i] + ". Trying output directory " + tryPath); m->mothurOutEndLine();
301                                                                 ifstream in2;
302                                                                 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
303                                                                 in2.close();
304                                                                 nameFileNames[i] = tryPath;
305                                                         }
306                                                 }
307                                                 
308                                                 in.close();
309                                                 
310                                                 if (ableToOpen == 1) { 
311                                                         m->mothurOut("Unable to open " + nameFileNames[i] + ". It will be disregarded."); m->mothurOutEndLine(); 
312                                                         //erase from file list
313                                                         nameFileNames.erase(nameFileNames.begin()+i);
314                                                         i--;
315                                                 }else {
316                                                         m->setNameFile(nameFileNames[i]);
317                                                 }
318                                         }
319                                 }
320                         }
321             
322             if (nameFileNames.size() != 0) { hasName = true; }
323             
324             //check for required parameters
325             vector<string> countfileNames;
326                         countfile = validParameter.validFile(parameters, "count", false);
327                         if (countfile == "not found") { 
328                 countfile = "";  
329                         }else { 
330                                 m->splitAtDash(countfile, countfileNames);
331                                 
332                                 //go through files and make sure they are good, if not, then disregard them
333                                 for (int i = 0; i < countfileNames.size(); i++) {
334                                         
335                                         bool ignore = false;
336                                         if (countfileNames[i] == "current") { 
337                                                 countfileNames[i] = m->getCountTableFile(); 
338                                                 if (nameFileNames[i] != "") {  m->mothurOut("Using " + countfileNames[i] + " as input file for the count parameter where you had given current."); m->mothurOutEndLine(); }
339                                                 else {  
340                                                         m->mothurOut("You have no current count file, ignoring current."); m->mothurOutEndLine(); ignore=true; 
341                                                         //erase from file list
342                                                         countfileNames.erase(countfileNames.begin()+i);
343                                                         i--;
344                                                 }
345                                         }
346                                         
347                                         if (!ignore) {
348                                                 
349                                                 if (inputDir != "") {
350                                                         string path = m->hasPath(countfileNames[i]);
351                                                         //if the user has not given a path then, add inputdir. else leave path alone.
352                                                         if (path == "") {       countfileNames[i] = inputDir + countfileNames[i];               }
353                                                 }
354                                                 
355                                                 int ableToOpen;
356                                                 ifstream in;
357                                                 
358                                                 ableToOpen = m->openInputFile(countfileNames[i], in, "noerror");
359                                                 
360                                                 //if you can't open it, try default location
361                                                 if (ableToOpen == 1) {
362                                                         if (m->getDefaultPath() != "") { //default path is set
363                                                                 string tryPath = m->getDefaultPath() + m->getSimpleName(countfileNames[i]);
364                                                                 m->mothurOut("Unable to open " + countfileNames[i] + ". Trying default " + tryPath); m->mothurOutEndLine();
365                                                                 ifstream in2;
366                                                                 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
367                                                                 in2.close();
368                                                                 countfileNames[i] = tryPath;
369                                                         }
370                                                 }
371                                                 
372                                                 if (ableToOpen == 1) {
373                                                         if (m->getOutputDir() != "") { //default path is set
374                                                                 string tryPath = m->getOutputDir() + m->getSimpleName(countfileNames[i]);
375                                                                 m->mothurOut("Unable to open " + countfileNames[i] + ". Trying output directory " + tryPath); m->mothurOutEndLine();
376                                                                 ifstream in2;
377                                                                 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
378                                                                 in2.close();
379                                                                 countfileNames[i] = tryPath;
380                                                         }
381                                                 }
382                                                 
383                                                 in.close();
384                                                 
385                                                 if (ableToOpen == 1) { 
386                                                         m->mothurOut("Unable to open " + countfileNames[i] + ". It will be disregarded."); m->mothurOutEndLine(); 
387                                                         //erase from file list
388                                                         countfileNames.erase(countfileNames.begin()+i);
389                                                         i--;
390                                                 }else {
391                                                         m->setCountTableFile(countfileNames[i]);
392                                                 }
393                                         }
394                                 }
395                         }
396             
397             if (countfileNames.size() != 0) { hasCount = true; }
398             
399                         //make sure there is at least one valid file left
400             if (hasName && hasCount) { m->mothurOut("[ERROR]: You must enter ONLY ONE of the following: count or name."); m->mothurOutEndLine(); abort = true; }
401             
402             if (!hasName && hasCount) { nameFileNames = countfileNames; }
403             
404                         if ((hasCount || hasName) && (nameFileNames.size() != fastaFileNames.size())) { m->mothurOut("[ERROR]: The number of name or count files does not match the number of fastafiles, please correct."); m->mothurOutEndLine(); abort=true; }
405                         
406                         bool hasGroup = true;
407                         groupfile = validParameter.validFile(parameters, "group", false);
408                         if (groupfile == "not found") { groupfile = "";  hasGroup = false; }
409                         else { 
410                                 m->splitAtDash(groupfile, groupFileNames);
411                                 
412                                 //go through files and make sure they are good, if not, then disregard them
413                                 for (int i = 0; i < groupFileNames.size(); i++) {
414                                         
415                                         bool ignore = false;
416                                         if (groupFileNames[i] == "current") { 
417                                                 groupFileNames[i] = m->getGroupFile(); 
418                                                 if (groupFileNames[i] != "") {  m->mothurOut("Using " + groupFileNames[i] + " as input file for the group parameter where you had given current."); m->mothurOutEndLine(); }
419                                                 else {  
420                                                         m->mothurOut("You have no current namefile, ignoring current."); m->mothurOutEndLine(); ignore=true; 
421                                                         //erase from file list
422                                                         groupFileNames.erase(groupFileNames.begin()+i);
423                                                         i--;
424                                                 }
425                                         }
426                                         
427                                         if (!ignore) {
428                                                 
429                                                 if (inputDir != "") {
430                                                         string path = m->hasPath(groupFileNames[i]);
431                                                         //if the user has not given a path then, add inputdir. else leave path alone.
432                                                         if (path == "") {       groupFileNames[i] = inputDir + groupFileNames[i];               }
433                                                 }
434                                                 
435                                                 int ableToOpen;
436                                                 ifstream in;
437                                                 
438                                                 ableToOpen = m->openInputFile(groupFileNames[i], in, "noerror");
439                                                 
440                                                 //if you can't open it, try default location
441                                                 if (ableToOpen == 1) {
442                                                         if (m->getDefaultPath() != "") { //default path is set
443                                                                 string tryPath = m->getDefaultPath() + m->getSimpleName(groupFileNames[i]);
444                                                                 m->mothurOut("Unable to open " + groupFileNames[i] + ". Trying default " + tryPath); m->mothurOutEndLine();
445                                                                 ifstream in2;
446                                                                 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
447                                                                 in2.close();
448                                                                 groupFileNames[i] = tryPath;
449                                                         }
450                                                 }
451                                                 
452                                                 if (ableToOpen == 1) {
453                                                         if (m->getOutputDir() != "") { //default path is set
454                                                                 string tryPath = m->getOutputDir() + m->getSimpleName(groupFileNames[i]);
455                                                                 m->mothurOut("Unable to open " + groupFileNames[i] + ". Trying output directory " + tryPath); m->mothurOutEndLine();
456                                                                 ifstream in2;
457                                                                 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
458                                                                 in2.close();
459                                                                 groupFileNames[i] = tryPath;
460                                                         }
461                                                 }
462                                                 
463                                                 in.close();
464                                                 
465                                                 if (ableToOpen == 1) { 
466                                                         m->mothurOut("Unable to open " + groupFileNames[i] + ". It will be disregarded."); m->mothurOutEndLine(); 
467                                                         //erase from file list
468                                                         groupFileNames.erase(groupFileNames.begin()+i);
469                                                         i--;
470                                                 }else {
471                                                         m->setGroupFile(groupFileNames[i]);
472                                                 }
473                                         }
474                                 }
475                                 
476                                 //make sure there is at least one valid file left
477                                 if (groupFileNames.size() == 0) { m->mothurOut("[ERROR]: no valid group files."); m->mothurOutEndLine(); abort = true; }
478                         }
479                         
480                         if (hasGroup && (groupFileNames.size() != fastaFileNames.size())) { m->mothurOut("[ERROR]: The number of groupfiles does not match the number of fastafiles, please correct."); m->mothurOutEndLine(); abort=true; }
481                         
482             if (hasGroup && hasCount) { m->mothurOut("[ERROR]: You must enter ONLY ONE of the following: count or group."); m->mothurOutEndLine(); abort = true; }                      
483                         //if the user changes the output directory command factory will send this info to us in the output parameter 
484                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = ""; }
485                         
486                         
487                         //if the user changes the output directory command factory will send this info to us in the output parameter 
488                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = ""; }
489                         
490                         string path;
491                         it = parameters.find("reference");
492                         //user has given a template file
493                         if(it != parameters.end()){ 
494                                 if (it->second == "self") { templatefile = "self"; }
495                                 else {
496                                         path = m->hasPath(it->second);
497                                         //if the user has not given a path then, add inputdir. else leave path alone.
498                                         if (path == "") {       parameters["reference"] = inputDir + it->second;                }
499                                         
500                                         templatefile = validParameter.validFile(parameters, "reference", true);
501                                         if (templatefile == "not open") { abort = true; }
502                                         else if (templatefile == "not found") { //check for saved reference sequences
503                                                 if (rdb->getSavedReference() != "") {
504                                                         templatefile = rdb->getSavedReference();
505                                                         m->mothurOutEndLine();  m->mothurOut("Using sequences from " + rdb->getSavedReference() + "."); m->mothurOutEndLine();
506                                                 }else {
507                                                         m->mothurOut("[ERROR]: You don't have any saved reference sequences and the reference parameter is a required."); 
508                                                         m->mothurOutEndLine();
509                                                         abort = true; 
510                                                 }
511                                         }
512                                 }
513                         }else if (hasName) {  templatefile = "self"; }
514             else if (hasCount) {  templatefile = "self"; }
515                         else { 
516                                 if (rdb->getSavedReference() != "") {
517                                         templatefile = rdb->getSavedReference();
518                                         m->mothurOutEndLine();  m->mothurOut("Using sequences from " + rdb->getSavedReference() + "."); m->mothurOutEndLine();
519                                 }else {
520                                         m->mothurOut("[ERROR]: You don't have any saved reference sequences and the reference parameter is a required."); 
521                                         m->mothurOutEndLine();
522                                         templatefile = ""; abort = true; 
523                                 } 
524                         }
525                                 
526                         string temp = validParameter.validFile(parameters, "processors", false);        if (temp == "not found"){       temp = m->getProcessors();      }
527                         m->setProcessors(temp);
528                         m->mothurConvert(temp, processors);
529                         
530                         abskew = validParameter.validFile(parameters, "abskew", false); if (abskew == "not found"){     useAbskew = false;  abskew = "1.9";     }else{  useAbskew = true;  }
531                         if (useAbskew && templatefile != "self") { m->mothurOut("The abskew parameter is only valid with template=self, ignoring."); m->mothurOutEndLine(); useAbskew = false; }
532                         
533                         temp = validParameter.validFile(parameters, "chimealns", false);                        if (temp == "not found") { temp = "f"; }
534                         chimealns = m->isTrue(temp); 
535                         
536                         minh = validParameter.validFile(parameters, "minh", false);                                             if (minh == "not found")                        { useMinH = false; minh = "0.3";                                        }       else{ useMinH = true;                   }
537                         mindiv = validParameter.validFile(parameters, "mindiv", false);                                 if (mindiv == "not found")                      { useMindiv = false; mindiv = "0.5";                            }       else{ useMindiv = true;                 }
538                         xn = validParameter.validFile(parameters, "xn", false);                                                 if (xn == "not found")                          { useXn = false; xn = "8.0";                                            }       else{ useXn = true;                             }
539                         dn = validParameter.validFile(parameters, "dn", false);                                                 if (dn == "not found")                          { useDn = false; dn = "1.4";                                            }       else{ useDn = true;                             }
540                         xa = validParameter.validFile(parameters, "xa", false);                                                 if (xa == "not found")                          { useXa = false; xa = "1";                                                      }       else{ useXa = true;                             }
541                         chunks = validParameter.validFile(parameters, "chunks", false);                                 if (chunks == "not found")                      { useChunks = false; chunks = "4";                                      }       else{ useChunks = true;                 }
542                         minchunk = validParameter.validFile(parameters, "minchunk", false);                             if (minchunk == "not found")            { useMinchunk = false; minchunk = "64";                         }       else{ useMinchunk = true;               }
543                         idsmoothwindow = validParameter.validFile(parameters, "idsmoothwindow", false); if (idsmoothwindow == "not found")      { useIdsmoothwindow = false; idsmoothwindow = "32";     }       else{ useIdsmoothwindow = true; }
544                         //minsmoothid = validParameter.validFile(parameters, "minsmoothid", false);             if (minsmoothid == "not found")         { useMinsmoothid = false; minsmoothid = "0.95";         }       else{ useMinsmoothid = true;    }
545                         maxp = validParameter.validFile(parameters, "maxp", false);                                             if (maxp == "not found")                        { useMaxp = false; maxp = "2";                                          }       else{ useMaxp = true;                   }
546                         minlen = validParameter.validFile(parameters, "minlen", false);                                 if (minlen == "not found")                      { useMinlen = false; minlen = "10";                                     }       else{ useMinlen = true;                 }
547                         maxlen = validParameter.validFile(parameters, "maxlen", false);                                 if (maxlen == "not found")                      { useMaxlen = false; maxlen = "10000";                          }       else{ useMaxlen = true;                 }
548                         
549                         temp = validParameter.validFile(parameters, "ucl", false);                                              if (temp == "not found") { temp = "f"; }
550                         ucl = m->isTrue(temp);
551                         
552                         queryfract = validParameter.validFile(parameters, "queryfract", false);                 if (queryfract == "not found")          { useQueryfract = false; queryfract = "0.5";            }       else{ useQueryfract = true;             }
553                         if (!ucl && useQueryfract) { m->mothurOut("queryfact may only be used when ucl=t, ignoring."); m->mothurOutEndLine(); useQueryfract = false; }
554                         
555                         temp = validParameter.validFile(parameters, "skipgaps", false);                                 if (temp == "not found") { temp = "t"; }
556                         skipgaps = m->isTrue(temp); 
557
558                         temp = validParameter.validFile(parameters, "skipgaps2", false);                                if (temp == "not found") { temp = "t"; }
559                         skipgaps2 = m->isTrue(temp); 
560                         
561                         if (hasName && (templatefile != "self")) { m->mothurOut("You have provided a namefile and the reference parameter is not set to self. I am not sure what reference you are trying to use, aborting."); m->mothurOutEndLine(); abort=true; }
562                         if (hasGroup && (templatefile != "self")) { m->mothurOut("You have provided a group file and the reference parameter is not set to self. I am not sure what reference you are trying to use, aborting."); m->mothurOutEndLine(); abort=true; }
563                         
564                         //look for uchime exe
565                         path = m->argv;
566                         string tempPath = path;
567                         for (int i = 0; i < path.length(); i++) { tempPath[i] = tolower(path[i]); }
568                         path = path.substr(0, (tempPath.find_last_of('m')));
569                         
570                         string uchimeCommand;
571 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
572                         uchimeCommand = path + "uchime";        //      format the database, -o option gives us the ability
573             if (m->debug) { 
574                 m->mothurOut("[DEBUG]: Uchime location using \"which uchime\" = "); 
575                 Command* newCommand = new SystemCommand("which uchime"); m->mothurOutEndLine();
576                 newCommand->execute();
577                 delete newCommand;
578                 m->mothurOut("[DEBUG]: Mothur's location using \"which mothur\" = "); 
579                 newCommand = new SystemCommand("which mothur"); m->mothurOutEndLine();
580                 newCommand->execute();
581                 delete newCommand;
582             }
583 #else
584                         uchimeCommand = path + "uchime.exe";
585 #endif
586         
587                         //test to make sure uchime exists
588                         ifstream in;
589                         uchimeCommand = m->getFullPathName(uchimeCommand);
590                         int ableToOpen = m->openInputFile(uchimeCommand, in, "no error"); in.close();
591                         if(ableToOpen == 1) {   
592                 m->mothurOut(uchimeCommand + " file does not exist. Checking path... \n");
593                 //check to see if uchime is in the path??
594                 
595                 string uLocation = m->findProgramPath("uchime");
596                 
597                 
598                 ifstream in2;
599 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
600                 ableToOpen = m->openInputFile(uLocation, in2, "no error"); in2.close();
601 #else
602                 ableToOpen = m->openInputFile((uLocation + ".exe"), in2, "no error"); in2.close();
603 #endif
604
605                 if(ableToOpen == 1) { m->mothurOut("[ERROR]: " + uLocation + " file does not exist. mothur requires the uchime executable."); m->mothurOutEndLine(); abort = true; } 
606                 else {  m->mothurOut("Found uchime in your path, using " + uLocation + "\n");uchimeLocation = uLocation; }
607             }else {  uchimeLocation = uchimeCommand; }
608             
609             uchimeLocation = m->getFullPathName(uchimeLocation);
610         }
611         }
612         catch(exception& e) {
613                 m->errorOut(e, "ChimeraSlayerCommand", "ChimeraSlayerCommand");
614                 exit(1);
615         }
616 }
617 //***************************************************************************************************************
618
619 int ChimeraUchimeCommand::execute(){
620         try{
621         
622         if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
623                 
624                 m->mothurOut("\nuchime by Robert C. Edgar\nhttp://drive5.com/uchime\nThis code is donated to the public domain.\n\n");
625                 
626                 for (int s = 0; s < fastaFileNames.size(); s++) {
627                         
628                         m->mothurOut("Checking sequences from " + fastaFileNames[s] + " ..." ); m->mothurOutEndLine();
629                         
630                         int start = time(NULL); 
631                         string nameFile = "";
632                         if (outputDir == "") { outputDir = m->hasPath(fastaFileNames[s]);  }//if user entered a file with a path then preserve it                               
633                         string outputFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + getOutputFileNameTag("chimera");
634                         string accnosFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + getOutputFileNameTag("accnos");
635                         string alnsFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s]))  + getOutputFileNameTag("alns");
636                         string newFasta = m->getRootName(fastaFileNames[s]) + "temp";
637                                 
638                         //you provided a groupfile
639                         string groupFile = "";
640             bool hasGroup = false;
641                         if (groupFileNames.size() != 0) { groupFile = groupFileNames[s]; hasGroup = true; }
642             else if (hasCount) {
643                 CountTable ct;
644                 if (ct.testGroups(nameFileNames[s])) { hasGroup = true; }
645             }
646                         
647                         if ((templatefile == "self") && (!hasGroup)) { //you want to run uchime with a template=self and no groups
648
649                                 if (processors != 1) { m->mothurOut("When using template=self, mothur can only use 1 processor, continuing."); m->mothurOutEndLine(); processors = 1; }
650                                 if (nameFileNames.size() != 0) { //you provided a namefile and we don't need to create one
651                                         nameFile = nameFileNames[s];
652                                 }else { nameFile = getNamesFile(fastaFileNames[s]); }
653                                                                                 
654                                 map<string, string> seqs;  
655                                 readFasta(fastaFileNames[s], seqs);  if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) {   m->mothurRemove(outputNames[j]);        }  return 0; }
656
657                                 //read namefile
658                                 vector<seqPriorityNode> nameMapCount;
659                 int error;
660                 if (hasCount) {
661                     CountTable ct;
662                     ct.readTable(nameFile);
663                     for(map<string, string>::iterator it = seqs.begin(); it != seqs.end(); it++) {
664                         int num = ct.getNumSeqs(it->first);
665                         if (num == 0) { error = 1; }
666                         else {
667                             seqPriorityNode temp(num, it->second, it->first);
668                             nameMapCount.push_back(temp);
669                         }
670                     }
671                 }else {
672                     error = m->readNames(nameFile, nameMapCount, seqs); if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) {        m->mothurRemove(outputNames[j]);        }  return 0; }
673                 }
674                                 if (error == 1) { for (int j = 0; j < outputNames.size(); j++) {        m->mothurRemove(outputNames[j]);        }  return 0; }
675                                 if (seqs.size() != nameMapCount.size()) { m->mothurOut( "The number of sequences in your fastafile does not match the number of sequences in your namefile, aborting."); m->mothurOutEndLine(); for (int j = 0; j < outputNames.size(); j++) {  m->mothurRemove(outputNames[j]);        }  return 0; }
676                                 
677                                 printFile(nameMapCount, newFasta);
678                                 fastaFileNames[s] = newFasta;
679                         }
680                         
681                         if (m->control_pressed) {  for (int j = 0; j < outputNames.size(); j++) {       m->mothurRemove(outputNames[j]);        }  return 0;    }                               
682                         
683                         if (hasGroup) {
684                                 if (nameFileNames.size() != 0) { //you provided a namefile and we don't need to create one
685                                         nameFile = nameFileNames[s];
686                                 }else { nameFile = getNamesFile(fastaFileNames[s]); }
687                                 
688                                 //Parse sequences by group
689                 vector<string> groups;
690                 map<string, string> uniqueNames;
691                 if (hasCount) {
692                     cparser = new SequenceCountParser(nameFile, fastaFileNames[s]);
693                     groups = cparser->getNamesOfGroups();
694                     uniqueNames = cparser->getAllSeqsMap();
695                 }else{
696                     sparser = new SequenceParser(groupFile, fastaFileNames[s], nameFile);
697                     groups = sparser->getNamesOfGroups();
698                     uniqueNames = sparser->getAllSeqsMap();
699                 }
700                                         
701                                 if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) {        m->mothurRemove(outputNames[j]);        }  return 0; }
702                                                                 
703                                 //clears files
704                                 ofstream out, out1, out2;
705                                 m->openOutputFile(outputFileName, out); out.close(); 
706                                 m->openOutputFile(accnosFileName, out1); out1.close();
707                                 if (chimealns) { m->openOutputFile(alnsFileName, out2); out2.close(); }
708                                 int totalSeqs = 0;
709                                 
710                                 if(processors == 1)     {       totalSeqs = driverGroups(outputFileName, newFasta, accnosFileName, alnsFileName, 0, groups.size(), groups);     }
711                                 else                            {       totalSeqs = createProcessesGroups(outputFileName, newFasta, accnosFileName, alnsFileName, groups, nameFile, groupFile, fastaFileNames[s]);                      }
712
713                                 if (m->control_pressed) {  for (int j = 0; j < outputNames.size(); j++) {       m->mothurRemove(outputNames[j]);        }  return 0;    }                               
714                 if (hasCount) { delete cparser; }
715                 else { delete sparser; }
716
717                                 int totalChimeras = deconvoluteResults(uniqueNames, outputFileName, accnosFileName, alnsFileName);
718                                 
719                                 m->mothurOutEndLine(); m->mothurOut("It took " + toString(time(NULL) - start) + " secs to check " + toString(totalSeqs) + " sequences. " + toString(totalChimeras) + " chimeras were found.");  m->mothurOutEndLine();
720                                 m->mothurOut("The number of sequences checked may be larger than the number of unique sequences because some sequences are found in several samples."); m->mothurOutEndLine(); 
721                                 
722                                 if (m->control_pressed) {  for (int j = 0; j < outputNames.size(); j++) {       m->mothurRemove(outputNames[j]);        }  return 0;    }                               
723                                         
724                         }else{
725                                 if (m->control_pressed) {  for (int j = 0; j < outputNames.size(); j++) {       m->mothurRemove(outputNames[j]);        }  return 0;    }
726                         
727                                 int numSeqs = 0;
728                                 int numChimeras = 0;
729
730                                 if(processors == 1){ numSeqs = driver(outputFileName, fastaFileNames[s], accnosFileName, alnsFileName, numChimeras); }
731                                 else{   numSeqs = createProcesses(outputFileName, fastaFileNames[s], accnosFileName, alnsFileName, numChimeras); }
732                                 
733                                 //add headings
734                                 ofstream out;
735                                 m->openOutputFile(outputFileName+".temp", out); 
736                                 out << "Score\tQuery\tParentA\tParentB\tIdQM\tIdQA\tIdQB\tIdAB\tIdQT\tLY\tLN\tLA\tRY\tRN\tRA\tDiv\tYN\n";
737                                 out.close();
738                                 
739                                 m->appendFiles(outputFileName, outputFileName+".temp");
740                                 m->mothurRemove(outputFileName); rename((outputFileName+".temp").c_str(), outputFileName.c_str());
741                                 
742                                 if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) {        m->mothurRemove(outputNames[j]);        } return 0; }
743                         
744                                 //remove file made for uchime
745                                 if (templatefile == "self") {  m->mothurRemove(fastaFileNames[s]); }
746                         
747                                 m->mothurOutEndLine(); m->mothurOut("It took " + toString(time(NULL) - start) + " secs to check " + toString(numSeqs) + " sequences. " + toString(numChimeras) + " chimeras were found.");      m->mothurOutEndLine();
748                         }
749                         
750                         outputNames.push_back(outputFileName); outputTypes["chimera"].push_back(outputFileName);
751                         outputNames.push_back(accnosFileName); outputTypes["accnos"].push_back(accnosFileName);
752                         if (chimealns) { outputNames.push_back(alnsFileName); outputTypes["alns"].push_back(alnsFileName); }
753                 }
754         
755                 //set accnos file as new current accnosfile
756                 string current = "";
757                 itTypes = outputTypes.find("accnos");
758                 if (itTypes != outputTypes.end()) {
759                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setAccnosFile(current); }
760                 }
761                 
762                 m->mothurOutEndLine();
763                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
764                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }       
765                 m->mothurOutEndLine();
766                 
767                 return 0;
768                 
769         }
770         catch(exception& e) {
771                 m->errorOut(e, "ChimeraUchimeCommand", "execute");
772                 exit(1);
773         }
774 }
775 //**********************************************************************************************************************
776 int ChimeraUchimeCommand::deconvoluteResults(map<string, string>& uniqueNames, string outputFileName, string accnosFileName, string alnsFileName){
777         try {
778                 map<string, string>::iterator itUnique;
779                 int total = 0;
780                 
781                 //edit accnos file
782                 ifstream in2; 
783                 m->openInputFile(accnosFileName, in2);
784                 
785                 ofstream out2;
786                 m->openOutputFile(accnosFileName+".temp", out2);
787                 
788                 string name;
789                 set<string> namesInFile; //this is so if a sequence is found to be chimera in several samples we dont write it to the results file more than once
790                 set<string>::iterator itNames;
791                 set<string> chimerasInFile;
792                 set<string>::iterator itChimeras;
793
794                 
795                 while (!in2.eof()) {
796                         if (m->control_pressed) { in2.close(); out2.close(); m->mothurRemove(outputFileName); m->mothurRemove((accnosFileName+".temp")); return 0; }
797                         
798                         in2 >> name; m->gobble(in2);
799                         
800                         //find unique name
801                         itUnique = uniqueNames.find(name);
802                         
803                         if (itUnique == uniqueNames.end()) { m->mothurOut("[ERROR]: trouble parsing accnos results. Cannot find "+ name + "."); m->mothurOutEndLine(); m->control_pressed = true; }
804                         else {
805                                 itChimeras = chimerasInFile.find((itUnique->second));
806                                 
807                                 if (itChimeras == chimerasInFile.end()) {
808                                         out2 << itUnique->second << endl;
809                                         chimerasInFile.insert((itUnique->second));
810                                         total++;
811                                 }
812                         }
813                 }
814                 in2.close();
815                 out2.close();
816                 
817                 m->mothurRemove(accnosFileName);
818                 rename((accnosFileName+".temp").c_str(), accnosFileName.c_str());
819                 
820                 
821                 
822                 //edit chimera file
823                 ifstream in; 
824                 m->openInputFile(outputFileName, in);
825                 
826                 ofstream out;
827                 m->openOutputFile(outputFileName+".temp", out); out.setf(ios::fixed, ios::floatfield); out.setf(ios::showpoint);
828                 out << "Score\tQuery\tParentA\tParentB\tIdQM\tIdQA\tIdQB\tIdAB\tIdQT\tLY\tLN\tLA\tRY\tRN\tRA\tDiv\tYN\n";
829                 
830                 float temp1;
831                 string parent1, parent2, temp2, temp3, temp4, temp5, temp6, temp7, temp8, temp9, temp10, temp11, temp12, temp13, flag;
832                 name = "";
833                 namesInFile.clear();    
834                 //assumptions - in file each read will always look like - if uchime source is updated, revisit this code.
835                 /*                                                                              1       2       3       4       5       6       7       8       9       10      11      12      13      14      15
836                  0.000000       F11Fcsw_33372/ab=18/            *       *       *       *       *       *       *       *       *       *       *       *       *       *       N
837                  0.018300       F11Fcsw_14980/ab=16/            F11Fcsw_1915/ab=35/     F11Fcsw_6032/ab=42/     79.9    78.7    78.2    78.7    79.2    3       0       5       11      10      20      1.46    N
838                 */
839                 
840                 while (!in.eof()) {
841                         
842                         if (m->control_pressed) { in.close(); out.close(); m->mothurRemove((outputFileName+".temp")); return 0; }
843                         
844                         bool print = false;
845                         in >> temp1;    m->gobble(in);
846                         in >> name;             m->gobble(in);
847                         in >> parent1;  m->gobble(in);
848                         in >> parent2;  m->gobble(in);
849                         in >> temp2 >> temp3 >> temp4 >> temp5 >> temp6 >> temp7 >> temp8 >> temp9 >> temp10 >> temp11 >> temp12 >> temp13 >> flag;
850                         m->gobble(in);
851                         
852                         //parse name - name will look like U68590/ab=1/
853                         string restOfName = "";
854                         int pos = name.find_first_of('/');
855                         if (pos != string::npos) {
856                                 restOfName = name.substr(pos);
857                                 name = name.substr(0, pos);
858                         }
859                         
860                         //find unique name
861                         itUnique = uniqueNames.find(name);
862                         
863                         if (itUnique == uniqueNames.end()) { m->mothurOut("[ERROR]: trouble parsing chimera results. Cannot find "+ name + "."); m->mothurOutEndLine(); m->control_pressed = true; }
864                         else {
865                                 name = itUnique->second;
866                                 //is this name already in the file
867                                 itNames = namesInFile.find((name));
868                                 
869                                 if (itNames == namesInFile.end()) { //no not in file
870                                         if (flag == "N") { //are you really a no??
871                                                 //is this sequence really not chimeric??
872                                                 itChimeras = chimerasInFile.find(name);
873                                                 
874                                                 //then you really are a no so print, otherwise skip
875                                                 if (itChimeras == chimerasInFile.end()) { print = true; }
876                                         }else{ print = true; }
877                                 }
878                         }
879                         
880                         if (print) {
881                                 out << temp1 << '\t' << name << restOfName << '\t';
882                                 namesInFile.insert(name);
883                                 
884                                 //parse parent1 names
885                                 if (parent1 != "*") {
886                                         restOfName = "";
887                                         pos = parent1.find_first_of('/');
888                                         if (pos != string::npos) {
889                                                 restOfName = parent1.substr(pos);
890                                                 parent1 = parent1.substr(0, pos);
891                                         }
892                                         
893                                         itUnique = uniqueNames.find(parent1);
894                                         if (itUnique == uniqueNames.end()) { m->mothurOut("[ERROR]: trouble parsing chimera results. Cannot find parentA "+ parent1 + "."); m->mothurOutEndLine(); m->control_pressed = true; }
895                                         else {  out << itUnique->second << restOfName << '\t';  }
896                                 }else { out << parent1 << '\t'; }
897                                 
898                                 //parse parent2 names
899                                 if (parent2 != "*") {
900                                         restOfName = "";
901                                         pos = parent2.find_first_of('/');
902                                         if (pos != string::npos) {
903                                                 restOfName = parent2.substr(pos);
904                                                 parent2 = parent2.substr(0, pos);
905                                         }
906                                         
907                                         itUnique = uniqueNames.find(parent2);
908                                         if (itUnique == uniqueNames.end()) { m->mothurOut("[ERROR]: trouble parsing chimera results. Cannot find parentB "+ parent2 + "."); m->mothurOutEndLine(); m->control_pressed = true; }
909                                         else {  out << itUnique->second << restOfName << '\t';  }
910                                 }else { out << parent2 << '\t'; }
911                                 
912                                 out << temp2 << '\t' << temp3 << '\t' << temp4 << '\t' << temp5 << '\t' << temp6 << '\t' << temp7 << '\t' << temp8 << '\t' << temp9 << '\t' << temp10 << '\t' << temp11 << '\t' << temp12 << temp13 << '\t' << flag << endl;    
913                         }
914                 }
915                 in.close();
916                 out.close();
917                 
918                 m->mothurRemove(outputFileName);
919                 rename((outputFileName+".temp").c_str(), outputFileName.c_str());
920                 
921                                 
922                 //edit anls file
923                 //assumptions - in file each read will always look like - if uchime source is updated, revisit this code.
924                 /*
925                  ------------------------------------------------------------------------
926                  Query   (  179 nt) F21Fcsw_11639/ab=591/
927                  ParentA (  179 nt) F11Fcsw_6529/ab=1625/
928                  ParentB (  181 nt) F21Fcsw_12128/ab=1827/
929                  
930                  A     1 AAGgAAGAtTAATACaagATGgCaTCatgAGtccgCATgTtcAcatGATTAAAG--gTaTtcCGGTagacGATGGGGATG 78
931                  Q     1 AAGTAAGACTAATACCCAATGACGTCTCTAGAAGACATCTGAAAGAGATTAAAG--ATTTATCGGTGATGGATGGGGATG 78
932                  B     1 AAGgAAGAtTAATcCaggATGggaTCatgAGttcACATgTccgcatGATTAAAGgtATTTtcCGGTagacGATGGGGATG 80
933                  Diffs      N    N    A N?N   N N  NNN  N?NB   N ?NaNNN          B B NN    NNNN          
934                  Votes      0    0    + 000   0 0  000  000+   0 00!000            + 00    0000          
935                  Model   AAAAAAAAAAAAAAAAAAAAAAxBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB
936                  
937                  A    79 CGTtccATTAGaTaGTaGGCGGGGTAACGGCCCACCtAGtCttCGATggaTAGGGGTTCTGAGAGGAAGGTCCCCCACAT 158
938                  Q    79 CGTCTGATTAGCTTGTTGGCGGGGTAACGGCCCACCAAGGCAACGATCAGTAGGGGTTCTGAGAGGAAGGTCCCCCACAT 158
939                  B    81 CGTtccATTAGaTaGTaGGCGGGGTAACGGCCCACCtAGtCAACGATggaTAGGGGTTCTGAGAGGAAGGTCCCCCACAT 160
940                  Diffs      NNN     N N  N                   N  N BB    NNN                              
941                  Votes      000     0 0  0                   0  0 ++    000                              
942                  Model   BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB
943                  
944                  A   159 TGGAACTGAGACACGGTCCAA 179
945                  Q   159 TGGAACTGAGACACGGTCCAA 179
946                  B   161 TGGAACTGAGACACGGTCCAA 181
947                  Diffs                        
948                  Votes                        
949                  Model   BBBBBBBBBBBBBBBBBBBBB
950                  
951                  Ids.  QA 76.6%, QB 77.7%, AB 93.7%, QModel 78.9%, Div. +1.5%
952                  Diffs Left 7: N 0, A 6, Y 1 (14.3%); Right 35: N 1, A 30, Y 4 (11.4%), Score 0.0047
953                 */
954                 if (chimealns) {
955                         ifstream in3; 
956                         m->openInputFile(alnsFileName, in3);
957                 
958                         ofstream out3;
959                         m->openOutputFile(alnsFileName+".temp", out3); out3.setf(ios::fixed, ios::floatfield); out3.setf(ios::showpoint);
960                 
961                         name = "";
962                         namesInFile.clear();
963                         string line = "";
964                         
965                         while (!in3.eof()) {
966                                 if (m->control_pressed) { in3.close(); out3.close(); m->mothurRemove(outputFileName); m->mothurRemove((accnosFileName)); m->mothurRemove((alnsFileName+".temp")); return 0; }
967                                 
968                                 line = "";
969                                 line = m->getline(in3); 
970                                 string temp = "";
971                                 
972                                 if (line != "") {
973                                         istringstream iss(line);
974                                         iss >> temp;
975                                         
976                                         //are you a name line
977                                         if ((temp == "Query") || (temp == "ParentA") || (temp == "ParentB")) {
978                                                 int spot = 0;
979                                                 for (int i = 0; i < line.length(); i++) {
980                                                         spot = i;
981                                                         if (line[i] == ')') { break; }
982                                                         else { out3 << line[i]; }
983                                                 }
984                                                 
985                                                 if (spot == (line.length() - 1)) { m->mothurOut("[ERROR]: could not line sequence name in line " + line + "."); m->mothurOutEndLine(); m->control_pressed = true; }
986                                                 else if ((spot+2) > (line.length() - 1)) { m->mothurOut("[ERROR]: could not line sequence name in line " + line + "."); m->mothurOutEndLine(); m->control_pressed = true; }
987                                                 else {
988                                                         out << line[spot] << line[spot+1];
989                                                         
990                                                         name = line.substr(spot+2);
991                                                         
992                                                         //parse name - name will either look like U68590/ab=1/ or U68590
993                                                         string restOfName = "";
994                                                         int pos = name.find_first_of('/');
995                                                         if (pos != string::npos) {
996                                                                 restOfName = name.substr(pos);
997                                                                 name = name.substr(0, pos);
998                                                         }
999                                                         
1000                                                         //find unique name
1001                                                         itUnique = uniqueNames.find(name);
1002                                                         
1003                                                         if (itUnique == uniqueNames.end()) { m->mothurOut("[ERROR]: trouble parsing alns results. Cannot find "+ name + "."); m->mothurOutEndLine();m->control_pressed = true;  }
1004                                                         else {
1005                                                                 //only limit repeats on query names
1006                                                                 if (temp == "Query") {
1007                                                                         itNames = namesInFile.find((itUnique->second));
1008                                                                         
1009                                                                         if (itNames == namesInFile.end()) {
1010                                                                                 out << itUnique->second << restOfName << endl;
1011                                                                                 namesInFile.insert((itUnique->second));
1012                                                                         }
1013                                                                 }else { out << itUnique->second << restOfName << endl;  }
1014                                                         }
1015                                                         
1016                                                 }
1017                                                 
1018                                         }else { //not need to alter line
1019                                                 out3 << line << endl;
1020                                         }
1021                                 }else { out3 << endl; }
1022                         }
1023                         in3.close();
1024                         out3.close();
1025                         
1026                         m->mothurRemove(alnsFileName);
1027                         rename((alnsFileName+".temp").c_str(), alnsFileName.c_str());
1028                 }
1029                 
1030                 return total;
1031         }
1032         catch(exception& e) {
1033                 m->errorOut(e, "ChimeraUchimeCommand", "deconvoluteResults");
1034                 exit(1);
1035         }
1036 }       
1037 //**********************************************************************************************************************
1038 int ChimeraUchimeCommand::printFile(vector<seqPriorityNode>& nameMapCount, string filename){
1039         try {
1040                 
1041                 sort(nameMapCount.begin(), nameMapCount.end(), compareSeqPriorityNodes);
1042                 
1043                 ofstream out;
1044                 m->openOutputFile(filename, out);
1045                 
1046                 //print new file in order of
1047                 for (int i = 0; i < nameMapCount.size(); i++) {
1048                         out << ">" << nameMapCount[i].name  << "/ab=" << nameMapCount[i].numIdentical << "/" << endl << nameMapCount[i].seq << endl;
1049                 }
1050                 out.close();
1051                 
1052                 return 0;
1053         }
1054         catch(exception& e) {
1055                 m->errorOut(e, "ChimeraUchimeCommand", "printFile");
1056                 exit(1);
1057         }
1058 }       
1059 //**********************************************************************************************************************
1060 int ChimeraUchimeCommand::readFasta(string filename, map<string, string>& seqs){
1061         try {
1062                 //create input file for uchime
1063                 //read through fastafile and store info
1064                 ifstream in;
1065                 m->openInputFile(filename, in);
1066                 
1067                 while (!in.eof()) {
1068                         
1069                         if (m->control_pressed) { in.close(); return 0; }
1070                         
1071                         Sequence seq(in); m->gobble(in);
1072                         seqs[seq.getName()] = seq.getAligned();
1073                 }
1074                 in.close();
1075                 
1076                 return 0;
1077         }
1078         catch(exception& e) {
1079                 m->errorOut(e, "ChimeraUchimeCommand", "readFasta");
1080                 exit(1);
1081         }
1082 }       
1083 //**********************************************************************************************************************
1084
1085 string ChimeraUchimeCommand::getNamesFile(string& inputFile){
1086         try {
1087                 string nameFile = "";
1088                 
1089                 m->mothurOutEndLine(); m->mothurOut("No namesfile given, running unique.seqs command to generate one."); m->mothurOutEndLine(); m->mothurOutEndLine();
1090                 
1091                 //use unique.seqs to create new name and fastafile
1092                 string inputString = "fasta=" + inputFile;
1093                 m->mothurOut("/******************************************/"); m->mothurOutEndLine(); 
1094                 m->mothurOut("Running command: unique.seqs(" + inputString + ")"); m->mothurOutEndLine(); 
1095                 m->mothurCalling = true;
1096         
1097                 Command* uniqueCommand = new DeconvoluteCommand(inputString);
1098                 uniqueCommand->execute();
1099                 
1100                 map<string, vector<string> > filenames = uniqueCommand->getOutputFiles();
1101                 
1102                 delete uniqueCommand;
1103                 m->mothurCalling = false;
1104                 m->mothurOut("/******************************************/"); m->mothurOutEndLine(); 
1105                 
1106                 nameFile = filenames["name"][0];
1107                 inputFile = filenames["fasta"][0];
1108                 
1109                 return nameFile;
1110         }
1111         catch(exception& e) {
1112                 m->errorOut(e, "ChimeraUchimeCommand", "getNamesFile");
1113                 exit(1);
1114         }
1115 }
1116 //**********************************************************************************************************************
1117 int ChimeraUchimeCommand::driverGroups(string outputFName, string filename, string accnos, string alns, int start, int end, vector<string> groups){
1118         try {
1119                 
1120                 int totalSeqs = 0;
1121                 int numChimeras = 0;
1122                 
1123                 for (int i = start; i < end; i++) {
1124                         int start = time(NULL);  if (m->control_pressed) {  return 0; }
1125             
1126                         int error;
1127             if (hasCount) { error = cparser->getSeqs(groups[i], filename, true); if ((error == 1) || m->control_pressed) {  return 0; } }
1128             else { error = sparser->getSeqs(groups[i], filename, true); if ((error == 1) || m->control_pressed) {  return 0; } }
1129                         
1130                         int numSeqs = driver((outputFName + groups[i]), filename, (accnos+ groups[i]), (alns+ groups[i]), numChimeras);
1131                         totalSeqs += numSeqs;
1132                         
1133                         if (m->control_pressed) { return 0; }
1134                         
1135                         //remove file made for uchime
1136                         if (!m->debug) {  m->mothurRemove(filename);  }
1137             else { m->mothurOut("[DEBUG]: saving file: " + filename + ".\n"); }
1138                         
1139                         //append files
1140                         m->appendFiles((outputFName+groups[i]), outputFName); m->mothurRemove((outputFName+groups[i]));
1141                         m->appendFiles((accnos+groups[i]), accnos); m->mothurRemove((accnos+groups[i]));
1142                         if (chimealns) { m->appendFiles((alns+groups[i]), alns); m->mothurRemove((alns+groups[i])); }
1143                         
1144                         m->mothurOutEndLine(); m->mothurOut("It took " + toString(time(NULL) - start) + " secs to check " + toString(numSeqs) + " sequences from group " + groups[i] + ".");    m->mothurOutEndLine();                                  
1145                 }       
1146                 return totalSeqs;
1147                 
1148         }
1149         catch(exception& e) {
1150                 m->errorOut(e, "ChimeraUchimeCommand", "driverGroups");
1151                 exit(1);
1152         }
1153 }       
1154 //**********************************************************************************************************************
1155
1156 int ChimeraUchimeCommand::driver(string outputFName, string filename, string accnos, string alns, int& numChimeras){
1157         try {
1158                 
1159                 outputFName = m->getFullPathName(outputFName);
1160                 filename = m->getFullPathName(filename);
1161                 alns = m->getFullPathName(alns);
1162                 
1163                 //to allow for spaces in the path
1164                 outputFName = "\"" + outputFName + "\"";
1165                 filename = "\"" + filename + "\"";
1166                 alns = "\"" + alns + "\"";
1167                                 
1168                 vector<char*> cPara;
1169                 
1170                 string uchimeCommand = uchimeLocation;
1171 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
1172                 uchimeCommand += " ";
1173 #else
1174                 uchimeCommand = "\"" + uchimeCommand + "\"";
1175 #endif
1176         char* tempUchime;
1177                 tempUchime= new char[uchimeCommand.length()+1]; 
1178                 *tempUchime = '\0';
1179                 strncat(tempUchime, uchimeCommand.c_str(), uchimeCommand.length());
1180                 cPara.push_back(tempUchime);
1181                 
1182         //are you using a reference file
1183                 if (templatefile != "self") {
1184             string outputFileName = filename.substr(1, filename.length()-2) + ".uchime_formatted";
1185             prepFile(filename.substr(1, filename.length()-2), outputFileName);
1186             filename = outputFileName;
1187             filename = "\"" + filename + "\"";
1188                         //add reference file
1189                         char* tempRef = new char[5]; 
1190                         //strcpy(tempRef, "--db"); 
1191                         *tempRef = '\0'; strncat(tempRef, "--db", 4);
1192                         cPara.push_back(tempRef);  
1193                         char* tempR = new char[templatefile.length()+1];
1194                         //strcpy(tempR, templatefile.c_str());
1195                         *tempR = '\0'; strncat(tempR, templatefile.c_str(), templatefile.length());
1196                         cPara.push_back(tempR);
1197                 }
1198                 
1199                 char* tempIn = new char[8]; 
1200                 *tempIn = '\0'; strncat(tempIn, "--input", 7);
1201                 //strcpy(tempIn, "--input"); 
1202                 cPara.push_back(tempIn);
1203                 char* temp = new char[filename.length()+1];
1204                 *temp = '\0'; strncat(temp, filename.c_str(), filename.length());
1205                 //strcpy(temp, filename.c_str());
1206                 cPara.push_back(temp);
1207                 
1208                 char* tempO = new char[12]; 
1209                 *tempO = '\0'; strncat(tempO, "--uchimeout", 11);
1210                 //strcpy(tempO, "--uchimeout"); 
1211                 cPara.push_back(tempO);
1212                 char* tempout = new char[outputFName.length()+1];
1213                 //strcpy(tempout, outputFName.c_str());
1214                 *tempout = '\0'; strncat(tempout, outputFName.c_str(), outputFName.length());
1215                 cPara.push_back(tempout);
1216                 
1217                 if (chimealns) {
1218                         char* tempA = new char[13]; 
1219                         *tempA = '\0'; strncat(tempA, "--uchimealns", 12);
1220                         //strcpy(tempA, "--uchimealns"); 
1221                         cPara.push_back(tempA);
1222                         char* tempa = new char[alns.length()+1];
1223                         //strcpy(tempa, alns.c_str());
1224                         *tempa = '\0'; strncat(tempa, alns.c_str(), alns.length());
1225                         cPara.push_back(tempa);
1226                 }
1227                 
1228                 if (useAbskew) {
1229                         char* tempskew = new char[9];
1230                         *tempskew = '\0'; strncat(tempskew, "--abskew", 8);
1231                         //strcpy(tempskew, "--abskew"); 
1232                         cPara.push_back(tempskew);
1233                         char* tempSkew = new char[abskew.length()+1];
1234                         //strcpy(tempSkew, abskew.c_str());
1235                         *tempSkew = '\0'; strncat(tempSkew, abskew.c_str(), abskew.length());
1236                         cPara.push_back(tempSkew);
1237                 }
1238                 
1239                 if (useMinH) {
1240                         char* tempminh = new char[7]; 
1241                         *tempminh = '\0'; strncat(tempminh, "--minh", 6);
1242                         //strcpy(tempminh, "--minh"); 
1243                         cPara.push_back(tempminh);
1244                         char* tempMinH = new char[minh.length()+1];
1245                         *tempMinH = '\0'; strncat(tempMinH, minh.c_str(), minh.length());
1246                         //strcpy(tempMinH, minh.c_str());
1247                         cPara.push_back(tempMinH);
1248                 }
1249                 
1250                 if (useMindiv) {
1251                         char* tempmindiv = new char[9]; 
1252                         *tempmindiv = '\0'; strncat(tempmindiv, "--mindiv", 8);
1253                         //strcpy(tempmindiv, "--mindiv"); 
1254                         cPara.push_back(tempmindiv);
1255                         char* tempMindiv = new char[mindiv.length()+1];
1256                         *tempMindiv = '\0'; strncat(tempMindiv, mindiv.c_str(), mindiv.length());
1257                         //strcpy(tempMindiv, mindiv.c_str());
1258                         cPara.push_back(tempMindiv);
1259                 }
1260                 
1261                 if (useXn) {
1262                         char* tempxn = new char[5]; 
1263                         //strcpy(tempxn, "--xn"); 
1264                         *tempxn = '\0'; strncat(tempxn, "--xn", 4);
1265                         cPara.push_back(tempxn);
1266                         char* tempXn = new char[xn.length()+1];
1267                         //strcpy(tempXn, xn.c_str());
1268                         *tempXn = '\0'; strncat(tempXn, xn.c_str(), xn.length());
1269                         cPara.push_back(tempXn);
1270                 }
1271                 
1272                 if (useDn) {
1273                         char* tempdn = new char[5]; 
1274                         //strcpy(tempdn, "--dn"); 
1275                         *tempdn = '\0'; strncat(tempdn, "--dn", 4);
1276                         cPara.push_back(tempdn);
1277                         char* tempDn = new char[dn.length()+1];
1278                         *tempDn = '\0'; strncat(tempDn, dn.c_str(), dn.length());
1279                         //strcpy(tempDn, dn.c_str());
1280                         cPara.push_back(tempDn);
1281                 }
1282                 
1283                 if (useXa) {
1284                         char* tempxa = new char[5]; 
1285                         //strcpy(tempxa, "--xa"); 
1286                         *tempxa = '\0'; strncat(tempxa, "--xa", 4);
1287                         cPara.push_back(tempxa);
1288                         char* tempXa = new char[xa.length()+1];
1289                         *tempXa = '\0'; strncat(tempXa, xa.c_str(), xa.length());
1290                         //strcpy(tempXa, xa.c_str());
1291                         cPara.push_back(tempXa);
1292                 }
1293                 
1294                 if (useChunks) {
1295                         char* tempchunks = new char[9]; 
1296                         //strcpy(tempchunks, "--chunks"); 
1297                         *tempchunks = '\0'; strncat(tempchunks, "--chunks", 8);
1298                         cPara.push_back(tempchunks);
1299                         char* tempChunks = new char[chunks.length()+1];
1300                         *tempChunks = '\0'; strncat(tempChunks, chunks.c_str(), chunks.length());
1301                         //strcpy(tempChunks, chunks.c_str());
1302                         cPara.push_back(tempChunks);
1303                 }
1304                 
1305                 if (useMinchunk) {
1306                         char* tempminchunk = new char[11]; 
1307                         //strcpy(tempminchunk, "--minchunk"); 
1308                         *tempminchunk = '\0'; strncat(tempminchunk, "--minchunk", 10);
1309                         cPara.push_back(tempminchunk);
1310                         char* tempMinchunk = new char[minchunk.length()+1];
1311                         *tempMinchunk = '\0'; strncat(tempMinchunk, minchunk.c_str(), minchunk.length());
1312                         //strcpy(tempMinchunk, minchunk.c_str());
1313                         cPara.push_back(tempMinchunk);
1314                 }
1315                 
1316                 if (useIdsmoothwindow) {
1317                         char* tempidsmoothwindow = new char[17]; 
1318                         *tempidsmoothwindow = '\0'; strncat(tempidsmoothwindow, "--idsmoothwindow", 16);
1319                         //strcpy(tempidsmoothwindow, "--idsmoothwindow"); 
1320                         cPara.push_back(tempidsmoothwindow);
1321                         char* tempIdsmoothwindow = new char[idsmoothwindow.length()+1];
1322                         *tempIdsmoothwindow = '\0'; strncat(tempIdsmoothwindow, idsmoothwindow.c_str(), idsmoothwindow.length());
1323                         //strcpy(tempIdsmoothwindow, idsmoothwindow.c_str());
1324                         cPara.push_back(tempIdsmoothwindow);
1325                 }
1326                 
1327                 /*if (useMinsmoothid) {
1328                         char* tempminsmoothid = new char[14]; 
1329                         //strcpy(tempminsmoothid, "--minsmoothid"); 
1330                         *tempminsmoothid = '\0'; strncat(tempminsmoothid, "--minsmoothid", 13);
1331                         cPara.push_back(tempminsmoothid);
1332                         char* tempMinsmoothid = new char[minsmoothid.length()+1];
1333                         *tempMinsmoothid = '\0'; strncat(tempMinsmoothid, minsmoothid.c_str(), minsmoothid.length());
1334                         //strcpy(tempMinsmoothid, minsmoothid.c_str());
1335                         cPara.push_back(tempMinsmoothid);
1336                 }*/
1337                 
1338                 if (useMaxp) {
1339                         char* tempmaxp = new char[7]; 
1340                         //strcpy(tempmaxp, "--maxp"); 
1341                         *tempmaxp = '\0'; strncat(tempmaxp, "--maxp", 6);
1342                         cPara.push_back(tempmaxp);
1343                         char* tempMaxp = new char[maxp.length()+1];
1344                         *tempMaxp = '\0'; strncat(tempMaxp, maxp.c_str(), maxp.length());
1345                         //strcpy(tempMaxp, maxp.c_str());
1346                         cPara.push_back(tempMaxp);
1347                 }
1348                 
1349                 if (!skipgaps) {
1350                         char* tempskipgaps = new char[13]; 
1351                         //strcpy(tempskipgaps, "--[no]skipgaps");
1352                         *tempskipgaps = '\0'; strncat(tempskipgaps, "--noskipgaps", 12);
1353                         cPara.push_back(tempskipgaps);
1354                 }
1355                 
1356                 if (!skipgaps2) {
1357                         char* tempskipgaps2 = new char[14]; 
1358                         //strcpy(tempskipgaps2, "--[no]skipgaps2"); 
1359                         *tempskipgaps2 = '\0'; strncat(tempskipgaps2, "--noskipgaps2", 13);
1360                         cPara.push_back(tempskipgaps2);
1361                 }
1362                 
1363                 if (useMinlen) {
1364                         char* tempminlen = new char[9]; 
1365                         *tempminlen = '\0'; strncat(tempminlen, "--minlen", 8);
1366                         //strcpy(tempminlen, "--minlen"); 
1367                         cPara.push_back(tempminlen);
1368                         char* tempMinlen = new char[minlen.length()+1];
1369                         //strcpy(tempMinlen, minlen.c_str());
1370                         *tempMinlen = '\0'; strncat(tempMinlen, minlen.c_str(), minlen.length());
1371                         cPara.push_back(tempMinlen);
1372                 }
1373                 
1374                 if (useMaxlen) {
1375                         char* tempmaxlen = new char[9]; 
1376                         //strcpy(tempmaxlen, "--maxlen"); 
1377                         *tempmaxlen = '\0'; strncat(tempmaxlen, "--maxlen", 8);
1378                         cPara.push_back(tempmaxlen);
1379                         char* tempMaxlen = new char[maxlen.length()+1];
1380                         *tempMaxlen = '\0'; strncat(tempMaxlen, maxlen.c_str(), maxlen.length());
1381                         //strcpy(tempMaxlen, maxlen.c_str());
1382                         cPara.push_back(tempMaxlen);
1383                 }
1384                 
1385                 if (ucl) {
1386                         char* tempucl = new char[5]; 
1387                         strcpy(tempucl, "--ucl"); 
1388                         cPara.push_back(tempucl);
1389                 }
1390                 
1391                 if (useQueryfract) {
1392                         char* tempqueryfract = new char[13]; 
1393                         *tempqueryfract = '\0'; strncat(tempqueryfract, "--queryfract", 12);
1394                         //strcpy(tempqueryfract, "--queryfract"); 
1395                         cPara.push_back(tempqueryfract);
1396                         char* tempQueryfract = new char[queryfract.length()+1];
1397                         *tempQueryfract = '\0'; strncat(tempQueryfract, queryfract.c_str(), queryfract.length());
1398                         //strcpy(tempQueryfract, queryfract.c_str());
1399                         cPara.push_back(tempQueryfract);
1400                 }
1401                 
1402                 
1403                 char** uchimeParameters;
1404                 uchimeParameters = new char*[cPara.size()];
1405                 string commandString = "";
1406                 for (int i = 0; i < cPara.size(); i++) {  uchimeParameters[i] = cPara[i];  commandString += toString(cPara[i]) + " "; } 
1407                 //int numArgs = cPara.size();
1408                 
1409                 //uchime_main(numArgs, uchimeParameters); 
1410                 //cout << "commandString = " << commandString << endl;
1411 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
1412 #else
1413                 commandString = "\"" + commandString + "\"";
1414 #endif
1415         if (m->debug) { m->mothurOut("[DEBUG]: uchime command = " + commandString + ".\n"); }
1416                 system(commandString.c_str());
1417                 
1418                 //free memory
1419                 for(int i = 0; i < cPara.size(); i++)  {  delete cPara[i];  }
1420                 delete[] uchimeParameters; 
1421                 
1422                 //remove "" from filenames
1423                 outputFName = outputFName.substr(1, outputFName.length()-2);
1424                 filename = filename.substr(1, filename.length()-2);
1425                 alns = alns.substr(1, alns.length()-2);
1426                 
1427                 if (m->control_pressed) { return 0; }
1428                 
1429                 //create accnos file from uchime results
1430                 ifstream in; 
1431                 m->openInputFile(outputFName, in);
1432                 
1433                 ofstream out;
1434                 m->openOutputFile(accnos, out);
1435                 
1436                 int num = 0;
1437                 numChimeras = 0;
1438                 while(!in.eof()) {
1439                         
1440                         if (m->control_pressed) { break; }
1441                         
1442                         string name = "";
1443                         string chimeraFlag = "";
1444                         in >> chimeraFlag >> name;
1445                         
1446                         //fix name if needed
1447                         if (templatefile == "self") { 
1448                                 name = name.substr(0, name.length()-1); //rip off last /
1449                                 name = name.substr(0, name.find_last_of('/'));
1450                         }
1451                         
1452                         for (int i = 0; i < 15; i++) {  in >> chimeraFlag; }
1453                         m->gobble(in);
1454                         
1455                         if (chimeraFlag == "Y") {  out << name << endl; numChimeras++; }
1456                         num++;
1457                 }
1458                 in.close();
1459                 out.close();
1460                 
1461         //if (templatefile != "self") {  m->mothurRemove(filename); }
1462         
1463                 return num;
1464         }
1465         catch(exception& e) {
1466                 m->errorOut(e, "ChimeraUchimeCommand", "driver");
1467                 exit(1);
1468         }
1469 }
1470 /**************************************************************************************************/
1471 //uchime can't handle some of the things allowed in mothurs fasta files. This functions "cleans up" the file.
1472 int ChimeraUchimeCommand::prepFile(string filename, string output) {
1473         try {
1474         
1475         ifstream in;
1476         m->openInputFile(filename, in);
1477         
1478         ofstream out;
1479         m->openOutputFile(output, out);
1480         
1481         while (!in.eof()) {
1482             if (m->control_pressed) { break;  }
1483             
1484             Sequence seq(in); m->gobble(in);
1485             
1486             if (seq.getName() != "") { seq.printSequence(out); }
1487         }
1488         in.close();
1489         out.close();
1490         
1491         return 0;
1492     }
1493         catch(exception& e) {
1494                 m->errorOut(e, "ChimeraUchimeCommand", "prepFile");
1495                 exit(1);
1496         }
1497 }
1498 /**************************************************************************************************/
1499
1500 int ChimeraUchimeCommand::createProcesses(string outputFileName, string filename, string accnos, string alns, int& numChimeras) {
1501         try {
1502                 
1503                 processIDS.clear();
1504                 int process = 1;
1505                 int num = 0;
1506                 vector<string> files;
1507                 
1508 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)          
1509                 //break up file into multiple files
1510                 m->divideFile(filename, processors, files);
1511                 
1512                 if (m->control_pressed) {  return 0;  }
1513                                 
1514                 //loop through and create all the processes you want
1515                 while (process != processors) {
1516                         int pid = fork();
1517                         
1518                         if (pid > 0) {
1519                                 processIDS.push_back(pid);  //create map from line number to pid so you can append files in correct order later
1520                                 process++;
1521                         }else if (pid == 0){
1522                                 num = driver(outputFileName + toString(getpid()) + ".temp", files[process], accnos + toString(getpid()) + ".temp", alns + toString(getpid()) + ".temp", numChimeras);
1523                                 
1524                                 //pass numSeqs to parent
1525                                 ofstream out;
1526                                 string tempFile = outputFileName + toString(getpid()) + ".num.temp";
1527                                 m->openOutputFile(tempFile, out);
1528                                 out << num << endl;
1529                                 out << numChimeras << endl;
1530                                 out.close();
1531                                 
1532                                 exit(0);
1533                         }else { 
1534                                 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine(); 
1535                                 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
1536                                 exit(0);
1537                         }
1538                 }
1539                 
1540                 //do my part
1541                 num = driver(outputFileName, files[0], accnos, alns, numChimeras);
1542                 
1543                 //force parent to wait until all the processes are done
1544                 for (int i=0;i<processIDS.size();i++) { 
1545                         int temp = processIDS[i];
1546                         wait(&temp);
1547                 }
1548                 
1549                 for (int i = 0; i < processIDS.size(); i++) {
1550                         ifstream in;
1551                         string tempFile =  outputFileName + toString(processIDS[i]) + ".num.temp";
1552                         m->openInputFile(tempFile, in);
1553                         if (!in.eof()) { 
1554                                 int tempNum = 0; 
1555                                 in >> tempNum; m->gobble(in);
1556                                 num += tempNum; 
1557                                 in >> tempNum;
1558                                 numChimeras += tempNum;
1559                         }
1560                         in.close(); m->mothurRemove(tempFile);
1561                 }
1562 #else
1563                 //////////////////////////////////////////////////////////////////////////////////////////////////////
1564                 //Windows version shared memory, so be careful when passing variables through the preClusterData struct. 
1565                 //Above fork() will clone, so memory is separate, but that's not the case with windows, 
1566                 //////////////////////////////////////////////////////////////////////////////////////////////////////
1567                 
1568                 //divide file
1569                 int count = 0;
1570                 int spot = 0;
1571                 map<int, ofstream*> filehandles;
1572                 map<int, ofstream*>::iterator it3;
1573                 
1574                 ofstream* temp;
1575                 for (int i = 0; i < processors; i++) {
1576                         temp = new ofstream;
1577                         filehandles[i] = temp;
1578                         m->openOutputFile(filename+toString(i)+".temp", *(temp));
1579                         files.push_back(filename+toString(i)+".temp");
1580                 }
1581                 
1582                 ifstream in;
1583                 m->openInputFile(filename, in);
1584                 
1585                 while(!in.eof()) {
1586                         
1587                         if (m->control_pressed) { in.close(); for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) { (*(it3->second)).close(); delete it3->second; } return 0; }
1588                         
1589                         Sequence tempSeq(in); m->gobble(in); 
1590                         
1591                         if (tempSeq.getName() != "") {
1592                                 tempSeq.printSequence(*(filehandles[spot])); 
1593                                 spot++; count++;
1594                                 if (spot == processors) { spot = 0; }
1595                         }
1596                 }
1597                 in.close();
1598                 
1599                 //delete memory
1600                 for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) {
1601                         (*(it3->second)).close();
1602                         delete it3->second;
1603                 }
1604                 
1605                 //sanity check for number of processors
1606                 if (count < processors) { processors = count; }
1607                 
1608                 vector<uchimeData*> pDataArray; 
1609                 DWORD   dwThreadIdArray[processors-1];
1610                 HANDLE  hThreadArray[processors-1]; 
1611                 vector<string> dummy; //used so that we can use the same struct for MyUchimeSeqsThreadFunction and MyUchimeThreadFunction
1612                 
1613                 //Create processor worker threads.
1614                 for( int i=1; i<processors; i++ ){
1615                         // Allocate memory for thread data.
1616                         string extension = toString(i) + ".temp";
1617                         
1618                         uchimeData* tempUchime = new uchimeData(outputFileName+extension, uchimeLocation, templatefile, files[i], "", "", "", accnos+extension, alns+extension, dummy, m, 0, 0,  i);
1619                         tempUchime->setBooleans(useAbskew, chimealns, useMinH, useMindiv, useXn, useDn, useXa, useChunks, useMinchunk, useIdsmoothwindow, useMinsmoothid, useMaxp, skipgaps, skipgaps2, useMinlen, useMaxlen, ucl, useQueryfract, hasCount);
1620                         tempUchime->setVariables(abskew, minh, mindiv, xn, dn, xa, chunks, minchunk, idsmoothwindow, minsmoothid, maxp, minlen, maxlen, queryfract);
1621                         
1622                         pDataArray.push_back(tempUchime);
1623                         processIDS.push_back(i);
1624                         
1625                         //MySeqSumThreadFunction is in header. It must be global or static to work with the threads.
1626                         //default security attributes, thread function name, argument to thread function, use default creation flags, returns the thread identifier
1627                         hThreadArray[i-1] = CreateThread(NULL, 0, MyUchimeSeqsThreadFunction, pDataArray[i-1], 0, &dwThreadIdArray[i-1]);   
1628                 }
1629                 
1630                 
1631                 //using the main process as a worker saves time and memory
1632                 num = driver(outputFileName, files[0], accnos, alns, numChimeras);
1633                 
1634                 //Wait until all threads have terminated.
1635                 WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE);
1636                 
1637                 //Close all thread handles and free memory allocations.
1638                 for(int i=0; i < pDataArray.size(); i++){
1639                         num += pDataArray[i]->count;
1640                         numChimeras += pDataArray[i]->numChimeras;
1641                         CloseHandle(hThreadArray[i]);
1642                         delete pDataArray[i];
1643                 }
1644 #endif          
1645                 
1646                 //append output files
1647                 for(int i=0;i<processIDS.size();i++){
1648                         m->appendFiles((outputFileName + toString(processIDS[i]) + ".temp"), outputFileName);
1649                         m->mothurRemove((outputFileName + toString(processIDS[i]) + ".temp"));
1650                         
1651                         m->appendFiles((accnos + toString(processIDS[i]) + ".temp"), accnos);
1652                         m->mothurRemove((accnos + toString(processIDS[i]) + ".temp"));
1653                         
1654                         if (chimealns) {
1655                                 m->appendFiles((alns + toString(processIDS[i]) + ".temp"), alns);
1656                                 m->mothurRemove((alns + toString(processIDS[i]) + ".temp"));
1657                         }
1658                 }
1659                 
1660                 //get rid of the file pieces.
1661                 for (int i = 0; i < files.size(); i++) { m->mothurRemove(files[i]); }
1662                 return num;     
1663         }
1664         catch(exception& e) {
1665                 m->errorOut(e, "ChimeraUchimeCommand", "createProcesses");
1666                 exit(1);
1667         }
1668 }
1669 /**************************************************************************************************/
1670
1671 int ChimeraUchimeCommand::createProcessesGroups(string outputFName, string filename, string accnos, string alns, vector<string> groups, string nameFile, string groupFile, string fastaFile) {
1672         try {
1673                 
1674                 processIDS.clear();
1675                 int process = 1;
1676                 int num = 0;
1677                 
1678                 //sanity check
1679                 if (groups.size() < processors) { processors = groups.size(); }
1680                 
1681                 //divide the groups between the processors
1682                 vector<linePair> lines;
1683                 int numGroupsPerProcessor = groups.size() / processors;
1684                 for (int i = 0; i < processors; i++) {
1685                         int startIndex =  i * numGroupsPerProcessor;
1686                         int endIndex = (i+1) * numGroupsPerProcessor;
1687                         if(i == (processors - 1)){      endIndex = groups.size();       }
1688                         lines.push_back(linePair(startIndex, endIndex));
1689                 }
1690                 
1691 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)          
1692                                 
1693                 //loop through and create all the processes you want
1694                 while (process != processors) {
1695                         int pid = fork();
1696                         
1697                         if (pid > 0) {
1698                                 processIDS.push_back(pid);  //create map from line number to pid so you can append files in correct order later
1699                                 process++;
1700                         }else if (pid == 0){
1701                                 num = driverGroups(outputFName + toString(getpid()) + ".temp", filename + toString(getpid()) + ".temp", accnos + toString(getpid()) + ".temp", alns + toString(getpid()) + ".temp", lines[process].start, lines[process].end, groups);
1702                                 
1703                                 //pass numSeqs to parent
1704                                 ofstream out;
1705                                 string tempFile = outputFName + toString(getpid()) + ".num.temp";
1706                                 m->openOutputFile(tempFile, out);
1707                                 out << num << endl;
1708                                 out.close();
1709                                 
1710                                 exit(0);
1711                         }else { 
1712                                 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine(); 
1713                                 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
1714                                 exit(0);
1715                         }
1716                 }
1717                 
1718                 //do my part
1719                 num = driverGroups(outputFName, filename, accnos, alns, lines[0].start, lines[0].end, groups);
1720                 
1721                 //force parent to wait until all the processes are done
1722                 for (int i=0;i<processIDS.size();i++) { 
1723                         int temp = processIDS[i];
1724                         wait(&temp);
1725                 }
1726                 
1727                 for (int i = 0; i < processIDS.size(); i++) {
1728                         ifstream in;
1729                         string tempFile =  outputFName + toString(processIDS[i]) + ".num.temp";
1730                         m->openInputFile(tempFile, in);
1731                         if (!in.eof()) { int tempNum = 0; in >> tempNum; num += tempNum; }
1732                         in.close(); m->mothurRemove(tempFile);
1733                 }
1734                                 
1735 #else
1736                 //////////////////////////////////////////////////////////////////////////////////////////////////////
1737                 //Windows version shared memory, so be careful when passing variables through the uchimeData struct. 
1738                 //Above fork() will clone, so memory is separate, but that's not the case with windows, 
1739                 //////////////////////////////////////////////////////////////////////////////////////////////////////
1740                 
1741                 vector<uchimeData*> pDataArray; 
1742                 DWORD   dwThreadIdArray[processors-1];
1743                 HANDLE  hThreadArray[processors-1]; 
1744                 
1745                 //Create processor worker threads.
1746                 for( int i=1; i<processors; i++ ){
1747                         // Allocate memory for thread data.
1748                         string extension = toString(i) + ".temp";
1749                         
1750                         uchimeData* tempUchime = new uchimeData(outputFName+extension, uchimeLocation, templatefile, filename+extension, fastaFile, nameFile, groupFile, accnos+extension, alns+extension, groups, m, lines[i].start, lines[i].end,  i);
1751                         tempUchime->setBooleans(useAbskew, chimealns, useMinH, useMindiv, useXn, useDn, useXa, useChunks, useMinchunk, useIdsmoothwindow, useMinsmoothid, useMaxp, skipgaps, skipgaps2, useMinlen, useMaxlen, ucl, useQueryfract, hasCount);
1752                         tempUchime->setVariables(abskew, minh, mindiv, xn, dn, xa, chunks, minchunk, idsmoothwindow, minsmoothid, maxp, minlen, maxlen, queryfract);
1753                         
1754                         pDataArray.push_back(tempUchime);
1755                         processIDS.push_back(i);
1756                         
1757                         //MyUchimeThreadFunction is in header. It must be global or static to work with the threads.
1758                         //default security attributes, thread function name, argument to thread function, use default creation flags, returns the thread identifier
1759                         hThreadArray[i-1] = CreateThread(NULL, 0, MyUchimeThreadFunction, pDataArray[i-1], 0, &dwThreadIdArray[i-1]);   
1760                 }
1761                 
1762                 
1763                 //using the main process as a worker saves time and memory
1764                 num = driverGroups(outputFName, filename, accnos, alns, lines[0].start, lines[0].end, groups);
1765                 
1766                 //Wait until all threads have terminated.
1767                 WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE);
1768                 
1769                 //Close all thread handles and free memory allocations.
1770                 for(int i=0; i < pDataArray.size(); i++){
1771                         num += pDataArray[i]->count;
1772                         CloseHandle(hThreadArray[i]);
1773                         delete pDataArray[i];
1774                 }
1775 #endif          
1776                 
1777                                 
1778                 //append output files
1779                 for(int i=0;i<processIDS.size();i++){
1780                         m->appendFiles((outputFName + toString(processIDS[i]) + ".temp"), outputFName);
1781                         m->mothurRemove((outputFName + toString(processIDS[i]) + ".temp"));
1782                         
1783                         m->appendFiles((accnos + toString(processIDS[i]) + ".temp"), accnos);
1784                         m->mothurRemove((accnos + toString(processIDS[i]) + ".temp"));
1785                         
1786                         if (chimealns) {
1787                                 m->appendFiles((alns + toString(processIDS[i]) + ".temp"), alns);
1788                                 m->mothurRemove((alns + toString(processIDS[i]) + ".temp"));
1789                         }
1790                 }
1791                 
1792                 return num;     
1793                 
1794         }
1795         catch(exception& e) {
1796                 m->errorOut(e, "ChimeraUchimeCommand", "createProcessesGroups");
1797                 exit(1);
1798         }
1799 }
1800 /**************************************************************************************************/
1801