]> git.donarmstrong.com Git - mothur.git/blob - chimerauchimecommand.cpp
added count file to chimera.uchime. found issue with uchime program that indicated...
[mothur.git] / chimerauchimecommand.cpp
1 /*
2  *  chimerauchimecommand.cpp
3  *  Mothur
4  *
5  *  Created by westcott on 5/13/11.
6  *  Copyright 2011 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "chimerauchimecommand.h"
11 #include "deconvolutecommand.h"
12 //#include "uc.h"
13 #include "sequence.hpp"
14 #include "referencedb.h"
15 #include "systemcommand.h"
16
17 //**********************************************************************************************************************
18 vector<string> ChimeraUchimeCommand::setParameters(){   
19         try {
20                 CommandParameter ptemplate("reference", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(ptemplate);
21                 CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pfasta);
22                 CommandParameter pname("name", "InputTypes", "", "", "NameCount", "none", "none",false,false); parameters.push_back(pname);
23         CommandParameter pcount("count", "InputTypes", "", "", "NameCount-CountGroup", "none", "none",false,false); parameters.push_back(pcount);
24                 CommandParameter pgroup("group", "InputTypes", "", "", "CountGroup", "none", "none",false,false); parameters.push_back(pgroup);
25                 CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors);
26                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
27                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
28                 CommandParameter pabskew("abskew", "Number", "", "1.9", "", "", "",false,false); parameters.push_back(pabskew);
29                 CommandParameter pchimealns("chimealns", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pchimealns);
30                 CommandParameter pminh("minh", "Number", "", "0.3", "", "", "",false,false); parameters.push_back(pminh);
31                 CommandParameter pmindiv("mindiv", "Number", "", "0.5", "", "", "",false,false); parameters.push_back(pmindiv);
32                 CommandParameter pxn("xn", "Number", "", "8.0", "", "", "",false,false); parameters.push_back(pxn);
33                 CommandParameter pdn("dn", "Number", "", "1.4", "", "", "",false,false); parameters.push_back(pdn);
34                 CommandParameter pxa("xa", "Number", "", "1", "", "", "",false,false); parameters.push_back(pxa);
35                 CommandParameter pchunks("chunks", "Number", "", "4", "", "", "",false,false); parameters.push_back(pchunks);
36                 CommandParameter pminchunk("minchunk", "Number", "", "64", "", "", "",false,false); parameters.push_back(pminchunk);
37                 CommandParameter pidsmoothwindow("idsmoothwindow", "Number", "", "32", "", "", "",false,false); parameters.push_back(pidsmoothwindow);
38                 //CommandParameter pminsmoothid("minsmoothid", "Number", "", "0.95", "", "", "",false,false); parameters.push_back(pminsmoothid);
39                 CommandParameter pmaxp("maxp", "Number", "", "2", "", "", "",false,false); parameters.push_back(pmaxp);
40                 CommandParameter pskipgaps("skipgaps", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(pskipgaps);
41                 CommandParameter pskipgaps2("skipgaps2", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(pskipgaps2);
42                 CommandParameter pminlen("minlen", "Number", "", "10", "", "", "",false,false); parameters.push_back(pminlen);
43                 CommandParameter pmaxlen("maxlen", "Number", "", "10000", "", "", "",false,false); parameters.push_back(pmaxlen);
44                 CommandParameter pucl("ucl", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pucl);
45                 CommandParameter pqueryfract("queryfract", "Number", "", "0.5", "", "", "",false,false); parameters.push_back(pqueryfract);
46
47                 vector<string> myArray;
48                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
49                 return myArray;
50         }
51         catch(exception& e) {
52                 m->errorOut(e, "ChimeraUchimeCommand", "setParameters");
53                 exit(1);
54         }
55 }
56 //**********************************************************************************************************************
57 string ChimeraUchimeCommand::getHelpString(){   
58         try {
59                 string helpString = "";
60                 helpString += "The chimera.uchime command reads a fastafile and referencefile and outputs potentially chimeric sequences.\n";
61                 helpString += "This command is a wrapper for uchime written by Robert C. Edgar.\n";
62                 helpString += "The chimera.uchime command parameters are fasta, name, count, reference, processors, abskew, chimealns, minh, mindiv, xn, dn, xa, chunks, minchunk, idsmoothwindow, minsmoothid, maxp, skipgaps, skipgaps2, minlen, maxlen, ucl and queryfact.\n";
63                 helpString += "The fasta parameter allows you to enter the fasta file containing your potentially chimeric sequences, and is required, unless you have a valid current fasta file. \n";
64                 helpString += "The name parameter allows you to provide a name file, if you are using template=self. \n";
65         helpString += "The count parameter allows you to provide a count file, if you are using template=self. \n";
66                 helpString += "You may enter multiple fasta files by separating their names with dashes. ie. fasta=abrecovery.fasta-amazon.fasta \n";
67                 helpString += "The group parameter allows you to provide a group file. The group file can be used with a namesfile and reference=self. When checking sequences, only sequences from the same group as the query sequence will be used as the reference. \n";
68                 helpString += "The reference parameter allows you to enter a reference file containing known non-chimeric sequences, and is required. You may also set template=self, in this case the abundant sequences will be used as potential parents. \n";
69                 helpString += "The processors parameter allows you to specify how many processors you would like to use.  The default is 1. \n";
70                 helpString += "The abskew parameter can only be used with template=self. Minimum abundance skew. Default 1.9. Abundance skew is: min [ abund(parent1), abund(parent2) ] / abund(query).\n";
71                 helpString += "The chimealns parameter allows you to indicate you would like a file containing multiple alignments of query sequences to parents in human readable format. Alignments show columns with differences that support or contradict a chimeric model.\n";
72                 helpString += "The minh parameter - mininum score to report chimera. Default 0.3. Values from 0.1 to 5 might be reasonable. Lower values increase sensitivity but may report more false positives. If you decrease xn you may need to increase minh, and vice versa.\n";
73                 helpString += "The mindiv parameter - minimum divergence ratio, default 0.5. Div ratio is 100%% - %%identity between query sequence and the closest candidate for being a parent. If you don't care about very close chimeras, then you could increase mindiv to, say, 1.0 or 2.0, and also decrease minh, say to 0.1, to increase sensitivity. How well this works will depend on your data. Best is to tune parameters on a good benchmark.\n";
74                 helpString += "The xn parameter - weight of a no vote. Default 8.0. Decreasing this weight to around 3 or 4 may give better performance on denoised data.\n";
75                 helpString += "The dn parameter - pseudo-count prior on number of no votes. Default 1.4. Probably no good reason to change this unless you can retune to a good benchmark for your data. Reasonable values are probably in the range from 0.2 to 2.\n";
76                 helpString += "The xa parameter - weight of an abstain vote. Default 1. So far, results do not seem to be very sensitive to this parameter, but if you have a good training set might be worth trying. Reasonable values might range from 0.1 to 2.\n";
77                 helpString += "The chunks parameter is the number of chunks to extract from the query sequence when searching for parents. Default 4.\n";
78                 helpString += "The minchunk parameter is the minimum length of a chunk. Default 64.\n";
79                 helpString += "The idsmoothwindow parameter is the length of id smoothing window. Default 32.\n";
80                 //helpString += "The minsmoothid parameter - minimum factional identity over smoothed window of candidate parent. Default 0.95.\n";
81                 helpString += "The maxp parameter - maximum number of candidate parents to consider. Default 2. In tests so far, increasing maxp gives only a very small improvement in sensivity but tends to increase the error rate quite a bit.\n";
82                 helpString += "The skipgaps parameter controls how gapped columns affect counting of diffs. If skipgaps is set to T, columns containing gaps do not found as diffs. Default = T.\n";
83                 helpString += "The skipgaps2 parameter controls how gapped columns affect counting of diffs. If skipgaps2 is set to T, if column is immediately adjacent to a column containing a gap, it is not counted as a diff. Default = T.\n";
84                 helpString += "The minlen parameter is the minimum unaligned sequence length. Defaults 10. Applies to both query and reference sequences.\n";
85                 helpString += "The maxlen parameter is the maximum unaligned sequence length. Defaults 10000. Applies to both query and reference sequences.\n";
86                 helpString += "The ucl parameter - use local-X alignments. Default is global-X or false. On tests so far, global-X is always better; this option is retained because it just might work well on some future type of data.\n";
87                 helpString += "The queryfract parameter - minimum fraction of the query sequence that must be covered by a local-X alignment. Default 0.5. Applies only when ucl is true.\n";
88 #ifdef USE_MPI
89                 helpString += "When using MPI, the processors parameter is set to the number of MPI processes running. \n";
90 #endif
91                 helpString += "The chimera.uchime command should be in the following format: \n";
92                 helpString += "chimera.uchime(fasta=yourFastaFile, reference=yourTemplate) \n";
93                 helpString += "Example: chimera.uchime(fasta=AD.align, reference=silva.gold.align) \n";
94                 helpString += "Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFastaFile).\n";       
95                 return helpString;
96         }
97         catch(exception& e) {
98                 m->errorOut(e, "ChimeraUchimeCommand", "getHelpString");
99                 exit(1);
100         }
101 }
102 //**********************************************************************************************************************
103 string ChimeraUchimeCommand::getOutputFileNameTag(string type, string inputName=""){    
104         try {
105         string outputFileName = "";
106                 map<string, vector<string> >::iterator it;
107         
108         //is this a type this command creates
109         it = outputTypes.find(type);
110         if (it == outputTypes.end()) {  m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n"); }
111         else {
112             if (type == "chimera") {  outputFileName =  "uchime.chimeras"; }
113             else if (type == "accnos") {  outputFileName =  "uchime.accnos"; }
114             else if (type == "alns") {  outputFileName =  "uchime.alns"; }
115             else { m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n"); m->control_pressed = true;  }
116         }
117         return outputFileName;
118         }
119         catch(exception& e) {
120                 m->errorOut(e, "ChimeraUchimeCommand", "getOutputFileNameTag");
121                 exit(1);
122         }
123 }
124 //**********************************************************************************************************************
125 ChimeraUchimeCommand::ChimeraUchimeCommand(){   
126         try {
127                 abort = true; calledHelp = true;
128                 setParameters();
129                 vector<string> tempOutNames;
130                 outputTypes["chimera"] = tempOutNames;
131                 outputTypes["accnos"] = tempOutNames;
132                 outputTypes["alns"] = tempOutNames;
133         }
134         catch(exception& e) {
135                 m->errorOut(e, "ChimeraUchimeCommand", "ChimeraUchimeCommand");
136                 exit(1);
137         }
138 }
139 //***************************************************************************************************************
140 ChimeraUchimeCommand::ChimeraUchimeCommand(string option)  {
141         try {
142                 abort = false; calledHelp = false; hasName=false; hasCount=false;
143                 ReferenceDB* rdb = ReferenceDB::getInstance();
144                 
145                 //allow user to run help
146                 if(option == "help") { help(); abort = true; calledHelp = true; }
147                 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
148                 
149                 else {
150                         vector<string> myArray = setParameters();
151                         
152                         OptionParser parser(option);
153                         map<string,string> parameters = parser.getParameters();
154                         
155                         ValidParameters validParameter("chimera.uchime");
156                         map<string,string>::iterator it;
157                         
158                         //check to make sure all parameters are valid for command
159                         for (it = parameters.begin(); it != parameters.end(); it++) { 
160                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
161                         }
162                         
163                         vector<string> tempOutNames;
164                         outputTypes["chimera"] = tempOutNames;
165                         outputTypes["accnos"] = tempOutNames;
166                         outputTypes["alns"] = tempOutNames;
167                         
168                         //if the user changes the input directory command factory will send this info to us in the output parameter 
169                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
170                         if (inputDir == "not found"){   inputDir = "";          }
171                         
172                         //check for required parameters
173                         fastafile = validParameter.validFile(parameters, "fasta", false);
174                         if (fastafile == "not found") {                                 
175                                 //if there is a current fasta file, use it
176                                 string filename = m->getFastaFile(); 
177                                 if (filename != "") { fastaFileNames.push_back(filename); m->mothurOut("Using " + filename + " as input file for the fasta parameter."); m->mothurOutEndLine(); }
178                                 else {  m->mothurOut("You have no current fastafile and the fasta parameter is required."); m->mothurOutEndLine(); abort = true; }
179                         }else { 
180                                 m->splitAtDash(fastafile, fastaFileNames);
181                                 
182                                 //go through files and make sure they are good, if not, then disregard them
183                                 for (int i = 0; i < fastaFileNames.size(); i++) {
184                                         
185                                         bool ignore = false;
186                                         if (fastaFileNames[i] == "current") { 
187                                                 fastaFileNames[i] = m->getFastaFile(); 
188                                                 if (fastaFileNames[i] != "") {  m->mothurOut("Using " + fastaFileNames[i] + " as input file for the fasta parameter where you had given current."); m->mothurOutEndLine(); }
189                                                 else {  
190                                                         m->mothurOut("You have no current fastafile, ignoring current."); m->mothurOutEndLine(); ignore=true; 
191                                                         //erase from file list
192                                                         fastaFileNames.erase(fastaFileNames.begin()+i);
193                                                         i--;
194                                                 }
195                                         }
196                                         
197                                         if (!ignore) {
198                                                 
199                                                 if (inputDir != "") {
200                                                         string path = m->hasPath(fastaFileNames[i]);
201                                                         //if the user has not given a path then, add inputdir. else leave path alone.
202                                                         if (path == "") {       fastaFileNames[i] = inputDir + fastaFileNames[i];               }
203                                                 }
204                                                 
205                                                 int ableToOpen;
206                                                 ifstream in;
207                                                 
208                                                 ableToOpen = m->openInputFile(fastaFileNames[i], in, "noerror");
209                                                 
210                                                 //if you can't open it, try default location
211                                                 if (ableToOpen == 1) {
212                                                         if (m->getDefaultPath() != "") { //default path is set
213                                                                 string tryPath = m->getDefaultPath() + m->getSimpleName(fastaFileNames[i]);
214                                                                 m->mothurOut("Unable to open " + fastaFileNames[i] + ". Trying default " + tryPath); m->mothurOutEndLine();
215                                                                 ifstream in2;
216                                                                 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
217                                                                 in2.close();
218                                                                 fastaFileNames[i] = tryPath;
219                                                         }
220                                                 }
221                                                 
222                                                 if (ableToOpen == 1) {
223                                                         if (m->getOutputDir() != "") { //default path is set
224                                                                 string tryPath = m->getOutputDir() + m->getSimpleName(fastaFileNames[i]);
225                                                                 m->mothurOut("Unable to open " + fastaFileNames[i] + ". Trying output directory " + tryPath); m->mothurOutEndLine();
226                                                                 ifstream in2;
227                                                                 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
228                                                                 in2.close();
229                                                                 fastaFileNames[i] = tryPath;
230                                                         }
231                                                 }
232                                                 
233                                                 in.close();
234                                                 
235                                                 if (ableToOpen == 1) { 
236                                                         m->mothurOut("Unable to open " + fastaFileNames[i] + ". It will be disregarded."); m->mothurOutEndLine(); 
237                                                         //erase from file list
238                                                         fastaFileNames.erase(fastaFileNames.begin()+i);
239                                                         i--;
240                                                 }else {
241                                                         m->setFastaFile(fastaFileNames[i]);
242                                                 }
243                                         }
244                                 }
245                                 
246                                 //make sure there is at least one valid file left
247                                 if (fastaFileNames.size() == 0) { m->mothurOut("[ERROR]: no valid files."); m->mothurOutEndLine(); abort = true; }
248                         }
249                         
250                         
251                         //check for required parameters
252                         namefile = validParameter.validFile(parameters, "name", false);
253                         if (namefile == "not found") { namefile = "";   }
254                         else { 
255                                 m->splitAtDash(namefile, nameFileNames);
256                                 
257                                 //go through files and make sure they are good, if not, then disregard them
258                                 for (int i = 0; i < nameFileNames.size(); i++) {
259                                         
260                                         bool ignore = false;
261                                         if (nameFileNames[i] == "current") { 
262                                                 nameFileNames[i] = m->getNameFile(); 
263                                                 if (nameFileNames[i] != "") {  m->mothurOut("Using " + nameFileNames[i] + " as input file for the name parameter where you had given current."); m->mothurOutEndLine(); }
264                                                 else {  
265                                                         m->mothurOut("You have no current namefile, ignoring current."); m->mothurOutEndLine(); ignore=true; 
266                                                         //erase from file list
267                                                         nameFileNames.erase(nameFileNames.begin()+i);
268                                                         i--;
269                                                 }
270                                         }
271                                         
272                                         if (!ignore) {
273                                                 
274                                                 if (inputDir != "") {
275                                                         string path = m->hasPath(nameFileNames[i]);
276                                                         //if the user has not given a path then, add inputdir. else leave path alone.
277                                                         if (path == "") {       nameFileNames[i] = inputDir + nameFileNames[i];         }
278                                                 }
279                                                 
280                                                 int ableToOpen;
281                                                 ifstream in;
282                                                 
283                                                 ableToOpen = m->openInputFile(nameFileNames[i], in, "noerror");
284                                                 
285                                                 //if you can't open it, try default location
286                                                 if (ableToOpen == 1) {
287                                                         if (m->getDefaultPath() != "") { //default path is set
288                                                                 string tryPath = m->getDefaultPath() + m->getSimpleName(nameFileNames[i]);
289                                                                 m->mothurOut("Unable to open " + nameFileNames[i] + ". Trying default " + tryPath); m->mothurOutEndLine();
290                                                                 ifstream in2;
291                                                                 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
292                                                                 in2.close();
293                                                                 nameFileNames[i] = tryPath;
294                                                         }
295                                                 }
296                                                 
297                                                 if (ableToOpen == 1) {
298                                                         if (m->getOutputDir() != "") { //default path is set
299                                                                 string tryPath = m->getOutputDir() + m->getSimpleName(nameFileNames[i]);
300                                                                 m->mothurOut("Unable to open " + nameFileNames[i] + ". Trying output directory " + tryPath); m->mothurOutEndLine();
301                                                                 ifstream in2;
302                                                                 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
303                                                                 in2.close();
304                                                                 nameFileNames[i] = tryPath;
305                                                         }
306                                                 }
307                                                 
308                                                 in.close();
309                                                 
310                                                 if (ableToOpen == 1) { 
311                                                         m->mothurOut("Unable to open " + nameFileNames[i] + ". It will be disregarded."); m->mothurOutEndLine(); 
312                                                         //erase from file list
313                                                         nameFileNames.erase(nameFileNames.begin()+i);
314                                                         i--;
315                                                 }else {
316                                                         m->setNameFile(nameFileNames[i]);
317                                                 }
318                                         }
319                                 }
320                         }
321             
322             if (nameFileNames.size() != 0) { hasName = true; }
323             
324             //check for required parameters
325             vector<string> countfileNames;
326                         countfile = validParameter.validFile(parameters, "count", false);
327                         if (countfile == "not found") { 
328                 countfile = "";  
329                         }else { 
330                                 m->splitAtDash(countfile, countfileNames);
331                                 
332                                 //go through files and make sure they are good, if not, then disregard them
333                                 for (int i = 0; i < countfileNames.size(); i++) {
334                                         
335                                         bool ignore = false;
336                                         if (countfileNames[i] == "current") { 
337                                                 countfileNames[i] = m->getCountTableFile(); 
338                                                 if (nameFileNames[i] != "") {  m->mothurOut("Using " + countfileNames[i] + " as input file for the count parameter where you had given current."); m->mothurOutEndLine(); }
339                                                 else {  
340                                                         m->mothurOut("You have no current count file, ignoring current."); m->mothurOutEndLine(); ignore=true; 
341                                                         //erase from file list
342                                                         countfileNames.erase(countfileNames.begin()+i);
343                                                         i--;
344                                                 }
345                                         }
346                                         
347                                         if (!ignore) {
348                                                 
349                                                 if (inputDir != "") {
350                                                         string path = m->hasPath(countfileNames[i]);
351                                                         //if the user has not given a path then, add inputdir. else leave path alone.
352                                                         if (path == "") {       countfileNames[i] = inputDir + countfileNames[i];               }
353                                                 }
354                                                 
355                                                 int ableToOpen;
356                                                 ifstream in;
357                                                 
358                                                 ableToOpen = m->openInputFile(countfileNames[i], in, "noerror");
359                                                 
360                                                 //if you can't open it, try default location
361                                                 if (ableToOpen == 1) {
362                                                         if (m->getDefaultPath() != "") { //default path is set
363                                                                 string tryPath = m->getDefaultPath() + m->getSimpleName(countfileNames[i]);
364                                                                 m->mothurOut("Unable to open " + countfileNames[i] + ". Trying default " + tryPath); m->mothurOutEndLine();
365                                                                 ifstream in2;
366                                                                 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
367                                                                 in2.close();
368                                                                 countfileNames[i] = tryPath;
369                                                         }
370                                                 }
371                                                 
372                                                 if (ableToOpen == 1) {
373                                                         if (m->getOutputDir() != "") { //default path is set
374                                                                 string tryPath = m->getOutputDir() + m->getSimpleName(countfileNames[i]);
375                                                                 m->mothurOut("Unable to open " + countfileNames[i] + ". Trying output directory " + tryPath); m->mothurOutEndLine();
376                                                                 ifstream in2;
377                                                                 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
378                                                                 in2.close();
379                                                                 countfileNames[i] = tryPath;
380                                                         }
381                                                 }
382                                                 
383                                                 in.close();
384                                                 
385                                                 if (ableToOpen == 1) { 
386                                                         m->mothurOut("Unable to open " + countfileNames[i] + ". It will be disregarded."); m->mothurOutEndLine(); 
387                                                         //erase from file list
388                                                         countfileNames.erase(countfileNames.begin()+i);
389                                                         i--;
390                                                 }else {
391                                                         m->setCountTableFile(countfileNames[i]);
392                                                 }
393                                         }
394                                 }
395                         }
396             
397             if (countfileNames.size() != 0) { hasCount = true; }
398             
399                         //make sure there is at least one valid file left
400             if (hasName && hasCount) { m->mothurOut("[ERROR]: You must enter ONLY ONE of the following: count or name."); m->mothurOutEndLine(); abort = true; }
401             
402             if (!hasName && hasCount) { nameFileNames = countfileNames; }
403             
404                         if ((hasCount || hasName) && (nameFileNames.size() != fastaFileNames.size())) { m->mothurOut("[ERROR]: The number of name or count files does not match the number of fastafiles, please correct."); m->mothurOutEndLine(); abort=true; }
405                         
406                         bool hasGroup = true;
407                         groupfile = validParameter.validFile(parameters, "group", false);
408                         if (groupfile == "not found") { groupfile = "";  hasGroup = false; }
409                         else { 
410                                 m->splitAtDash(groupfile, groupFileNames);
411                                 
412                                 //go through files and make sure they are good, if not, then disregard them
413                                 for (int i = 0; i < groupFileNames.size(); i++) {
414                                         
415                                         bool ignore = false;
416                                         if (groupFileNames[i] == "current") { 
417                                                 groupFileNames[i] = m->getGroupFile(); 
418                                                 if (groupFileNames[i] != "") {  m->mothurOut("Using " + groupFileNames[i] + " as input file for the group parameter where you had given current."); m->mothurOutEndLine(); }
419                                                 else {  
420                                                         m->mothurOut("You have no current namefile, ignoring current."); m->mothurOutEndLine(); ignore=true; 
421                                                         //erase from file list
422                                                         groupFileNames.erase(groupFileNames.begin()+i);
423                                                         i--;
424                                                 }
425                                         }
426                                         
427                                         if (!ignore) {
428                                                 
429                                                 if (inputDir != "") {
430                                                         string path = m->hasPath(groupFileNames[i]);
431                                                         //if the user has not given a path then, add inputdir. else leave path alone.
432                                                         if (path == "") {       groupFileNames[i] = inputDir + groupFileNames[i];               }
433                                                 }
434                                                 
435                                                 int ableToOpen;
436                                                 ifstream in;
437                                                 
438                                                 ableToOpen = m->openInputFile(groupFileNames[i], in, "noerror");
439                                                 
440                                                 //if you can't open it, try default location
441                                                 if (ableToOpen == 1) {
442                                                         if (m->getDefaultPath() != "") { //default path is set
443                                                                 string tryPath = m->getDefaultPath() + m->getSimpleName(groupFileNames[i]);
444                                                                 m->mothurOut("Unable to open " + groupFileNames[i] + ". Trying default " + tryPath); m->mothurOutEndLine();
445                                                                 ifstream in2;
446                                                                 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
447                                                                 in2.close();
448                                                                 groupFileNames[i] = tryPath;
449                                                         }
450                                                 }
451                                                 
452                                                 if (ableToOpen == 1) {
453                                                         if (m->getOutputDir() != "") { //default path is set
454                                                                 string tryPath = m->getOutputDir() + m->getSimpleName(groupFileNames[i]);
455                                                                 m->mothurOut("Unable to open " + groupFileNames[i] + ". Trying output directory " + tryPath); m->mothurOutEndLine();
456                                                                 ifstream in2;
457                                                                 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
458                                                                 in2.close();
459                                                                 groupFileNames[i] = tryPath;
460                                                         }
461                                                 }
462                                                 
463                                                 in.close();
464                                                 
465                                                 if (ableToOpen == 1) { 
466                                                         m->mothurOut("Unable to open " + groupFileNames[i] + ". It will be disregarded."); m->mothurOutEndLine(); 
467                                                         //erase from file list
468                                                         groupFileNames.erase(groupFileNames.begin()+i);
469                                                         i--;
470                                                 }else {
471                                                         m->setGroupFile(groupFileNames[i]);
472                                                 }
473                                         }
474                                 }
475                                 
476                                 //make sure there is at least one valid file left
477                                 if (groupFileNames.size() == 0) { m->mothurOut("[ERROR]: no valid group files."); m->mothurOutEndLine(); abort = true; }
478                         }
479                         
480                         if (hasGroup && (groupFileNames.size() != fastaFileNames.size())) { m->mothurOut("[ERROR]: The number of groupfiles does not match the number of fastafiles, please correct."); m->mothurOutEndLine(); abort=true; }
481                         
482             if (hasGroup && hasCount) { m->mothurOut("[ERROR]: You must enter ONLY ONE of the following: count or group."); m->mothurOutEndLine(); abort = true; }                      
483                         //if the user changes the output directory command factory will send this info to us in the output parameter 
484                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = ""; }
485                         
486                         
487                         //if the user changes the output directory command factory will send this info to us in the output parameter 
488                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = ""; }
489                         
490                         string path;
491                         it = parameters.find("reference");
492                         //user has given a template file
493                         if(it != parameters.end()){ 
494                                 if (it->second == "self") { templatefile = "self"; }
495                                 else {
496                                         path = m->hasPath(it->second);
497                                         //if the user has not given a path then, add inputdir. else leave path alone.
498                                         if (path == "") {       parameters["reference"] = inputDir + it->second;                }
499                                         
500                                         templatefile = validParameter.validFile(parameters, "reference", true);
501                                         if (templatefile == "not open") { abort = true; }
502                                         else if (templatefile == "not found") { //check for saved reference sequences
503                                                 if (rdb->getSavedReference() != "") {
504                                                         templatefile = rdb->getSavedReference();
505                                                         m->mothurOutEndLine();  m->mothurOut("Using sequences from " + rdb->getSavedReference() + "."); m->mothurOutEndLine();
506                                                 }else {
507                                                         m->mothurOut("[ERROR]: You don't have any saved reference sequences and the reference parameter is a required."); 
508                                                         m->mothurOutEndLine();
509                                                         abort = true; 
510                                                 }
511                                         }
512                                 }
513                         }else if (hasName) {  templatefile = "self"; }
514             else if (hasCount) {  templatefile = "self"; }
515                         else { 
516                                 if (rdb->getSavedReference() != "") {
517                                         templatefile = rdb->getSavedReference();
518                                         m->mothurOutEndLine();  m->mothurOut("Using sequences from " + rdb->getSavedReference() + "."); m->mothurOutEndLine();
519                                 }else {
520                                         m->mothurOut("[ERROR]: You don't have any saved reference sequences and the reference parameter is a required."); 
521                                         m->mothurOutEndLine();
522                                         templatefile = ""; abort = true; 
523                                 } 
524                         }
525                                 
526                         string temp = validParameter.validFile(parameters, "processors", false);        if (temp == "not found"){       temp = m->getProcessors();      }
527                         m->setProcessors(temp);
528                         m->mothurConvert(temp, processors);
529                         
530                         abskew = validParameter.validFile(parameters, "abskew", false); if (abskew == "not found"){     useAbskew = false;  abskew = "1.9";     }else{  useAbskew = true;  }
531                         if (useAbskew && templatefile != "self") { m->mothurOut("The abskew parameter is only valid with template=self, ignoring."); m->mothurOutEndLine(); useAbskew = false; }
532                         
533                         temp = validParameter.validFile(parameters, "chimealns", false);                        if (temp == "not found") { temp = "f"; }
534                         chimealns = m->isTrue(temp); 
535                         
536                         minh = validParameter.validFile(parameters, "minh", false);                                             if (minh == "not found")                        { useMinH = false; minh = "0.3";                                        }       else{ useMinH = true;                   }
537                         mindiv = validParameter.validFile(parameters, "mindiv", false);                                 if (mindiv == "not found")                      { useMindiv = false; mindiv = "0.5";                            }       else{ useMindiv = true;                 }
538                         xn = validParameter.validFile(parameters, "xn", false);                                                 if (xn == "not found")                          { useXn = false; xn = "8.0";                                            }       else{ useXn = true;                             }
539                         dn = validParameter.validFile(parameters, "dn", false);                                                 if (dn == "not found")                          { useDn = false; dn = "1.4";                                            }       else{ useDn = true;                             }
540                         xa = validParameter.validFile(parameters, "xa", false);                                                 if (xa == "not found")                          { useXa = false; xa = "1";                                                      }       else{ useXa = true;                             }
541                         chunks = validParameter.validFile(parameters, "chunks", false);                                 if (chunks == "not found")                      { useChunks = false; chunks = "4";                                      }       else{ useChunks = true;                 }
542                         minchunk = validParameter.validFile(parameters, "minchunk", false);                             if (minchunk == "not found")            { useMinchunk = false; minchunk = "64";                         }       else{ useMinchunk = true;               }
543                         idsmoothwindow = validParameter.validFile(parameters, "idsmoothwindow", false); if (idsmoothwindow == "not found")      { useIdsmoothwindow = false; idsmoothwindow = "32";     }       else{ useIdsmoothwindow = true; }
544                         //minsmoothid = validParameter.validFile(parameters, "minsmoothid", false);             if (minsmoothid == "not found")         { useMinsmoothid = false; minsmoothid = "0.95";         }       else{ useMinsmoothid = true;    }
545                         maxp = validParameter.validFile(parameters, "maxp", false);                                             if (maxp == "not found")                        { useMaxp = false; maxp = "2";                                          }       else{ useMaxp = true;                   }
546                         minlen = validParameter.validFile(parameters, "minlen", false);                                 if (minlen == "not found")                      { useMinlen = false; minlen = "10";                                     }       else{ useMinlen = true;                 }
547                         maxlen = validParameter.validFile(parameters, "maxlen", false);                                 if (maxlen == "not found")                      { useMaxlen = false; maxlen = "10000";                          }       else{ useMaxlen = true;                 }
548                         
549                         temp = validParameter.validFile(parameters, "ucl", false);                                              if (temp == "not found") { temp = "f"; }
550                         ucl = m->isTrue(temp);
551                         
552                         queryfract = validParameter.validFile(parameters, "queryfract", false);                 if (queryfract == "not found")          { useQueryfract = false; queryfract = "0.5";            }       else{ useQueryfract = true;             }
553                         if (!ucl && useQueryfract) { m->mothurOut("queryfact may only be used when ucl=t, ignoring."); m->mothurOutEndLine(); useQueryfract = false; }
554                         
555                         temp = validParameter.validFile(parameters, "skipgaps", false);                                 if (temp == "not found") { temp = "t"; }
556                         skipgaps = m->isTrue(temp); 
557
558                         temp = validParameter.validFile(parameters, "skipgaps2", false);                                if (temp == "not found") { temp = "t"; }
559                         skipgaps2 = m->isTrue(temp); 
560                         
561                         if (hasName && (templatefile != "self")) { m->mothurOut("You have provided a namefile and the reference parameter is not set to self. I am not sure what reference you are trying to use, aborting."); m->mothurOutEndLine(); abort=true; }
562                         if (hasGroup && (templatefile != "self")) { m->mothurOut("You have provided a group file and the reference parameter is not set to self. I am not sure what reference you are trying to use, aborting."); m->mothurOutEndLine(); abort=true; }
563                         
564                         //look for uchime exe
565                         path = m->argv;
566                         string tempPath = path;
567                         for (int i = 0; i < path.length(); i++) { tempPath[i] = tolower(path[i]); }
568                         path = path.substr(0, (tempPath.find_last_of('m')));
569                         
570                         string uchimeCommand;
571 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
572                         uchimeCommand = path + "uchime";        //      format the database, -o option gives us the ability
573             if (m->debug) { 
574                 m->mothurOut("[DEBUG]: Uchime location using \"which uchime\" = "); 
575                 Command* newCommand = new SystemCommand("which uchime"); m->mothurOutEndLine();
576                 newCommand->execute();
577                 delete newCommand;
578                 m->mothurOut("[DEBUG]: Mothur's location using \"which mothur\" = "); 
579                 newCommand = new SystemCommand("which mothur"); m->mothurOutEndLine();
580                 newCommand->execute();
581                 delete newCommand;
582             }
583 #else
584                         uchimeCommand = path + "uchime.exe";
585 #endif
586         
587                         //test to make sure uchime exists
588                         ifstream in;
589                         uchimeCommand = m->getFullPathName(uchimeCommand);
590                         int ableToOpen = m->openInputFile(uchimeCommand, in, "no error"); in.close();
591                         if(ableToOpen == 1) {   
592                 m->mothurOut(uchimeCommand + " file does not exist. Checking path... \n");
593                 //check to see if uchime is in the path??
594                 
595                 string uLocation = m->findProgramPath("uchime");
596                 
597                 
598                 ifstream in2;
599 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
600                 ableToOpen = m->openInputFile(uLocation, in2, "no error"); in2.close();
601 #else
602                 ableToOpen = m->openInputFile((uLocation + ".exe"), in2, "no error"); in2.close();
603 #endif
604
605                 if(ableToOpen == 1) { m->mothurOut("[ERROR]: " + uLocation + " file does not exist. mothur requires the uchime executable."); m->mothurOutEndLine(); abort = true; } 
606                 else {  m->mothurOut("Found uchime in your path, using " + uLocation + "\n");uchimeLocation = uLocation; }
607             }else {  uchimeLocation = uchimeCommand; }
608             
609             uchimeLocation = m->getFullPathName(uchimeLocation);
610         }
611         }
612         catch(exception& e) {
613                 m->errorOut(e, "ChimeraSlayerCommand", "ChimeraSlayerCommand");
614                 exit(1);
615         }
616 }
617 //***************************************************************************************************************
618
619 int ChimeraUchimeCommand::execute(){
620         try{
621         
622         if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
623                 
624                 m->mothurOut("\nuchime by Robert C. Edgar\nhttp://drive5.com/uchime\nThis code is donated to the public domain.\n\n");
625                 
626                 for (int s = 0; s < fastaFileNames.size(); s++) {
627                         
628                         m->mothurOut("Checking sequences from " + fastaFileNames[s] + " ..." ); m->mothurOutEndLine();
629                         
630                         int start = time(NULL); 
631                         string nameFile = "";
632                         if (outputDir == "") { outputDir = m->hasPath(fastaFileNames[s]);  }//if user entered a file with a path then preserve it                               
633                         string outputFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + getOutputFileNameTag("chimera");
634                         string accnosFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + getOutputFileNameTag("accnos");
635                         string alnsFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s]))  + getOutputFileNameTag("alns");
636                         string newFasta = m->getRootName(fastaFileNames[s]) + "temp";
637                                 
638                         //you provided a groupfile
639                         string groupFile = "";
640             bool hasGroup = false;
641                         if (groupFileNames.size() != 0) { groupFile = groupFileNames[s]; hasGroup = true; }
642             else if (hasCount) {
643                 CountTable ct;
644                 if (ct.testGroups(nameFileNames[s])) { hasGroup = true; }
645             }
646                         
647                         if ((templatefile == "self") && (!hasGroup)) { //you want to run uchime with a template=self and no groups
648
649                                 if (processors != 1) { m->mothurOut("When using template=self, mothur can only use 1 processor, continuing."); m->mothurOutEndLine(); processors = 1; }
650                                 if (nameFileNames.size() != 0) { //you provided a namefile and we don't need to create one
651                                         nameFile = nameFileNames[s];
652                                 }else { nameFile = getNamesFile(fastaFileNames[s]); }
653                                                                                 
654                                 map<string, string> seqs;  
655                                 readFasta(fastaFileNames[s], seqs);  if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) {   m->mothurRemove(outputNames[j]);        }  return 0; }
656
657                                 //read namefile
658                                 vector<seqPriorityNode> nameMapCount;
659                 int error;
660                 if (hasCount) {
661                     CountTable ct;
662                     ct.readTable(nameFile);
663                     for(map<string, string>::iterator it = seqs.begin(); it != seqs.end(); it++) {
664                         int num = ct.getNumSeqs(it->first);
665                         if (num == 0) { error = 1; }
666                         else {
667                             seqPriorityNode temp(num, it->second, it->first);
668                             nameMapCount.push_back(temp);
669                         }
670                     }
671                 }else {
672                     error = m->readNames(nameFile, nameMapCount, seqs); if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) {        m->mothurRemove(outputNames[j]);        }  return 0; }
673                 }
674                                 if (error == 1) { for (int j = 0; j < outputNames.size(); j++) {        m->mothurRemove(outputNames[j]);        }  return 0; }
675                                 if (seqs.size() != nameMapCount.size()) { m->mothurOut( "The number of sequences in your fastafile does not match the number of sequences in your namefile, aborting."); m->mothurOutEndLine(); for (int j = 0; j < outputNames.size(); j++) {  m->mothurRemove(outputNames[j]);        }  return 0; }
676                                 
677                                 printFile(nameMapCount, newFasta);
678                                 fastaFileNames[s] = newFasta;
679                         }
680                         
681                         if (m->control_pressed) {  for (int j = 0; j < outputNames.size(); j++) {       m->mothurRemove(outputNames[j]);        }  return 0;    }                               
682                         
683                         if (hasGroup) {
684                                 if (nameFileNames.size() != 0) { //you provided a namefile and we don't need to create one
685                                         nameFile = nameFileNames[s];
686                                 }else { nameFile = getNamesFile(fastaFileNames[s]); }
687                                 
688                                 //Parse sequences by group
689                 vector<string> groups;
690                 map<string, string> uniqueNames;
691                 if (hasCount) {
692                     cparser = new SequenceCountParser(nameFile, fastaFileNames[s]);
693                     groups = cparser->getNamesOfGroups();
694                     uniqueNames = cparser->getAllSeqsMap();
695                 }else{
696                     sparser = new SequenceParser(groupFile, fastaFileNames[s], nameFile);
697                     groups = sparser->getNamesOfGroups();
698                     uniqueNames = sparser->getAllSeqsMap();
699                 }
700                                         
701                                 if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) {        m->mothurRemove(outputNames[j]);        }  return 0; }
702                                                                 
703                                 //clears files
704                                 ofstream out, out1, out2;
705                                 m->openOutputFile(outputFileName, out); out.close(); 
706                                 m->openOutputFile(accnosFileName, out1); out1.close();
707                                 if (chimealns) { m->openOutputFile(alnsFileName, out2); out2.close(); }
708                                 int totalSeqs = 0;
709                                 
710                                 if(processors == 1)     {       totalSeqs = driverGroups(outputFileName, newFasta, accnosFileName, alnsFileName, 0, groups.size(), groups);     }
711                                 else                            {       totalSeqs = createProcessesGroups(outputFileName, newFasta, accnosFileName, alnsFileName, groups, nameFile, groupFile, fastaFileNames[s]);                      }
712
713                                 if (m->control_pressed) {  for (int j = 0; j < outputNames.size(); j++) {       m->mothurRemove(outputNames[j]);        }  return 0;    }                               
714                 if (hasCount) { delete cparser; }
715                 else { delete sparser; }
716
717                                 int totalChimeras = deconvoluteResults(uniqueNames, outputFileName, accnosFileName, alnsFileName);
718                                 
719                                 m->mothurOutEndLine(); m->mothurOut("It took " + toString(time(NULL) - start) + " secs to check " + toString(totalSeqs) + " sequences. " + toString(totalChimeras) + " chimeras were found.");  m->mothurOutEndLine();
720                                 m->mothurOut("The number of sequences checked may be larger than the number of unique sequences because some sequences are found in several samples."); m->mothurOutEndLine(); 
721                                 
722                                 if (m->control_pressed) {  for (int j = 0; j < outputNames.size(); j++) {       m->mothurRemove(outputNames[j]);        }  return 0;    }                               
723                                         
724                         }else{
725                                 if (m->control_pressed) {  for (int j = 0; j < outputNames.size(); j++) {       m->mothurRemove(outputNames[j]);        }  return 0;    }
726                         
727                                 int numSeqs = 0;
728                                 int numChimeras = 0;
729
730                                 if(processors == 1){ numSeqs = driver(outputFileName, fastaFileNames[s], accnosFileName, alnsFileName, numChimeras); }
731                                 else{   numSeqs = createProcesses(outputFileName, fastaFileNames[s], accnosFileName, alnsFileName, numChimeras); }
732                                 
733                                 //add headings
734                                 ofstream out;
735                                 m->openOutputFile(outputFileName+".temp", out); 
736                                 out << "Score\tQuery\tParentA\tParentB\tIdQM\tIdQA\tIdQB\tIdAB\tIdQT\tLY\tLN\tLA\tRY\tRN\tRA\tDiv\tYN\n";
737                                 out.close();
738                                 
739                                 m->appendFiles(outputFileName, outputFileName+".temp");
740                                 m->mothurRemove(outputFileName); rename((outputFileName+".temp").c_str(), outputFileName.c_str());
741                                 
742                                 if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) {        m->mothurRemove(outputNames[j]);        } return 0; }
743                         
744                                 //remove file made for uchime
745                                 if (templatefile == "self") {  m->mothurRemove(fastaFileNames[s]); }
746                         
747                                 m->mothurOutEndLine(); m->mothurOut("It took " + toString(time(NULL) - start) + " secs to check " + toString(numSeqs) + " sequences. " + toString(numChimeras) + " chimeras were found.");      m->mothurOutEndLine();
748                         }
749                         
750                         outputNames.push_back(outputFileName); outputTypes["chimera"].push_back(outputFileName);
751                         outputNames.push_back(accnosFileName); outputTypes["accnos"].push_back(accnosFileName);
752                         if (chimealns) { outputNames.push_back(alnsFileName); outputTypes["alns"].push_back(alnsFileName); }
753                 }
754         
755                 //set accnos file as new current accnosfile
756                 string current = "";
757                 itTypes = outputTypes.find("accnos");
758                 if (itTypes != outputTypes.end()) {
759                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setAccnosFile(current); }
760                 }
761                 
762                 m->mothurOutEndLine();
763                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
764                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }       
765                 m->mothurOutEndLine();
766                 
767                 return 0;
768                 
769         }
770         catch(exception& e) {
771                 m->errorOut(e, "ChimeraUchimeCommand", "execute");
772                 exit(1);
773         }
774 }
775 //**********************************************************************************************************************
776 int ChimeraUchimeCommand::deconvoluteResults(map<string, string>& uniqueNames, string outputFileName, string accnosFileName, string alnsFileName){
777         try {
778                 map<string, string>::iterator itUnique;
779                 int total = 0;
780                 
781                 //edit accnos file
782                 ifstream in2; 
783                 m->openInputFile(accnosFileName, in2);
784                 
785                 ofstream out2;
786                 m->openOutputFile(accnosFileName+".temp", out2);
787                 
788                 string name;
789                 set<string> namesInFile; //this is so if a sequence is found to be chimera in several samples we dont write it to the results file more than once
790                 set<string>::iterator itNames;
791                 set<string> chimerasInFile;
792                 set<string>::iterator itChimeras;
793
794                 
795                 while (!in2.eof()) {
796                         if (m->control_pressed) { in2.close(); out2.close(); m->mothurRemove(outputFileName); m->mothurRemove((accnosFileName+".temp")); return 0; }
797                         
798                         in2 >> name; m->gobble(in2);
799                         
800                         //find unique name
801                         itUnique = uniqueNames.find(name);
802                         
803                         if (itUnique == uniqueNames.end()) { m->mothurOut("[ERROR]: trouble parsing accnos results. Cannot find "+ name + "."); m->mothurOutEndLine(); m->control_pressed = true; }
804                         else {
805                                 itChimeras = chimerasInFile.find((itUnique->second));
806                                 
807                                 if (itChimeras == chimerasInFile.end()) {
808                                         out2 << itUnique->second << endl;
809                                         chimerasInFile.insert((itUnique->second));
810                                         total++;
811                                 }
812                         }
813                 }
814                 in2.close();
815                 out2.close();
816                 
817                 m->mothurRemove(accnosFileName);
818                 rename((accnosFileName+".temp").c_str(), accnosFileName.c_str());
819                 
820                 
821                 
822                 //edit chimera file
823                 ifstream in; 
824                 m->openInputFile(outputFileName, in);
825                 
826                 ofstream out;
827                 m->openOutputFile(outputFileName+".temp", out); out.setf(ios::fixed, ios::floatfield); out.setf(ios::showpoint);
828                 out << "Score\tQuery\tParentA\tParentB\tIdQM\tIdQA\tIdQB\tIdAB\tIdQT\tLY\tLN\tLA\tRY\tRN\tRA\tDiv\tYN\n";
829                 
830                 float temp1;
831                 string parent1, parent2, temp2, temp3, temp4, temp5, temp6, temp7, temp8, temp9, temp10, temp11, temp12, temp13, flag;
832                 name = "";
833                 namesInFile.clear();    
834                 //assumptions - in file each read will always look like - if uchime source is updated, revisit this code.
835                 /*                                                                              1       2       3       4       5       6       7       8       9       10      11      12      13      14      15
836                  0.000000       F11Fcsw_33372/ab=18/            *       *       *       *       *       *       *       *       *       *       *       *       *       *       N
837                  0.018300       F11Fcsw_14980/ab=16/            F11Fcsw_1915/ab=35/     F11Fcsw_6032/ab=42/     79.9    78.7    78.2    78.7    79.2    3       0       5       11      10      20      1.46    N
838                 */
839                 
840                 while (!in.eof()) {
841                         
842                         if (m->control_pressed) { in.close(); out.close(); m->mothurRemove((outputFileName+".temp")); return 0; }
843                         
844                         bool print = false;
845                         in >> temp1;    m->gobble(in);
846                         in >> name;             m->gobble(in);
847                         in >> parent1;  m->gobble(in);
848                         in >> parent2;  m->gobble(in);
849                         in >> temp2 >> temp3 >> temp4 >> temp5 >> temp6 >> temp7 >> temp8 >> temp9 >> temp10 >> temp11 >> temp12 >> temp13 >> flag;
850                         m->gobble(in);
851                         
852                         //parse name - name will look like U68590/ab=1/
853                         string restOfName = "";
854                         int pos = name.find_first_of('/');
855                         if (pos != string::npos) {
856                                 restOfName = name.substr(pos);
857                                 name = name.substr(0, pos);
858                         }
859                         
860                         //find unique name
861                         itUnique = uniqueNames.find(name);
862                         
863                         if (itUnique == uniqueNames.end()) { m->mothurOut("[ERROR]: trouble parsing chimera results. Cannot find "+ name + "."); m->mothurOutEndLine(); m->control_pressed = true; }
864                         else {
865                                 name = itUnique->second;
866                                 //is this name already in the file
867                                 itNames = namesInFile.find((name));
868                                 
869                                 if (itNames == namesInFile.end()) { //no not in file
870                                         if (flag == "N") { //are you really a no??
871                                                 //is this sequence really not chimeric??
872                                                 itChimeras = chimerasInFile.find(name);
873                                                 
874                                                 //then you really are a no so print, otherwise skip
875                                                 if (itChimeras == chimerasInFile.end()) { print = true; }
876                                         }else{ print = true; }
877                                 }
878                         }
879                         
880                         if (print) {
881                                 out << temp1 << '\t' << name << restOfName << '\t';
882                                 namesInFile.insert(name);
883                                 
884                                 //parse parent1 names
885                                 if (parent1 != "*") {
886                                         restOfName = "";
887                                         pos = parent1.find_first_of('/');
888                                         if (pos != string::npos) {
889                                                 restOfName = parent1.substr(pos);
890                                                 parent1 = parent1.substr(0, pos);
891                                         }
892                                         
893                                         itUnique = uniqueNames.find(parent1);
894                                         if (itUnique == uniqueNames.end()) { m->mothurOut("[ERROR]: trouble parsing chimera results. Cannot find parentA "+ parent1 + "."); m->mothurOutEndLine(); m->control_pressed = true; }
895                                         else {  out << itUnique->second << restOfName << '\t';  }
896                                 }else { out << parent1 << '\t'; }
897                                 
898                                 //parse parent2 names
899                                 if (parent2 != "*") {
900                                         restOfName = "";
901                                         pos = parent2.find_first_of('/');
902                                         if (pos != string::npos) {
903                                                 restOfName = parent2.substr(pos);
904                                                 parent2 = parent2.substr(0, pos);
905                                         }
906                                         
907                                         itUnique = uniqueNames.find(parent2);
908                                         if (itUnique == uniqueNames.end()) { m->mothurOut("[ERROR]: trouble parsing chimera results. Cannot find parentB "+ parent2 + "."); m->mothurOutEndLine(); m->control_pressed = true; }
909                                         else {  out << itUnique->second << restOfName << '\t';  }
910                                 }else { out << parent2 << '\t'; }
911                                 
912                                 out << temp2 << '\t' << temp3 << '\t' << temp4 << '\t' << temp5 << '\t' << temp6 << '\t' << temp7 << '\t' << temp8 << '\t' << temp9 << '\t' << temp10 << '\t' << temp11 << '\t' << temp12 << temp13 << '\t' << flag << endl;    
913                         }
914                 }
915                 in.close();
916                 out.close();
917                 
918                 m->mothurRemove(outputFileName);
919                 rename((outputFileName+".temp").c_str(), outputFileName.c_str());
920                 
921                                 
922                 //edit anls file
923                 //assumptions - in file each read will always look like - if uchime source is updated, revisit this code.
924                 /*
925                  ------------------------------------------------------------------------
926                  Query   (  179 nt) F21Fcsw_11639/ab=591/
927                  ParentA (  179 nt) F11Fcsw_6529/ab=1625/
928                  ParentB (  181 nt) F21Fcsw_12128/ab=1827/
929                  
930                  A     1 AAGgAAGAtTAATACaagATGgCaTCatgAGtccgCATgTtcAcatGATTAAAG--gTaTtcCGGTagacGATGGGGATG 78
931                  Q     1 AAGTAAGACTAATACCCAATGACGTCTCTAGAAGACATCTGAAAGAGATTAAAG--ATTTATCGGTGATGGATGGGGATG 78
932                  B     1 AAGgAAGAtTAATcCaggATGggaTCatgAGttcACATgTccgcatGATTAAAGgtATTTtcCGGTagacGATGGGGATG 80
933                  Diffs      N    N    A N?N   N N  NNN  N?NB   N ?NaNNN          B B NN    NNNN          
934                  Votes      0    0    + 000   0 0  000  000+   0 00!000            + 00    0000          
935                  Model   AAAAAAAAAAAAAAAAAAAAAAxBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB
936                  
937                  A    79 CGTtccATTAGaTaGTaGGCGGGGTAACGGCCCACCtAGtCttCGATggaTAGGGGTTCTGAGAGGAAGGTCCCCCACAT 158
938                  Q    79 CGTCTGATTAGCTTGTTGGCGGGGTAACGGCCCACCAAGGCAACGATCAGTAGGGGTTCTGAGAGGAAGGTCCCCCACAT 158
939                  B    81 CGTtccATTAGaTaGTaGGCGGGGTAACGGCCCACCtAGtCAACGATggaTAGGGGTTCTGAGAGGAAGGTCCCCCACAT 160
940                  Diffs      NNN     N N  N                   N  N BB    NNN                              
941                  Votes      000     0 0  0                   0  0 ++    000                              
942                  Model   BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB
943                  
944                  A   159 TGGAACTGAGACACGGTCCAA 179
945                  Q   159 TGGAACTGAGACACGGTCCAA 179
946                  B   161 TGGAACTGAGACACGGTCCAA 181
947                  Diffs                        
948                  Votes                        
949                  Model   BBBBBBBBBBBBBBBBBBBBB
950                  
951                  Ids.  QA 76.6%, QB 77.7%, AB 93.7%, QModel 78.9%, Div. +1.5%
952                  Diffs Left 7: N 0, A 6, Y 1 (14.3%); Right 35: N 1, A 30, Y 4 (11.4%), Score 0.0047
953                 */
954                 if (chimealns) {
955                         ifstream in3; 
956                         m->openInputFile(alnsFileName, in3);
957                 
958                         ofstream out3;
959                         m->openOutputFile(alnsFileName+".temp", out3); out3.setf(ios::fixed, ios::floatfield); out3.setf(ios::showpoint);
960                 
961                         name = "";
962                         namesInFile.clear();
963                         string line = "";
964                         
965                         while (!in3.eof()) {
966                                 if (m->control_pressed) { in3.close(); out3.close(); m->mothurRemove(outputFileName); m->mothurRemove((accnosFileName)); m->mothurRemove((alnsFileName+".temp")); return 0; }
967                                 
968                                 line = "";
969                                 line = m->getline(in3); 
970                                 string temp = "";
971                                 
972                                 if (line != "") {
973                                         istringstream iss(line);
974                                         iss >> temp;
975                                         
976                                         //are you a name line
977                                         if ((temp == "Query") || (temp == "ParentA") || (temp == "ParentB")) {
978                                                 int spot = 0;
979                                                 for (int i = 0; i < line.length(); i++) {
980                                                         spot = i;
981                                                         if (line[i] == ')') { break; }
982                                                         else { out3 << line[i]; }
983                                                 }
984                                                 
985                                                 if (spot == (line.length() - 1)) { m->mothurOut("[ERROR]: could not line sequence name in line " + line + "."); m->mothurOutEndLine(); m->control_pressed = true; }
986                                                 else if ((spot+2) > (line.length() - 1)) { m->mothurOut("[ERROR]: could not line sequence name in line " + line + "."); m->mothurOutEndLine(); m->control_pressed = true; }
987                                                 else {
988                                                         out << line[spot] << line[spot+1];
989                                                         
990                                                         name = line.substr(spot+2);
991                                                         
992                                                         //parse name - name will either look like U68590/ab=1/ or U68590
993                                                         string restOfName = "";
994                                                         int pos = name.find_first_of('/');
995                                                         if (pos != string::npos) {
996                                                                 restOfName = name.substr(pos);
997                                                                 name = name.substr(0, pos);
998                                                         }
999                                                         
1000                                                         //find unique name
1001                                                         itUnique = uniqueNames.find(name);
1002                                                         
1003                                                         if (itUnique == uniqueNames.end()) { m->mothurOut("[ERROR]: trouble parsing alns results. Cannot find "+ name + "."); m->mothurOutEndLine();m->control_pressed = true;  }
1004                                                         else {
1005                                                                 //only limit repeats on query names
1006                                                                 if (temp == "Query") {
1007                                                                         itNames = namesInFile.find((itUnique->second));
1008                                                                         
1009                                                                         if (itNames == namesInFile.end()) {
1010                                                                                 out << itUnique->second << restOfName << endl;
1011                                                                                 namesInFile.insert((itUnique->second));
1012                                                                         }
1013                                                                 }else { out << itUnique->second << restOfName << endl;  }
1014                                                         }
1015                                                         
1016                                                 }
1017                                                 
1018                                         }else { //not need to alter line
1019                                                 out3 << line << endl;
1020                                         }
1021                                 }else { out3 << endl; }
1022                         }
1023                         in3.close();
1024                         out3.close();
1025                         
1026                         m->mothurRemove(alnsFileName);
1027                         rename((alnsFileName+".temp").c_str(), alnsFileName.c_str());
1028                 }
1029                 
1030                 return total;
1031         }
1032         catch(exception& e) {
1033                 m->errorOut(e, "ChimeraUchimeCommand", "deconvoluteResults");
1034                 exit(1);
1035         }
1036 }       
1037 //**********************************************************************************************************************
1038 int ChimeraUchimeCommand::printFile(vector<seqPriorityNode>& nameMapCount, string filename){
1039         try {
1040                 
1041                 sort(nameMapCount.begin(), nameMapCount.end(), compareSeqPriorityNodes);
1042                 
1043                 ofstream out;
1044                 m->openOutputFile(filename, out);
1045                 
1046                 //print new file in order of
1047                 for (int i = 0; i < nameMapCount.size(); i++) {
1048                         out << ">" << nameMapCount[i].name  << "/ab=" << nameMapCount[i].numIdentical << "/" << endl << nameMapCount[i].seq << endl;
1049                 }
1050                 out.close();
1051                 
1052                 return 0;
1053         }
1054         catch(exception& e) {
1055                 m->errorOut(e, "ChimeraUchimeCommand", "printFile");
1056                 exit(1);
1057         }
1058 }       
1059 //**********************************************************************************************************************
1060 int ChimeraUchimeCommand::readFasta(string filename, map<string, string>& seqs){
1061         try {
1062                 //create input file for uchime
1063                 //read through fastafile and store info
1064                 ifstream in;
1065                 m->openInputFile(filename, in);
1066                 
1067                 while (!in.eof()) {
1068                         
1069                         if (m->control_pressed) { in.close(); return 0; }
1070                         
1071                         Sequence seq(in); m->gobble(in);
1072                         seqs[seq.getName()] = seq.getAligned();
1073                 }
1074                 in.close();
1075                 
1076                 return 0;
1077         }
1078         catch(exception& e) {
1079                 m->errorOut(e, "ChimeraUchimeCommand", "readFasta");
1080                 exit(1);
1081         }
1082 }       
1083 //**********************************************************************************************************************
1084
1085 string ChimeraUchimeCommand::getNamesFile(string& inputFile){
1086         try {
1087                 string nameFile = "";
1088                 
1089                 m->mothurOutEndLine(); m->mothurOut("No namesfile given, running unique.seqs command to generate one."); m->mothurOutEndLine(); m->mothurOutEndLine();
1090                 
1091                 //use unique.seqs to create new name and fastafile
1092                 string inputString = "fasta=" + inputFile;
1093                 m->mothurOut("/******************************************/"); m->mothurOutEndLine(); 
1094                 m->mothurOut("Running command: unique.seqs(" + inputString + ")"); m->mothurOutEndLine(); 
1095                 m->mothurCalling = true;
1096         
1097                 Command* uniqueCommand = new DeconvoluteCommand(inputString);
1098                 uniqueCommand->execute();
1099                 
1100                 map<string, vector<string> > filenames = uniqueCommand->getOutputFiles();
1101                 
1102                 delete uniqueCommand;
1103                 m->mothurCalling = false;
1104                 m->mothurOut("/******************************************/"); m->mothurOutEndLine(); 
1105                 
1106                 nameFile = filenames["name"][0];
1107                 inputFile = filenames["fasta"][0];
1108                 
1109                 return nameFile;
1110         }
1111         catch(exception& e) {
1112                 m->errorOut(e, "ChimeraUchimeCommand", "getNamesFile");
1113                 exit(1);
1114         }
1115 }
1116 //**********************************************************************************************************************
1117 int ChimeraUchimeCommand::driverGroups(string outputFName, string filename, string accnos, string alns, int start, int end, vector<string> groups){
1118         try {
1119                 
1120                 int totalSeqs = 0;
1121                 int numChimeras = 0;
1122                 
1123                 for (int i = start; i < end; i++) {
1124                         int start = time(NULL);  if (m->control_pressed) {  return 0; }
1125             
1126                         int error;
1127             if (hasCount) { error = cparser->getSeqs(groups[i], filename, true); if ((error == 1) || m->control_pressed) {  return 0; } }
1128             else { error = sparser->getSeqs(groups[i], filename, true); if ((error == 1) || m->control_pressed) {  return 0; } }
1129                         
1130                         int numSeqs = driver((outputFName + groups[i]), filename, (accnos+ groups[i]), (alns+ groups[i]), numChimeras);
1131                         totalSeqs += numSeqs;
1132                         
1133                         if (m->control_pressed) { return 0; }
1134                         
1135                         //remove file made for uchime
1136                         if (!m->debug) {  m->mothurRemove(filename);  }
1137             else { m->mothurOut("[DEBUG]: saving file: " + filename + ".\n"); }
1138                         
1139                         //append files
1140                         m->appendFiles((outputFName+groups[i]), outputFName); m->mothurRemove((outputFName+groups[i]));
1141                         m->appendFiles((accnos+groups[i]), accnos); m->mothurRemove((accnos+groups[i]));
1142                         if (chimealns) { m->appendFiles((alns+groups[i]), alns); m->mothurRemove((alns+groups[i])); }
1143                         
1144                         m->mothurOutEndLine(); m->mothurOut("It took " + toString(time(NULL) - start) + " secs to check " + toString(numSeqs) + " sequences from group " + groups[i] + ".");    m->mothurOutEndLine();                                  
1145                 }       
1146                 return totalSeqs;
1147                 
1148         }
1149         catch(exception& e) {
1150                 m->errorOut(e, "ChimeraUchimeCommand", "driverGroups");
1151                 exit(1);
1152         }
1153 }       
1154 //**********************************************************************************************************************
1155
1156 int ChimeraUchimeCommand::driver(string outputFName, string filename, string accnos, string alns, int& numChimeras){
1157         try {
1158                 
1159                 outputFName = m->getFullPathName(outputFName);
1160                 filename = m->getFullPathName(filename);
1161                 alns = m->getFullPathName(alns);
1162                 
1163                 //to allow for spaces in the path
1164                 outputFName = "\"" + outputFName + "\"";
1165                 filename = "\"" + filename + "\"";
1166                 alns = "\"" + alns + "\"";
1167                                 
1168                 vector<char*> cPara;
1169                 
1170                 string uchimeCommand = uchimeLocation;
1171 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
1172                 uchimeCommand += " ";
1173 #else
1174                 uchimeCommand = "\"" + uchimeCommand + "\"";
1175 #endif
1176                 
1177                 char* tempUchime;
1178                 tempUchime= new char[uchimeCommand.length()+1]; 
1179                 *tempUchime = '\0';
1180                 strncat(tempUchime, uchimeCommand.c_str(), uchimeCommand.length());
1181                 cPara.push_back(tempUchime);
1182                 
1183                 char* tempIn = new char[8]; 
1184                 *tempIn = '\0'; strncat(tempIn, "--input", 7);
1185                 //strcpy(tempIn, "--input"); 
1186                 cPara.push_back(tempIn);
1187                 char* temp = new char[filename.length()+1];
1188                 *temp = '\0'; strncat(temp, filename.c_str(), filename.length());
1189                 //strcpy(temp, filename.c_str());
1190                 cPara.push_back(temp);
1191                 
1192                 //are you using a reference file
1193                 if (templatefile != "self") {
1194                         //add reference file
1195                         char* tempRef = new char[5]; 
1196                         //strcpy(tempRef, "--db"); 
1197                         *tempRef = '\0'; strncat(tempRef, "--db", 4);
1198                         cPara.push_back(tempRef);  
1199                         char* tempR = new char[templatefile.length()+1];
1200                         //strcpy(tempR, templatefile.c_str());
1201                         *tempR = '\0'; strncat(tempR, templatefile.c_str(), templatefile.length());
1202                         cPara.push_back(tempR);
1203                 }
1204                 
1205                 char* tempO = new char[12]; 
1206                 *tempO = '\0'; strncat(tempO, "--uchimeout", 11);
1207                 //strcpy(tempO, "--uchimeout"); 
1208                 cPara.push_back(tempO);
1209                 char* tempout = new char[outputFName.length()+1];
1210                 //strcpy(tempout, outputFName.c_str());
1211                 *tempout = '\0'; strncat(tempout, outputFName.c_str(), outputFName.length());
1212                 cPara.push_back(tempout);
1213                 
1214                 if (chimealns) {
1215                         char* tempA = new char[13]; 
1216                         *tempA = '\0'; strncat(tempA, "--uchimealns", 12);
1217                         //strcpy(tempA, "--uchimealns"); 
1218                         cPara.push_back(tempA);
1219                         char* tempa = new char[alns.length()+1];
1220                         //strcpy(tempa, alns.c_str());
1221                         *tempa = '\0'; strncat(tempa, alns.c_str(), alns.length());
1222                         cPara.push_back(tempa);
1223                 }
1224                 
1225                 if (useAbskew) {
1226                         char* tempskew = new char[9];
1227                         *tempskew = '\0'; strncat(tempskew, "--abskew", 8);
1228                         //strcpy(tempskew, "--abskew"); 
1229                         cPara.push_back(tempskew);
1230                         char* tempSkew = new char[abskew.length()+1];
1231                         //strcpy(tempSkew, abskew.c_str());
1232                         *tempSkew = '\0'; strncat(tempSkew, abskew.c_str(), abskew.length());
1233                         cPara.push_back(tempSkew);
1234                 }
1235                 
1236                 if (useMinH) {
1237                         char* tempminh = new char[7]; 
1238                         *tempminh = '\0'; strncat(tempminh, "--minh", 6);
1239                         //strcpy(tempminh, "--minh"); 
1240                         cPara.push_back(tempminh);
1241                         char* tempMinH = new char[minh.length()+1];
1242                         *tempMinH = '\0'; strncat(tempMinH, minh.c_str(), minh.length());
1243                         //strcpy(tempMinH, minh.c_str());
1244                         cPara.push_back(tempMinH);
1245                 }
1246                 
1247                 if (useMindiv) {
1248                         char* tempmindiv = new char[9]; 
1249                         *tempmindiv = '\0'; strncat(tempmindiv, "--mindiv", 8);
1250                         //strcpy(tempmindiv, "--mindiv"); 
1251                         cPara.push_back(tempmindiv);
1252                         char* tempMindiv = new char[mindiv.length()+1];
1253                         *tempMindiv = '\0'; strncat(tempMindiv, mindiv.c_str(), mindiv.length());
1254                         //strcpy(tempMindiv, mindiv.c_str());
1255                         cPara.push_back(tempMindiv);
1256                 }
1257                 
1258                 if (useXn) {
1259                         char* tempxn = new char[5]; 
1260                         //strcpy(tempxn, "--xn"); 
1261                         *tempxn = '\0'; strncat(tempxn, "--xn", 4);
1262                         cPara.push_back(tempxn);
1263                         char* tempXn = new char[xn.length()+1];
1264                         //strcpy(tempXn, xn.c_str());
1265                         *tempXn = '\0'; strncat(tempXn, xn.c_str(), xn.length());
1266                         cPara.push_back(tempXn);
1267                 }
1268                 
1269                 if (useDn) {
1270                         char* tempdn = new char[5]; 
1271                         //strcpy(tempdn, "--dn"); 
1272                         *tempdn = '\0'; strncat(tempdn, "--dn", 4);
1273                         cPara.push_back(tempdn);
1274                         char* tempDn = new char[dn.length()+1];
1275                         *tempDn = '\0'; strncat(tempDn, dn.c_str(), dn.length());
1276                         //strcpy(tempDn, dn.c_str());
1277                         cPara.push_back(tempDn);
1278                 }
1279                 
1280                 if (useXa) {
1281                         char* tempxa = new char[5]; 
1282                         //strcpy(tempxa, "--xa"); 
1283                         *tempxa = '\0'; strncat(tempxa, "--xa", 4);
1284                         cPara.push_back(tempxa);
1285                         char* tempXa = new char[xa.length()+1];
1286                         *tempXa = '\0'; strncat(tempXa, xa.c_str(), xa.length());
1287                         //strcpy(tempXa, xa.c_str());
1288                         cPara.push_back(tempXa);
1289                 }
1290                 
1291                 if (useChunks) {
1292                         char* tempchunks = new char[9]; 
1293                         //strcpy(tempchunks, "--chunks"); 
1294                         *tempchunks = '\0'; strncat(tempchunks, "--chunks", 8);
1295                         cPara.push_back(tempchunks);
1296                         char* tempChunks = new char[chunks.length()+1];
1297                         *tempChunks = '\0'; strncat(tempChunks, chunks.c_str(), chunks.length());
1298                         //strcpy(tempChunks, chunks.c_str());
1299                         cPara.push_back(tempChunks);
1300                 }
1301                 
1302                 if (useMinchunk) {
1303                         char* tempminchunk = new char[11]; 
1304                         //strcpy(tempminchunk, "--minchunk"); 
1305                         *tempminchunk = '\0'; strncat(tempminchunk, "--minchunk", 10);
1306                         cPara.push_back(tempminchunk);
1307                         char* tempMinchunk = new char[minchunk.length()+1];
1308                         *tempMinchunk = '\0'; strncat(tempMinchunk, minchunk.c_str(), minchunk.length());
1309                         //strcpy(tempMinchunk, minchunk.c_str());
1310                         cPara.push_back(tempMinchunk);
1311                 }
1312                 
1313                 if (useIdsmoothwindow) {
1314                         char* tempidsmoothwindow = new char[17]; 
1315                         *tempidsmoothwindow = '\0'; strncat(tempidsmoothwindow, "--idsmoothwindow", 16);
1316                         //strcpy(tempidsmoothwindow, "--idsmoothwindow"); 
1317                         cPara.push_back(tempidsmoothwindow);
1318                         char* tempIdsmoothwindow = new char[idsmoothwindow.length()+1];
1319                         *tempIdsmoothwindow = '\0'; strncat(tempIdsmoothwindow, idsmoothwindow.c_str(), idsmoothwindow.length());
1320                         //strcpy(tempIdsmoothwindow, idsmoothwindow.c_str());
1321                         cPara.push_back(tempIdsmoothwindow);
1322                 }
1323                 
1324                 /*if (useMinsmoothid) {
1325                         char* tempminsmoothid = new char[14]; 
1326                         //strcpy(tempminsmoothid, "--minsmoothid"); 
1327                         *tempminsmoothid = '\0'; strncat(tempminsmoothid, "--minsmoothid", 13);
1328                         cPara.push_back(tempminsmoothid);
1329                         char* tempMinsmoothid = new char[minsmoothid.length()+1];
1330                         *tempMinsmoothid = '\0'; strncat(tempMinsmoothid, minsmoothid.c_str(), minsmoothid.length());
1331                         //strcpy(tempMinsmoothid, minsmoothid.c_str());
1332                         cPara.push_back(tempMinsmoothid);
1333                 }*/
1334                 
1335                 if (useMaxp) {
1336                         char* tempmaxp = new char[7]; 
1337                         //strcpy(tempmaxp, "--maxp"); 
1338                         *tempmaxp = '\0'; strncat(tempmaxp, "--maxp", 6);
1339                         cPara.push_back(tempmaxp);
1340                         char* tempMaxp = new char[maxp.length()+1];
1341                         *tempMaxp = '\0'; strncat(tempMaxp, maxp.c_str(), maxp.length());
1342                         //strcpy(tempMaxp, maxp.c_str());
1343                         cPara.push_back(tempMaxp);
1344                 }
1345                 
1346                 if (!skipgaps) {
1347                         char* tempskipgaps = new char[13]; 
1348                         //strcpy(tempskipgaps, "--[no]skipgaps");
1349                         *tempskipgaps = '\0'; strncat(tempskipgaps, "--noskipgaps", 12);
1350                         cPara.push_back(tempskipgaps);
1351                 }
1352                 
1353                 if (!skipgaps2) {
1354                         char* tempskipgaps2 = new char[14]; 
1355                         //strcpy(tempskipgaps2, "--[no]skipgaps2"); 
1356                         *tempskipgaps2 = '\0'; strncat(tempskipgaps2, "--noskipgaps2", 13);
1357                         cPara.push_back(tempskipgaps2);
1358                 }
1359                 
1360                 if (useMinlen) {
1361                         char* tempminlen = new char[9]; 
1362                         *tempminlen = '\0'; strncat(tempminlen, "--minlen", 8);
1363                         //strcpy(tempminlen, "--minlen"); 
1364                         cPara.push_back(tempminlen);
1365                         char* tempMinlen = new char[minlen.length()+1];
1366                         //strcpy(tempMinlen, minlen.c_str());
1367                         *tempMinlen = '\0'; strncat(tempMinlen, minlen.c_str(), minlen.length());
1368                         cPara.push_back(tempMinlen);
1369                 }
1370                 
1371                 if (useMaxlen) {
1372                         char* tempmaxlen = new char[9]; 
1373                         //strcpy(tempmaxlen, "--maxlen"); 
1374                         *tempmaxlen = '\0'; strncat(tempmaxlen, "--maxlen", 8);
1375                         cPara.push_back(tempmaxlen);
1376                         char* tempMaxlen = new char[maxlen.length()+1];
1377                         *tempMaxlen = '\0'; strncat(tempMaxlen, maxlen.c_str(), maxlen.length());
1378                         //strcpy(tempMaxlen, maxlen.c_str());
1379                         cPara.push_back(tempMaxlen);
1380                 }
1381                 
1382                 if (ucl) {
1383                         char* tempucl = new char[5]; 
1384                         strcpy(tempucl, "--ucl"); 
1385                         cPara.push_back(tempucl);
1386                 }
1387                 
1388                 if (useQueryfract) {
1389                         char* tempqueryfract = new char[13]; 
1390                         *tempqueryfract = '\0'; strncat(tempqueryfract, "--queryfract", 12);
1391                         //strcpy(tempqueryfract, "--queryfract"); 
1392                         cPara.push_back(tempqueryfract);
1393                         char* tempQueryfract = new char[queryfract.length()+1];
1394                         *tempQueryfract = '\0'; strncat(tempQueryfract, queryfract.c_str(), queryfract.length());
1395                         //strcpy(tempQueryfract, queryfract.c_str());
1396                         cPara.push_back(tempQueryfract);
1397                 }
1398                 
1399                 
1400                 char** uchimeParameters;
1401                 uchimeParameters = new char*[cPara.size()];
1402                 string commandString = "";
1403                 for (int i = 0; i < cPara.size(); i++) {  uchimeParameters[i] = cPara[i];  commandString += toString(cPara[i]) + " "; } 
1404                 //int numArgs = cPara.size();
1405                 
1406                 //uchime_main(numArgs, uchimeParameters); 
1407                 //cout << "commandString = " << commandString << endl;
1408 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
1409 #else
1410                 commandString = "\"" + commandString + "\"";
1411 #endif
1412         if (m->debug) { m->mothurOut("[DEBUG]: uchime command = " + commandString + ".\n"); }
1413                 system(commandString.c_str());
1414                 
1415                 //free memory
1416                 for(int i = 0; i < cPara.size(); i++)  {  delete cPara[i];  }
1417                 delete[] uchimeParameters; 
1418                 
1419                 //remove "" from filenames
1420                 outputFName = outputFName.substr(1, outputFName.length()-2);
1421                 filename = filename.substr(1, filename.length()-2);
1422                 alns = alns.substr(1, alns.length()-2);
1423                 
1424                 if (m->control_pressed) { return 0; }
1425                 
1426                 //create accnos file from uchime results
1427                 ifstream in; 
1428                 m->openInputFile(outputFName, in);
1429                 
1430                 ofstream out;
1431                 m->openOutputFile(accnos, out);
1432                 
1433                 int num = 0;
1434                 numChimeras = 0;
1435                 while(!in.eof()) {
1436                         
1437                         if (m->control_pressed) { break; }
1438                         
1439                         string name = "";
1440                         string chimeraFlag = "";
1441                         in >> chimeraFlag >> name;
1442                         
1443                         //fix name if needed
1444                         if (templatefile == "self") { 
1445                                 name = name.substr(0, name.length()-1); //rip off last /
1446                                 name = name.substr(0, name.find_last_of('/'));
1447                         }
1448                         
1449                         for (int i = 0; i < 15; i++) {  in >> chimeraFlag; }
1450                         m->gobble(in);
1451                         
1452                         if (chimeraFlag == "Y") {  out << name << endl; numChimeras++; }
1453                         num++;
1454                 }
1455                 in.close();
1456                 out.close();
1457                 
1458                 return num;
1459         }
1460         catch(exception& e) {
1461                 m->errorOut(e, "ChimeraUchimeCommand", "driver");
1462                 exit(1);
1463         }
1464 }
1465 /**************************************************************************************************/
1466
1467 int ChimeraUchimeCommand::createProcesses(string outputFileName, string filename, string accnos, string alns, int& numChimeras) {
1468         try {
1469                 
1470                 processIDS.clear();
1471                 int process = 1;
1472                 int num = 0;
1473                 vector<string> files;
1474                 
1475 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)          
1476                 //break up file into multiple files
1477                 m->divideFile(filename, processors, files);
1478                 
1479                 if (m->control_pressed) {  return 0;  }
1480                                 
1481                 //loop through and create all the processes you want
1482                 while (process != processors) {
1483                         int pid = fork();
1484                         
1485                         if (pid > 0) {
1486                                 processIDS.push_back(pid);  //create map from line number to pid so you can append files in correct order later
1487                                 process++;
1488                         }else if (pid == 0){
1489                                 num = driver(outputFileName + toString(getpid()) + ".temp", files[process], accnos + toString(getpid()) + ".temp", alns + toString(getpid()) + ".temp", numChimeras);
1490                                 
1491                                 //pass numSeqs to parent
1492                                 ofstream out;
1493                                 string tempFile = outputFileName + toString(getpid()) + ".num.temp";
1494                                 m->openOutputFile(tempFile, out);
1495                                 out << num << endl;
1496                                 out << numChimeras << endl;
1497                                 out.close();
1498                                 
1499                                 exit(0);
1500                         }else { 
1501                                 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine(); 
1502                                 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
1503                                 exit(0);
1504                         }
1505                 }
1506                 
1507                 //do my part
1508                 num = driver(outputFileName, files[0], accnos, alns, numChimeras);
1509                 
1510                 //force parent to wait until all the processes are done
1511                 for (int i=0;i<processIDS.size();i++) { 
1512                         int temp = processIDS[i];
1513                         wait(&temp);
1514                 }
1515                 
1516                 for (int i = 0; i < processIDS.size(); i++) {
1517                         ifstream in;
1518                         string tempFile =  outputFileName + toString(processIDS[i]) + ".num.temp";
1519                         m->openInputFile(tempFile, in);
1520                         if (!in.eof()) { 
1521                                 int tempNum = 0; 
1522                                 in >> tempNum; m->gobble(in);
1523                                 num += tempNum; 
1524                                 in >> tempNum;
1525                                 numChimeras += tempNum;
1526                         }
1527                         in.close(); m->mothurRemove(tempFile);
1528                 }
1529 #else
1530                 //////////////////////////////////////////////////////////////////////////////////////////////////////
1531                 //Windows version shared memory, so be careful when passing variables through the preClusterData struct. 
1532                 //Above fork() will clone, so memory is separate, but that's not the case with windows, 
1533                 //////////////////////////////////////////////////////////////////////////////////////////////////////
1534                 
1535                 //divide file
1536                 int count = 0;
1537                 int spot = 0;
1538                 map<int, ofstream*> filehandles;
1539                 map<int, ofstream*>::iterator it3;
1540                 
1541                 ofstream* temp;
1542                 for (int i = 0; i < processors; i++) {
1543                         temp = new ofstream;
1544                         filehandles[i] = temp;
1545                         m->openOutputFile(filename+toString(i)+".temp", *(temp));
1546                         files.push_back(filename+toString(i)+".temp");
1547                 }
1548                 
1549                 ifstream in;
1550                 m->openInputFile(filename, in);
1551                 
1552                 while(!in.eof()) {
1553                         
1554                         if (m->control_pressed) { in.close(); for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) { (*(it3->second)).close(); delete it3->second; } return 0; }
1555                         
1556                         Sequence tempSeq(in); m->gobble(in); 
1557                         
1558                         if (tempSeq.getName() != "") {
1559                                 tempSeq.printSequence(*(filehandles[spot])); 
1560                                 spot++; count++;
1561                                 if (spot == processors) { spot = 0; }
1562                         }
1563                 }
1564                 in.close();
1565                 
1566                 //delete memory
1567                 for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) {
1568                         (*(it3->second)).close();
1569                         delete it3->second;
1570                 }
1571                 
1572                 //sanity check for number of processors
1573                 if (count < processors) { processors = count; }
1574                 
1575                 vector<uchimeData*> pDataArray; 
1576                 DWORD   dwThreadIdArray[processors-1];
1577                 HANDLE  hThreadArray[processors-1]; 
1578                 vector<string> dummy; //used so that we can use the same struct for MyUchimeSeqsThreadFunction and MyUchimeThreadFunction
1579                 
1580                 //Create processor worker threads.
1581                 for( int i=1; i<processors; i++ ){
1582                         // Allocate memory for thread data.
1583                         string extension = toString(i) + ".temp";
1584                         
1585                         uchimeData* tempUchime = new uchimeData(outputFileName+extension, uchimeLocation, templatefile, files[i], "", "", "", accnos+extension, alns+extension, dummy, m, 0, 0,  i);
1586                         tempUchime->setBooleans(useAbskew, chimealns, useMinH, useMindiv, useXn, useDn, useXa, useChunks, useMinchunk, useIdsmoothwindow, useMinsmoothid, useMaxp, skipgaps, skipgaps2, useMinlen, useMaxlen, ucl, useQueryfract);
1587                         tempUchime->setVariables(abskew, minh, mindiv, xn, dn, xa, chunks, minchunk, idsmoothwindow, minsmoothid, maxp, minlen, maxlen, queryfract);
1588                         
1589                         pDataArray.push_back(tempUchime);
1590                         processIDS.push_back(i);
1591                         
1592                         //MySeqSumThreadFunction is in header. It must be global or static to work with the threads.
1593                         //default security attributes, thread function name, argument to thread function, use default creation flags, returns the thread identifier
1594                         hThreadArray[i-1] = CreateThread(NULL, 0, MyUchimeSeqsThreadFunction, pDataArray[i-1], 0, &dwThreadIdArray[i-1]);   
1595                 }
1596                 
1597                 
1598                 //using the main process as a worker saves time and memory
1599                 num = driver(outputFileName, files[0], accnos, alns, numChimeras);
1600                 
1601                 //Wait until all threads have terminated.
1602                 WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE);
1603                 
1604                 //Close all thread handles and free memory allocations.
1605                 for(int i=0; i < pDataArray.size(); i++){
1606                         num += pDataArray[i]->count;
1607                         numChimeras += pDataArray[i]->numChimeras;
1608                         CloseHandle(hThreadArray[i]);
1609                         delete pDataArray[i];
1610                 }
1611 #endif          
1612                 
1613                 //append output files
1614                 for(int i=0;i<processIDS.size();i++){
1615                         m->appendFiles((outputFileName + toString(processIDS[i]) + ".temp"), outputFileName);
1616                         m->mothurRemove((outputFileName + toString(processIDS[i]) + ".temp"));
1617                         
1618                         m->appendFiles((accnos + toString(processIDS[i]) + ".temp"), accnos);
1619                         m->mothurRemove((accnos + toString(processIDS[i]) + ".temp"));
1620                         
1621                         if (chimealns) {
1622                                 m->appendFiles((alns + toString(processIDS[i]) + ".temp"), alns);
1623                                 m->mothurRemove((alns + toString(processIDS[i]) + ".temp"));
1624                         }
1625                 }
1626                 
1627                 //get rid of the file pieces.
1628                 for (int i = 0; i < files.size(); i++) { m->mothurRemove(files[i]); }
1629                 return num;     
1630         }
1631         catch(exception& e) {
1632                 m->errorOut(e, "ChimeraUchimeCommand", "createProcesses");
1633                 exit(1);
1634         }
1635 }
1636 /**************************************************************************************************/
1637
1638 int ChimeraUchimeCommand::createProcessesGroups(string outputFName, string filename, string accnos, string alns, vector<string> groups, string nameFile, string groupFile, string fastaFile) {
1639         try {
1640                 
1641                 processIDS.clear();
1642                 int process = 1;
1643                 int num = 0;
1644                 
1645                 //sanity check
1646                 if (groups.size() < processors) { processors = groups.size(); }
1647                 
1648                 //divide the groups between the processors
1649                 vector<linePair> lines;
1650                 int numGroupsPerProcessor = groups.size() / processors;
1651                 for (int i = 0; i < processors; i++) {
1652                         int startIndex =  i * numGroupsPerProcessor;
1653                         int endIndex = (i+1) * numGroupsPerProcessor;
1654                         if(i == (processors - 1)){      endIndex = groups.size();       }
1655                         lines.push_back(linePair(startIndex, endIndex));
1656                 }
1657                 
1658 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)          
1659                                 
1660                 //loop through and create all the processes you want
1661                 while (process != processors) {
1662                         int pid = fork();
1663                         
1664                         if (pid > 0) {
1665                                 processIDS.push_back(pid);  //create map from line number to pid so you can append files in correct order later
1666                                 process++;
1667                         }else if (pid == 0){
1668                                 num = driverGroups(outputFName + toString(getpid()) + ".temp", filename + toString(getpid()) + ".temp", accnos + toString(getpid()) + ".temp", alns + toString(getpid()) + ".temp", lines[process].start, lines[process].end, groups);
1669                                 
1670                                 //pass numSeqs to parent
1671                                 ofstream out;
1672                                 string tempFile = outputFName + toString(getpid()) + ".num.temp";
1673                                 m->openOutputFile(tempFile, out);
1674                                 out << num << endl;
1675                                 out.close();
1676                                 
1677                                 exit(0);
1678                         }else { 
1679                                 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine(); 
1680                                 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
1681                                 exit(0);
1682                         }
1683                 }
1684                 
1685                 //do my part
1686                 num = driverGroups(outputFName, filename, accnos, alns, lines[0].start, lines[0].end, groups);
1687                 
1688                 //force parent to wait until all the processes are done
1689                 for (int i=0;i<processIDS.size();i++) { 
1690                         int temp = processIDS[i];
1691                         wait(&temp);
1692                 }
1693                 
1694                 for (int i = 0; i < processIDS.size(); i++) {
1695                         ifstream in;
1696                         string tempFile =  outputFName + toString(processIDS[i]) + ".num.temp";
1697                         m->openInputFile(tempFile, in);
1698                         if (!in.eof()) { int tempNum = 0; in >> tempNum; num += tempNum; }
1699                         in.close(); m->mothurRemove(tempFile);
1700                 }
1701                                 
1702 #else
1703                 //////////////////////////////////////////////////////////////////////////////////////////////////////
1704                 //Windows version shared memory, so be careful when passing variables through the uchimeData struct. 
1705                 //Above fork() will clone, so memory is separate, but that's not the case with windows, 
1706                 //////////////////////////////////////////////////////////////////////////////////////////////////////
1707                 
1708                 vector<uchimeData*> pDataArray; 
1709                 DWORD   dwThreadIdArray[processors-1];
1710                 HANDLE  hThreadArray[processors-1]; 
1711                 
1712                 //Create processor worker threads.
1713                 for( int i=1; i<processors; i++ ){
1714                         // Allocate memory for thread data.
1715                         string extension = toString(i) + ".temp";
1716                         
1717                         uchimeData* tempUchime = new uchimeData(outputFName+extension, uchimeLocation, templatefile, filename+extension, fastaFile, nameFile, groupFile, accnos+extension, alns+extension, groups, m, lines[i].start, lines[i].end,  i);
1718                         tempUchime->setBooleans(useAbskew, chimealns, useMinH, useMindiv, useXn, useDn, useXa, useChunks, useMinchunk, useIdsmoothwindow, useMinsmoothid, useMaxp, skipgaps, skipgaps2, useMinlen, useMaxlen, ucl, useQueryfract);
1719                         tempUchime->setVariables(abskew, minh, mindiv, xn, dn, xa, chunks, minchunk, idsmoothwindow, minsmoothid, maxp, minlen, maxlen, queryfract);
1720                         
1721                         pDataArray.push_back(tempUchime);
1722                         processIDS.push_back(i);
1723                         
1724                         //MyUchimeThreadFunction is in header. It must be global or static to work with the threads.
1725                         //default security attributes, thread function name, argument to thread function, use default creation flags, returns the thread identifier
1726                         hThreadArray[i-1] = CreateThread(NULL, 0, MyUchimeThreadFunction, pDataArray[i-1], 0, &dwThreadIdArray[i-1]);   
1727                 }
1728                 
1729                 
1730                 //using the main process as a worker saves time and memory
1731                 num = driverGroups(parser, outputFName, filename, accnos, alns, lines[0].start, lines[0].end, groups);
1732                 
1733                 //Wait until all threads have terminated.
1734                 WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE);
1735                 
1736                 //Close all thread handles and free memory allocations.
1737                 for(int i=0; i < pDataArray.size(); i++){
1738                         num += pDataArray[i]->count;
1739                         CloseHandle(hThreadArray[i]);
1740                         delete pDataArray[i];
1741                 }
1742 #endif          
1743                 
1744                                 
1745                 //append output files
1746                 for(int i=0;i<processIDS.size();i++){
1747                         m->appendFiles((outputFName + toString(processIDS[i]) + ".temp"), outputFName);
1748                         m->mothurRemove((outputFName + toString(processIDS[i]) + ".temp"));
1749                         
1750                         m->appendFiles((accnos + toString(processIDS[i]) + ".temp"), accnos);
1751                         m->mothurRemove((accnos + toString(processIDS[i]) + ".temp"));
1752                         
1753                         if (chimealns) {
1754                                 m->appendFiles((alns + toString(processIDS[i]) + ".temp"), alns);
1755                                 m->mothurRemove((alns + toString(processIDS[i]) + ".temp"));
1756                         }
1757                 }
1758                 
1759                 return num;     
1760                 
1761         }
1762         catch(exception& e) {
1763                 m->errorOut(e, "ChimeraUchimeCommand", "createProcessesGroups");
1764                 exit(1);
1765         }
1766 }
1767 /**************************************************************************************************/
1768