]> git.donarmstrong.com Git - mothur.git/blob - chimerauchimecommand.cpp
added load.logfile command. changed summary.single output for subsample=t.
[mothur.git] / chimerauchimecommand.cpp
1 /*
2  *  chimerauchimecommand.cpp
3  *  Mothur
4  *
5  *  Created by westcott on 5/13/11.
6  *  Copyright 2011 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "chimerauchimecommand.h"
11 #include "deconvolutecommand.h"
12 //#include "uc.h"
13 #include "sequence.hpp"
14 #include "referencedb.h"
15 #include "systemcommand.h"
16
17 //**********************************************************************************************************************
18 vector<string> ChimeraUchimeCommand::setParameters(){   
19         try {
20                 CommandParameter ptemplate("reference", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(ptemplate);
21                 CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pfasta);
22                 CommandParameter pname("name", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pname);
23                 CommandParameter pgroup("group", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pgroup);
24                 CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors);
25                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
26                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
27                 CommandParameter pabskew("abskew", "Number", "", "1.9", "", "", "",false,false); parameters.push_back(pabskew);
28                 CommandParameter pchimealns("chimealns", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pchimealns);
29                 CommandParameter pminh("minh", "Number", "", "0.3", "", "", "",false,false); parameters.push_back(pminh);
30                 CommandParameter pmindiv("mindiv", "Number", "", "0.5", "", "", "",false,false); parameters.push_back(pmindiv);
31                 CommandParameter pxn("xn", "Number", "", "8.0", "", "", "",false,false); parameters.push_back(pxn);
32                 CommandParameter pdn("dn", "Number", "", "1.4", "", "", "",false,false); parameters.push_back(pdn);
33                 CommandParameter pxa("xa", "Number", "", "1", "", "", "",false,false); parameters.push_back(pxa);
34                 CommandParameter pchunks("chunks", "Number", "", "4", "", "", "",false,false); parameters.push_back(pchunks);
35                 CommandParameter pminchunk("minchunk", "Number", "", "64", "", "", "",false,false); parameters.push_back(pminchunk);
36                 CommandParameter pidsmoothwindow("idsmoothwindow", "Number", "", "32", "", "", "",false,false); parameters.push_back(pidsmoothwindow);
37                 //CommandParameter pminsmoothid("minsmoothid", "Number", "", "0.95", "", "", "",false,false); parameters.push_back(pminsmoothid);
38                 CommandParameter pmaxp("maxp", "Number", "", "2", "", "", "",false,false); parameters.push_back(pmaxp);
39                 CommandParameter pskipgaps("skipgaps", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(pskipgaps);
40                 CommandParameter pskipgaps2("skipgaps2", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(pskipgaps2);
41                 CommandParameter pminlen("minlen", "Number", "", "10", "", "", "",false,false); parameters.push_back(pminlen);
42                 CommandParameter pmaxlen("maxlen", "Number", "", "10000", "", "", "",false,false); parameters.push_back(pmaxlen);
43                 CommandParameter pucl("ucl", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pucl);
44                 CommandParameter pqueryfract("queryfract", "Number", "", "0.5", "", "", "",false,false); parameters.push_back(pqueryfract);
45
46                 vector<string> myArray;
47                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
48                 return myArray;
49         }
50         catch(exception& e) {
51                 m->errorOut(e, "ChimeraUchimeCommand", "setParameters");
52                 exit(1);
53         }
54 }
55 //**********************************************************************************************************************
56 string ChimeraUchimeCommand::getHelpString(){   
57         try {
58                 string helpString = "";
59                 helpString += "The chimera.uchime command reads a fastafile and referencefile and outputs potentially chimeric sequences.\n";
60                 helpString += "This command is a wrapper for uchime written by Robert C. Edgar.\n";
61                 helpString += "The chimera.uchime command parameters are fasta, name, reference, processors, abskew, chimealns, minh, mindiv, xn, dn, xa, chunks, minchunk, idsmoothwindow, minsmoothid, maxp, skipgaps, skipgaps2, minlen, maxlen, ucl and queryfact.\n";
62                 helpString += "The fasta parameter allows you to enter the fasta file containing your potentially chimeric sequences, and is required, unless you have a valid current fasta file. \n";
63                 helpString += "The name parameter allows you to provide a name file, if you are using template=self. \n";
64                 helpString += "You may enter multiple fasta files by separating their names with dashes. ie. fasta=abrecovery.fasta-amazon.fasta \n";
65                 helpString += "The group parameter allows you to provide a group file. The group file can be used with a namesfile and reference=self. When checking sequences, only sequences from the same group as the query sequence will be used as the reference. \n";
66                 helpString += "The reference parameter allows you to enter a reference file containing known non-chimeric sequences, and is required. You may also set template=self, in this case the abundant sequences will be used as potential parents. \n";
67                 helpString += "The processors parameter allows you to specify how many processors you would like to use.  The default is 1. \n";
68                 helpString += "The abskew parameter can only be used with template=self. Minimum abundance skew. Default 1.9. Abundance skew is: min [ abund(parent1), abund(parent2) ] / abund(query).\n";
69                 helpString += "The chimealns parameter allows you to indicate you would like a file containing multiple alignments of query sequences to parents in human readable format. Alignments show columns with differences that support or contradict a chimeric model.\n";
70                 helpString += "The minh parameter - mininum score to report chimera. Default 0.3. Values from 0.1 to 5 might be reasonable. Lower values increase sensitivity but may report more false positives. If you decrease xn you may need to increase minh, and vice versa.\n";
71                 helpString += "The mindiv parameter - minimum divergence ratio, default 0.5. Div ratio is 100%% - %%identity between query sequence and the closest candidate for being a parent. If you don't care about very close chimeras, then you could increase mindiv to, say, 1.0 or 2.0, and also decrease minh, say to 0.1, to increase sensitivity. How well this works will depend on your data. Best is to tune parameters on a good benchmark.\n";
72                 helpString += "The xn parameter - weight of a no vote. Default 8.0. Decreasing this weight to around 3 or 4 may give better performance on denoised data.\n";
73                 helpString += "The dn parameter - pseudo-count prior on number of no votes. Default 1.4. Probably no good reason to change this unless you can retune to a good benchmark for your data. Reasonable values are probably in the range from 0.2 to 2.\n";
74                 helpString += "The xa parameter - weight of an abstain vote. Default 1. So far, results do not seem to be very sensitive to this parameter, but if you have a good training set might be worth trying. Reasonable values might range from 0.1 to 2.\n";
75                 helpString += "The chunks parameter is the number of chunks to extract from the query sequence when searching for parents. Default 4.\n";
76                 helpString += "The minchunk parameter is the minimum length of a chunk. Default 64.\n";
77                 helpString += "The idsmoothwindow parameter is the length of id smoothing window. Default 32.\n";
78                 //helpString += "The minsmoothid parameter - minimum factional identity over smoothed window of candidate parent. Default 0.95.\n";
79                 helpString += "The maxp parameter - maximum number of candidate parents to consider. Default 2. In tests so far, increasing maxp gives only a very small improvement in sensivity but tends to increase the error rate quite a bit.\n";
80                 helpString += "The skipgaps parameter controls how gapped columns affect counting of diffs. If skipgaps is set to T, columns containing gaps do not found as diffs. Default = T.\n";
81                 helpString += "The skipgaps2 parameter controls how gapped columns affect counting of diffs. If skipgaps2 is set to T, if column is immediately adjacent to a column containing a gap, it is not counted as a diff. Default = T.\n";
82                 helpString += "The minlen parameter is the minimum unaligned sequence length. Defaults 10. Applies to both query and reference sequences.\n";
83                 helpString += "The maxlen parameter is the maximum unaligned sequence length. Defaults 10000. Applies to both query and reference sequences.\n";
84                 helpString += "The ucl parameter - use local-X alignments. Default is global-X or false. On tests so far, global-X is always better; this option is retained because it just might work well on some future type of data.\n";
85                 helpString += "The queryfract parameter - minimum fraction of the query sequence that must be covered by a local-X alignment. Default 0.5. Applies only when ucl is true.\n";
86 #ifdef USE_MPI
87                 helpString += "When using MPI, the processors parameter is set to the number of MPI processes running. \n";
88 #endif
89                 helpString += "The chimera.uchime command should be in the following format: \n";
90                 helpString += "chimera.uchime(fasta=yourFastaFile, reference=yourTemplate) \n";
91                 helpString += "Example: chimera.uchime(fasta=AD.align, reference=silva.gold.align) \n";
92                 helpString += "Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFastaFile).\n";       
93                 return helpString;
94         }
95         catch(exception& e) {
96                 m->errorOut(e, "ChimeraUchimeCommand", "getHelpString");
97                 exit(1);
98         }
99 }
100 //**********************************************************************************************************************
101 string ChimeraUchimeCommand::getOutputFileNameTag(string type, string inputName=""){    
102         try {
103         string outputFileName = "";
104                 map<string, vector<string> >::iterator it;
105         
106         //is this a type this command creates
107         it = outputTypes.find(type);
108         if (it == outputTypes.end()) {  m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n"); }
109         else {
110             if (type == "chimera") {  outputFileName =  "uchime.chimeras"; }
111             else if (type == "accnos") {  outputFileName =  "uchime.accnos"; }
112             else if (type == "alns") {  outputFileName =  "uchime.alns"; }
113             else { m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n"); m->control_pressed = true;  }
114         }
115         return outputFileName;
116         }
117         catch(exception& e) {
118                 m->errorOut(e, "ChimeraUchimeCommand", "getOutputFileNameTag");
119                 exit(1);
120         }
121 }
122 //**********************************************************************************************************************
123 ChimeraUchimeCommand::ChimeraUchimeCommand(){   
124         try {
125                 abort = true; calledHelp = true;
126                 setParameters();
127                 vector<string> tempOutNames;
128                 outputTypes["chimera"] = tempOutNames;
129                 outputTypes["accnos"] = tempOutNames;
130                 outputTypes["alns"] = tempOutNames;
131         }
132         catch(exception& e) {
133                 m->errorOut(e, "ChimeraUchimeCommand", "ChimeraUchimeCommand");
134                 exit(1);
135         }
136 }
137 //***************************************************************************************************************
138 ChimeraUchimeCommand::ChimeraUchimeCommand(string option)  {
139         try {
140                 abort = false; calledHelp = false; 
141                 ReferenceDB* rdb = ReferenceDB::getInstance();
142                 
143                 //allow user to run help
144                 if(option == "help") { help(); abort = true; calledHelp = true; }
145                 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
146                 
147                 else {
148                         vector<string> myArray = setParameters();
149                         
150                         OptionParser parser(option);
151                         map<string,string> parameters = parser.getParameters();
152                         
153                         ValidParameters validParameter("chimera.uchime");
154                         map<string,string>::iterator it;
155                         
156                         //check to make sure all parameters are valid for command
157                         for (it = parameters.begin(); it != parameters.end(); it++) { 
158                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
159                         }
160                         
161                         vector<string> tempOutNames;
162                         outputTypes["chimera"] = tempOutNames;
163                         outputTypes["accnos"] = tempOutNames;
164                         outputTypes["alns"] = tempOutNames;
165                         
166                         //if the user changes the input directory command factory will send this info to us in the output parameter 
167                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
168                         if (inputDir == "not found"){   inputDir = "";          }
169                         
170                         //check for required parameters
171                         fastafile = validParameter.validFile(parameters, "fasta", false);
172                         if (fastafile == "not found") {                                 
173                                 //if there is a current fasta file, use it
174                                 string filename = m->getFastaFile(); 
175                                 if (filename != "") { fastaFileNames.push_back(filename); m->mothurOut("Using " + filename + " as input file for the fasta parameter."); m->mothurOutEndLine(); }
176                                 else {  m->mothurOut("You have no current fastafile and the fasta parameter is required."); m->mothurOutEndLine(); abort = true; }
177                         }else { 
178                                 m->splitAtDash(fastafile, fastaFileNames);
179                                 
180                                 //go through files and make sure they are good, if not, then disregard them
181                                 for (int i = 0; i < fastaFileNames.size(); i++) {
182                                         
183                                         bool ignore = false;
184                                         if (fastaFileNames[i] == "current") { 
185                                                 fastaFileNames[i] = m->getFastaFile(); 
186                                                 if (fastaFileNames[i] != "") {  m->mothurOut("Using " + fastaFileNames[i] + " as input file for the fasta parameter where you had given current."); m->mothurOutEndLine(); }
187                                                 else {  
188                                                         m->mothurOut("You have no current fastafile, ignoring current."); m->mothurOutEndLine(); ignore=true; 
189                                                         //erase from file list
190                                                         fastaFileNames.erase(fastaFileNames.begin()+i);
191                                                         i--;
192                                                 }
193                                         }
194                                         
195                                         if (!ignore) {
196                                                 
197                                                 if (inputDir != "") {
198                                                         string path = m->hasPath(fastaFileNames[i]);
199                                                         //if the user has not given a path then, add inputdir. else leave path alone.
200                                                         if (path == "") {       fastaFileNames[i] = inputDir + fastaFileNames[i];               }
201                                                 }
202                                                 
203                                                 int ableToOpen;
204                                                 ifstream in;
205                                                 
206                                                 ableToOpen = m->openInputFile(fastaFileNames[i], in, "noerror");
207                                                 
208                                                 //if you can't open it, try default location
209                                                 if (ableToOpen == 1) {
210                                                         if (m->getDefaultPath() != "") { //default path is set
211                                                                 string tryPath = m->getDefaultPath() + m->getSimpleName(fastaFileNames[i]);
212                                                                 m->mothurOut("Unable to open " + fastaFileNames[i] + ". Trying default " + tryPath); m->mothurOutEndLine();
213                                                                 ifstream in2;
214                                                                 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
215                                                                 in2.close();
216                                                                 fastaFileNames[i] = tryPath;
217                                                         }
218                                                 }
219                                                 
220                                                 if (ableToOpen == 1) {
221                                                         if (m->getOutputDir() != "") { //default path is set
222                                                                 string tryPath = m->getOutputDir() + m->getSimpleName(fastaFileNames[i]);
223                                                                 m->mothurOut("Unable to open " + fastaFileNames[i] + ". Trying output directory " + tryPath); m->mothurOutEndLine();
224                                                                 ifstream in2;
225                                                                 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
226                                                                 in2.close();
227                                                                 fastaFileNames[i] = tryPath;
228                                                         }
229                                                 }
230                                                 
231                                                 in.close();
232                                                 
233                                                 if (ableToOpen == 1) { 
234                                                         m->mothurOut("Unable to open " + fastaFileNames[i] + ". It will be disregarded."); m->mothurOutEndLine(); 
235                                                         //erase from file list
236                                                         fastaFileNames.erase(fastaFileNames.begin()+i);
237                                                         i--;
238                                                 }else {
239                                                         m->setFastaFile(fastaFileNames[i]);
240                                                 }
241                                         }
242                                 }
243                                 
244                                 //make sure there is at least one valid file left
245                                 if (fastaFileNames.size() == 0) { m->mothurOut("[ERROR]: no valid files."); m->mothurOutEndLine(); abort = true; }
246                         }
247                         
248                         
249                         //check for required parameters
250                         bool hasName = true;
251                         namefile = validParameter.validFile(parameters, "name", false);
252                         if (namefile == "not found") { namefile = "";  hasName = false; }
253                         else { 
254                                 m->splitAtDash(namefile, nameFileNames);
255                                 
256                                 //go through files and make sure they are good, if not, then disregard them
257                                 for (int i = 0; i < nameFileNames.size(); i++) {
258                                         
259                                         bool ignore = false;
260                                         if (nameFileNames[i] == "current") { 
261                                                 nameFileNames[i] = m->getNameFile(); 
262                                                 if (nameFileNames[i] != "") {  m->mothurOut("Using " + nameFileNames[i] + " as input file for the name parameter where you had given current."); m->mothurOutEndLine(); }
263                                                 else {  
264                                                         m->mothurOut("You have no current namefile, ignoring current."); m->mothurOutEndLine(); ignore=true; 
265                                                         //erase from file list
266                                                         nameFileNames.erase(nameFileNames.begin()+i);
267                                                         i--;
268                                                 }
269                                         }
270                                         
271                                         if (!ignore) {
272                                                 
273                                                 if (inputDir != "") {
274                                                         string path = m->hasPath(nameFileNames[i]);
275                                                         //if the user has not given a path then, add inputdir. else leave path alone.
276                                                         if (path == "") {       nameFileNames[i] = inputDir + nameFileNames[i];         }
277                                                 }
278                                                 
279                                                 int ableToOpen;
280                                                 ifstream in;
281                                                 
282                                                 ableToOpen = m->openInputFile(nameFileNames[i], in, "noerror");
283                                                 
284                                                 //if you can't open it, try default location
285                                                 if (ableToOpen == 1) {
286                                                         if (m->getDefaultPath() != "") { //default path is set
287                                                                 string tryPath = m->getDefaultPath() + m->getSimpleName(nameFileNames[i]);
288                                                                 m->mothurOut("Unable to open " + nameFileNames[i] + ". Trying default " + tryPath); m->mothurOutEndLine();
289                                                                 ifstream in2;
290                                                                 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
291                                                                 in2.close();
292                                                                 nameFileNames[i] = tryPath;
293                                                         }
294                                                 }
295                                                 
296                                                 if (ableToOpen == 1) {
297                                                         if (m->getOutputDir() != "") { //default path is set
298                                                                 string tryPath = m->getOutputDir() + m->getSimpleName(nameFileNames[i]);
299                                                                 m->mothurOut("Unable to open " + nameFileNames[i] + ". Trying output directory " + tryPath); m->mothurOutEndLine();
300                                                                 ifstream in2;
301                                                                 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
302                                                                 in2.close();
303                                                                 nameFileNames[i] = tryPath;
304                                                         }
305                                                 }
306                                                 
307                                                 in.close();
308                                                 
309                                                 if (ableToOpen == 1) { 
310                                                         m->mothurOut("Unable to open " + nameFileNames[i] + ". It will be disregarded."); m->mothurOutEndLine(); 
311                                                         //erase from file list
312                                                         nameFileNames.erase(nameFileNames.begin()+i);
313                                                         i--;
314                                                 }else {
315                                                         m->setNameFile(nameFileNames[i]);
316                                                 }
317                                         }
318                                 }
319                                 
320                                 //make sure there is at least one valid file left
321                                 if (nameFileNames.size() == 0) { m->mothurOut("[ERROR]: no valid name files."); m->mothurOutEndLine(); abort = true; }
322                         }
323                         
324                         if (hasName && (nameFileNames.size() != fastaFileNames.size())) { m->mothurOut("[ERROR]: The number of namefiles does not match the number of fastafiles, please correct."); m->mothurOutEndLine(); abort=true; }
325                         
326                         bool hasGroup = true;
327                         groupfile = validParameter.validFile(parameters, "group", false);
328                         if (groupfile == "not found") { groupfile = "";  hasGroup = false; }
329                         else { 
330                                 m->splitAtDash(groupfile, groupFileNames);
331                                 
332                                 //go through files and make sure they are good, if not, then disregard them
333                                 for (int i = 0; i < groupFileNames.size(); i++) {
334                                         
335                                         bool ignore = false;
336                                         if (groupFileNames[i] == "current") { 
337                                                 groupFileNames[i] = m->getGroupFile(); 
338                                                 if (groupFileNames[i] != "") {  m->mothurOut("Using " + groupFileNames[i] + " as input file for the group parameter where you had given current."); m->mothurOutEndLine(); }
339                                                 else {  
340                                                         m->mothurOut("You have no current namefile, ignoring current."); m->mothurOutEndLine(); ignore=true; 
341                                                         //erase from file list
342                                                         groupFileNames.erase(groupFileNames.begin()+i);
343                                                         i--;
344                                                 }
345                                         }
346                                         
347                                         if (!ignore) {
348                                                 
349                                                 if (inputDir != "") {
350                                                         string path = m->hasPath(groupFileNames[i]);
351                                                         //if the user has not given a path then, add inputdir. else leave path alone.
352                                                         if (path == "") {       groupFileNames[i] = inputDir + groupFileNames[i];               }
353                                                 }
354                                                 
355                                                 int ableToOpen;
356                                                 ifstream in;
357                                                 
358                                                 ableToOpen = m->openInputFile(groupFileNames[i], in, "noerror");
359                                                 
360                                                 //if you can't open it, try default location
361                                                 if (ableToOpen == 1) {
362                                                         if (m->getDefaultPath() != "") { //default path is set
363                                                                 string tryPath = m->getDefaultPath() + m->getSimpleName(groupFileNames[i]);
364                                                                 m->mothurOut("Unable to open " + groupFileNames[i] + ". Trying default " + tryPath); m->mothurOutEndLine();
365                                                                 ifstream in2;
366                                                                 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
367                                                                 in2.close();
368                                                                 groupFileNames[i] = tryPath;
369                                                         }
370                                                 }
371                                                 
372                                                 if (ableToOpen == 1) {
373                                                         if (m->getOutputDir() != "") { //default path is set
374                                                                 string tryPath = m->getOutputDir() + m->getSimpleName(groupFileNames[i]);
375                                                                 m->mothurOut("Unable to open " + groupFileNames[i] + ". Trying output directory " + tryPath); m->mothurOutEndLine();
376                                                                 ifstream in2;
377                                                                 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
378                                                                 in2.close();
379                                                                 groupFileNames[i] = tryPath;
380                                                         }
381                                                 }
382                                                 
383                                                 in.close();
384                                                 
385                                                 if (ableToOpen == 1) { 
386                                                         m->mothurOut("Unable to open " + groupFileNames[i] + ". It will be disregarded."); m->mothurOutEndLine(); 
387                                                         //erase from file list
388                                                         groupFileNames.erase(groupFileNames.begin()+i);
389                                                         i--;
390                                                 }else {
391                                                         m->setGroupFile(groupFileNames[i]);
392                                                 }
393                                         }
394                                 }
395                                 
396                                 //make sure there is at least one valid file left
397                                 if (groupFileNames.size() == 0) { m->mothurOut("[ERROR]: no valid group files."); m->mothurOutEndLine(); abort = true; }
398                         }
399                         
400                         if (hasGroup && (groupFileNames.size() != fastaFileNames.size())) { m->mothurOut("[ERROR]: The number of groupfiles does not match the number of fastafiles, please correct."); m->mothurOutEndLine(); abort=true; }
401                         
402                         
403                         //if the user changes the output directory command factory will send this info to us in the output parameter 
404                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = ""; }
405                         
406                         string path;
407                         it = parameters.find("reference");
408                         //user has given a template file
409                         if(it != parameters.end()){ 
410                                 if (it->second == "self") { templatefile = "self"; }
411                                 else {
412                                         path = m->hasPath(it->second);
413                                         //if the user has not given a path then, add inputdir. else leave path alone.
414                                         if (path == "") {       parameters["reference"] = inputDir + it->second;                }
415                                         
416                                         templatefile = validParameter.validFile(parameters, "reference", true);
417                                         if (templatefile == "not open") { abort = true; }
418                                         else if (templatefile == "not found") { //check for saved reference sequences
419                                                 if (rdb->getSavedReference() != "") {
420                                                         templatefile = rdb->getSavedReference();
421                                                         m->mothurOutEndLine();  m->mothurOut("Using sequences from " + rdb->getSavedReference() + "."); m->mothurOutEndLine();
422                                                 }else {
423                                                         m->mothurOut("[ERROR]: You don't have any saved reference sequences and the reference parameter is a required."); 
424                                                         m->mothurOutEndLine();
425                                                         abort = true; 
426                                                 }
427                                         }
428                                 }
429                         }else if (hasName) {  templatefile = "self"; }
430                         else { 
431                                 if (rdb->getSavedReference() != "") {
432                                         templatefile = rdb->getSavedReference();
433                                         m->mothurOutEndLine();  m->mothurOut("Using sequences from " + rdb->getSavedReference() + "."); m->mothurOutEndLine();
434                                 }else {
435                                         m->mothurOut("[ERROR]: You don't have any saved reference sequences and the reference parameter is a required."); 
436                                         m->mothurOutEndLine();
437                                         templatefile = ""; abort = true; 
438                                 } 
439                         }
440                                 
441                         string temp = validParameter.validFile(parameters, "processors", false);        if (temp == "not found"){       temp = m->getProcessors();      }
442                         m->setProcessors(temp);
443                         m->mothurConvert(temp, processors);
444                         
445                         abskew = validParameter.validFile(parameters, "abskew", false); if (abskew == "not found"){     useAbskew = false;  abskew = "1.9";     }else{  useAbskew = true;  }
446                         if (useAbskew && templatefile != "self") { m->mothurOut("The abskew parameter is only valid with template=self, ignoring."); m->mothurOutEndLine(); useAbskew = false; }
447                         
448                         temp = validParameter.validFile(parameters, "chimealns", false);                        if (temp == "not found") { temp = "f"; }
449                         chimealns = m->isTrue(temp); 
450                         
451                         minh = validParameter.validFile(parameters, "minh", false);                                             if (minh == "not found")                        { useMinH = false; minh = "0.3";                                        }       else{ useMinH = true;                   }
452                         mindiv = validParameter.validFile(parameters, "mindiv", false);                                 if (mindiv == "not found")                      { useMindiv = false; mindiv = "0.5";                            }       else{ useMindiv = true;                 }
453                         xn = validParameter.validFile(parameters, "xn", false);                                                 if (xn == "not found")                          { useXn = false; xn = "8.0";                                            }       else{ useXn = true;                             }
454                         dn = validParameter.validFile(parameters, "dn", false);                                                 if (dn == "not found")                          { useDn = false; dn = "1.4";                                            }       else{ useDn = true;                             }
455                         xa = validParameter.validFile(parameters, "xa", false);                                                 if (xa == "not found")                          { useXa = false; xa = "1";                                                      }       else{ useXa = true;                             }
456                         chunks = validParameter.validFile(parameters, "chunks", false);                                 if (chunks == "not found")                      { useChunks = false; chunks = "4";                                      }       else{ useChunks = true;                 }
457                         minchunk = validParameter.validFile(parameters, "minchunk", false);                             if (minchunk == "not found")            { useMinchunk = false; minchunk = "64";                         }       else{ useMinchunk = true;               }
458                         idsmoothwindow = validParameter.validFile(parameters, "idsmoothwindow", false); if (idsmoothwindow == "not found")      { useIdsmoothwindow = false; idsmoothwindow = "32";     }       else{ useIdsmoothwindow = true; }
459                         //minsmoothid = validParameter.validFile(parameters, "minsmoothid", false);             if (minsmoothid == "not found")         { useMinsmoothid = false; minsmoothid = "0.95";         }       else{ useMinsmoothid = true;    }
460                         maxp = validParameter.validFile(parameters, "maxp", false);                                             if (maxp == "not found")                        { useMaxp = false; maxp = "2";                                          }       else{ useMaxp = true;                   }
461                         minlen = validParameter.validFile(parameters, "minlen", false);                                 if (minlen == "not found")                      { useMinlen = false; minlen = "10";                                     }       else{ useMinlen = true;                 }
462                         maxlen = validParameter.validFile(parameters, "maxlen", false);                                 if (maxlen == "not found")                      { useMaxlen = false; maxlen = "10000";                          }       else{ useMaxlen = true;                 }
463                         
464                         temp = validParameter.validFile(parameters, "ucl", false);                                              if (temp == "not found") { temp = "f"; }
465                         ucl = m->isTrue(temp);
466                         
467                         queryfract = validParameter.validFile(parameters, "queryfract", false);                 if (queryfract == "not found")          { useQueryfract = false; queryfract = "0.5";            }       else{ useQueryfract = true;             }
468                         if (!ucl && useQueryfract) { m->mothurOut("queryfact may only be used when ucl=t, ignoring."); m->mothurOutEndLine(); useQueryfract = false; }
469                         
470                         temp = validParameter.validFile(parameters, "skipgaps", false);                                 if (temp == "not found") { temp = "t"; }
471                         skipgaps = m->isTrue(temp); 
472
473                         temp = validParameter.validFile(parameters, "skipgaps2", false);                                if (temp == "not found") { temp = "t"; }
474                         skipgaps2 = m->isTrue(temp); 
475                         
476                         if (hasName && (templatefile != "self")) { m->mothurOut("You have provided a namefile and the reference parameter is not set to self. I am not sure what reference you are trying to use, aborting."); m->mothurOutEndLine(); abort=true; }
477                         if (hasGroup && (templatefile != "self")) { m->mothurOut("You have provided a group file and the reference parameter is not set to self. I am not sure what reference you are trying to use, aborting."); m->mothurOutEndLine(); abort=true; }
478                         
479                         //look for uchime exe
480                         path = m->argv;
481                         string tempPath = path;
482                         for (int i = 0; i < path.length(); i++) { tempPath[i] = tolower(path[i]); }
483                         path = path.substr(0, (tempPath.find_last_of('m')));
484                         
485                         string uchimeCommand;
486 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
487                         uchimeCommand = path + "uchime";        //      format the database, -o option gives us the ability
488             if (m->debug) { 
489                 m->mothurOut("[DEBUG]: Uchime location using \"which uchime\" = "); 
490                 Command* newCommand = new SystemCommand("which uchime"); m->mothurOutEndLine();
491                 newCommand->execute();
492                 delete newCommand;
493                 m->mothurOut("[DEBUG]: Mothur's location using \"which mothur\" = "); 
494                 newCommand = new SystemCommand("which mothur"); m->mothurOutEndLine();
495                 newCommand->execute();
496                 delete newCommand;
497             }
498 #else
499                         uchimeCommand = path + "uchime.exe";
500 #endif
501         
502                         //test to make sure uchime exists
503                         ifstream in;
504                         uchimeCommand = m->getFullPathName(uchimeCommand);
505                         int ableToOpen = m->openInputFile(uchimeCommand, in, "no error"); in.close();
506                         if(ableToOpen == 1) {   
507                 m->mothurOut(uchimeCommand + " file does not exist. Checking path... \n");
508                 //check to see if uchime is in the path??
509                 
510                 string uLocation = m->findProgramPath("uchime");
511                 
512                 
513                 ifstream in2;
514 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
515                 ableToOpen = m->openInputFile(uLocation, in2, "no error"); in2.close();
516 #else
517                 ableToOpen = m->openInputFile((uLocation + ".exe"), in2, "no error"); in2.close();
518 #endif
519
520                 if(ableToOpen == 1) { m->mothurOut("[ERROR]: " + uLocation + " file does not exist. mothur requires the uchime executable."); m->mothurOutEndLine(); abort = true; } 
521                 else {  m->mothurOut("Found uchime in your path, using " + uLocation + "\n");uchimeLocation = uLocation; }
522             }else {  uchimeLocation = uchimeCommand; }
523             
524             uchimeLocation = m->getFullPathName(uchimeLocation);
525         }
526         }
527         catch(exception& e) {
528                 m->errorOut(e, "ChimeraSlayerCommand", "ChimeraSlayerCommand");
529                 exit(1);
530         }
531 }
532 //***************************************************************************************************************
533
534 int ChimeraUchimeCommand::execute(){
535         try{
536                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
537                 
538                 m->mothurOut("\nuchime by Robert C. Edgar\nhttp://drive5.com/uchime\nThis code is donated to the public domain.\n\n");
539                 
540                 for (int s = 0; s < fastaFileNames.size(); s++) {
541                         
542                         m->mothurOut("Checking sequences from " + fastaFileNames[s] + " ..." ); m->mothurOutEndLine();
543                         
544                         int start = time(NULL); 
545                         string nameFile = "";
546                         if (outputDir == "") { outputDir = m->hasPath(fastaFileNames[s]);  }//if user entered a file with a path then preserve it                               
547                         string outputFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + getOutputFileNameTag("chimera");
548                         string accnosFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + getOutputFileNameTag("accnos");
549                         string alnsFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s]))  + getOutputFileNameTag("alns");
550                         string newFasta = m->getRootName(fastaFileNames[s]) + "temp";
551                                 
552                         //you provided a groupfile
553                         string groupFile = "";
554                         if (groupFileNames.size() != 0) { groupFile = groupFileNames[s]; }
555                         
556                         if ((templatefile == "self") && (groupFile == "")) { //you want to run uchime with a reference template
557
558                                 if (processors != 1) { m->mothurOut("When using template=self, mothur can only use 1 processor, continuing."); m->mothurOutEndLine(); processors = 1; }
559                                 if (nameFileNames.size() != 0) { //you provided a namefile and we don't need to create one
560                                         nameFile = nameFileNames[s];
561                                 }else { nameFile = getNamesFile(fastaFileNames[s]); }
562                                                                                 
563                                 map<string, string> seqs;  
564                                 readFasta(fastaFileNames[s], seqs);  if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) {   m->mothurRemove(outputNames[j]);        }  return 0; }
565
566                                 //read namefile
567                                 vector<seqPriorityNode> nameMapCount;
568                                 int error = m->readNames(nameFile, nameMapCount, seqs); if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) {        m->mothurRemove(outputNames[j]);        }  return 0; }
569                                 if (error == 1) { for (int j = 0; j < outputNames.size(); j++) {        m->mothurRemove(outputNames[j]);        }  return 0; }
570                                 if (seqs.size() != nameMapCount.size()) { m->mothurOut( "The number of sequences in your fastafile does not match the number of sequences in your namefile, aborting."); m->mothurOutEndLine(); for (int j = 0; j < outputNames.size(); j++) {  m->mothurRemove(outputNames[j]);        }  return 0; }
571                                 
572                                 printFile(nameMapCount, newFasta);
573                                 fastaFileNames[s] = newFasta;
574                         }
575                         
576                         if (m->control_pressed) {  for (int j = 0; j < outputNames.size(); j++) {       m->mothurRemove(outputNames[j]);        }  return 0;    }                               
577                         
578                         if (groupFile != "") {
579                                 if (nameFileNames.size() != 0) { //you provided a namefile and we don't need to create one
580                                         nameFile = nameFileNames[s];
581                                 }else { nameFile = getNamesFile(fastaFileNames[s]); }
582                                 
583                                 //Parse sequences by group
584                                 SequenceParser parser(groupFile, fastaFileNames[s], nameFile);
585                                 vector<string> groups = parser.getNamesOfGroups();
586                                         
587                                 if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) {        m->mothurRemove(outputNames[j]);        }  return 0; }
588                                                                 
589                                 //clears files
590                                 ofstream out, out1, out2;
591                                 m->openOutputFile(outputFileName, out); out.close(); 
592                                 m->openOutputFile(accnosFileName, out1); out1.close();
593                                 if (chimealns) { m->openOutputFile(alnsFileName, out2); out2.close(); }
594                                 int totalSeqs = 0;
595                                 
596                                 if(processors == 1)     {       totalSeqs = driverGroups(parser, outputFileName, newFasta, accnosFileName, alnsFileName, 0, groups.size(), groups);     }
597                                 else                            {       totalSeqs = createProcessesGroups(parser, outputFileName, newFasta, accnosFileName, alnsFileName, groups, nameFile, groupFile, fastaFileNames[s]);                      }
598
599                                 if (m->control_pressed) {  for (int j = 0; j < outputNames.size(); j++) {       m->mothurRemove(outputNames[j]);        }  return 0;    }                               
600
601                                 int totalChimeras = deconvoluteResults(parser, outputFileName, accnosFileName, alnsFileName);
602                                 
603                                 m->mothurOutEndLine(); m->mothurOut("It took " + toString(time(NULL) - start) + " secs to check " + toString(totalSeqs) + " sequences. " + toString(totalChimeras) + " chimeras were found.");  m->mothurOutEndLine();
604                                 m->mothurOut("The number of sequences checked may be larger than the number of unique sequences because some sequences are found in several samples."); m->mothurOutEndLine(); 
605                                 
606                                 if (m->control_pressed) {  for (int j = 0; j < outputNames.size(); j++) {       m->mothurRemove(outputNames[j]);        }  return 0;    }                               
607                                         
608                         }else{
609                                 if (m->control_pressed) {  for (int j = 0; j < outputNames.size(); j++) {       m->mothurRemove(outputNames[j]);        }  return 0;    }
610                         
611                                 int numSeqs = 0;
612                                 int numChimeras = 0;
613
614                                 if(processors == 1){ numSeqs = driver(outputFileName, fastaFileNames[s], accnosFileName, alnsFileName, numChimeras); }
615                                 else{   numSeqs = createProcesses(outputFileName, fastaFileNames[s], accnosFileName, alnsFileName, numChimeras); }
616                                 
617                                 //add headings
618                                 ofstream out;
619                                 m->openOutputFile(outputFileName+".temp", out); 
620                                 out << "Score\tQuery\tParentA\tParentB\tIdQM\tIdQA\tIdQB\tIdAB\tIdQT\tLY\tLN\tLA\tRY\tRN\tRA\tDiv\tYN\n";
621                                 out.close();
622                                 
623                                 m->appendFiles(outputFileName, outputFileName+".temp");
624                                 m->mothurRemove(outputFileName); rename((outputFileName+".temp").c_str(), outputFileName.c_str());
625                                 
626                                 if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) {        m->mothurRemove(outputNames[j]);        } return 0; }
627                         
628                                 //remove file made for uchime
629                                 if (templatefile == "self") {  m->mothurRemove(fastaFileNames[s]); }
630                         
631                                 m->mothurOutEndLine(); m->mothurOut("It took " + toString(time(NULL) - start) + " secs to check " + toString(numSeqs) + " sequences. " + toString(numChimeras) + " chimeras were found.");      m->mothurOutEndLine();
632                         }
633                         
634                         outputNames.push_back(outputFileName); outputTypes["chimera"].push_back(outputFileName);
635                         outputNames.push_back(accnosFileName); outputTypes["accnos"].push_back(accnosFileName);
636                         if (chimealns) { outputNames.push_back(alnsFileName); outputTypes["alns"].push_back(alnsFileName); }
637                 }
638         
639                 //set accnos file as new current accnosfile
640                 string current = "";
641                 itTypes = outputTypes.find("accnos");
642                 if (itTypes != outputTypes.end()) {
643                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setAccnosFile(current); }
644                 }
645                 
646                 m->mothurOutEndLine();
647                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
648                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }       
649                 m->mothurOutEndLine();
650                 
651                 return 0;
652                 
653         }
654         catch(exception& e) {
655                 m->errorOut(e, "ChimeraUchimeCommand", "execute");
656                 exit(1);
657         }
658 }
659 //**********************************************************************************************************************
660 int ChimeraUchimeCommand::deconvoluteResults(SequenceParser& parser, string outputFileName, string accnosFileName, string alnsFileName){
661         try {
662                 map<string, string> uniqueNames = parser.getAllSeqsMap();
663                 map<string, string>::iterator itUnique;
664                 int total = 0;
665                 
666                 //edit accnos file
667                 ifstream in2; 
668                 m->openInputFile(accnosFileName, in2);
669                 
670                 ofstream out2;
671                 m->openOutputFile(accnosFileName+".temp", out2);
672                 
673                 string name;
674                 set<string> namesInFile; //this is so if a sequence is found to be chimera in several samples we dont write it to the results file more than once
675                 set<string>::iterator itNames;
676                 set<string> chimerasInFile;
677                 set<string>::iterator itChimeras;
678
679                 
680                 while (!in2.eof()) {
681                         if (m->control_pressed) { in2.close(); out2.close(); m->mothurRemove(outputFileName); m->mothurRemove((accnosFileName+".temp")); return 0; }
682                         
683                         in2 >> name; m->gobble(in2);
684                         
685                         //find unique name
686                         itUnique = uniqueNames.find(name);
687                         
688                         if (itUnique == uniqueNames.end()) { m->mothurOut("[ERROR]: trouble parsing accnos results. Cannot find "+ name + "."); m->mothurOutEndLine(); m->control_pressed = true; }
689                         else {
690                                 itChimeras = chimerasInFile.find((itUnique->second));
691                                 
692                                 if (itChimeras == chimerasInFile.end()) {
693                                         out2 << itUnique->second << endl;
694                                         chimerasInFile.insert((itUnique->second));
695                                         total++;
696                                 }
697                         }
698                 }
699                 in2.close();
700                 out2.close();
701                 
702                 m->mothurRemove(accnosFileName);
703                 rename((accnosFileName+".temp").c_str(), accnosFileName.c_str());
704                 
705                 
706                 
707                 //edit chimera file
708                 ifstream in; 
709                 m->openInputFile(outputFileName, in);
710                 
711                 ofstream out;
712                 m->openOutputFile(outputFileName+".temp", out); out.setf(ios::fixed, ios::floatfield); out.setf(ios::showpoint);
713                 out << "Score\tQuery\tParentA\tParentB\tIdQM\tIdQA\tIdQB\tIdAB\tIdQT\tLY\tLN\tLA\tRY\tRN\tRA\tDiv\tYN\n";
714                 
715                 float temp1;
716                 string parent1, parent2, temp2, temp3, temp4, temp5, temp6, temp7, temp8, temp9, temp10, temp11, temp12, temp13, flag;
717                 name = "";
718                 namesInFile.clear();    
719                 //assumptions - in file each read will always look like - if uchime source is updated, revisit this code.
720                 /*                                                                              1       2       3       4       5       6       7       8       9       10      11      12      13      14      15
721                  0.000000       F11Fcsw_33372/ab=18/            *       *       *       *       *       *       *       *       *       *       *       *       *       *       N
722                  0.018300       F11Fcsw_14980/ab=16/            F11Fcsw_1915/ab=35/     F11Fcsw_6032/ab=42/     79.9    78.7    78.2    78.7    79.2    3       0       5       11      10      20      1.46    N
723                 */
724                 
725                 while (!in.eof()) {
726                         
727                         if (m->control_pressed) { in.close(); out.close(); m->mothurRemove((outputFileName+".temp")); return 0; }
728                         
729                         bool print = false;
730                         in >> temp1;    m->gobble(in);
731                         in >> name;             m->gobble(in);
732                         in >> parent1;  m->gobble(in);
733                         in >> parent2;  m->gobble(in);
734                         in >> temp2 >> temp3 >> temp4 >> temp5 >> temp6 >> temp7 >> temp8 >> temp9 >> temp10 >> temp11 >> temp12 >> temp13 >> flag;
735                         m->gobble(in);
736                         
737                         //parse name - name will look like U68590/ab=1/
738                         string restOfName = "";
739                         int pos = name.find_first_of('/');
740                         if (pos != string::npos) {
741                                 restOfName = name.substr(pos);
742                                 name = name.substr(0, pos);
743                         }
744                         
745                         //find unique name
746                         itUnique = uniqueNames.find(name);
747                         
748                         if (itUnique == uniqueNames.end()) { m->mothurOut("[ERROR]: trouble parsing chimera results. Cannot find "+ name + "."); m->mothurOutEndLine(); m->control_pressed = true; }
749                         else {
750                                 name = itUnique->second;
751                                 //is this name already in the file
752                                 itNames = namesInFile.find((name));
753                                 
754                                 if (itNames == namesInFile.end()) { //no not in file
755                                         if (flag == "N") { //are you really a no??
756                                                 //is this sequence really not chimeric??
757                                                 itChimeras = chimerasInFile.find(name);
758                                                 
759                                                 //then you really are a no so print, otherwise skip
760                                                 if (itChimeras == chimerasInFile.end()) { print = true; }
761                                         }else{ print = true; }
762                                 }
763                         }
764                         
765                         if (print) {
766                                 out << temp1 << '\t' << name << restOfName << '\t';
767                                 namesInFile.insert(name);
768                                 
769                                 //parse parent1 names
770                                 if (parent1 != "*") {
771                                         restOfName = "";
772                                         pos = parent1.find_first_of('/');
773                                         if (pos != string::npos) {
774                                                 restOfName = parent1.substr(pos);
775                                                 parent1 = parent1.substr(0, pos);
776                                         }
777                                         
778                                         itUnique = uniqueNames.find(parent1);
779                                         if (itUnique == uniqueNames.end()) { m->mothurOut("[ERROR]: trouble parsing chimera results. Cannot find parentA "+ parent1 + "."); m->mothurOutEndLine(); m->control_pressed = true; }
780                                         else {  out << itUnique->second << restOfName << '\t';  }
781                                 }else { out << parent1 << '\t'; }
782                                 
783                                 //parse parent2 names
784                                 if (parent2 != "*") {
785                                         restOfName = "";
786                                         pos = parent2.find_first_of('/');
787                                         if (pos != string::npos) {
788                                                 restOfName = parent2.substr(pos);
789                                                 parent2 = parent2.substr(0, pos);
790                                         }
791                                         
792                                         itUnique = uniqueNames.find(parent2);
793                                         if (itUnique == uniqueNames.end()) { m->mothurOut("[ERROR]: trouble parsing chimera results. Cannot find parentB "+ parent2 + "."); m->mothurOutEndLine(); m->control_pressed = true; }
794                                         else {  out << itUnique->second << restOfName << '\t';  }
795                                 }else { out << parent2 << '\t'; }
796                                 
797                                 out << temp2 << '\t' << temp3 << '\t' << temp4 << '\t' << temp5 << '\t' << temp6 << '\t' << temp7 << '\t' << temp8 << '\t' << temp9 << '\t' << temp10 << '\t' << temp11 << '\t' << temp12 << temp13 << '\t' << flag << endl;    
798                         }
799                 }
800                 in.close();
801                 out.close();
802                 
803                 m->mothurRemove(outputFileName);
804                 rename((outputFileName+".temp").c_str(), outputFileName.c_str());
805                 
806                                 
807                 //edit anls file
808                 //assumptions - in file each read will always look like - if uchime source is updated, revisit this code.
809                 /*
810                  ------------------------------------------------------------------------
811                  Query   (  179 nt) F21Fcsw_11639/ab=591/
812                  ParentA (  179 nt) F11Fcsw_6529/ab=1625/
813                  ParentB (  181 nt) F21Fcsw_12128/ab=1827/
814                  
815                  A     1 AAGgAAGAtTAATACaagATGgCaTCatgAGtccgCATgTtcAcatGATTAAAG--gTaTtcCGGTagacGATGGGGATG 78
816                  Q     1 AAGTAAGACTAATACCCAATGACGTCTCTAGAAGACATCTGAAAGAGATTAAAG--ATTTATCGGTGATGGATGGGGATG 78
817                  B     1 AAGgAAGAtTAATcCaggATGggaTCatgAGttcACATgTccgcatGATTAAAGgtATTTtcCGGTagacGATGGGGATG 80
818                  Diffs      N    N    A N?N   N N  NNN  N?NB   N ?NaNNN          B B NN    NNNN          
819                  Votes      0    0    + 000   0 0  000  000+   0 00!000            + 00    0000          
820                  Model   AAAAAAAAAAAAAAAAAAAAAAxBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB
821                  
822                  A    79 CGTtccATTAGaTaGTaGGCGGGGTAACGGCCCACCtAGtCttCGATggaTAGGGGTTCTGAGAGGAAGGTCCCCCACAT 158
823                  Q    79 CGTCTGATTAGCTTGTTGGCGGGGTAACGGCCCACCAAGGCAACGATCAGTAGGGGTTCTGAGAGGAAGGTCCCCCACAT 158
824                  B    81 CGTtccATTAGaTaGTaGGCGGGGTAACGGCCCACCtAGtCAACGATggaTAGGGGTTCTGAGAGGAAGGTCCCCCACAT 160
825                  Diffs      NNN     N N  N                   N  N BB    NNN                              
826                  Votes      000     0 0  0                   0  0 ++    000                              
827                  Model   BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB
828                  
829                  A   159 TGGAACTGAGACACGGTCCAA 179
830                  Q   159 TGGAACTGAGACACGGTCCAA 179
831                  B   161 TGGAACTGAGACACGGTCCAA 181
832                  Diffs                        
833                  Votes                        
834                  Model   BBBBBBBBBBBBBBBBBBBBB
835                  
836                  Ids.  QA 76.6%, QB 77.7%, AB 93.7%, QModel 78.9%, Div. +1.5%
837                  Diffs Left 7: N 0, A 6, Y 1 (14.3%); Right 35: N 1, A 30, Y 4 (11.4%), Score 0.0047
838                 */
839                 if (chimealns) {
840                         ifstream in3; 
841                         m->openInputFile(alnsFileName, in3);
842                 
843                         ofstream out3;
844                         m->openOutputFile(alnsFileName+".temp", out3); out3.setf(ios::fixed, ios::floatfield); out3.setf(ios::showpoint);
845                 
846                         name = "";
847                         namesInFile.clear();
848                         string line = "";
849                         
850                         while (!in3.eof()) {
851                                 if (m->control_pressed) { in3.close(); out3.close(); m->mothurRemove(outputFileName); m->mothurRemove((accnosFileName)); m->mothurRemove((alnsFileName+".temp")); return 0; }
852                                 
853                                 line = "";
854                                 line = m->getline(in3); 
855                                 string temp = "";
856                                 
857                                 if (line != "") {
858                                         istringstream iss(line);
859                                         iss >> temp;
860                                         
861                                         //are you a name line
862                                         if ((temp == "Query") || (temp == "ParentA") || (temp == "ParentB")) {
863                                                 int spot = 0;
864                                                 for (int i = 0; i < line.length(); i++) {
865                                                         spot = i;
866                                                         if (line[i] == ')') { break; }
867                                                         else { out3 << line[i]; }
868                                                 }
869                                                 
870                                                 if (spot == (line.length() - 1)) { m->mothurOut("[ERROR]: could not line sequence name in line " + line + "."); m->mothurOutEndLine(); m->control_pressed = true; }
871                                                 else if ((spot+2) > (line.length() - 1)) { m->mothurOut("[ERROR]: could not line sequence name in line " + line + "."); m->mothurOutEndLine(); m->control_pressed = true; }
872                                                 else {
873                                                         out << line[spot] << line[spot+1];
874                                                         
875                                                         name = line.substr(spot+2);
876                                                         
877                                                         //parse name - name will either look like U68590/ab=1/ or U68590
878                                                         string restOfName = "";
879                                                         int pos = name.find_first_of('/');
880                                                         if (pos != string::npos) {
881                                                                 restOfName = name.substr(pos);
882                                                                 name = name.substr(0, pos);
883                                                         }
884                                                         
885                                                         //find unique name
886                                                         itUnique = uniqueNames.find(name);
887                                                         
888                                                         if (itUnique == uniqueNames.end()) { m->mothurOut("[ERROR]: trouble parsing alns results. Cannot find "+ name + "."); m->mothurOutEndLine();m->control_pressed = true;  }
889                                                         else {
890                                                                 //only limit repeats on query names
891                                                                 if (temp == "Query") {
892                                                                         itNames = namesInFile.find((itUnique->second));
893                                                                         
894                                                                         if (itNames == namesInFile.end()) {
895                                                                                 out << itUnique->second << restOfName << endl;
896                                                                                 namesInFile.insert((itUnique->second));
897                                                                         }
898                                                                 }else { out << itUnique->second << restOfName << endl;  }
899                                                         }
900                                                         
901                                                 }
902                                                 
903                                         }else { //not need to alter line
904                                                 out3 << line << endl;
905                                         }
906                                 }else { out3 << endl; }
907                         }
908                         in3.close();
909                         out3.close();
910                         
911                         m->mothurRemove(alnsFileName);
912                         rename((alnsFileName+".temp").c_str(), alnsFileName.c_str());
913                 }
914                 
915                 return total;
916         }
917         catch(exception& e) {
918                 m->errorOut(e, "ChimeraUchimeCommand", "deconvoluteResults");
919                 exit(1);
920         }
921 }       
922 //**********************************************************************************************************************
923 int ChimeraUchimeCommand::printFile(vector<seqPriorityNode>& nameMapCount, string filename){
924         try {
925                 
926                 sort(nameMapCount.begin(), nameMapCount.end(), compareSeqPriorityNodes);
927                 
928                 ofstream out;
929                 m->openOutputFile(filename, out);
930                 
931                 //print new file in order of
932                 for (int i = 0; i < nameMapCount.size(); i++) {
933                         out << ">" << nameMapCount[i].name  << "/ab=" << nameMapCount[i].numIdentical << "/" << endl << nameMapCount[i].seq << endl;
934                 }
935                 out.close();
936                 
937                 return 0;
938         }
939         catch(exception& e) {
940                 m->errorOut(e, "ChimeraUchimeCommand", "printFile");
941                 exit(1);
942         }
943 }       
944 //**********************************************************************************************************************
945 int ChimeraUchimeCommand::readFasta(string filename, map<string, string>& seqs){
946         try {
947                 //create input file for uchime
948                 //read through fastafile and store info
949                 ifstream in;
950                 m->openInputFile(filename, in);
951                 
952                 while (!in.eof()) {
953                         
954                         if (m->control_pressed) { in.close(); return 0; }
955                         
956                         Sequence seq(in); m->gobble(in);
957                         seqs[seq.getName()] = seq.getAligned();
958                 }
959                 in.close();
960                 
961                 return 0;
962         }
963         catch(exception& e) {
964                 m->errorOut(e, "ChimeraUchimeCommand", "readFasta");
965                 exit(1);
966         }
967 }       
968 //**********************************************************************************************************************
969
970 string ChimeraUchimeCommand::getNamesFile(string& inputFile){
971         try {
972                 string nameFile = "";
973                 
974                 m->mothurOutEndLine(); m->mothurOut("No namesfile given, running unique.seqs command to generate one."); m->mothurOutEndLine(); m->mothurOutEndLine();
975                 
976                 //use unique.seqs to create new name and fastafile
977                 string inputString = "fasta=" + inputFile;
978                 m->mothurOut("/******************************************/"); m->mothurOutEndLine(); 
979                 m->mothurOut("Running command: unique.seqs(" + inputString + ")"); m->mothurOutEndLine(); 
980                 m->mothurCalling = true;
981         
982                 Command* uniqueCommand = new DeconvoluteCommand(inputString);
983                 uniqueCommand->execute();
984                 
985                 map<string, vector<string> > filenames = uniqueCommand->getOutputFiles();
986                 
987                 delete uniqueCommand;
988                 m->mothurCalling = false;
989                 m->mothurOut("/******************************************/"); m->mothurOutEndLine(); 
990                 
991                 nameFile = filenames["name"][0];
992                 inputFile = filenames["fasta"][0];
993                 
994                 return nameFile;
995         }
996         catch(exception& e) {
997                 m->errorOut(e, "ChimeraUchimeCommand", "getNamesFile");
998                 exit(1);
999         }
1000 }
1001 //**********************************************************************************************************************
1002 int ChimeraUchimeCommand::driverGroups(SequenceParser& parser, string outputFName, string filename, string accnos, string alns, int start, int end, vector<string> groups){
1003         try {
1004                 
1005                 int totalSeqs = 0;
1006                 int numChimeras = 0;
1007                 
1008                 for (int i = start; i < end; i++) {
1009                         int start = time(NULL);  if (m->control_pressed) {  return 0; }
1010                         
1011                         int error = parser.getSeqs(groups[i], filename, true); if ((error == 1) || m->control_pressed) {  return 0; }
1012                         
1013                         int numSeqs = driver((outputFName + groups[i]), filename, (accnos+ groups[i]), (alns+ groups[i]), numChimeras);
1014                         totalSeqs += numSeqs;
1015                         
1016                         if (m->control_pressed) { return 0; }
1017                         
1018                         //remove file made for uchime
1019                         if (!m->debug) {  m->mothurRemove(filename);  }
1020             else { m->mothurOut("[DEBUG]: saving file: " + filename + ".\n"); }
1021                         
1022                         //append files
1023                         m->appendFiles((outputFName+groups[i]), outputFName); m->mothurRemove((outputFName+groups[i]));
1024                         m->appendFiles((accnos+groups[i]), accnos); m->mothurRemove((accnos+groups[i]));
1025                         if (chimealns) { m->appendFiles((alns+groups[i]), alns); m->mothurRemove((alns+groups[i])); }
1026                         
1027                         m->mothurOutEndLine(); m->mothurOut("It took " + toString(time(NULL) - start) + " secs to check " + toString(numSeqs) + " sequences from group " + groups[i] + ".");    m->mothurOutEndLine();                                  
1028                 }       
1029                 
1030                 return totalSeqs;
1031                 
1032         }
1033         catch(exception& e) {
1034                 m->errorOut(e, "ChimeraUchimeCommand", "driverGroups");
1035                 exit(1);
1036         }
1037 }       
1038 //**********************************************************************************************************************
1039
1040 int ChimeraUchimeCommand::driver(string outputFName, string filename, string accnos, string alns, int& numChimeras){
1041         try {
1042                 
1043                 outputFName = m->getFullPathName(outputFName);
1044                 filename = m->getFullPathName(filename);
1045                 alns = m->getFullPathName(alns);
1046                 
1047                 //to allow for spaces in the path
1048                 outputFName = "\"" + outputFName + "\"";
1049                 filename = "\"" + filename + "\"";
1050                 alns = "\"" + alns + "\"";
1051                                 
1052                 vector<char*> cPara;
1053                 
1054                 string uchimeCommand = uchimeLocation;
1055 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
1056                 uchimeCommand += " ";
1057 #else
1058                 uchimeCommand = "\"" + uchimeCommand + "\"";
1059 #endif
1060                 
1061                 char* tempUchime;
1062                 tempUchime= new char[uchimeCommand.length()+1]; 
1063                 *tempUchime = '\0';
1064                 strncat(tempUchime, uchimeCommand.c_str(), uchimeCommand.length());
1065                 cPara.push_back(tempUchime);
1066                 
1067                 char* tempIn = new char[8]; 
1068                 *tempIn = '\0'; strncat(tempIn, "--input", 7);
1069                 //strcpy(tempIn, "--input"); 
1070                 cPara.push_back(tempIn);
1071                 char* temp = new char[filename.length()+1];
1072                 *temp = '\0'; strncat(temp, filename.c_str(), filename.length());
1073                 //strcpy(temp, filename.c_str());
1074                 cPara.push_back(temp);
1075                 
1076                 //are you using a reference file
1077                 if (templatefile != "self") {
1078                         //add reference file
1079                         char* tempRef = new char[5]; 
1080                         //strcpy(tempRef, "--db"); 
1081                         *tempRef = '\0'; strncat(tempRef, "--db", 4);
1082                         cPara.push_back(tempRef);  
1083                         char* tempR = new char[templatefile.length()+1];
1084                         //strcpy(tempR, templatefile.c_str());
1085                         *tempR = '\0'; strncat(tempR, templatefile.c_str(), templatefile.length());
1086                         cPara.push_back(tempR);
1087                 }
1088                 
1089                 char* tempO = new char[12]; 
1090                 *tempO = '\0'; strncat(tempO, "--uchimeout", 11);
1091                 //strcpy(tempO, "--uchimeout"); 
1092                 cPara.push_back(tempO);
1093                 char* tempout = new char[outputFName.length()+1];
1094                 //strcpy(tempout, outputFName.c_str());
1095                 *tempout = '\0'; strncat(tempout, outputFName.c_str(), outputFName.length());
1096                 cPara.push_back(tempout);
1097                 
1098                 if (chimealns) {
1099                         char* tempA = new char[13]; 
1100                         *tempA = '\0'; strncat(tempA, "--uchimealns", 12);
1101                         //strcpy(tempA, "--uchimealns"); 
1102                         cPara.push_back(tempA);
1103                         char* tempa = new char[alns.length()+1];
1104                         //strcpy(tempa, alns.c_str());
1105                         *tempa = '\0'; strncat(tempa, alns.c_str(), alns.length());
1106                         cPara.push_back(tempa);
1107                 }
1108                 
1109                 if (useAbskew) {
1110                         char* tempskew = new char[9];
1111                         *tempskew = '\0'; strncat(tempskew, "--abskew", 8);
1112                         //strcpy(tempskew, "--abskew"); 
1113                         cPara.push_back(tempskew);
1114                         char* tempSkew = new char[abskew.length()+1];
1115                         //strcpy(tempSkew, abskew.c_str());
1116                         *tempSkew = '\0'; strncat(tempSkew, abskew.c_str(), abskew.length());
1117                         cPara.push_back(tempSkew);
1118                 }
1119                 
1120                 if (useMinH) {
1121                         char* tempminh = new char[7]; 
1122                         *tempminh = '\0'; strncat(tempminh, "--minh", 6);
1123                         //strcpy(tempminh, "--minh"); 
1124                         cPara.push_back(tempminh);
1125                         char* tempMinH = new char[minh.length()+1];
1126                         *tempMinH = '\0'; strncat(tempMinH, minh.c_str(), minh.length());
1127                         //strcpy(tempMinH, minh.c_str());
1128                         cPara.push_back(tempMinH);
1129                 }
1130                 
1131                 if (useMindiv) {
1132                         char* tempmindiv = new char[9]; 
1133                         *tempmindiv = '\0'; strncat(tempmindiv, "--mindiv", 8);
1134                         //strcpy(tempmindiv, "--mindiv"); 
1135                         cPara.push_back(tempmindiv);
1136                         char* tempMindiv = new char[mindiv.length()+1];
1137                         *tempMindiv = '\0'; strncat(tempMindiv, mindiv.c_str(), mindiv.length());
1138                         //strcpy(tempMindiv, mindiv.c_str());
1139                         cPara.push_back(tempMindiv);
1140                 }
1141                 
1142                 if (useXn) {
1143                         char* tempxn = new char[5]; 
1144                         //strcpy(tempxn, "--xn"); 
1145                         *tempxn = '\0'; strncat(tempxn, "--xn", 4);
1146                         cPara.push_back(tempxn);
1147                         char* tempXn = new char[xn.length()+1];
1148                         //strcpy(tempXn, xn.c_str());
1149                         *tempXn = '\0'; strncat(tempXn, xn.c_str(), xn.length());
1150                         cPara.push_back(tempXn);
1151                 }
1152                 
1153                 if (useDn) {
1154                         char* tempdn = new char[5]; 
1155                         //strcpy(tempdn, "--dn"); 
1156                         *tempdn = '\0'; strncat(tempdn, "--dn", 4);
1157                         cPara.push_back(tempdn);
1158                         char* tempDn = new char[dn.length()+1];
1159                         *tempDn = '\0'; strncat(tempDn, dn.c_str(), dn.length());
1160                         //strcpy(tempDn, dn.c_str());
1161                         cPara.push_back(tempDn);
1162                 }
1163                 
1164                 if (useXa) {
1165                         char* tempxa = new char[5]; 
1166                         //strcpy(tempxa, "--xa"); 
1167                         *tempxa = '\0'; strncat(tempxa, "--xa", 4);
1168                         cPara.push_back(tempxa);
1169                         char* tempXa = new char[xa.length()+1];
1170                         *tempXa = '\0'; strncat(tempXa, xa.c_str(), xa.length());
1171                         //strcpy(tempXa, xa.c_str());
1172                         cPara.push_back(tempXa);
1173                 }
1174                 
1175                 if (useChunks) {
1176                         char* tempchunks = new char[9]; 
1177                         //strcpy(tempchunks, "--chunks"); 
1178                         *tempchunks = '\0'; strncat(tempchunks, "--chunks", 8);
1179                         cPara.push_back(tempchunks);
1180                         char* tempChunks = new char[chunks.length()+1];
1181                         *tempChunks = '\0'; strncat(tempChunks, chunks.c_str(), chunks.length());
1182                         //strcpy(tempChunks, chunks.c_str());
1183                         cPara.push_back(tempChunks);
1184                 }
1185                 
1186                 if (useMinchunk) {
1187                         char* tempminchunk = new char[11]; 
1188                         //strcpy(tempminchunk, "--minchunk"); 
1189                         *tempminchunk = '\0'; strncat(tempminchunk, "--minchunk", 10);
1190                         cPara.push_back(tempminchunk);
1191                         char* tempMinchunk = new char[minchunk.length()+1];
1192                         *tempMinchunk = '\0'; strncat(tempMinchunk, minchunk.c_str(), minchunk.length());
1193                         //strcpy(tempMinchunk, minchunk.c_str());
1194                         cPara.push_back(tempMinchunk);
1195                 }
1196                 
1197                 if (useIdsmoothwindow) {
1198                         char* tempidsmoothwindow = new char[17]; 
1199                         *tempidsmoothwindow = '\0'; strncat(tempidsmoothwindow, "--idsmoothwindow", 16);
1200                         //strcpy(tempidsmoothwindow, "--idsmoothwindow"); 
1201                         cPara.push_back(tempidsmoothwindow);
1202                         char* tempIdsmoothwindow = new char[idsmoothwindow.length()+1];
1203                         *tempIdsmoothwindow = '\0'; strncat(tempIdsmoothwindow, idsmoothwindow.c_str(), idsmoothwindow.length());
1204                         //strcpy(tempIdsmoothwindow, idsmoothwindow.c_str());
1205                         cPara.push_back(tempIdsmoothwindow);
1206                 }
1207                 
1208                 /*if (useMinsmoothid) {
1209                         char* tempminsmoothid = new char[14]; 
1210                         //strcpy(tempminsmoothid, "--minsmoothid"); 
1211                         *tempminsmoothid = '\0'; strncat(tempminsmoothid, "--minsmoothid", 13);
1212                         cPara.push_back(tempminsmoothid);
1213                         char* tempMinsmoothid = new char[minsmoothid.length()+1];
1214                         *tempMinsmoothid = '\0'; strncat(tempMinsmoothid, minsmoothid.c_str(), minsmoothid.length());
1215                         //strcpy(tempMinsmoothid, minsmoothid.c_str());
1216                         cPara.push_back(tempMinsmoothid);
1217                 }*/
1218                 
1219                 if (useMaxp) {
1220                         char* tempmaxp = new char[7]; 
1221                         //strcpy(tempmaxp, "--maxp"); 
1222                         *tempmaxp = '\0'; strncat(tempmaxp, "--maxp", 6);
1223                         cPara.push_back(tempmaxp);
1224                         char* tempMaxp = new char[maxp.length()+1];
1225                         *tempMaxp = '\0'; strncat(tempMaxp, maxp.c_str(), maxp.length());
1226                         //strcpy(tempMaxp, maxp.c_str());
1227                         cPara.push_back(tempMaxp);
1228                 }
1229                 
1230                 if (!skipgaps) {
1231                         char* tempskipgaps = new char[13]; 
1232                         //strcpy(tempskipgaps, "--[no]skipgaps");
1233                         *tempskipgaps = '\0'; strncat(tempskipgaps, "--noskipgaps", 12);
1234                         cPara.push_back(tempskipgaps);
1235                 }
1236                 
1237                 if (!skipgaps2) {
1238                         char* tempskipgaps2 = new char[14]; 
1239                         //strcpy(tempskipgaps2, "--[no]skipgaps2"); 
1240                         *tempskipgaps2 = '\0'; strncat(tempskipgaps2, "--noskipgaps2", 13);
1241                         cPara.push_back(tempskipgaps2);
1242                 }
1243                 
1244                 if (useMinlen) {
1245                         char* tempminlen = new char[9]; 
1246                         *tempminlen = '\0'; strncat(tempminlen, "--minlen", 8);
1247                         //strcpy(tempminlen, "--minlen"); 
1248                         cPara.push_back(tempminlen);
1249                         char* tempMinlen = new char[minlen.length()+1];
1250                         //strcpy(tempMinlen, minlen.c_str());
1251                         *tempMinlen = '\0'; strncat(tempMinlen, minlen.c_str(), minlen.length());
1252                         cPara.push_back(tempMinlen);
1253                 }
1254                 
1255                 if (useMaxlen) {
1256                         char* tempmaxlen = new char[9]; 
1257                         //strcpy(tempmaxlen, "--maxlen"); 
1258                         *tempmaxlen = '\0'; strncat(tempmaxlen, "--maxlen", 8);
1259                         cPara.push_back(tempmaxlen);
1260                         char* tempMaxlen = new char[maxlen.length()+1];
1261                         *tempMaxlen = '\0'; strncat(tempMaxlen, maxlen.c_str(), maxlen.length());
1262                         //strcpy(tempMaxlen, maxlen.c_str());
1263                         cPara.push_back(tempMaxlen);
1264                 }
1265                 
1266                 if (ucl) {
1267                         char* tempucl = new char[5]; 
1268                         strcpy(tempucl, "--ucl"); 
1269                         cPara.push_back(tempucl);
1270                 }
1271                 
1272                 if (useQueryfract) {
1273                         char* tempqueryfract = new char[13]; 
1274                         *tempqueryfract = '\0'; strncat(tempqueryfract, "--queryfract", 12);
1275                         //strcpy(tempqueryfract, "--queryfract"); 
1276                         cPara.push_back(tempqueryfract);
1277                         char* tempQueryfract = new char[queryfract.length()+1];
1278                         *tempQueryfract = '\0'; strncat(tempQueryfract, queryfract.c_str(), queryfract.length());
1279                         //strcpy(tempQueryfract, queryfract.c_str());
1280                         cPara.push_back(tempQueryfract);
1281                 }
1282                 
1283                 
1284                 char** uchimeParameters;
1285                 uchimeParameters = new char*[cPara.size()];
1286                 string commandString = "";
1287                 for (int i = 0; i < cPara.size(); i++) {  uchimeParameters[i] = cPara[i];  commandString += toString(cPara[i]) + " "; } 
1288                 //int numArgs = cPara.size();
1289                 
1290                 //uchime_main(numArgs, uchimeParameters); 
1291                 //cout << "commandString = " << commandString << endl;
1292 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
1293 #else
1294                 commandString = "\"" + commandString + "\"";
1295 #endif
1296         if (m->debug) { m->mothurOut("[DEBUG]: uchime command = " + commandString + ".\n"); }
1297                 system(commandString.c_str());
1298                 
1299                 //free memory
1300                 for(int i = 0; i < cPara.size(); i++)  {  delete cPara[i];  }
1301                 delete[] uchimeParameters; 
1302                 
1303                 //remove "" from filenames
1304                 outputFName = outputFName.substr(1, outputFName.length()-2);
1305                 filename = filename.substr(1, filename.length()-2);
1306                 alns = alns.substr(1, alns.length()-2);
1307                 
1308                 if (m->control_pressed) { return 0; }
1309                 
1310                 //create accnos file from uchime results
1311                 ifstream in; 
1312                 m->openInputFile(outputFName, in);
1313                 
1314                 ofstream out;
1315                 m->openOutputFile(accnos, out);
1316                 
1317                 int num = 0;
1318                 numChimeras = 0;
1319                 while(!in.eof()) {
1320                         
1321                         if (m->control_pressed) { break; }
1322                         
1323                         string name = "";
1324                         string chimeraFlag = "";
1325                         in >> chimeraFlag >> name;
1326                         
1327                         //fix name if needed
1328                         if (templatefile == "self") { 
1329                                 name = name.substr(0, name.length()-1); //rip off last /
1330                                 name = name.substr(0, name.find_last_of('/'));
1331                         }
1332                         
1333                         for (int i = 0; i < 15; i++) {  in >> chimeraFlag; }
1334                         m->gobble(in);
1335                         
1336                         if (chimeraFlag == "Y") {  out << name << endl; numChimeras++; }
1337                         num++;
1338                 }
1339                 in.close();
1340                 out.close();
1341                 
1342                 return num;
1343         }
1344         catch(exception& e) {
1345                 m->errorOut(e, "ChimeraUchimeCommand", "driver");
1346                 exit(1);
1347         }
1348 }
1349 /**************************************************************************************************/
1350
1351 int ChimeraUchimeCommand::createProcesses(string outputFileName, string filename, string accnos, string alns, int& numChimeras) {
1352         try {
1353                 
1354                 processIDS.clear();
1355                 int process = 1;
1356                 int num = 0;
1357                 vector<string> files;
1358                 
1359 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)          
1360                 //break up file into multiple files
1361                 m->divideFile(filename, processors, files);
1362                 
1363                 if (m->control_pressed) {  return 0;  }
1364                                 
1365                 //loop through and create all the processes you want
1366                 while (process != processors) {
1367                         int pid = fork();
1368                         
1369                         if (pid > 0) {
1370                                 processIDS.push_back(pid);  //create map from line number to pid so you can append files in correct order later
1371                                 process++;
1372                         }else if (pid == 0){
1373                                 num = driver(outputFileName + toString(getpid()) + ".temp", files[process], accnos + toString(getpid()) + ".temp", alns + toString(getpid()) + ".temp", numChimeras);
1374                                 
1375                                 //pass numSeqs to parent
1376                                 ofstream out;
1377                                 string tempFile = outputFileName + toString(getpid()) + ".num.temp";
1378                                 m->openOutputFile(tempFile, out);
1379                                 out << num << endl;
1380                                 out << numChimeras << endl;
1381                                 out.close();
1382                                 
1383                                 exit(0);
1384                         }else { 
1385                                 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine(); 
1386                                 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
1387                                 exit(0);
1388                         }
1389                 }
1390                 
1391                 //do my part
1392                 num = driver(outputFileName, files[0], accnos, alns, numChimeras);
1393                 
1394                 //force parent to wait until all the processes are done
1395                 for (int i=0;i<processIDS.size();i++) { 
1396                         int temp = processIDS[i];
1397                         wait(&temp);
1398                 }
1399                 
1400                 for (int i = 0; i < processIDS.size(); i++) {
1401                         ifstream in;
1402                         string tempFile =  outputFileName + toString(processIDS[i]) + ".num.temp";
1403                         m->openInputFile(tempFile, in);
1404                         if (!in.eof()) { 
1405                                 int tempNum = 0; 
1406                                 in >> tempNum; m->gobble(in);
1407                                 num += tempNum; 
1408                                 in >> tempNum;
1409                                 numChimeras += tempNum;
1410                         }
1411                         in.close(); m->mothurRemove(tempFile);
1412                 }
1413 #else
1414                 //////////////////////////////////////////////////////////////////////////////////////////////////////
1415                 //Windows version shared memory, so be careful when passing variables through the preClusterData struct. 
1416                 //Above fork() will clone, so memory is separate, but that's not the case with windows, 
1417                 //////////////////////////////////////////////////////////////////////////////////////////////////////
1418                 
1419                 //divide file
1420                 int count = 0;
1421                 int spot = 0;
1422                 map<int, ofstream*> filehandles;
1423                 map<int, ofstream*>::iterator it3;
1424                 
1425                 ofstream* temp;
1426                 for (int i = 0; i < processors; i++) {
1427                         temp = new ofstream;
1428                         filehandles[i] = temp;
1429                         m->openOutputFile(filename+toString(i)+".temp", *(temp));
1430                         files.push_back(filename+toString(i)+".temp");
1431                 }
1432                 
1433                 ifstream in;
1434                 m->openInputFile(filename, in);
1435                 
1436                 while(!in.eof()) {
1437                         
1438                         if (m->control_pressed) { in.close(); for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) { (*(it3->second)).close(); delete it3->second; } return 0; }
1439                         
1440                         Sequence tempSeq(in); m->gobble(in); 
1441                         
1442                         if (tempSeq.getName() != "") {
1443                                 tempSeq.printSequence(*(filehandles[spot])); 
1444                                 spot++; count++;
1445                                 if (spot == processors) { spot = 0; }
1446                         }
1447                 }
1448                 in.close();
1449                 
1450                 //delete memory
1451                 for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) {
1452                         (*(it3->second)).close();
1453                         delete it3->second;
1454                 }
1455                 
1456                 //sanity check for number of processors
1457                 if (count < processors) { processors = count; }
1458                 
1459                 vector<uchimeData*> pDataArray; 
1460                 DWORD   dwThreadIdArray[processors-1];
1461                 HANDLE  hThreadArray[processors-1]; 
1462                 vector<string> dummy; //used so that we can use the same struct for MyUchimeSeqsThreadFunction and MyUchimeThreadFunction
1463                 
1464                 //Create processor worker threads.
1465                 for( int i=1; i<processors; i++ ){
1466                         // Allocate memory for thread data.
1467                         string extension = toString(i) + ".temp";
1468                         
1469                         uchimeData* tempUchime = new uchimeData(outputFileName+extension, uchimeLocation, templatefile, files[i], "", "", "", accnos+extension, alns+extension, dummy, m, 0, 0,  i);
1470                         tempUchime->setBooleans(useAbskew, chimealns, useMinH, useMindiv, useXn, useDn, useXa, useChunks, useMinchunk, useIdsmoothwindow, useMinsmoothid, useMaxp, skipgaps, skipgaps2, useMinlen, useMaxlen, ucl, useQueryfract);
1471                         tempUchime->setVariables(abskew, minh, mindiv, xn, dn, xa, chunks, minchunk, idsmoothwindow, minsmoothid, maxp, minlen, maxlen, queryfract);
1472                         
1473                         pDataArray.push_back(tempUchime);
1474                         processIDS.push_back(i);
1475                         
1476                         //MySeqSumThreadFunction is in header. It must be global or static to work with the threads.
1477                         //default security attributes, thread function name, argument to thread function, use default creation flags, returns the thread identifier
1478                         hThreadArray[i-1] = CreateThread(NULL, 0, MyUchimeSeqsThreadFunction, pDataArray[i-1], 0, &dwThreadIdArray[i-1]);   
1479                 }
1480                 
1481                 
1482                 //using the main process as a worker saves time and memory
1483                 num = driver(outputFileName, files[0], accnos, alns, numChimeras);
1484                 
1485                 //Wait until all threads have terminated.
1486                 WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE);
1487                 
1488                 //Close all thread handles and free memory allocations.
1489                 for(int i=0; i < pDataArray.size(); i++){
1490                         num += pDataArray[i]->count;
1491                         numChimeras += pDataArray[i]->numChimeras;
1492                         CloseHandle(hThreadArray[i]);
1493                         delete pDataArray[i];
1494                 }
1495 #endif          
1496                 
1497                 //append output files
1498                 for(int i=0;i<processIDS.size();i++){
1499                         m->appendFiles((outputFileName + toString(processIDS[i]) + ".temp"), outputFileName);
1500                         m->mothurRemove((outputFileName + toString(processIDS[i]) + ".temp"));
1501                         
1502                         m->appendFiles((accnos + toString(processIDS[i]) + ".temp"), accnos);
1503                         m->mothurRemove((accnos + toString(processIDS[i]) + ".temp"));
1504                         
1505                         if (chimealns) {
1506                                 m->appendFiles((alns + toString(processIDS[i]) + ".temp"), alns);
1507                                 m->mothurRemove((alns + toString(processIDS[i]) + ".temp"));
1508                         }
1509                 }
1510                 
1511                 //get rid of the file pieces.
1512                 for (int i = 0; i < files.size(); i++) { m->mothurRemove(files[i]); }
1513                 return num;     
1514         }
1515         catch(exception& e) {
1516                 m->errorOut(e, "ChimeraUchimeCommand", "createProcesses");
1517                 exit(1);
1518         }
1519 }
1520 /**************************************************************************************************/
1521
1522 int ChimeraUchimeCommand::createProcessesGroups(SequenceParser& parser, string outputFName, string filename, string accnos, string alns, vector<string> groups, string nameFile, string groupFile, string fastaFile) {
1523         try {
1524                 
1525                 processIDS.clear();
1526                 int process = 1;
1527                 int num = 0;
1528                 
1529                 //sanity check
1530                 if (groups.size() < processors) { processors = groups.size(); }
1531                 
1532                 //divide the groups between the processors
1533                 vector<linePair> lines;
1534                 int numGroupsPerProcessor = groups.size() / processors;
1535                 for (int i = 0; i < processors; i++) {
1536                         int startIndex =  i * numGroupsPerProcessor;
1537                         int endIndex = (i+1) * numGroupsPerProcessor;
1538                         if(i == (processors - 1)){      endIndex = groups.size();       }
1539                         lines.push_back(linePair(startIndex, endIndex));
1540                 }
1541                 
1542 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)          
1543                                 
1544                 //loop through and create all the processes you want
1545                 while (process != processors) {
1546                         int pid = fork();
1547                         
1548                         if (pid > 0) {
1549                                 processIDS.push_back(pid);  //create map from line number to pid so you can append files in correct order later
1550                                 process++;
1551                         }else if (pid == 0){
1552                                 num = driverGroups(parser, outputFName + toString(getpid()) + ".temp", filename + toString(getpid()) + ".temp", accnos + toString(getpid()) + ".temp", alns + toString(getpid()) + ".temp", lines[process].start, lines[process].end, groups);
1553                                 
1554                                 //pass numSeqs to parent
1555                                 ofstream out;
1556                                 string tempFile = outputFName + toString(getpid()) + ".num.temp";
1557                                 m->openOutputFile(tempFile, out);
1558                                 out << num << endl;
1559                                 out.close();
1560                                 
1561                                 exit(0);
1562                         }else { 
1563                                 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine(); 
1564                                 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
1565                                 exit(0);
1566                         }
1567                 }
1568                 
1569                 //do my part
1570                 num = driverGroups(parser, outputFName, filename, accnos, alns, lines[0].start, lines[0].end, groups);
1571                 
1572                 //force parent to wait until all the processes are done
1573                 for (int i=0;i<processIDS.size();i++) { 
1574                         int temp = processIDS[i];
1575                         wait(&temp);
1576                 }
1577                 
1578                 for (int i = 0; i < processIDS.size(); i++) {
1579                         ifstream in;
1580                         string tempFile =  outputFName + toString(processIDS[i]) + ".num.temp";
1581                         m->openInputFile(tempFile, in);
1582                         if (!in.eof()) { int tempNum = 0; in >> tempNum; num += tempNum; }
1583                         in.close(); m->mothurRemove(tempFile);
1584                 }
1585                                 
1586 #else
1587                 //////////////////////////////////////////////////////////////////////////////////////////////////////
1588                 //Windows version shared memory, so be careful when passing variables through the uchimeData struct. 
1589                 //Above fork() will clone, so memory is separate, but that's not the case with windows, 
1590                 //////////////////////////////////////////////////////////////////////////////////////////////////////
1591                 
1592                 vector<uchimeData*> pDataArray; 
1593                 DWORD   dwThreadIdArray[processors-1];
1594                 HANDLE  hThreadArray[processors-1]; 
1595                 
1596                 //Create processor worker threads.
1597                 for( int i=1; i<processors; i++ ){
1598                         // Allocate memory for thread data.
1599                         string extension = toString(i) + ".temp";
1600                         
1601                         uchimeData* tempUchime = new uchimeData(outputFName+extension, uchimeLocation, templatefile, filename+extension, fastaFile, nameFile, groupFile, accnos+extension, alns+extension, groups, m, lines[i].start, lines[i].end,  i);
1602                         tempUchime->setBooleans(useAbskew, chimealns, useMinH, useMindiv, useXn, useDn, useXa, useChunks, useMinchunk, useIdsmoothwindow, useMinsmoothid, useMaxp, skipgaps, skipgaps2, useMinlen, useMaxlen, ucl, useQueryfract);
1603                         tempUchime->setVariables(abskew, minh, mindiv, xn, dn, xa, chunks, minchunk, idsmoothwindow, minsmoothid, maxp, minlen, maxlen, queryfract);
1604                         
1605                         pDataArray.push_back(tempUchime);
1606                         processIDS.push_back(i);
1607                         
1608                         //MyUchimeThreadFunction is in header. It must be global or static to work with the threads.
1609                         //default security attributes, thread function name, argument to thread function, use default creation flags, returns the thread identifier
1610                         hThreadArray[i-1] = CreateThread(NULL, 0, MyUchimeThreadFunction, pDataArray[i-1], 0, &dwThreadIdArray[i-1]);   
1611                 }
1612                 
1613                 
1614                 //using the main process as a worker saves time and memory
1615                 num = driverGroups(parser, outputFName, filename, accnos, alns, lines[0].start, lines[0].end, groups);
1616                 
1617                 //Wait until all threads have terminated.
1618                 WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE);
1619                 
1620                 //Close all thread handles and free memory allocations.
1621                 for(int i=0; i < pDataArray.size(); i++){
1622                         num += pDataArray[i]->count;
1623                         CloseHandle(hThreadArray[i]);
1624                         delete pDataArray[i];
1625                 }
1626 #endif          
1627                 
1628                                 
1629                 //append output files
1630                 for(int i=0;i<processIDS.size();i++){
1631                         m->appendFiles((outputFName + toString(processIDS[i]) + ".temp"), outputFName);
1632                         m->mothurRemove((outputFName + toString(processIDS[i]) + ".temp"));
1633                         
1634                         m->appendFiles((accnos + toString(processIDS[i]) + ".temp"), accnos);
1635                         m->mothurRemove((accnos + toString(processIDS[i]) + ".temp"));
1636                         
1637                         if (chimealns) {
1638                                 m->appendFiles((alns + toString(processIDS[i]) + ".temp"), alns);
1639                                 m->mothurRemove((alns + toString(processIDS[i]) + ".temp"));
1640                         }
1641                 }
1642                 
1643                 return num;     
1644                 
1645         }
1646         catch(exception& e) {
1647                 m->errorOut(e, "ChimeraUchimeCommand", "createProcessesGroups");
1648                 exit(1);
1649         }
1650 }
1651 /**************************************************************************************************/
1652