]> git.donarmstrong.com Git - mothur.git/blob - chimerauchimecommand.cpp
1.20.0
[mothur.git] / chimerauchimecommand.cpp
1 /*
2  *  chimerauchimecommand.cpp
3  *  Mothur
4  *
5  *  Created by westcott on 5/13/11.
6  *  Copyright 2011 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "chimerauchimecommand.h"
11 #include "deconvolutecommand.h"
12 #include "uc.h"
13 #include "sequence.hpp"
14
15
16 //**********************************************************************************************************************
17 vector<string> ChimeraUchimeCommand::setParameters(){   
18         try {
19                 CommandParameter ptemplate("reference", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(ptemplate);
20                 CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pfasta);
21                 CommandParameter pname("name", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pname);
22                 CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors);
23                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
24                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
25                 CommandParameter pabskew("abskew", "Number", "", "1.9", "", "", "",false,false); parameters.push_back(pabskew);
26                 CommandParameter pchimealns("chimealns", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pchimealns);
27                 CommandParameter pminh("minh", "Number", "", "0.3", "", "", "",false,false); parameters.push_back(pminh);
28                 CommandParameter pmindiv("mindiv", "Number", "", "0.5", "", "", "",false,false); parameters.push_back(pmindiv);
29                 CommandParameter pxn("xn", "Number", "", "8.0", "", "", "",false,false); parameters.push_back(pxn);
30                 CommandParameter pdn("dn", "Number", "", "1.4", "", "", "",false,false); parameters.push_back(pdn);
31                 CommandParameter pxa("xa", "Number", "", "1", "", "", "",false,false); parameters.push_back(pxa);
32                 CommandParameter pchunks("chunks", "Number", "", "4", "", "", "",false,false); parameters.push_back(pchunks);
33                 CommandParameter pminchunk("minchunk", "Number", "", "64", "", "", "",false,false); parameters.push_back(pminchunk);
34                 CommandParameter pidsmoothwindow("idsmoothwindow", "Number", "", "32", "", "", "",false,false); parameters.push_back(pidsmoothwindow);
35                 CommandParameter pminsmoothid("minsmoothid", "Number", "", "0.95", "", "", "",false,false); parameters.push_back(pminsmoothid);
36                 CommandParameter pmaxp("maxp", "Number", "", "2", "", "", "",false,false); parameters.push_back(pmaxp);
37                 CommandParameter pskipgaps("skipgaps", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(pskipgaps);
38                 CommandParameter pskipgaps2("skipgaps2", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(pskipgaps2);
39                 CommandParameter pminlen("minlen", "Number", "", "10", "", "", "",false,false); parameters.push_back(pminlen);
40                 CommandParameter pmaxlen("maxlen", "Number", "", "10000", "", "", "",false,false); parameters.push_back(pmaxlen);
41                 CommandParameter pucl("ucl", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pucl);
42                 CommandParameter pqueryfract("queryfract", "Number", "", "0.5", "", "", "",false,false); parameters.push_back(pqueryfract);
43                 
44                 vector<string> myArray;
45                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
46                 return myArray;
47         }
48         catch(exception& e) {
49                 m->errorOut(e, "ChimeraUchimeCommand", "setParameters");
50                 exit(1);
51         }
52 }
53 //**********************************************************************************************************************
54 string ChimeraUchimeCommand::getHelpString(){   
55         try {
56                 string helpString = "";
57                 helpString += "The chimera.uchime command reads a fastafile and referencefile and outputs potentially chimeric sequences.\n";
58                 helpString += "This command is a wrapper for uchime written by Robert C. Edgar.\n";
59                 helpString += "The chimera.uchime command parameters are fasta, name, reference, processors, abskew, chimealns, minh, mindiv, xn, dn, xa, chunks, minchunk, idsmoothwindow, minsmoothid, maxp, skipgaps, skipgaps2, minlen, maxlen, ucl and queryfact.\n";
60                 helpString += "The fasta parameter allows you to enter the fasta file containing your potentially chimeric sequences, and is required, unless you have a valid current fasta file. \n";
61                 helpString += "The name parameter allows you to provide a name file, if you are using template=self. \n";
62                 helpString += "You may enter multiple fasta files by separating their names with dashes. ie. fasta=abrecovery.fasta-amazon.fasta \n";
63                 helpString += "The reference parameter allows you to enter a reference file containing known non-chimeric sequences, and is required. You may also set template=self, in this case the abundant sequences will be used as potential parents. \n";
64                 helpString += "The processors parameter allows you to specify how many processors you would like to use.  The default is 1. \n";
65                 helpString += "The abskew parameter can only be used with template=self. Minimum abundance skew. Default 1.9. Abundance skew is: min [ abund(parent1), abund(parent2) ] / abund(query).\n";
66                 helpString += "The chimealns parameter allows you to indicate you would like a file containing multiple alignments of query sequences to parents in human readable format. Alignments show columns with differences that support or contradict a chimeric model.\n";
67                 helpString += "The minh parameter - mininum score to report chimera. Default 0.3. Values from 0.1 to 5 might be reasonable. Lower values increase sensitivity but may report more false positives. If you decrease xn you may need to increase minh, and vice versa.\n";
68                 helpString += "The mindiv parameter - minimum divergence ratio, default 0.5. Div ratio is 100%% - %%identity between query sequence and the closest candidate for being a parent. If you don't care about very close chimeras, then you could increase mindiv to, say, 1.0 or 2.0, and also decrease minh, say to 0.1, to increase sensitivity. How well this works will depend on your data. Best is to tune parameters on a good benchmark.\n";
69                 helpString += "The xn parameter - weight of a no vote. Default 8.0. Decreasing this weight to around 3 or 4 may give better performance on denoised data.\n";
70                 helpString += "The dn parameter - pseudo-count prior on number of no votes. Default 1.4. Probably no good reason to change this unless you can retune to a good benchmark for your data. Reasonable values are probably in the range from 0.2 to 2.\n";
71                 helpString += "The xa parameter - weight of an abstain vote. Default 1. So far, results do not seem to be very sensitive to this parameter, but if you have a good training set might be worth trying. Reasonable values might range from 0.1 to 2.\n";
72                 helpString += "The chunks parameter is the number of chunks to extract from the query sequence when searching for parents. Default 4.\n";
73                 helpString += "The minchunk parameter is the minimum length of a chunk. Default 64.\n";
74                 helpString += "The idsmoothwindow parameter is the length of id smoothing window. Default 32.\n";
75                 helpString += "The minsmoothid parameter - minimum factional identity over smoothed window of candidate parent. Default 0.95.\n";
76                 helpString += "The maxp parameter - maximum number of candidate parents to consider. Default 2. In tests so far, increasing maxp gives only a very small improvement in sensivity but tends to increase the error rate quite a bit.\n";
77                 helpString += "The skipgaps parameter controls how gapped columns affect counting of diffs. If skipgaps is set to T, columns containing gaps do not found as diffs. Default = T.\n";
78                 helpString += "The skipgaps2 parameter controls how gapped columns affect counting of diffs. If skipgaps2 is set to T, if column is immediately adjacent to a column containing a gap, it is not counted as a diff. Default = T.\n";
79                 helpString += "The minlen parameter is the minimum unaligned sequence length. Defaults 10. Applies to both query and reference sequences.\n";
80                 helpString += "The maxlen parameter is the maximum unaligned sequence length. Defaults 10000. Applies to both query and reference sequences.\n";
81                 helpString += "The ucl parameter - use local-X alignments. Default is global-X or false. On tests so far, global-X is always better; this option is retained because it just might work well on some future type of data.\n";
82                 helpString += "The queryfract parameter - minimum fraction of the query sequence that must be covered by a local-X alignment. Default 0.5. Applies only when ucl is true.\n";
83 #ifdef USE_MPI
84                 helpString += "When using MPI, the processors parameter is set to the number of MPI processes running. \n";
85 #endif
86                 helpString += "The chimera.uchime command should be in the following format: \n";
87                 helpString += "chimera.uchime(fasta=yourFastaFile, reference=yourTemplate) \n";
88                 helpString += "Example: chimera.uchime(fasta=AD.align, reference=silva.gold.align) \n";
89                 helpString += "Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFastaFile).\n";       
90                 return helpString;
91         }
92         catch(exception& e) {
93                 m->errorOut(e, "ChimeraUchimeCommand", "getHelpString");
94                 exit(1);
95         }
96 }
97 //**********************************************************************************************************************
98 ChimeraUchimeCommand::ChimeraUchimeCommand(){   
99         try {
100                 abort = true; calledHelp = true;
101                 setParameters();
102                 vector<string> tempOutNames;
103                 outputTypes["chimera"] = tempOutNames;
104                 outputTypes["accnos"] = tempOutNames;
105                 outputTypes["alns"] = tempOutNames;
106         }
107         catch(exception& e) {
108                 m->errorOut(e, "ChimeraUchimeCommand", "ChimeraUchimeCommand");
109                 exit(1);
110         }
111 }
112 //***************************************************************************************************************
113 ChimeraUchimeCommand::ChimeraUchimeCommand(string option)  {
114         try {
115                 abort = false; calledHelp = false;   
116                 
117                 //allow user to run help
118                 if(option == "help") { help(); abort = true; calledHelp = true; }
119                 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
120                 
121                 else {
122                         vector<string> myArray = setParameters();
123                         
124                         OptionParser parser(option);
125                         map<string,string> parameters = parser.getParameters();
126                         
127                         ValidParameters validParameter("chimera.uchime");
128                         map<string,string>::iterator it;
129                         
130                         //check to make sure all parameters are valid for command
131                         for (it = parameters.begin(); it != parameters.end(); it++) { 
132                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
133                         }
134                         
135                         vector<string> tempOutNames;
136                         outputTypes["chimera"] = tempOutNames;
137                         outputTypes["accnos"] = tempOutNames;
138                         outputTypes["alns"] = tempOutNames;
139                         
140                         //if the user changes the input directory command factory will send this info to us in the output parameter 
141                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
142                         if (inputDir == "not found"){   inputDir = "";          }
143                         
144                         //check for required parameters
145                         fastafile = validParameter.validFile(parameters, "fasta", false);
146                         if (fastafile == "not found") {                                 
147                                 //if there is a current fasta file, use it
148                                 string filename = m->getFastaFile(); 
149                                 if (filename != "") { fastaFileNames.push_back(filename); m->mothurOut("Using " + filename + " as input file for the fasta parameter."); m->mothurOutEndLine(); }
150                                 else {  m->mothurOut("You have no current fastafile and the fasta parameter is required."); m->mothurOutEndLine(); abort = true; }
151                         }else { 
152                                 m->splitAtDash(fastafile, fastaFileNames);
153                                 
154                                 //go through files and make sure they are good, if not, then disregard them
155                                 for (int i = 0; i < fastaFileNames.size(); i++) {
156                                         
157                                         bool ignore = false;
158                                         if (fastaFileNames[i] == "current") { 
159                                                 fastaFileNames[i] = m->getFastaFile(); 
160                                                 if (fastaFileNames[i] != "") {  m->mothurOut("Using " + fastaFileNames[i] + " as input file for the fasta parameter where you had given current."); m->mothurOutEndLine(); }
161                                                 else {  
162                                                         m->mothurOut("You have no current fastafile, ignoring current."); m->mothurOutEndLine(); ignore=true; 
163                                                         //erase from file list
164                                                         fastaFileNames.erase(fastaFileNames.begin()+i);
165                                                         i--;
166                                                 }
167                                         }
168                                         
169                                         if (!ignore) {
170                                                 
171                                                 if (inputDir != "") {
172                                                         string path = m->hasPath(fastaFileNames[i]);
173                                                         //if the user has not given a path then, add inputdir. else leave path alone.
174                                                         if (path == "") {       fastaFileNames[i] = inputDir + fastaFileNames[i];               }
175                                                 }
176                                                 
177                                                 int ableToOpen;
178                                                 ifstream in;
179                                                 
180                                                 ableToOpen = m->openInputFile(fastaFileNames[i], in, "noerror");
181                                                 
182                                                 //if you can't open it, try default location
183                                                 if (ableToOpen == 1) {
184                                                         if (m->getDefaultPath() != "") { //default path is set
185                                                                 string tryPath = m->getDefaultPath() + m->getSimpleName(fastaFileNames[i]);
186                                                                 m->mothurOut("Unable to open " + fastaFileNames[i] + ". Trying default " + tryPath); m->mothurOutEndLine();
187                                                                 ifstream in2;
188                                                                 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
189                                                                 in2.close();
190                                                                 fastaFileNames[i] = tryPath;
191                                                         }
192                                                 }
193                                                 
194                                                 if (ableToOpen == 1) {
195                                                         if (m->getOutputDir() != "") { //default path is set
196                                                                 string tryPath = m->getOutputDir() + m->getSimpleName(fastaFileNames[i]);
197                                                                 m->mothurOut("Unable to open " + fastaFileNames[i] + ". Trying output directory " + tryPath); m->mothurOutEndLine();
198                                                                 ifstream in2;
199                                                                 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
200                                                                 in2.close();
201                                                                 fastaFileNames[i] = tryPath;
202                                                         }
203                                                 }
204                                                 
205                                                 in.close();
206                                                 
207                                                 if (ableToOpen == 1) { 
208                                                         m->mothurOut("Unable to open " + fastaFileNames[i] + ". It will be disregarded."); m->mothurOutEndLine(); 
209                                                         //erase from file list
210                                                         fastaFileNames.erase(fastaFileNames.begin()+i);
211                                                         i--;
212                                                 }else {
213                                                         m->setFastaFile(fastaFileNames[i]);
214                                                 }
215                                         }
216                                 }
217                                 
218                                 //make sure there is at least one valid file left
219                                 if (fastaFileNames.size() == 0) { m->mothurOut("[ERROR]: no valid files."); m->mothurOutEndLine(); abort = true; }
220                         }
221                         
222                         
223                         //check for required parameters
224                         bool hasName = true;
225                         namefile = validParameter.validFile(parameters, "name", false);
226                         if (namefile == "not found") { namefile = "";  hasName = false; }
227                         else { 
228                                 m->splitAtDash(namefile, nameFileNames);
229                                 
230                                 //go through files and make sure they are good, if not, then disregard them
231                                 for (int i = 0; i < nameFileNames.size(); i++) {
232                                         
233                                         bool ignore = false;
234                                         if (nameFileNames[i] == "current") { 
235                                                 nameFileNames[i] = m->getNameFile(); 
236                                                 if (nameFileNames[i] != "") {  m->mothurOut("Using " + nameFileNames[i] + " as input file for the name parameter where you had given current."); m->mothurOutEndLine(); }
237                                                 else {  
238                                                         m->mothurOut("You have no current namefile, ignoring current."); m->mothurOutEndLine(); ignore=true; 
239                                                         //erase from file list
240                                                         nameFileNames.erase(nameFileNames.begin()+i);
241                                                         i--;
242                                                 }
243                                         }
244                                         
245                                         if (!ignore) {
246                                                 
247                                                 if (inputDir != "") {
248                                                         string path = m->hasPath(nameFileNames[i]);
249                                                         //if the user has not given a path then, add inputdir. else leave path alone.
250                                                         if (path == "") {       nameFileNames[i] = inputDir + nameFileNames[i];         }
251                                                 }
252                                                 
253                                                 int ableToOpen;
254                                                 ifstream in;
255                                                 
256                                                 ableToOpen = m->openInputFile(nameFileNames[i], in, "noerror");
257                                                 
258                                                 //if you can't open it, try default location
259                                                 if (ableToOpen == 1) {
260                                                         if (m->getDefaultPath() != "") { //default path is set
261                                                                 string tryPath = m->getDefaultPath() + m->getSimpleName(nameFileNames[i]);
262                                                                 m->mothurOut("Unable to open " + nameFileNames[i] + ". Trying default " + tryPath); m->mothurOutEndLine();
263                                                                 ifstream in2;
264                                                                 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
265                                                                 in2.close();
266                                                                 nameFileNames[i] = tryPath;
267                                                         }
268                                                 }
269                                                 
270                                                 if (ableToOpen == 1) {
271                                                         if (m->getOutputDir() != "") { //default path is set
272                                                                 string tryPath = m->getOutputDir() + m->getSimpleName(nameFileNames[i]);
273                                                                 m->mothurOut("Unable to open " + nameFileNames[i] + ". Trying output directory " + tryPath); m->mothurOutEndLine();
274                                                                 ifstream in2;
275                                                                 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
276                                                                 in2.close();
277                                                                 nameFileNames[i] = tryPath;
278                                                         }
279                                                 }
280                                                 
281                                                 in.close();
282                                                 
283                                                 if (ableToOpen == 1) { 
284                                                         m->mothurOut("Unable to open " + nameFileNames[i] + ". It will be disregarded."); m->mothurOutEndLine(); 
285                                                         //erase from file list
286                                                         nameFileNames.erase(nameFileNames.begin()+i);
287                                                         i--;
288                                                 }else {
289                                                         m->setNameFile(nameFileNames[i]);
290                                                 }
291                                         }
292                                 }
293                                 
294                                 //make sure there is at least one valid file left
295                                 if (nameFileNames.size() == 0) { m->mothurOut("[ERROR]: no valid name files."); m->mothurOutEndLine(); abort = true; }
296                         }
297                         
298                         if (hasName && (nameFileNames.size() != fastaFileNames.size())) { m->mothurOut("[ERROR]: The number of namefiles does not match the number of fastafiles, please correct."); m->mothurOutEndLine(); abort=true; }
299                         
300                         //if the user changes the output directory command factory will send this info to us in the output parameter 
301                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = ""; }
302                         
303                         
304                         string path;
305                         it = parameters.find("reference");
306                         //user has given a template file
307                         if(it != parameters.end()){ 
308                                 if (it->second == "self") { templatefile = "self"; }
309                                 else {
310                                         path = m->hasPath(it->second);
311                                         //if the user has not given a path then, add inputdir. else leave path alone.
312                                         if (path == "") {       parameters["reference"] = inputDir + it->second;                }
313                                         
314                                         templatefile = validParameter.validFile(parameters, "reference", true);
315                                         if (templatefile == "not open") { abort = true; }
316                                         else if (templatefile == "not found") { templatefile = "";  m->mothurOut("reference is a required parameter for the chimera.uchime command."); m->mothurOutEndLine(); abort = true;  }  
317                                 }
318                         }else if (hasName) {  templatefile = "self"; }
319                         else { templatefile = "";  m->mothurOut("reference is a required parameter for the chimera.uchime command, unless you have a namefile."); m->mothurOutEndLine(); abort = true;  }       
320
321                         
322                         string temp = validParameter.validFile(parameters, "processors", false);        if (temp == "not found"){       temp = m->getProcessors();      }
323                         m->setProcessors(temp);
324                         convert(temp, processors);
325                         
326                         abskew = validParameter.validFile(parameters, "abskew", false); if (abskew == "not found"){     useAbskew = false;  abskew = "1.9";     }else{  useAbskew = true;  }
327                         if (useAbskew && templatefile != "self") { m->mothurOut("The abskew parameter is only valid with template=self, ignoring."); m->mothurOutEndLine(); useAbskew = false; }
328                         
329                         temp = validParameter.validFile(parameters, "chimealns", false);                        if (temp == "not found") { temp = "f"; }
330                         chimealns = m->isTrue(temp); 
331                         
332                         minh = validParameter.validFile(parameters, "minh", false);                                             if (minh == "not found")                        { useMinH = false; minh = "0.3";                                        }       else{ useMinH = true;                   }
333                         mindiv = validParameter.validFile(parameters, "mindiv", false);                                 if (mindiv == "not found")                      { useMindiv = false; mindiv = "0.5";                            }       else{ useMindiv = true;                 }
334                         xn = validParameter.validFile(parameters, "xn", false);                                                 if (xn == "not found")                          { useXn = false; xn = "8.0";                                            }       else{ useXn = true;                             }
335                         dn = validParameter.validFile(parameters, "dn", false);                                                 if (dn == "not found")                          { useDn = false; dn = "1.4";                                            }       else{ useDn = true;                             }
336                         xa = validParameter.validFile(parameters, "xa", false);                                                 if (xa == "not found")                          { useXa = false; xa = "1";                                                      }       else{ useXa = true;                             }
337                         chunks = validParameter.validFile(parameters, "chunks", false);                                 if (chunks == "not found")                      { useChunks = false; chunks = "4";                                      }       else{ useChunks = true;                 }
338                         minchunk = validParameter.validFile(parameters, "minchunk", false);                             if (minchunk == "not found")            { useMinchunk = false; minchunk = "64";                         }       else{ useMinchunk = true;               }
339                         idsmoothwindow = validParameter.validFile(parameters, "idsmoothwindow", false); if (idsmoothwindow == "not found")      { useIdsmoothwindow = false; idsmoothwindow = "32";     }       else{ useIdsmoothwindow = true; }
340                         minsmoothid = validParameter.validFile(parameters, "minsmoothid", false);               if (minsmoothid == "not found")         { useMinsmoothid = false; minsmoothid = "0.95";         }       else{ useMinsmoothid = true;    }
341                         maxp = validParameter.validFile(parameters, "maxp", false);                                             if (maxp == "not found")                        { useMaxp = false; maxp = "2";                                          }       else{ useMaxp = true;                   }
342                         minlen = validParameter.validFile(parameters, "minlen", false);                                 if (minlen == "not found")                      { useMinlen = false; minlen = "10";                                     }       else{ useMinlen = true;                 }
343                         maxlen = validParameter.validFile(parameters, "maxlen", false);                                 if (maxlen == "not found")                      { useMaxlen = false; maxlen = "10000";                          }       else{ useMaxlen = true;                 }
344                         
345                         temp = validParameter.validFile(parameters, "ucl", false);                                              if (temp == "not found") { temp = "f"; }
346                         ucl = m->isTrue(temp);
347                         
348                         queryfract = validParameter.validFile(parameters, "queryfract", false);                 if (queryfract == "not found")          { useQueryfract = false; queryfract = "0.5";            }       else{ useQueryfract = true;             }
349                         if (!ucl && useQueryfract) { m->mothurOut("queryfact may only be used when ucl=t, ignoring."); m->mothurOutEndLine(); useQueryfract = false; }
350                         
351                         temp = validParameter.validFile(parameters, "skipgaps", false);                                 if (temp == "not found") { temp = "t"; }
352                         skipgaps = m->isTrue(temp); 
353
354                         temp = validParameter.validFile(parameters, "skipgaps2", false);                                if (temp == "not found") { temp = "t"; }
355                         skipgaps2 = m->isTrue(temp); 
356
357                 }
358         }
359         catch(exception& e) {
360                 m->errorOut(e, "ChimeraSlayerCommand", "ChimeraSlayerCommand");
361                 exit(1);
362         }
363 }
364 //***************************************************************************************************************
365
366 int ChimeraUchimeCommand::execute(){
367         try{
368                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
369                 
370                 m->mothurOut("\nuchime by Robert C. Edgar\nhttp://drive5.com/uchime\nThis code is donated to the public domain.\n\n");
371                 
372                 for (int s = 0; s < fastaFileNames.size(); s++) {
373                         
374                         m->mothurOut("Checking sequences from " + fastaFileNames[s] + " ..." ); m->mothurOutEndLine();
375                         
376                         int start = time(NULL); 
377                         string nameFile = "";
378                         
379                         if (templatefile == "self") { //you want to run uchime with a refernce template
380                                 
381                                 #ifdef USE_MPI  
382                                         int pid; 
383                                         MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
384                                         if (pid == 0) { //you are the root process 
385                                 #endif  
386                                 
387                                 if (processors != 1) { m->mothurOut("When using template=self, mothur can only use 1 processor, continuing."); m->mothurOutEndLine(); processors = 1; }
388                                 if (nameFileNames.size() != 0) { //you provided a namefile and we don't need to create one
389                                         nameFile = nameFileNames[s];
390                                 }else {
391                                         m->mothurOutEndLine(); m->mothurOut("No namesfile given, running unique.seqs command to generate one."); m->mothurOutEndLine(); m->mothurOutEndLine();
392                                         
393                                         //use unique.seqs to create new name and fastafile
394                                         string inputString = "fasta=" + fastaFileNames[s];
395                                         m->mothurOut("/******************************************/"); m->mothurOutEndLine(); 
396                                         m->mothurOut("Running command: unique.seqs(" + inputString + ")"); m->mothurOutEndLine(); 
397                                         
398                                         Command* uniqueCommand = new DeconvoluteCommand(inputString);
399                                         uniqueCommand->execute();
400                                         
401                                         map<string, vector<string> > filenames = uniqueCommand->getOutputFiles();
402                                         
403                                         delete uniqueCommand;
404                                         
405                                         m->mothurOut("/******************************************/"); m->mothurOutEndLine(); 
406                                         
407                                         nameFile = filenames["name"][0];
408                                         fastaFileNames[s] = filenames["fasta"][0];
409                                 }
410                                 
411                                 //create input file for uchime
412                                 //read through fastafile and store info
413                                 map<string, string> seqs;
414                                 ifstream in;
415                                 m->openInputFile(fastaFileNames[s], in);
416                                 
417                                 while (!in.eof()) {
418                                         
419                                         if (m->control_pressed) { in.close(); for (int j = 0; j < outputNames.size(); j++) {    remove(outputNames[j].c_str()); }  return 0; }
420                                         
421                                         Sequence seq(in); m->gobble(in);
422                                         seqs[seq.getName()] = seq.getAligned();
423                                 }
424                                 in.close();
425                                 
426                                 //read namefile
427                                 vector<seqPriorityNode> nameMapCount;
428                                 int error = m->readNames(nameFile, nameMapCount, seqs);
429                                 
430                                 if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) {        remove(outputNames[j].c_str()); }  return 0; }
431                                 
432                                 if (error == 1) { for (int j = 0; j < outputNames.size(); j++) {        remove(outputNames[j].c_str()); }  return 0; }
433                                 if (seqs.size() != nameMapCount.size()) { m->mothurOut( "The number of sequences in your fastafile does not match the number of sequences in your namefile, aborting."); m->mothurOutEndLine(); for (int j = 0; j < outputNames.size(); j++) {  remove(outputNames[j].c_str()); }  return 0; }
434                                 
435                                 sort(nameMapCount.begin(), nameMapCount.end(), compareSeqPriorityNodes);
436                                 
437                                 string newFasta = fastaFileNames[s] + ".temp";
438                                 ofstream out;
439                                 m->openOutputFile(newFasta, out);
440                                 
441                                 //print new file in order of
442                                 for (int i = 0; i < nameMapCount.size(); i++) {
443                                         out << ">" << nameMapCount[i].name  << "/ab=" << nameMapCount[i].numIdentical << "/" << endl << nameMapCount[i].seq << endl;
444                                 }
445                                 out.close();
446                                 
447                                 fastaFileNames[s] = newFasta;
448                                                 
449                                 #ifdef USE_MPI  
450                                         }
451                                 #endif
452                                 if (m->control_pressed) {  for (int j = 0; j < outputNames.size(); j++) {       remove(outputNames[j].c_str()); }  return 0;    }                               
453                         }
454                         
455                         if (outputDir == "") { outputDir = m->hasPath(fastaFileNames[s]);  }//if user entered a file with a path then preserve it                               
456                         string outputFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + "uchime.chimera";
457                         string accnosFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s]))  + "uchime.accnos";
458                         string alnsFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s]))  + "uchime.alns";
459                         
460                         if (m->control_pressed) {  for (int j = 0; j < outputNames.size(); j++) {       remove(outputNames[j].c_str()); }  return 0;    }
461                         
462                         int numSeqs = 0;
463 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
464                         if(processors == 1){ numSeqs = driver(outputFileName, fastaFileNames[s], accnosFileName, alnsFileName); }
465                         else{   numSeqs = createProcesses(outputFileName, fastaFileNames[s], accnosFileName, alnsFileName); }
466 #else
467                         numSeqs = driver(outputFileName, fastaFileNames[s], accnosFileName, alnsFileName);
468 #endif
469                         if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) {        remove(outputNames[j].c_str()); } return 0; }
470                         
471                         //remove file made for uchime
472                         if (templatefile == "self") {  remove(fastaFileNames[s].c_str()); }
473                         
474                         outputNames.push_back(outputFileName); outputTypes["chimera"].push_back(outputFileName);
475                         outputNames.push_back(accnosFileName); outputTypes["accnos"].push_back(accnosFileName);
476                         if (chimealns) { outputNames.push_back(alnsFileName); outputTypes["alns"].push_back(alnsFileName); }
477                         
478                         m->mothurOutEndLine(); m->mothurOut("It took " + toString(time(NULL) - start) + " secs to check " + toString(numSeqs) + " sequences."); m->mothurOutEndLine();
479                 }
480                 
481                 //set accnos file as new current accnosfile
482                 string current = "";
483                 itTypes = outputTypes.find("accnos");
484                 if (itTypes != outputTypes.end()) {
485                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setAccnosFile(current); }
486                 }
487                 
488                 m->mothurOutEndLine();
489                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
490                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }       
491                 m->mothurOutEndLine();
492                 
493                 return 0;
494                 
495         }
496         catch(exception& e) {
497                 m->errorOut(e, "ChimeraUchimeCommand", "execute");
498                 exit(1);
499         }
500 }
501 //**********************************************************************************************************************
502
503 int ChimeraUchimeCommand::driver(string outputFName, string filename, string accnos, string alns){
504         try {
505                 
506                 vector<char*> cPara;
507                 
508                 char* tempUchime = new char[8];  
509                 strcpy(tempUchime, "./uchime "); 
510                 cPara.push_back(tempUchime);
511                 
512                 char* tempIn = new char[7];  
513                 strcpy(tempIn, "--input"); 
514                 cPara.push_back(tempIn);
515                 char* temp = new char[filename.length()];
516                 strcpy(temp, filename.c_str());
517                 cPara.push_back(temp);
518                 
519                 //are you using a reference file
520                 if (templatefile != "self") {
521                                                 
522                         //add reference file
523                         char* tempRef = new char[4]; 
524                         strcpy(tempRef, "--db"); 
525                         cPara.push_back(tempRef);  
526                         char* tempR = new char[templatefile.length()];
527                         strcpy(tempR, templatefile.c_str());
528                         cPara.push_back(tempR);
529                 }
530                 
531                 char* tempO = new char[11]; 
532                 strcpy(tempO, "--uchimeout"); 
533                 cPara.push_back(tempO);
534                 char* tempout = new char[outputFName.length()];
535                 strcpy(tempout, outputFName.c_str());
536                 cPara.push_back(tempout);
537                 
538                 if (chimealns) {
539                         char* tempA = new char[12]; 
540                         strcpy(tempA, "--uchimealns"); 
541                         cPara.push_back(tempA);
542                         char* tempa = new char[alns.length()];
543                         strcpy(tempa, alns.c_str());
544                         cPara.push_back(tempa);
545                 }
546                 
547                 if (useAbskew) {
548                         char* tempskew = new char[8]; 
549                         strcpy(tempskew, "--abskew"); 
550                         cPara.push_back(tempskew);
551                         char* tempSkew = new char[abskew.length()];
552                         strcpy(tempSkew, abskew.c_str());
553                         cPara.push_back(tempSkew);
554                 }
555                 
556                 if (useMinH) {
557                         char* tempminh = new char[6]; 
558                         strcpy(tempminh, "--minh"); 
559                         cPara.push_back(tempminh);
560                         char* tempMinH = new char[minh.length()];
561                         strcpy(tempMinH, minh.c_str());
562                         cPara.push_back(tempMinH);
563                 }
564                 
565                 if (useMindiv) {
566                         char* tempmindiv = new char[8]; 
567                         strcpy(tempmindiv, "--mindiv"); 
568                         cPara.push_back(tempmindiv);
569                         char* tempMindiv = new char[mindiv.length()];
570                         strcpy(tempMindiv, mindiv.c_str());
571                         cPara.push_back(tempMindiv);
572                 }
573                 
574                 if (useXn) {
575                         char* tempxn = new char[4]; 
576                         strcpy(tempxn, "--xn"); 
577                         cPara.push_back(tempxn);
578                         char* tempXn = new char[xn.length()];
579                         strcpy(tempXn, xn.c_str());
580                         cPara.push_back(tempXn);
581                 }
582                 
583                 if (useDn) {
584                         char* tempdn = new char[4]; 
585                         strcpy(tempdn, "--dn"); 
586                         cPara.push_back(tempdn);
587                         char* tempDn = new char[dn.length()];
588                         strcpy(tempDn, dn.c_str());
589                         cPara.push_back(tempDn);
590                 }
591                 
592                 if (useXa) {
593                         char* tempxa = new char[4]; 
594                         strcpy(tempxa, "--xa"); 
595                         cPara.push_back(tempxa);
596                         char* tempXa = new char[xa.length()];
597                         strcpy(tempXa, xa.c_str());
598                         cPara.push_back(tempXa);
599                 }
600                 
601                 if (useChunks) {
602                         char* tempchunks = new char[8]; 
603                         strcpy(tempchunks, "--chunks"); 
604                         cPara.push_back(tempchunks);
605                         char* tempChunks = new char[chunks.length()];
606                         strcpy(tempChunks, chunks.c_str());
607                         cPara.push_back(tempChunks);
608                 }
609                 
610                 if (useMinchunk) {
611                         char* tempminchunk = new char[10]; 
612                         strcpy(tempminchunk, "--minchunk"); 
613                         cPara.push_back(tempminchunk);
614                         char* tempMinchunk = new char[minchunk.length()];
615                         strcpy(tempMinchunk, minchunk.c_str());
616                         cPara.push_back(tempMinchunk);
617                 }
618                 
619                 if (useIdsmoothwindow) {
620                         char* tempidsmoothwindow = new char[16]; 
621                         strcpy(tempidsmoothwindow, "--idsmoothwindow"); 
622                         cPara.push_back(tempidsmoothwindow);
623                         char* tempIdsmoothwindow = new char[idsmoothwindow.length()];
624                         strcpy(tempIdsmoothwindow, idsmoothwindow.c_str());
625                         cPara.push_back(tempIdsmoothwindow);
626                 }
627                 
628                 if (useMinsmoothid) {
629                         char* tempminsmoothid = new char[13]; 
630                         strcpy(tempminsmoothid, "--minsmoothid"); 
631                         cPara.push_back(tempminsmoothid);
632                         char* tempMinsmoothid = new char[minsmoothid.length()];
633                         strcpy(tempMinsmoothid, minsmoothid.c_str());
634                         cPara.push_back(tempMinsmoothid);
635                 }
636                 
637                 if (useMaxp) {
638                         char* tempmaxp = new char[6]; 
639                         strcpy(tempmaxp, "--maxp"); 
640                         cPara.push_back(tempmaxp);
641                         char* tempMaxp = new char[maxp.length()];
642                         strcpy(tempMaxp, maxp.c_str());
643                         cPara.push_back(tempMaxp);
644                 }
645                 
646                 if (!skipgaps) {
647                         char* tempskipgaps = new char[14]; 
648                         strcpy(tempskipgaps, "--[no]skipgaps"); 
649                         cPara.push_back(tempskipgaps);
650                 }
651                 
652                 if (!skipgaps2) {
653                         char* tempskipgaps2 = new char[15]; 
654                         strcpy(tempskipgaps2, "--[no]skipgaps2"); 
655                         cPara.push_back(tempskipgaps2);
656                 }
657                 
658                 if (useMinlen) {
659                         char* tempminlen = new char[8]; 
660                         strcpy(tempminlen, "--minlen"); 
661                         cPara.push_back(tempminlen);
662                         char* tempMinlen = new char[minlen.length()];
663                         strcpy(tempMinlen, minlen.c_str());
664                         cPara.push_back(tempMinlen);
665                 }
666                 
667                 if (useMaxlen) {
668                         char* tempmaxlen = new char[8]; 
669                         strcpy(tempmaxlen, "--maxlen"); 
670                         cPara.push_back(tempmaxlen);
671                         char* tempMaxlen = new char[maxlen.length()];
672                         strcpy(tempMaxlen, maxlen.c_str());
673                         cPara.push_back(tempMaxlen);
674                 }
675                 
676                 if (ucl) {
677                         char* tempucl = new char[5]; 
678                         strcpy(tempucl, "--ucl"); 
679                         cPara.push_back(tempucl);
680                 }
681                 
682                 if (useQueryfract) {
683                         char* tempqueryfract = new char[12]; 
684                         strcpy(tempqueryfract, "--queryfract"); 
685                         cPara.push_back(tempqueryfract);
686                         char* tempQueryfract = new char[queryfract.length()];
687                         strcpy(tempQueryfract, queryfract.c_str());
688                         cPara.push_back(tempQueryfract);
689                 }
690                 
691                 
692                 char** uchimeParameters;
693                 uchimeParameters = new char*[cPara.size()];
694                 for (int i = 0; i < cPara.size(); i++) {  uchimeParameters[i] = cPara[i];  } 
695                 int numArgs = cPara.size();
696                 
697                 uchime_main(numArgs, uchimeParameters); 
698                 
699                 //free memory
700                 for(int i = 0; i < cPara.size(); i++)  {  delete[] cPara[i];  }
701                 delete[] uchimeParameters; 
702                 
703                 if (m->control_pressed) { return 0; }
704                 
705                 //create accnos file from uchime results
706                 ifstream in; 
707                 m->openInputFile(outputFName, in);
708                 
709                 ofstream out;
710                 m->openOutputFile(accnos, out);
711                 
712                 int num = 0;
713                 while(!in.eof()) {
714                         
715                         if (m->control_pressed) { break; }
716                         
717                         string name = "";
718                         string chimeraFlag = "";
719                         in >> chimeraFlag >> name;
720                         
721                         //fix name if needed
722                         if (templatefile == "self") { 
723                                 name = name.substr(0, name.length()-1); //rip off last /
724                                 name = name.substr(0, name.find_last_of('/'));
725                         }
726                         
727                         for (int i = 0; i < 15; i++) {  in >> chimeraFlag; }
728                         m->gobble(in);
729                         
730                         if (chimeraFlag == "Y") {  out << name << endl; }
731                         num++;
732                 }
733                 in.close();
734                 out.close();
735                 
736                 return num;
737         }
738         catch(exception& e) {
739                 m->errorOut(e, "ChimeraUchimeCommand", "driver");
740                 exit(1);
741         }
742 }
743 /**************************************************************************************************/
744
745 int ChimeraUchimeCommand::createProcesses(string outputFileName, string filename, string accnos, string alns) {
746         try {
747                 
748                 processIDS.clear();
749                 int process = 1;
750                 int num = 0;
751 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)           
752                 //break up file into multiple files
753                 vector<string> files;
754                 m->divideFile(filename, processors, files);
755                 
756                 if (m->control_pressed) {  return 0;  }
757                 
758 #ifdef USE_MPI  
759                 int pid, numSeqsPerProcessor; 
760                 int tag = 2001;
761                 
762                 MPI_Status status; 
763                 MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
764                 MPI_Comm_size(MPI_COMM_WORLD, &processors); 
765                                 
766                 if (pid == 0) { //you are the root process 
767                         num = driver(outputFileName, files[0], accnos, alns);
768                         
769                         if (templatefile != "self") {
770                                 //wait on chidren
771                                 for(int j = 1; j < processors; j++) { 
772                                         int temp;
773                                         MPI_Recv(&temp, 1, MPI_INT, j, tag, MPI_COMM_WORLD, &status);
774                                         num += temp;
775                                         
776                                         m->appendFiles((outputFileName + toString(j) + ".temp"), outputFileName);
777                                         remove((outputFileName + toString(j) + ".temp").c_str());
778                                         
779                                         m->appendFiles((accnos + toString(j) + ".temp"), accnos);
780                                         remove((accnos + toString(j) + ".temp").c_str());
781                                         
782                                         if (chimealns) {
783                                                 m->appendFiles((alns + toString(j) + ".temp"), alns);
784                                                 remove((alns + toString(j) + ".temp").c_str());
785                                         }
786                                 }
787                         }
788                 }else{ //you are a child process
789                         if (templatefile != "self") { //if template=self we can only use 1 processor
790                                 num = driver(outputFileName+toString(pid) + ".temp", files[pid], accnos+toString(pid) + ".temp", alns+toString(pid) + ".temp"); 
791                                 
792                                 //send numSeqs to parent
793                                 MPI_Send(&num, 1, MPI_INT, 0, tag, MPI_COMM_WORLD);
794                         }
795                 }
796
797                 MPI_Barrier(MPI_COMM_WORLD); //make everyone wait - just in case
798 #else
799                 
800                 //loop through and create all the processes you want
801                 while (process != processors) {
802                         int pid = fork();
803                         
804                         if (pid > 0) {
805                                 processIDS.push_back(pid);  //create map from line number to pid so you can append files in correct order later
806                                 process++;
807                         }else if (pid == 0){
808                                 num = driver(outputFileName + toString(getpid()) + ".temp", files[process], accnos + toString(getpid()) + ".temp", alns + toString(getpid()) + ".temp");
809                                 
810                                 //pass numSeqs to parent
811                                 ofstream out;
812                                 string tempFile = outputFileName + toString(getpid()) + ".num.temp";
813                                 m->openOutputFile(tempFile, out);
814                                 out << num << endl;
815                                 out.close();
816                                 
817                                 exit(0);
818                         }else { 
819                                 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine(); 
820                                 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
821                                 exit(0);
822                         }
823                 }
824                 
825                 //do my part
826                 num = driver(outputFileName, files[0], accnos, alns);
827                 
828                 //force parent to wait until all the processes are done
829                 for (int i=0;i<processIDS.size();i++) { 
830                         int temp = processIDS[i];
831                         wait(&temp);
832                 }
833                 
834                 for (int i = 0; i < processIDS.size(); i++) {
835                         ifstream in;
836                         string tempFile =  outputFileName + toString(processIDS[i]) + ".num.temp";
837                         m->openInputFile(tempFile, in);
838                         if (!in.eof()) { int tempNum = 0; in >> tempNum; num += tempNum; }
839                         in.close(); remove(tempFile.c_str());
840                 }
841                 
842                 
843                 //append output files
844                 for(int i=0;i<processIDS[i];i++){
845                         m->appendFiles((outputFileName + toString(processIDS[i]) + ".temp"), outputFileName);
846                         remove((outputFileName + toString(processIDS[i]) + ".temp").c_str());
847                         
848                         m->appendFiles((accnos + toString(processIDS[i]) + ".temp"), accnos);
849                         remove((accnos + toString(processIDS[i]) + ".temp").c_str());
850                         
851                         if (chimealns) {
852                                 m->appendFiles((alns + toString(processIDS[i]) + ".temp"), alns);
853                                 remove((alns + toString(processIDS[i]) + ".temp").c_str());
854                         }
855                 }
856 #endif          
857                 //get rid of the file pieces.
858                 for (int i = 0; i < files.size(); i++) { remove(files[i].c_str()); }
859 #endif          
860                 return num;     
861         }
862         catch(exception& e) {
863                 m->errorOut(e, "ChimeraUchimeCommand", "createProcesses");
864                 exit(1);
865         }
866 }
867
868 /**************************************************************************************************/
869