]> git.donarmstrong.com Git - mothur.git/blob - chimerauchimecommand.cpp
working on current change
[mothur.git] / chimerauchimecommand.cpp
1 /*
2  *  chimerauchimecommand.cpp
3  *  Mothur
4  *
5  *  Created by westcott on 5/13/11.
6  *  Copyright 2011 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "chimerauchimecommand.h"
11 #include "deconvolutecommand.h"
12 #include "uc.h"
13 #include "sequence.hpp"
14
15
16 //**********************************************************************************************************************
17 vector<string> ChimeraUchimeCommand::setParameters(){   
18         try {
19                 CommandParameter ptemplate("reference", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(ptemplate);
20                 CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pfasta);
21                 CommandParameter pname("name", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pname);
22                 CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors);
23                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
24                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
25                 CommandParameter pabskew("abskew", "Number", "", "1.9", "", "", "",false,false); parameters.push_back(pabskew);
26                 CommandParameter pchimealns("chimealns", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pchimealns);
27                 CommandParameter pminh("minh", "Number", "", "0.3", "", "", "",false,false); parameters.push_back(pminh);
28                 CommandParameter pmindiv("mindiv", "Number", "", "0.5", "", "", "",false,false); parameters.push_back(pmindiv);
29                 CommandParameter pxn("xn", "Number", "", "8.0", "", "", "",false,false); parameters.push_back(pxn);
30                 CommandParameter pdn("dn", "Number", "", "1.4", "", "", "",false,false); parameters.push_back(pdn);
31                 CommandParameter pxa("xa", "Number", "", "1", "", "", "",false,false); parameters.push_back(pxa);
32                 CommandParameter pchunks("chunks", "Number", "", "4", "", "", "",false,false); parameters.push_back(pchunks);
33                 CommandParameter pminchunk("minchunk", "Number", "", "64", "", "", "",false,false); parameters.push_back(pminchunk);
34                 CommandParameter pidsmoothwindow("idsmoothwindow", "Number", "", "32", "", "", "",false,false); parameters.push_back(pidsmoothwindow);
35                 CommandParameter pminsmoothid("minsmoothid", "Number", "", "0.95", "", "", "",false,false); parameters.push_back(pminsmoothid);
36                 CommandParameter pmaxp("maxp", "Number", "", "2", "", "", "",false,false); parameters.push_back(pmaxp);
37                 CommandParameter pskipgaps("skipgaps", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(pskipgaps);
38                 CommandParameter pskipgaps2("skipgaps2", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(pskipgaps2);
39                 CommandParameter pminlen("minlen", "Number", "", "10", "", "", "",false,false); parameters.push_back(pminlen);
40                 CommandParameter pmaxlen("maxlen", "Number", "", "10000", "", "", "",false,false); parameters.push_back(pmaxlen);
41                 CommandParameter pucl("ucl", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pucl);
42                 CommandParameter pqueryfract("queryfract", "Number", "", "0.5", "", "", "",false,false); parameters.push_back(pqueryfract);
43                 
44                 vector<string> myArray;
45                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
46                 return myArray;
47         }
48         catch(exception& e) {
49                 m->errorOut(e, "ChimeraUchimeCommand", "setParameters");
50                 exit(1);
51         }
52 }
53 //**********************************************************************************************************************
54 string ChimeraUchimeCommand::getHelpString(){   
55         try {
56                 string helpString = "";
57                 helpString += "The chimera.uchime command reads a fastafile and referencefile and outputs potentially chimeric sequences.\n";
58                 helpString += "This command is a wrapper for uchime written by Robert C. Edgar.\n";
59                 helpString += "The chimera.uchime command parameters are fasta, name, reference, processors, abskew, chimealns, minh, mindiv, xn, dn, xa, chunks, minchunk, idsmoothwindow, minsmoothid, maxp, skipgaps, skipgaps2, minlen, maxlen, ucl and queryfact.\n";
60                 helpString += "The fasta parameter allows you to enter the fasta file containing your potentially chimeric sequences, and is required, unless you have a valid current fasta file. \n";
61                 helpString += "The name parameter allows you to provide a name file, if you are using template=self. \n";
62                 helpString += "You may enter multiple fasta files by separating their names with dashes. ie. fasta=abrecovery.fasta-amazon.fasta \n";
63                 helpString += "The reference parameter allows you to enter a reference file containing known non-chimeric sequences, and is required. You may also set template=self, in this case the abundant sequences will be used as potential parents. \n";
64                 helpString += "The processors parameter allows you to specify how many processors you would like to use.  The default is 1. \n";
65                 helpString += "The abskew parameter can only be used with template=self. Minimum abundance skew. Default 1.9. Abundance skew is: min [ abund(parent1), abund(parent2) ] / abund(query).\n";
66                 helpString += "The chimealns parameter allows you to indicate you would like a file containing multiple alignments of query sequences to parents in human readable format. Alignments show columns with differences that support or contradict a chimeric model.\n";
67                 helpString += "The minh parameter - mininum score to report chimera. Default 0.3. Values from 0.1 to 5 might be reasonable. Lower values increase sensitivity but may report more false positives. If you decrease xn you may need to increase minh, and vice versa.\n";
68                 helpString += "The mindiv parameter - minimum divergence ratio, default 0.5. Div ratio is 100%% - %%identity between query sequence and the closest candidate for being a parent. If you don't care about very close chimeras, then you could increase mindiv to, say, 1.0 or 2.0, and also decrease --min h, say to 0.1, to increase sensitivity. How well this works will depend on your data. Best is to tune parameters on a good benchmark.\n";
69                 helpString += "The xn parameter - weight of a no vote. Default 8.0. Decreasing this weight to around 3 or 4 may give better performance on denoised data.\n";
70                 helpString += "The dn parameter - pseudo-count prior on number of no votes. Default 1.4. Probably no good reason to change this unless you can retune to a good benchmark for your data. Reasonable values are probably in the range from 0.2 to 2.\n";
71                 helpString += "The xa parameter - weight of an abstain vote. Default 1. So far, results do not seem to be very sensitive to this parameter, but if you have a good training set might be worth trying. Reasonable values might range from 0.1 to 2.\n";
72                 helpString += "The chunks parameter is the number of chunks to extract from the query sequence when searching for parents. Default 4.\n";
73                 helpString += "The minchunk parameter is the minimum length of a chunk. Default 64.\n";
74                 helpString += "The idsmoothwindow parameter is the length of id smoothing window. Default 32.\n";
75                 helpString += "The minsmoothid parameter - minimum factional identity over smoothed window of candidate parent. Default 0.95.\n";
76                 helpString += "The maxp parameter - maximum number of candidate parents to consider. Default 2. In tests so far, increasing maxp gives only a very small improvement in sensivity but tends to increase the error rate quite a bit.\n";
77                 helpString += "The skipgaps parameter controls how gapped columns affect counting of diffs. If skipgaps is set to T, columns containing gaps do not found as diffs. Default = T.\n";
78                 helpString += "The skipgaps2 parameter controls how gapped columns affect counting of diffs. If skipgaps2 is set to T, if column is immediately adjacent to a column containing a gap, it is not counted as a diff. Default = T.\n";
79                 helpString += "The minlen parameter is the minimum unaligned sequence length. Defaults 10. Applies to both query and reference sequences.\n";
80                 helpString += "The maxlen parameter is the maximum unaligned sequence length. Defaults 10000. Applies to both query and reference sequences.\n";
81                 helpString += "The ucl parameter - use local-X alignments. Default is global-X or false. On tests so far, global-X is always better; this option is retained because it just might work well on some future type of data.\n";
82                 helpString += "The queryfract parameter - minimum fraction of the query sequence that must be covered by a local-X alignment. Default 0.5. Applies only when ucl is true.\n";
83 #ifdef USE_MPI
84                 helpString += "When using MPI, the processors parameter is set to the number of MPI processes running. \n";
85 #endif
86                 helpString += "The chimera.uchime command should be in the following format: \n";
87                 helpString += "chimera.uchime(fasta=yourFastaFile, reference=yourTemplate) \n";
88                 helpString += "Example: chimera.uchime(fasta=AD.align, reference=silva.gold.align) \n";
89                 helpString += "Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFastaFile).\n";       
90                 return helpString;
91         }
92         catch(exception& e) {
93                 m->errorOut(e, "ChimeraUchimeCommand", "getHelpString");
94                 exit(1);
95         }
96 }
97 //**********************************************************************************************************************
98 ChimeraUchimeCommand::ChimeraUchimeCommand(){   
99         try {
100                 abort = true; calledHelp = true;
101                 setParameters();
102                 vector<string> tempOutNames;
103                 outputTypes["chimera"] = tempOutNames;
104                 outputTypes["accnos"] = tempOutNames;
105                 outputTypes["alns"] = tempOutNames;
106         }
107         catch(exception& e) {
108                 m->errorOut(e, "ChimeraUchimeCommand", "ChimeraUchimeCommand");
109                 exit(1);
110         }
111 }
112 //***************************************************************************************************************
113 ChimeraUchimeCommand::ChimeraUchimeCommand(string option)  {
114         try {
115                 abort = false; calledHelp = false;   
116                 
117                 //allow user to run help
118                 if(option == "help") { help(); abort = true; calledHelp = true; }
119                 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
120                 
121                 else {
122                         vector<string> myArray = setParameters();
123                         
124                         OptionParser parser(option);
125                         map<string,string> parameters = parser.getParameters();
126                         
127                         ValidParameters validParameter("chimera.uchime");
128                         map<string,string>::iterator it;
129                         
130                         //check to make sure all parameters are valid for command
131                         for (it = parameters.begin(); it != parameters.end(); it++) { 
132                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
133                         }
134                         
135                         vector<string> tempOutNames;
136                         outputTypes["chimera"] = tempOutNames;
137                         outputTypes["accnos"] = tempOutNames;
138                         outputTypes["alns"] = tempOutNames;
139                         
140                         //if the user changes the input directory command factory will send this info to us in the output parameter 
141                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
142                         if (inputDir == "not found"){   inputDir = "";          }
143                         
144                         //check for required parameters
145                         fastafile = validParameter.validFile(parameters, "fasta", false);
146                         if (fastafile == "not found") {                                 
147                                 //if there is a current fasta file, use it
148                                 string filename = m->getFastaFile(); 
149                                 if (filename != "") { fastaFileNames.push_back(filename); m->mothurOut("Using " + filename + " as input file for the fasta parameter."); m->mothurOutEndLine(); }
150                                 else {  m->mothurOut("You have no current fastafile and the fasta parameter is required."); m->mothurOutEndLine(); abort = true; }
151                         }else { 
152                                 m->splitAtDash(fastafile, fastaFileNames);
153                                 
154                                 //go through files and make sure they are good, if not, then disregard them
155                                 for (int i = 0; i < fastaFileNames.size(); i++) {
156                                         
157                                         bool ignore = false;
158                                         if (fastaFileNames[i] == "current") { 
159                                                 fastaFileNames[i] = m->getFastaFile(); 
160                                                 if (fastaFileNames[i] != "") {  m->mothurOut("Using " + fastaFileNames[i] + " as input file for the fasta parameter where you had given current."); m->mothurOutEndLine(); }
161                                                 else {  
162                                                         m->mothurOut("You have no current fastafile, ignoring current."); m->mothurOutEndLine(); ignore=true; 
163                                                         //erase from file list
164                                                         fastaFileNames.erase(fastaFileNames.begin()+i);
165                                                         i--;
166                                                 }
167                                         }
168                                         
169                                         if (!ignore) {
170                                                 
171                                                 if (inputDir != "") {
172                                                         string path = m->hasPath(fastaFileNames[i]);
173                                                         //if the user has not given a path then, add inputdir. else leave path alone.
174                                                         if (path == "") {       fastaFileNames[i] = inputDir + fastaFileNames[i];               }
175                                                 }
176                                                 
177                                                 int ableToOpen;
178                                                 ifstream in;
179                                                 
180                                                 ableToOpen = m->openInputFile(fastaFileNames[i], in, "noerror");
181                                                 
182                                                 //if you can't open it, try default location
183                                                 if (ableToOpen == 1) {
184                                                         if (m->getDefaultPath() != "") { //default path is set
185                                                                 string tryPath = m->getDefaultPath() + m->getSimpleName(fastaFileNames[i]);
186                                                                 m->mothurOut("Unable to open " + fastaFileNames[i] + ". Trying default " + tryPath); m->mothurOutEndLine();
187                                                                 ifstream in2;
188                                                                 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
189                                                                 in2.close();
190                                                                 fastaFileNames[i] = tryPath;
191                                                         }
192                                                 }
193                                                 
194                                                 if (ableToOpen == 1) {
195                                                         if (m->getOutputDir() != "") { //default path is set
196                                                                 string tryPath = m->getOutputDir() + m->getSimpleName(fastaFileNames[i]);
197                                                                 m->mothurOut("Unable to open " + fastaFileNames[i] + ". Trying output directory " + tryPath); m->mothurOutEndLine();
198                                                                 ifstream in2;
199                                                                 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
200                                                                 in2.close();
201                                                                 fastaFileNames[i] = tryPath;
202                                                         }
203                                                 }
204                                                 
205                                                 in.close();
206                                                 
207                                                 if (ableToOpen == 1) { 
208                                                         m->mothurOut("Unable to open " + fastaFileNames[i] + ". It will be disregarded."); m->mothurOutEndLine(); 
209                                                         //erase from file list
210                                                         fastaFileNames.erase(fastaFileNames.begin()+i);
211                                                         i--;
212                                                 }else {
213                                                         m->setFastaFile(fastaFileNames[i]);
214                                                 }
215                                         }
216                                 }
217                                 
218                                 //make sure there is at least one valid file left
219                                 if (fastaFileNames.size() == 0) { m->mothurOut("[ERROR]: no valid files."); m->mothurOutEndLine(); abort = true; }
220                         }
221                         
222                         
223                         //check for required parameters
224                         bool hasName = true;
225                         namefile = validParameter.validFile(parameters, "name", false);
226                         if (namefile == "not found") { namefile = "";  hasName = false; }
227                         else { 
228                                 m->splitAtDash(namefile, nameFileNames);
229                                 
230                                 //go through files and make sure they are good, if not, then disregard them
231                                 for (int i = 0; i < nameFileNames.size(); i++) {
232                                         
233                                         bool ignore = false;
234                                         if (nameFileNames[i] == "current") { 
235                                                 nameFileNames[i] = m->getNameFile(); 
236                                                 if (nameFileNames[i] != "") {  m->mothurOut("Using " + nameFileNames[i] + " as input file for the name parameter where you had given current."); m->mothurOutEndLine(); }
237                                                 else {  
238                                                         m->mothurOut("You have no current namefile, ignoring current."); m->mothurOutEndLine(); ignore=true; 
239                                                         //erase from file list
240                                                         nameFileNames.erase(nameFileNames.begin()+i);
241                                                         i--;
242                                                 }
243                                         }
244                                         
245                                         if (!ignore) {
246                                                 
247                                                 if (inputDir != "") {
248                                                         string path = m->hasPath(nameFileNames[i]);
249                                                         //if the user has not given a path then, add inputdir. else leave path alone.
250                                                         if (path == "") {       nameFileNames[i] = inputDir + nameFileNames[i];         }
251                                                 }
252                                                 
253                                                 int ableToOpen;
254                                                 ifstream in;
255                                                 
256                                                 ableToOpen = m->openInputFile(nameFileNames[i], in, "noerror");
257                                                 
258                                                 //if you can't open it, try default location
259                                                 if (ableToOpen == 1) {
260                                                         if (m->getDefaultPath() != "") { //default path is set
261                                                                 string tryPath = m->getDefaultPath() + m->getSimpleName(nameFileNames[i]);
262                                                                 m->mothurOut("Unable to open " + nameFileNames[i] + ". Trying default " + tryPath); m->mothurOutEndLine();
263                                                                 ifstream in2;
264                                                                 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
265                                                                 in2.close();
266                                                                 nameFileNames[i] = tryPath;
267                                                         }
268                                                 }
269                                                 
270                                                 if (ableToOpen == 1) {
271                                                         if (m->getOutputDir() != "") { //default path is set
272                                                                 string tryPath = m->getOutputDir() + m->getSimpleName(nameFileNames[i]);
273                                                                 m->mothurOut("Unable to open " + nameFileNames[i] + ". Trying output directory " + tryPath); m->mothurOutEndLine();
274                                                                 ifstream in2;
275                                                                 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
276                                                                 in2.close();
277                                                                 nameFileNames[i] = tryPath;
278                                                         }
279                                                 }
280                                                 
281                                                 in.close();
282                                                 
283                                                 if (ableToOpen == 1) { 
284                                                         m->mothurOut("Unable to open " + nameFileNames[i] + ". It will be disregarded."); m->mothurOutEndLine(); 
285                                                         //erase from file list
286                                                         nameFileNames.erase(nameFileNames.begin()+i);
287                                                         i--;
288                                                 }else {
289                                                         m->setNameFile(nameFileNames[i]);
290                                                 }
291                                         }
292                                 }
293                                 
294                                 //make sure there is at least one valid file left
295                                 if (nameFileNames.size() == 0) { m->mothurOut("[ERROR]: no valid name files."); m->mothurOutEndLine(); abort = true; }
296                         }
297                         
298                         if (hasName && (nameFileNames.size() != fastaFileNames.size())) { m->mothurOut("[ERROR]: The number of namefiles does not match the number of fastafiles, please correct."); m->mothurOutEndLine(); abort=true; }
299                         
300                         //if the user changes the output directory command factory will send this info to us in the output parameter 
301                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = ""; }
302                         
303                         
304                         string path;
305                         it = parameters.find("reference");
306                         //user has given a template file
307                         if(it != parameters.end()){ 
308                                 if (it->second == "self") { templatefile = "self"; }
309                                 else {
310                                         path = m->hasPath(it->second);
311                                         //if the user has not given a path then, add inputdir. else leave path alone.
312                                         if (path == "") {       parameters["reference"] = inputDir + it->second;                }
313                                         
314                                         templatefile = validParameter.validFile(parameters, "reference", true);
315                                         if (templatefile == "not open") { abort = true; }
316                                         else if (templatefile == "not found") { templatefile = "";  m->mothurOut("reference is a required parameter for the chimera.uchime command."); m->mothurOutEndLine(); abort = true;  }  
317                                 }
318                         }else if (hasName) {  templatefile = "self"; }
319                         else { templatefile = "";  m->mothurOut("reference is a required parameter for the chimera.uchime command, unless you have a namefile."); m->mothurOutEndLine(); abort = true;  }       
320
321                         
322                         string temp = validParameter.validFile(parameters, "processors", false);        if (temp == "not found"){       temp = m->getProcessors();      }
323                         m->setProcessors(temp);
324                         convert(temp, processors);
325                         
326                         abskew = validParameter.validFile(parameters, "abskew", false); if (abskew == "not found"){     useAbskew = false;  abskew = "1.9";     }else{  useAbskew = true;  }
327                         if (useAbskew && templatefile != "self") { m->mothurOut("The abskew parameter is only valid with template=self, ignoring."); m->mothurOutEndLine(); useAbskew = false; }
328                         
329                         temp = validParameter.validFile(parameters, "chimealns", false);                        if (temp == "not found") { temp = "f"; }
330                         chimealns = m->isTrue(temp); 
331                         
332                         minh = validParameter.validFile(parameters, "minh", false);                                             if (minh == "not found")                        { useMinH = false; minh = "0.3";                                        }       else{ useMinH = true;                   }
333                         mindiv = validParameter.validFile(parameters, "mindiv", false);                                 if (mindiv == "not found")                      { useMindiv = false; mindiv = "0.5";                            }       else{ useMindiv = true;                 }
334                         xn = validParameter.validFile(parameters, "xn", false);                                                 if (xn == "not found")                          { useXn = false; xn = "8.0";                                            }       else{ useXn = true;                             }
335                         dn = validParameter.validFile(parameters, "dn", false);                                                 if (dn == "not found")                          { useDn = false; dn = "1.4";                                            }       else{ useDn = true;                             }
336                         xa = validParameter.validFile(parameters, "xa", false);                                                 if (xa == "not found")                          { useXa = false; xa = "1";                                                      }       else{ useXa = true;                             }
337                         chunks = validParameter.validFile(parameters, "chunks", false);                                 if (chunks == "not found")                      { useChunks = false; chunks = "4";                                      }       else{ useChunks = true;                 }
338                         minchunk = validParameter.validFile(parameters, "minchunk", false);                             if (minchunk == "not found")            { useMinchunk = false; minchunk = "64";                         }       else{ useMinchunk = true;               }
339                         idsmoothwindow = validParameter.validFile(parameters, "idsmoothwindow", false); if (idsmoothwindow == "not found")      { useIdsmoothwindow = false; idsmoothwindow = "32";     }       else{ useIdsmoothwindow = true; }
340                         minsmoothid = validParameter.validFile(parameters, "minsmoothid", false);               if (minsmoothid == "not found")         { useMinsmoothid = false; minsmoothid = "0.95";         }       else{ useMinsmoothid = true;    }
341                         maxp = validParameter.validFile(parameters, "maxp", false);                                             if (maxp == "not found")                        { useMaxp = false; maxp = "2";                                          }       else{ useMaxp = true;                   }
342                         minlen = validParameter.validFile(parameters, "minlen", false);                                 if (minlen == "not found")                      { useMinlen = false; minlen = "10";                                     }       else{ useMinlen = true;                 }
343                         maxlen = validParameter.validFile(parameters, "maxlen", false);                                 if (maxlen == "not found")                      { useMaxlen = false; maxlen = "10000";                          }       else{ useMaxlen = true;                 }
344                         
345                         temp = validParameter.validFile(parameters, "ucl", false);                                              if (temp == "not found") { temp = "f"; }
346                         ucl = m->isTrue(temp);
347                         
348                         queryfract = validParameter.validFile(parameters, "queryfract", false);                 if (queryfract == "not found")          { useQueryfract = false; queryfract = "0.5";            }       else{ useQueryfract = true;             }
349                         if (!ucl && useQueryfract) { m->mothurOut("queryfact may only be used when ucl=t, ignoring."); m->mothurOutEndLine(); useQueryfract = false; }
350                         
351                         temp = validParameter.validFile(parameters, "skipgaps", false);                                 if (temp == "not found") { temp = "t"; }
352                         skipgaps = m->isTrue(temp); 
353
354                         temp = validParameter.validFile(parameters, "skipgaps2", false);                                if (temp == "not found") { temp = "t"; }
355                         skipgaps2 = m->isTrue(temp); 
356
357                 }
358         }
359         catch(exception& e) {
360                 m->errorOut(e, "ChimeraSlayerCommand", "ChimeraSlayerCommand");
361                 exit(1);
362         }
363 }
364 //***************************************************************************************************************
365
366 int ChimeraUchimeCommand::execute(){
367         try{
368                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
369                 
370                 m->mothurOut("\nuchime by Robert C. Edgar\nhttp://drive5.com/uchime\nThis code is donated to the public domain.\n\n");
371                 
372                 for (int s = 0; s < fastaFileNames.size(); s++) {
373                         
374                         m->mothurOut("Checking sequences from " + fastaFileNames[s] + " ..." ); m->mothurOutEndLine();
375                         
376                         int start = time(NULL); 
377                         string nameFile = "";
378                         
379                         if (templatefile == "self") { //you want to run uchime with a refernce template
380                                 
381                                 #ifdef USE_MPI  
382                                         int pid; 
383                                         MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
384                                         if (pid == 0) { //you are the root process 
385                                 #endif  
386                                 
387                                 if (processors != 1) { m->mothurOut("When using template=self, mothur can only use 1 processor, continuing."); m->mothurOutEndLine(); processors = 1; }
388                                 if (nameFileNames.size() != 0) { //you provided a namefile and we don't need to create one
389                                         nameFile = nameFileNames[s];
390                                 }else {
391                                         m->mothurOutEndLine(); m->mothurOut("No namesfile given, running unique.seqs command to generate one."); m->mothurOutEndLine(); m->mothurOutEndLine();
392                                         
393                                         //use unique.seqs to create new name and fastafile
394                                         string inputString = "fasta=" + fastaFileNames[s];
395                                         m->mothurOut("/******************************************/"); m->mothurOutEndLine(); 
396                                         m->mothurOut("Running command: unique.seqs(" + inputString + ")"); m->mothurOutEndLine(); 
397                                         
398                                         Command* uniqueCommand = new DeconvoluteCommand(inputString);
399                                         uniqueCommand->execute();
400                                         
401                                         map<string, vector<string> > filenames = uniqueCommand->getOutputFiles();
402                                         
403                                         delete uniqueCommand;
404                                         
405                                         m->mothurOut("/******************************************/"); m->mothurOutEndLine(); 
406                                         
407                                         nameFile = filenames["name"][0];
408                                         fastaFileNames[s] = filenames["fasta"][0];
409                                 }
410                                 
411                                 //create input file for uchime
412                                 //read through fastafile and store info
413                                 map<string, string> seqs;
414                                 ifstream in;
415                                 m->openInputFile(fastaFileNames[s], in);
416                                 
417                                 while (!in.eof()) {
418                                         
419                                         if (m->control_pressed) { in.close(); for (int j = 0; j < outputNames.size(); j++) {    remove(outputNames[j].c_str()); }  return 0; }
420                                         
421                                         Sequence seq(in); m->gobble(in);
422                                         seqs[seq.getName()] = seq.getAligned();
423                                 }
424                                 in.close();
425                                 
426                                 //read namefile
427                                 vector<seqPriorityNode> nameMapCount;
428                                 int error = m->readNames(nameFile, nameMapCount, seqs);
429                                 
430                                 if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) {        remove(outputNames[j].c_str()); }  return 0; }
431                                 
432                                 if (error == 1) { for (int j = 0; j < outputNames.size(); j++) {        remove(outputNames[j].c_str()); }  return 0; }
433                                 if (seqs.size() != nameMapCount.size()) { m->mothurOut( "The number of sequences in your fastafile does not match the number of sequences in your namefile, aborting."); m->mothurOutEndLine(); for (int j = 0; j < outputNames.size(); j++) {  remove(outputNames[j].c_str()); }  return 0; }
434                                 
435                                 sort(nameMapCount.begin(), nameMapCount.end(), compareSeqPriorityNodes);
436                                 
437                                 string newFasta = fastaFileNames[s] + ".temp";
438                                 ofstream out;
439                                 m->openOutputFile(newFasta, out);
440                                 
441                                 //print new file in order of
442                                 for (int i = 0; i < nameMapCount.size(); i++) {
443                                         out << ">" << nameMapCount[i].name  << "/ab=" << nameMapCount[i].numIdentical << "/" << endl << nameMapCount[i].seq << endl;
444                                 }
445                                 out.close();
446                                 
447                                 fastaFileNames[s] = newFasta;
448                                                 
449                                 #ifdef USE_MPI  
450                                         }
451                                 #endif
452                                 if (m->control_pressed) {  for (int j = 0; j < outputNames.size(); j++) {       remove(outputNames[j].c_str()); }  return 0;    }                               
453                         }
454                         
455                         if (outputDir == "") { outputDir = m->hasPath(fastaFileNames[s]);  }//if user entered a file with a path then preserve it                               
456                         string outputFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + "uchime.chimera";
457                         string accnosFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s]))  + "uchime.accnos";
458                         string alnsFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s]))  + "uchime.alns";
459                         
460                         if (m->control_pressed) {  for (int j = 0; j < outputNames.size(); j++) {       remove(outputNames[j].c_str()); }  return 0;    }
461                         
462                         int numSeqs = 0;
463 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
464                         if(processors == 1){ numSeqs = driver(outputFileName, fastaFileNames[s], accnosFileName, alnsFileName); }
465                         else{   numSeqs = createProcesses(outputFileName, fastaFileNames[s], accnosFileName, alnsFileName); }
466 #else
467                         numSeqs = driver(outputFileName, fastaFileNames[s], accnosFileName, alnsFileName);
468 #endif
469                         if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) {        remove(outputNames[j].c_str()); } return 0; }
470                         
471                         //remove file made for uchime
472                         if (templatefile == "self") {  remove(fastaFileNames[s].c_str()); }
473                         
474                         outputNames.push_back(outputFileName); outputTypes["chimera"].push_back(outputFileName);
475                         outputNames.push_back(accnosFileName); outputTypes["accnos"].push_back(accnosFileName);
476                         if (chimealns) { outputNames.push_back(alnsFileName); outputTypes["alns"].push_back(alnsFileName); }
477                         
478                         m->mothurOutEndLine(); m->mothurOut("It took " + toString(time(NULL) - start) + " secs to check " + toString(numSeqs) + " sequences."); m->mothurOutEndLine();
479                 }
480                 
481                 //set accnos file as new current accnosfile
482                 string current = "";
483                 itTypes = outputTypes.find("accnos");
484                 if (itTypes != outputTypes.end()) {
485                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setAccnosFile(current); }
486                 }
487                 
488                 m->mothurOutEndLine();
489                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
490                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }       
491                 m->mothurOutEndLine();
492                 
493                 return 0;
494                 
495         }
496         catch(exception& e) {
497                 m->errorOut(e, "ChimeraUchimeCommand", "execute");
498                 exit(1);
499         }
500 }
501 //**********************************************************************************************************************
502
503 int ChimeraUchimeCommand::driver(string outputFName, string filename, string accnos, string alns){
504         try {
505                 
506                 vector<char*> cPara;
507                 
508                 char* tempUchime = new char[8];  
509                 strcpy(tempUchime, "./uchime "); 
510                 cPara.push_back(tempUchime);
511                 
512                 char* tempIn = new char[7];  
513                 strcpy(tempIn, "--input"); 
514                 cPara.push_back(tempIn);
515                 char* temp = new char[filename.length()];
516                 strcpy(temp, filename.c_str());
517                 cPara.push_back(temp);
518                 
519                 //are you using a reference file
520                 if (templatefile != "self") {
521                                                 
522                         //add reference file
523                         char* tempRef = new char[4]; 
524                         strcpy(tempRef, "--db"); 
525                         cPara.push_back(tempRef);  
526                         char* tempR = new char[templatefile.length()];
527                         strcpy(tempR, templatefile.c_str());
528                         cPara.push_back(tempR);
529                 }
530                 
531                 char* tempO = new char[11]; 
532                 strcpy(tempO, "--uchimeout"); 
533                 cPara.push_back(tempO);
534                 char* tempout = new char[outputFName.length()];
535                 strcpy(tempout, outputFName.c_str());
536                 cPara.push_back(tempout);
537                 
538                 if (chimealns) {
539                         char* tempA = new char[12]; 
540                         strcpy(tempA, "--uchimealns"); 
541                         cPara.push_back(tempA);
542                         char* tempa = new char[alns.length()];
543                         strcpy(tempa, alns.c_str());
544                         cPara.push_back(tempa);
545                 }
546                 
547                 if (useAbskew) {
548                         char* tempskew = new char[8]; 
549                         strcpy(tempskew, "--abskew"); 
550                         cPara.push_back(tempskew);
551                         char* tempSkew = new char[abskew.length()];
552                         strcpy(tempSkew, abskew.c_str());
553                         cPara.push_back(tempSkew);
554                 }
555                 
556                 if (useMinH) {
557                         char* tempminh = new char[6]; 
558                         strcpy(tempminh, "--minh"); 
559                         cPara.push_back(tempminh);
560                         char* tempMinH = new char[minh.length()];
561                         strcpy(tempMinH, minh.c_str());
562                         cPara.push_back(tempMinH);
563                 }
564                 
565                 if (useMindiv) {
566                         char* tempmindiv = new char[8]; 
567                         strcpy(tempmindiv, "--mindiv"); 
568                         cPara.push_back(tempmindiv);
569                         char* tempMindiv = new char[mindiv.length()];
570                         strcpy(tempMindiv, mindiv.c_str());
571                         cPara.push_back(tempMindiv);
572                 }
573                 
574                 if (useXn) {
575                         char* tempxn = new char[4]; 
576                         strcpy(tempxn, "--xn"); 
577                         cPara.push_back(tempxn);
578                         char* tempXn = new char[xn.length()];
579                         strcpy(tempXn, xn.c_str());
580                         cPara.push_back(tempXn);
581                 }
582                 
583                 if (useDn) {
584                         char* tempdn = new char[4]; 
585                         strcpy(tempdn, "--dn"); 
586                         cPara.push_back(tempdn);
587                         char* tempDn = new char[dn.length()];
588                         strcpy(tempDn, dn.c_str());
589                         cPara.push_back(tempDn);
590                 }
591                 
592                 if (useXa) {
593                         char* tempxa = new char[4]; 
594                         strcpy(tempxa, "--xa"); 
595                         cPara.push_back(tempxa);
596                         char* tempXa = new char[xa.length()];
597                         strcpy(tempXa, xa.c_str());
598                         cPara.push_back(tempXa);
599                 }
600                 
601                 if (useChunks) {
602                         char* tempchunks = new char[8]; 
603                         strcpy(tempchunks, "--chunks"); 
604                         cPara.push_back(tempchunks);
605                         char* tempChunks = new char[chunks.length()];
606                         strcpy(tempChunks, chunks.c_str());
607                         cPara.push_back(tempChunks);
608                 }
609                 
610                 if (useMinchunk) {
611                         char* tempminchunk = new char[10]; 
612                         strcpy(tempminchunk, "--minchunk"); 
613                         cPara.push_back(tempminchunk);
614                         char* tempMinchunk = new char[minchunk.length()];
615                         strcpy(tempMinchunk, minchunk.c_str());
616                         cPara.push_back(tempMinchunk);
617                 }
618                 
619                 if (useIdsmoothwindow) {
620                         char* tempidsmoothwindow = new char[16]; 
621                         strcpy(tempidsmoothwindow, "--idsmoothwindow"); 
622                         cPara.push_back(tempidsmoothwindow);
623                         char* tempIdsmoothwindow = new char[idsmoothwindow.length()];
624                         strcpy(tempIdsmoothwindow, idsmoothwindow.c_str());
625                         cPara.push_back(tempIdsmoothwindow);
626                 }
627                 
628                 if (useMinsmoothid) {
629                         char* tempminsmoothid = new char[13]; 
630                         strcpy(tempminsmoothid, "--minsmoothid"); 
631                         cPara.push_back(tempminsmoothid);
632                         char* tempMinsmoothid = new char[minsmoothid.length()];
633                         strcpy(tempMinsmoothid, minsmoothid.c_str());
634                         cPara.push_back(tempMinsmoothid);
635                 }
636                 
637                 if (useMaxp) {
638                         char* tempmaxp = new char[6]; 
639                         strcpy(tempmaxp, "--maxp"); 
640                         cPara.push_back(tempmaxp);
641                         char* tempMaxp = new char[maxp.length()];
642                         strcpy(tempMaxp, maxp.c_str());
643                         cPara.push_back(tempMaxp);
644                 }
645                 
646                 if (!skipgaps) {
647                         char* tempskipgaps = new char[14]; 
648                         strcpy(tempskipgaps, "--[no]skipgaps"); 
649                         cPara.push_back(tempskipgaps);
650                 }
651                 
652                 if (!skipgaps2) {
653                         char* tempskipgaps2 = new char[15]; 
654                         strcpy(tempskipgaps2, "--[no]skipgaps2"); 
655                         cPara.push_back(tempskipgaps2);
656                 }
657                 
658                 if (useMinlen) {
659                         char* tempminlen = new char[8]; 
660                         strcpy(tempminlen, "--minlen"); 
661                         cPara.push_back(tempminlen);
662                         char* tempMinlen = new char[minlen.length()];
663                         strcpy(tempMinlen, minlen.c_str());
664                         cPara.push_back(tempMinlen);
665                 }
666                 
667                 if (useMaxlen) {
668                         char* tempmaxlen = new char[8]; 
669                         strcpy(tempmaxlen, "--maxlen"); 
670                         cPara.push_back(tempmaxlen);
671                         char* tempMaxlen = new char[maxlen.length()];
672                         strcpy(tempMaxlen, maxlen.c_str());
673                         cPara.push_back(tempMaxlen);
674                 }
675                 
676                 if (ucl) {
677                         char* tempucl = new char[5]; 
678                         strcpy(tempucl, "--ucl"); 
679                         cPara.push_back(tempucl);
680                 }
681                 
682                 if (useQueryfract) {
683                         char* tempqueryfract = new char[12]; 
684                         strcpy(tempqueryfract, "--queryfract"); 
685                         cPara.push_back(tempqueryfract);
686                         char* tempQueryfract = new char[queryfract.length()];
687                         strcpy(tempQueryfract, queryfract.c_str());
688                         cPara.push_back(tempQueryfract);
689                 }
690                 
691                 
692                 char** uchimeParameters;
693                 uchimeParameters = new char*[cPara.size()];
694                 for (int i = 0; i < cPara.size(); i++) {  uchimeParameters[i] = cPara[i];  } 
695                 int numArgs = cPara.size();
696                 
697                 uchime_main(numArgs, uchimeParameters); 
698                 
699                 //free memory
700                 for(int i = 0; i < cPara.size(); i++)  {  delete[] cPara[i];  }
701                 delete[] uchimeParameters; 
702                 
703                 if (m->control_pressed) { return 0; }
704                 
705                 //create accnos file from uchime results
706                 ifstream in; 
707                 m->openInputFile(outputFName, in);
708                 
709                 ofstream out;
710                 m->openOutputFile(accnos, out);
711                 
712                 int num = 0;
713                 while(!in.eof()) {
714                         
715                         if (m->control_pressed) { break; }
716                         
717                         string name = "";
718                         string chimeraFlag = "";
719                         in >> chimeraFlag >> name;
720                         
721                         //fix name if needed
722                         if (templatefile == "self") { 
723                                 name = name.substr(0, name.length()-1); //rip off last /
724                                 name = name.substr(0, name.find_last_of('/'));
725                         }
726                         
727                         for (int i = 0; i < 15; i++) {  in >> chimeraFlag; }
728                         m->gobble(in);
729                         
730                         if (chimeraFlag == "Y") {  out << name << endl; }
731                         num++;
732                 }
733                 in.close();
734                 out.close();
735                 
736                 return num;
737         }
738         catch(exception& e) {
739                 m->errorOut(e, "ChimeraUchimeCommand", "driver");
740                 exit(1);
741         }
742 }
743 /**************************************************************************************************/
744
745 int ChimeraUchimeCommand::createProcesses(string outputFileName, string filename, string accnos, string alns) {
746         try {
747                 
748                 processIDS.clear();
749                 int process = 1;
750                 int num = 0;
751 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)           
752                 //break up file into multiple files
753                 vector<string> files;
754                 m->divideFile(filename, processors, files);
755                 
756                 if (m->control_pressed) {  return 0;  }
757                 
758 #ifdef USE_MPI  
759                 int pid, numSeqsPerProcessor; 
760                 int tag = 2001;
761                 
762                 MPI_Status status; 
763                 MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
764                 MPI_Comm_size(MPI_COMM_WORLD, &processors); 
765                                 
766                 if (pid == 0) { //you are the root process 
767                         num = driver(outputFileName, files[0], accnos, alns);
768                         
769                         if (templatefile != "self") {
770                                 //wait on chidren
771                                 for(int j = 1; j < processors; j++) { 
772                                         int temp;
773                                         MPI_Recv(&temp, 1, MPI_INT, j, tag, MPI_COMM_WORLD, &status);
774                                         num += temp;
775                                         
776                                         m->appendFiles((outputFileName + toString(j) + ".temp"), outputFileName);
777                                         remove((outputFileName + toString(j) + ".temp").c_str());
778                                         
779                                         m->appendFiles((accnos + toString(j) + ".temp"), accnos);
780                                         remove((accnos + toString(j) + ".temp").c_str());
781                                         
782                                         if (chimealns) {
783                                                 m->appendFiles((alns + toString(j) + ".temp"), alns);
784                                                 remove((alns + toString(j) + ".temp").c_str());
785                                         }
786                                 }
787                         }
788                 }else{ //you are a child process
789                         if (templatefile != "self") { //if template=self we can only use 1 processor
790                                 num = driver(outputFileName+toString(pid) + ".temp", files[pid], accnos+toString(pid) + ".temp", alns+toString(pid) + ".temp"); 
791                                 
792                                 //send numSeqs to parent
793                                 MPI_Send(&num, 1, MPI_INT, 0, tag, MPI_COMM_WORLD);
794                         }
795                 }
796
797                 MPI_Barrier(MPI_COMM_WORLD); //make everyone wait - just in case
798 #else
799                 
800                 //loop through and create all the processes you want
801                 while (process != processors) {
802                         int pid = fork();
803                         
804                         if (pid > 0) {
805                                 processIDS.push_back(pid);  //create map from line number to pid so you can append files in correct order later
806                                 process++;
807                         }else if (pid == 0){
808                                 num = driver(outputFileName + toString(getpid()) + ".temp", files[process], accnos + toString(getpid()) + ".temp", alns + toString(getpid()) + ".temp");
809                                 
810                                 //pass numSeqs to parent
811                                 ofstream out;
812                                 string tempFile = outputFileName + toString(getpid()) + ".num.temp";
813                                 m->openOutputFile(tempFile, out);
814                                 out << num << endl;
815                                 out.close();
816                                 
817                                 exit(0);
818                         }else { 
819                                 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine(); 
820                                 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
821                                 exit(0);
822                         }
823                 }
824                 
825                 //do my part
826                 num = driver(outputFileName, files[0], accnos, alns);
827                 
828                 //force parent to wait until all the processes are done
829                 for (int i=0;i<processIDS.size();i++) { 
830                         int temp = processIDS[i];
831                         wait(&temp);
832                 }
833                 
834                 for (int i = 0; i < processIDS.size(); i++) {
835                         ifstream in;
836                         string tempFile =  outputFileName + toString(processIDS[i]) + ".num.temp";
837                         m->openInputFile(tempFile, in);
838                         if (!in.eof()) { int tempNum = 0; in >> tempNum; num += tempNum; }
839                         in.close(); remove(tempFile.c_str());
840                 }
841                 
842                 
843                 //append output files
844                 for(int i=0;i<processIDS[i];i++){
845                         m->appendFiles((outputFileName + toString(processIDS[i]) + ".temp"), outputFileName);
846                         remove((outputFileName + toString(processIDS[i]) + ".temp").c_str());
847                         
848                         m->appendFiles((accnos + toString(processIDS[i]) + ".temp"), accnos);
849                         remove((accnos + toString(processIDS[i]) + ".temp").c_str());
850                         
851                         if (chimealns) {
852                                 m->appendFiles((alns + toString(processIDS[i]) + ".temp"), alns);
853                                 remove((alns + toString(processIDS[i]) + ".temp").c_str());
854                         }
855                 }
856 #endif          
857                 //get rid of the file pieces.
858                 for (int i = 0; i < files.size(); i++) { remove(files[i].c_str()); }
859 #endif          
860                 return num;     
861         }
862         catch(exception& e) {
863                 m->errorOut(e, "ChimeraUchimeCommand", "createProcesses");
864                 exit(1);
865         }
866 }
867
868 /**************************************************************************************************/
869