]> git.donarmstrong.com Git - mothur.git/blob - chimeraslayercommand.cpp
rewrote metastats command in c++, added mothurRemove function to handle ~ error....
[mothur.git] / chimeraslayercommand.cpp
1 /*
2  *  chimeraslayercommand.cpp
3  *  Mothur
4  *
5  *  Created by westcott on 3/31/10.
6  *  Copyright 2010 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "chimeraslayercommand.h"
11 #include "chimeraslayer.h"
12 #include "deconvolutecommand.h"
13 #include "referencedb.h"
14
15 //**********************************************************************************************************************
16 vector<string> ChimeraSlayerCommand::setParameters(){   
17         try {
18                 CommandParameter ptemplate("reference", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(ptemplate);
19                 CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pfasta);
20                 CommandParameter pname("name", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pname);
21                 CommandParameter pwindow("window", "Number", "", "50", "", "", "",false,false); parameters.push_back(pwindow);
22                 CommandParameter pksize("ksize", "Number", "", "7", "", "", "",false,false); parameters.push_back(pksize);
23                 CommandParameter pmatch("match", "Number", "", "5.0", "", "", "",false,false); parameters.push_back(pmatch);
24                 CommandParameter pmismatch("mismatch", "Number", "", "-4.0", "", "", "",false,false); parameters.push_back(pmismatch);
25                 CommandParameter pminsim("minsim", "Number", "", "90", "", "", "",false,false); parameters.push_back(pminsim);
26                 CommandParameter pmincov("mincov", "Number", "", "70", "", "", "",false,false); parameters.push_back(pmincov);
27                 CommandParameter pminsnp("minsnp", "Number", "", "10", "", "", "",false,false); parameters.push_back(pminsnp);
28                 CommandParameter pminbs("minbs", "Number", "", "90", "", "", "",false,false); parameters.push_back(pminbs);
29                 CommandParameter psearch("search", "Multiple", "kmer-blast", "blast", "", "", "",false,false); parameters.push_back(psearch);
30                 CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors);
31                 CommandParameter prealign("realign", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(prealign);
32                 CommandParameter ptrim("trim", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(ptrim);
33                 CommandParameter psplit("split", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(psplit);
34                 CommandParameter pnumwanted("numwanted", "Number", "", "15", "", "", "",false,false); parameters.push_back(pnumwanted);
35                 CommandParameter piters("iters", "Number", "", "1000", "", "", "",false,false); parameters.push_back(piters);
36                 CommandParameter pdivergence("divergence", "Number", "", "1.007", "", "", "",false,false); parameters.push_back(pdivergence);
37                 CommandParameter pparents("parents", "Number", "", "3", "", "", "",false,false); parameters.push_back(pparents);
38                 CommandParameter pincrement("increment", "Number", "", "5", "", "", "",false,false); parameters.push_back(pincrement);
39                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
40                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
41                 CommandParameter psave("save", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(psave);
42
43                 vector<string> myArray;
44                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
45                 return myArray;
46         }
47         catch(exception& e) {
48                 m->errorOut(e, "ChimeraSlayerCommand", "setParameters");
49                 exit(1);
50         }
51 }
52 //**********************************************************************************************************************
53 string ChimeraSlayerCommand::getHelpString(){   
54         try {
55                 string helpString = "";
56                 helpString += "The chimera.slayer command reads a fastafile and referencefile and outputs potentially chimeric sequences.\n";
57                 helpString += "This command was modeled after the chimeraSlayer written by the Broad Institute.\n";
58                 helpString += "The chimera.slayer command parameters are fasta, name, template, processors, trim, ksize, window, match, mismatch, divergence. minsim, mincov, minbs, minsnp, parents, search, iters, increment, numwanted, and realign.\n";
59                 helpString += "The fasta parameter allows you to enter the fasta file containing your potentially chimeric sequences, and is required, unless you have a valid current fasta file. \n";
60                 helpString += "The name parameter allows you to provide a name file, if you are using reference=self. \n";
61                 helpString += "You may enter multiple fasta files by separating their names with dashes. ie. fasta=abrecovery.fasta-amazon.fasta \n";
62                 helpString += "The reference parameter allows you to enter a reference file containing known non-chimeric sequences, and is required. You may also set template=self, in this case the abundant sequences will be used as potential parents. \n";
63                 helpString += "The processors parameter allows you to specify how many processors you would like to use.  The default is 1. \n";
64 #ifdef USE_MPI
65                 helpString += "When using MPI, the processors parameter is set to the number of MPI processes running. \n";
66 #endif
67                 helpString += "The trim parameter allows you to output a new fasta file containing your sequences with the chimeric ones trimmed to include only their longest piece, default=F. \n";
68                 helpString += "The split parameter allows you to check both pieces of non-chimeric sequence for chimeras, thus looking for trimeras and quadmeras. default=F. \n";
69                 helpString += "The window parameter allows you to specify the window size for searching for chimeras, default=50. \n";
70                 helpString += "The increment parameter allows you to specify how far you move each window while finding chimeric sequences, default=5.\n";
71                 helpString += "The numwanted parameter allows you to specify how many sequences you would each query sequence compared with, default=15.\n";
72                 helpString += "The ksize parameter allows you to input kmersize, default is 7, used if search is kmer. \n";
73                 helpString += "The match parameter allows you to reward matched bases in blast search, default is 5. \n";
74                 helpString += "The parents parameter allows you to select the number of potential parents to investigate from the numwanted best matches after rating them, default is 3. \n";
75                 helpString += "The mismatch parameter allows you to penalize mismatched bases in blast search, default is -4. \n";
76                 helpString += "The divergence parameter allows you to set a cutoff for chimera determination, default is 1.007. \n";
77                 helpString += "The iters parameter allows you to specify the number of bootstrap iters to do with the chimeraslayer method, default=1000.\n";
78                 helpString += "The minsim parameter allows you to specify a minimum similarity with the parent fragments, default=90. \n";
79                 helpString += "The mincov parameter allows you to specify minimum coverage by closest matches found in template. Default is 70, meaning 70%. \n";
80                 helpString += "The minbs parameter allows you to specify minimum bootstrap support for calling a sequence chimeric. Default is 90, meaning 90%. \n";
81                 helpString += "The minsnp parameter allows you to specify percent of SNPs to sample on each side of breakpoint for computing bootstrap support (default: 10) \n";
82                 helpString += "The search parameter allows you to specify search method for finding the closest parent. Choices are blast, and kmer, default blast. \n";
83                 helpString += "The realign parameter allows you to realign the query to the potential parents. Choices are true or false, default true.  \n";
84                 helpString += "If the save parameter is set to true the reference sequences will be saved in memory, to clear them later you can use the clear.memory command. Default=f.";
85                 helpString += "The chimera.slayer command should be in the following format: \n";
86                 helpString += "chimera.slayer(fasta=yourFastaFile, reference=yourTemplate, search=yourSearch) \n";
87                 helpString += "Example: chimera.slayer(fasta=AD.align, reference=core_set_aligned.imputed.fasta, search=kmer) \n";
88                 helpString += "Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFastaFile).\n";       
89                 return helpString;
90         }
91         catch(exception& e) {
92                 m->errorOut(e, "ChimeraSlayerCommand", "getHelpString");
93                 exit(1);
94         }
95 }
96 //**********************************************************************************************************************
97 ChimeraSlayerCommand::ChimeraSlayerCommand(){   
98         try {
99                 abort = true; calledHelp = true;
100                 setParameters();
101                 vector<string> tempOutNames;
102                 outputTypes["chimera"] = tempOutNames;
103                 outputTypes["accnos"] = tempOutNames;
104                 outputTypes["fasta"] = tempOutNames;
105         }
106         catch(exception& e) {
107                 m->errorOut(e, "ChimeraSlayerCommand", "ChimeraSlayerCommand");
108                 exit(1);
109         }
110 }
111 //***************************************************************************************************************
112 ChimeraSlayerCommand::ChimeraSlayerCommand(string option)  {
113         try {
114                 abort = false; calledHelp = false;   
115                 ReferenceDB* rdb = ReferenceDB::getInstance();
116                 
117                 //allow user to run help
118                 if(option == "help") { help(); abort = true; calledHelp = true; }
119                 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
120                 
121                 else {
122                         vector<string> myArray = setParameters();
123                         
124                         OptionParser parser(option);
125                         map<string,string> parameters = parser.getParameters();
126                         
127                         ValidParameters validParameter("chimera.slayer");
128                         map<string,string>::iterator it;
129                         
130                         //check to make sure all parameters are valid for command
131                         for (it = parameters.begin(); it != parameters.end(); it++) { 
132                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
133                         }
134                         
135                         vector<string> tempOutNames;
136                         outputTypes["chimera"] = tempOutNames;
137                         outputTypes["accnos"] = tempOutNames;
138                         outputTypes["fasta"] = tempOutNames;
139                 
140                         //if the user changes the input directory command factory will send this info to us in the output parameter 
141                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
142                         if (inputDir == "not found"){   inputDir = "";          }
143                                                 
144                         //check for required parameters
145                         fastafile = validParameter.validFile(parameters, "fasta", false);
146                         if (fastafile == "not found") {                                 
147                                 //if there is a current fasta file, use it
148                                 string filename = m->getFastaFile(); 
149                                 if (filename != "") { fastaFileNames.push_back(filename); m->mothurOut("Using " + filename + " as input file for the fasta parameter."); m->mothurOutEndLine(); }
150                                 else {  m->mothurOut("You have no current fastafile and the fasta parameter is required."); m->mothurOutEndLine(); abort = true; }
151                         }else { 
152                                 m->splitAtDash(fastafile, fastaFileNames);
153                                 
154                                 //go through files and make sure they are good, if not, then disregard them
155                                 for (int i = 0; i < fastaFileNames.size(); i++) {
156                                         
157                                         bool ignore = false;
158                                         if (fastaFileNames[i] == "current") { 
159                                                 fastaFileNames[i] = m->getFastaFile(); 
160                                                 if (fastaFileNames[i] != "") {  m->mothurOut("Using " + fastaFileNames[i] + " as input file for the fasta parameter where you had given current."); m->mothurOutEndLine(); }
161                                                 else {  
162                                                         m->mothurOut("You have no current fastafile, ignoring current."); m->mothurOutEndLine(); ignore=true; 
163                                                         //erase from file list
164                                                         fastaFileNames.erase(fastaFileNames.begin()+i);
165                                                         i--;
166                                                 }
167                                         }
168                                         
169                                         if (!ignore) {
170                                                 
171                                                 if (inputDir != "") {
172                                                         string path = m->hasPath(fastaFileNames[i]);
173                                                         //if the user has not given a path then, add inputdir. else leave path alone.
174                                                         if (path == "") {       fastaFileNames[i] = inputDir + fastaFileNames[i];               }
175                                                 }
176                 
177                                                 int ableToOpen;
178                                                 ifstream in;
179                                                 
180                                                 ableToOpen = m->openInputFile(fastaFileNames[i], in, "noerror");
181                                         
182                                                 //if you can't open it, try default location
183                                                 if (ableToOpen == 1) {
184                                                         if (m->getDefaultPath() != "") { //default path is set
185                                                                 string tryPath = m->getDefaultPath() + m->getSimpleName(fastaFileNames[i]);
186                                                                 m->mothurOut("Unable to open " + fastaFileNames[i] + ". Trying default " + tryPath); m->mothurOutEndLine();
187                                                                 ifstream in2;
188                                                                 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
189                                                                 in2.close();
190                                                                 fastaFileNames[i] = tryPath;
191                                                         }
192                                                 }
193                                                 
194                                                 if (ableToOpen == 1) {
195                                                         if (m->getOutputDir() != "") { //default path is set
196                                                                 string tryPath = m->getOutputDir() + m->getSimpleName(fastaFileNames[i]);
197                                                                 m->mothurOut("Unable to open " + fastaFileNames[i] + ". Trying output directory " + tryPath); m->mothurOutEndLine();
198                                                                 ifstream in2;
199                                                                 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
200                                                                 in2.close();
201                                                                 fastaFileNames[i] = tryPath;
202                                                         }
203                                                 }
204                                                 
205                                                 in.close();
206                                                 
207                                                 if (ableToOpen == 1) { 
208                                                         m->mothurOut("Unable to open " + fastaFileNames[i] + ". It will be disregarded."); m->mothurOutEndLine(); 
209                                                         //erase from file list
210                                                         fastaFileNames.erase(fastaFileNames.begin()+i);
211                                                         i--;
212                                                 }else {
213                                                         m->setFastaFile(fastaFileNames[i]);
214                                                 }
215                                         }
216                                 }
217                                 
218                                 //make sure there is at least one valid file left
219                                 if (fastaFileNames.size() == 0) { m->mothurOut("[ERROR]: no valid files."); m->mothurOutEndLine(); abort = true; }
220                         }
221                         
222                         
223                         //check for required parameters
224                         bool hasName = true;
225                         namefile = validParameter.validFile(parameters, "name", false);
226                         if (namefile == "not found") { namefile = "";  hasName = false; }
227                         else { 
228                                 m->splitAtDash(namefile, nameFileNames);
229                                 
230                                 //go through files and make sure they are good, if not, then disregard them
231                                 for (int i = 0; i < nameFileNames.size(); i++) {
232                                         
233                                         bool ignore = false;
234                                         if (nameFileNames[i] == "current") { 
235                                                 nameFileNames[i] = m->getNameFile(); 
236                                                 if (nameFileNames[i] != "") {  m->mothurOut("Using " + nameFileNames[i] + " as input file for the name parameter where you had given current."); m->mothurOutEndLine(); }
237                                                 else {  
238                                                         m->mothurOut("You have no current namefile, ignoring current."); m->mothurOutEndLine(); ignore=true; 
239                                                         //erase from file list
240                                                         nameFileNames.erase(nameFileNames.begin()+i);
241                                                         i--;
242                                                 }
243                                         }
244                                         
245                                         if (!ignore) {
246                                                 
247                                                 if (inputDir != "") {
248                                                         string path = m->hasPath(nameFileNames[i]);
249                                                         //if the user has not given a path then, add inputdir. else leave path alone.
250                                                         if (path == "") {       nameFileNames[i] = inputDir + nameFileNames[i];         }
251                                                 }
252                                                 
253                                                 int ableToOpen;
254                                                 ifstream in;
255                                                 
256                                                 ableToOpen = m->openInputFile(nameFileNames[i], in, "noerror");
257                                                 
258                                                 //if you can't open it, try default location
259                                                 if (ableToOpen == 1) {
260                                                         if (m->getDefaultPath() != "") { //default path is set
261                                                                 string tryPath = m->getDefaultPath() + m->getSimpleName(nameFileNames[i]);
262                                                                 m->mothurOut("Unable to open " + nameFileNames[i] + ". Trying default " + tryPath); m->mothurOutEndLine();
263                                                                 ifstream in2;
264                                                                 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
265                                                                 in2.close();
266                                                                 nameFileNames[i] = tryPath;
267                                                         }
268                                                 }
269                                                 
270                                                 if (ableToOpen == 1) {
271                                                         if (m->getOutputDir() != "") { //default path is set
272                                                                 string tryPath = m->getOutputDir() + m->getSimpleName(nameFileNames[i]);
273                                                                 m->mothurOut("Unable to open " + nameFileNames[i] + ". Trying output directory " + tryPath); m->mothurOutEndLine();
274                                                                 ifstream in2;
275                                                                 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
276                                                                 in2.close();
277                                                                 nameFileNames[i] = tryPath;
278                                                         }
279                                                 }
280                                                 
281                                                 in.close();
282                                                 
283                                                 if (ableToOpen == 1) { 
284                                                         m->mothurOut("Unable to open " + nameFileNames[i] + ". It will be disregarded."); m->mothurOutEndLine(); 
285                                                         //erase from file list
286                                                         nameFileNames.erase(nameFileNames.begin()+i);
287                                                         i--;
288                                                 }else {
289                                                         m->setNameFile(nameFileNames[i]);
290                                                 }
291                                         }
292                                 }
293                                 
294                                 //make sure there is at least one valid file left
295                                 if (nameFileNames.size() == 0) { m->mothurOut("[ERROR]: no valid name files."); m->mothurOutEndLine(); abort = true; }
296                         }
297                         
298                         if (hasName && (nameFileNames.size() != fastaFileNames.size())) { m->mothurOut("[ERROR]: The number of namefiles does not match the number of fastafiles, please correct."); m->mothurOutEndLine(); abort=true; }
299                         
300                         //if the user changes the output directory command factory will send this info to us in the output parameter 
301                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = ""; }
302                         
303                         string temp = validParameter.validFile(parameters, "processors", false);        if (temp == "not found"){       temp = m->getProcessors();      }
304                         m->setProcessors(temp);
305                         convert(temp, processors);
306                         
307                         temp = validParameter.validFile(parameters, "save", false);                     if (temp == "not found"){       temp = "f";                             }
308                         save = m->isTrue(temp); 
309                         rdb->save = save; 
310                         if (save) { //clear out old references
311                                 rdb->clearMemory();     
312                         }
313                         
314                         string path;
315                         it = parameters.find("reference");
316                         //user has given a template file
317                         if(it != parameters.end()){ 
318                                 if (it->second == "self") { 
319                                         templatefile = "self"; 
320                                         if (save) {
321                                                 m->mothurOut("[WARNING]: You can't save reference=self, ignoring save."); 
322                                                 m->mothurOutEndLine();
323                                                 save = false;
324                                         }
325                                 }
326                                 else {
327                                         path = m->hasPath(it->second);
328                                         //if the user has not given a path then, add inputdir. else leave path alone.
329                                         if (path == "") {       parameters["reference"] = inputDir + it->second;                }
330                                         
331                                         templatefile = validParameter.validFile(parameters, "reference", true);
332                                         if (templatefile == "not open") { abort = true; }
333                                         else if (templatefile == "not found") { //check for saved reference sequences
334                                                 if (rdb->referenceSeqs.size() != 0) {
335                                                         templatefile = "saved";
336                                                 }else {
337                                                         m->mothurOut("[ERROR]: You don't have any saved reference sequences and the reference parameter is a required."); 
338                                                         m->mothurOutEndLine();
339                                                         abort = true; 
340                                                 }
341                                         }else { if (save) {     rdb->setSavedReference(templatefile);   }       }       
342                                 }
343                         }else if (hasName) {  templatefile = "self"; 
344                                 if (save) {
345                                         m->mothurOut("[WARNING]: You can't save reference=self, ignoring save."); 
346                                         m->mothurOutEndLine();
347                                         save = false;
348                                 }
349                         }
350                         else { 
351                                 if (rdb->referenceSeqs.size() != 0) {
352                                         templatefile = "saved";
353                                 }else {
354                                         m->mothurOut("[ERROR]: You don't have any saved reference sequences and the reference parameter is a required."); 
355                                         m->mothurOutEndLine();
356                                         templatefile = ""; abort = true; 
357                                 } 
358                         }
359                         
360                         
361                         
362                         temp = validParameter.validFile(parameters, "ksize", false);                    if (temp == "not found") { temp = "7"; }
363                         convert(temp, ksize);
364                                                 
365                         temp = validParameter.validFile(parameters, "window", false);                   if (temp == "not found") { temp = "50"; }                       
366                         convert(temp, window);
367                         
368                         temp = validParameter.validFile(parameters, "match", false);                    if (temp == "not found") { temp = "5"; }
369                         convert(temp, match);
370                         
371                         temp = validParameter.validFile(parameters, "mismatch", false);                 if (temp == "not found") { temp = "-4"; }
372                         convert(temp, mismatch);
373                         
374                         temp = validParameter.validFile(parameters, "divergence", false);               if (temp == "not found") { temp = "1.007"; }
375                         convert(temp, divR);
376                         
377                         temp = validParameter.validFile(parameters, "minsim", false);                   if (temp == "not found") { temp = "90"; }
378                         convert(temp, minSimilarity);
379                         
380                         temp = validParameter.validFile(parameters, "mincov", false);                   if (temp == "not found") { temp = "70"; }
381                         convert(temp, minCoverage);
382                         
383                         temp = validParameter.validFile(parameters, "minbs", false);                    if (temp == "not found") { temp = "90"; }
384                         convert(temp, minBS);
385                         
386                         temp = validParameter.validFile(parameters, "minsnp", false);                   if (temp == "not found") { temp = "10"; }
387                         convert(temp, minSNP);
388
389                         temp = validParameter.validFile(parameters, "parents", false);                  if (temp == "not found") { temp = "3"; }
390                         convert(temp, parents); 
391                         
392                         temp = validParameter.validFile(parameters, "realign", false);                  if (temp == "not found") { temp = "t"; }
393                         realign = m->isTrue(temp); 
394                         
395                         temp = validParameter.validFile(parameters, "trim", false);                             if (temp == "not found") { temp = "f"; }
396                         trim = m->isTrue(temp); 
397                         
398                         temp = validParameter.validFile(parameters, "split", false);                    if (temp == "not found") { temp = "f"; }
399                         trimera = m->isTrue(temp); 
400                         
401                         search = validParameter.validFile(parameters, "search", false);                 if (search == "not found") { search = "blast"; }
402                         
403                         temp = validParameter.validFile(parameters, "iters", false);                    if (temp == "not found") { temp = "1000"; }             
404                         convert(temp, iters); 
405                          
406                         temp = validParameter.validFile(parameters, "increment", false);                if (temp == "not found") { temp = "5"; }
407                         convert(temp, increment);
408                         
409                         temp = validParameter.validFile(parameters, "numwanted", false);                if (temp == "not found") { temp = "15"; }               
410                         convert(temp, numwanted);
411
412                         if ((search != "blast") && (search != "kmer")) { m->mothurOut(search + " is not a valid search."); m->mothurOutEndLine(); abort = true;  }
413                         
414                         if (hasName && (templatefile != "self")) { m->mothurOut("You have provided a namefile and the reference parameter is not set to self. I am not sure what reference you are trying to use, aborting."); m->mothurOutEndLine(); abort=true; }
415                 }
416         }
417         catch(exception& e) {
418                 m->errorOut(e, "ChimeraSlayerCommand", "ChimeraSlayerCommand");
419                 exit(1);
420         }
421 }
422 //***************************************************************************************************************
423
424 int ChimeraSlayerCommand::execute(){
425         try{
426                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
427                         
428                 for (int s = 0; s < fastaFileNames.size(); s++) {
429                                 
430                         m->mothurOut("Checking sequences from " + fastaFileNames[s] + " ..." ); m->mothurOutEndLine();
431                 
432                         int start = time(NULL); 
433                         
434                         if (templatefile != "self") { //you want to run slayer with a refernce template
435                                 chimera = new ChimeraSlayer(fastaFileNames[s], templatefile, trim, search, ksize, match, mismatch, window, divR, minSimilarity, minCoverage, minBS, minSNP, parents, iters, increment, numwanted, realign);     
436                         }else {
437                                 if (processors != 1) { m->mothurOut("When using template=self, mothur can only use 1 processor, continuing."); m->mothurOutEndLine(); processors = 1; }
438                                 string nameFile = "";
439                                 if (nameFileNames.size() != 0) { //you provided a namefile and we don't need to create one
440                                         nameFile = nameFileNames[s];
441                                 }else {
442                                         m->mothurOutEndLine(); m->mothurOut("No namesfile given, running unique.seqs command to generate one."); m->mothurOutEndLine(); m->mothurOutEndLine();
443                                         
444                                         //use unique.seqs to create new name and fastafile
445                                         string inputString = "fasta=" + fastaFileNames[s];
446                                         m->mothurOut("/******************************************/"); m->mothurOutEndLine(); 
447                                         m->mothurOut("Running command: unique.seqs(" + inputString + ")"); m->mothurOutEndLine(); 
448                                                                  
449                                         Command* uniqueCommand = new DeconvoluteCommand(inputString);
450                                         uniqueCommand->execute();
451                                         
452                                         map<string, vector<string> > filenames = uniqueCommand->getOutputFiles();
453                                         
454                                         delete uniqueCommand;
455                                         
456                                         m->mothurOut("/******************************************/"); m->mothurOutEndLine(); 
457                                         
458                                         nameFile = filenames["name"][0];
459                                         fastaFileNames[s] = filenames["fasta"][0];
460                                 }
461                                 
462                                 //sort fastafile by abundance, returns new sorted fastafile name
463                                 m->mothurOut("Sorting fastafile according to abundance..."); cout.flush(); 
464                                 map<string, int> priority = sortFastaFile(fastaFileNames[s], nameFile);
465                                 m->mothurOut("Done."); m->mothurOutEndLine();
466                                 
467                                 if (m->control_pressed) {  for (int j = 0; j < outputNames.size(); j++) {       m->mothurRemove(outputNames[j]);        }  return 0;    }
468
469                                 chimera = new ChimeraSlayer(fastaFileNames[s], templatefile, trim, priority, search, ksize, match, mismatch, window, divR, minSimilarity, minCoverage, minBS, minSNP, parents, iters, increment, numwanted, realign);   
470                         }
471                                 
472                         if (outputDir == "") { outputDir = m->hasPath(fastaFileNames[s]);  }//if user entered a file with a path then preserve it                               
473                         string outputFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + "slayer.chimera";
474                         string accnosFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s]))  + "slayer.accnos";
475                         string trimFastaFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s]))  + "slayer.fasta";
476                         
477                         if (m->control_pressed) { delete chimera; for (int j = 0; j < outputNames.size(); j++) {        m->mothurRemove(outputNames[j]);        }  return 0;    }
478                         
479                         if (chimera->getUnaligned()) { 
480                                 m->mothurOut("Your template sequences are different lengths, please correct."); m->mothurOutEndLine(); 
481                                 delete chimera;
482                                 return 0; 
483                         }
484                         templateSeqsLength = chimera->getLength();
485                         
486                 #ifdef USE_MPI  
487                         int pid, numSeqsPerProcessor; 
488                                 int tag = 2001;
489                                 vector<unsigned long int> MPIPos;
490                                 
491                                 MPI_Status status; 
492                                 MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
493                                 MPI_Comm_size(MPI_COMM_WORLD, &processors); 
494
495                                 MPI_File inMPI;
496                                 MPI_File outMPI;
497                                 MPI_File outMPIAccnos;
498                                 MPI_File outMPIFasta;
499                                 
500                                 int outMode=MPI_MODE_CREATE|MPI_MODE_WRONLY; 
501                                 int inMode=MPI_MODE_RDONLY; 
502                                 
503                                 char outFilename[1024];
504                                 strcpy(outFilename, outputFileName.c_str());
505                                 
506                                 char outAccnosFilename[1024];
507                                 strcpy(outAccnosFilename, accnosFileName.c_str());
508                         
509                                 char outFastaFilename[1024];
510                                 strcpy(outFastaFilename, trimFastaFileName.c_str());
511                                 
512                                 char inFileName[1024];
513                                 strcpy(inFileName, fastaFileNames[s].c_str());
514
515                                 MPI_File_open(MPI_COMM_WORLD, inFileName, inMode, MPI_INFO_NULL, &inMPI);  //comm, filename, mode, info, filepointer
516                                 MPI_File_open(MPI_COMM_WORLD, outFilename, outMode, MPI_INFO_NULL, &outMPI);
517                                 MPI_File_open(MPI_COMM_WORLD, outAccnosFilename, outMode, MPI_INFO_NULL, &outMPIAccnos);
518                                 if (trim) { MPI_File_open(MPI_COMM_WORLD, outFastaFilename, outMode, MPI_INFO_NULL, &outMPIFasta); }
519
520                         if (m->control_pressed) { outputTypes.clear();  MPI_File_close(&inMPI);  MPI_File_close(&outMPI); if (trim) {  MPI_File_close(&outMPIFasta);  } MPI_File_close(&outMPIAccnos); for (int j = 0; j < outputNames.size(); j++) {   m->mothurRemove(outputNames[j]);        }   delete chimera; return 0;  }
521                         
522                                 if (pid == 0) { //you are the root process 
523                                         m->mothurOutEndLine();
524                                         m->mothurOut("Only reporting sequence supported by " + toString(minBS) + "% of bootstrapped results.");
525                                         m->mothurOutEndLine();
526                 
527                                         string outTemp = "Name\tLeftParent\tRightParent\tDivQLAQRB\tPerIDQLAQRB\tBootStrapA\tDivQLBQRA\tPerIDQLBQRA\tBootStrapB\tFlag\tLeftWindow\tRightWindow\n";
528                                         
529                                         //print header
530                                         int length = outTemp.length();
531                                         char* buf2 = new char[length];
532                                         memcpy(buf2, outTemp.c_str(), length);
533
534                                         MPI_File_write_shared(outMPI, buf2, length, MPI_CHAR, &status);
535                                         delete buf2;
536
537                                         MPIPos = m->setFilePosFasta(fastaFileNames[s], numSeqs); //fills MPIPos, returns numSeqs
538                                         
539                                         if (templatefile != "self") { //if template=self we can only use 1 processor
540                                                 //send file positions to all processes
541                                                 for(int i = 1; i < processors; i++) { 
542                                                         MPI_Send(&numSeqs, 1, MPI_INT, i, tag, MPI_COMM_WORLD);
543                                                         MPI_Send(&MPIPos[0], (numSeqs+1), MPI_LONG, i, tag, MPI_COMM_WORLD);
544                                                 }
545                                         }
546                                         //figure out how many sequences you have to align
547                                         numSeqsPerProcessor = numSeqs / processors;
548                                         int startIndex =  pid * numSeqsPerProcessor;
549                                         if(pid == (processors - 1)){    numSeqsPerProcessor = numSeqs - pid * numSeqsPerProcessor;      }
550                                         
551                                         if (templatefile == "self") { //if template=self we can only use 1 processor
552                                                 startIndex = 0;
553                                                 numSeqsPerProcessor = numSeqs;
554                                         }
555                                         
556                                         //do your part
557                                         driverMPI(startIndex, numSeqsPerProcessor, inMPI, outMPI, outMPIAccnos, outMPIFasta, MPIPos);
558                                         
559                                         int numNoParents = chimera->getNumNoParents();
560                                         int temp;
561                                         for(int i = 1; i < processors; i++) { 
562                                                 MPI_Recv(&temp, 1, MPI_INT, 1, tag, MPI_COMM_WORLD, &status);
563                                                 numNoParents += temp;
564                                         }
565                                         
566                                         
567                                         if (numSeqs == numNoParents) {  m->mothurOut("[WARNING]: megablast returned 0 potential parents for all your sequences. This could be due to formatdb.exe not being setup properly, please check formatdb.log for errors."); m->mothurOutEndLine(); }
568                                         
569                                         if (m->control_pressed) { outputTypes.clear();  MPI_File_close(&inMPI);  MPI_File_close(&outMPI); if (trim) { MPI_File_close(&outMPIFasta); }  MPI_File_close(&outMPIAccnos);  for (int j = 0; j < outputNames.size(); j++) {   m->mothurRemove(outputNames[j]);        }  m->mothurRemove(outputFileName);  m->mothurRemove(accnosFileName);  delete chimera; return 0;  }
570
571                                 }else{ //you are a child process
572                                         if (templatefile != "self") { //if template=self we can only use 1 processor
573                                                 MPI_Recv(&numSeqs, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
574                                                 MPIPos.resize(numSeqs+1);
575                                                 MPI_Recv(&MPIPos[0], (numSeqs+1), MPI_LONG, 0, tag, MPI_COMM_WORLD, &status);
576                                         
577                                                 //figure out how many sequences you have to align
578                                                 numSeqsPerProcessor = numSeqs / processors;
579                                                 int startIndex =  pid * numSeqsPerProcessor;
580                                                 if(pid == (processors - 1)){    numSeqsPerProcessor = numSeqs - pid * numSeqsPerProcessor;      }
581                                         
582                                                 //do your part
583                                                 driverMPI(startIndex, numSeqsPerProcessor, inMPI, outMPI, outMPIAccnos, outMPIFasta, MPIPos);
584                                                 
585                                                 int numNoParents = chimera->getNumNoParents();
586                                                 MPI_Send(&numNoParents, 1, MPI_INT, 0, tag, MPI_COMM_WORLD);
587                                         
588                                                 if (m->control_pressed) { outputTypes.clear();  MPI_File_close(&inMPI);  MPI_File_close(&outMPI); if (trim) { MPI_File_close(&outMPIFasta); }  MPI_File_close(&outMPIAccnos);  for (int j = 0; j < outputNames.size(); j++) {   m->mothurRemove(outputNames[j]);        }  delete chimera; return 0;  }
589                                 
590                                         }
591                                 }
592                                 
593                                 //close files 
594                                 MPI_File_close(&inMPI);
595                                 MPI_File_close(&outMPI);
596                                 MPI_File_close(&outMPIAccnos); 
597                                 if (trim) { MPI_File_close(&outMPIFasta); }
598                                 MPI_Barrier(MPI_COMM_WORLD); //make everyone wait - just in case
599                                 
600                 #else
601                         ofstream outHeader;
602                         string tempHeader = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + "slayer.chimeras.tempHeader";
603                         m->openOutputFile(tempHeader, outHeader);
604                         
605                         chimera->printHeader(outHeader);
606                         outHeader.close();
607                         
608                         vector<unsigned long int> positions = m->divideFile(fastaFileNames[s], processors);
609                                 
610                         for (int i = 0; i < (positions.size()-1); i++) {
611                                 lines.push_back(new linePair(positions[i], positions[(i+1)]));
612                         }       
613
614                         //break up file
615                         #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
616                                 if(processors == 1){
617                                         numSeqs = driver(lines[0], outputFileName, fastaFileNames[s], accnosFileName, trimFastaFileName);
618                                         
619                                         int numNoParents = chimera->getNumNoParents();
620                                         if (numNoParents == numSeqs) { m->mothurOut("[WARNING]: megablast returned 0 potential parents for all your sequences. This could be due to formatdb.exe not being setup properly, please check formatdb.log for errors."); m->mothurOutEndLine(); }
621                                         
622                                         if (m->control_pressed) { outputTypes.clear(); if (trim) { m->mothurRemove(trimFastaFileName); } m->mothurRemove(outputFileName); m->mothurRemove(tempHeader); m->mothurRemove(accnosFileName); for (int j = 0; j < outputNames.size(); j++) {  m->mothurRemove(outputNames[j]);        } for (int i = 0; i < lines.size(); i++) {  delete lines[i];  }  lines.clear(); delete chimera; return 0; }
623                                         
624                                 }else{
625                                         processIDS.resize(0);
626                                         
627                                         numSeqs = createProcesses(outputFileName, fastaFileNames[s], accnosFileName, trimFastaFileName); 
628                                 
629                                         rename((outputFileName + toString(processIDS[0]) + ".temp").c_str(), outputFileName.c_str());
630                                         rename((accnosFileName + toString(processIDS[0]) + ".temp").c_str(), accnosFileName.c_str());
631                                         if (trim) {  rename((trimFastaFileName + toString(processIDS[0]) + ".temp").c_str(), trimFastaFileName.c_str()); }
632                                                 
633                                         //append output files
634                                         for(int i=1;i<processors;i++){
635                                                 m->appendFiles((outputFileName + toString(processIDS[i]) + ".temp"), outputFileName);
636                                                 m->mothurRemove((outputFileName + toString(processIDS[i]) + ".temp"));
637                                         }
638                                         
639                                         //append output files
640                                         for(int i=1;i<processors;i++){
641                                                 m->appendFiles((accnosFileName + toString(processIDS[i]) + ".temp"), accnosFileName);
642                                                 m->mothurRemove((accnosFileName + toString(processIDS[i]) + ".temp"));
643                                         }
644                                         
645                                         if (trim) {
646                                                 for(int i=1;i<processors;i++){
647                                                         m->appendFiles((trimFastaFileName + toString(processIDS[i]) + ".temp"), trimFastaFileName);
648                                                         m->mothurRemove((trimFastaFileName + toString(processIDS[i]) + ".temp"));
649                                                 }
650                                         }
651                                         
652                                         if (m->control_pressed) { outputTypes.clear(); if (trim) { m->mothurRemove(trimFastaFileName); } m->mothurRemove(outputFileName); m->mothurRemove(accnosFileName); for (int j = 0; j < outputNames.size(); j++) {       m->mothurRemove(outputNames[j]);        } for (int i = 0; i < lines.size(); i++) {  delete lines[i];  }  lines.clear(); delete chimera; return 0; }
653                                 }
654
655                         #else
656                                 numSeqs = driver(lines[0], outputFileName, fastaFileNames[s], accnosFileName, trimFastaFileName);
657                                 
658                                 int numNoParents = chimera->getNumNoParents();
659                                 if (numNoParents == numSeqs) { m->mothurOut("[WARNING]: megablast returned 0 potential parents for all your sequences. This could be due to formatdb.exe not being setup properly, please check formatdb.log for errors."); m->mothurOutEndLine(); }
660
661                                 
662                                 if (m->control_pressed) { outputTypes.clear(); if (trim) { m->mothurRemove(trimFastaFileName); } m->mothurRemove(outputFileName); m->mothurRemove(tempHeader); m->mothurRemove(accnosFileName); for (int j = 0; j < outputNames.size(); j++) {  m->mothurRemove(outputNames[j]);        } for (int i = 0; i < lines.size(); i++) {  delete lines[i];  }  lines.clear(); delete chimera; return 0; }
663                                 
664                         #endif
665                         
666                         m->appendFiles(outputFileName, tempHeader);
667                 
668                         m->mothurRemove(outputFileName);
669                         rename(tempHeader.c_str(), outputFileName.c_str());
670                         
671                 #endif
672                         delete chimera;
673                         
674                         
675                         for (int i = 0; i < lines.size(); i++) {  delete lines[i];  }  lines.clear();
676                         
677                         outputNames.push_back(outputFileName); outputTypes["chimera"].push_back(outputFileName);
678                         outputNames.push_back(accnosFileName); outputTypes["accnos"].push_back(accnosFileName);
679                         if (trim) {  outputNames.push_back(trimFastaFileName); outputTypes["fasta"].push_back(trimFastaFileName); }
680                         
681                         m->mothurOutEndLine(); m->mothurOut("It took " + toString(time(NULL) - start) + " secs to check " + toString(numSeqs) + " sequences."); m->mothurOutEndLine();
682                 }
683                 
684                 //set accnos file as new current accnosfile
685                 string current = "";
686                 itTypes = outputTypes.find("accnos");
687                 if (itTypes != outputTypes.end()) {
688                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setAccnosFile(current); }
689                 }
690                 
691                 if (trim) {
692                         itTypes = outputTypes.find("fasta");
693                         if (itTypes != outputTypes.end()) {
694                                 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setFastaFile(current); }
695                         }
696                 }
697                 
698                 m->mothurOutEndLine();
699                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
700                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }       
701                 m->mothurOutEndLine();
702
703                 return 0;
704                 
705         }
706         catch(exception& e) {
707                 m->errorOut(e, "ChimeraSlayerCommand", "execute");
708                 exit(1);
709         }
710 }
711 //**********************************************************************************************************************
712
713 int ChimeraSlayerCommand::driver(linePair* filePos, string outputFName, string filename, string accnos, string fasta){
714         try {
715                 ofstream out;
716                 m->openOutputFile(outputFName, out);
717                 
718                 ofstream out2;
719                 m->openOutputFile(accnos, out2);
720                 
721                 ofstream out3;
722                 if (trim) {  m->openOutputFile(fasta, out3); }
723                 
724                 ifstream inFASTA;
725                 m->openInputFile(filename, inFASTA);
726
727                 inFASTA.seekg(filePos->start);
728
729                 bool done = false;
730                 int count = 0;
731         
732                 while (!done) {
733                 
734                         if (m->control_pressed) {       out.close(); out2.close(); if (trim) { out3.close(); } inFASTA.close(); return 1;       }
735                 
736                         Sequence* candidateSeq = new Sequence(inFASTA);  m->gobble(inFASTA);
737                         string candidateAligned = candidateSeq->getAligned();
738                         
739                         if (candidateSeq->getName() != "") { //incase there is a commented sequence at the end of a file
740                                 if (candidateSeq->getAligned().length() != templateSeqsLength) {  
741                                         m->mothurOut(candidateSeq->getName() + " is not the same length as the template sequences. Skipping."); m->mothurOutEndLine();
742                                 }else{
743                                         //find chimeras
744                                         chimera->getChimeras(candidateSeq);
745                                         
746                                         if (m->control_pressed) {       delete candidateSeq; return 1;  }
747                                                 
748                                         //if you are not chimeric, then check each half
749                                         data_results wholeResults = chimera->getResults();
750                                         
751                                         //determine if we need to split
752                                         bool isChimeric = false;
753                                         
754                                         if (wholeResults.flag == "yes") {
755                                                 string chimeraFlag = "no";
756                                                 if(  (wholeResults.results[0].bsa >= minBS && wholeResults.results[0].divr_qla_qrb >= divR)
757                                                    ||
758                                                    (wholeResults.results[0].bsb >= minBS && wholeResults.results[0].divr_qlb_qra >= divR) ) { chimeraFlag = "yes"; }
759                                                 
760                                                 
761                                                 if (chimeraFlag == "yes") {     
762                                                         if ((wholeResults.results[0].bsa >= minBS) || (wholeResults.results[0].bsb >= minBS)) { isChimeric = true; }
763                                                 }
764                                         }
765                                         
766                                         if ((!isChimeric) && trimera) {
767                                                 
768                                                 //split sequence in half by bases
769                                                 string leftQuery, rightQuery;
770                                                 Sequence tempSeq(candidateSeq->getName(), candidateAligned);
771                                                 divideInHalf(tempSeq, leftQuery, rightQuery);
772                                                 
773                                                 //run chimeraSlayer on each piece
774                                                 Sequence* left = new Sequence(candidateSeq->getName(), leftQuery);
775                                                 Sequence* right = new Sequence(candidateSeq->getName(), rightQuery);
776                                                 
777                                                 //find chimeras
778                                                 chimera->getChimeras(left);
779                                                 data_results leftResults = chimera->getResults();
780                                                 
781                                                 chimera->getChimeras(right);
782                                                 data_results rightResults = chimera->getResults();
783                                                 
784                                                 //if either piece is chimeric then report
785                                                 Sequence trimmed = chimera->print(out, out2, leftResults, rightResults);
786                                                 if (trim) { trimmed.printSequence(out3);  }
787                                                 
788                                                 delete left; delete right;
789                                                 
790                                         }else { //already chimeric
791                                                 //print results
792                                                 Sequence trimmed = chimera->print(out, out2);
793                                                 if (trim) { trimmed.printSequence(out3);  }
794                                         }
795                                         
796                                         
797                                 }
798                                 count++;
799                         }
800                         
801                         #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
802                                 unsigned long int pos = inFASTA.tellg();
803                                 if ((pos == -1) || (pos >= filePos->end)) { break; }
804                         #else
805                                 if (inFASTA.eof()) { break; }
806                         #endif
807                         
808                         delete candidateSeq;
809                         //report progress
810                         if((count) % 100 == 0){ m->mothurOut("Processing sequence: " + toString(count)); m->mothurOutEndLine();         }
811                 }
812                 //report progress
813                 if((count) % 100 != 0){ m->mothurOut("Processing sequence: " + toString(count)); m->mothurOutEndLine();         }
814                 
815                 out.close();
816                 out2.close();
817                 if (trim) { out3.close(); }
818                 inFASTA.close();
819                                 
820                 return count;
821         }
822         catch(exception& e) {
823                 m->errorOut(e, "ChimeraSlayerCommand", "driver");
824                 exit(1);
825         }
826 }
827 //**********************************************************************************************************************
828 #ifdef USE_MPI
829 int ChimeraSlayerCommand::driverMPI(int start, int num, MPI_File& inMPI, MPI_File& outMPI, MPI_File& outAccMPI, MPI_File& outFastaMPI, vector<unsigned long int>& MPIPos){
830         try {                           
831                 MPI_Status status; 
832                 int pid;
833                 MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
834                 
835                 for(int i=0;i<num;i++){
836                         
837                         if (m->control_pressed) {       return 1;       }
838                         
839                         //read next sequence
840                         int length = MPIPos[start+i+1] - MPIPos[start+i];
841
842                         char* buf4 = new char[length];
843                         MPI_File_read_at(inMPI, MPIPos[start+i], buf4, length, MPI_CHAR, &status);
844         
845                         string tempBuf = buf4;
846                         if (tempBuf.length() > length) { tempBuf = tempBuf.substr(0, length);  }
847                         istringstream iss (tempBuf,istringstream::in);
848
849                         delete buf4;
850
851                         Sequence* candidateSeq = new Sequence(iss);  m->gobble(iss);
852                         string candidateAligned = candidateSeq->getAligned();
853                 
854                         if (candidateSeq->getName() != "") { //incase there is a commented sequence at the end of a file
855                                 
856                                 if (candidateSeq->getAligned().length() != templateSeqsLength) {  
857                                         m->mothurOut(candidateSeq->getName() + " is not the same length as the template sequences. Skipping."); m->mothurOutEndLine();
858                                 }else{
859                 
860                                         //find chimeras
861                                         chimera->getChimeras(candidateSeq);
862                         
863                                         if (m->control_pressed) {       delete candidateSeq; return 1;  }
864                                         
865                                         //if you are not chimeric, then check each half
866                                         data_results wholeResults = chimera->getResults();
867                                         
868                                         //determine if we need to split
869                                         bool isChimeric = false;
870                                         
871                                         if (wholeResults.flag == "yes") {
872                                                 string chimeraFlag = "no";
873                                                 if(  (wholeResults.results[0].bsa >= minBS && wholeResults.results[0].divr_qla_qrb >= divR)
874                                                    ||
875                                                    (wholeResults.results[0].bsb >= minBS && wholeResults.results[0].divr_qlb_qra >= divR) ) { chimeraFlag = "yes"; }
876                                                 
877                                                 
878                                                 if (chimeraFlag == "yes") {     
879                                                         if ((wholeResults.results[0].bsa >= minBS) || (wholeResults.results[0].bsb >= minBS)) { isChimeric = true; }
880                                                 }
881                                         }
882                                         
883                                         if ((!isChimeric) && trimera) {                                                 
884                                                 //split sequence in half by bases
885                                                 string leftQuery, rightQuery;
886                                                 Sequence tempSeq(candidateSeq->getName(), candidateAligned);
887                                                 divideInHalf(tempSeq, leftQuery, rightQuery);
888                                                 
889                                                 //run chimeraSlayer on each piece
890                                                 Sequence* left = new Sequence(candidateSeq->getName(), leftQuery);
891                                                 Sequence* right = new Sequence(candidateSeq->getName(), rightQuery);
892                                                 
893                                                 //find chimeras
894                                                 chimera->getChimeras(left);
895                                                 data_results leftResults = chimera->getResults();
896                                                 
897                                                 chimera->getChimeras(right);
898                                                 data_results rightResults = chimera->getResults();
899                                                 
900                                                 //if either piece is chimeric then report
901                                                 Sequence trimmed = chimera->print(outMPI, outAccMPI, leftResults, rightResults);
902                                                 if (trim) {  
903                                                         string outputString = ">" + trimmed.getName() + "\n" + trimmed.getAligned() + "\n";
904                                                         
905                                                         //write to accnos file
906                                                         int length = outputString.length();
907                                                         char* buf2 = new char[length];
908                                                         memcpy(buf2, outputString.c_str(), length);
909                                                         
910                                                         MPI_File_write_shared(outFastaMPI, buf2, length, MPI_CHAR, &status);
911                                                         delete buf2;
912                                                 }
913                                                 
914                                                 delete left; delete right;
915                                                 
916                                         }else { 
917                                                 //print results
918                                                 Sequence trimmed = chimera->print(outMPI, outAccMPI);
919                                                 
920                                                 if (trim) {  
921                                                         string outputString = ">" + trimmed.getName() + "\n" + trimmed.getAligned() + "\n";
922                                                         
923                                                         //write to accnos file
924                                                         int length = outputString.length();
925                                                         char* buf2 = new char[length];
926                                                         memcpy(buf2, outputString.c_str(), length);
927                                                         
928                                                         MPI_File_write_shared(outFastaMPI, buf2, length, MPI_CHAR, &status);
929                                                         delete buf2;
930                                                 }
931                                         }
932                                         
933                                 }
934                         }
935                         delete candidateSeq;
936                         
937                         //report progress
938                         if((i+1) % 100 == 0){  cout << "Processing sequence: " << (i+1) << endl;        m->mothurOutJustToLog("Processing sequence: " + toString(i+1) + "\n");          }
939                 }
940                 //report progress
941                 if(num % 100 != 0){             cout << "Processing sequence: " << num << endl; m->mothurOutJustToLog("Processing sequence: " + toString(num) + "\n");  }
942                 
943                                 
944                 return 0;
945         }
946         catch(exception& e) {
947                 m->errorOut(e, "ChimeraSlayerCommand", "driverMPI");
948                 exit(1);
949         }
950 }
951 #endif
952
953 /**************************************************************************************************/
954
955 int ChimeraSlayerCommand::createProcesses(string outputFileName, string filename, string accnos, string fasta) {
956         try {
957 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
958                 int process = 0;
959                 int num = 0;
960                 
961                 //loop through and create all the processes you want
962                 while (process != processors) {
963                         int pid = fork();
964                         
965                         if (pid > 0) {
966                                 processIDS.push_back(pid);  //create map from line number to pid so you can append files in correct order later
967                                 process++;
968                         }else if (pid == 0){
969                                 num = driver(lines[process], outputFileName + toString(getpid()) + ".temp", filename, accnos + toString(getpid()) + ".temp", fasta + toString(getpid()) + ".temp");
970                                 
971                                 //pass numSeqs to parent
972                                 ofstream out;
973                                 string tempFile = outputFileName + toString(getpid()) + ".num.temp";
974                                 m->openOutputFile(tempFile, out);
975                                 out << num << '\t' << chimera->getNumNoParents() << endl;
976                                 out.close();
977                                 exit(0);
978                         }else { 
979                                 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine(); 
980                                 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
981                                 exit(0);
982                         }
983                 }
984                 
985                 //force parent to wait until all the processes are done
986                 for (int i=0;i<processors;i++) { 
987                         int temp = processIDS[i];
988                         wait(&temp);
989                 }
990                 
991                 int numNoParents = 0;
992                 for (int i = 0; i < processIDS.size(); i++) {
993                         ifstream in;
994                         string tempFile =  outputFileName + toString(processIDS[i]) + ".num.temp";
995                         m->openInputFile(tempFile, in);
996                         if (!in.eof()) { int tempNum = 0; int tempNumParents = 0; in >> tempNum >> tempNumParents; num += tempNum; numNoParents += tempNumParents; }
997                         in.close(); m->mothurRemove(tempFile);
998                 }
999                 
1000                 if (num == numNoParents) {  m->mothurOut("[WARNING]: megablast returned 0 potential parents for all your sequences. This could be due to formatdb.exe not being setup properly, please check formatdb.log for errors."); m->mothurOutEndLine(); }
1001                 
1002                 return num;
1003 #endif          
1004         }
1005         catch(exception& e) {
1006                 m->errorOut(e, "ChimeraSlayerCommand", "createProcesses");
1007                 exit(1);
1008         }
1009 }
1010
1011 /**************************************************************************************************/
1012
1013 int ChimeraSlayerCommand::divideInHalf(Sequence querySeq, string& leftQuery, string& rightQuery) {
1014         try {
1015                 
1016                 string queryUnAligned = querySeq.getUnaligned();
1017                 int numBases = int(queryUnAligned.length() * 0.5);
1018                 
1019                 string queryAligned = querySeq.getAligned();
1020                 leftQuery = querySeq.getAligned();
1021                 rightQuery = querySeq.getAligned();
1022                 
1023                 int baseCount = 0;
1024                 int leftSpot = 0;
1025                 for (int i = 0; i < queryAligned.length(); i++) {
1026                         //if you are a base
1027                         if (isalpha(queryAligned[i])) {         
1028                                 baseCount++; 
1029                         }
1030                         
1031                         //if you have half
1032                         if (baseCount >= numBases) {  leftSpot = i; break; } //first half
1033                 }
1034                 
1035                 //blank out right side
1036                 for (int i = leftSpot; i < leftQuery.length(); i++) { leftQuery[i] = '.'; }
1037                 
1038                 //blank out left side
1039                 for (int i = 0; i < leftSpot; i++) { rightQuery[i] = '.'; }
1040                 
1041                 return 0;
1042                 
1043         }
1044         catch(exception& e) {
1045                 m->errorOut(e, "ChimeraSlayerCommand", "divideInHalf");
1046                 exit(1);
1047         }
1048 }
1049 /**************************************************************************************************/
1050 map<string, int> ChimeraSlayerCommand::sortFastaFile(string fastaFile, string nameFile) {
1051         try {
1052                 map<string, int> nameAbund;
1053                 
1054                 //read through fastafile and store info
1055                 map<string, string> seqs;
1056                 ifstream in;
1057                 m->openInputFile(fastaFile, in);
1058                 
1059                 while (!in.eof()) {
1060                         
1061                         if (m->control_pressed) { in.close(); return nameAbund; }
1062                         
1063                         Sequence seq(in); m->gobble(in);
1064                         seqs[seq.getName()] = seq.getAligned();
1065                 }
1066                 
1067                 in.close();
1068                 
1069                 //read namefile
1070                 vector<seqPriorityNode> nameMapCount;
1071                 int error = m->readNames(nameFile, nameMapCount, seqs);
1072                 
1073                 if (m->control_pressed) { return nameAbund; }
1074                 
1075                 if (error == 1) { m->control_pressed = true; return nameAbund; }
1076                 if (seqs.size() != nameMapCount.size()) { m->mothurOut( "The number of sequences in your fastafile does not match the number of sequences in your namefile, aborting."); m->mothurOutEndLine(); m->control_pressed = true; return nameAbund; }
1077
1078                 sort(nameMapCount.begin(), nameMapCount.end(), compareSeqPriorityNodes);
1079                 
1080                 string newFasta = fastaFile + ".temp";
1081                 ofstream out;
1082                 m->openOutputFile(newFasta, out);
1083                 
1084                 //print new file in order of
1085                 for (int i = 0; i < nameMapCount.size(); i++) {
1086                         out << ">" << nameMapCount[i].name << endl << nameMapCount[i].seq << endl;
1087                         nameAbund[nameMapCount[i].name] = nameMapCount[i].numIdentical;
1088                 }
1089                 out.close();
1090                 
1091                 rename(newFasta.c_str(), fastaFile.c_str());
1092                                 
1093                 return nameAbund;
1094                 
1095         }
1096         catch(exception& e) {
1097                 m->errorOut(e, "ChimeraSlayerCommand", "sortFastaFile");
1098                 exit(1);
1099         }
1100 }
1101
1102 /**************************************************************************************************/
1103