]> git.donarmstrong.com Git - mothur.git/blob - chimeraslayercommand.cpp
Merge remote-tracking branch 'origin/master'
[mothur.git] / chimeraslayercommand.cpp
1 /*
2  *  chimeraslayercommand.cpp
3  *  Mothur
4  *
5  *  Created by westcott on 3/31/10.
6  *  Copyright 2010 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "chimeraslayercommand.h"
11 #include "deconvolutecommand.h"
12 #include "referencedb.h"
13 #include "sequenceparser.h"
14 #include "counttable.h"
15
16 //**********************************************************************************************************************
17 vector<string> ChimeraSlayerCommand::setParameters(){   
18         try {
19                 CommandParameter ptemplate("reference", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(ptemplate);
20                 CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pfasta);
21                 CommandParameter pname("name", "InputTypes", "", "", "NameCount", "none", "none",false,false); parameters.push_back(pname);
22          CommandParameter pcount("count", "InputTypes", "", "", "NameCount-CountGroup", "none", "none",false,false); parameters.push_back(pcount);
23                 CommandParameter pgroup("group", "InputTypes", "", "", "CountGroup", "none", "none",false,false); parameters.push_back(pgroup);
24                 CommandParameter pwindow("window", "Number", "", "50", "", "", "",false,false); parameters.push_back(pwindow);
25                 CommandParameter pksize("ksize", "Number", "", "7", "", "", "",false,false); parameters.push_back(pksize);
26                 CommandParameter pmatch("match", "Number", "", "5.0", "", "", "",false,false); parameters.push_back(pmatch);
27                 CommandParameter pmismatch("mismatch", "Number", "", "-4.0", "", "", "",false,false); parameters.push_back(pmismatch);
28                 CommandParameter pminsim("minsim", "Number", "", "90", "", "", "",false,false); parameters.push_back(pminsim);
29                 CommandParameter pmincov("mincov", "Number", "", "70", "", "", "",false,false); parameters.push_back(pmincov);
30                 CommandParameter pminsnp("minsnp", "Number", "", "10", "", "", "",false,false); parameters.push_back(pminsnp);
31                 CommandParameter pminbs("minbs", "Number", "", "90", "", "", "",false,false); parameters.push_back(pminbs);
32                 CommandParameter psearch("search", "Multiple", "kmer-blast", "blast", "", "", "",false,false); parameters.push_back(psearch);
33                 CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors);
34                 CommandParameter prealign("realign", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(prealign);
35                 CommandParameter ptrim("trim", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(ptrim);
36                 CommandParameter psplit("split", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(psplit);
37                 CommandParameter pnumwanted("numwanted", "Number", "", "15", "", "", "",false,false); parameters.push_back(pnumwanted);
38                 CommandParameter piters("iters", "Number", "", "1000", "", "", "",false,false); parameters.push_back(piters);
39                 CommandParameter pdivergence("divergence", "Number", "", "1.007", "", "", "",false,false); parameters.push_back(pdivergence);
40                 CommandParameter pparents("parents", "Number", "", "3", "", "", "",false,false); parameters.push_back(pparents);
41                 CommandParameter pincrement("increment", "Number", "", "5", "", "", "",false,false); parameters.push_back(pincrement);
42                 CommandParameter pblastlocation("blastlocation", "String", "", "", "", "", "",false,false); parameters.push_back(pblastlocation);
43                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
44                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
45                 CommandParameter psave("save", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(psave);
46
47                 vector<string> myArray;
48                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
49                 return myArray;
50         }
51         catch(exception& e) {
52                 m->errorOut(e, "ChimeraSlayerCommand", "setParameters");
53                 exit(1);
54         }
55 }
56 //**********************************************************************************************************************
57 string ChimeraSlayerCommand::getHelpString(){   
58         try {
59                 string helpString = "";
60                 helpString += "The chimera.slayer command reads a fastafile and referencefile and outputs potentially chimeric sequences.\n";
61                 helpString += "This command was modeled after the chimeraSlayer written by the Broad Institute.\n";
62                 helpString += "The chimera.slayer command parameters are fasta, name, group, template, processors, trim, ksize, window, match, mismatch, divergence. minsim, mincov, minbs, minsnp, parents, search, iters, increment, numwanted, blastlocation and realign.\n";
63                 helpString += "The fasta parameter allows you to enter the fasta file containing your potentially chimeric sequences, and is required, unless you have a valid current fasta file. \n";
64                 helpString += "The name parameter allows you to provide a name file, if you are using reference=self. \n";
65                 helpString += "The group parameter allows you to provide a group file. The group file can be used with a namesfile and reference=self. When checking sequences, only sequences from the same group as the query sequence will be used as the reference. \n";
66         helpString += "The count parameter allows you to provide a count file. The count file reference=self. If your count file contains group information, when checking sequences, only sequences from the same group as the query sequence will be used as the reference. \n";
67                 helpString += "You may enter multiple fasta files by separating their names with dashes. ie. fasta=abrecovery.fasta-amazon.fasta \n";
68                 helpString += "The reference parameter allows you to enter a reference file containing known non-chimeric sequences, and is required. You may also set template=self, in this case the abundant sequences will be used as potential parents. \n";
69                 helpString += "The processors parameter allows you to specify how many processors you would like to use.  The default is 1. \n";
70 #ifdef USE_MPI
71                 helpString += "When using MPI, the processors parameter is set to the number of MPI processes running. \n";
72 #endif
73                 helpString += "The trim parameter allows you to output a new fasta file containing your sequences with the chimeric ones trimmed to include only their longest piece, default=F. \n";
74                 helpString += "The split parameter allows you to check both pieces of non-chimeric sequence for chimeras, thus looking for trimeras and quadmeras. default=F. \n";
75                 helpString += "The window parameter allows you to specify the window size for searching for chimeras, default=50. \n";
76                 helpString += "The increment parameter allows you to specify how far you move each window while finding chimeric sequences, default=5.\n";
77                 helpString += "The numwanted parameter allows you to specify how many sequences you would each query sequence compared with, default=15.\n";
78                 helpString += "The ksize parameter allows you to input kmersize, default is 7, used if search is kmer. \n";
79                 helpString += "The match parameter allows you to reward matched bases in blast search, default is 5. \n";
80                 helpString += "The parents parameter allows you to select the number of potential parents to investigate from the numwanted best matches after rating them, default is 3. \n";
81                 helpString += "The mismatch parameter allows you to penalize mismatched bases in blast search, default is -4. \n";
82                 helpString += "The divergence parameter allows you to set a cutoff for chimera determination, default is 1.007. \n";
83                 helpString += "The iters parameter allows you to specify the number of bootstrap iters to do with the chimeraslayer method, default=1000.\n";
84                 helpString += "The minsim parameter allows you to specify a minimum similarity with the parent fragments, default=90. \n";
85                 helpString += "The mincov parameter allows you to specify minimum coverage by closest matches found in template. Default is 70, meaning 70%. \n";
86                 helpString += "The minbs parameter allows you to specify minimum bootstrap support for calling a sequence chimeric. Default is 90, meaning 90%. \n";
87                 helpString += "The minsnp parameter allows you to specify percent of SNPs to sample on each side of breakpoint for computing bootstrap support (default: 10) \n";
88                 helpString += "The search parameter allows you to specify search method for finding the closest parent. Choices are blast, and kmer, default blast. \n";
89                 helpString += "The realign parameter allows you to realign the query to the potential parents. Choices are true or false, default true.  \n";
90                 helpString += "The blastlocation parameter allows you to specify the location of your blast executable. By default mothur will look in ./blast/bin relative to mothur's executable.  \n";
91                 helpString += "If the save parameter is set to true the reference sequences will be saved in memory, to clear them later you can use the clear.memory command. Default=f.";
92                 helpString += "The chimera.slayer command should be in the following format: \n";
93                 helpString += "chimera.slayer(fasta=yourFastaFile, reference=yourTemplate, search=yourSearch) \n";
94                 helpString += "Example: chimera.slayer(fasta=AD.align, reference=core_set_aligned.imputed.fasta, search=kmer) \n";
95                 helpString += "Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFastaFile).\n";       
96                 return helpString;
97         }
98         catch(exception& e) {
99                 m->errorOut(e, "ChimeraSlayerCommand", "getHelpString");
100                 exit(1);
101         }
102 }
103 //**********************************************************************************************************************
104 string ChimeraSlayerCommand::getOutputFileNameTag(string type, string inputName=""){    
105         try {
106         string outputFileName = "";
107                 map<string, vector<string> >::iterator it;
108         
109         //is this a type this command creates
110         it = outputTypes.find(type);
111         if (it == outputTypes.end()) {  m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n"); }
112         else {
113             if (type == "chimera") {  outputFileName =  "slayer.chimeras"; }
114             else if (type == "accnos") {  outputFileName =  "slayer.accnos"; }
115             else if (type == "fasta") {  outputFileName =  "slayer.fasta"; }
116             else { m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n"); m->control_pressed = true;  }
117         }
118         return outputFileName;
119         }
120         catch(exception& e) {
121                 m->errorOut(e, "ChimeraSlayerCommand", "getOutputFileNameTag");
122                 exit(1);
123         }
124 }
125 //**********************************************************************************************************************
126 ChimeraSlayerCommand::ChimeraSlayerCommand(){   
127         try {
128                 abort = true; calledHelp = true;
129                 setParameters();
130                 vector<string> tempOutNames;
131                 outputTypes["chimera"] = tempOutNames;
132                 outputTypes["accnos"] = tempOutNames;
133                 outputTypes["fasta"] = tempOutNames;
134         }
135         catch(exception& e) {
136                 m->errorOut(e, "ChimeraSlayerCommand", "ChimeraSlayerCommand");
137                 exit(1);
138         }
139 }
140 //***************************************************************************************************************
141 ChimeraSlayerCommand::ChimeraSlayerCommand(string option)  {
142         try {
143                 abort = false; calledHelp = false;   
144                 ReferenceDB* rdb = ReferenceDB::getInstance();
145         hasCount = false;
146         hasName = false;
147                 
148                 //allow user to run help
149                 if(option == "help") { help(); abort = true; calledHelp = true; }
150                 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
151                 
152                 else {
153                         vector<string> myArray = setParameters();
154                         
155                         OptionParser parser(option);
156                         map<string,string> parameters = parser.getParameters();
157                         
158                         ValidParameters validParameter("chimera.slayer");
159                         map<string,string>::iterator it;
160                         
161                         //check to make sure all parameters are valid for command
162                         for (it = parameters.begin(); it != parameters.end(); it++) { 
163                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
164                         }
165                         
166                         vector<string> tempOutNames;
167                         outputTypes["chimera"] = tempOutNames;
168                         outputTypes["accnos"] = tempOutNames;
169                         outputTypes["fasta"] = tempOutNames;
170                 
171                         //if the user changes the input directory command factory will send this info to us in the output parameter 
172                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
173                         if (inputDir == "not found"){   inputDir = "";          }
174                                                 
175                         //check for required parameters
176                         fastafile = validParameter.validFile(parameters, "fasta", false);
177                         if (fastafile == "not found") {                                 
178                                 //if there is a current fasta file, use it
179                                 string filename = m->getFastaFile(); 
180                                 if (filename != "") { fastaFileNames.push_back(filename); m->mothurOut("Using " + filename + " as input file for the fasta parameter."); m->mothurOutEndLine(); }
181                                 else {  m->mothurOut("You have no current fastafile and the fasta parameter is required."); m->mothurOutEndLine(); abort = true; }
182                         }else { 
183                                 m->splitAtDash(fastafile, fastaFileNames);
184                                 
185                                 //go through files and make sure they are good, if not, then disregard them
186                                 for (int i = 0; i < fastaFileNames.size(); i++) {
187                                         
188                                         bool ignore = false;
189                                         if (fastaFileNames[i] == "current") { 
190                                                 fastaFileNames[i] = m->getFastaFile(); 
191                                                 if (fastaFileNames[i] != "") {  m->mothurOut("Using " + fastaFileNames[i] + " as input file for the fasta parameter where you had given current."); m->mothurOutEndLine(); }
192                                                 else {  
193                                                         m->mothurOut("You have no current fastafile, ignoring current."); m->mothurOutEndLine(); ignore=true; 
194                                                         //erase from file list
195                                                         fastaFileNames.erase(fastaFileNames.begin()+i);
196                                                         i--;
197                                                 }
198                                         }
199                                         
200                                         if (!ignore) {
201                                                 
202                                                 if (inputDir != "") {
203                                                         string path = m->hasPath(fastaFileNames[i]);
204                                                         //if the user has not given a path then, add inputdir. else leave path alone.
205                                                         if (path == "") {       fastaFileNames[i] = inputDir + fastaFileNames[i];               }
206                                                 }
207                 
208                                                 int ableToOpen;
209                                                 ifstream in;
210                                                 
211                                                 ableToOpen = m->openInputFile(fastaFileNames[i], in, "noerror");
212                                         
213                                                 //if you can't open it, try default location
214                                                 if (ableToOpen == 1) {
215                                                         if (m->getDefaultPath() != "") { //default path is set
216                                                                 string tryPath = m->getDefaultPath() + m->getSimpleName(fastaFileNames[i]);
217                                                                 m->mothurOut("Unable to open " + fastaFileNames[i] + ". Trying default " + tryPath); m->mothurOutEndLine();
218                                                                 ifstream in2;
219                                                                 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
220                                                                 in2.close();
221                                                                 fastaFileNames[i] = tryPath;
222                                                         }
223                                                 }
224                                                 
225                                                 if (ableToOpen == 1) {
226                                                         if (m->getOutputDir() != "") { //default path is set
227                                                                 string tryPath = m->getOutputDir() + m->getSimpleName(fastaFileNames[i]);
228                                                                 m->mothurOut("Unable to open " + fastaFileNames[i] + ". Trying output directory " + tryPath); m->mothurOutEndLine();
229                                                                 ifstream in2;
230                                                                 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
231                                                                 in2.close();
232                                                                 fastaFileNames[i] = tryPath;
233                                                         }
234                                                 }
235                                                 
236                                                 in.close();
237                                                 
238                                                 if (ableToOpen == 1) { 
239                                                         m->mothurOut("Unable to open " + fastaFileNames[i] + ". It will be disregarded."); m->mothurOutEndLine(); 
240                                                         //erase from file list
241                                                         fastaFileNames.erase(fastaFileNames.begin()+i);
242                                                         i--;
243                                                 }else {
244                                                         m->setFastaFile(fastaFileNames[i]);
245                                                 }
246                                         }
247                                 }
248                                 
249                                 //make sure there is at least one valid file left
250                                 if (fastaFileNames.size() == 0) { m->mothurOut("[ERROR]: no valid files."); m->mothurOutEndLine(); abort = true; }
251                         }
252                         
253                         
254                         //check for required parameters
255                         namefile = validParameter.validFile(parameters, "name", false);
256                         if (namefile == "not found") { namefile = "";   }
257                         else { 
258                                 m->splitAtDash(namefile, nameFileNames);
259                                 
260                                 //go through files and make sure they are good, if not, then disregard them
261                                 for (int i = 0; i < nameFileNames.size(); i++) {
262                                         
263                                         bool ignore = false;
264                                         if (nameFileNames[i] == "current") { 
265                                                 nameFileNames[i] = m->getNameFile(); 
266                                                 if (nameFileNames[i] != "") {  m->mothurOut("Using " + nameFileNames[i] + " as input file for the name parameter where you had given current."); m->mothurOutEndLine(); }
267                                                 else {  
268                                                         m->mothurOut("You have no current namefile, ignoring current."); m->mothurOutEndLine(); ignore=true; 
269                                                         //erase from file list
270                                                         nameFileNames.erase(nameFileNames.begin()+i);
271                                                         i--;
272                                                 }
273                                         }
274                                         
275                                         if (!ignore) {
276                                                 
277                                                 if (inputDir != "") {
278                                                         string path = m->hasPath(nameFileNames[i]);
279                                                         //if the user has not given a path then, add inputdir. else leave path alone.
280                                                         if (path == "") {       nameFileNames[i] = inputDir + nameFileNames[i];         }
281                                                 }
282                                                 
283                                                 int ableToOpen;
284                                                 ifstream in;
285                                                 
286                                                 ableToOpen = m->openInputFile(nameFileNames[i], in, "noerror");
287                                                 
288                                                 //if you can't open it, try default location
289                                                 if (ableToOpen == 1) {
290                                                         if (m->getDefaultPath() != "") { //default path is set
291                                                                 string tryPath = m->getDefaultPath() + m->getSimpleName(nameFileNames[i]);
292                                                                 m->mothurOut("Unable to open " + nameFileNames[i] + ". Trying default " + tryPath); m->mothurOutEndLine();
293                                                                 ifstream in2;
294                                                                 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
295                                                                 in2.close();
296                                                                 nameFileNames[i] = tryPath;
297                                                         }
298                                                 }
299                                                 
300                                                 if (ableToOpen == 1) {
301                                                         if (m->getOutputDir() != "") { //default path is set
302                                                                 string tryPath = m->getOutputDir() + m->getSimpleName(nameFileNames[i]);
303                                                                 m->mothurOut("Unable to open " + nameFileNames[i] + ". Trying output directory " + tryPath); m->mothurOutEndLine();
304                                                                 ifstream in2;
305                                                                 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
306                                                                 in2.close();
307                                                                 nameFileNames[i] = tryPath;
308                                                         }
309                                                 }
310                                                 
311                                                 in.close();
312                                                 
313                                                 if (ableToOpen == 1) { 
314                                                         m->mothurOut("Unable to open " + nameFileNames[i] + ". It will be disregarded."); m->mothurOutEndLine(); 
315                                                         //erase from file list
316                                                         nameFileNames.erase(nameFileNames.begin()+i);
317                                                         i--;
318                                                 }else {
319                                                         m->setNameFile(nameFileNames[i]);
320                                                 }
321                                         }
322                                 }
323                         }
324             
325             if (nameFileNames.size() != 0) { hasName = true; }
326             
327             //check for required parameters
328             vector<string> countfileNames;
329                         countfile = validParameter.validFile(parameters, "count", false);
330                         if (countfile == "not found") { 
331                 countfile = "";  
332                         }else { 
333                                 m->splitAtDash(countfile, countfileNames);
334                                 
335                                 //go through files and make sure they are good, if not, then disregard them
336                                 for (int i = 0; i < countfileNames.size(); i++) {
337                                         
338                                         bool ignore = false;
339                                         if (countfileNames[i] == "current") { 
340                                                 countfileNames[i] = m->getCountTableFile(); 
341                                                 if (nameFileNames[i] != "") {  m->mothurOut("Using " + countfileNames[i] + " as input file for the count parameter where you had given current."); m->mothurOutEndLine(); }
342                                                 else {  
343                                                         m->mothurOut("You have no current count file, ignoring current."); m->mothurOutEndLine(); ignore=true; 
344                                                         //erase from file list
345                                                         countfileNames.erase(countfileNames.begin()+i);
346                                                         i--;
347                                                 }
348                                         }
349                                         
350                                         if (!ignore) {
351                                                 
352                                                 if (inputDir != "") {
353                                                         string path = m->hasPath(countfileNames[i]);
354                                                         //if the user has not given a path then, add inputdir. else leave path alone.
355                                                         if (path == "") {       countfileNames[i] = inputDir + countfileNames[i];               }
356                                                 }
357                                                 
358                                                 int ableToOpen;
359                                                 ifstream in;
360                                                 
361                                                 ableToOpen = m->openInputFile(countfileNames[i], in, "noerror");
362                                                 
363                                                 //if you can't open it, try default location
364                                                 if (ableToOpen == 1) {
365                                                         if (m->getDefaultPath() != "") { //default path is set
366                                                                 string tryPath = m->getDefaultPath() + m->getSimpleName(countfileNames[i]);
367                                                                 m->mothurOut("Unable to open " + countfileNames[i] + ". Trying default " + tryPath); m->mothurOutEndLine();
368                                                                 ifstream in2;
369                                                                 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
370                                                                 in2.close();
371                                                                 countfileNames[i] = tryPath;
372                                                         }
373                                                 }
374                                                 
375                                                 if (ableToOpen == 1) {
376                                                         if (m->getOutputDir() != "") { //default path is set
377                                                                 string tryPath = m->getOutputDir() + m->getSimpleName(countfileNames[i]);
378                                                                 m->mothurOut("Unable to open " + countfileNames[i] + ". Trying output directory " + tryPath); m->mothurOutEndLine();
379                                                                 ifstream in2;
380                                                                 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
381                                                                 in2.close();
382                                                                 countfileNames[i] = tryPath;
383                                                         }
384                                                 }
385                                                 
386                                                 in.close();
387                                                 
388                                                 if (ableToOpen == 1) { 
389                                                         m->mothurOut("Unable to open " + countfileNames[i] + ". It will be disregarded."); m->mothurOutEndLine(); 
390                                                         //erase from file list
391                                                         countfileNames.erase(countfileNames.begin()+i);
392                                                         i--;
393                                                 }else {
394                                                         m->setCountTableFile(countfileNames[i]);
395                                                 }
396                                         }
397                                 }
398                         }
399             
400             if (countfileNames.size() != 0) { hasCount = true; }
401             
402                         //make sure there is at least one valid file left
403             if (hasName && hasCount) { m->mothurOut("[ERROR]: You must enter ONLY ONE of the following: count or name."); m->mothurOutEndLine(); abort = true; }
404             
405             if (!hasName && hasCount) { nameFileNames = countfileNames; }
406             
407                         if ((hasCount || hasName) && (nameFileNames.size() != fastaFileNames.size())) { m->mothurOut("[ERROR]: The number of name or count files does not match the number of fastafiles, please correct."); m->mothurOutEndLine(); abort=true; }
408                         
409                         bool hasGroup = true;
410                         groupfile = validParameter.validFile(parameters, "group", false);
411                         if (groupfile == "not found") { groupfile = "";  hasGroup = false; }
412                         else { 
413                                 m->splitAtDash(groupfile, groupFileNames);
414                                 
415                                 //go through files and make sure they are good, if not, then disregard them
416                                 for (int i = 0; i < groupFileNames.size(); i++) {
417                                         
418                                         bool ignore = false;
419                                         if (groupFileNames[i] == "current") { 
420                                                 groupFileNames[i] = m->getGroupFile(); 
421                                                 if (groupFileNames[i] != "") {  m->mothurOut("Using " + groupFileNames[i] + " as input file for the group parameter where you had given current."); m->mothurOutEndLine(); }
422                                                 else {  
423                                                         m->mothurOut("You have no current namefile, ignoring current."); m->mothurOutEndLine(); ignore=true; 
424                                                         //erase from file list
425                                                         groupFileNames.erase(groupFileNames.begin()+i);
426                                                         i--;
427                                                 }
428                                         }
429                                         
430                                         if (!ignore) {
431                                                 
432                                                 if (inputDir != "") {
433                                                         string path = m->hasPath(groupFileNames[i]);
434                                                         //if the user has not given a path then, add inputdir. else leave path alone.
435                                                         if (path == "") {       groupFileNames[i] = inputDir + groupFileNames[i];               }
436                                                 }
437                                                 
438                                                 int ableToOpen;
439                                                 ifstream in;
440                                                 
441                                                 ableToOpen = m->openInputFile(groupFileNames[i], in, "noerror");
442                                                 
443                                                 //if you can't open it, try default location
444                                                 if (ableToOpen == 1) {
445                                                         if (m->getDefaultPath() != "") { //default path is set
446                                                                 string tryPath = m->getDefaultPath() + m->getSimpleName(groupFileNames[i]);
447                                                                 m->mothurOut("Unable to open " + groupFileNames[i] + ". Trying default " + tryPath); m->mothurOutEndLine();
448                                                                 ifstream in2;
449                                                                 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
450                                                                 in2.close();
451                                                                 groupFileNames[i] = tryPath;
452                                                         }
453                                                 }
454                                                 
455                                                 if (ableToOpen == 1) {
456                                                         if (m->getOutputDir() != "") { //default path is set
457                                                                 string tryPath = m->getOutputDir() + m->getSimpleName(groupFileNames[i]);
458                                                                 m->mothurOut("Unable to open " + groupFileNames[i] + ". Trying output directory " + tryPath); m->mothurOutEndLine();
459                                                                 ifstream in2;
460                                                                 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
461                                                                 in2.close();
462                                                                 groupFileNames[i] = tryPath;
463                                                         }
464                                                 }
465                                                 
466                                                 in.close();
467                                                 
468                                                 if (ableToOpen == 1) { 
469                                                         m->mothurOut("Unable to open " + groupFileNames[i] + ". It will be disregarded."); m->mothurOutEndLine(); 
470                                                         //erase from file list
471                                                         groupFileNames.erase(groupFileNames.begin()+i);
472                                                         i--;
473                                                 }else {
474                                                         m->setGroupFile(groupFileNames[i]);
475                                                 }
476                                         }
477                                 }
478                                 
479                                 //make sure there is at least one valid file left
480                                 if (groupFileNames.size() == 0) { m->mothurOut("[ERROR]: no valid group files."); m->mothurOutEndLine(); abort = true; }
481                         }
482                         
483                         if (hasGroup && (groupFileNames.size() != fastaFileNames.size())) { m->mothurOut("[ERROR]: The number of groupfiles does not match the number of fastafiles, please correct."); m->mothurOutEndLine(); abort=true; }
484                         
485             if (hasGroup && hasCount) { m->mothurOut("[ERROR]: You must enter ONLY ONE of the following: count or group."); m->mothurOutEndLine(); abort = true; }                      
486                         //if the user changes the output directory command factory will send this info to us in the output parameter 
487                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = ""; }
488                         
489                         string temp = validParameter.validFile(parameters, "processors", false);        if (temp == "not found"){       temp = m->getProcessors();      }
490                         m->setProcessors(temp);
491                         m->mothurConvert(temp, processors);
492                         
493                         temp = validParameter.validFile(parameters, "save", false);                     if (temp == "not found"){       temp = "f";                             }
494                         save = m->isTrue(temp); 
495                         rdb->save = save; 
496                         if (save) { //clear out old references
497                                 rdb->clearMemory();     
498                         }
499                         
500                         string path;
501                         it = parameters.find("reference");
502                         //user has given a template file
503                         if(it != parameters.end()){ 
504                                 if (it->second == "self") { 
505                                         templatefile = "self"; 
506                                         if (save) {
507                                                 m->mothurOut("[WARNING]: You can't save reference=self, ignoring save."); 
508                                                 m->mothurOutEndLine();
509                                                 save = false;
510                                         }
511                                 }
512                                 else {
513                                         path = m->hasPath(it->second);
514                                         //if the user has not given a path then, add inputdir. else leave path alone.
515                                         if (path == "") {       parameters["reference"] = inputDir + it->second;                }
516                                         
517                                         templatefile = validParameter.validFile(parameters, "reference", true);
518                                         if (templatefile == "not open") { abort = true; }
519                                         else if (templatefile == "not found") { //check for saved reference sequences
520                                                 if (rdb->referenceSeqs.size() != 0) {
521                                                         templatefile = "saved";
522                                                 }else {
523                                                         m->mothurOut("[ERROR]: You don't have any saved reference sequences and the reference parameter is a required."); 
524                                                         m->mothurOutEndLine();
525                                                         abort = true; 
526                                                 }
527                                         }else { if (save) {     rdb->setSavedReference(templatefile);   }       }       
528                                 }
529                         }else if (hasName) {  templatefile = "self"; 
530                                 if (save) {
531                                         m->mothurOut("[WARNING]: You can't save reference=self, ignoring save."); 
532                                         m->mothurOutEndLine();
533                                         save = false;
534                                 }
535                         }else if (hasCount) {  templatefile = "self"; 
536                                 if (save) {
537                                         m->mothurOut("[WARNING]: You can't save reference=self, ignoring save."); 
538                                         m->mothurOutEndLine();
539                                         save = false;
540                                 }
541                         }
542                         else { 
543                                 if (rdb->referenceSeqs.size() != 0) {
544                                         templatefile = "saved";
545                                 }else {
546                                         m->mothurOut("[ERROR]: You don't have any saved reference sequences and the reference parameter is a required."); 
547                                         m->mothurOutEndLine();
548                                         templatefile = ""; abort = true; 
549                                 } 
550                         }
551                         
552                         
553                         
554                         temp = validParameter.validFile(parameters, "ksize", false);                    if (temp == "not found") { temp = "7"; }
555                         m->mothurConvert(temp, ksize);
556                                                 
557                         temp = validParameter.validFile(parameters, "window", false);                   if (temp == "not found") { temp = "50"; }                       
558                         m->mothurConvert(temp, window);
559                         
560                         temp = validParameter.validFile(parameters, "match", false);                    if (temp == "not found") { temp = "5"; }
561                         m->mothurConvert(temp, match);
562                         
563                         temp = validParameter.validFile(parameters, "mismatch", false);                 if (temp == "not found") { temp = "-4"; }
564                         m->mothurConvert(temp, mismatch);
565                         
566                         temp = validParameter.validFile(parameters, "divergence", false);               if (temp == "not found") { temp = "1.007"; }
567                         m->mothurConvert(temp, divR);
568                         
569                         temp = validParameter.validFile(parameters, "minsim", false);                   if (temp == "not found") { temp = "90"; }
570                         m->mothurConvert(temp, minSimilarity);
571                         
572                         temp = validParameter.validFile(parameters, "mincov", false);                   if (temp == "not found") { temp = "70"; }
573                         m->mothurConvert(temp, minCoverage);
574                         
575                         temp = validParameter.validFile(parameters, "minbs", false);                    if (temp == "not found") { temp = "90"; }
576                         m->mothurConvert(temp, minBS);
577                         
578                         temp = validParameter.validFile(parameters, "minsnp", false);                   if (temp == "not found") { temp = "10"; }
579                         m->mothurConvert(temp, minSNP);
580
581                         temp = validParameter.validFile(parameters, "parents", false);                  if (temp == "not found") { temp = "3"; }
582                         m->mothurConvert(temp, parents); 
583                         
584                         temp = validParameter.validFile(parameters, "realign", false);                  if (temp == "not found") { temp = "t"; }
585                         realign = m->isTrue(temp); 
586                         
587                         temp = validParameter.validFile(parameters, "trim", false);                             if (temp == "not found") { temp = "f"; }
588                         trim = m->isTrue(temp); 
589                         
590                         temp = validParameter.validFile(parameters, "split", false);                    if (temp == "not found") { temp = "f"; }
591                         trimera = m->isTrue(temp); 
592                         
593                         search = validParameter.validFile(parameters, "search", false);                 if (search == "not found") { search = "blast"; }
594                         
595                         temp = validParameter.validFile(parameters, "iters", false);                    if (temp == "not found") { temp = "1000"; }             
596                         m->mothurConvert(temp, iters); 
597                          
598                         temp = validParameter.validFile(parameters, "increment", false);                if (temp == "not found") { temp = "5"; }
599                         m->mothurConvert(temp, increment);
600                         
601                         temp = validParameter.validFile(parameters, "numwanted", false);                if (temp == "not found") { temp = "15"; }               
602                         m->mothurConvert(temp, numwanted);
603                         
604                         blastlocation = validParameter.validFile(parameters, "blastlocation", false);   
605                         if (blastlocation == "not found") { blastlocation = ""; }
606                         else {
607                                 //add / to name if needed
608                                 string lastChar = blastlocation.substr(blastlocation.length()-1);
609 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
610                                 if (lastChar != "/") { blastlocation += "/"; }
611 #else
612                                 if (lastChar != "\\") { blastlocation += "\\"; }        
613 #endif
614                                 blastlocation = m->getFullPathName(blastlocation);
615                                 string formatdbCommand = "";
616 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
617                                 formatdbCommand = blastlocation + "formatdb";   
618 #else
619                                 formatdbCommand = blastlocation + "formatdb.exe";
620 #endif
621                                 
622                                 //test to make sure formatdb exists
623                                 ifstream in;
624                                 formatdbCommand = m->getFullPathName(formatdbCommand);
625                                 int ableToOpen = m->openInputFile(formatdbCommand, in, "no error"); in.close();
626                                 if(ableToOpen == 1) {   m->mothurOut("[ERROR]: " + formatdbCommand + " file does not exist. mothur requires formatdb.exe to run chimera.slayer."); m->mothurOutEndLine(); abort = true; }
627                                 
628                                 string blastCommand = "";
629 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
630                                 blastCommand = blastlocation + "megablast";     
631 #else
632                                 blastCommand = blastlocation + "megablast.exe";
633 #endif
634                                 //test to make sure formatdb exists
635                                 ifstream in2;
636                                 blastCommand = m->getFullPathName(blastCommand);
637                                 ableToOpen = m->openInputFile(blastCommand, in2, "no error"); in2.close();
638                                 if(ableToOpen == 1) {   m->mothurOut("[ERROR]: " + blastCommand + " file does not exist. mothur requires blastall.exe to run chimera.slayer."); m->mothurOutEndLine(); abort = true; }
639                         }
640
641                         if ((search != "blast") && (search != "kmer")) { m->mothurOut(search + " is not a valid search."); m->mothurOutEndLine(); abort = true;  }
642                         
643                         if ((hasName || hasCount) && (templatefile != "self")) { m->mothurOut("You have provided a namefile or countfile and the reference parameter is not set to self. I am not sure what reference you are trying to use, aborting."); m->mothurOutEndLine(); abort=true; }
644                         if (hasGroup && (templatefile != "self")) { m->mothurOut("You have provided a group file and the reference parameter is not set to self. I am not sure what reference you are trying to use, aborting."); m->mothurOutEndLine(); abort=true; }
645
646                         //until we resolve the issue 10-18-11
647 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
648 #else
649                         //processors=1;
650 #endif
651                 }
652         }
653         catch(exception& e) {
654                 m->errorOut(e, "ChimeraSlayerCommand", "ChimeraSlayerCommand");
655                 exit(1);
656         }
657 }
658 //***************************************************************************************************************
659
660 int ChimeraSlayerCommand::execute(){
661         try{
662                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
663
664                 for (int s = 0; s < fastaFileNames.size(); s++) {
665                                 
666                         m->mothurOut("Checking sequences from " + fastaFileNames[s] + " ..." ); m->mothurOutEndLine();
667                 
668                         int start = time(NULL); 
669                         if (outputDir == "") { outputDir = m->hasPath(fastaFileNames[s]);  }//if user entered a file with a path then preserve it                               
670                         string outputFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + getOutputFileNameTag("chimera");
671                         string accnosFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + getOutputFileNameTag("accnos");
672                         string trimFastaFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + getOutputFileNameTag("fasta");             
673                         
674                         //clears files
675                         ofstream out, out1, out2;
676                         m->openOutputFile(outputFileName, out); out.close(); 
677                         m->openOutputFile(accnosFileName, out1); out1.close();
678                         if (trim) { m->openOutputFile(trimFastaFileName, out2); out2.close(); }
679                         outputNames.push_back(outputFileName); outputTypes["chimera"].push_back(outputFileName);
680                         outputNames.push_back(accnosFileName); outputTypes["accnos"].push_back(accnosFileName);
681                         if (trim) {  outputNames.push_back(trimFastaFileName); outputTypes["fasta"].push_back(trimFastaFileName); }                     
682                         
683                         //maps a filename to priority map. 
684                         //if no groupfile this is fastafileNames[s] -> prioirity
685                         //if groupfile then this is each groups seqs -> priority
686                         map<string, map<string, int> > fileToPriority; 
687                         map<string, map<string, int> >::iterator itFile;
688                         map<string, string> fileGroup;
689                         fileToPriority[fastaFileNames[s]] = priority; //default
690                         fileGroup[fastaFileNames[s]] = "noGroup";
691             map<string, string> uniqueNames; 
692                         int totalChimeras = 0;
693                         lines.clear();
694                         
695                         if (templatefile == "self") { 
696                 if (hasCount) {
697                     SequenceCountParser* parser = NULL;
698                     setUpForSelfReference(parser, fileGroup, fileToPriority, s); 
699                     if (parser != NULL) { uniqueNames = parser->getAllSeqsMap(); delete parser; }
700                 }else {
701                     SequenceParser* parser = NULL;
702                     setUpForSelfReference(parser, fileGroup, fileToPriority, s); 
703                     if (parser != NULL) { uniqueNames = parser->getAllSeqsMap(); delete parser; }
704                 }
705             }
706                         
707                         if (m->control_pressed) {   for (int j = 0; j < outputNames.size(); j++) {      m->mothurRemove(outputNames[j]);        }  return 0;    }
708
709                         if (fileToPriority.size() == 1) { //you running without a groupfile
710                                 itFile = fileToPriority.begin();
711                                 string thisFastaName = itFile->first;
712                                 map<string, int> thisPriority = itFile->second;
713 #ifdef USE_MPI  
714                                 MPIExecute(thisFastaName, outputFileName, accnosFileName, trimFastaFileName, thisPriority);
715 #else
716                                 //break up file
717                                 vector<unsigned long long> positions; 
718 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
719                                 positions = m->divideFile(thisFastaName, processors);
720                                 for (int i = 0; i < (positions.size()-1); i++) {        lines.push_back(linePair(positions[i], positions[(i+1)]));      }
721 #else
722                                 if (processors == 1) {  lines.push_back(linePair(0, 1000)); }
723                                 else {
724                                         positions = m->setFilePosFasta(thisFastaName, numSeqs); 
725                     if (positions.size() < processors) { processors = positions.size(); }
726                                         
727                                         //figure out how many sequences you have to process
728                                         int numSeqsPerProcessor = numSeqs / processors;
729                                         for (int i = 0; i < processors; i++) {
730                                                 int startIndex =  i * numSeqsPerProcessor;
731                                                 if(i == (processors - 1)){      numSeqsPerProcessor = numSeqs - i * numSeqsPerProcessor;        }
732                                                 lines.push_back(linePair(positions[startIndex], numSeqsPerProcessor));
733                                         }
734                                 }
735 #endif
736                                 if(processors == 1){ numSeqs = driver(lines[0], outputFileName, thisFastaName, accnosFileName, trimFastaFileName, thisPriority);  }
737                                 else{ numSeqs = createProcesses(outputFileName, thisFastaName, accnosFileName, trimFastaFileName, thisPriority); }
738                                 
739                                 if (m->control_pressed) {  outputTypes.clear(); if (trim) { m->mothurRemove(trimFastaFileName); } m->mothurRemove(outputFileName); m->mothurRemove(accnosFileName); for (int j = 0; j < outputNames.size(); j++) {      m->mothurRemove(outputNames[j]);        }  return 0; }                          
740 #endif
741                         }else { //you have provided a groupfile
742 #ifdef USE_MPI  
743                                 MPIExecuteGroups(outputFileName, accnosFileName, trimFastaFileName, fileToPriority, fileGroup);
744 #else
745                                 if (processors == 1) { numSeqs = driverGroups(outputFileName, accnosFileName, trimFastaFileName, fileToPriority, fileGroup);    }
746                                 else {  numSeqs = createProcessesGroups(outputFileName, accnosFileName, trimFastaFileName, fileToPriority, fileGroup);          } //destroys fileToPriority
747 #endif
748
749 #ifdef USE_MPI  
750                                 int pid; 
751                                 MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
752                                 
753                                 if (pid == 0) {
754 #endif
755                                 totalChimeras = deconvoluteResults(uniqueNames, outputFileName, accnosFileName, trimFastaFileName);
756                 m->mothurOutEndLine(); m->mothurOut(toString(totalChimeras) + " chimera found."); m->mothurOutEndLine();
757 #ifdef USE_MPI  
758                                 }
759                                 MPI_Barrier(MPI_COMM_WORLD); //make everyone wait
760 #endif
761                         }
762                         
763             m->mothurOut("It took " + toString(time(NULL) - start) + " secs to check " + toString(numSeqs) + " sequences.");    m->mothurOutEndLine();
764                 }
765                 
766                 //set accnos file as new current accnosfile
767                 string current = "";
768                 itTypes = outputTypes.find("accnos");
769                 if (itTypes != outputTypes.end()) {
770                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setAccnosFile(current); }
771                 }
772                 
773                 if (trim) {
774                         itTypes = outputTypes.find("fasta");
775                         if (itTypes != outputTypes.end()) {
776                                 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setFastaFile(current); }
777                         }
778                 }
779                 
780                 m->mothurOutEndLine();
781                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
782                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }       
783                 m->mothurOutEndLine();
784
785                 return 0;
786                 
787         }
788         catch(exception& e) {
789                 m->errorOut(e, "ChimeraSlayerCommand", "execute");
790                 exit(1);
791         }
792 }
793 //**********************************************************************************************************************
794 int ChimeraSlayerCommand::MPIExecuteGroups(string outputFileName, string accnosFileName, string trimFastaFileName, map<string, map<string, int> >& fileToPriority, map<string, string>& fileGroup){
795         try {
796 #ifdef USE_MPI  
797                 int pid; 
798                 int tag = 2001;
799                 
800                 MPI_Status status; 
801                 MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
802                 MPI_Comm_size(MPI_COMM_WORLD, &processors); 
803         
804                 //put filenames in a vector, then pass each process a starting and ending point in the vector
805                 //all processes already have the fileToPriority and fileGroup, they just need to know which files to process
806                 map<string, map<string, int> >::iterator itFile;
807                 vector<string> filenames;
808                 for(itFile = fileToPriority.begin(); itFile != fileToPriority.end(); itFile++) { filenames.push_back(itFile->first); }
809                 
810                 int numGroupsPerProcessor = filenames.size() / processors;
811                 int startIndex =  pid * numGroupsPerProcessor;
812                 int endIndex = (pid+1) * numGroupsPerProcessor;
813                 if(pid == (processors - 1)){    endIndex = filenames.size();    }
814                 
815                 vector<unsigned long long> MPIPos;
816                 
817                 MPI_File outMPI;
818                 MPI_File outMPIAccnos;
819                 MPI_File outMPIFasta;
820                 
821                 int outMode=MPI_MODE_CREATE|MPI_MODE_WRONLY; 
822                 int inMode=MPI_MODE_RDONLY; 
823                 
824                 char outFilename[1024];
825                 strcpy(outFilename, outputFileName.c_str());
826                 
827                 char outAccnosFilename[1024];
828                 strcpy(outAccnosFilename, accnosFileName.c_str());
829                 
830                 char outFastaFilename[1024];
831                 strcpy(outFastaFilename, trimFastaFileName.c_str());
832                 
833                 MPI_File_open(MPI_COMM_WORLD, outFilename, outMode, MPI_INFO_NULL, &outMPI);
834                 MPI_File_open(MPI_COMM_WORLD, outAccnosFilename, outMode, MPI_INFO_NULL, &outMPIAccnos);
835                 if (trim) { MPI_File_open(MPI_COMM_WORLD, outFastaFilename, outMode, MPI_INFO_NULL, &outMPIFasta); }
836                 
837                 if (m->control_pressed) {   MPI_File_close(&outMPI); if (trim) {  MPI_File_close(&outMPIFasta);  } MPI_File_close(&outMPIAccnos);  return 0;  }
838                 
839                 //print headers
840                 if (pid == 0) { //you are the root process 
841                         m->mothurOutEndLine();
842                         m->mothurOut("Only reporting sequence supported by " + toString(minBS) + "% of bootstrapped results.");
843                         m->mothurOutEndLine();
844                         
845                         string outTemp = "Name\tLeftParent\tRightParent\tDivQLAQRB\tPerIDQLAQRB\tBootStrapA\tDivQLBQRA\tPerIDQLBQRA\tBootStrapB\tFlag\tLeftWindow\tRightWindow\n";
846                         
847                         //print header
848                         int length = outTemp.length();
849                         char* buf2 = new char[length];
850                         memcpy(buf2, outTemp.c_str(), length);
851                         
852                         MPI_File_write_shared(outMPI, buf2, length, MPI_CHAR, &status);
853                         delete buf2;
854                 }
855                 MPI_Barrier(MPI_COMM_WORLD); //make everyone wait
856                 
857                 for (int i = startIndex; i < endIndex; i++) {
858                         
859                         int start = time(NULL);
860                         int num = 0;
861                         string thisFastaName = filenames[i];
862                         map<string, int> thisPriority = fileToPriority[thisFastaName];
863                         
864                         char inFileName[1024];
865                         strcpy(inFileName, thisFastaName.c_str());
866                         MPI_File inMPI;
867                         MPI_File_open(MPI_COMM_SELF, inFileName, inMode, MPI_INFO_NULL, &inMPI);  //comm, filename, mode, info, filepointer
868                         
869                         MPIPos = m->setFilePosFasta(thisFastaName, num); //fills MPIPos, returns numSeqs
870                         
871                         cout << endl << "Checking sequences from group: " << fileGroup[thisFastaName] << "." << endl; 
872                         
873                         driverMPI(0, num, inMPI, outMPI, outMPIAccnos, outMPIFasta, MPIPos, thisFastaName, thisPriority, true);
874                         numSeqs += num;
875                         
876                         MPI_File_close(&inMPI);
877                         m->mothurRemove(thisFastaName);
878                                                 
879                         cout << endl << "It took " << toString(time(NULL) - start) << " secs to check " + toString(num) + " sequences from group " << fileGroup[thisFastaName] << "." << endl;
880                 }
881                 
882                 if (pid == 0) {
883                         for(int i = 1; i < processors; i++) { 
884                                 int temp = 0;
885                                 MPI_Recv(&temp, 1, MPI_INT, i, 2001, MPI_COMM_WORLD, &status);
886                                 numSeqs += temp;
887                         }
888                 }else{ MPI_Send(&numSeqs, 1, MPI_INT, 0, 2001, MPI_COMM_WORLD); }
889                 
890                 MPI_File_close(&outMPI);
891                 MPI_File_close(&outMPIAccnos); 
892                 if (trim) { MPI_File_close(&outMPIFasta); }
893                 
894                 MPI_Barrier(MPI_COMM_WORLD); //make everyone wait
895 #endif
896                 return 0;
897                 
898         }catch(exception& e) {
899                 m->errorOut(e, "ChimeraSlayerCommand", "MPIExecuteGroups");
900                 exit(1);
901         }
902 }               
903 //**********************************************************************************************************************
904 int ChimeraSlayerCommand::MPIExecute(string inputFile, string outputFileName, string accnosFileName, string trimFastaFileName, map<string, int>& priority){
905         try {
906                 
907 #ifdef USE_MPI  
908                 int pid, numSeqsPerProcessor; 
909                 int tag = 2001;
910                 vector<unsigned long long> MPIPos;
911                 
912                 MPI_Status status; 
913                 MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
914                 MPI_Comm_size(MPI_COMM_WORLD, &processors); 
915                 
916                 MPI_File inMPI;
917                 MPI_File outMPI;
918                 MPI_File outMPIAccnos;
919                 MPI_File outMPIFasta;
920                 
921                 int outMode=MPI_MODE_CREATE|MPI_MODE_WRONLY; 
922                 int inMode=MPI_MODE_RDONLY; 
923                 
924                 char outFilename[1024];
925                 strcpy(outFilename, outputFileName.c_str());
926                 
927                 char outAccnosFilename[1024];
928                 strcpy(outAccnosFilename, accnosFileName.c_str());
929                 
930                 char outFastaFilename[1024];
931                 strcpy(outFastaFilename, trimFastaFileName.c_str());
932                 
933                 char inFileName[1024];
934                 strcpy(inFileName, inputFile.c_str());
935                 
936                 MPI_File_open(MPI_COMM_WORLD, inFileName, inMode, MPI_INFO_NULL, &inMPI);  //comm, filename, mode, info, filepointer
937                 MPI_File_open(MPI_COMM_WORLD, outFilename, outMode, MPI_INFO_NULL, &outMPI);
938                 MPI_File_open(MPI_COMM_WORLD, outAccnosFilename, outMode, MPI_INFO_NULL, &outMPIAccnos);
939                 if (trim) { MPI_File_open(MPI_COMM_WORLD, outFastaFilename, outMode, MPI_INFO_NULL, &outMPIFasta); }
940                 
941                 if (m->control_pressed) {  MPI_File_close(&inMPI);  MPI_File_close(&outMPI); if (trim) {  MPI_File_close(&outMPIFasta);  } MPI_File_close(&outMPIAccnos);  return 0;  }
942                 
943                 if (pid == 0) { //you are the root process 
944                         m->mothurOutEndLine();
945                         m->mothurOut("Only reporting sequence supported by " + toString(minBS) + "% of bootstrapped results.");
946                         m->mothurOutEndLine();
947                         
948                         string outTemp = "Name\tLeftParent\tRightParent\tDivQLAQRB\tPerIDQLAQRB\tBootStrapA\tDivQLBQRA\tPerIDQLBQRA\tBootStrapB\tFlag\tLeftWindow\tRightWindow\n";
949                         
950                         //print header
951                         int length = outTemp.length();
952                         char* buf2 = new char[length];
953                         memcpy(buf2, outTemp.c_str(), length);
954                         
955                         MPI_File_write_shared(outMPI, buf2, length, MPI_CHAR, &status);
956                         delete buf2;
957                         
958                         MPIPos = m->setFilePosFasta(inputFile, numSeqs); //fills MPIPos, returns numSeqs
959                         
960                         if (templatefile != "self") { //if template=self we can only use 1 processor
961                                 //send file positions to all processes
962                                 for(int i = 1; i < processors; i++) { 
963                                         MPI_Send(&numSeqs, 1, MPI_INT, i, tag, MPI_COMM_WORLD);
964                                         MPI_Send(&MPIPos[0], (numSeqs+1), MPI_LONG, i, tag, MPI_COMM_WORLD);
965                                 }
966                         }
967                         //figure out how many sequences you have to align
968                         numSeqsPerProcessor = numSeqs / processors;
969                         int startIndex =  pid * numSeqsPerProcessor;
970                         if(pid == (processors - 1)){    numSeqsPerProcessor = numSeqs - pid * numSeqsPerProcessor;      }
971                         
972                         if (templatefile == "self") { //if template=self we can only use 1 processor
973                                 startIndex = 0;
974                                 numSeqsPerProcessor = numSeqs;
975                         }
976                         
977                         //do your part
978                         driverMPI(startIndex, numSeqsPerProcessor, inMPI, outMPI, outMPIAccnos, outMPIFasta, MPIPos, inputFile, priority, false);
979                                                 
980                         if (m->control_pressed) {  MPI_File_close(&inMPI);  MPI_File_close(&outMPI); if (trim) { MPI_File_close(&outMPIFasta); }  MPI_File_close(&outMPIAccnos);   return 0;  }
981                         
982                 }else{ //you are a child process
983                         if (templatefile != "self") { //if template=self we can only use 1 processor
984                                 MPI_Recv(&numSeqs, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
985                                 MPIPos.resize(numSeqs+1);
986                                 MPI_Recv(&MPIPos[0], (numSeqs+1), MPI_LONG, 0, tag, MPI_COMM_WORLD, &status);
987                                 
988                                 //figure out how many sequences you have to align
989                                 numSeqsPerProcessor = numSeqs / processors;
990                                 int startIndex =  pid * numSeqsPerProcessor;
991                                 if(pid == (processors - 1)){    numSeqsPerProcessor = numSeqs - pid * numSeqsPerProcessor;      }
992                                 
993                                 //do your part
994                                 driverMPI(startIndex, numSeqsPerProcessor, inMPI, outMPI, outMPIAccnos, outMPIFasta, MPIPos, inputFile, priority, false);
995                                 
996                                 if (m->control_pressed) {  MPI_File_close(&inMPI);  MPI_File_close(&outMPI); if (trim) { MPI_File_close(&outMPIFasta); }  MPI_File_close(&outMPIAccnos);  return 0;  }
997                                 
998                         }
999                 }
1000                 
1001                 //close files 
1002                 MPI_File_close(&inMPI);
1003                 MPI_File_close(&outMPI);
1004                 MPI_File_close(&outMPIAccnos); 
1005                 if (trim) { MPI_File_close(&outMPIFasta); }
1006                 MPI_Barrier(MPI_COMM_WORLD); //make everyone wait - just in case
1007                 
1008                 
1009 #endif          
1010                 return numSeqs;
1011         }
1012         catch(exception& e) {
1013                 m->errorOut(e, "ChimeraSlayerCommand", "MPIExecute");
1014                 exit(1);
1015         }
1016 }
1017 //**********************************************************************************************************************
1018 int ChimeraSlayerCommand::deconvoluteResults(map<string, string>& uniqueNames, string outputFileName, string accnosFileName, string trimFileName){
1019         try {
1020                 map<string, string>::iterator itUnique;
1021                 int total = 0;
1022         
1023         if (trimera) { //add in more potential uniqueNames
1024             map<string, string> newUniqueNames = uniqueNames;
1025             for (map<string, string>::iterator it = uniqueNames.begin(); it != uniqueNames.end(); it++) {
1026                 newUniqueNames[(it->first)+"_LEFT"] = (it->first)+"_LEFT";
1027                 newUniqueNames[(it->first)+"_RIGHT"] = (it->first)+"_RIGHT";
1028             }
1029             uniqueNames = newUniqueNames;
1030             newUniqueNames.clear();
1031         }
1032                 
1033                 //edit accnos file
1034                 ifstream in2; 
1035                 m->openInputFile(accnosFileName, in2, "no error");
1036                 
1037                 ofstream out2;
1038                 m->openOutputFile(accnosFileName+".temp", out2);
1039                 
1040                 string name; name = "";
1041                 set<string> chimerasInFile;
1042                 set<string>::iterator itChimeras;
1043                 
1044                 while (!in2.eof()) {
1045                         if (m->control_pressed) { in2.close(); out2.close(); m->mothurRemove(outputFileName); m->mothurRemove((accnosFileName+".temp")); return 0; }
1046                         
1047                         in2 >> name; m->gobble(in2);
1048                         
1049                         //find unique name
1050                         itUnique = uniqueNames.find(name);
1051                         
1052                         if (itUnique == uniqueNames.end()) { m->mothurOut("[ERROR]: trouble parsing accnos results. Cannot find "+ name + "."); m->mothurOutEndLine(); m->control_pressed = true; }
1053                         else {
1054                                 itChimeras = chimerasInFile.find((itUnique->second));
1055                                 
1056                                 if (itChimeras == chimerasInFile.end()) {
1057                                         out2 << itUnique->second << endl;
1058                                         chimerasInFile.insert((itUnique->second));
1059                                         total++;
1060                                 }
1061                         }
1062                 }
1063                 in2.close();
1064                 out2.close();
1065                 
1066                 m->mothurRemove(accnosFileName);
1067                 rename((accnosFileName+".temp").c_str(), accnosFileName.c_str());
1068                 
1069                 
1070                 //edit chimera file
1071                 ifstream in; 
1072                 m->openInputFile(outputFileName, in);
1073                 
1074                 ofstream out;
1075                 m->openOutputFile(outputFileName+".temp", out); out.setf(ios::fixed, ios::floatfield); out.setf(ios::showpoint);
1076
1077                 string rest, parent1, parent2, line;
1078                 set<string> namesInFile; //this is so if a sequence is found to be chimera in several samples we dont write it to the results file more than once
1079                 set<string>::iterator itNames;
1080                 
1081                 //assumptions - in file each read will always look like...
1082                 /*
1083                  F11Fcsw_92754  no
1084                  F11Fcsw_63104  F11Fcsw_33372   F11Fcsw_37007   0.89441 80.4469 0.2     1.03727 93.2961 52.2    no      0-241   243-369 
1085                  */
1086                 
1087                 //get header line
1088                 if (!in.eof()) {
1089                         line = m->getline(in); m->gobble(in);
1090                         out << line << endl;
1091                 }
1092                 
1093                 //for the chimera file, we want to make sure if any group finds a sequence to be chimeric then all groups do, 
1094                 //so if this is a report that did not find it to be chimeric, but it appears in the accnos file, 
1095                 //then ignore this report and continue until we find the report that found it to be chimeric
1096                 
1097                 while (!in.eof()) {
1098                         
1099                         if (m->control_pressed) { in.close(); out.close(); m->mothurRemove((outputFileName+".temp")); return 0; }
1100                         
1101                         in >> name;             m->gobble(in);
1102                         in >> parent1;  m->gobble(in);
1103                         
1104                         if (name == "Name") { //name = "Name" because we append the header line each time we add results from the groups
1105                                 line = m->getline(in); m->gobble(in);
1106                         }else {
1107                                 if (parent1 == "no") {
1108                                         //find unique name
1109                                         itUnique = uniqueNames.find(name);
1110                                         
1111                                         if (itUnique == uniqueNames.end()) { m->mothurOut("[ERROR]: trouble parsing chimera results. Cannot find "+ name + "."); m->mothurOutEndLine(); m->control_pressed = true; }
1112                                         else {
1113                                                 //is this sequence really not chimeric??
1114                                                 itChimeras = chimerasInFile.find(itUnique->second);
1115                                                 
1116                                                 if (itChimeras == chimerasInFile.end()) {
1117                                                         //is this sequence not already in the file
1118                                                         itNames = namesInFile.find((itUnique->second));
1119                                                         
1120                                                         if (itNames == namesInFile.end()) { out << itUnique->second << '\t' << "no" << endl; namesInFile.insert(itUnique->second); }
1121                                                 }
1122                                         }
1123                                 }else { //read the rest of the line
1124                                         double DivQLAQRB,PerIDQLAQRB,BootStrapA,DivQLBQRA,PerIDQLBQRA,BootStrapB;
1125                                         string flag, range1, range2;
1126                                         bool print = false;
1127                                         in >> parent2 >> DivQLAQRB >> PerIDQLAQRB >> BootStrapA >> DivQLBQRA >> PerIDQLBQRA >> BootStrapB >> flag >> range1 >> range2;  m->gobble(in);
1128                                         
1129                                         //find unique name
1130                                         itUnique = uniqueNames.find(name);
1131                                         
1132                                         if (itUnique == uniqueNames.end()) { m->mothurOut("[ERROR]: trouble parsing chimera results. Cannot find "+ name + "."); m->mothurOutEndLine(); m->control_pressed = true; }
1133                                         else {
1134                                                 name = itUnique->second;
1135                                                 //is this name already in the file
1136                                                 itNames = namesInFile.find((name));
1137                                                 
1138                                                 if (itNames == namesInFile.end()) { //no not in file
1139                                                         if (flag == "no") { //are you really a no??
1140                                                                 //is this sequence really not chimeric??
1141                                                                 itChimeras = chimerasInFile.find(name);
1142                                                                 
1143                                                                 //then you really are a no so print, otherwise skip
1144                                                                 if (itChimeras == chimerasInFile.end()) { print = true; }
1145                                                                 
1146                                                         }else{ print = true; }
1147                                                 }
1148                                         }
1149                                         
1150                                         if (print) {
1151                                                 out << name << '\t';
1152                                                 
1153                                                 namesInFile.insert(name);
1154
1155                                                 //output parent1's name
1156                                                 itUnique = uniqueNames.find(parent1);
1157                                                 if (itUnique == uniqueNames.end()) { m->mothurOut("[ERROR]: trouble parsing chimera results. Cannot find parentA "+ parent1 + "."); m->mothurOutEndLine(); m->control_pressed = true; }
1158                                                 else { out << itUnique->second << '\t'; }
1159                                                 
1160                                                 //output parent2's name
1161                                                 itUnique = uniqueNames.find(parent2);
1162                                                 if (itUnique == uniqueNames.end()) { m->mothurOut("[ERROR]: trouble parsing chimera results. Cannot find parentA "+ parent2 + "."); m->mothurOutEndLine(); m->control_pressed = true; }
1163                                                 else { out << itUnique->second << '\t'; }
1164                                                 
1165                                                 out << DivQLAQRB << '\t' << PerIDQLAQRB << '\t' << BootStrapA << '\t' << DivQLBQRA << '\t' << PerIDQLBQRA << '\t' << BootStrapB << '\t' << flag << '\t' << range1 << '\t' << range2 << endl;
1166                                         }
1167                                 }                               
1168                         }
1169                 }
1170                 in.close();
1171                 out.close();
1172                 
1173                 m->mothurRemove(outputFileName);
1174                 rename((outputFileName+".temp").c_str(), outputFileName.c_str());
1175                 
1176                 //edit fasta file
1177                 if (trim) {
1178                         ifstream in3; 
1179                         m->openInputFile(trimFileName, in3);
1180                         
1181                         ofstream out3;
1182                         m->openOutputFile(trimFileName+".temp", out3);
1183                         
1184                         namesInFile.clear();
1185                         
1186                         while (!in3.eof()) {
1187                                 if (m->control_pressed) { in3.close(); out3.close(); m->mothurRemove(outputFileName); m->mothurRemove(accnosFileName); m->mothurRemove((trimFileName+".temp")); return 0; }
1188                                 
1189                                 Sequence seq(in3); m->gobble(in3);
1190                                 
1191                                 if (seq.getName() != "") {
1192                                         //find unique name
1193                                         itUnique = uniqueNames.find(seq.getName());
1194                                         
1195                                         if (itUnique == uniqueNames.end()) { m->mothurOut("[ERROR]: trouble parsing accnos results. Cannot find "+ seq.getName() + "."); m->mothurOutEndLine(); m->control_pressed = true; }
1196                                         else {
1197                                                 itNames = namesInFile.find((itUnique->second));
1198                                                 
1199                                                 if (itNames == namesInFile.end()) {
1200                                                         seq.printSequence(out3);
1201                                                 }
1202                                         }
1203                                 }
1204                         }
1205                         in3.close();
1206                         out3.close();
1207                         
1208                         m->mothurRemove(trimFileName);
1209                         rename((trimFileName+".temp").c_str(), trimFileName.c_str());
1210                 }
1211                 
1212                 return total;
1213         }
1214         catch(exception& e) {
1215                 m->errorOut(e, "ChimeraSlayerCommand", "deconvoluteResults");
1216                 exit(1);
1217         }
1218 }
1219 //**********************************************************************************************************************
1220 int ChimeraSlayerCommand::setUpForSelfReference(SequenceParser*& parser, map<string, string>& fileGroup, map<string, map<string, int> >& fileToPriority, int s){
1221         try {
1222                 fileGroup.clear();
1223                 fileToPriority.clear();
1224                 
1225                 string nameFile = "";
1226                 if (nameFileNames.size() != 0) { //you provided a namefile and we don't need to create one
1227                         nameFile = nameFileNames[s];
1228                 }else {  nameFile = getNamesFile(fastaFileNames[s]); }
1229                 
1230                 //you provided a groupfile
1231                 string groupFile = "";
1232                 if (groupFileNames.size() != 0) { groupFile = groupFileNames[s]; }
1233                 
1234                 if (groupFile == "") { 
1235                         if (processors != 1) { m->mothurOut("When using template=self, mothur can only use 1 processor, continuing."); m->mothurOutEndLine(); processors = 1; }
1236                                                 
1237                         //sort fastafile by abundance, returns new sorted fastafile name
1238                         m->mothurOut("Sorting fastafile according to abundance..."); cout.flush(); 
1239                         priority = sortFastaFile(fastaFileNames[s], nameFile);
1240                         m->mothurOut("Done."); m->mothurOutEndLine();
1241                         
1242                         fileToPriority[fastaFileNames[s]] = priority;
1243                         fileGroup[fastaFileNames[s]] = "noGroup";
1244                 }else {
1245                         //Parse sequences by group
1246                         parser = new SequenceParser(groupFile, fastaFileNames[s], nameFile);
1247                         vector<string> groups = parser->getNamesOfGroups();
1248                         
1249                         for (int i = 0; i < groups.size(); i++) {
1250                                 vector<Sequence> thisGroupsSeqs = parser->getSeqs(groups[i]);
1251                                 map<string, string> thisGroupsMap = parser->getNameMap(groups[i]);
1252                                 string newFastaFile = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + groups[i] + "-sortedTemp.fasta";
1253                                 priority = sortFastaFile(thisGroupsSeqs, thisGroupsMap, newFastaFile); 
1254                                 fileToPriority[newFastaFile] = priority;
1255                                 fileGroup[newFastaFile] = groups[i];
1256                         }
1257                 }
1258                 
1259                 
1260                 return 0;
1261         }
1262         catch(exception& e) {
1263                 m->errorOut(e, "ChimeraSlayerCommand", "setUpForSelfReference");
1264                 exit(1);
1265         }
1266 }
1267 //**********************************************************************************************************************
1268 int ChimeraSlayerCommand::setUpForSelfReference(SequenceCountParser*& parser, map<string, string>& fileGroup, map<string, map<string, int> >& fileToPriority, int s){
1269         try {
1270                 fileGroup.clear();
1271                 fileToPriority.clear();
1272                 
1273                 string nameFile = "";
1274                 if (nameFileNames.size() != 0) { //you provided a namefile and we don't need to create one
1275                         nameFile = nameFileNames[s];
1276                 }else {  m->control_pressed = true; return 0; }
1277          
1278                 CountTable ct;
1279                 if (!ct.testGroups(nameFile)) {  
1280                         if (processors != 1) { m->mothurOut("When using template=self, mothur can only use 1 processor, continuing."); m->mothurOutEndLine(); processors = 1; }
1281             
1282                         //sort fastafile by abundance, returns new sorted fastafile name
1283                         m->mothurOut("Sorting fastafile according to abundance..."); cout.flush(); 
1284                         priority = sortFastaFile(fastaFileNames[s], nameFile);
1285                         m->mothurOut("Done."); m->mothurOutEndLine();
1286                         
1287                         fileToPriority[fastaFileNames[s]] = priority;
1288                         fileGroup[fastaFileNames[s]] = "noGroup";
1289                 }else {
1290                         //Parse sequences by group
1291                         parser = new SequenceCountParser(nameFile, fastaFileNames[s]);
1292                         vector<string> groups = parser->getNamesOfGroups();
1293                         
1294                         for (int i = 0; i < groups.size(); i++) {
1295                                 vector<Sequence> thisGroupsSeqs = parser->getSeqs(groups[i]);
1296                                 map<string, int> thisGroupsMap = parser->getCountTable(groups[i]);
1297                                 string newFastaFile = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + groups[i] + "-sortedTemp.fasta";
1298                                 sortFastaFile(thisGroupsSeqs, thisGroupsMap, newFastaFile); 
1299                                 fileToPriority[newFastaFile] = thisGroupsMap;
1300                                 fileGroup[newFastaFile] = groups[i];
1301                         }
1302                 }
1303                 
1304                 
1305                 return 0;
1306         }
1307         catch(exception& e) {
1308                 m->errorOut(e, "ChimeraSlayerCommand", "setUpForSelfReference");
1309                 exit(1);
1310         }
1311 }
1312 //**********************************************************************************************************************
1313 string ChimeraSlayerCommand::getNamesFile(string& inputFile){
1314         try {
1315                 string nameFile = "";
1316                 
1317                 m->mothurOutEndLine(); m->mothurOut("No namesfile given, running unique.seqs command to generate one."); m->mothurOutEndLine(); m->mothurOutEndLine();
1318                 
1319                 //use unique.seqs to create new name and fastafile
1320                 string inputString = "fasta=" + inputFile;
1321                 m->mothurOut("/******************************************/"); m->mothurOutEndLine(); 
1322                 m->mothurOut("Running command: unique.seqs(" + inputString + ")"); m->mothurOutEndLine(); 
1323                 m->mothurCalling = true;
1324         
1325                 Command* uniqueCommand = new DeconvoluteCommand(inputString);
1326                 uniqueCommand->execute();
1327                 
1328                 map<string, vector<string> > filenames = uniqueCommand->getOutputFiles();
1329                 
1330                 delete uniqueCommand;
1331                 m->mothurCalling = false;
1332                 m->mothurOut("/******************************************/"); m->mothurOutEndLine(); 
1333                 
1334                 nameFile = filenames["name"][0];
1335                 inputFile = filenames["fasta"][0];
1336                 
1337                 return nameFile;
1338         }
1339         catch(exception& e) {
1340                 m->errorOut(e, "ChimeraSlayerCommand", "getNamesFile");
1341                 exit(1);
1342         }
1343 }
1344 //**********************************************************************************************************************
1345
1346 int ChimeraSlayerCommand::driverGroups(string outputFName, string accnos, string fasta, map<string, map<string, int> >& fileToPriority, map<string, string>& fileGroup){
1347         try {
1348                 int totalSeqs = 0;
1349                 
1350                 for (map<string, map<string, int> >::iterator itFile = fileToPriority.begin(); itFile != fileToPriority.end(); itFile++) {
1351                         
1352                         if (m->control_pressed) {  return 0;  }
1353                         
1354                         int start = time(NULL);
1355                         string thisFastaName = itFile->first;
1356                         map<string, int> thisPriority = itFile->second;
1357                         string thisoutputFileName = outputDir + m->getRootName(m->getSimpleName(thisFastaName)) + fileGroup[thisFastaName] + "slayer.chimera";
1358                         string thisaccnosFileName = outputDir + m->getRootName(m->getSimpleName(thisFastaName)) + fileGroup[thisFastaName] + "slayer.accnos";
1359                         string thistrimFastaFileName = outputDir + m->getRootName(m->getSimpleName(thisFastaName)) + fileGroup[thisFastaName] + "slayer.fasta";
1360                         
1361                         m->mothurOutEndLine(); m->mothurOut("Checking sequences from group: " + fileGroup[thisFastaName] + "."); m->mothurOutEndLine(); 
1362                         
1363                         lines.clear();
1364 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
1365                         int proc = 1;
1366                         vector<unsigned long long> positions = m->divideFile(thisFastaName, proc);
1367                         lines.push_back(linePair(positions[0], positions[1]));  
1368 #else
1369                         lines.push_back(linePair(0, 1000)); 
1370 #endif                  
1371                         int numSeqs = driver(lines[0], thisoutputFileName, thisFastaName, thisaccnosFileName, thistrimFastaFileName, thisPriority);
1372                         
1373                         //append files
1374                         m->appendFiles(thisoutputFileName, outputFName); m->mothurRemove(thisoutputFileName); 
1375                         m->appendFiles(thisaccnosFileName, accnos); m->mothurRemove(thisaccnosFileName);
1376                         if (trim) { m->appendFiles(thistrimFastaFileName, fasta); m->mothurRemove(thistrimFastaFileName); }
1377                         m->mothurRemove(thisFastaName);
1378                         
1379                         totalSeqs += numSeqs;
1380                         
1381                         m->mothurOutEndLine(); m->mothurOut("It took " + toString(time(NULL) - start) + " secs to check " + toString(numSeqs) + " sequences from group " + fileGroup[thisFastaName] + ".");     m->mothurOutEndLine();
1382                 }
1383                 
1384                 return totalSeqs;
1385         }
1386         catch(exception& e) {
1387                 m->errorOut(e, "ChimeraSlayerCommand", "driverGroups");
1388                 exit(1);
1389         }
1390 }
1391 /**************************************************************************************************/
1392 int ChimeraSlayerCommand::createProcessesGroups(string outputFName, string accnos, string fasta, map<string, map<string, int> >& fileToPriority, map<string, string>& fileGroup) {
1393         try {
1394                 int process = 1;
1395                 int num = 0;
1396                 processIDS.clear();
1397                 
1398                 if (fileToPriority.size() < processors) { processors = fileToPriority.size(); }
1399                 
1400                 int groupsPerProcessor = fileToPriority.size() / processors;
1401                 int remainder = fileToPriority.size() % processors;
1402                 
1403                 vector< map<string, map<string, int> > > breakUp;
1404                 
1405                 for (int i = 0; i < processors; i++) {
1406                         map<string, map<string, int> > thisFileToPriority;
1407                         map<string, map<string, int> >::iterator itFile;
1408                         int count = 0;
1409                         int enough = groupsPerProcessor;
1410                         if (i == 0) { enough = groupsPerProcessor + remainder; }
1411                         
1412                         for (itFile = fileToPriority.begin(); itFile != fileToPriority.end();) {
1413                                 thisFileToPriority[itFile->first] = itFile->second;
1414                                 fileToPriority.erase(itFile++);
1415                                 count++;
1416                                 if (count == enough) { break; }
1417                         }       
1418                         breakUp.push_back(thisFileToPriority);
1419                 }
1420                                 
1421 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
1422                 //loop through and create all the processes you want
1423                 while (process != processors) {
1424                         int pid = fork();
1425                         
1426                         if (pid > 0) {
1427                                 processIDS.push_back(pid);  //create map from line number to pid so you can append files in correct order later
1428                                 process++;
1429                         }else if (pid == 0){
1430                                 num = driverGroups(outputFName + toString(getpid()) + ".temp", accnos + toString(getpid()) + ".temp", fasta + toString(getpid()) + ".temp", breakUp[process], fileGroup);
1431                                 
1432                                 //pass numSeqs to parent
1433                                 ofstream out;
1434                                 string tempFile = outputFName + toString(getpid()) + ".num.temp";
1435                                 m->openOutputFile(tempFile, out);
1436                                 out << num << endl;
1437                                 out.close();
1438                                 exit(0);
1439                         }else { 
1440                                 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine(); 
1441                                 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
1442                                 exit(0);
1443                         }
1444                 }
1445                 
1446                 num = driverGroups(outputFName, accnos, fasta, breakUp[0], fileGroup);
1447
1448                 //force parent to wait until all the processes are done
1449                 for (int i=0;i<processors;i++) { 
1450                         int temp = processIDS[i];
1451                         wait(&temp);
1452                 }
1453                 
1454                 for (int i = 0; i < processIDS.size(); i++) {
1455                         ifstream in;
1456                         string tempFile =  outputFName + toString(processIDS[i]) + ".num.temp";
1457                         m->openInputFile(tempFile, in);
1458                         if (!in.eof()) { int tempNum = 0;  in >> tempNum; num += tempNum; }
1459                         in.close(); m->mothurRemove(tempFile);
1460                 }
1461 #else
1462                 
1463                 //////////////////////////////////////////////////////////////////////////////////////////////////////
1464                 //Windows version shared memory, so be careful when passing variables through the slayerData struct. 
1465                 //Above fork() will clone, so memory is separate, but that's not the case with windows, 
1466                 //////////////////////////////////////////////////////////////////////////////////////////////////////
1467                 
1468                 vector<slayerData*> pDataArray; 
1469                 DWORD   dwThreadIdArray[processors-1];
1470                 HANDLE  hThreadArray[processors-1]; 
1471                 
1472                 //Create processor worker threads.
1473                 for(int i=1; i<processors; i++ ){
1474                         string extension = toString(i) + ".temp";
1475                         slayerData* tempslayer = new slayerData((outputFName + extension), (fasta + extension), (accnos + extension), templatefile, search, blastlocation, trimera, trim, realign, m, breakUp[i], fileGroup, ksize, match, mismatch, window, minSimilarity, minCoverage, minBS, minSNP, parents, iters, increment, numwanted, divR, priority, i);
1476                         pDataArray.push_back(tempslayer);
1477                         processIDS.push_back(i);
1478                         
1479                         //MySlayerThreadFunction is in header. It must be global or static to work with the threads.
1480                         //default security attributes, thread function name, argument to thread function, use default creation flags, returns the thread identifier
1481                         hThreadArray[i-1] = CreateThread(NULL, 0, MySlayerGroupThreadFunction, pDataArray[i-1], 0, &dwThreadIdArray[i-1]);   
1482                 }
1483                 
1484                 num = driverGroups(outputFName, accnos, fasta, breakUp[0], fileGroup);
1485                 
1486                 //Wait until all threads have terminated.
1487                 WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE);
1488                 
1489                 //Close all thread handles and free memory allocations.
1490                 for(int i=0; i < pDataArray.size(); i++){
1491                         num += pDataArray[i]->count;
1492                         CloseHandle(hThreadArray[i]);
1493                         delete pDataArray[i];
1494                 }
1495 #endif  
1496                 
1497                 //append output files
1498                 for(int i=0;i<processIDS.size();i++){
1499                         m->appendFiles((outputFName + toString(processIDS[i]) + ".temp"), outputFName);
1500                         m->mothurRemove((outputFName + toString(processIDS[i]) + ".temp"));
1501                         
1502                         m->appendFiles((accnos + toString(processIDS[i]) + ".temp"), accnos);
1503                         m->mothurRemove((accnos + toString(processIDS[i]) + ".temp"));
1504                         
1505                         if (trim) {
1506                                 m->appendFiles((fasta + toString(processIDS[i]) + ".temp"), fasta);
1507                                 m->mothurRemove((fasta + toString(processIDS[i]) + ".temp"));
1508                         }
1509                 }
1510                 
1511                 
1512                 return num;
1513         }
1514         catch(exception& e) {
1515                 m->errorOut(e, "ChimeraSlayerCommand", "createProcessesGroups");
1516                 exit(1);
1517         }
1518 }
1519 //**********************************************************************************************************************
1520
1521 int ChimeraSlayerCommand::driver(linePair filePos, string outputFName, string filename, string accnos, string fasta, map<string, int>& priority){
1522         try {
1523                 
1524                 Chimera* chimera;
1525                 if (templatefile != "self") { //you want to run slayer with a reference template
1526                         chimera = new ChimeraSlayer(filename, templatefile, trim, search, ksize, match, mismatch, window, divR, minSimilarity, minCoverage, minBS, minSNP, parents, iters, increment, numwanted, realign, blastlocation, rand());       
1527                 }else {
1528                         chimera = new ChimeraSlayer(filename, templatefile, trim, priority, search, ksize, match, mismatch, window, divR, minSimilarity, minCoverage, minBS, minSNP, parents, iters, increment, numwanted, realign, blastlocation, rand());     
1529                 }
1530                 
1531                 if (m->control_pressed) { delete chimera; return 0; }
1532                 
1533                 if (chimera->getUnaligned()) { delete chimera; m->mothurOut("Your template sequences are different lengths, please correct."); m->mothurOutEndLine(); m->control_pressed = true; return 0; }
1534                 templateSeqsLength = chimera->getLength();
1535                 
1536                 ofstream out;
1537                 m->openOutputFile(outputFName, out);
1538                 
1539                 ofstream out2;
1540                 m->openOutputFile(accnos, out2);
1541                 
1542                 ofstream out3;
1543                 if (trim) {  m->openOutputFile(fasta, out3); }
1544                 
1545                 ifstream inFASTA;
1546                 m->openInputFile(filename, inFASTA);
1547
1548                 inFASTA.seekg(filePos.start);
1549                 
1550                 if (filePos.start == 0) { chimera->printHeader(out); }
1551
1552                 bool done = false;
1553                 int count = 0;
1554         
1555                 while (!done) {
1556                 
1557                         if (m->control_pressed) {       delete chimera; out.close(); out2.close(); if (trim) { out3.close(); } inFASTA.close(); return 1;       }
1558                 
1559                         Sequence* candidateSeq = new Sequence(inFASTA);  m->gobble(inFASTA);
1560                         string candidateAligned = candidateSeq->getAligned();
1561                         
1562                         if (candidateSeq->getName() != "") { //incase there is a commented sequence at the end of a file
1563                                 if (candidateSeq->getAligned().length() != templateSeqsLength) {  
1564                                         m->mothurOut(candidateSeq->getName() + " is not the same length as the template sequences. Skipping."); m->mothurOutEndLine();
1565                                 }else{
1566                                         //find chimeras
1567                                         chimera->getChimeras(candidateSeq);
1568                                         
1569                                         if (m->control_pressed) {       delete chimera; delete candidateSeq; return 1;  }
1570                                                 
1571                                         //if you are not chimeric, then check each half
1572                                         data_results wholeResults = chimera->getResults();
1573                                         
1574                                         //determine if we need to split
1575                                         bool isChimeric = false;
1576                                         
1577                                         if (wholeResults.flag == "yes") {
1578                                                 string chimeraFlag = "no";
1579                                                 if(  (wholeResults.results[0].bsa >= minBS && wholeResults.results[0].divr_qla_qrb >= divR)
1580                                                    ||
1581                                                    (wholeResults.results[0].bsb >= minBS && wholeResults.results[0].divr_qlb_qra >= divR) ) { chimeraFlag = "yes"; }
1582                                                 
1583                                                 
1584                                                 if (chimeraFlag == "yes") {     
1585                                                         if ((wholeResults.results[0].bsa >= minBS) || (wholeResults.results[0].bsb >= minBS)) { isChimeric = true; }
1586                                                 }
1587                                         }
1588                                         
1589                                         if ((!isChimeric) && trimera) {
1590                                                 
1591                                                 //split sequence in half by bases
1592                                                 string leftQuery, rightQuery;
1593                                                 Sequence tempSeq(candidateSeq->getName(), candidateAligned);
1594                                                 divideInHalf(tempSeq, leftQuery, rightQuery);
1595                                                 
1596                                                 //run chimeraSlayer on each piece
1597                                                 Sequence* left = new Sequence(candidateSeq->getName(), leftQuery);
1598                                                 Sequence* right = new Sequence(candidateSeq->getName(), rightQuery);
1599                                                 
1600                                                 //find chimeras
1601                                                 chimera->getChimeras(left);
1602                                                 data_results leftResults = chimera->getResults();
1603                                                 
1604                                                 chimera->getChimeras(right);
1605                                                 data_results rightResults = chimera->getResults();
1606                                                 
1607                                                 //if either piece is chimeric then report
1608                                                 Sequence trimmed = chimera->print(out, out2, leftResults, rightResults);
1609                                                 if (trim) { trimmed.printSequence(out3);  }
1610                                                 
1611                                                 delete left; delete right;
1612                                                 
1613                                         }else { //already chimeric
1614                                                 //print results
1615                                                 Sequence trimmed = chimera->print(out, out2);
1616                                                 if (trim) { trimmed.printSequence(out3);  }
1617                                         }
1618                                         
1619                                         
1620                                 }
1621                                 count++;
1622                         }
1623                         
1624                         #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
1625                                 unsigned long long pos = inFASTA.tellg();
1626                                 if ((pos == -1) || (pos >= filePos.end)) { break; }
1627                         #else
1628                                 if (inFASTA.eof()) { break; }
1629                         #endif
1630                         
1631                         delete candidateSeq;
1632                         //report progress
1633                         if((count) % 100 == 0){ m->mothurOut("Processing sequence: " + toString(count)); m->mothurOutEndLine();         }
1634                 }
1635                 //report progress
1636                 if((count) % 100 != 0){ m->mothurOut("Processing sequence: " + toString(count)); m->mothurOutEndLine();         }
1637                 
1638                 int numNoParents = chimera->getNumNoParents();
1639                 if (numNoParents == count) { m->mothurOut("[WARNING]: megablast returned 0 potential parents for all your sequences. This could be due to formatdb.exe not being setup properly, please check formatdb.log for errors."); m->mothurOutEndLine(); } 
1640                 
1641                 out.close();
1642                 out2.close();
1643                 if (trim) { out3.close(); }
1644                 inFASTA.close();
1645                 delete chimera;
1646                                 
1647                 return count;
1648                 
1649                 
1650         }
1651         catch(exception& e) {
1652                 m->errorOut(e, "ChimeraSlayerCommand", "driver");
1653                 exit(1);
1654         }
1655 }
1656 //**********************************************************************************************************************
1657 #ifdef USE_MPI
1658 int ChimeraSlayerCommand::driverMPI(int start, int num, MPI_File& inMPI, MPI_File& outMPI, MPI_File& outAccMPI, MPI_File& outFastaMPI, vector<unsigned long long>& MPIPos, string filename, map<string, int>& priority, bool byGroup){
1659         try {
1660                 MPI_Status status; 
1661                 int pid;
1662                 MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
1663                 
1664                 Chimera* chimera;
1665                 if (templatefile != "self") { //you want to run slayer with a reference template
1666                         chimera = new ChimeraSlayer(filename, templatefile, trim, search, ksize, match, mismatch, window, divR, minSimilarity, minCoverage, minBS, minSNP, parents, iters, increment, numwanted, realign, blastlocation, rand());       
1667                 }else {
1668                         chimera = new ChimeraSlayer(filename, templatefile, trim, priority, search, ksize, match, mismatch, window, divR, minSimilarity, minCoverage, minBS, minSNP, parents, iters, increment, numwanted, realign, blastlocation, rand(), byGroup);    
1669                 }
1670                 
1671                 if (m->control_pressed) { delete chimera; return 0; }
1672                 
1673                 if (chimera->getUnaligned()) { delete chimera; m->mothurOut("Your template sequences are different lengths, please correct."); m->mothurOutEndLine(); m->control_pressed = true; return 0; }
1674                 templateSeqsLength = chimera->getLength();
1675                 
1676                 for(int i=0;i<num;i++){
1677                         
1678                         if (m->control_pressed) {       delete chimera; return 1;       }
1679                         
1680                         //read next sequence
1681                         int length = MPIPos[start+i+1] - MPIPos[start+i];
1682
1683                         char* buf4 = new char[length];
1684                         MPI_File_read_at(inMPI, MPIPos[start+i], buf4, length, MPI_CHAR, &status);
1685         
1686                         string tempBuf = buf4;
1687                         if (tempBuf.length() > length) { tempBuf = tempBuf.substr(0, length);  }
1688                         istringstream iss (tempBuf,istringstream::in);
1689
1690                         delete buf4;
1691
1692                         Sequence* candidateSeq = new Sequence(iss);  m->gobble(iss);
1693                         string candidateAligned = candidateSeq->getAligned();
1694                 
1695                         if (candidateSeq->getName() != "") { //incase there is a commented sequence at the end of a file
1696                                 
1697                                 if (candidateSeq->getAligned().length() != templateSeqsLength) {  
1698                                         m->mothurOut(candidateSeq->getName() + " is not the same length as the template sequences. Skipping."); m->mothurOutEndLine();
1699                                 }else{
1700                 
1701                                         //find chimeras
1702                                         chimera->getChimeras(candidateSeq);
1703                         
1704                                         if (m->control_pressed) {       delete chimera; delete candidateSeq; return 1;  }
1705                                         
1706                                         //if you are not chimeric, then check each half
1707                                         data_results wholeResults = chimera->getResults();
1708                                         
1709                                         //determine if we need to split
1710                                         bool isChimeric = false;
1711                                         
1712                                         if (wholeResults.flag == "yes") {
1713                                                 string chimeraFlag = "no";
1714                                                 if(  (wholeResults.results[0].bsa >= minBS && wholeResults.results[0].divr_qla_qrb >= divR)
1715                                                    ||
1716                                                    (wholeResults.results[0].bsb >= minBS && wholeResults.results[0].divr_qlb_qra >= divR) ) { chimeraFlag = "yes"; }
1717                                                 
1718                                                 
1719                                                 if (chimeraFlag == "yes") {     
1720                                                         if ((wholeResults.results[0].bsa >= minBS) || (wholeResults.results[0].bsb >= minBS)) { isChimeric = true; }
1721                                                 }
1722                                         }
1723                                         
1724                                         if ((!isChimeric) && trimera) {                                                 
1725                                                 //split sequence in half by bases
1726                                                 string leftQuery, rightQuery;
1727                                                 Sequence tempSeq(candidateSeq->getName(), candidateAligned);
1728                                                 divideInHalf(tempSeq, leftQuery, rightQuery);
1729                                                 
1730                                                 //run chimeraSlayer on each piece
1731                                                 Sequence* left = new Sequence(candidateSeq->getName(), leftQuery);
1732                                                 Sequence* right = new Sequence(candidateSeq->getName(), rightQuery);
1733                                                 
1734                                                 //find chimeras
1735                                                 chimera->getChimeras(left);
1736                                                 data_results leftResults = chimera->getResults();
1737                                                 
1738                                                 chimera->getChimeras(right);
1739                                                 data_results rightResults = chimera->getResults();
1740                                                 
1741                                                 //if either piece is chimeric then report
1742                                                 Sequence trimmed = chimera->print(outMPI, outAccMPI, leftResults, rightResults);
1743                                                 if (trim) {  
1744                                                         string outputString = ">" + trimmed.getName() + "\n" + trimmed.getAligned() + "\n";
1745                                                         
1746                                                         //write to accnos file
1747                                                         int length = outputString.length();
1748                                                         char* buf2 = new char[length];
1749                                                         memcpy(buf2, outputString.c_str(), length);
1750                                                         
1751                                                         MPI_File_write_shared(outFastaMPI, buf2, length, MPI_CHAR, &status);
1752                                                         delete buf2;
1753                                                 }
1754                                                 
1755                                                 delete left; delete right;
1756                                                 
1757                                         }else { 
1758                                                 //print results
1759                                                 Sequence trimmed = chimera->print(outMPI, outAccMPI);
1760                                                 
1761                                                 if (trim) {  
1762                                                         string outputString = ">" + trimmed.getName() + "\n" + trimmed.getAligned() + "\n";
1763                                                         
1764                                                         //write to accnos file
1765                                                         int length = outputString.length();
1766                                                         char* buf2 = new char[length];
1767                                                         memcpy(buf2, outputString.c_str(), length);
1768                                                         
1769                                                         MPI_File_write_shared(outFastaMPI, buf2, length, MPI_CHAR, &status);
1770                                                         delete buf2;
1771                                                 }
1772                                         }
1773                                         
1774                                 }
1775                         }
1776                         delete candidateSeq;
1777                         
1778                         //report progress
1779                         if((i+1) % 100 == 0){  cout << "Processing sequence: " << (i+1) << endl;        m->mothurOutJustToLog("Processing sequence: " + toString(i+1) + "\n");          }
1780                 }
1781                 //report progress
1782                 if(num % 100 != 0){             cout << "Processing sequence: " << num << endl; m->mothurOutJustToLog("Processing sequence: " + toString(num) + "\n");  }
1783                 
1784                 int numNoParents = chimera->getNumNoParents();
1785                 if (numNoParents == num) { cout << "[WARNING]: megablast returned 0 potential parents for all your sequences. This could be due to formatdb.exe not being setup properly, please check formatdb.log for errors." << endl; }
1786                 
1787                 delete chimera;         
1788                 return 0;
1789         }
1790         catch(exception& e) {
1791                 m->errorOut(e, "ChimeraSlayerCommand", "driverMPI");
1792                 exit(1);
1793         }
1794 }
1795 #endif
1796
1797 /**************************************************************************************************/
1798
1799 int ChimeraSlayerCommand::createProcesses(string outputFileName, string filename, string accnos, string fasta, map<string, int>& thisPriority) {
1800         try {
1801                 int process = 0;
1802                 int num = 0;
1803                 processIDS.clear();
1804                 
1805 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
1806                 //loop through and create all the processes you want
1807                 while (process != processors) {
1808                         int pid = fork();
1809                         
1810                         if (pid > 0) {
1811                                 processIDS.push_back(pid);  //create map from line number to pid so you can append files in correct order later
1812                                 process++;
1813                         }else if (pid == 0){
1814                                 num = driver(lines[process], outputFileName + toString(getpid()) + ".temp", filename, accnos + toString(getpid()) + ".temp", fasta + toString(getpid()) + ".temp", thisPriority);
1815                                 
1816                                 //pass numSeqs to parent
1817                                 ofstream out;
1818                                 string tempFile = outputFileName + toString(getpid()) + ".num.temp";
1819                                 m->openOutputFile(tempFile, out);
1820                                 out << num << endl;
1821                                 out.close();
1822                                 exit(0);
1823                         }else { 
1824                                 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine(); 
1825                                 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
1826                                 exit(0);
1827                         }
1828                 }
1829                 
1830                 //force parent to wait until all the processes are done
1831                 for (int i=0;i<processors;i++) { 
1832                         int temp = processIDS[i];
1833                         wait(&temp);
1834                 }
1835                 
1836                 for (int i = 0; i < processIDS.size(); i++) {
1837                         ifstream in;
1838                         string tempFile =  outputFileName + toString(processIDS[i]) + ".num.temp";
1839                         m->openInputFile(tempFile, in);
1840                         if (!in.eof()) { int tempNum = 0;  in >> tempNum; num += tempNum; }
1841                         in.close(); m->mothurRemove(tempFile);
1842                 }
1843 #else
1844                 
1845                 //////////////////////////////////////////////////////////////////////////////////////////////////////
1846                 //Windows version shared memory, so be careful when passing variables through the slayerData struct. 
1847                 //Above fork() will clone, so memory is separate, but that's not the case with windows, 
1848                 //////////////////////////////////////////////////////////////////////////////////////////////////////
1849                 
1850                 vector<slayerData*> pDataArray; 
1851                 DWORD   dwThreadIdArray[processors];
1852                 HANDLE  hThreadArray[processors]; 
1853                 
1854                 //Create processor worker threads.
1855                 for( int i=0; i<processors; i++ ){
1856                         string extension = toString(i) + ".temp";
1857                         slayerData* tempslayer = new slayerData((outputFileName + extension), (fasta + extension), (accnos + extension), filename, templatefile, search, blastlocation, trimera, trim, realign, m, lines[i].start, lines[i].end, ksize, match, mismatch, window, minSimilarity, minCoverage, minBS, minSNP, parents, iters, increment, numwanted, divR, priority, i);
1858                         pDataArray.push_back(tempslayer);
1859                         processIDS.push_back(i);
1860                         
1861                         //MySlayerThreadFunction is in header. It must be global or static to work with the threads.
1862                         //default security attributes, thread function name, argument to thread function, use default creation flags, returns the thread identifier
1863                         hThreadArray[i] = CreateThread(NULL, 0, MySlayerThreadFunction, pDataArray[i], 0, &dwThreadIdArray[i]);   
1864                 }
1865                                 
1866                 //Wait until all threads have terminated.
1867                 WaitForMultipleObjects(processors, hThreadArray, TRUE, INFINITE);
1868                 
1869                 //Close all thread handles and free memory allocations.
1870                 for(int i=0; i < pDataArray.size(); i++){
1871                         num += pDataArray[i]->count;
1872                         CloseHandle(hThreadArray[i]);
1873                         delete pDataArray[i];
1874                 }
1875 #endif  
1876                 
1877                 rename((outputFileName + toString(processIDS[0]) + ".temp").c_str(), outputFileName.c_str());
1878                 rename((accnos + toString(processIDS[0]) + ".temp").c_str(), accnos.c_str());
1879                 if (trim) {  rename((fasta + toString(processIDS[0]) + ".temp").c_str(), fasta.c_str()); }
1880                 
1881                 //append output files
1882                 for(int i=1;i<processIDS.size();i++){
1883                         m->appendFiles((outputFileName + toString(processIDS[i]) + ".temp"), outputFileName);
1884                         m->mothurRemove((outputFileName + toString(processIDS[i]) + ".temp"));
1885                         
1886                         m->appendFiles((accnos + toString(processIDS[i]) + ".temp"), accnos);
1887                         m->mothurRemove((accnos + toString(processIDS[i]) + ".temp"));
1888                         
1889                         if (trim) {
1890                                 m->appendFiles((fasta + toString(processIDS[i]) + ".temp"), fasta);
1891                                 m->mothurRemove((fasta + toString(processIDS[i]) + ".temp"));
1892                         }
1893                 }
1894                 
1895                 
1896                 return num;
1897         }
1898         catch(exception& e) {
1899                 m->errorOut(e, "ChimeraSlayerCommand", "createProcesses");
1900                 exit(1);
1901         }
1902 }
1903
1904 /**************************************************************************************************/
1905
1906 int ChimeraSlayerCommand::divideInHalf(Sequence querySeq, string& leftQuery, string& rightQuery) {
1907         try {
1908                 
1909                 string queryUnAligned = querySeq.getUnaligned();
1910                 int numBases = int(queryUnAligned.length() * 0.5);
1911                 
1912                 string queryAligned = querySeq.getAligned();
1913                 leftQuery = querySeq.getAligned();
1914                 rightQuery = querySeq.getAligned();
1915                 
1916                 int baseCount = 0;
1917                 int leftSpot = 0;
1918                 for (int i = 0; i < queryAligned.length(); i++) {
1919                         //if you are a base
1920                         if (isalpha(queryAligned[i])) {         
1921                                 baseCount++; 
1922                         }
1923                         
1924                         //if you have half
1925                         if (baseCount >= numBases) {  leftSpot = i; break; } //first half
1926                 }
1927                 
1928                 //blank out right side
1929                 for (int i = leftSpot; i < leftQuery.length(); i++) { leftQuery[i] = '.'; }
1930                 
1931                 //blank out left side
1932                 for (int i = 0; i < leftSpot; i++) { rightQuery[i] = '.'; }
1933                 
1934                 return 0;
1935                 
1936         }
1937         catch(exception& e) {
1938                 m->errorOut(e, "ChimeraSlayerCommand", "divideInHalf");
1939                 exit(1);
1940         }
1941 }
1942 /**************************************************************************************************/
1943 map<string, int> ChimeraSlayerCommand::sortFastaFile(string fastaFile, string nameFile) {
1944         try {
1945                 map<string, int> nameAbund;
1946                 
1947                 //read through fastafile and store info
1948                 map<string, string> seqs;
1949                 ifstream in;
1950                 m->openInputFile(fastaFile, in);
1951                 
1952                 while (!in.eof()) {
1953                         
1954                         if (m->control_pressed) { in.close(); return nameAbund; }
1955                         
1956                         Sequence seq(in); m->gobble(in);
1957                         seqs[seq.getName()] = seq.getAligned();
1958                 }
1959                 
1960                 in.close();
1961                 
1962                 //read namefile or countfile
1963                 vector<seqPriorityNode> nameMapCount;
1964         int error;
1965         if (hasCount) { 
1966             CountTable ct;
1967             ct.readTable(nameFile);
1968             
1969             for(map<string, string>::iterator it = seqs.begin(); it != seqs.end(); it++) {
1970                 int num = ct.getNumSeqs(it->first);
1971                 if (num == 0) { error = 1; }
1972                 else {
1973                     seqPriorityNode temp(num, it->second, it->first);
1974                     nameMapCount.push_back(temp);
1975                 }
1976             }
1977         }else { error = m->readNames(nameFile, nameMapCount, seqs); }
1978                 
1979                 if (m->control_pressed) { return nameAbund; }
1980                 
1981                 if (error == 1) { m->control_pressed = true; return nameAbund; }
1982                 if (seqs.size() != nameMapCount.size()) { m->mothurOut( "The number of sequences in your fastafile does not match the number of sequences in your namefile, aborting."); m->mothurOutEndLine(); m->control_pressed = true; return nameAbund; }
1983
1984                 sort(nameMapCount.begin(), nameMapCount.end(), compareSeqPriorityNodes);
1985                 
1986                 string newFasta = fastaFile + ".temp";
1987                 ofstream out;
1988                 m->openOutputFile(newFasta, out);
1989                 
1990                 //print new file in order of
1991                 for (int i = 0; i < nameMapCount.size(); i++) {
1992                         out << ">" << nameMapCount[i].name << endl << nameMapCount[i].seq << endl;
1993                         nameAbund[nameMapCount[i].name] = nameMapCount[i].numIdentical;
1994                 }
1995                 out.close();
1996                 
1997                 rename(newFasta.c_str(), fastaFile.c_str());
1998                                 
1999                 return nameAbund;
2000                 
2001         }
2002         catch(exception& e) {
2003                 m->errorOut(e, "ChimeraSlayerCommand", "sortFastaFile");
2004                 exit(1);
2005         }
2006 }
2007 /**************************************************************************************************/
2008 map<string, int> ChimeraSlayerCommand::sortFastaFile(vector<Sequence>& thisseqs, map<string, string>& nameMap, string newFile) {
2009         try {
2010                 map<string, int> nameAbund;
2011                 vector<seqPriorityNode> nameVector;
2012                 
2013                 //read through fastafile and store info
2014                 map<string, string> seqs;
2015                                 
2016                 for (int i = 0; i < thisseqs.size(); i++) {
2017                         
2018                         if (m->control_pressed) { return nameAbund; }
2019                         
2020                         map<string, string>::iterator itNameMap = nameMap.find(thisseqs[i].getName());
2021                         
2022                         if (itNameMap == nameMap.end()){
2023                                 m->control_pressed = true;
2024                                 m->mothurOut("[ERROR]: " + thisseqs[i].getName() + " is in your fastafile, but is not in your namesfile, please correct."); m->mothurOutEndLine();
2025                         }else {
2026                                 int num = m->getNumNames(itNameMap->second);
2027                                 
2028                                 seqPriorityNode temp(num, thisseqs[i].getAligned(), thisseqs[i].getName());
2029                                 nameVector.push_back(temp);
2030                         }
2031                 }
2032         
2033                 //sort by num represented
2034                 sort(nameVector.begin(), nameVector.end(), compareSeqPriorityNodes);
2035         
2036                 if (m->control_pressed) { return nameAbund; }
2037                 
2038                 if (thisseqs.size() != nameVector.size()) { m->mothurOut( "The number of sequences in your fastafile does not match the number of sequences in your namefile, aborting."); m->mothurOutEndLine(); m->control_pressed = true; return nameAbund; }
2039                                 
2040                 ofstream out;
2041                 m->openOutputFile(newFile, out);
2042                 
2043                 //print new file in order of
2044                 for (int i = 0; i < nameVector.size(); i++) {
2045                         out << ">" << nameVector[i].name << endl << nameVector[i].seq << endl;
2046                         nameAbund[nameVector[i].name] = nameVector[i].numIdentical;
2047                 }
2048                 out.close();
2049                 
2050                 return nameAbund;
2051                 
2052         }
2053         catch(exception& e) {
2054                 m->errorOut(e, "ChimeraSlayerCommand", "sortFastaFile");
2055                 exit(1);
2056         }
2057 }
2058 /**************************************************************************************************/
2059 int ChimeraSlayerCommand::sortFastaFile(vector<Sequence>& thisseqs, map<string, int>& countMap, string newFile) {
2060         try {
2061                 vector<seqPriorityNode> nameVector;
2062                 
2063                 //read through fastafile and store info
2064                 map<string, string> seqs;
2065         
2066                 for (int i = 0; i < thisseqs.size(); i++) {
2067                         
2068                         if (m->control_pressed) { return 0; }
2069                         
2070                         map<string, int>::iterator itCountMap = countMap.find(thisseqs[i].getName());
2071                         
2072                         if (itCountMap == countMap.end()){
2073                                 m->control_pressed = true;
2074                                 m->mothurOut("[ERROR]: " + thisseqs[i].getName() + " is in your fastafile, but is not in your count file, please correct."); m->mothurOutEndLine();
2075                         }else {
2076                 seqPriorityNode temp(itCountMap->second, thisseqs[i].getAligned(), thisseqs[i].getName());
2077                                 nameVector.push_back(temp);
2078                         }
2079                 }
2080         
2081                 //sort by num represented
2082                 sort(nameVector.begin(), nameVector.end(), compareSeqPriorityNodes);
2083         
2084                 if (m->control_pressed) { return 0; }
2085                 
2086                 if (thisseqs.size() != nameVector.size()) { m->mothurOut( "The number of sequences in your fastafile does not match the number of sequences in your count file, aborting."); m->mothurOutEndLine(); m->control_pressed = true; return 0; }
2087         
2088                 ofstream out;
2089                 m->openOutputFile(newFile, out);
2090                 
2091                 //print new file in order of
2092                 for (int i = 0; i < nameVector.size(); i++) {
2093                         out << ">" << nameVector[i].name << endl << nameVector[i].seq << endl;
2094                 }
2095                 out.close();
2096                 
2097                 return 0;
2098                 
2099         }
2100         catch(exception& e) {
2101                 m->errorOut(e, "ChimeraSlayerCommand", "sortFastaFile");
2102                 exit(1);
2103         }
2104 }
2105 /**************************************************************************************************/
2106