]> git.donarmstrong.com Git - mothur.git/blob - chimeraslayercommand.cpp
changed random forest output filename
[mothur.git] / chimeraslayercommand.cpp
1 /*
2  *  chimeraslayercommand.cpp
3  *  Mothur
4  *
5  *  Created by westcott on 3/31/10.
6  *  Copyright 2010 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "chimeraslayercommand.h"
11 #include "deconvolutecommand.h"
12 #include "referencedb.h"
13 #include "sequenceparser.h"
14 #include "counttable.h"
15
16 //**********************************************************************************************************************
17 vector<string> ChimeraSlayerCommand::setParameters(){   
18         try {
19                 CommandParameter ptemplate("reference", "InputTypes", "", "", "none", "none", "none","",false,true,true); parameters.push_back(ptemplate);
20                 CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none","chimera-accnos",false,true,true); parameters.push_back(pfasta);
21                 CommandParameter pname("name", "InputTypes", "", "", "NameCount", "none", "none","",false,false,true); parameters.push_back(pname);
22          CommandParameter pcount("count", "InputTypes", "", "", "NameCount-CountGroup", "none", "none","",false,false,true); parameters.push_back(pcount);
23                 CommandParameter pgroup("group", "InputTypes", "", "", "CountGroup", "none", "none","",false,false,true); parameters.push_back(pgroup);
24                 CommandParameter pwindow("window", "Number", "", "50", "", "", "","",false,false); parameters.push_back(pwindow);
25                 CommandParameter pksize("ksize", "Number", "", "7", "", "", "","",false,false); parameters.push_back(pksize);
26                 CommandParameter pmatch("match", "Number", "", "5.0", "", "", "","",false,false); parameters.push_back(pmatch);
27                 CommandParameter pmismatch("mismatch", "Number", "", "-4.0", "", "", "","",false,false); parameters.push_back(pmismatch);
28                 CommandParameter pminsim("minsim", "Number", "", "90", "", "", "","",false,false); parameters.push_back(pminsim);
29                 CommandParameter pmincov("mincov", "Number", "", "70", "", "", "","",false,false); parameters.push_back(pmincov);
30                 CommandParameter pminsnp("minsnp", "Number", "", "10", "", "", "","",false,false); parameters.push_back(pminsnp);
31                 CommandParameter pminbs("minbs", "Number", "", "90", "", "", "","",false,false); parameters.push_back(pminbs);
32                 CommandParameter psearch("search", "Multiple", "kmer-blast", "blast", "", "", "","",false,false); parameters.push_back(psearch);
33                 CommandParameter pprocessors("processors", "Number", "", "1", "", "", "","",false,false,true); parameters.push_back(pprocessors);
34         
35                 CommandParameter prealign("realign", "Boolean", "", "T", "", "", "","",false,false); parameters.push_back(prealign);
36                 CommandParameter ptrim("trim", "Boolean", "", "F", "", "", "","fasta",false,false); parameters.push_back(ptrim);
37                 CommandParameter psplit("split", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(psplit);
38                 CommandParameter pnumwanted("numwanted", "Number", "", "15", "", "", "","",false,false); parameters.push_back(pnumwanted);
39                 CommandParameter piters("iters", "Number", "", "1000", "", "", "","",false,false); parameters.push_back(piters);
40                 CommandParameter pdivergence("divergence", "Number", "", "1.007", "", "", "","",false,false); parameters.push_back(pdivergence);
41         CommandParameter pdups("dereplicate", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(pdups);
42                 CommandParameter pparents("parents", "Number", "", "3", "", "", "","",false,false); parameters.push_back(pparents);
43                 CommandParameter pincrement("increment", "Number", "", "5", "", "", "","",false,false); parameters.push_back(pincrement);
44                 CommandParameter pblastlocation("blastlocation", "String", "", "", "", "", "","",false,false); parameters.push_back(pblastlocation);
45                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
46                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
47                 CommandParameter psave("save", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(psave);
48
49                 vector<string> myArray;
50                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
51                 return myArray;
52         }
53         catch(exception& e) {
54                 m->errorOut(e, "ChimeraSlayerCommand", "setParameters");
55                 exit(1);
56         }
57 }
58 //**********************************************************************************************************************
59 string ChimeraSlayerCommand::getHelpString(){   
60         try {
61                 string helpString = "";
62                 helpString += "The chimera.slayer command reads a fastafile and referencefile and outputs potentially chimeric sequences.\n";
63                 helpString += "This command was modeled after the chimeraSlayer written by the Broad Institute.\n";
64                 helpString += "The chimera.slayer command parameters are fasta, name, group, template, processors, dereplicate, trim, ksize, window, match, mismatch, divergence. minsim, mincov, minbs, minsnp, parents, search, iters, increment, numwanted, blastlocation and realign.\n";
65                 helpString += "The fasta parameter allows you to enter the fasta file containing your potentially chimeric sequences, and is required, unless you have a valid current fasta file. \n";
66                 helpString += "The name parameter allows you to provide a name file, if you are using reference=self. \n";
67                 helpString += "The group parameter allows you to provide a group file. The group file can be used with a namesfile and reference=self. When checking sequences, only sequences from the same group as the query sequence will be used as the reference. \n";
68         helpString += "The count parameter allows you to provide a count file. The count file reference=self. If your count file contains group information, when checking sequences, only sequences from the same group as the query sequence will be used as the reference. When you use a count file with group info and dereplicate=T, mothur will create a *.pick.count_table file containing seqeunces after chimeras are removed. \n";
69                 helpString += "You may enter multiple fasta files by separating their names with dashes. ie. fasta=abrecovery.fasta-amazon.fasta \n";
70                 helpString += "The reference parameter allows you to enter a reference file containing known non-chimeric sequences, and is required. You may also set template=self, in this case the abundant sequences will be used as potential parents. \n";
71                 helpString += "The processors parameter allows you to specify how many processors you would like to use.  The default is 1. \n";
72 #ifdef USE_MPI
73                 helpString += "When using MPI, the processors parameter is set to the number of MPI processes running. \n";
74 #endif
75         helpString += "If the dereplicate parameter is false, then if one group finds the seqeunce to be chimeric, then all groups find it to be chimeric, default=f.\n";
76                 helpString += "The trim parameter allows you to output a new fasta file containing your sequences with the chimeric ones trimmed to include only their longest piece, default=F. \n";
77                 helpString += "The split parameter allows you to check both pieces of non-chimeric sequence for chimeras, thus looking for trimeras and quadmeras. default=F. \n";
78                 helpString += "The window parameter allows you to specify the window size for searching for chimeras, default=50. \n";
79                 helpString += "The increment parameter allows you to specify how far you move each window while finding chimeric sequences, default=5.\n";
80                 helpString += "The numwanted parameter allows you to specify how many sequences you would each query sequence compared with, default=15.\n";
81                 helpString += "The ksize parameter allows you to input kmersize, default is 7, used if search is kmer. \n";
82                 helpString += "The match parameter allows you to reward matched bases in blast search, default is 5. \n";
83                 helpString += "The parents parameter allows you to select the number of potential parents to investigate from the numwanted best matches after rating them, default is 3. \n";
84                 helpString += "The mismatch parameter allows you to penalize mismatched bases in blast search, default is -4. \n";
85                 helpString += "The divergence parameter allows you to set a cutoff for chimera determination, default is 1.007. \n";
86                 helpString += "The iters parameter allows you to specify the number of bootstrap iters to do with the chimeraslayer method, default=1000.\n";
87                 helpString += "The minsim parameter allows you to specify a minimum similarity with the parent fragments, default=90. \n";
88                 helpString += "The mincov parameter allows you to specify minimum coverage by closest matches found in template. Default is 70, meaning 70%. \n";
89                 helpString += "The minbs parameter allows you to specify minimum bootstrap support for calling a sequence chimeric. Default is 90, meaning 90%. \n";
90                 helpString += "The minsnp parameter allows you to specify percent of SNPs to sample on each side of breakpoint for computing bootstrap support (default: 10) \n";
91                 helpString += "The search parameter allows you to specify search method for finding the closest parent. Choices are blast, and kmer, default blast. \n";
92                 helpString += "The realign parameter allows you to realign the query to the potential parents. Choices are true or false, default true.  \n";
93                 helpString += "The blastlocation parameter allows you to specify the location of your blast executable. By default mothur will look in ./blast/bin relative to mothur's executable.  \n";
94                 helpString += "If the save parameter is set to true the reference sequences will be saved in memory, to clear them later you can use the clear.memory command. Default=f.";
95                 helpString += "The chimera.slayer command should be in the following format: \n";
96                 helpString += "chimera.slayer(fasta=yourFastaFile, reference=yourTemplate, search=yourSearch) \n";
97                 helpString += "Example: chimera.slayer(fasta=AD.align, reference=core_set_aligned.imputed.fasta, search=kmer) \n";
98                 helpString += "Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFastaFile).\n";       
99                 return helpString;
100         }
101         catch(exception& e) {
102                 m->errorOut(e, "ChimeraSlayerCommand", "getHelpString");
103                 exit(1);
104         }
105 }
106 //**********************************************************************************************************************
107 string ChimeraSlayerCommand::getOutputPattern(string type) {
108     try {
109         string pattern = "";
110         
111         if (type == "chimera") {  pattern = "[filename],slayer.chimeras"; } 
112         else if (type == "accnos") {  pattern = "[filename],slayer.accnos"; } 
113         else if (type == "fasta") {  pattern = "[filename],slayer.fasta"; }
114         else if (type == "count") {  pattern = "[filename],slayer.pick.count_table"; } 
115         else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true;  }
116         
117         return pattern;
118     }
119     catch(exception& e) {
120         m->errorOut(e, "ChimeraSlayerCommand", "getOutputPattern");
121         exit(1);
122     }
123 }
124 //**********************************************************************************************************************
125 ChimeraSlayerCommand::ChimeraSlayerCommand(){   
126         try {
127                 abort = true; calledHelp = true;
128                 setParameters();
129                 vector<string> tempOutNames;
130                 outputTypes["chimera"] = tempOutNames;
131                 outputTypes["accnos"] = tempOutNames;
132                 outputTypes["fasta"] = tempOutNames;
133         outputTypes["count"] = tempOutNames;
134         }
135         catch(exception& e) {
136                 m->errorOut(e, "ChimeraSlayerCommand", "ChimeraSlayerCommand");
137                 exit(1);
138         }
139 }
140 //***************************************************************************************************************
141 ChimeraSlayerCommand::ChimeraSlayerCommand(string option)  {
142         try {
143                 abort = false; calledHelp = false;   
144                 ReferenceDB* rdb = ReferenceDB::getInstance();
145         hasCount = false;
146         hasName = false;
147                 
148                 //allow user to run help
149                 if(option == "help") { help(); abort = true; calledHelp = true; }
150                 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
151                 
152                 else {
153                         vector<string> myArray = setParameters();
154                         
155                         OptionParser parser(option);
156                         map<string,string> parameters = parser.getParameters();
157                         
158                         ValidParameters validParameter("chimera.slayer");
159                         map<string,string>::iterator it;
160                         
161                         //check to make sure all parameters are valid for command
162                         for (it = parameters.begin(); it != parameters.end(); it++) { 
163                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
164                         }
165                         
166                         vector<string> tempOutNames;
167                         outputTypes["chimera"] = tempOutNames;
168                         outputTypes["accnos"] = tempOutNames;
169                         outputTypes["fasta"] = tempOutNames;
170             outputTypes["count"] = tempOutNames;
171                 
172                         //if the user changes the input directory command factory will send this info to us in the output parameter 
173                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
174                         if (inputDir == "not found"){   inputDir = "";          }
175                                                 
176                         //check for required parameters
177                         fastafile = validParameter.validFile(parameters, "fasta", false);
178                         if (fastafile == "not found") {                                 
179                                 //if there is a current fasta file, use it
180                                 string filename = m->getFastaFile(); 
181                                 if (filename != "") { fastaFileNames.push_back(filename); m->mothurOut("Using " + filename + " as input file for the fasta parameter."); m->mothurOutEndLine(); }
182                                 else {  m->mothurOut("You have no current fastafile and the fasta parameter is required."); m->mothurOutEndLine(); abort = true; }
183                         }else { 
184                                 m->splitAtDash(fastafile, fastaFileNames);
185                                 
186                                 //go through files and make sure they are good, if not, then disregard them
187                                 for (int i = 0; i < fastaFileNames.size(); i++) {
188                                         
189                                         bool ignore = false;
190                                         if (fastaFileNames[i] == "current") { 
191                                                 fastaFileNames[i] = m->getFastaFile(); 
192                                                 if (fastaFileNames[i] != "") {  m->mothurOut("Using " + fastaFileNames[i] + " as input file for the fasta parameter where you had given current."); m->mothurOutEndLine(); }
193                                                 else {  
194                                                         m->mothurOut("You have no current fastafile, ignoring current."); m->mothurOutEndLine(); ignore=true; 
195                                                         //erase from file list
196                                                         fastaFileNames.erase(fastaFileNames.begin()+i);
197                                                         i--;
198                                                 }
199                                         }
200                                         
201                                         if (!ignore) {
202                                                 
203                                                 if (inputDir != "") {
204                                                         string path = m->hasPath(fastaFileNames[i]);
205                                                         //if the user has not given a path then, add inputdir. else leave path alone.
206                                                         if (path == "") {       fastaFileNames[i] = inputDir + fastaFileNames[i];               }
207                                                 }
208                 
209                                                 int ableToOpen;
210                                                 ifstream in;
211                                                 
212                                                 ableToOpen = m->openInputFile(fastaFileNames[i], in, "noerror");
213                                         
214                                                 //if you can't open it, try default location
215                                                 if (ableToOpen == 1) {
216                                                         if (m->getDefaultPath() != "") { //default path is set
217                                                                 string tryPath = m->getDefaultPath() + m->getSimpleName(fastaFileNames[i]);
218                                                                 m->mothurOut("Unable to open " + fastaFileNames[i] + ". Trying default " + tryPath); m->mothurOutEndLine();
219                                                                 ifstream in2;
220                                                                 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
221                                                                 in2.close();
222                                                                 fastaFileNames[i] = tryPath;
223                                                         }
224                                                 }
225                                                 
226                                                 if (ableToOpen == 1) {
227                                                         if (m->getOutputDir() != "") { //default path is set
228                                                                 string tryPath = m->getOutputDir() + m->getSimpleName(fastaFileNames[i]);
229                                                                 m->mothurOut("Unable to open " + fastaFileNames[i] + ". Trying output directory " + tryPath); m->mothurOutEndLine();
230                                                                 ifstream in2;
231                                                                 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
232                                                                 in2.close();
233                                                                 fastaFileNames[i] = tryPath;
234                                                         }
235                                                 }
236                                                 
237                                                 in.close();
238                                                 
239                                                 if (ableToOpen == 1) { 
240                                                         m->mothurOut("Unable to open " + fastaFileNames[i] + ". It will be disregarded."); m->mothurOutEndLine(); 
241                                                         //erase from file list
242                                                         fastaFileNames.erase(fastaFileNames.begin()+i);
243                                                         i--;
244                                                 }else {
245                                                         m->setFastaFile(fastaFileNames[i]);
246                                                 }
247                                         }
248                                 }
249                                 
250                                 //make sure there is at least one valid file left
251                                 if (fastaFileNames.size() == 0) { m->mothurOut("[ERROR]: no valid files."); m->mothurOutEndLine(); abort = true; }
252                         }
253                         
254                         
255                         //check for required parameters
256                         namefile = validParameter.validFile(parameters, "name", false);
257                         if (namefile == "not found") { namefile = "";   }
258                         else { 
259                                 m->splitAtDash(namefile, nameFileNames);
260                                 
261                                 //go through files and make sure they are good, if not, then disregard them
262                                 for (int i = 0; i < nameFileNames.size(); i++) {
263                                         
264                                         bool ignore = false;
265                                         if (nameFileNames[i] == "current") { 
266                                                 nameFileNames[i] = m->getNameFile(); 
267                                                 if (nameFileNames[i] != "") {  m->mothurOut("Using " + nameFileNames[i] + " as input file for the name parameter where you had given current."); m->mothurOutEndLine(); }
268                                                 else {  
269                                                         m->mothurOut("You have no current namefile, ignoring current."); m->mothurOutEndLine(); ignore=true; 
270                                                         //erase from file list
271                                                         nameFileNames.erase(nameFileNames.begin()+i);
272                                                         i--;
273                                                 }
274                                         }
275                                         
276                                         if (!ignore) {
277                                                 
278                                                 if (inputDir != "") {
279                                                         string path = m->hasPath(nameFileNames[i]);
280                                                         //if the user has not given a path then, add inputdir. else leave path alone.
281                                                         if (path == "") {       nameFileNames[i] = inputDir + nameFileNames[i];         }
282                                                 }
283                                                 
284                                                 int ableToOpen;
285                                                 ifstream in;
286                                                 
287                                                 ableToOpen = m->openInputFile(nameFileNames[i], in, "noerror");
288                                                 
289                                                 //if you can't open it, try default location
290                                                 if (ableToOpen == 1) {
291                                                         if (m->getDefaultPath() != "") { //default path is set
292                                                                 string tryPath = m->getDefaultPath() + m->getSimpleName(nameFileNames[i]);
293                                                                 m->mothurOut("Unable to open " + nameFileNames[i] + ". Trying default " + tryPath); m->mothurOutEndLine();
294                                                                 ifstream in2;
295                                                                 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
296                                                                 in2.close();
297                                                                 nameFileNames[i] = tryPath;
298                                                         }
299                                                 }
300                                                 
301                                                 if (ableToOpen == 1) {
302                                                         if (m->getOutputDir() != "") { //default path is set
303                                                                 string tryPath = m->getOutputDir() + m->getSimpleName(nameFileNames[i]);
304                                                                 m->mothurOut("Unable to open " + nameFileNames[i] + ". Trying output directory " + tryPath); m->mothurOutEndLine();
305                                                                 ifstream in2;
306                                                                 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
307                                                                 in2.close();
308                                                                 nameFileNames[i] = tryPath;
309                                                         }
310                                                 }
311                                                 
312                                                 in.close();
313                                                 
314                                                 if (ableToOpen == 1) { 
315                                                         m->mothurOut("Unable to open " + nameFileNames[i] + ". It will be disregarded."); m->mothurOutEndLine(); 
316                                                         //erase from file list
317                                                         nameFileNames.erase(nameFileNames.begin()+i);
318                                                         i--;
319                                                 }else {
320                                                         m->setNameFile(nameFileNames[i]);
321                                                 }
322                                         }
323                                 }
324                         }
325             
326             if (nameFileNames.size() != 0) { hasName = true; }
327             
328             //check for required parameters
329             vector<string> countfileNames;
330                         countfile = validParameter.validFile(parameters, "count", false);
331                         if (countfile == "not found") { 
332                 countfile = "";  
333                         }else { 
334                                 m->splitAtDash(countfile, countfileNames);
335                                 
336                                 //go through files and make sure they are good, if not, then disregard them
337                                 for (int i = 0; i < countfileNames.size(); i++) {
338                                         
339                                         bool ignore = false;
340                                         if (countfileNames[i] == "current") { 
341                                                 countfileNames[i] = m->getCountTableFile(); 
342                                                 if (nameFileNames[i] != "") {  m->mothurOut("Using " + countfileNames[i] + " as input file for the count parameter where you had given current."); m->mothurOutEndLine(); }
343                                                 else {  
344                                                         m->mothurOut("You have no current count file, ignoring current."); m->mothurOutEndLine(); ignore=true; 
345                                                         //erase from file list
346                                                         countfileNames.erase(countfileNames.begin()+i);
347                                                         i--;
348                                                 }
349                                         }
350                                         
351                                         if (!ignore) {
352                                                 
353                                                 if (inputDir != "") {
354                                                         string path = m->hasPath(countfileNames[i]);
355                                                         //if the user has not given a path then, add inputdir. else leave path alone.
356                                                         if (path == "") {       countfileNames[i] = inputDir + countfileNames[i];               }
357                                                 }
358                                                 
359                                                 int ableToOpen;
360                                                 ifstream in;
361                                                 
362                                                 ableToOpen = m->openInputFile(countfileNames[i], in, "noerror");
363                                                 
364                                                 //if you can't open it, try default location
365                                                 if (ableToOpen == 1) {
366                                                         if (m->getDefaultPath() != "") { //default path is set
367                                                                 string tryPath = m->getDefaultPath() + m->getSimpleName(countfileNames[i]);
368                                                                 m->mothurOut("Unable to open " + countfileNames[i] + ". Trying default " + tryPath); m->mothurOutEndLine();
369                                                                 ifstream in2;
370                                                                 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
371                                                                 in2.close();
372                                                                 countfileNames[i] = tryPath;
373                                                         }
374                                                 }
375                                                 
376                                                 if (ableToOpen == 1) {
377                                                         if (m->getOutputDir() != "") { //default path is set
378                                                                 string tryPath = m->getOutputDir() + m->getSimpleName(countfileNames[i]);
379                                                                 m->mothurOut("Unable to open " + countfileNames[i] + ". Trying output directory " + tryPath); m->mothurOutEndLine();
380                                                                 ifstream in2;
381                                                                 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
382                                                                 in2.close();
383                                                                 countfileNames[i] = tryPath;
384                                                         }
385                                                 }
386                                                 
387                                                 in.close();
388                                                 
389                                                 if (ableToOpen == 1) { 
390                                                         m->mothurOut("Unable to open " + countfileNames[i] + ". It will be disregarded."); m->mothurOutEndLine(); 
391                                                         //erase from file list
392                                                         countfileNames.erase(countfileNames.begin()+i);
393                                                         i--;
394                                                 }else {
395                                                         m->setCountTableFile(countfileNames[i]);
396                                                 }
397                                         }
398                                 }
399                         }
400             
401             if (countfileNames.size() != 0) { hasCount = true; }
402             
403                         //make sure there is at least one valid file left
404             if (hasName && hasCount) { m->mothurOut("[ERROR]: You must enter ONLY ONE of the following: count or name."); m->mothurOutEndLine(); abort = true; }
405             
406             if (!hasName && hasCount) { nameFileNames = countfileNames; }
407             
408                         if ((hasCount || hasName) && (nameFileNames.size() != fastaFileNames.size())) { m->mothurOut("[ERROR]: The number of name or count files does not match the number of fastafiles, please correct."); m->mothurOutEndLine(); abort=true; }
409                         
410                         bool hasGroup = true;
411                         groupfile = validParameter.validFile(parameters, "group", false);
412                         if (groupfile == "not found") { groupfile = "";  hasGroup = false; }
413                         else { 
414                                 m->splitAtDash(groupfile, groupFileNames);
415                                 
416                                 //go through files and make sure they are good, if not, then disregard them
417                                 for (int i = 0; i < groupFileNames.size(); i++) {
418                                         
419                                         bool ignore = false;
420                                         if (groupFileNames[i] == "current") { 
421                                                 groupFileNames[i] = m->getGroupFile(); 
422                                                 if (groupFileNames[i] != "") {  m->mothurOut("Using " + groupFileNames[i] + " as input file for the group parameter where you had given current."); m->mothurOutEndLine(); }
423                                                 else {  
424                                                         m->mothurOut("You have no current namefile, ignoring current."); m->mothurOutEndLine(); ignore=true; 
425                                                         //erase from file list
426                                                         groupFileNames.erase(groupFileNames.begin()+i);
427                                                         i--;
428                                                 }
429                                         }
430                                         
431                                         if (!ignore) {
432                                                 
433                                                 if (inputDir != "") {
434                                                         string path = m->hasPath(groupFileNames[i]);
435                                                         //if the user has not given a path then, add inputdir. else leave path alone.
436                                                         if (path == "") {       groupFileNames[i] = inputDir + groupFileNames[i];               }
437                                                 }
438                                                 
439                                                 int ableToOpen;
440                                                 ifstream in;
441                                                 
442                                                 ableToOpen = m->openInputFile(groupFileNames[i], in, "noerror");
443                                                 
444                                                 //if you can't open it, try default location
445                                                 if (ableToOpen == 1) {
446                                                         if (m->getDefaultPath() != "") { //default path is set
447                                                                 string tryPath = m->getDefaultPath() + m->getSimpleName(groupFileNames[i]);
448                                                                 m->mothurOut("Unable to open " + groupFileNames[i] + ". Trying default " + tryPath); m->mothurOutEndLine();
449                                                                 ifstream in2;
450                                                                 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
451                                                                 in2.close();
452                                                                 groupFileNames[i] = tryPath;
453                                                         }
454                                                 }
455                                                 
456                                                 if (ableToOpen == 1) {
457                                                         if (m->getOutputDir() != "") { //default path is set
458                                                                 string tryPath = m->getOutputDir() + m->getSimpleName(groupFileNames[i]);
459                                                                 m->mothurOut("Unable to open " + groupFileNames[i] + ". Trying output directory " + tryPath); m->mothurOutEndLine();
460                                                                 ifstream in2;
461                                                                 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
462                                                                 in2.close();
463                                                                 groupFileNames[i] = tryPath;
464                                                         }
465                                                 }
466                                                 
467                                                 in.close();
468                                                 
469                                                 if (ableToOpen == 1) { 
470                                                         m->mothurOut("Unable to open " + groupFileNames[i] + ". It will be disregarded."); m->mothurOutEndLine(); 
471                                                         //erase from file list
472                                                         groupFileNames.erase(groupFileNames.begin()+i);
473                                                         i--;
474                                                 }else {
475                                                         m->setGroupFile(groupFileNames[i]);
476                                                 }
477                                         }
478                                 }
479                                 
480                                 //make sure there is at least one valid file left
481                                 if (groupFileNames.size() == 0) { m->mothurOut("[ERROR]: no valid group files."); m->mothurOutEndLine(); abort = true; }
482                         }
483                         
484                         if (hasGroup && (groupFileNames.size() != fastaFileNames.size())) { m->mothurOut("[ERROR]: The number of groupfiles does not match the number of fastafiles, please correct."); m->mothurOutEndLine(); abort=true; }
485                         
486             if (hasGroup && hasCount) { m->mothurOut("[ERROR]: You must enter ONLY ONE of the following: count or group."); m->mothurOutEndLine(); abort = true; }                      
487                         //if the user changes the output directory command factory will send this info to us in the output parameter 
488                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = ""; }
489                         
490                         string temp = validParameter.validFile(parameters, "processors", false);        if (temp == "not found"){       temp = m->getProcessors();      }
491                         m->setProcessors(temp);
492                         m->mothurConvert(temp, processors);
493                         
494                         temp = validParameter.validFile(parameters, "save", false);                     if (temp == "not found"){       temp = "f";                             }
495                         save = m->isTrue(temp); 
496                         rdb->save = save; 
497                         if (save) { //clear out old references
498                                 rdb->clearMemory();     
499                         }
500                         
501                         string path;
502                         it = parameters.find("reference");
503                         //user has given a template file
504                         if(it != parameters.end()){ 
505                                 if (it->second == "self") { 
506                                         templatefile = "self"; 
507                                         if (save) {
508                                                 m->mothurOut("[WARNING]: You can't save reference=self, ignoring save."); 
509                                                 m->mothurOutEndLine();
510                                                 save = false;
511                                         }
512                                 }
513                                 else {
514                                         path = m->hasPath(it->second);
515                                         //if the user has not given a path then, add inputdir. else leave path alone.
516                                         if (path == "") {       parameters["reference"] = inputDir + it->second;                }
517                                         
518                                         templatefile = validParameter.validFile(parameters, "reference", true);
519                                         if (templatefile == "not open") { abort = true; }
520                                         else if (templatefile == "not found") { //check for saved reference sequences
521                                                 if (rdb->referenceSeqs.size() != 0) {
522                                                         templatefile = "saved";
523                                                 }else {
524                                                         m->mothurOut("[ERROR]: You don't have any saved reference sequences and the reference parameter is a required."); 
525                                                         m->mothurOutEndLine();
526                                                         abort = true; 
527                                                 }
528                                         }else { if (save) {     rdb->setSavedReference(templatefile);   }       }       
529                                 }
530                         }else if (hasName) {  templatefile = "self"; 
531                                 if (save) {
532                                         m->mothurOut("[WARNING]: You can't save reference=self, ignoring save."); 
533                                         m->mothurOutEndLine();
534                                         save = false;
535                                 }
536                         }else if (hasCount) {  templatefile = "self"; 
537                                 if (save) {
538                                         m->mothurOut("[WARNING]: You can't save reference=self, ignoring save."); 
539                                         m->mothurOutEndLine();
540                                         save = false;
541                                 }
542                         }
543                         else { 
544                                 if (rdb->referenceSeqs.size() != 0) {
545                                         templatefile = "saved";
546                                 }else {
547                                         m->mothurOut("[ERROR]: You don't have any saved reference sequences and the reference parameter is a required."); 
548                                         m->mothurOutEndLine();
549                                         templatefile = ""; abort = true; 
550                                 } 
551                         }
552                         
553                         
554                         
555                         temp = validParameter.validFile(parameters, "ksize", false);                    if (temp == "not found") { temp = "7"; }
556                         m->mothurConvert(temp, ksize);
557                                                 
558                         temp = validParameter.validFile(parameters, "window", false);                   if (temp == "not found") { temp = "50"; }                       
559                         m->mothurConvert(temp, window);
560                         
561                         temp = validParameter.validFile(parameters, "match", false);                    if (temp == "not found") { temp = "5"; }
562                         m->mothurConvert(temp, match);
563                         
564                         temp = validParameter.validFile(parameters, "mismatch", false);                 if (temp == "not found") { temp = "-4"; }
565                         m->mothurConvert(temp, mismatch);
566                         
567                         temp = validParameter.validFile(parameters, "divergence", false);               if (temp == "not found") { temp = "1.007"; }
568                         m->mothurConvert(temp, divR);
569                         
570                         temp = validParameter.validFile(parameters, "minsim", false);                   if (temp == "not found") { temp = "90"; }
571                         m->mothurConvert(temp, minSimilarity);
572                         
573                         temp = validParameter.validFile(parameters, "mincov", false);                   if (temp == "not found") { temp = "70"; }
574                         m->mothurConvert(temp, minCoverage);
575                         
576                         temp = validParameter.validFile(parameters, "minbs", false);                    if (temp == "not found") { temp = "90"; }
577                         m->mothurConvert(temp, minBS);
578                         
579                         temp = validParameter.validFile(parameters, "minsnp", false);                   if (temp == "not found") { temp = "10"; }
580                         m->mothurConvert(temp, minSNP);
581
582                         temp = validParameter.validFile(parameters, "parents", false);                  if (temp == "not found") { temp = "3"; }
583                         m->mothurConvert(temp, parents); 
584                         
585                         temp = validParameter.validFile(parameters, "realign", false);                  if (temp == "not found") { temp = "t"; }
586                         realign = m->isTrue(temp); 
587                         
588                         temp = validParameter.validFile(parameters, "trim", false);                             if (temp == "not found") { temp = "f"; }
589                         trim = m->isTrue(temp); 
590                         
591                         temp = validParameter.validFile(parameters, "split", false);                    if (temp == "not found") { temp = "f"; }
592                         trimera = m->isTrue(temp); 
593                         
594                         search = validParameter.validFile(parameters, "search", false);                 if (search == "not found") { search = "blast"; }
595                         
596                         temp = validParameter.validFile(parameters, "iters", false);                    if (temp == "not found") { temp = "1000"; }             
597                         m->mothurConvert(temp, iters); 
598                          
599                         temp = validParameter.validFile(parameters, "increment", false);                if (temp == "not found") { temp = "5"; }
600                         m->mothurConvert(temp, increment);
601                         
602                         temp = validParameter.validFile(parameters, "numwanted", false);                if (temp == "not found") { temp = "15"; }               
603                         m->mothurConvert(temp, numwanted);
604             
605                         temp = validParameter.validFile(parameters, "dereplicate", false);      
606                         if (temp == "not found") { temp = "false";                      }
607                         dups = m->isTrue(temp);
608                         
609                         blastlocation = validParameter.validFile(parameters, "blastlocation", false);   
610                         if (blastlocation == "not found") { blastlocation = ""; }
611                         else {
612                                 //add / to name if needed
613                                 string lastChar = blastlocation.substr(blastlocation.length()-1);
614 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
615                                 if (lastChar != "/") { blastlocation += "/"; }
616 #else
617                                 if (lastChar != "\\") { blastlocation += "\\"; }        
618 #endif
619                                 blastlocation = m->getFullPathName(blastlocation);
620                                 string formatdbCommand = "";
621 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
622                                 formatdbCommand = blastlocation + "formatdb";   
623 #else
624                                 formatdbCommand = blastlocation + "formatdb.exe";
625 #endif
626                                 
627                                 //test to make sure formatdb exists
628                                 ifstream in;
629                                 formatdbCommand = m->getFullPathName(formatdbCommand);
630                                 int ableToOpen = m->openInputFile(formatdbCommand, in, "no error"); in.close();
631                                 if(ableToOpen == 1) {   m->mothurOut("[ERROR]: " + formatdbCommand + " file does not exist. mothur requires formatdb.exe to run chimera.slayer."); m->mothurOutEndLine(); abort = true; }
632                                 
633                                 string blastCommand = "";
634 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
635                                 blastCommand = blastlocation + "megablast";     
636 #else
637                                 blastCommand = blastlocation + "megablast.exe";
638 #endif
639                                 //test to make sure formatdb exists
640                                 ifstream in2;
641                                 blastCommand = m->getFullPathName(blastCommand);
642                                 ableToOpen = m->openInputFile(blastCommand, in2, "no error"); in2.close();
643                                 if(ableToOpen == 1) {   m->mothurOut("[ERROR]: " + blastCommand + " file does not exist. mothur requires blastall.exe to run chimera.slayer."); m->mothurOutEndLine(); abort = true; }
644                         }
645
646                         if ((search != "blast") && (search != "kmer")) { m->mothurOut(search + " is not a valid search."); m->mothurOutEndLine(); abort = true;  }
647                         
648                         if ((hasName || hasCount) && (templatefile != "self")) { m->mothurOut("You have provided a namefile or countfile and the reference parameter is not set to self. I am not sure what reference you are trying to use, aborting."); m->mothurOutEndLine(); abort=true; }
649                         if (hasGroup && (templatefile != "self")) { m->mothurOut("You have provided a group file and the reference parameter is not set to self. I am not sure what reference you are trying to use, aborting."); m->mothurOutEndLine(); abort=true; }
650
651                         //until we resolve the issue 10-18-11
652 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
653 #else
654                         //processors=1;
655 #endif
656                 }
657         }
658         catch(exception& e) {
659                 m->errorOut(e, "ChimeraSlayerCommand", "ChimeraSlayerCommand");
660                 exit(1);
661         }
662 }
663 //***************************************************************************************************************
664
665 int ChimeraSlayerCommand::execute(){
666         try{
667                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
668
669                 for (int s = 0; s < fastaFileNames.size(); s++) {
670                                 
671                         m->mothurOut("Checking sequences from " + fastaFileNames[s] + " ..." ); m->mothurOutEndLine();
672                 
673                         int start = time(NULL); 
674                         if (outputDir == "") { outputDir = m->hasPath(fastaFileNames[s]);  }//if user entered a file with a path then preserve it       
675                         map<string, string> variables; 
676             variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s]));
677                         string outputFileName = getOutputFileName("chimera", variables);
678                         string accnosFileName = getOutputFileName("accnos", variables);
679                         string trimFastaFileName = getOutputFileName("fasta", variables);
680             string newCountFile = "";
681                         
682                         //clears files
683                         ofstream out, out1, out2;
684                         m->openOutputFile(outputFileName, out); out.close(); 
685                         m->openOutputFile(accnosFileName, out1); out1.close();
686                         if (trim) { m->openOutputFile(trimFastaFileName, out2); out2.close(); }
687                         outputNames.push_back(outputFileName); outputTypes["chimera"].push_back(outputFileName);
688                         outputNames.push_back(accnosFileName); outputTypes["accnos"].push_back(accnosFileName);
689                         if (trim) {  outputNames.push_back(trimFastaFileName); outputTypes["fasta"].push_back(trimFastaFileName); }                     
690                         
691                         //maps a filename to priority map. 
692                         //if no groupfile this is fastafileNames[s] -> prioirity
693                         //if groupfile then this is each groups seqs -> priority
694                         map<string, map<string, int> > fileToPriority; 
695                         map<string, map<string, int> >::iterator itFile;
696                         map<string, string> fileGroup;
697                         fileToPriority[fastaFileNames[s]] = priority; //default
698                         fileGroup[fastaFileNames[s]] = "noGroup";
699             map<string, string> uniqueNames; 
700                         int totalChimeras = 0;
701                         lines.clear();
702                         
703                         if (templatefile == "self") { 
704                 if (hasCount) {
705                     SequenceCountParser* parser = NULL;
706                     setUpForSelfReference(parser, fileGroup, fileToPriority, s); 
707                     if (parser != NULL) { uniqueNames = parser->getAllSeqsMap(); delete parser; }
708                 }else {
709                     SequenceParser* parser = NULL;
710                     setUpForSelfReference(parser, fileGroup, fileToPriority, s); 
711                     if (parser != NULL) { uniqueNames = parser->getAllSeqsMap(); delete parser; }
712                 }
713             }
714                         
715                         if (m->control_pressed) {   for (int j = 0; j < outputNames.size(); j++) {      m->mothurRemove(outputNames[j]);        }  return 0;    }
716
717                         if (fileToPriority.size() == 1) { //you running without a groupfile
718                                 itFile = fileToPriority.begin();
719                                 string thisFastaName = itFile->first;
720                                 map<string, int> thisPriority = itFile->second;
721 #ifdef USE_MPI  
722                                 MPIExecute(thisFastaName, outputFileName, accnosFileName, trimFastaFileName, thisPriority);
723 #else
724                                 //break up file
725                                 vector<unsigned long long> positions; 
726 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
727                                 positions = m->divideFile(thisFastaName, processors);
728                                 for (int i = 0; i < (positions.size()-1); i++) {        lines.push_back(linePair(positions[i], positions[(i+1)]));      }
729 #else
730                                 if (processors == 1) {  lines.push_back(linePair(0, 1000)); }
731                                 else {
732                                         positions = m->setFilePosFasta(thisFastaName, numSeqs); 
733                     if (positions.size() < processors) { processors = positions.size(); }
734                                         
735                                         //figure out how many sequences you have to process
736                                         int numSeqsPerProcessor = numSeqs / processors;
737                                         for (int i = 0; i < processors; i++) {
738                                                 int startIndex =  i * numSeqsPerProcessor;
739                                                 if(i == (processors - 1)){      numSeqsPerProcessor = numSeqs - i * numSeqsPerProcessor;        }
740                                                 lines.push_back(linePair(positions[startIndex], numSeqsPerProcessor));
741                                         }
742                                 }
743 #endif
744                                 if(processors == 1){ numSeqs = driver(lines[0], outputFileName, thisFastaName, accnosFileName, trimFastaFileName, thisPriority);  }
745                                 else{ numSeqs = createProcesses(outputFileName, thisFastaName, accnosFileName, trimFastaFileName, thisPriority); }
746                                 
747                                 if (m->control_pressed) {  outputTypes.clear(); if (trim) { m->mothurRemove(trimFastaFileName); } m->mothurRemove(outputFileName); m->mothurRemove(accnosFileName); for (int j = 0; j < outputNames.size(); j++) {      m->mothurRemove(outputNames[j]);        }  return 0; }                          
748 #endif
749                         }else { //you have provided a groupfile
750                 string countFile = "";
751                 if (hasCount) {
752                     countFile = nameFileNames[s];
753                     variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(nameFileNames[s]));
754                     newCountFile = getOutputFileName("count", variables);
755                 }
756 #ifdef USE_MPI  
757                                 MPIExecuteGroups(outputFileName, accnosFileName, trimFastaFileName, fileToPriority, fileGroup, newCountFile, countFile);
758 #else
759                                 if (processors == 1) {
760                     numSeqs = driverGroups(outputFileName, accnosFileName, trimFastaFileName, fileToPriority, fileGroup, newCountFile);
761                     if (hasCount && dups) {
762                         CountTable c; c.readTable(nameFileNames[s], true);
763                         if (!m->isBlank(newCountFile)) {
764                             ifstream in2;
765                             m->openInputFile(newCountFile, in2);
766                             
767                             string name, group;
768                             while (!in2.eof()) {
769                                 in2 >> name >> group; m->gobble(in2);
770                                 c.setAbund(name, group, 0);
771                             }
772                             in2.close();
773                         }
774                         m->mothurRemove(newCountFile);
775                         c.printTable(newCountFile);
776                     }
777
778                 }
779                                 else {  numSeqs = createProcessesGroups(outputFileName, accnosFileName, trimFastaFileName, fileToPriority, fileGroup, newCountFile, countFile);                 } //destroys fileToPriority
780 #endif
781
782 #ifdef USE_MPI  
783                                 int pid; 
784                                 MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
785                                 
786                                 if (pid == 0) {
787 #endif
788                     if (!dups) {
789                         totalChimeras = deconvoluteResults(uniqueNames, outputFileName, accnosFileName, trimFastaFileName);
790                         m->mothurOutEndLine(); m->mothurOut(toString(totalChimeras) + " chimera found."); m->mothurOutEndLine();
791                     }else {
792                         if (hasCount) {
793                             set<string> doNotRemove;
794                             CountTable c; c.readTable(newCountFile, true);
795                             vector<string> namesInTable = c.getNamesOfSeqs();
796                             for (int i = 0; i < namesInTable.size(); i++) {
797                                 int temp = c.getNumSeqs(namesInTable[i]);
798                                 if (temp == 0) {  c.remove(namesInTable[i]);  }
799                                 else { doNotRemove.insert((namesInTable[i])); }
800                             }
801                             //remove names we want to keep from accnos file.
802                             set<string> accnosNames = m->readAccnos(accnosFileName);
803                             ofstream out2;
804                             m->openOutputFile(accnosFileName, out2);
805                             for (set<string>::iterator it = accnosNames.begin(); it != accnosNames.end(); it++) {
806                                 if (doNotRemove.count(*it) == 0) {  out2 << (*it) << endl; }
807                             }
808                             out2.close();
809                             c.printTable(newCountFile);
810                             outputNames.push_back(newCountFile); outputTypes["count"].push_back(newCountFile);
811                         }
812                     }
813
814 #ifdef USE_MPI  
815                                 }
816                                 MPI_Barrier(MPI_COMM_WORLD); //make everyone wait
817 #endif
818                         }
819                         
820             m->mothurOut("It took " + toString(time(NULL) - start) + " secs to check " + toString(numSeqs) + " sequences.");    m->mothurOutEndLine();
821                 }
822                 
823                 //set accnos file as new current accnosfile
824                 string current = "";
825                 itTypes = outputTypes.find("accnos");
826                 if (itTypes != outputTypes.end()) {
827                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setAccnosFile(current); }
828                 }
829                 
830                 if (trim) {
831                         itTypes = outputTypes.find("fasta");
832                         if (itTypes != outputTypes.end()) {
833                                 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setFastaFile(current); }
834                         }
835                 }
836                 
837         itTypes = outputTypes.find("count");
838                 if (itTypes != outputTypes.end()) {
839                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setCountTableFile(current); }
840                 }
841         
842                 m->mothurOutEndLine();
843                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
844                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }       
845                 m->mothurOutEndLine();
846
847                 return 0;
848                 
849         }
850         catch(exception& e) {
851                 m->errorOut(e, "ChimeraSlayerCommand", "execute");
852                 exit(1);
853         }
854 }
855 //**********************************************************************************************************************
856 int ChimeraSlayerCommand::MPIExecuteGroups(string outputFileName, string accnosFileName, string trimFastaFileName, map<string, map<string, int> >& fileToPriority, map<string, string>& fileGroup, string countlist, string countfile){
857         try {
858 #ifdef USE_MPI  
859                 int pid; 
860                 int tag = 2001;
861                 
862                 MPI_Status status; 
863                 MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
864                 MPI_Comm_size(MPI_COMM_WORLD, &processors); 
865         
866                 //put filenames in a vector, then pass each process a starting and ending point in the vector
867                 //all processes already have the fileToPriority and fileGroup, they just need to know which files to process
868                 map<string, map<string, int> >::iterator itFile;
869                 vector<string> filenames;
870                 for(itFile = fileToPriority.begin(); itFile != fileToPriority.end(); itFile++) { filenames.push_back(itFile->first); }
871                 
872                 int numGroupsPerProcessor = filenames.size() / processors;
873                 int startIndex =  pid * numGroupsPerProcessor;
874                 int endIndex = (pid+1) * numGroupsPerProcessor;
875                 if(pid == (processors - 1)){    endIndex = filenames.size();    }
876                 
877                 vector<unsigned long long> MPIPos;
878                 
879                 MPI_File outMPI;
880                 MPI_File outMPIAccnos;
881                 MPI_File outMPIFasta;
882         MPI_File outMPICount;
883                 
884                 int outMode=MPI_MODE_CREATE|MPI_MODE_WRONLY; 
885                 int inMode=MPI_MODE_RDONLY; 
886                 
887                 char outFilename[1024];
888                 strcpy(outFilename, outputFileName.c_str());
889                 
890                 char outAccnosFilename[1024];
891                 strcpy(outAccnosFilename, accnosFileName.c_str());
892                 
893                 char outFastaFilename[1024];
894                 strcpy(outFastaFilename, trimFastaFileName.c_str());
895         
896         char outCountFilename[1024];
897                 strcpy(outCountFilename, countlist.c_str());
898                 
899                 MPI_File_open(MPI_COMM_WORLD, outFilename, outMode, MPI_INFO_NULL, &outMPI);
900                 MPI_File_open(MPI_COMM_WORLD, outAccnosFilename, outMode, MPI_INFO_NULL, &outMPIAccnos);
901                 if (trim) { MPI_File_open(MPI_COMM_WORLD, outFastaFilename, outMode, MPI_INFO_NULL, &outMPIFasta); }
902         if (hasCount && dups) {  MPI_File_open(MPI_COMM_WORLD, outCountFilename, outMode, MPI_INFO_NULL, &outMPICount); }
903                 
904                 if (m->control_pressed) {   MPI_File_close(&outMPI); if (trim) {  MPI_File_close(&outMPIFasta);  } MPI_File_close(&outMPIAccnos); if (hasCount && dups) { MPI_File_close(&outMPICount); } return 0;  }
905                 
906                 //print headers
907                 if (pid == 0) { //you are the root process 
908                         m->mothurOutEndLine();
909                         m->mothurOut("Only reporting sequence supported by " + toString(minBS) + "% of bootstrapped results.");
910                         m->mothurOutEndLine();
911                         
912                         string outTemp = "Name\tLeftParent\tRightParent\tDivQLAQRB\tPerIDQLAQRB\tBootStrapA\tDivQLBQRA\tPerIDQLBQRA\tBootStrapB\tFlag\tLeftWindow\tRightWindow\n";
913                         
914                         //print header
915                         int length = outTemp.length();
916                         char* buf2 = new char[length];
917                         memcpy(buf2, outTemp.c_str(), length);
918                         
919                         MPI_File_write_shared(outMPI, buf2, length, MPI_CHAR, &status);
920                         delete buf2;
921                 }
922                 MPI_Barrier(MPI_COMM_WORLD); //make everyone wait
923                 
924                 for (int i = startIndex; i < endIndex; i++) {
925                         
926                         int start = time(NULL);
927                         int num = 0;
928                         string thisFastaName = filenames[i];
929                         map<string, int> thisPriority = fileToPriority[thisFastaName];
930                         
931                         char inFileName[1024];
932                         strcpy(inFileName, thisFastaName.c_str());
933                         MPI_File inMPI;
934                         MPI_File_open(MPI_COMM_SELF, inFileName, inMode, MPI_INFO_NULL, &inMPI);  //comm, filename, mode, info, filepointer
935             
936                         MPIPos = m->setFilePosFasta(thisFastaName, num); //fills MPIPos, returns numSeqs
937                         
938                         cout << endl << "Checking sequences from group: " << fileGroup[thisFastaName] << "." << endl; 
939                         
940             set<string> cnames;
941                         driverMPI(0, num, inMPI, outMPI, outMPIAccnos, outMPIFasta, cnames, MPIPos, thisFastaName, thisPriority, true);
942                         numSeqs += num;
943                         
944                         MPI_File_close(&inMPI);
945                         m->mothurRemove(thisFastaName);
946             
947             if (dups) {
948                 if (cnames.size() != 0) {
949                     if (hasCount) {
950                         for (set<string>::iterator it = cnames.begin(); it != cnames.end(); it++) {
951                             string outputString = (*it) + "\t" + fileGroup[thisFastaName] + "\n";
952                             int length = outputString.length();
953                             char* buf2 = new char[length];
954                             memcpy(buf2, outputString.c_str(), length);
955                             MPI_File_write_shared(outMPICount, buf2, length, MPI_CHAR, &status);
956                             delete buf2;
957                         }
958                     }else {
959                         map<string, map<string, string> >::iterator itGroupNameMap = group2NameMap.find(fileGroup[thisFastaName]);
960                         if (itGroupNameMap != group2NameMap.end()) {
961                             map<string, string> thisnamemap = itGroupNameMap->second;
962                             map<string, string>::iterator itN;
963                             for (set<string>::iterator it = cnames.begin(); it != cnames.end(); it++) {
964                                 itN = thisnamemap.find(*it);
965                                 if (itN != thisnamemap.end()) {
966                                     vector<string> tempNames; m->splitAtComma(itN->second, tempNames);
967                                     for (int j = 0; j < tempNames.size(); j++) { //write to accnos file
968                                         string outputString = tempNames[j] + "\n";
969                                         int length = outputString.length();
970                                         char* buf2 = new char[length];
971                                         memcpy(buf2, outputString.c_str(), length);
972                                         
973                                         MPI_File_write_shared(outMPIAccnos, buf2, length, MPI_CHAR, &status);
974                                         delete buf2;
975                                     }
976                                     
977                                 }else { m->mothurOut("[ERROR]: parsing cannot find " + *it + ".\n"); m->control_pressed = true; }
978                             }
979                         }else { m->mothurOut("[ERROR]: parsing cannot find " + fileGroup[thisFastaName] + ".\n"); m->control_pressed = true; }
980                     }
981                     
982                 }
983             }
984                                                 
985                         cout << endl << "It took " << toString(time(NULL) - start) << " secs to check " + toString(num) + " sequences from group " << fileGroup[thisFastaName] << "." << endl;
986                 }
987                 
988                 if (pid == 0) {
989                         for(int i = 1; i < processors; i++) { 
990                                 int temp = 0;
991                                 MPI_Recv(&temp, 1, MPI_INT, i, 2001, MPI_COMM_WORLD, &status);
992                                 numSeqs += temp;
993                         }
994                 }else{ MPI_Send(&numSeqs, 1, MPI_INT, 0, 2001, MPI_COMM_WORLD); }
995                 
996                 MPI_File_close(&outMPI);
997                 MPI_File_close(&outMPIAccnos); 
998                 if (trim) { MPI_File_close(&outMPIFasta); }
999         if (hasCount && dups) { MPI_File_close(&outMPICount); }
1000                 
1001                 MPI_Barrier(MPI_COMM_WORLD); //make everyone wait
1002 #endif
1003                 return 0;
1004                 
1005         }catch(exception& e) {
1006                 m->errorOut(e, "ChimeraSlayerCommand", "MPIExecuteGroups");
1007                 exit(1);
1008         }
1009 }               
1010 //**********************************************************************************************************************
1011 int ChimeraSlayerCommand::MPIExecute(string inputFile, string outputFileName, string accnosFileName, string trimFastaFileName, map<string, int>& priority){
1012         try {
1013                 
1014 #ifdef USE_MPI  
1015                 int pid, numSeqsPerProcessor; 
1016                 int tag = 2001;
1017                 vector<unsigned long long> MPIPos;
1018                 
1019                 MPI_Status status; 
1020                 MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
1021                 MPI_Comm_size(MPI_COMM_WORLD, &processors); 
1022                 
1023                 MPI_File inMPI;
1024                 MPI_File outMPI;
1025                 MPI_File outMPIAccnos;
1026                 MPI_File outMPIFasta;
1027                 
1028                 int outMode=MPI_MODE_CREATE|MPI_MODE_WRONLY; 
1029                 int inMode=MPI_MODE_RDONLY; 
1030                 
1031                 char outFilename[1024];
1032                 strcpy(outFilename, outputFileName.c_str());
1033                 
1034                 char outAccnosFilename[1024];
1035                 strcpy(outAccnosFilename, accnosFileName.c_str());
1036                 
1037                 char outFastaFilename[1024];
1038                 strcpy(outFastaFilename, trimFastaFileName.c_str());
1039                 
1040                 char inFileName[1024];
1041                 strcpy(inFileName, inputFile.c_str());
1042                 
1043                 MPI_File_open(MPI_COMM_WORLD, inFileName, inMode, MPI_INFO_NULL, &inMPI);  //comm, filename, mode, info, filepointer
1044                 MPI_File_open(MPI_COMM_WORLD, outFilename, outMode, MPI_INFO_NULL, &outMPI);
1045                 MPI_File_open(MPI_COMM_WORLD, outAccnosFilename, outMode, MPI_INFO_NULL, &outMPIAccnos);
1046                 if (trim) { MPI_File_open(MPI_COMM_WORLD, outFastaFilename, outMode, MPI_INFO_NULL, &outMPIFasta); }
1047                 
1048                 if (m->control_pressed) {  MPI_File_close(&inMPI);  MPI_File_close(&outMPI); if (trim) {  MPI_File_close(&outMPIFasta);  } MPI_File_close(&outMPIAccnos);  return 0;  }
1049                 
1050                 if (pid == 0) { //you are the root process 
1051                         m->mothurOutEndLine();
1052                         m->mothurOut("Only reporting sequence supported by " + toString(minBS) + "% of bootstrapped results.");
1053                         m->mothurOutEndLine();
1054                         
1055                         string outTemp = "Name\tLeftParent\tRightParent\tDivQLAQRB\tPerIDQLAQRB\tBootStrapA\tDivQLBQRA\tPerIDQLBQRA\tBootStrapB\tFlag\tLeftWindow\tRightWindow\n";
1056                         
1057                         //print header
1058                         int length = outTemp.length();
1059                         char* buf2 = new char[length];
1060                         memcpy(buf2, outTemp.c_str(), length);
1061                         
1062                         MPI_File_write_shared(outMPI, buf2, length, MPI_CHAR, &status);
1063                         delete buf2;
1064                         
1065                         MPIPos = m->setFilePosFasta(inputFile, numSeqs); //fills MPIPos, returns numSeqs
1066                         
1067                         if (templatefile != "self") { //if template=self we can only use 1 processor
1068                                 //send file positions to all processes
1069                                 for(int i = 1; i < processors; i++) { 
1070                                         MPI_Send(&numSeqs, 1, MPI_INT, i, tag, MPI_COMM_WORLD);
1071                                         MPI_Send(&MPIPos[0], (numSeqs+1), MPI_LONG, i, tag, MPI_COMM_WORLD);
1072                                 }
1073                         }
1074                         //figure out how many sequences you have to align
1075                         numSeqsPerProcessor = numSeqs / processors;
1076                         int startIndex =  pid * numSeqsPerProcessor;
1077                         if(pid == (processors - 1)){    numSeqsPerProcessor = numSeqs - pid * numSeqsPerProcessor;      }
1078                         
1079                         if (templatefile == "self") { //if template=self we can only use 1 processor
1080                                 startIndex = 0;
1081                                 numSeqsPerProcessor = numSeqs;
1082                         }
1083                         
1084                         //do your part
1085             set<string> cnames;
1086                         driverMPI(startIndex, numSeqsPerProcessor, inMPI, outMPI, outMPIAccnos, outMPIFasta, cnames, MPIPos, inputFile, priority, false);
1087                                                 
1088                         if (m->control_pressed) {  MPI_File_close(&inMPI);  MPI_File_close(&outMPI); if (trim) { MPI_File_close(&outMPIFasta); }  MPI_File_close(&outMPIAccnos);   return 0;  }
1089                         
1090                 }else{ //you are a child process
1091                         if (templatefile != "self") { //if template=self we can only use 1 processor
1092                                 MPI_Recv(&numSeqs, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
1093                                 MPIPos.resize(numSeqs+1);
1094                                 MPI_Recv(&MPIPos[0], (numSeqs+1), MPI_LONG, 0, tag, MPI_COMM_WORLD, &status);
1095                                 
1096                                 //figure out how many sequences you have to align
1097                                 numSeqsPerProcessor = numSeqs / processors;
1098                                 int startIndex =  pid * numSeqsPerProcessor;
1099                                 if(pid == (processors - 1)){    numSeqsPerProcessor = numSeqs - pid * numSeqsPerProcessor;      }
1100                                 
1101                                 //do your part
1102                 set<string> cnames;
1103                                 driverMPI(startIndex, numSeqsPerProcessor, inMPI, outMPI, outMPIAccnos, outMPIFasta, cnames, MPIPos, inputFile, priority, false);
1104                                 
1105                                 if (m->control_pressed) {  MPI_File_close(&inMPI);  MPI_File_close(&outMPI); if (trim) { MPI_File_close(&outMPIFasta); }  MPI_File_close(&outMPIAccnos);  return 0;  }
1106                                 
1107                         }
1108                 }
1109                 
1110                 //close files 
1111                 MPI_File_close(&inMPI);
1112                 MPI_File_close(&outMPI);
1113                 MPI_File_close(&outMPIAccnos); 
1114                 if (trim) { MPI_File_close(&outMPIFasta); }
1115                 MPI_Barrier(MPI_COMM_WORLD); //make everyone wait - just in case
1116                 
1117                 
1118 #endif          
1119                 return numSeqs;
1120         }
1121         catch(exception& e) {
1122                 m->errorOut(e, "ChimeraSlayerCommand", "MPIExecute");
1123                 exit(1);
1124         }
1125 }
1126 //**********************************************************************************************************************
1127 int ChimeraSlayerCommand::deconvoluteResults(map<string, string>& uniqueNames, string outputFileName, string accnosFileName, string trimFileName){
1128         try {
1129                 map<string, string>::iterator itUnique;
1130                 int total = 0;
1131         
1132         if (trimera) { //add in more potential uniqueNames
1133             map<string, string> newUniqueNames = uniqueNames;
1134             for (map<string, string>::iterator it = uniqueNames.begin(); it != uniqueNames.end(); it++) {
1135                 newUniqueNames[(it->first)+"_LEFT"] = (it->first)+"_LEFT";
1136                 newUniqueNames[(it->first)+"_RIGHT"] = (it->first)+"_RIGHT";
1137             }
1138             uniqueNames = newUniqueNames;
1139             newUniqueNames.clear();
1140         }
1141                 
1142                 //edit accnos file
1143                 ifstream in2; 
1144                 m->openInputFile(accnosFileName, in2, "no error");
1145                 
1146                 ofstream out2;
1147                 m->openOutputFile(accnosFileName+".temp", out2);
1148                 
1149                 string name; name = "";
1150                 set<string> chimerasInFile;
1151                 set<string>::iterator itChimeras;
1152                 
1153                 while (!in2.eof()) {
1154                         if (m->control_pressed) { in2.close(); out2.close(); m->mothurRemove(outputFileName); m->mothurRemove((accnosFileName+".temp")); return 0; }
1155                         
1156                         in2 >> name; m->gobble(in2);
1157                         
1158                         //find unique name
1159                         itUnique = uniqueNames.find(name);
1160                         
1161                         if (itUnique == uniqueNames.end()) { m->mothurOut("[ERROR]: trouble parsing accnos results. Cannot find "+ name + "."); m->mothurOutEndLine(); m->control_pressed = true; }
1162                         else {
1163                                 itChimeras = chimerasInFile.find((itUnique->second));
1164                                 
1165                                 if (itChimeras == chimerasInFile.end()) {
1166                                         out2 << itUnique->second << endl;
1167                                         chimerasInFile.insert((itUnique->second));
1168                                         total++;
1169                                 }
1170                         }
1171                 }
1172                 in2.close();
1173                 out2.close();
1174                 
1175                 m->mothurRemove(accnosFileName);
1176                 rename((accnosFileName+".temp").c_str(), accnosFileName.c_str());
1177                 
1178                 
1179                 //edit chimera file
1180                 ifstream in; 
1181                 m->openInputFile(outputFileName, in);
1182                 
1183                 ofstream out;
1184                 m->openOutputFile(outputFileName+".temp", out); out.setf(ios::fixed, ios::floatfield); out.setf(ios::showpoint);
1185
1186                 string rest, parent1, parent2, line;
1187                 set<string> namesInFile; //this is so if a sequence is found to be chimera in several samples we dont write it to the results file more than once
1188                 set<string>::iterator itNames;
1189                 
1190                 //assumptions - in file each read will always look like...
1191                 /*
1192                  F11Fcsw_92754  no
1193                  F11Fcsw_63104  F11Fcsw_33372   F11Fcsw_37007   0.89441 80.4469 0.2     1.03727 93.2961 52.2    no      0-241   243-369 
1194                  */
1195                 
1196                 //get header line
1197                 if (!in.eof()) {
1198                         line = m->getline(in); m->gobble(in);
1199                         out << line << endl;
1200                 }
1201                 
1202                 //for the chimera file, we want to make sure if any group finds a sequence to be chimeric then all groups do, 
1203                 //so if this is a report that did not find it to be chimeric, but it appears in the accnos file, 
1204                 //then ignore this report and continue until we find the report that found it to be chimeric
1205                 
1206                 while (!in.eof()) {
1207                         
1208                         if (m->control_pressed) { in.close(); out.close(); m->mothurRemove((outputFileName+".temp")); return 0; }
1209                         
1210                         in >> name;             m->gobble(in);
1211                         in >> parent1;  m->gobble(in);
1212                         
1213                         if (name == "Name") { //name = "Name" because we append the header line each time we add results from the groups
1214                                 line = m->getline(in); m->gobble(in);
1215                         }else {
1216                                 if (parent1 == "no") {
1217                                         //find unique name
1218                                         itUnique = uniqueNames.find(name);
1219                                         
1220                                         if (itUnique == uniqueNames.end()) { m->mothurOut("[ERROR]: trouble parsing chimera results. Cannot find "+ name + "."); m->mothurOutEndLine(); m->control_pressed = true; }
1221                                         else {
1222                                                 //is this sequence really not chimeric??
1223                                                 itChimeras = chimerasInFile.find(itUnique->second);
1224                                                 
1225                                                 if (itChimeras == chimerasInFile.end()) {
1226                                                         //is this sequence not already in the file
1227                                                         itNames = namesInFile.find((itUnique->second));
1228                                                         
1229                                                         if (itNames == namesInFile.end()) { out << itUnique->second << '\t' << "no" << endl; namesInFile.insert(itUnique->second); }
1230                                                 }
1231                                         }
1232                                 }else { //read the rest of the line
1233                                         double DivQLAQRB,PerIDQLAQRB,BootStrapA,DivQLBQRA,PerIDQLBQRA,BootStrapB;
1234                                         string flag, range1, range2;
1235                                         bool print = false;
1236                                         in >> parent2 >> DivQLAQRB >> PerIDQLAQRB >> BootStrapA >> DivQLBQRA >> PerIDQLBQRA >> BootStrapB >> flag >> range1 >> range2;  m->gobble(in);
1237                                         
1238                                         //find unique name
1239                                         itUnique = uniqueNames.find(name);
1240                                         
1241                                         if (itUnique == uniqueNames.end()) { m->mothurOut("[ERROR]: trouble parsing chimera results. Cannot find "+ name + "."); m->mothurOutEndLine(); m->control_pressed = true; }
1242                                         else {
1243                                                 name = itUnique->second;
1244                                                 //is this name already in the file
1245                                                 itNames = namesInFile.find((name));
1246                                                 
1247                                                 if (itNames == namesInFile.end()) { //no not in file
1248                                                         if (flag == "no") { //are you really a no??
1249                                                                 //is this sequence really not chimeric??
1250                                                                 itChimeras = chimerasInFile.find(name);
1251                                                                 
1252                                                                 //then you really are a no so print, otherwise skip
1253                                                                 if (itChimeras == chimerasInFile.end()) { print = true; }
1254                                                                 
1255                                                         }else{ print = true; }
1256                                                 }
1257                                         }
1258                                         
1259                                         if (print) {
1260                                                 out << name << '\t';
1261                                                 
1262                                                 namesInFile.insert(name);
1263
1264                                                 //output parent1's name
1265                                                 itUnique = uniqueNames.find(parent1);
1266                                                 if (itUnique == uniqueNames.end()) { m->mothurOut("[ERROR]: trouble parsing chimera results. Cannot find parentA "+ parent1 + "."); m->mothurOutEndLine(); m->control_pressed = true; }
1267                                                 else { out << itUnique->second << '\t'; }
1268                                                 
1269                                                 //output parent2's name
1270                                                 itUnique = uniqueNames.find(parent2);
1271                                                 if (itUnique == uniqueNames.end()) { m->mothurOut("[ERROR]: trouble parsing chimera results. Cannot find parentA "+ parent2 + "."); m->mothurOutEndLine(); m->control_pressed = true; }
1272                                                 else { out << itUnique->second << '\t'; }
1273                                                 
1274                                                 out << DivQLAQRB << '\t' << PerIDQLAQRB << '\t' << BootStrapA << '\t' << DivQLBQRA << '\t' << PerIDQLBQRA << '\t' << BootStrapB << '\t' << flag << '\t' << range1 << '\t' << range2 << endl;
1275                                         }
1276                                 }                               
1277                         }
1278                 }
1279                 in.close();
1280                 out.close();
1281                 
1282                 m->mothurRemove(outputFileName);
1283                 rename((outputFileName+".temp").c_str(), outputFileName.c_str());
1284                 
1285                 //edit fasta file
1286                 if (trim) {
1287                         ifstream in3; 
1288                         m->openInputFile(trimFileName, in3);
1289                         
1290                         ofstream out3;
1291                         m->openOutputFile(trimFileName+".temp", out3);
1292                         
1293                         namesInFile.clear();
1294                         
1295                         while (!in3.eof()) {
1296                                 if (m->control_pressed) { in3.close(); out3.close(); m->mothurRemove(outputFileName); m->mothurRemove(accnosFileName); m->mothurRemove((trimFileName+".temp")); return 0; }
1297                                 
1298                                 Sequence seq(in3); m->gobble(in3);
1299                                 
1300                                 if (seq.getName() != "") {
1301                                         //find unique name
1302                                         itUnique = uniqueNames.find(seq.getName());
1303                                         
1304                                         if (itUnique == uniqueNames.end()) { m->mothurOut("[ERROR]: trouble parsing accnos results. Cannot find "+ seq.getName() + "."); m->mothurOutEndLine(); m->control_pressed = true; }
1305                                         else {
1306                                                 itNames = namesInFile.find((itUnique->second));
1307                                                 
1308                                                 if (itNames == namesInFile.end()) {
1309                                                         seq.printSequence(out3);
1310                                                 }
1311                                         }
1312                                 }
1313                         }
1314                         in3.close();
1315                         out3.close();
1316                         
1317                         m->mothurRemove(trimFileName);
1318                         rename((trimFileName+".temp").c_str(), trimFileName.c_str());
1319                 }
1320                 
1321                 return total;
1322         }
1323         catch(exception& e) {
1324                 m->errorOut(e, "ChimeraSlayerCommand", "deconvoluteResults");
1325                 exit(1);
1326         }
1327 }
1328 //**********************************************************************************************************************
1329 int ChimeraSlayerCommand::setUpForSelfReference(SequenceParser*& parser, map<string, string>& fileGroup, map<string, map<string, int> >& fileToPriority, int s){
1330         try {
1331                 fileGroup.clear();
1332                 fileToPriority.clear();
1333                 
1334                 string nameFile = "";
1335                 if (nameFileNames.size() != 0) { //you provided a namefile and we don't need to create one
1336                         nameFile = nameFileNames[s];
1337                 }else {  nameFile = getNamesFile(fastaFileNames[s]); }
1338                 
1339                 //you provided a groupfile
1340                 string groupFile = "";
1341                 if (groupFileNames.size() != 0) { groupFile = groupFileNames[s]; }
1342                 
1343                 if (groupFile == "") { 
1344                         if (processors != 1) { m->mothurOut("When using template=self, mothur can only use 1 processor, continuing."); m->mothurOutEndLine(); processors = 1; }
1345                                                 
1346                         //sort fastafile by abundance, returns new sorted fastafile name
1347                         m->mothurOut("Sorting fastafile according to abundance..."); cout.flush(); 
1348                         priority = sortFastaFile(fastaFileNames[s], nameFile);
1349                         m->mothurOut("Done."); m->mothurOutEndLine();
1350                         
1351                         fileToPriority[fastaFileNames[s]] = priority;
1352                         fileGroup[fastaFileNames[s]] = "noGroup";
1353                 }else {
1354                         //Parse sequences by group
1355                         parser = new SequenceParser(groupFile, fastaFileNames[s], nameFile);
1356                         vector<string> groups = parser->getNamesOfGroups();
1357                         
1358                         for (int i = 0; i < groups.size(); i++) {
1359                                 vector<Sequence> thisGroupsSeqs = parser->getSeqs(groups[i]);
1360                                 map<string, string> thisGroupsMap = parser->getNameMap(groups[i]);
1361                 group2NameMap[groups[i]] = thisGroupsMap;
1362                                 string newFastaFile = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + groups[i] + "-sortedTemp.fasta";
1363                                 priority = sortFastaFile(thisGroupsSeqs, thisGroupsMap, newFastaFile); 
1364                                 fileToPriority[newFastaFile] = priority;
1365                                 fileGroup[newFastaFile] = groups[i];
1366                         }
1367                 }
1368                 
1369                 
1370                 return 0;
1371         }
1372         catch(exception& e) {
1373                 m->errorOut(e, "ChimeraSlayerCommand", "setUpForSelfReference");
1374                 exit(1);
1375         }
1376 }
1377 //**********************************************************************************************************************
1378 int ChimeraSlayerCommand::setUpForSelfReference(SequenceCountParser*& parser, map<string, string>& fileGroup, map<string, map<string, int> >& fileToPriority, int s){
1379         try {
1380                 fileGroup.clear();
1381                 fileToPriority.clear();
1382                 
1383                 string nameFile = "";
1384                 if (nameFileNames.size() != 0) { //you provided a namefile and we don't need to create one
1385                         nameFile = nameFileNames[s];
1386                 }else {  m->control_pressed = true; return 0; }
1387          
1388                 CountTable ct;
1389                 if (!ct.testGroups(nameFile)) {  
1390                         if (processors != 1) { m->mothurOut("When using template=self, mothur can only use 1 processor, continuing."); m->mothurOutEndLine(); processors = 1; }
1391             
1392                         //sort fastafile by abundance, returns new sorted fastafile name
1393                         m->mothurOut("Sorting fastafile according to abundance..."); cout.flush(); 
1394                         priority = sortFastaFile(fastaFileNames[s], nameFile);
1395                         m->mothurOut("Done."); m->mothurOutEndLine();
1396                         
1397                         fileToPriority[fastaFileNames[s]] = priority;
1398                         fileGroup[fastaFileNames[s]] = "noGroup";
1399                 }else {
1400                         //Parse sequences by group
1401                         parser = new SequenceCountParser(nameFile, fastaFileNames[s]);
1402                         vector<string> groups = parser->getNamesOfGroups();
1403                         
1404                         for (int i = 0; i < groups.size(); i++) {
1405                                 vector<Sequence> thisGroupsSeqs = parser->getSeqs(groups[i]);
1406                                 map<string, int> thisGroupsMap = parser->getCountTable(groups[i]);
1407                                 string newFastaFile = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + groups[i] + "-sortedTemp.fasta";
1408                                 sortFastaFile(thisGroupsSeqs, thisGroupsMap, newFastaFile); 
1409                                 fileToPriority[newFastaFile] = thisGroupsMap;
1410                                 fileGroup[newFastaFile] = groups[i];
1411                         }
1412                 }
1413                 
1414                 
1415                 return 0;
1416         }
1417         catch(exception& e) {
1418                 m->errorOut(e, "ChimeraSlayerCommand", "setUpForSelfReference");
1419                 exit(1);
1420         }
1421 }
1422 //**********************************************************************************************************************
1423 string ChimeraSlayerCommand::getNamesFile(string& inputFile){
1424         try {
1425                 string nameFile = "";
1426                 
1427                 m->mothurOutEndLine(); m->mothurOut("No namesfile given, running unique.seqs command to generate one."); m->mothurOutEndLine(); m->mothurOutEndLine();
1428                 
1429                 //use unique.seqs to create new name and fastafile
1430                 string inputString = "fasta=" + inputFile;
1431                 m->mothurOut("/******************************************/"); m->mothurOutEndLine(); 
1432                 m->mothurOut("Running command: unique.seqs(" + inputString + ")"); m->mothurOutEndLine(); 
1433                 m->mothurCalling = true;
1434         
1435                 Command* uniqueCommand = new DeconvoluteCommand(inputString);
1436                 uniqueCommand->execute();
1437                 
1438                 map<string, vector<string> > filenames = uniqueCommand->getOutputFiles();
1439                 
1440                 delete uniqueCommand;
1441                 m->mothurCalling = false;
1442                 m->mothurOut("/******************************************/"); m->mothurOutEndLine(); 
1443                 
1444                 nameFile = filenames["name"][0];
1445                 inputFile = filenames["fasta"][0];
1446                 
1447                 return nameFile;
1448         }
1449         catch(exception& e) {
1450                 m->errorOut(e, "ChimeraSlayerCommand", "getNamesFile");
1451                 exit(1);
1452         }
1453 }
1454 //**********************************************************************************************************************
1455
1456 int ChimeraSlayerCommand::driverGroups(string outputFName, string accnos, string fasta, map<string, map<string, int> >& fileToPriority, map<string, string>& fileGroup, string countlist){
1457         try {
1458                 int totalSeqs = 0;
1459         ofstream outCountList;
1460
1461         if (hasCount && dups) { m->openOutputFile(countlist, outCountList); }
1462                 
1463                 for (map<string, map<string, int> >::iterator itFile = fileToPriority.begin(); itFile != fileToPriority.end(); itFile++) {
1464                         
1465                         if (m->control_pressed) {  return 0;  }
1466                         
1467                         int start = time(NULL);
1468                         string thisFastaName = itFile->first;
1469                         map<string, int> thisPriority = itFile->second;
1470                         string thisoutputFileName = outputDir + m->getRootName(m->getSimpleName(thisFastaName)) + fileGroup[thisFastaName] + "slayer.chimera";
1471                         string thisaccnosFileName = outputDir + m->getRootName(m->getSimpleName(thisFastaName)) + fileGroup[thisFastaName] + "slayer.accnos";
1472                         string thistrimFastaFileName = outputDir + m->getRootName(m->getSimpleName(thisFastaName)) + fileGroup[thisFastaName] + "slayer.fasta";
1473                         
1474                         m->mothurOutEndLine(); m->mothurOut("Checking sequences from group: " + fileGroup[thisFastaName] + "."); m->mothurOutEndLine(); 
1475                         
1476                         lines.clear();
1477 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
1478                         int proc = 1;
1479                         vector<unsigned long long> positions = m->divideFile(thisFastaName, proc);
1480                         lines.push_back(linePair(positions[0], positions[1]));  
1481 #else
1482                         lines.push_back(linePair(0, 1000)); 
1483 #endif                  
1484                         int numSeqs = driver(lines[0], thisoutputFileName, thisFastaName, thisaccnosFileName, thistrimFastaFileName, thisPriority);
1485                         
1486             //if we provided a count file with group info and set dereplicate=t, then we want to create a *.pick.count_table
1487             //This table will zero out group counts for seqs determined to be chimeric by that group.
1488             if (dups) {
1489                 if (!m->isBlank(thisaccnosFileName)) {
1490                     ifstream in;
1491                     m->openInputFile(thisaccnosFileName, in);
1492                     string name;
1493                     if (hasCount) {
1494                         while (!in.eof()) {
1495                             in >> name; m->gobble(in);
1496                             outCountList << name << '\t' << fileGroup[thisFastaName] << endl;
1497                         }
1498                         in.close();
1499                     }else {
1500                         map<string, map<string, string> >::iterator itGroupNameMap = group2NameMap.find(fileGroup[thisFastaName]);
1501                         if (itGroupNameMap != group2NameMap.end()) {
1502                             map<string, string> thisnamemap = itGroupNameMap->second;
1503                             map<string, string>::iterator itN;
1504                             ofstream out;
1505                             m->openOutputFile(thisaccnosFileName+".temp", out);
1506                             while (!in.eof()) {
1507                                 in >> name; m->gobble(in);
1508                                 itN = thisnamemap.find(name);
1509                                 if (itN != thisnamemap.end()) {
1510                                     vector<string> tempNames; m->splitAtComma(itN->second, tempNames);
1511                                     for (int j = 0; j < tempNames.size(); j++) { out << tempNames[j] << endl; }
1512                                 
1513                                 }else { m->mothurOut("[ERROR]: parsing cannot find " + name + ".\n"); m->control_pressed = true; }
1514                             }
1515                             out.close();
1516                             in.close();
1517                             m->renameFile(thisaccnosFileName+".temp", thisaccnosFileName);
1518                         }else { m->mothurOut("[ERROR]: parsing cannot find " + fileGroup[thisFastaName] + ".\n"); m->control_pressed = true; }
1519                     }
1520                     
1521                 }
1522             }
1523
1524                         //append files
1525                         m->appendFiles(thisoutputFileName, outputFName); m->mothurRemove(thisoutputFileName); 
1526                         m->appendFiles(thisaccnosFileName, accnos); m->mothurRemove(thisaccnosFileName);
1527                         if (trim) { m->appendFiles(thistrimFastaFileName, fasta); m->mothurRemove(thistrimFastaFileName); }
1528                         m->mothurRemove(thisFastaName);
1529                         
1530                         totalSeqs += numSeqs;
1531                         
1532                         m->mothurOutEndLine(); m->mothurOut("It took " + toString(time(NULL) - start) + " secs to check " + toString(numSeqs) + " sequences from group " + fileGroup[thisFastaName] + ".");     m->mothurOutEndLine();
1533                 }
1534                 
1535         if (hasCount && dups) { outCountList.close(); }
1536         
1537                 return totalSeqs;
1538         }
1539         catch(exception& e) {
1540                 m->errorOut(e, "ChimeraSlayerCommand", "driverGroups");
1541                 exit(1);
1542         }
1543 }
1544 /**************************************************************************************************/
1545 int ChimeraSlayerCommand::createProcessesGroups(string outputFName, string accnos, string fasta, map<string, map<string, int> >& fileToPriority, map<string, string>& fileGroup, string countlist, string countFile) {
1546         try {
1547                 int process = 1;
1548                 int num = 0;
1549                 processIDS.clear();
1550                 
1551                 if (fileToPriority.size() < processors) { processors = fileToPriority.size(); }
1552         
1553         CountTable newCount;
1554         if (hasCount && dups) { newCount.readTable(countFile, true); }
1555                 
1556                 int groupsPerProcessor = fileToPriority.size() / processors;
1557                 int remainder = fileToPriority.size() % processors;
1558                 
1559                 vector< map<string, map<string, int> > > breakUp;
1560                 
1561                 for (int i = 0; i < processors; i++) {
1562                         map<string, map<string, int> > thisFileToPriority;
1563                         map<string, map<string, int> >::iterator itFile;
1564                         int count = 0;
1565                         int enough = groupsPerProcessor;
1566                         if (i == 0) { enough = groupsPerProcessor + remainder; }
1567                         
1568                         for (itFile = fileToPriority.begin(); itFile != fileToPriority.end();) {
1569                                 thisFileToPriority[itFile->first] = itFile->second;
1570                                 fileToPriority.erase(itFile++);
1571                                 count++;
1572                                 if (count == enough) { break; }
1573                         }       
1574                         breakUp.push_back(thisFileToPriority);
1575                 }
1576                                 
1577 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
1578                 //loop through and create all the processes you want
1579                 while (process != processors) {
1580                         int pid = fork();
1581                         
1582                         if (pid > 0) {
1583                                 processIDS.push_back(pid);  //create map from line number to pid so you can append files in correct order later
1584                                 process++;
1585                         }else if (pid == 0){
1586                                 num = driverGroups(outputFName + toString(getpid()) + ".temp", accnos + toString(getpid()) + ".temp", fasta + toString(getpid()) + ".temp", breakUp[process], fileGroup, accnos + toString(getpid()) + ".byCount");
1587                                 
1588                                 //pass numSeqs to parent
1589                                 ofstream out;
1590                                 string tempFile = outputFName + toString(getpid()) + ".num.temp";
1591                                 m->openOutputFile(tempFile, out);
1592                                 out << num << endl;
1593                                 out.close();
1594                                 exit(0);
1595                         }else { 
1596                                 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine(); 
1597                                 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
1598                                 exit(0);
1599                         }
1600                 }
1601                 
1602                 num = driverGroups(outputFName, accnos, fasta, breakUp[0], fileGroup, accnos + ".byCount");
1603
1604                 //force parent to wait until all the processes are done
1605                 for (int i=0;i<processors;i++) { 
1606                         int temp = processIDS[i];
1607                         wait(&temp);
1608                 }
1609                 
1610                 for (int i = 0; i < processIDS.size(); i++) {
1611                         ifstream in;
1612                         string tempFile =  outputFName + toString(processIDS[i]) + ".num.temp";
1613                         m->openInputFile(tempFile, in);
1614                         if (!in.eof()) { int tempNum = 0;  in >> tempNum; num += tempNum; }
1615                         in.close(); m->mothurRemove(tempFile);
1616                 }
1617 #else
1618                 
1619                 //////////////////////////////////////////////////////////////////////////////////////////////////////
1620                 //Windows version shared memory, so be careful when passing variables through the slayerData struct. 
1621                 //Above fork() will clone, so memory is separate, but that's not the case with windows, 
1622                 //////////////////////////////////////////////////////////////////////////////////////////////////////
1623                 
1624                 vector<slayerData*> pDataArray; 
1625                 DWORD   dwThreadIdArray[processors-1];
1626                 HANDLE  hThreadArray[processors-1]; 
1627                 
1628                 //Create processor worker threads.
1629                 for(int i=1; i<processors; i++ ){
1630                         string extension = toString(i) + ".temp";
1631                         slayerData* tempslayer = new slayerData(group2NameMap, hasCount, dups, (accnos + toString(i) +".byCount"), (outputFName + extension), (fasta + extension), (accnos + extension), templatefile, search, blastlocation, trimera, trim, realign, m, breakUp[i], fileGroup, ksize, match, mismatch, window, minSimilarity, minCoverage, minBS, minSNP, parents, iters, increment, numwanted, divR, priority, i);
1632                         pDataArray.push_back(tempslayer);
1633                         processIDS.push_back(i);
1634                         
1635                         //MySlayerThreadFunction is in header. It must be global or static to work with the threads.
1636                         //default security attributes, thread function name, argument to thread function, use default creation flags, returns the thread identifier
1637                         hThreadArray[i-1] = CreateThread(NULL, 0, MySlayerGroupThreadFunction, pDataArray[i-1], 0, &dwThreadIdArray[i-1]);   
1638                 }
1639                 
1640                 num = driverGroups(outputFName, accnos, fasta, breakUp[0], fileGroup, accnos + ".byCount");
1641                 
1642                 //Wait until all threads have terminated.
1643                 WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE);
1644                 
1645                 //Close all thread handles and free memory allocations.
1646                 for(int i=0; i < pDataArray.size(); i++){
1647             if (pDataArray[i]->fileToPriority.size() != pDataArray[i]->end) {
1648                 m->mothurOut("[ERROR]: process " + toString(i) + " only processed " + toString(pDataArray[i]->end) + " of " + toString(pDataArray[i]->fileToPriority.size()) + " groups assigned to it, quitting. \n"); m->control_pressed = true;
1649             }
1650                         num += pDataArray[i]->count;
1651                         CloseHandle(hThreadArray[i]);
1652                         delete pDataArray[i];
1653                 }
1654 #endif  
1655         //read my own
1656         if (hasCount && dups) {
1657             if (!m->isBlank(accnos + ".byCount")) {
1658                 ifstream in2;
1659                 m->openInputFile(accnos + ".byCount", in2);
1660                 
1661                 string name, group;
1662                 while (!in2.eof()) {
1663                     in2 >> name >> group; m->gobble(in2);
1664                     newCount.setAbund(name, group, 0);
1665                 }
1666                 in2.close();
1667             }
1668             m->mothurRemove(accnos + ".byCount");
1669         }
1670
1671                 
1672                 //append output files
1673                 for(int i=0;i<processIDS.size();i++){
1674                         m->appendFiles((outputFName + toString(processIDS[i]) + ".temp"), outputFName);
1675                         m->mothurRemove((outputFName + toString(processIDS[i]) + ".temp"));
1676                         
1677                         m->appendFiles((accnos + toString(processIDS[i]) + ".temp"), accnos);
1678                         m->mothurRemove((accnos + toString(processIDS[i]) + ".temp"));
1679                         
1680                         if (trim) {
1681                                 m->appendFiles((fasta + toString(processIDS[i]) + ".temp"), fasta);
1682                                 m->mothurRemove((fasta + toString(processIDS[i]) + ".temp"));
1683                         }
1684             
1685             if (hasCount && dups) {
1686                 if (!m->isBlank(accnos + toString(processIDS[i]) + ".byCount")) {
1687                     ifstream in2;
1688                     m->openInputFile(accnos  + toString(processIDS[i]) + ".byCount", in2);
1689                     
1690                     string name, group;
1691                     while (!in2.eof()) {
1692                         in2 >> name >> group; m->gobble(in2);
1693                         newCount.setAbund(name, group, 0);
1694                     }
1695                     in2.close();
1696                 }
1697                 m->mothurRemove(accnos + toString(processIDS[i]) + ".byCount");
1698             }
1699
1700                 }
1701                 
1702         //print new *.pick.count_table
1703         if (hasCount && dups) {  newCount.printTable(countlist);   }
1704                 
1705                 return num;
1706         }
1707         catch(exception& e) {
1708                 m->errorOut(e, "ChimeraSlayerCommand", "createProcessesGroups");
1709                 exit(1);
1710         }
1711 }
1712 //**********************************************************************************************************************
1713
1714 int ChimeraSlayerCommand::driver(linePair filePos, string outputFName, string filename, string accnos, string fasta, map<string, int>& priority){
1715         try {
1716                 
1717                 Chimera* chimera;
1718                 if (templatefile != "self") { //you want to run slayer with a reference template
1719                         chimera = new ChimeraSlayer(filename, templatefile, trim, search, ksize, match, mismatch, window, divR, minSimilarity, minCoverage, minBS, minSNP, parents, iters, increment, numwanted, realign, blastlocation, rand());       
1720                 }else {
1721                         chimera = new ChimeraSlayer(filename, templatefile, trim, priority, search, ksize, match, mismatch, window, divR, minSimilarity, minCoverage, minBS, minSNP, parents, iters, increment, numwanted, realign, blastlocation, rand());     
1722                 }
1723                 
1724                 if (m->control_pressed) { delete chimera; return 0; }
1725                 
1726                 if (chimera->getUnaligned()) { delete chimera; m->mothurOut("Your template sequences are different lengths, please correct."); m->mothurOutEndLine(); m->control_pressed = true; return 0; }
1727                 templateSeqsLength = chimera->getLength();
1728                 
1729                 ofstream out;
1730                 m->openOutputFile(outputFName, out);
1731                 
1732                 ofstream out2;
1733                 m->openOutputFile(accnos, out2);
1734                 
1735                 ofstream out3;
1736                 if (trim) {  m->openOutputFile(fasta, out3); }
1737                 
1738                 ifstream inFASTA;
1739                 m->openInputFile(filename, inFASTA);
1740
1741                 inFASTA.seekg(filePos.start);
1742                 
1743                 if (filePos.start == 0) { chimera->printHeader(out); }
1744
1745                 bool done = false;
1746                 int count = 0;
1747         
1748                 while (!done) {
1749                 
1750                         if (m->control_pressed) {       delete chimera; out.close(); out2.close(); if (trim) { out3.close(); } inFASTA.close(); return 1;       }
1751                 
1752                         Sequence* candidateSeq = new Sequence(inFASTA);  m->gobble(inFASTA);
1753                         string candidateAligned = candidateSeq->getAligned();
1754                         
1755                         if (candidateSeq->getName() != "") { //incase there is a commented sequence at the end of a file
1756                                 if (candidateSeq->getAligned().length() != templateSeqsLength) {  
1757                                         m->mothurOut(candidateSeq->getName() + " is not the same length as the template sequences. Skipping."); m->mothurOutEndLine();
1758                                 }else{
1759                                         //find chimeras
1760                                         chimera->getChimeras(candidateSeq);
1761                                         
1762                                         if (m->control_pressed) {       delete chimera; delete candidateSeq; return 1;  }
1763                                                 
1764                                         //if you are not chimeric, then check each half
1765                                         data_results wholeResults = chimera->getResults();
1766                                         
1767                                         //determine if we need to split
1768                                         bool isChimeric = false;
1769                                         
1770                                         if (wholeResults.flag == "yes") {
1771                                                 string chimeraFlag = "no";
1772                                                 if(  (wholeResults.results[0].bsa >= minBS && wholeResults.results[0].divr_qla_qrb >= divR)
1773                                                    ||
1774                                                    (wholeResults.results[0].bsb >= minBS && wholeResults.results[0].divr_qlb_qra >= divR) ) { chimeraFlag = "yes"; }
1775                                                 
1776                                                 
1777                                                 if (chimeraFlag == "yes") {     
1778                                                         if ((wholeResults.results[0].bsa >= minBS) || (wholeResults.results[0].bsb >= minBS)) { isChimeric = true; }
1779                                                 }
1780                                         }
1781                                         
1782                                         if ((!isChimeric) && trimera) {
1783                                                 
1784                                                 //split sequence in half by bases
1785                                                 string leftQuery, rightQuery;
1786                                                 Sequence tempSeq(candidateSeq->getName(), candidateAligned);
1787                                                 divideInHalf(tempSeq, leftQuery, rightQuery);
1788                                                 
1789                                                 //run chimeraSlayer on each piece
1790                                                 Sequence* left = new Sequence(candidateSeq->getName(), leftQuery);
1791                                                 Sequence* right = new Sequence(candidateSeq->getName(), rightQuery);
1792                                                 
1793                                                 //find chimeras
1794                                                 chimera->getChimeras(left);
1795                                                 data_results leftResults = chimera->getResults();
1796                                                 
1797                                                 chimera->getChimeras(right);
1798                                                 data_results rightResults = chimera->getResults();
1799                                                 
1800                                                 //if either piece is chimeric then report
1801                                                 Sequence trimmed = chimera->print(out, out2, leftResults, rightResults);
1802                                                 if (trim) { trimmed.printSequence(out3);  }
1803                                                 
1804                                                 delete left; delete right;
1805                                                 
1806                                         }else { //already chimeric
1807                                                 //print results
1808                                                 Sequence trimmed = chimera->print(out, out2);
1809                                                 if (trim) { trimmed.printSequence(out3);  }
1810                                         }
1811                                         
1812                                         
1813                                 }
1814                                 count++;
1815                         }
1816                         
1817                         #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
1818                                 unsigned long long pos = inFASTA.tellg();
1819                                 if ((pos == -1) || (pos >= filePos.end)) { break; }
1820                         #else
1821                                 if (inFASTA.eof()) { break; }
1822                         #endif
1823                         
1824                         delete candidateSeq;
1825                         //report progress
1826                         if((count) % 100 == 0){ m->mothurOutJustToScreen("Processing sequence: " + toString(count) + "\n");             }
1827                 }
1828                 //report progress
1829                 if((count) % 100 != 0){ m->mothurOutJustToScreen("Processing sequence: " + toString(count)+ "\n");              }
1830                 
1831                 int numNoParents = chimera->getNumNoParents();
1832                 if (numNoParents == count) { m->mothurOut("[WARNING]: megablast returned 0 potential parents for all your sequences. This could be due to formatdb.exe not being setup properly, please check formatdb.log for errors."); m->mothurOutEndLine(); } 
1833                 
1834                 out.close();
1835                 out2.close();
1836                 if (trim) { out3.close(); }
1837                 inFASTA.close();
1838                 delete chimera;
1839                                 
1840                 return count;
1841                 
1842                 
1843         }
1844         catch(exception& e) {
1845                 m->errorOut(e, "ChimeraSlayerCommand", "driver");
1846                 exit(1);
1847         }
1848 }
1849 //**********************************************************************************************************************
1850 #ifdef USE_MPI
1851 int ChimeraSlayerCommand::driverMPI(int start, int num, MPI_File& inMPI, MPI_File& outMPI, MPI_File& outAccMPI, MPI_File& outFastaMPI, set<string>& cnames, vector<unsigned long long>& MPIPos, string filename, map<string, int>& priority, bool byGroup){
1852         try {
1853                 MPI_Status status; 
1854                 int pid;
1855                 MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
1856                 
1857                 Chimera* chimera;
1858                 if (templatefile != "self") { //you want to run slayer with a reference template
1859                         chimera = new ChimeraSlayer(filename, templatefile, trim, search, ksize, match, mismatch, window, divR, minSimilarity, minCoverage, minBS, minSNP, parents, iters, increment, numwanted, realign, blastlocation, rand());       
1860                 }else {
1861                         chimera = new ChimeraSlayer(filename, templatefile, trim, priority, search, ksize, match, mismatch, window, divR, minSimilarity, minCoverage, minBS, minSNP, parents, iters, increment, numwanted, realign, blastlocation, rand(), byGroup);    
1862                 }
1863                 
1864                 if (m->control_pressed) { delete chimera; return 0; }
1865                 
1866                 if (chimera->getUnaligned()) { delete chimera; m->mothurOut("Your template sequences are different lengths, please correct."); m->mothurOutEndLine(); m->control_pressed = true; return 0; }
1867                 templateSeqsLength = chimera->getLength();
1868                 
1869                 for(int i=0;i<num;i++){
1870                         
1871                         if (m->control_pressed) {       delete chimera; return 1;       }
1872                         
1873                         //read next sequence
1874                         int length = MPIPos[start+i+1] - MPIPos[start+i];
1875
1876                         char* buf4 = new char[length];
1877                         MPI_File_read_at(inMPI, MPIPos[start+i], buf4, length, MPI_CHAR, &status);
1878         
1879                         string tempBuf = buf4;
1880                         if (tempBuf.length() > length) { tempBuf = tempBuf.substr(0, length);  }
1881                         istringstream iss (tempBuf,istringstream::in);
1882
1883                         delete buf4;
1884
1885                         Sequence* candidateSeq = new Sequence(iss);  m->gobble(iss);
1886                         string candidateAligned = candidateSeq->getAligned();
1887                 
1888                         if (candidateSeq->getName() != "") { //incase there is a commented sequence at the end of a file
1889                                 
1890                                 if (candidateSeq->getAligned().length() != templateSeqsLength) {  
1891                                         m->mothurOut(candidateSeq->getName() + " is not the same length as the template sequences. Skipping."); m->mothurOutEndLine();
1892                                 }else{
1893                 
1894                                         //find chimeras
1895                                         chimera->getChimeras(candidateSeq);
1896                         
1897                                         if (m->control_pressed) {       delete chimera; delete candidateSeq; return 1;  }
1898                                         
1899                                         //if you are not chimeric, then check each half
1900                                         data_results wholeResults = chimera->getResults();
1901                                         
1902                                         //determine if we need to split
1903                                         bool isChimeric = false;
1904                                         
1905                                         if (wholeResults.flag == "yes") {
1906                                                 string chimeraFlag = "no";
1907                                                 if(  (wholeResults.results[0].bsa >= minBS && wholeResults.results[0].divr_qla_qrb >= divR)
1908                                                    ||
1909                                                    (wholeResults.results[0].bsb >= minBS && wholeResults.results[0].divr_qlb_qra >= divR) ) { chimeraFlag = "yes"; }
1910                                                 
1911                                                 
1912                                                 if (chimeraFlag == "yes") {     
1913                                                         if ((wholeResults.results[0].bsa >= minBS) || (wholeResults.results[0].bsb >= minBS)) { isChimeric = true; }
1914                                                 }
1915                                         }
1916                                         
1917                                         if ((!isChimeric) && trimera) {                                                 
1918                                                 //split sequence in half by bases
1919                                                 string leftQuery, rightQuery;
1920                                                 Sequence tempSeq(candidateSeq->getName(), candidateAligned);
1921                                                 divideInHalf(tempSeq, leftQuery, rightQuery);
1922                                                 
1923                                                 //run chimeraSlayer on each piece
1924                                                 Sequence* left = new Sequence(candidateSeq->getName(), leftQuery);
1925                                                 Sequence* right = new Sequence(candidateSeq->getName(), rightQuery);
1926                                                 
1927                                                 //find chimeras
1928                                                 chimera->getChimeras(left);
1929                                                 data_results leftResults = chimera->getResults();
1930                                                 
1931                                                 chimera->getChimeras(right);
1932                                                 data_results rightResults = chimera->getResults();
1933                                                 
1934                                                 //if either piece is chimeric then report
1935                         bool flag = false;
1936                                                 Sequence trimmed = chimera->print(outMPI, outAccMPI, leftResults, rightResults, flag);
1937                         if (flag) { cnames.insert(candidateSeq->getName()); }
1938                             
1939                                                 if (trim) {  
1940                                                         string outputString = ">" + trimmed.getName() + "\n" + trimmed.getAligned() + "\n";
1941                                                         
1942                                                         //write to accnos file
1943                                                         int length = outputString.length();
1944                                                         char* buf2 = new char[length];
1945                                                         memcpy(buf2, outputString.c_str(), length);
1946                                                         
1947                                                         MPI_File_write_shared(outFastaMPI, buf2, length, MPI_CHAR, &status);
1948                                                         delete buf2;
1949                                                 }
1950                                                 
1951                                                 delete left; delete right;
1952                                                 
1953                                         }else { 
1954                                                 //print results
1955                                                 Sequence trimmed = chimera->print(outMPI, outAccMPI);
1956                         cnames.insert(candidateSeq->getName());
1957                                                 
1958                                                 if (trim) {  
1959                                                         string outputString = ">" + trimmed.getName() + "\n" + trimmed.getAligned() + "\n";
1960                                                         
1961                                                         //write to accnos file
1962                                                         int length = outputString.length();
1963                                                         char* buf2 = new char[length];
1964                                                         memcpy(buf2, outputString.c_str(), length);
1965                                                         
1966                                                         MPI_File_write_shared(outFastaMPI, buf2, length, MPI_CHAR, &status);
1967                                                         delete buf2;
1968                                                 }
1969                                         }
1970                                         
1971                                 }
1972                         }
1973                         delete candidateSeq;
1974                         
1975                         //report progress
1976                         if((i+1) % 100 == 0){  cout << "Processing sequence: " << (i+1) << endl;                }
1977                 }
1978                 //report progress
1979                 if(num % 100 != 0){             cout << "Processing sequence: " << num << endl;         }
1980                 
1981                 int numNoParents = chimera->getNumNoParents();
1982                 if (numNoParents == num) { cout << "[WARNING]: megablast returned 0 potential parents for all your sequences. This could be due to formatdb.exe not being setup properly, please check formatdb.log for errors." << endl; }
1983                 
1984                 delete chimera;         
1985                 return 0;
1986         }
1987         catch(exception& e) {
1988                 m->errorOut(e, "ChimeraSlayerCommand", "driverMPI");
1989                 exit(1);
1990         }
1991 }
1992 #endif
1993
1994 /**************************************************************************************************/
1995
1996 int ChimeraSlayerCommand::createProcesses(string outputFileName, string filename, string accnos, string fasta, map<string, int>& thisPriority) {
1997         try {
1998                 int process = 0;
1999                 int num = 0;
2000                 processIDS.clear();
2001                 
2002 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
2003                 //loop through and create all the processes you want
2004                 while (process != processors) {
2005                         int pid = fork();
2006                         
2007                         if (pid > 0) {
2008                                 processIDS.push_back(pid);  //create map from line number to pid so you can append files in correct order later
2009                                 process++;
2010                         }else if (pid == 0){
2011                                 num = driver(lines[process], outputFileName + toString(getpid()) + ".temp", filename, accnos + toString(getpid()) + ".temp", fasta + toString(getpid()) + ".temp", thisPriority);
2012                                 
2013                                 //pass numSeqs to parent
2014                                 ofstream out;
2015                                 string tempFile = outputFileName + toString(getpid()) + ".num.temp";
2016                                 m->openOutputFile(tempFile, out);
2017                                 out << num << endl;
2018                                 out.close();
2019                                 exit(0);
2020                         }else { 
2021                                 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine(); 
2022                                 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
2023                                 exit(0);
2024                         }
2025                 }
2026                 
2027                 //force parent to wait until all the processes are done
2028                 for (int i=0;i<processors;i++) { 
2029                         int temp = processIDS[i];
2030                         wait(&temp);
2031                 }
2032                 
2033                 for (int i = 0; i < processIDS.size(); i++) {
2034                         ifstream in;
2035                         string tempFile =  outputFileName + toString(processIDS[i]) + ".num.temp";
2036                         m->openInputFile(tempFile, in);
2037                         if (!in.eof()) { int tempNum = 0;  in >> tempNum; num += tempNum; }
2038                         in.close(); m->mothurRemove(tempFile);
2039                 }
2040 #else
2041                 
2042                 //////////////////////////////////////////////////////////////////////////////////////////////////////
2043                 //Windows version shared memory, so be careful when passing variables through the slayerData struct. 
2044                 //Above fork() will clone, so memory is separate, but that's not the case with windows, 
2045                 //////////////////////////////////////////////////////////////////////////////////////////////////////
2046                 
2047                 vector<slayerData*> pDataArray; 
2048                 DWORD   dwThreadIdArray[processors];
2049                 HANDLE  hThreadArray[processors]; 
2050                 
2051                 //Create processor worker threads.
2052                 for( int i=0; i<processors; i++ ){
2053                         string extension = toString(i) + ".temp";
2054                         slayerData* tempslayer = new slayerData((outputFileName + extension), (fasta + extension), (accnos + extension), filename, templatefile, search, blastlocation, trimera, trim, realign, m, lines[i].start, lines[i].end, ksize, match, mismatch, window, minSimilarity, minCoverage, minBS, minSNP, parents, iters, increment, numwanted, divR, priority, i);
2055                         pDataArray.push_back(tempslayer);
2056                         processIDS.push_back(i);
2057                         
2058                         //MySlayerThreadFunction is in header. It must be global or static to work with the threads.
2059                         //default security attributes, thread function name, argument to thread function, use default creation flags, returns the thread identifier
2060                         hThreadArray[i] = CreateThread(NULL, 0, MySlayerThreadFunction, pDataArray[i], 0, &dwThreadIdArray[i]);   
2061                 }
2062                                 
2063                 //Wait until all threads have terminated.
2064                 WaitForMultipleObjects(processors, hThreadArray, TRUE, INFINITE);
2065                 
2066                 //Close all thread handles and free memory allocations.
2067                 for(int i=0; i < pDataArray.size(); i++){
2068                         num += pDataArray[i]->count;
2069                         CloseHandle(hThreadArray[i]);
2070                         delete pDataArray[i];
2071                 }
2072 #endif  
2073                 
2074                 rename((outputFileName + toString(processIDS[0]) + ".temp").c_str(), outputFileName.c_str());
2075                 rename((accnos + toString(processIDS[0]) + ".temp").c_str(), accnos.c_str());
2076                 if (trim) {  rename((fasta + toString(processIDS[0]) + ".temp").c_str(), fasta.c_str()); }
2077                 
2078                 //append output files
2079                 for(int i=1;i<processIDS.size();i++){
2080                         m->appendFiles((outputFileName + toString(processIDS[i]) + ".temp"), outputFileName);
2081                         m->mothurRemove((outputFileName + toString(processIDS[i]) + ".temp"));
2082                         
2083                         m->appendFiles((accnos + toString(processIDS[i]) + ".temp"), accnos);
2084                         m->mothurRemove((accnos + toString(processIDS[i]) + ".temp"));
2085                         
2086                         if (trim) {
2087                                 m->appendFiles((fasta + toString(processIDS[i]) + ".temp"), fasta);
2088                                 m->mothurRemove((fasta + toString(processIDS[i]) + ".temp"));
2089                         }
2090                 }
2091                 
2092                 
2093                 return num;
2094         }
2095         catch(exception& e) {
2096                 m->errorOut(e, "ChimeraSlayerCommand", "createProcesses");
2097                 exit(1);
2098         }
2099 }
2100
2101 /**************************************************************************************************/
2102
2103 int ChimeraSlayerCommand::divideInHalf(Sequence querySeq, string& leftQuery, string& rightQuery) {
2104         try {
2105                 
2106                 string queryUnAligned = querySeq.getUnaligned();
2107                 int numBases = int(queryUnAligned.length() * 0.5);
2108                 
2109                 string queryAligned = querySeq.getAligned();
2110                 leftQuery = querySeq.getAligned();
2111                 rightQuery = querySeq.getAligned();
2112                 
2113                 int baseCount = 0;
2114                 int leftSpot = 0;
2115                 for (int i = 0; i < queryAligned.length(); i++) {
2116                         //if you are a base
2117                         if (isalpha(queryAligned[i])) {         
2118                                 baseCount++; 
2119                         }
2120                         
2121                         //if you have half
2122                         if (baseCount >= numBases) {  leftSpot = i; break; } //first half
2123                 }
2124                 
2125                 //blank out right side
2126                 for (int i = leftSpot; i < leftQuery.length(); i++) { leftQuery[i] = '.'; }
2127                 
2128                 //blank out left side
2129                 for (int i = 0; i < leftSpot; i++) { rightQuery[i] = '.'; }
2130                 
2131                 return 0;
2132                 
2133         }
2134         catch(exception& e) {
2135                 m->errorOut(e, "ChimeraSlayerCommand", "divideInHalf");
2136                 exit(1);
2137         }
2138 }
2139 /**************************************************************************************************/
2140 map<string, int> ChimeraSlayerCommand::sortFastaFile(string fastaFile, string nameFile) {
2141         try {
2142                 map<string, int> nameAbund;
2143                 
2144                 //read through fastafile and store info
2145                 map<string, string> seqs;
2146                 ifstream in;
2147                 m->openInputFile(fastaFile, in);
2148                 
2149                 while (!in.eof()) {
2150                         
2151                         if (m->control_pressed) { in.close(); return nameAbund; }
2152                         
2153                         Sequence seq(in); m->gobble(in);
2154                         seqs[seq.getName()] = seq.getAligned();
2155                 }
2156                 
2157                 in.close();
2158                 
2159                 //read namefile or countfile
2160                 vector<seqPriorityNode> nameMapCount;
2161         int error;
2162         if (hasCount) { 
2163             CountTable ct;
2164             ct.readTable(nameFile, true);
2165             
2166             for(map<string, string>::iterator it = seqs.begin(); it != seqs.end(); it++) {
2167                 int num = ct.getNumSeqs(it->first);
2168                 if (num == 0) { error = 1; }
2169                 else {
2170                     seqPriorityNode temp(num, it->second, it->first);
2171                     nameMapCount.push_back(temp);
2172                 }
2173             }
2174         }else { error = m->readNames(nameFile, nameMapCount, seqs); }
2175                 
2176                 if (m->control_pressed) { return nameAbund; }
2177                 
2178                 if (error == 1) { m->control_pressed = true; return nameAbund; }
2179                 if (seqs.size() != nameMapCount.size()) { m->mothurOut( "The number of sequences in your fastafile does not match the number of sequences in your namefile, aborting."); m->mothurOutEndLine(); m->control_pressed = true; return nameAbund; }
2180
2181                 sort(nameMapCount.begin(), nameMapCount.end(), compareSeqPriorityNodes);
2182                 
2183                 string newFasta = fastaFile + ".temp";
2184                 ofstream out;
2185                 m->openOutputFile(newFasta, out);
2186                 
2187                 //print new file in order of
2188                 for (int i = 0; i < nameMapCount.size(); i++) {
2189                         out << ">" << nameMapCount[i].name << endl << nameMapCount[i].seq << endl;
2190                         nameAbund[nameMapCount[i].name] = nameMapCount[i].numIdentical;
2191                 }
2192                 out.close();
2193                 
2194                 rename(newFasta.c_str(), fastaFile.c_str());
2195                                 
2196                 return nameAbund;
2197                 
2198         }
2199         catch(exception& e) {
2200                 m->errorOut(e, "ChimeraSlayerCommand", "sortFastaFile");
2201                 exit(1);
2202         }
2203 }
2204 /**************************************************************************************************/
2205 map<string, int> ChimeraSlayerCommand::sortFastaFile(vector<Sequence>& thisseqs, map<string, string>& nameMap, string newFile) {
2206         try {
2207                 map<string, int> nameAbund;
2208                 vector<seqPriorityNode> nameVector;
2209                 
2210                 //read through fastafile and store info
2211                 map<string, string> seqs;
2212                                 
2213                 for (int i = 0; i < thisseqs.size(); i++) {
2214                         
2215                         if (m->control_pressed) { return nameAbund; }
2216                         
2217                         map<string, string>::iterator itNameMap = nameMap.find(thisseqs[i].getName());
2218                         
2219                         if (itNameMap == nameMap.end()){
2220                                 m->control_pressed = true;
2221                                 m->mothurOut("[ERROR]: " + thisseqs[i].getName() + " is in your fastafile, but is not in your namesfile, please correct."); m->mothurOutEndLine();
2222                         }else {
2223                                 int num = m->getNumNames(itNameMap->second);
2224                                 
2225                                 seqPriorityNode temp(num, thisseqs[i].getAligned(), thisseqs[i].getName());
2226                                 nameVector.push_back(temp);
2227                         }
2228                 }
2229         
2230                 //sort by num represented
2231                 sort(nameVector.begin(), nameVector.end(), compareSeqPriorityNodes);
2232         
2233                 if (m->control_pressed) { return nameAbund; }
2234                 
2235                 if (thisseqs.size() != nameVector.size()) { m->mothurOut( "The number of sequences in your fastafile does not match the number of sequences in your namefile, aborting."); m->mothurOutEndLine(); m->control_pressed = true; return nameAbund; }
2236                                 
2237                 ofstream out;
2238                 m->openOutputFile(newFile, out);
2239                 
2240                 //print new file in order of
2241                 for (int i = 0; i < nameVector.size(); i++) {
2242                         out << ">" << nameVector[i].name << endl << nameVector[i].seq << endl;
2243                         nameAbund[nameVector[i].name] = nameVector[i].numIdentical;
2244                 }
2245                 out.close();
2246                 
2247                 return nameAbund;
2248                 
2249         }
2250         catch(exception& e) {
2251                 m->errorOut(e, "ChimeraSlayerCommand", "sortFastaFile");
2252                 exit(1);
2253         }
2254 }
2255 /**************************************************************************************************/
2256 int ChimeraSlayerCommand::sortFastaFile(vector<Sequence>& thisseqs, map<string, int>& countMap, string newFile) {
2257         try {
2258                 vector<seqPriorityNode> nameVector;
2259                 
2260                 //read through fastafile and store info
2261                 map<string, string> seqs;
2262         
2263                 for (int i = 0; i < thisseqs.size(); i++) {
2264                         
2265                         if (m->control_pressed) { return 0; }
2266                         
2267                         map<string, int>::iterator itCountMap = countMap.find(thisseqs[i].getName());
2268                         
2269                         if (itCountMap == countMap.end()){
2270                                 m->control_pressed = true;
2271                                 m->mothurOut("[ERROR]: " + thisseqs[i].getName() + " is in your fastafile, but is not in your count file, please correct."); m->mothurOutEndLine();
2272                         }else {
2273                 seqPriorityNode temp(itCountMap->second, thisseqs[i].getAligned(), thisseqs[i].getName());
2274                                 nameVector.push_back(temp);
2275                         }
2276                 }
2277         
2278                 //sort by num represented
2279                 sort(nameVector.begin(), nameVector.end(), compareSeqPriorityNodes);
2280         
2281                 if (m->control_pressed) { return 0; }
2282                 
2283                 if (thisseqs.size() != nameVector.size()) { m->mothurOut( "The number of sequences in your fastafile does not match the number of sequences in your count file, aborting."); m->mothurOutEndLine(); m->control_pressed = true; return 0; }
2284         
2285                 ofstream out;
2286                 m->openOutputFile(newFile, out);
2287                 
2288                 //print new file in order of
2289                 for (int i = 0; i < nameVector.size(); i++) {
2290                         out << ">" << nameVector[i].name << endl << nameVector[i].seq << endl;
2291                 }
2292                 out.close();
2293                 
2294                 return 0;
2295                 
2296         }
2297         catch(exception& e) {
2298                 m->errorOut(e, "ChimeraSlayerCommand", "sortFastaFile");
2299                 exit(1);
2300         }
2301 }
2302 /**************************************************************************************************/
2303