]> git.donarmstrong.com Git - mothur.git/blob - chimerapintailcommand.cpp
changing command name classify.shared to classifyrf.shared
[mothur.git] / chimerapintailcommand.cpp
1 /*
2  *  chimerapintailcommand.cpp
3  *  Mothur
4  *
5  *  Created by westcott on 4/1/10.
6  *  Copyright 2010 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "chimerapintailcommand.h"
11 #include "pintail.h"
12
13
14 //**********************************************************************************************************************
15 vector<string> ChimeraPintailCommand::setParameters(){  
16         try {
17                 CommandParameter ptemplate("reference", "InputTypes", "", "", "none", "none", "none","",false,true,true); parameters.push_back(ptemplate);
18                 CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none","chimera-accnos",false,true,true); parameters.push_back(pfasta);
19                 CommandParameter pconservation("conservation", "InputTypes", "", "", "none", "none", "none","",false,false); parameters.push_back(pconservation);
20                 CommandParameter pquantile("quantile", "InputTypes", "", "", "none", "none", "none","",false,false); parameters.push_back(pquantile);
21                 CommandParameter pfilter("filter", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(pfilter);
22                 CommandParameter pwindow("window", "Number", "", "0", "", "", "","","",false,false); parameters.push_back(pwindow);
23                 CommandParameter pincrement("increment", "Number", "", "25", "", "", "","",false,false); parameters.push_back(pincrement);
24                 CommandParameter pmask("mask", "String", "", "", "", "", "","",false,false); parameters.push_back(pmask);
25                 CommandParameter pprocessors("processors", "Number", "", "1", "", "", "","",false,false,true); parameters.push_back(pprocessors);
26                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
27                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
28                 CommandParameter psave("save", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(psave);
29
30                 vector<string> myArray;
31                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
32                 return myArray;
33         }
34         catch(exception& e) {
35                 m->errorOut(e, "ChimeraPintailCommand", "setParameters");
36                 exit(1);
37         }
38 }
39 //**********************************************************************************************************************
40 string ChimeraPintailCommand::getHelpString(){  
41         try {
42                 string helpString = "";
43                 helpString += "The chimera.pintail command reads a fastafile and referencefile and outputs potentially chimeric sequences.\n";
44                 helpString += "This command was created using the algorithms described in the 'At Least 1 in 20 16S rRNA Sequence Records Currently Held in the Public Repositories is Estimated To Contain Substantial Anomalies' paper by Kevin E. Ashelford 1, Nadia A. Chuzhanova 3, John C. Fry 1, Antonia J. Jones 2 and Andrew J. Weightman 1.\n";
45                 helpString += "The chimera.pintail command parameters are fasta, reference, filter, mask, processors, window, increment, conservation and quantile.\n";
46                 helpString += "The fasta parameter allows you to enter the fasta file containing your potentially chimeric sequences, and is required unless you have a valid current fasta file. \n";
47                 helpString += "You may enter multiple fasta files by separating their names with dashes. ie. fasta=abrecovery.fasta-amzon.fasta \n";
48                 helpString += "The reference parameter allows you to enter a reference file containing known non-chimeric sequences, and is required. \n";
49                 helpString += "The filter parameter allows you to specify if you would like to apply a vertical and 50% soft filter. \n";
50                 helpString += "The mask parameter allows you to specify a file containing one sequence you wish to use as a mask for the your sequences, by default no mask is applied.  You can apply an ecoli mask by typing, mask=default. \n";
51                 helpString += "The processors parameter allows you to specify how many processors you would like to use.  The default is 1. \n";
52 #ifdef USE_MPI
53                 helpString += "When using MPI, the processors parameter is set to the number of MPI processes running. \n";
54 #endif
55                 helpString += "If the save parameter is set to true the reference sequences will be saved in memory, to clear them later you can use the clear.memory command. Default=f.";
56                 helpString += "The window parameter allows you to specify the window size for searching for chimeras, default=300. \n";
57                 helpString += "The increment parameter allows you to specify how far you move each window while finding chimeric sequences, default=25.\n";
58                 helpString += "The conservation parameter allows you to enter a frequency file containing the highest bases frequency at each place in the alignment.\n";
59                 helpString += "The quantile parameter allows you to enter a file containing quantiles for a template files sequences, if you use the filter the quantile file generated becomes unique to the fasta file you used.\n";
60                 helpString += "The chimera.pintail command should be in the following format: \n";
61                 helpString += "chimera.pintail(fasta=yourFastaFile, reference=yourTemplate) \n";
62                 helpString += "Example: chimera.pintail(fasta=AD.align, reference=silva.bacteria.fasta) \n";
63                 helpString += "Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFastaFile).\n";       
64                 return helpString;
65         }
66         catch(exception& e) {
67                 m->errorOut(e, "ChimeraPintailCommand", "getHelpString");
68                 exit(1);
69         }
70 }
71 //**********************************************************************************************************************
72 string ChimeraPintailCommand::getOutputPattern(string type) {
73     try {
74         string pattern = "";
75         
76         if (type == "chimera") {  pattern = "[filename],[tag],pintail.chimeras-[filename],pintail.chimeras"; } 
77         else if (type == "accnos") {  pattern = "[filename],[tag],pintail.accnos-[filename],pintail.accnos"; } 
78         else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true;  }
79         
80         return pattern;
81     }
82     catch(exception& e) {
83         m->errorOut(e, "ChimeraPintailCommand", "getOutputPattern");
84         exit(1);
85     }
86 }
87 //**********************************************************************************************************************
88 ChimeraPintailCommand::ChimeraPintailCommand(){ 
89         try {
90                 abort = true; calledHelp = true;
91                 setParameters();
92                 vector<string> tempOutNames;
93                 outputTypes["chimera"] = tempOutNames;
94                 outputTypes["accnos"] = tempOutNames;
95         
96         }
97         catch(exception& e) {
98                 m->errorOut(e, "ChimeraPintailCommand", "ChimeraPintailCommand");
99                 exit(1);
100         }
101 }
102 //***************************************************************************************************************
103 ChimeraPintailCommand::ChimeraPintailCommand(string option)  {
104         try {
105                 abort = false; calledHelp = false;   
106                 rdb = ReferenceDB::getInstance();
107                 
108                 //allow user to run help
109                 if(option == "help") { help(); abort = true; calledHelp = true; }
110                 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
111                 
112                 else {
113                         vector<string> myArray = setParameters();
114                         
115                         OptionParser parser(option);
116                         map<string,string> parameters = parser.getParameters();
117                         
118                         ValidParameters validParameter("chimera.pintail");
119                         map<string,string>::iterator it;
120                         
121                         //check to make sure all parameters are valid for command
122                         for (it = parameters.begin(); it != parameters.end(); it++) { 
123                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
124                         }
125                         
126                         vector<string> tempOutNames;
127                         outputTypes["chimera"] = tempOutNames;
128                         outputTypes["accnos"] = tempOutNames;
129         
130                 
131                         //if the user changes the input directory command factory will send this info to us in the output parameter 
132                         inputDir = validParameter.validFile(parameters, "inputdir", false);             
133                         if (inputDir == "not found"){   inputDir = "";          }
134                         else {
135                                 string path;
136                                 it = parameters.find("reference");
137                                 //user has given a template file
138                                 if(it != parameters.end()){ 
139                                         path = m->hasPath(it->second);
140                                         //if the user has not given a path then, add inputdir. else leave path alone.
141                                         if (path == "") {       parameters["reference"] = inputDir + it->second;                }
142                                 }
143                                 
144                                 it = parameters.find("conservation");
145                                 //user has given a template file
146                                 if(it != parameters.end()){ 
147                                         path = m->hasPath(it->second);
148                                         //if the user has not given a path then, add inputdir. else leave path alone.
149                                         if (path == "") {       parameters["conservation"] = inputDir + it->second;             }
150                                 }
151                                 
152                                 it = parameters.find("quantile");
153                                 //user has given a template file
154                                 if(it != parameters.end()){ 
155                                         path = m->hasPath(it->second);
156                                         //if the user has not given a path then, add inputdir. else leave path alone.
157                                         if (path == "") {       parameters["quantile"] = inputDir + it->second;         }
158                                 }
159                         }
160
161                         
162                         //check for required parameters
163                         fastafile = validParameter.validFile(parameters, "fasta", false);
164                         if (fastafile == "not found") {                                 
165                                 //if there is a current fasta file, use it
166                                 string filename = m->getFastaFile(); 
167                                 if (filename != "") { fastaFileNames.push_back(filename); m->mothurOut("Using " + filename + " as input file for the fasta parameter."); m->mothurOutEndLine(); }
168                                 else {  m->mothurOut("You have no current fastafile and the fasta parameter is required."); m->mothurOutEndLine(); abort = true; }
169                         }else { 
170                                 m->splitAtDash(fastafile, fastaFileNames);
171                                 
172                                 //go through files and make sure they are good, if not, then disregard them
173                                 for (int i = 0; i < fastaFileNames.size(); i++) {
174                                         
175                                         bool ignore = false;
176                                         if (fastaFileNames[i] == "current") { 
177                                                 fastaFileNames[i] = m->getFastaFile(); 
178                                                 if (fastaFileNames[i] != "") {  m->mothurOut("Using " + fastaFileNames[i] + " as input file for the fasta parameter where you had given current."); m->mothurOutEndLine(); }
179                                                 else {  
180                                                         m->mothurOut("You have no current fastafile, ignoring current."); m->mothurOutEndLine(); ignore=true; 
181                                                         //erase from file list
182                                                         fastaFileNames.erase(fastaFileNames.begin()+i);
183                                                         i--;
184                                                 }
185                                         }
186                                         
187                                         if (!ignore) {
188                                         
189                                                 if (inputDir != "") {
190                                                         string path = m->hasPath(fastaFileNames[i]);
191                                                         //if the user has not given a path then, add inputdir. else leave path alone.
192                                                         if (path == "") {       fastaFileNames[i] = inputDir + fastaFileNames[i];               }
193                                                 }
194                 
195                                                 int ableToOpen;
196                                                 ifstream in;
197                                                 
198                                                 ableToOpen = m->openInputFile(fastaFileNames[i], in, "noerror");
199                                         
200                                                 //if you can't open it, try default location
201                                                 if (ableToOpen == 1) {
202                                                         if (m->getDefaultPath() != "") { //default path is set
203                                                                 string tryPath = m->getDefaultPath() + m->getSimpleName(fastaFileNames[i]);
204                                                                 m->mothurOut("Unable to open " + fastaFileNames[i] + ". Trying default " + tryPath); m->mothurOutEndLine();
205                                                                 ifstream in2;
206                                                                 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
207                                                                 in2.close();
208                                                                 fastaFileNames[i] = tryPath;
209                                                         }
210                                                 }
211                                                 
212                                                 if (ableToOpen == 1) {
213                                                         if (m->getOutputDir() != "") { //default path is set
214                                                                 string tryPath = m->getOutputDir() + m->getSimpleName(fastaFileNames[i]);
215                                                                 m->mothurOut("Unable to open " + fastaFileNames[i] + ". Trying output directory " + tryPath); m->mothurOutEndLine();
216                                                                 ifstream in2;
217                                                                 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
218                                                                 in2.close();
219                                                                 fastaFileNames[i] = tryPath;
220                                                         }
221                                                 }
222
223                                                 in.close();
224                                                 
225                                                 if (ableToOpen == 1) { 
226                                                         m->mothurOut("Unable to open " + fastaFileNames[i] + ". It will be disregarded."); m->mothurOutEndLine(); 
227                                                         //erase from file list
228                                                         fastaFileNames.erase(fastaFileNames.begin()+i);
229                                                         i--;
230                                                 }else {
231                                                         m->setFastaFile(fastaFileNames[i]);
232                                                 }
233                                         }
234                                 }
235                                 
236                                 //make sure there is at least one valid file left
237                                 if (fastaFileNames.size() == 0) { m->mothurOut("no valid files."); m->mothurOutEndLine(); abort = true; }
238                         }
239                         
240                         string temp;
241                         temp = validParameter.validFile(parameters, "filter", false);                   if (temp == "not found") { temp = "F"; }
242                         filter = m->isTrue(temp);
243                         
244                         temp = validParameter.validFile(parameters, "processors", false);       if (temp == "not found"){       temp = m->getProcessors();      }
245                         m->setProcessors(temp);
246                         m->mothurConvert(temp, processors);
247                         
248                         temp = validParameter.validFile(parameters, "window", false);                   if (temp == "not found") { temp = "0"; }
249                         m->mothurConvert(temp, window);
250                         
251                         temp = validParameter.validFile(parameters, "increment", false);                if (temp == "not found") { temp = "25"; }
252                         m->mothurConvert(temp, increment);
253                         
254                         temp = validParameter.validFile(parameters, "save", false);                     if (temp == "not found"){       temp = "f";                             }
255                         save = m->isTrue(temp); 
256                         rdb->save = save; 
257                         if (save) { //clear out old references
258                                 rdb->clearMemory();     
259                         }
260                         
261                         //this has to go after save so that if the user sets save=t and provides no reference we abort
262                         templatefile = validParameter.validFile(parameters, "reference", true);
263                         if (templatefile == "not found") { 
264                                 //check for saved reference sequences
265                                 if (rdb->referenceSeqs.size() != 0) {
266                                         templatefile = "saved";
267                                 }else {
268                                         m->mothurOut("[ERROR]: You don't have any saved reference sequences and the reference parameter is a required."); 
269                                         m->mothurOutEndLine();
270                                         abort = true; 
271                                 }
272                         }else if (templatefile == "not open") { abort = true; } 
273                         else {  if (save) {     rdb->setSavedReference(templatefile);   }       }
274                         
275                         
276                         maskfile = validParameter.validFile(parameters, "mask", false);
277                         if (maskfile == "not found") { maskfile = "";  }        
278                         else if (maskfile != "default")  { 
279                                 if (inputDir != "") {
280                                         string path = m->hasPath(maskfile);
281                                         //if the user has not given a path then, add inputdir. else leave path alone.
282                                         if (path == "") {       maskfile = inputDir + maskfile;         }
283                                 }
284
285                                 ifstream in;
286                                 int     ableToOpen = m->openInputFile(maskfile, in, "no error");
287                                 if (ableToOpen == 1) { 
288                                         if (m->getDefaultPath() != "") { //default path is set
289                                                         string tryPath = m->getDefaultPath() + m->getSimpleName(maskfile);
290                                                         m->mothurOut("Unable to open " + maskfile + ". Trying default " + tryPath); m->mothurOutEndLine();
291                                                         ifstream in2;
292                                                         ableToOpen = m->openInputFile(tryPath, in2, "noerror");
293                                                         in2.close();
294                                                         maskfile = tryPath;
295                                         }
296                                 }
297                                 
298                                 if (ableToOpen == 1) {
299                                                 if (m->getOutputDir() != "") { //default path is set
300                                                         string tryPath = m->getOutputDir() + m->getSimpleName(maskfile);
301                                                         m->mothurOut("Unable to open " + maskfile + ". Trying output directory " + tryPath); m->mothurOutEndLine();
302                                                         ifstream in2;
303                                                         ableToOpen = m->openInputFile(tryPath, in2, "noerror");
304                                                         in2.close();
305                                                         maskfile = tryPath;
306                                                 }
307                                 }
308                                 
309                                 in.close();
310                                         
311                                 if (ableToOpen == 1) { 
312                                                 m->mothurOut("Unable to open " + maskfile + "."); m->mothurOutEndLine(); 
313                                                 abort = true;
314                                 }
315                         }
316
317                         
318                         //if the user changes the output directory command factory will send this info to us in the output parameter 
319                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = ""; }
320                 
321                         consfile = validParameter.validFile(parameters, "conservation", true);
322                         if (consfile == "not open") { abort = true; }
323                         else if (consfile == "not found") { 
324                                 consfile = "";  
325                                 //check for consfile
326                                 string tempConsFile = m->getRootName(inputDir + m->getSimpleName(templatefile)) + "freq";
327                                 ifstream FileTest(tempConsFile.c_str());
328                                 if(FileTest){   
329                                         bool GoodFile = m->checkReleaseVersion(FileTest, m->getVersion());
330                                         if (GoodFile) {  
331                                                 m->mothurOut("I found " + tempConsFile + " in your input file directory. I will use it to save time."); m->mothurOutEndLine();  consfile = tempConsFile;  FileTest.close();     
332                                         }
333                                 }else {
334                                         string tempConsFile = m->getDefaultPath() + m->getRootName(m->getSimpleName(templatefile)) + "freq";
335                                         ifstream FileTest2(tempConsFile.c_str());
336                                         if(FileTest2){  
337                                                 bool GoodFile = m->checkReleaseVersion(FileTest2, m->getVersion());
338                                                 if (GoodFile) {  
339                                                         m->mothurOut("I found " + tempConsFile + " in your input file directory. I will use it to save time."); m->mothurOutEndLine();  consfile = tempConsFile;  FileTest2.close();    
340                                                 }
341                                         }
342                                 }
343                         }       
344                         
345                         quanfile = validParameter.validFile(parameters, "quantile", true);
346                         if (quanfile == "not open") { abort = true; }
347                         else if (quanfile == "not found") { quanfile = ""; }
348                 }
349         }
350         catch(exception& e) {
351                 m->errorOut(e, "ChimeraPintailCommand", "ChimeraPintailCommand");
352                 exit(1);
353         }
354 }
355 //***************************************************************************************************************
356
357 int ChimeraPintailCommand::execute(){
358         try{
359                 
360                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
361                 
362                 for (int s = 0; s < fastaFileNames.size(); s++) {
363                                 
364                         m->mothurOut("Checking sequences from " + fastaFileNames[s] + " ..." ); m->mothurOutEndLine();
365
366                         int start = time(NULL); 
367                         
368                         //set user options
369                         if (maskfile == "default") { m->mothurOut("I am using the default 236627 EU009184.1 Shigella dysenteriae str. FBD013."); m->mothurOutEndLine();  }
370                         
371                         //check for quantile to save the time
372                         string baseName = templatefile;
373                         if (templatefile == "saved") { baseName = rdb->getSavedReference(); }
374                         
375                         string tempQuan = "";
376                         if ((!filter) && (maskfile == "")) {
377                                 tempQuan = inputDir + m->getRootName(m->getSimpleName(baseName)) + "pintail.quan";
378                         }else if ((!filter) && (maskfile != "")) { 
379                                 tempQuan = inputDir + m->getRootName(m->getSimpleName(baseName)) + "pintail.masked.quan";
380                         }else if ((filter) && (maskfile != "")) { 
381                                 tempQuan = inputDir + m->getRootName(m->getSimpleName(baseName)) + "pintail.filtered." + m->getSimpleName(m->getRootName(fastaFileNames[s])) + "masked.quan";
382                         }else if ((filter) && (maskfile == "")) { 
383                                 tempQuan = inputDir + m->getRootName(m->getSimpleName(baseName)) + "pintail.filtered." + m->getSimpleName(m->getRootName(fastaFileNames[s])) + "quan";
384                         }
385                         
386                         ifstream FileTest(tempQuan.c_str());
387                         if(FileTest){   
388                                 bool GoodFile = m->checkReleaseVersion(FileTest, m->getVersion());
389                                 if (GoodFile) {  
390                                         m->mothurOut("I found " + tempQuan + " in your input file directory. I will use it to save time."); m->mothurOutEndLine();  quanfile = tempQuan;  FileTest.close();     
391                                 }
392                         }else {
393                                 string tryPath = m->getDefaultPath();
394                                 string tempQuan = "";
395                                 if ((!filter) && (maskfile == "")) {
396                                         tempQuan = tryPath + m->getRootName(m->getSimpleName(baseName)) + "pintail.quan";
397                                 }else if ((!filter) && (maskfile != "")) { 
398                                         tempQuan = tryPath + m->getRootName(m->getSimpleName(baseName)) + "pintail.masked.quan";
399                                 }else if ((filter) && (maskfile != "")) { 
400                                         tempQuan = tryPath + m->getRootName(m->getSimpleName(baseName)) + "pintail.filtered." + m->getSimpleName(m->getRootName(fastaFileNames[s])) + "masked.quan";
401                                 }else if ((filter) && (maskfile == "")) { 
402                                         tempQuan = tryPath + m->getRootName(m->getSimpleName(baseName)) + "pintail.filtered." + m->getSimpleName(m->getRootName(fastaFileNames[s])) + "quan";
403                                 }
404                                 
405                                 ifstream FileTest2(tempQuan.c_str());
406                                 if(FileTest2){  
407                                         bool GoodFile = m->checkReleaseVersion(FileTest2, m->getVersion());
408                                         if (GoodFile) {  
409                                                 m->mothurOut("I found " + tempQuan + " in your input file directory. I will use it to save time."); m->mothurOutEndLine();  quanfile = tempQuan;  FileTest2.close();    
410                                         }
411                                 }
412                         }
413                         chimera = new Pintail(fastaFileNames[s], templatefile, filter, processors, maskfile, consfile, quanfile, window, increment, outputDir);
414                         
415                         if (outputDir == "") { outputDir = m->hasPath(fastaFileNames[s]);  }//if user entered a file with a path then preserve it
416                         string outputFileName, accnosFileName;
417             map<string, string> variables;
418             variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s]));
419                         if (maskfile != "") { variables["[tag]"] = m->getSimpleName(m->getRootName(maskfile)); }
420             outputFileName = getOutputFileName("chimera", variables);
421             accnosFileName = getOutputFileName("accnos", variables);
422                         
423                         
424                         if (m->control_pressed) { delete chimera; for (int j = 0; j < outputNames.size(); j++) {        m->mothurRemove(outputNames[j]);        }  return 0;    }
425                         
426                         if (chimera->getUnaligned()) { 
427                                 m->mothurOut("Your template sequences are different lengths, please correct."); m->mothurOutEndLine(); 
428                                 delete chimera;
429                                 return 0; 
430                         }
431                         templateSeqsLength = chimera->getLength();
432                 
433                 #ifdef USE_MPI
434                         int pid, numSeqsPerProcessor; 
435                                 int tag = 2001;
436                                 vector<unsigned long long> MPIPos;
437                                 
438                                 MPI_Status status; 
439                                 MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
440                                 MPI_Comm_size(MPI_COMM_WORLD, &processors); 
441
442                                 MPI_File inMPI;
443                                 MPI_File outMPI;
444                                 MPI_File outMPIAccnos;
445                                 
446                                 int outMode=MPI_MODE_CREATE|MPI_MODE_WRONLY; 
447                                 int inMode=MPI_MODE_RDONLY; 
448                                 
449                                 char outFilename[1024];
450                                 strcpy(outFilename, outputFileName.c_str());
451                                 
452                                 char outAccnosFilename[1024];
453                                 strcpy(outAccnosFilename, accnosFileName.c_str());
454                                 
455                                 char inFileName[1024];
456                                 strcpy(inFileName, fastaFileNames[s].c_str());
457
458                                 MPI_File_open(MPI_COMM_WORLD, inFileName, inMode, MPI_INFO_NULL, &inMPI);  //comm, filename, mode, info, filepointer
459                                 MPI_File_open(MPI_COMM_WORLD, outFilename, outMode, MPI_INFO_NULL, &outMPI);
460                                 MPI_File_open(MPI_COMM_WORLD, outAccnosFilename, outMode, MPI_INFO_NULL, &outMPIAccnos);
461                                 
462                                 if (m->control_pressed) { outputTypes.clear();  MPI_File_close(&inMPI);  MPI_File_close(&outMPI);   MPI_File_close(&outMPIAccnos);  for (int j = 0; j < outputNames.size(); j++) {      m->mothurRemove(outputNames[j]);        }  delete chimera; return 0;  }
463
464                                 if (pid == 0) { //you are the root process 
465                                                                 
466                                         MPIPos = m->setFilePosFasta(fastaFileNames[s], numSeqs); //fills MPIPos, returns numSeqs
467                                         
468                                         //send file positions to all processes
469                                         for(int i = 1; i < processors; i++) { 
470                                                 MPI_Send(&numSeqs, 1, MPI_INT, i, tag, MPI_COMM_WORLD);
471                                                 MPI_Send(&MPIPos[0], (numSeqs+1), MPI_LONG, i, tag, MPI_COMM_WORLD);
472                                         }
473                                         
474                                         //figure out how many sequences you have to align
475                                         numSeqsPerProcessor = numSeqs / processors;
476                                         int startIndex =  pid * numSeqsPerProcessor;
477                                         if(pid == (processors - 1)){    numSeqsPerProcessor = numSeqs - pid * numSeqsPerProcessor;      }
478                                 
479                                         //do your part
480                                         driverMPI(startIndex, numSeqsPerProcessor, inMPI, outMPI, outMPIAccnos, MPIPos);
481                                         
482                                         if (m->control_pressed) { outputTypes.clear();  MPI_File_close(&inMPI);  MPI_File_close(&outMPI);   MPI_File_close(&outMPIAccnos);  m->mothurRemove(outputFileName);  m->mothurRemove(accnosFileName);  for (int j = 0; j < outputNames.size(); j++) {  m->mothurRemove(outputNames[j]);        }  delete chimera; return 0;  }
483                                         
484                                 }else{ //you are a child process
485                                         MPI_Recv(&numSeqs, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
486                                         MPIPos.resize(numSeqs+1);
487                                         MPI_Recv(&MPIPos[0], (numSeqs+1), MPI_LONG, 0, tag, MPI_COMM_WORLD, &status);
488                                         
489                                         //figure out how many sequences you have to align
490                                         numSeqsPerProcessor = numSeqs / processors;
491                                         int startIndex =  pid * numSeqsPerProcessor;
492                                         if(pid == (processors - 1)){    numSeqsPerProcessor = numSeqs - pid * numSeqsPerProcessor;      }
493                                         
494                                         //do your part
495                                         driverMPI(startIndex, numSeqsPerProcessor, inMPI, outMPI, outMPIAccnos, MPIPos);
496                                         
497                                         if (m->control_pressed) { outputTypes.clear();  MPI_File_close(&inMPI);  MPI_File_close(&outMPI);   MPI_File_close(&outMPIAccnos);  for (int j = 0; j < outputNames.size(); j++) {      m->mothurRemove(outputNames[j]);        }  delete chimera; return 0;  }
498                                 }
499                                 
500                                 //close files 
501                                 MPI_File_close(&inMPI);
502                                 MPI_File_close(&outMPI);
503                                 MPI_File_close(&outMPIAccnos);
504                                 MPI_Barrier(MPI_COMM_WORLD); //make everyone wait - just in case
505                 #else
506                                                 
507                         //break up file
508                         #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
509                                 vector<unsigned long long> positions = m->divideFile(fastaFileNames[s], processors);
510                         
511                                 for (int i = 0; i < (positions.size()-1); i++) {
512                                         lines.push_back(new linePair(positions[i], positions[(i+1)]));
513                                 }       
514                         
515                                 if(processors == 1){
516                 
517                                         numSeqs = driver(lines[0], outputFileName, fastaFileNames[s], accnosFileName);
518                                         
519                                         if (m->control_pressed) { outputTypes.clear(); m->mothurRemove(outputFileName); m->mothurRemove(accnosFileName); for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]);        } for (int i = 0; i < lines.size(); i++) {  delete lines[i];  }  lines.clear(); delete chimera; return 0; }
520                                         
521                                 }else{
522                                         processIDS.resize(0);
523                                         
524                                         numSeqs = createProcesses(outputFileName, fastaFileNames[s], accnosFileName); 
525                                 
526                                         rename((outputFileName + toString(processIDS[0]) + ".temp").c_str(), outputFileName.c_str());
527                                         rename((accnosFileName + toString(processIDS[0]) + ".temp").c_str(), accnosFileName.c_str());
528                                                 
529                                         //append output files
530                                         for(int i=1;i<processors;i++){
531                                                 m->appendFiles((outputFileName + toString(processIDS[i]) + ".temp"), outputFileName);
532                                                 m->mothurRemove((outputFileName + toString(processIDS[i]) + ".temp"));
533                                         }
534                                         
535                                         //append output files
536                                         for(int i=1;i<processors;i++){
537                                                 m->appendFiles((accnosFileName + toString(processIDS[i]) + ".temp"), accnosFileName);
538                                                 m->mothurRemove((accnosFileName + toString(processIDS[i]) + ".temp"));
539                                         }
540                                                                                 
541                                         if (m->control_pressed) { 
542                                                 m->mothurRemove(outputFileName); 
543                                                 m->mothurRemove(accnosFileName);
544                                                 for (int j = 0; j < outputNames.size(); j++) {  m->mothurRemove(outputNames[j]);        } outputTypes.clear();
545                                                 for (int i = 0; i < lines.size(); i++) {  delete lines[i];  }  lines.clear();
546                                                 delete chimera;
547                                                 return 0;
548                                         }
549                                 }
550
551                         #else
552                                 lines.push_back(new linePair(0, 1000));
553                                 numSeqs = driver(lines[0], outputFileName, fastaFileNames[s], accnosFileName);
554                                 
555                                 if (m->control_pressed) { outputTypes.clear(); m->mothurRemove(outputFileName); m->mothurRemove(accnosFileName); for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]);        } for (int i = 0; i < lines.size(); i++) {  delete lines[i];  }  lines.clear(); delete chimera; return 0; }
556                         #endif
557                         
558                 #endif  
559                 
560                         delete chimera;
561                         for (int i = 0; i < lines.size(); i++) {  delete lines[i];  }  lines.clear();
562                         
563                         outputNames.push_back(outputFileName); outputTypes["chimera"].push_back(outputFileName);
564                         outputNames.push_back(accnosFileName); outputTypes["accnos"].push_back(accnosFileName);
565                         
566                         m->mothurOutEndLine();
567                         m->mothurOutEndLine(); m->mothurOut("It took " + toString(time(NULL) - start) + " secs to check " + toString(numSeqs) + " sequences."); m->mothurOutEndLine();
568                 }
569                 
570                 //set accnos file as new current accnosfile
571                 string current = "";
572                 itTypes = outputTypes.find("accnos");
573                 if (itTypes != outputTypes.end()) {
574                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setAccnosFile(current); }
575                 }
576                 
577                 m->mothurOutEndLine();
578                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
579                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }       
580                 m->mothurOutEndLine();
581                         
582                 return 0;
583                 
584         }
585         catch(exception& e) {
586                 m->errorOut(e, "ChimeraPintailCommand", "execute");
587                 exit(1);
588         }
589 }
590 //**********************************************************************************************************************
591
592 int ChimeraPintailCommand::driver(linePair* filePos, string outputFName, string filename, string accnos){
593         try {
594                 ofstream out;
595                 m->openOutputFile(outputFName, out);
596                 
597                 ofstream out2;
598                 m->openOutputFile(accnos, out2);
599                 
600                 ifstream inFASTA;
601                 m->openInputFile(filename, inFASTA);
602
603                 inFASTA.seekg(filePos->start);
604
605                 bool done = false;
606                 int count = 0;
607         
608                 while (!done) {
609                                 
610                         if (m->control_pressed) {       return 1;       }
611                 
612                         Sequence* candidateSeq = new Sequence(inFASTA);  m->gobble(inFASTA);
613                                 
614                         if (candidateSeq->getName() != "") { //incase there is a commented sequence at the end of a file
615                                 
616                                 if (candidateSeq->getAligned().length() != templateSeqsLength)  {  //chimeracheck does not require seqs to be aligned
617                                         m->mothurOut(candidateSeq->getName() + " is not the same length as the template sequences. Skipping."); m->mothurOutEndLine();
618                                 }else{
619                                         //find chimeras
620                                         chimera->getChimeras(candidateSeq);
621                                         
622                                         if (m->control_pressed) {       delete candidateSeq; return 1;  }
623                 
624                                         //print results
625                                         chimera->print(out, out2);
626                                 }
627                                 count++;
628                         }
629                         delete candidateSeq;
630                         
631                         #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
632                                 unsigned long long pos = inFASTA.tellg();
633                                 if ((pos == -1) || (pos >= filePos->end)) { break; }
634                         #else
635                                 if (inFASTA.eof()) { break; }
636                         #endif
637                         
638                         //report progress
639                         if((count) % 100 == 0){ m->mothurOutJustToScreen("Processing sequence: " + toString(count) + "\n");             }
640                 }
641                 //report progress
642                 if((count) % 100 != 0){ m->mothurOutJustToScreen("Processing sequence: " + toString(count) + "\n");             }
643                 
644                 out.close();
645                 out2.close();
646                 inFASTA.close();
647                                 
648                 return count;
649         }
650         catch(exception& e) {
651                 m->errorOut(e, "ChimeraPintailCommand", "driver");
652                 exit(1);
653         }
654 }
655 //**********************************************************************************************************************
656 #ifdef USE_MPI
657 int ChimeraPintailCommand::driverMPI(int start, int num, MPI_File& inMPI, MPI_File& outMPI, MPI_File& outAccMPI, vector<unsigned long long>& MPIPos){
658         try {
659                                 
660                 MPI_Status status; 
661                 int pid;
662                 MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
663                 
664                 for(int i=0;i<num;i++){
665                         
666                         if (m->control_pressed) {       return 1;       }
667                         
668                         //read next sequence
669                         int length = MPIPos[start+i+1] - MPIPos[start+i];
670         
671                         char* buf4 = new char[length];
672                         MPI_File_read_at(inMPI, MPIPos[start+i], buf4, length, MPI_CHAR, &status);
673                         
674                         string tempBuf = buf4;
675                         if (tempBuf.length() > length) { tempBuf = tempBuf.substr(0, length);  }
676                         istringstream iss (tempBuf,istringstream::in);
677                         delete buf4;
678
679                         Sequence* candidateSeq = new Sequence(iss);  m->gobble(iss);
680                                 
681                         if (candidateSeq->getName() != "") { //incase there is a commented sequence at the end of a file
682                                 
683                                 if      (candidateSeq->getAligned().length() != templateSeqsLength) {  //chimeracheck does not require seqs to be aligned
684                                         m->mothurOut(candidateSeq->getName() + " is not the same length as the template sequences. Skipping."); m->mothurOutEndLine();
685                                 }else{
686                                         //find chimeras
687                                         chimera->getChimeras(candidateSeq);
688                                         
689                                         if (m->control_pressed) {       delete candidateSeq; return 1;  }
690                 
691                                         //print results
692                                         chimera->print(outMPI, outAccMPI);
693                                 }
694                         }
695                         delete candidateSeq;
696                         
697                         //report progress
698                         if((i+1) % 100 == 0){  cout << "Processing sequence: " << (i+1) << endl;                        }
699                 }
700                 //report progress
701                 if(num % 100 != 0){             cout << "Processing sequence: " << num << endl;         }
702                 
703                                 
704                 return 0;
705         }
706         catch(exception& e) {
707                 m->errorOut(e, "ChimeraPintailCommand", "driverMPI");
708                 exit(1);
709         }
710 }
711 #endif
712
713 /**************************************************************************************************/
714
715 int ChimeraPintailCommand::createProcesses(string outputFileName, string filename, string accnos) {
716         try {
717 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
718                 int process = 0;
719                 int num = 0;
720                 
721                 //loop through and create all the processes you want
722                 while (process != processors) {
723                         int pid = fork();
724                         
725                         if (pid > 0) {
726                                 processIDS.push_back(pid);  //create map from line number to pid so you can append files in correct order later
727                                 process++;
728                         }else if (pid == 0){
729                                 num = driver(lines[process], outputFileName + toString(getpid()) + ".temp", filename, accnos + toString(getpid()) + ".temp");
730                                 
731                                 //pass numSeqs to parent
732                                 ofstream out;
733                                 string tempFile = outputFileName + toString(getpid()) + ".num.temp";
734                                 m->openOutputFile(tempFile, out);
735                                 out << num << endl;
736                                 out.close();
737
738                                 exit(0);
739                         }else { 
740                                 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine(); 
741                                 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
742                                 exit(0);
743                         }
744                 }
745                 
746                 //force parent to wait until all the processes are done
747                 for (int i=0;i<processors;i++) { 
748                         int temp = processIDS[i];
749                         wait(&temp);
750                 }
751                 
752                 for (int i = 0; i < processIDS.size(); i++) {
753                         ifstream in;
754                         string tempFile =  outputFileName + toString(processIDS[i]) + ".num.temp";
755                         m->openInputFile(tempFile, in);
756                         if (!in.eof()) { int tempNum = 0; in >> tempNum; num += tempNum; }
757                         in.close(); m->mothurRemove(tempFile);
758                 }
759                 
760                 return num;
761 #endif          
762         }
763         catch(exception& e) {
764                 m->errorOut(e, "ChimeraPintailCommand", "createProcesses");
765                 exit(1);
766         }
767 }
768
769 /**************************************************************************************************/
770
771