]> git.donarmstrong.com Git - mothur.git/blob - chimerapintailcommand.cpp
Merge remote-tracking branch 'mothur/master'
[mothur.git] / chimerapintailcommand.cpp
1 /*
2  *  chimerapintailcommand.cpp
3  *  Mothur
4  *
5  *  Created by westcott on 4/1/10.
6  *  Copyright 2010 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "chimerapintailcommand.h"
11 #include "pintail.h"
12
13
14 //**********************************************************************************************************************
15 vector<string> ChimeraPintailCommand::setParameters(){  
16         try {
17                 CommandParameter ptemplate("reference", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(ptemplate);
18                 CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pfasta);
19                 CommandParameter pconservation("conservation", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pconservation);
20                 CommandParameter pquantile("quantile", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pquantile);
21                 CommandParameter pfilter("filter", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pfilter);
22                 CommandParameter pwindow("window", "Number", "", "0", "", "", "",false,false); parameters.push_back(pwindow);
23                 CommandParameter pincrement("increment", "Number", "", "25", "", "", "",false,false); parameters.push_back(pincrement);
24                 CommandParameter pmask("mask", "String", "", "", "", "", "",false,false); parameters.push_back(pmask);
25                 CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors);
26                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
27                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
28                 CommandParameter psave("save", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(psave);
29
30                 vector<string> myArray;
31                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
32                 return myArray;
33         }
34         catch(exception& e) {
35                 m->errorOut(e, "ChimeraPintailCommand", "setParameters");
36                 exit(1);
37         }
38 }
39 //**********************************************************************************************************************
40 string ChimeraPintailCommand::getHelpString(){  
41         try {
42                 string helpString = "";
43                 helpString += "The chimera.pintail command reads a fastafile and referencefile and outputs potentially chimeric sequences.\n";
44                 helpString += "This command was created using the algorythms described in the 'At Least 1 in 20 16S rRNA Sequence Records Currently Held in the Public Repositories is Estimated To Contain Substantial Anomalies' paper by Kevin E. Ashelford 1, Nadia A. Chuzhanova 3, John C. Fry 1, Antonia J. Jones 2 and Andrew J. Weightman 1.\n";
45                 helpString += "The chimera.pintail command parameters are fasta, reference, filter, mask, processors, window, increment, conservation and quantile.\n";
46                 helpString += "The fasta parameter allows you to enter the fasta file containing your potentially chimeric sequences, and is required unless you have a valid current fasta file. \n";
47                 helpString += "You may enter multiple fasta files by separating their names with dashes. ie. fasta=abrecovery.fasta-amzon.fasta \n";
48                 helpString += "The reference parameter allows you to enter a reference file containing known non-chimeric sequences, and is required. \n";
49                 helpString += "The filter parameter allows you to specify if you would like to apply a vertical and 50% soft filter. \n";
50                 helpString += "The mask parameter allows you to specify a file containing one sequence you wish to use as a mask for the your sequences, by default no mask is applied.  You can apply an ecoli mask by typing, mask=default. \n";
51                 helpString += "The processors parameter allows you to specify how many processors you would like to use.  The default is 1. \n";
52 #ifdef USE_MPI
53                 helpString += "When using MPI, the processors parameter is set to the number of MPI processes running. \n";
54 #endif
55                 helpString += "If the save parameter is set to true the reference sequences will be saved in memory, to clear them later you can use the clear.memory command. Default=f.";
56                 helpString += "The window parameter allows you to specify the window size for searching for chimeras, default=300. \n";
57                 helpString += "The increment parameter allows you to specify how far you move each window while finding chimeric sequences, default=25.\n";
58                 helpString += "The conservation parameter allows you to enter a frequency file containing the highest bases frequency at each place in the alignment.\n";
59                 helpString += "The quantile parameter allows you to enter a file containing quantiles for a template files sequences, if you use the filter the quantile file generated becomes unique to the fasta file you used.\n";
60                 helpString += "The chimera.pintail command should be in the following format: \n";
61                 helpString += "chimera.pintail(fasta=yourFastaFile, reference=yourTemplate) \n";
62                 helpString += "Example: chimera.pintail(fasta=AD.align, reference=silva.bacteria.fasta) \n";
63                 helpString += "Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFastaFile).\n";       
64                 return helpString;
65         }
66         catch(exception& e) {
67                 m->errorOut(e, "ChimeraPintailCommand", "getHelpString");
68                 exit(1);
69         }
70 }
71 //**********************************************************************************************************************
72 string ChimeraPintailCommand::getOutputFileNameTag(string type, string inputName=""){   
73         try {
74         string outputFileName = "";
75                 map<string, vector<string> >::iterator it;
76         
77         //is this a type this command creates
78         it = outputTypes.find(type);
79         if (it == outputTypes.end()) {  m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n"); }
80         else {
81             if (type == "chimera") {  outputFileName =  "pintail.chimeras"; }
82             else if (type == "accnos") {  outputFileName =  "pintail.accnos"; }
83             else { m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n"); m->control_pressed = true;  }
84         }
85         return outputFileName;
86         }
87         catch(exception& e) {
88                 m->errorOut(e, "ChimeraPintailCommand", "getOutputFileNameTag");
89                 exit(1);
90         }
91 }
92 //**********************************************************************************************************************
93 ChimeraPintailCommand::ChimeraPintailCommand(){ 
94         try {
95                 abort = true; calledHelp = true;
96                 setParameters();
97                 vector<string> tempOutNames;
98                 outputTypes["chimera"] = tempOutNames;
99                 outputTypes["accnos"] = tempOutNames;
100         }
101         catch(exception& e) {
102                 m->errorOut(e, "ChimeraPintailCommand", "ChimeraPintailCommand");
103                 exit(1);
104         }
105 }
106 //***************************************************************************************************************
107 ChimeraPintailCommand::ChimeraPintailCommand(string option)  {
108         try {
109                 abort = false; calledHelp = false;   
110                 rdb = ReferenceDB::getInstance();
111                 
112                 //allow user to run help
113                 if(option == "help") { help(); abort = true; calledHelp = true; }
114                 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
115                 
116                 else {
117                         vector<string> myArray = setParameters();
118                         
119                         OptionParser parser(option);
120                         map<string,string> parameters = parser.getParameters();
121                         
122                         ValidParameters validParameter("chimera.pintail");
123                         map<string,string>::iterator it;
124                         
125                         //check to make sure all parameters are valid for command
126                         for (it = parameters.begin(); it != parameters.end(); it++) { 
127                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
128                         }
129                         
130                         vector<string> tempOutNames;
131                         outputTypes["chimera"] = tempOutNames;
132                         outputTypes["accnos"] = tempOutNames;
133                 
134                         //if the user changes the input directory command factory will send this info to us in the output parameter 
135                         inputDir = validParameter.validFile(parameters, "inputdir", false);             
136                         if (inputDir == "not found"){   inputDir = "";          }
137                         else {
138                                 string path;
139                                 it = parameters.find("reference");
140                                 //user has given a template file
141                                 if(it != parameters.end()){ 
142                                         path = m->hasPath(it->second);
143                                         //if the user has not given a path then, add inputdir. else leave path alone.
144                                         if (path == "") {       parameters["reference"] = inputDir + it->second;                }
145                                 }
146                                 
147                                 it = parameters.find("conservation");
148                                 //user has given a template file
149                                 if(it != parameters.end()){ 
150                                         path = m->hasPath(it->second);
151                                         //if the user has not given a path then, add inputdir. else leave path alone.
152                                         if (path == "") {       parameters["conservation"] = inputDir + it->second;             }
153                                 }
154                                 
155                                 it = parameters.find("quantile");
156                                 //user has given a template file
157                                 if(it != parameters.end()){ 
158                                         path = m->hasPath(it->second);
159                                         //if the user has not given a path then, add inputdir. else leave path alone.
160                                         if (path == "") {       parameters["quantile"] = inputDir + it->second;         }
161                                 }
162                         }
163
164                         
165                         //check for required parameters
166                         fastafile = validParameter.validFile(parameters, "fasta", false);
167                         if (fastafile == "not found") {                                 
168                                 //if there is a current fasta file, use it
169                                 string filename = m->getFastaFile(); 
170                                 if (filename != "") { fastaFileNames.push_back(filename); m->mothurOut("Using " + filename + " as input file for the fasta parameter."); m->mothurOutEndLine(); }
171                                 else {  m->mothurOut("You have no current fastafile and the fasta parameter is required."); m->mothurOutEndLine(); abort = true; }
172                         }else { 
173                                 m->splitAtDash(fastafile, fastaFileNames);
174                                 
175                                 //go through files and make sure they are good, if not, then disregard them
176                                 for (int i = 0; i < fastaFileNames.size(); i++) {
177                                         
178                                         bool ignore = false;
179                                         if (fastaFileNames[i] == "current") { 
180                                                 fastaFileNames[i] = m->getFastaFile(); 
181                                                 if (fastaFileNames[i] != "") {  m->mothurOut("Using " + fastaFileNames[i] + " as input file for the fasta parameter where you had given current."); m->mothurOutEndLine(); }
182                                                 else {  
183                                                         m->mothurOut("You have no current fastafile, ignoring current."); m->mothurOutEndLine(); ignore=true; 
184                                                         //erase from file list
185                                                         fastaFileNames.erase(fastaFileNames.begin()+i);
186                                                         i--;
187                                                 }
188                                         }
189                                         
190                                         if (!ignore) {
191                                         
192                                                 if (inputDir != "") {
193                                                         string path = m->hasPath(fastaFileNames[i]);
194                                                         //if the user has not given a path then, add inputdir. else leave path alone.
195                                                         if (path == "") {       fastaFileNames[i] = inputDir + fastaFileNames[i];               }
196                                                 }
197                 
198                                                 int ableToOpen;
199                                                 ifstream in;
200                                                 
201                                                 ableToOpen = m->openInputFile(fastaFileNames[i], in, "noerror");
202                                         
203                                                 //if you can't open it, try default location
204                                                 if (ableToOpen == 1) {
205                                                         if (m->getDefaultPath() != "") { //default path is set
206                                                                 string tryPath = m->getDefaultPath() + m->getSimpleName(fastaFileNames[i]);
207                                                                 m->mothurOut("Unable to open " + fastaFileNames[i] + ". Trying default " + tryPath); m->mothurOutEndLine();
208                                                                 ifstream in2;
209                                                                 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
210                                                                 in2.close();
211                                                                 fastaFileNames[i] = tryPath;
212                                                         }
213                                                 }
214                                                 
215                                                 if (ableToOpen == 1) {
216                                                         if (m->getOutputDir() != "") { //default path is set
217                                                                 string tryPath = m->getOutputDir() + m->getSimpleName(fastaFileNames[i]);
218                                                                 m->mothurOut("Unable to open " + fastaFileNames[i] + ". Trying output directory " + tryPath); m->mothurOutEndLine();
219                                                                 ifstream in2;
220                                                                 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
221                                                                 in2.close();
222                                                                 fastaFileNames[i] = tryPath;
223                                                         }
224                                                 }
225
226                                                 in.close();
227                                                 
228                                                 if (ableToOpen == 1) { 
229                                                         m->mothurOut("Unable to open " + fastaFileNames[i] + ". It will be disregarded."); m->mothurOutEndLine(); 
230                                                         //erase from file list
231                                                         fastaFileNames.erase(fastaFileNames.begin()+i);
232                                                         i--;
233                                                 }else {
234                                                         m->setFastaFile(fastaFileNames[i]);
235                                                 }
236                                         }
237                                 }
238                                 
239                                 //make sure there is at least one valid file left
240                                 if (fastaFileNames.size() == 0) { m->mothurOut("no valid files."); m->mothurOutEndLine(); abort = true; }
241                         }
242                         
243                         string temp;
244                         temp = validParameter.validFile(parameters, "filter", false);                   if (temp == "not found") { temp = "F"; }
245                         filter = m->isTrue(temp);
246                         
247                         temp = validParameter.validFile(parameters, "processors", false);       if (temp == "not found"){       temp = m->getProcessors();      }
248                         m->setProcessors(temp);
249                         m->mothurConvert(temp, processors);
250                         
251                         temp = validParameter.validFile(parameters, "window", false);                   if (temp == "not found") { temp = "0"; }
252                         m->mothurConvert(temp, window);
253                         
254                         temp = validParameter.validFile(parameters, "increment", false);                if (temp == "not found") { temp = "25"; }
255                         m->mothurConvert(temp, increment);
256                         
257                         temp = validParameter.validFile(parameters, "save", false);                     if (temp == "not found"){       temp = "f";                             }
258                         save = m->isTrue(temp); 
259                         rdb->save = save; 
260                         if (save) { //clear out old references
261                                 rdb->clearMemory();     
262                         }
263                         
264                         //this has to go after save so that if the user sets save=t and provides no reference we abort
265                         templatefile = validParameter.validFile(parameters, "reference", true);
266                         if (templatefile == "not found") { 
267                                 //check for saved reference sequences
268                                 if (rdb->referenceSeqs.size() != 0) {
269                                         templatefile = "saved";
270                                 }else {
271                                         m->mothurOut("[ERROR]: You don't have any saved reference sequences and the reference parameter is a required."); 
272                                         m->mothurOutEndLine();
273                                         abort = true; 
274                                 }
275                         }else if (templatefile == "not open") { abort = true; } 
276                         else {  if (save) {     rdb->setSavedReference(templatefile);   }       }
277                         
278                         
279                         maskfile = validParameter.validFile(parameters, "mask", false);
280                         if (maskfile == "not found") { maskfile = "";  }        
281                         else if (maskfile != "default")  { 
282                                 if (inputDir != "") {
283                                         string path = m->hasPath(maskfile);
284                                         //if the user has not given a path then, add inputdir. else leave path alone.
285                                         if (path == "") {       maskfile = inputDir + maskfile;         }
286                                 }
287
288                                 ifstream in;
289                                 int     ableToOpen = m->openInputFile(maskfile, in, "no error");
290                                 if (ableToOpen == 1) { 
291                                         if (m->getDefaultPath() != "") { //default path is set
292                                                         string tryPath = m->getDefaultPath() + m->getSimpleName(maskfile);
293                                                         m->mothurOut("Unable to open " + maskfile + ". Trying default " + tryPath); m->mothurOutEndLine();
294                                                         ifstream in2;
295                                                         ableToOpen = m->openInputFile(tryPath, in2, "noerror");
296                                                         in2.close();
297                                                         maskfile = tryPath;
298                                         }
299                                 }
300                                 
301                                 if (ableToOpen == 1) {
302                                                 if (m->getOutputDir() != "") { //default path is set
303                                                         string tryPath = m->getOutputDir() + m->getSimpleName(maskfile);
304                                                         m->mothurOut("Unable to open " + maskfile + ". Trying output directory " + tryPath); m->mothurOutEndLine();
305                                                         ifstream in2;
306                                                         ableToOpen = m->openInputFile(tryPath, in2, "noerror");
307                                                         in2.close();
308                                                         maskfile = tryPath;
309                                                 }
310                                 }
311                                 
312                                 in.close();
313                                         
314                                 if (ableToOpen == 1) { 
315                                                 m->mothurOut("Unable to open " + maskfile + "."); m->mothurOutEndLine(); 
316                                                 abort = true;
317                                 }
318                         }
319
320                         
321                         //if the user changes the output directory command factory will send this info to us in the output parameter 
322                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = ""; }
323                 
324                         consfile = validParameter.validFile(parameters, "conservation", true);
325                         if (consfile == "not open") { abort = true; }
326                         else if (consfile == "not found") { 
327                                 consfile = "";  
328                                 //check for consfile
329                                 string tempConsFile = m->getRootName(inputDir + m->getSimpleName(templatefile)) + "freq";
330                                 ifstream FileTest(tempConsFile.c_str());
331                                 if(FileTest){   
332                                         bool GoodFile = m->checkReleaseVersion(FileTest, m->getVersion());
333                                         if (GoodFile) {  
334                                                 m->mothurOut("I found " + tempConsFile + " in your input file directory. I will use it to save time."); m->mothurOutEndLine();  consfile = tempConsFile;  FileTest.close();     
335                                         }
336                                 }else {
337                                         string tempConsFile = m->getDefaultPath() + m->getRootName(m->getSimpleName(templatefile)) + "freq";
338                                         ifstream FileTest2(tempConsFile.c_str());
339                                         if(FileTest2){  
340                                                 bool GoodFile = m->checkReleaseVersion(FileTest2, m->getVersion());
341                                                 if (GoodFile) {  
342                                                         m->mothurOut("I found " + tempConsFile + " in your input file directory. I will use it to save time."); m->mothurOutEndLine();  consfile = tempConsFile;  FileTest2.close();    
343                                                 }
344                                         }
345                                 }
346                         }       
347                         
348                         quanfile = validParameter.validFile(parameters, "quantile", true);
349                         if (quanfile == "not open") { abort = true; }
350                         else if (quanfile == "not found") { quanfile = ""; }
351                 }
352         }
353         catch(exception& e) {
354                 m->errorOut(e, "ChimeraPintailCommand", "ChimeraPintailCommand");
355                 exit(1);
356         }
357 }
358 //***************************************************************************************************************
359
360 int ChimeraPintailCommand::execute(){
361         try{
362                 
363                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
364                 
365                 for (int s = 0; s < fastaFileNames.size(); s++) {
366                                 
367                         m->mothurOut("Checking sequences from " + fastaFileNames[s] + " ..." ); m->mothurOutEndLine();
368
369                         int start = time(NULL); 
370                         
371                         //set user options
372                         if (maskfile == "default") { m->mothurOut("I am using the default 236627 EU009184.1 Shigella dysenteriae str. FBD013."); m->mothurOutEndLine();  }
373                         
374                         //check for quantile to save the time
375                         string baseName = templatefile;
376                         if (templatefile == "saved") { baseName = rdb->getSavedReference(); }
377                         
378                         string tempQuan = "";
379                         if ((!filter) && (maskfile == "")) {
380                                 tempQuan = inputDir + m->getRootName(m->getSimpleName(baseName)) + "pintail.quan";
381                         }else if ((!filter) && (maskfile != "")) { 
382                                 tempQuan = inputDir + m->getRootName(m->getSimpleName(baseName)) + "pintail.masked.quan";
383                         }else if ((filter) && (maskfile != "")) { 
384                                 tempQuan = inputDir + m->getRootName(m->getSimpleName(baseName)) + "pintail.filtered." + m->getSimpleName(m->getRootName(fastaFileNames[s])) + "masked.quan";
385                         }else if ((filter) && (maskfile == "")) { 
386                                 tempQuan = inputDir + m->getRootName(m->getSimpleName(baseName)) + "pintail.filtered." + m->getSimpleName(m->getRootName(fastaFileNames[s])) + "quan";
387                         }
388                         
389                         ifstream FileTest(tempQuan.c_str());
390                         if(FileTest){   
391                                 bool GoodFile = m->checkReleaseVersion(FileTest, m->getVersion());
392                                 if (GoodFile) {  
393                                         m->mothurOut("I found " + tempQuan + " in your input file directory. I will use it to save time."); m->mothurOutEndLine();  quanfile = tempQuan;  FileTest.close();     
394                                 }
395                         }else {
396                                 string tryPath = m->getDefaultPath();
397                                 string tempQuan = "";
398                                 if ((!filter) && (maskfile == "")) {
399                                         tempQuan = tryPath + m->getRootName(m->getSimpleName(baseName)) + "pintail.quan";
400                                 }else if ((!filter) && (maskfile != "")) { 
401                                         tempQuan = tryPath + m->getRootName(m->getSimpleName(baseName)) + "pintail.masked.quan";
402                                 }else if ((filter) && (maskfile != "")) { 
403                                         tempQuan = tryPath + m->getRootName(m->getSimpleName(baseName)) + "pintail.filtered." + m->getSimpleName(m->getRootName(fastaFileNames[s])) + "masked.quan";
404                                 }else if ((filter) && (maskfile == "")) { 
405                                         tempQuan = tryPath + m->getRootName(m->getSimpleName(baseName)) + "pintail.filtered." + m->getSimpleName(m->getRootName(fastaFileNames[s])) + "quan";
406                                 }
407                                 
408                                 ifstream FileTest2(tempQuan.c_str());
409                                 if(FileTest2){  
410                                         bool GoodFile = m->checkReleaseVersion(FileTest2, m->getVersion());
411                                         if (GoodFile) {  
412                                                 m->mothurOut("I found " + tempQuan + " in your input file directory. I will use it to save time."); m->mothurOutEndLine();  quanfile = tempQuan;  FileTest2.close();    
413                                         }
414                                 }
415                         }
416                         chimera = new Pintail(fastaFileNames[s], templatefile, filter, processors, maskfile, consfile, quanfile, window, increment, outputDir);
417                         
418                         if (outputDir == "") { outputDir = m->hasPath(fastaFileNames[s]);  }//if user entered a file with a path then preserve it
419                         string outputFileName, accnosFileName;
420                         if (maskfile != "") {
421                                 outputFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + m->getSimpleName(m->getRootName(maskfile)) + getOutputFileNameTag("chimera");
422                                 accnosFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + m->getSimpleName(m->getRootName(maskfile)) + getOutputFileNameTag("accnos");
423                         }else {
424                                 outputFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + getOutputFileNameTag("chimera");
425                                 accnosFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + getOutputFileNameTag("accnos");
426                         }
427                         
428                         if (m->control_pressed) { delete chimera; for (int j = 0; j < outputNames.size(); j++) {        m->mothurRemove(outputNames[j]);        }  return 0;    }
429                         
430                         if (chimera->getUnaligned()) { 
431                                 m->mothurOut("Your template sequences are different lengths, please correct."); m->mothurOutEndLine(); 
432                                 delete chimera;
433                                 return 0; 
434                         }
435                         templateSeqsLength = chimera->getLength();
436                 
437                 #ifdef USE_MPI
438                         int pid, numSeqsPerProcessor; 
439                                 int tag = 2001;
440                                 vector<unsigned long long> MPIPos;
441                                 
442                                 MPI_Status status; 
443                                 MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
444                                 MPI_Comm_size(MPI_COMM_WORLD, &processors); 
445
446                                 MPI_File inMPI;
447                                 MPI_File outMPI;
448                                 MPI_File outMPIAccnos;
449                                 
450                                 int outMode=MPI_MODE_CREATE|MPI_MODE_WRONLY; 
451                                 int inMode=MPI_MODE_RDONLY; 
452                                 
453                                 char outFilename[1024];
454                                 strcpy(outFilename, outputFileName.c_str());
455                                 
456                                 char outAccnosFilename[1024];
457                                 strcpy(outAccnosFilename, accnosFileName.c_str());
458                                 
459                                 char inFileName[1024];
460                                 strcpy(inFileName, fastaFileNames[s].c_str());
461
462                                 MPI_File_open(MPI_COMM_WORLD, inFileName, inMode, MPI_INFO_NULL, &inMPI);  //comm, filename, mode, info, filepointer
463                                 MPI_File_open(MPI_COMM_WORLD, outFilename, outMode, MPI_INFO_NULL, &outMPI);
464                                 MPI_File_open(MPI_COMM_WORLD, outAccnosFilename, outMode, MPI_INFO_NULL, &outMPIAccnos);
465                                 
466                                 if (m->control_pressed) { outputTypes.clear();  MPI_File_close(&inMPI);  MPI_File_close(&outMPI);   MPI_File_close(&outMPIAccnos);  for (int j = 0; j < outputNames.size(); j++) {      m->mothurRemove(outputNames[j]);        }  delete chimera; return 0;  }
467
468                                 if (pid == 0) { //you are the root process 
469                                                                 
470                                         MPIPos = m->setFilePosFasta(fastaFileNames[s], numSeqs); //fills MPIPos, returns numSeqs
471                                         
472                                         //send file positions to all processes
473                                         for(int i = 1; i < processors; i++) { 
474                                                 MPI_Send(&numSeqs, 1, MPI_INT, i, tag, MPI_COMM_WORLD);
475                                                 MPI_Send(&MPIPos[0], (numSeqs+1), MPI_LONG, i, tag, MPI_COMM_WORLD);
476                                         }
477                                         
478                                         //figure out how many sequences you have to align
479                                         numSeqsPerProcessor = numSeqs / processors;
480                                         int startIndex =  pid * numSeqsPerProcessor;
481                                         if(pid == (processors - 1)){    numSeqsPerProcessor = numSeqs - pid * numSeqsPerProcessor;      }
482                                 
483                                         //do your part
484                                         driverMPI(startIndex, numSeqsPerProcessor, inMPI, outMPI, outMPIAccnos, MPIPos);
485                                         
486                                         if (m->control_pressed) { outputTypes.clear();  MPI_File_close(&inMPI);  MPI_File_close(&outMPI);   MPI_File_close(&outMPIAccnos);  m->mothurRemove(outputFileName);  m->mothurRemove(accnosFileName);  for (int j = 0; j < outputNames.size(); j++) {  m->mothurRemove(outputNames[j]);        }  delete chimera; return 0;  }
487                                         
488                                 }else{ //you are a child process
489                                         MPI_Recv(&numSeqs, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
490                                         MPIPos.resize(numSeqs+1);
491                                         MPI_Recv(&MPIPos[0], (numSeqs+1), MPI_LONG, 0, tag, MPI_COMM_WORLD, &status);
492                                         
493                                         //figure out how many sequences you have to align
494                                         numSeqsPerProcessor = numSeqs / processors;
495                                         int startIndex =  pid * numSeqsPerProcessor;
496                                         if(pid == (processors - 1)){    numSeqsPerProcessor = numSeqs - pid * numSeqsPerProcessor;      }
497                                         
498                                         //do your part
499                                         driverMPI(startIndex, numSeqsPerProcessor, inMPI, outMPI, outMPIAccnos, MPIPos);
500                                         
501                                         if (m->control_pressed) { outputTypes.clear();  MPI_File_close(&inMPI);  MPI_File_close(&outMPI);   MPI_File_close(&outMPIAccnos);  for (int j = 0; j < outputNames.size(); j++) {      m->mothurRemove(outputNames[j]);        }  delete chimera; return 0;  }
502                                 }
503                                 
504                                 //close files 
505                                 MPI_File_close(&inMPI);
506                                 MPI_File_close(&outMPI);
507                                 MPI_File_close(&outMPIAccnos);
508                                 MPI_Barrier(MPI_COMM_WORLD); //make everyone wait - just in case
509                 #else
510                                                 
511                         //break up file
512                         #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
513                                 vector<unsigned long long> positions = m->divideFile(fastaFileNames[s], processors);
514                         
515                                 for (int i = 0; i < (positions.size()-1); i++) {
516                                         lines.push_back(new linePair(positions[i], positions[(i+1)]));
517                                 }       
518                         
519                                 if(processors == 1){
520                 
521                                         numSeqs = driver(lines[0], outputFileName, fastaFileNames[s], accnosFileName);
522                                         
523                                         if (m->control_pressed) { outputTypes.clear(); m->mothurRemove(outputFileName); m->mothurRemove(accnosFileName); for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]);        } for (int i = 0; i < lines.size(); i++) {  delete lines[i];  }  lines.clear(); delete chimera; return 0; }
524                                         
525                                 }else{
526                                         processIDS.resize(0);
527                                         
528                                         numSeqs = createProcesses(outputFileName, fastaFileNames[s], accnosFileName); 
529                                 
530                                         rename((outputFileName + toString(processIDS[0]) + ".temp").c_str(), outputFileName.c_str());
531                                         rename((accnosFileName + toString(processIDS[0]) + ".temp").c_str(), accnosFileName.c_str());
532                                                 
533                                         //append output files
534                                         for(int i=1;i<processors;i++){
535                                                 m->appendFiles((outputFileName + toString(processIDS[i]) + ".temp"), outputFileName);
536                                                 m->mothurRemove((outputFileName + toString(processIDS[i]) + ".temp"));
537                                         }
538                                         
539                                         //append output files
540                                         for(int i=1;i<processors;i++){
541                                                 m->appendFiles((accnosFileName + toString(processIDS[i]) + ".temp"), accnosFileName);
542                                                 m->mothurRemove((accnosFileName + toString(processIDS[i]) + ".temp"));
543                                         }
544                                                                                 
545                                         if (m->control_pressed) { 
546                                                 m->mothurRemove(outputFileName); 
547                                                 m->mothurRemove(accnosFileName);
548                                                 for (int j = 0; j < outputNames.size(); j++) {  m->mothurRemove(outputNames[j]);        } outputTypes.clear();
549                                                 for (int i = 0; i < lines.size(); i++) {  delete lines[i];  }  lines.clear();
550                                                 delete chimera;
551                                                 return 0;
552                                         }
553                                 }
554
555                         #else
556                                 lines.push_back(new linePair(0, 1000));
557                                 numSeqs = driver(lines[0], outputFileName, fastaFileNames[s], accnosFileName);
558                                 
559                                 if (m->control_pressed) { outputTypes.clear(); m->mothurRemove(outputFileName); m->mothurRemove(accnosFileName); for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]);        } for (int i = 0; i < lines.size(); i++) {  delete lines[i];  }  lines.clear(); delete chimera; return 0; }
560                         #endif
561                         
562                 #endif  
563                 
564                         delete chimera;
565                         for (int i = 0; i < lines.size(); i++) {  delete lines[i];  }  lines.clear();
566                         
567                         outputNames.push_back(outputFileName); outputTypes["chimera"].push_back(outputFileName);
568                         outputNames.push_back(accnosFileName); outputTypes["accnos"].push_back(accnosFileName);
569                         
570                         m->mothurOutEndLine();
571                         m->mothurOutEndLine(); m->mothurOut("It took " + toString(time(NULL) - start) + " secs to check " + toString(numSeqs) + " sequences."); m->mothurOutEndLine();
572                 }
573                 
574                 //set accnos file as new current accnosfile
575                 string current = "";
576                 itTypes = outputTypes.find("accnos");
577                 if (itTypes != outputTypes.end()) {
578                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setAccnosFile(current); }
579                 }
580                 
581                 m->mothurOutEndLine();
582                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
583                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }       
584                 m->mothurOutEndLine();
585                         
586                 return 0;
587                 
588         }
589         catch(exception& e) {
590                 m->errorOut(e, "ChimeraPintailCommand", "execute");
591                 exit(1);
592         }
593 }
594 //**********************************************************************************************************************
595
596 int ChimeraPintailCommand::driver(linePair* filePos, string outputFName, string filename, string accnos){
597         try {
598                 ofstream out;
599                 m->openOutputFile(outputFName, out);
600                 
601                 ofstream out2;
602                 m->openOutputFile(accnos, out2);
603                 
604                 ifstream inFASTA;
605                 m->openInputFile(filename, inFASTA);
606
607                 inFASTA.seekg(filePos->start);
608
609                 bool done = false;
610                 int count = 0;
611         
612                 while (!done) {
613                                 
614                         if (m->control_pressed) {       return 1;       }
615                 
616                         Sequence* candidateSeq = new Sequence(inFASTA);  m->gobble(inFASTA);
617                                 
618                         if (candidateSeq->getName() != "") { //incase there is a commented sequence at the end of a file
619                                 
620                                 if (candidateSeq->getAligned().length() != templateSeqsLength)  {  //chimeracheck does not require seqs to be aligned
621                                         m->mothurOut(candidateSeq->getName() + " is not the same length as the template sequences. Skipping."); m->mothurOutEndLine();
622                                 }else{
623                                         //find chimeras
624                                         chimera->getChimeras(candidateSeq);
625                                         
626                                         if (m->control_pressed) {       delete candidateSeq; return 1;  }
627                 
628                                         //print results
629                                         chimera->print(out, out2);
630                                 }
631                                 count++;
632                         }
633                         delete candidateSeq;
634                         
635                         #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
636                                 unsigned long long pos = inFASTA.tellg();
637                                 if ((pos == -1) || (pos >= filePos->end)) { break; }
638                         #else
639                                 if (inFASTA.eof()) { break; }
640                         #endif
641                         
642                         //report progress
643                         if((count) % 100 == 0){ m->mothurOut("Processing sequence: " + toString(count)); m->mothurOutEndLine();         }
644                 }
645                 //report progress
646                 if((count) % 100 != 0){ m->mothurOut("Processing sequence: " + toString(count)); m->mothurOutEndLine();         }
647                 
648                 out.close();
649                 out2.close();
650                 inFASTA.close();
651                                 
652                 return count;
653         }
654         catch(exception& e) {
655                 m->errorOut(e, "ChimeraPintailCommand", "driver");
656                 exit(1);
657         }
658 }
659 //**********************************************************************************************************************
660 #ifdef USE_MPI
661 int ChimeraPintailCommand::driverMPI(int start, int num, MPI_File& inMPI, MPI_File& outMPI, MPI_File& outAccMPI, vector<unsigned long long>& MPIPos){
662         try {
663                                 
664                 MPI_Status status; 
665                 int pid;
666                 MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
667                 
668                 for(int i=0;i<num;i++){
669                         
670                         if (m->control_pressed) {       return 1;       }
671                         
672                         //read next sequence
673                         int length = MPIPos[start+i+1] - MPIPos[start+i];
674         
675                         char* buf4 = new char[length];
676                         MPI_File_read_at(inMPI, MPIPos[start+i], buf4, length, MPI_CHAR, &status);
677                         
678                         string tempBuf = buf4;
679                         if (tempBuf.length() > length) { tempBuf = tempBuf.substr(0, length);  }
680                         istringstream iss (tempBuf,istringstream::in);
681                         delete buf4;
682
683                         Sequence* candidateSeq = new Sequence(iss);  m->gobble(iss);
684                                 
685                         if (candidateSeq->getName() != "") { //incase there is a commented sequence at the end of a file
686                                 
687                                 if      (candidateSeq->getAligned().length() != templateSeqsLength) {  //chimeracheck does not require seqs to be aligned
688                                         m->mothurOut(candidateSeq->getName() + " is not the same length as the template sequences. Skipping."); m->mothurOutEndLine();
689                                 }else{
690                                         //find chimeras
691                                         chimera->getChimeras(candidateSeq);
692                                         
693                                         if (m->control_pressed) {       delete candidateSeq; return 1;  }
694                 
695                                         //print results
696                                         chimera->print(outMPI, outAccMPI);
697                                 }
698                         }
699                         delete candidateSeq;
700                         
701                         //report progress
702                         if((i+1) % 100 == 0){  cout << "Processing sequence: " << (i+1) << endl;        m->mothurOutJustToLog("Processing sequence: " + toString(i+1) + "\n");          }
703                 }
704                 //report progress
705                 if(num % 100 != 0){             cout << "Processing sequence: " << num << endl; m->mothurOutJustToLog("Processing sequence: " + toString(num) + "\n");  }
706                 
707                                 
708                 return 0;
709         }
710         catch(exception& e) {
711                 m->errorOut(e, "ChimeraPintailCommand", "driverMPI");
712                 exit(1);
713         }
714 }
715 #endif
716
717 /**************************************************************************************************/
718
719 int ChimeraPintailCommand::createProcesses(string outputFileName, string filename, string accnos) {
720         try {
721 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
722                 int process = 0;
723                 int num = 0;
724                 
725                 //loop through and create all the processes you want
726                 while (process != processors) {
727                         int pid = fork();
728                         
729                         if (pid > 0) {
730                                 processIDS.push_back(pid);  //create map from line number to pid so you can append files in correct order later
731                                 process++;
732                         }else if (pid == 0){
733                                 num = driver(lines[process], outputFileName + toString(getpid()) + ".temp", filename, accnos + toString(getpid()) + ".temp");
734                                 
735                                 //pass numSeqs to parent
736                                 ofstream out;
737                                 string tempFile = outputFileName + toString(getpid()) + ".num.temp";
738                                 m->openOutputFile(tempFile, out);
739                                 out << num << endl;
740                                 out.close();
741
742                                 exit(0);
743                         }else { 
744                                 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine(); 
745                                 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
746                                 exit(0);
747                         }
748                 }
749                 
750                 //force parent to wait until all the processes are done
751                 for (int i=0;i<processors;i++) { 
752                         int temp = processIDS[i];
753                         wait(&temp);
754                 }
755                 
756                 for (int i = 0; i < processIDS.size(); i++) {
757                         ifstream in;
758                         string tempFile =  outputFileName + toString(processIDS[i]) + ".num.temp";
759                         m->openInputFile(tempFile, in);
760                         if (!in.eof()) { int tempNum = 0; in >> tempNum; num += tempNum; }
761                         in.close(); m->mothurRemove(tempFile);
762                 }
763                 
764                 return num;
765 #endif          
766         }
767         catch(exception& e) {
768                 m->errorOut(e, "ChimeraPintailCommand", "createProcesses");
769                 exit(1);
770         }
771 }
772
773 /**************************************************************************************************/
774
775