]> git.donarmstrong.com Git - mothur.git/blob - chimerapintailcommand.cpp
metastats in progress
[mothur.git] / chimerapintailcommand.cpp
1 /*
2  *  chimerapintailcommand.cpp
3  *  Mothur
4  *
5  *  Created by westcott on 4/1/10.
6  *  Copyright 2010 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "chimerapintailcommand.h"
11 #include "pintail.h"
12
13
14 //**********************************************************************************************************************
15 vector<string> ChimeraPintailCommand::setParameters(){  
16         try {
17                 CommandParameter ptemplate("reference", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(ptemplate);
18                 CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pfasta);
19                 CommandParameter pconservation("conservation", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pconservation);
20                 CommandParameter pquantile("quantile", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pquantile);
21                 CommandParameter pfilter("filter", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pfilter);
22                 CommandParameter pwindow("window", "Number", "", "0", "", "", "",false,false); parameters.push_back(pwindow);
23                 CommandParameter pincrement("increment", "Number", "", "25", "", "", "",false,false); parameters.push_back(pincrement);
24                 CommandParameter pmask("mask", "String", "", "", "", "", "",false,false); parameters.push_back(pmask);
25                 CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors);
26                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
27                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
28                 CommandParameter psave("save", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(psave);
29
30                 vector<string> myArray;
31                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
32                 return myArray;
33         }
34         catch(exception& e) {
35                 m->errorOut(e, "ChimeraPintailCommand", "setParameters");
36                 exit(1);
37         }
38 }
39 //**********************************************************************************************************************
40 string ChimeraPintailCommand::getHelpString(){  
41         try {
42                 string helpString = "";
43                 helpString += "The chimera.pintail command reads a fastafile and referencefile and outputs potentially chimeric sequences.\n";
44                 helpString += "This command was created using the algorythms described in the 'At Least 1 in 20 16S rRNA Sequence Records Currently Held in the Public Repositories is Estimated To Contain Substantial Anomalies' paper by Kevin E. Ashelford 1, Nadia A. Chuzhanova 3, John C. Fry 1, Antonia J. Jones 2 and Andrew J. Weightman 1.\n";
45                 helpString += "The chimera.pintail command parameters are fasta, reference, filter, mask, processors, window, increment, conservation and quantile.\n";
46                 helpString += "The fasta parameter allows you to enter the fasta file containing your potentially chimeric sequences, and is required unless you have a valid current fasta file. \n";
47                 helpString += "You may enter multiple fasta files by separating their names with dashes. ie. fasta=abrecovery.fasta-amzon.fasta \n";
48                 helpString += "The reference parameter allows you to enter a reference file containing known non-chimeric sequences, and is required. \n";
49                 helpString += "The filter parameter allows you to specify if you would like to apply a vertical and 50% soft filter. \n";
50                 helpString += "The mask parameter allows you to specify a file containing one sequence you wish to use as a mask for the your sequences, by default no mask is applied.  You can apply an ecoli mask by typing, mask=default. \n";
51                 helpString += "The processors parameter allows you to specify how many processors you would like to use.  The default is 1. \n";
52 #ifdef USE_MPI
53                 helpString += "When using MPI, the processors parameter is set to the number of MPI processes running. \n";
54 #endif
55                 helpString += "If the save parameter is set to true the reference sequences will be saved in memory, to clear them later you can use the clear.memory command. Default=f.";
56                 helpString += "The window parameter allows you to specify the window size for searching for chimeras, default=300. \n";
57                 helpString += "The increment parameter allows you to specify how far you move each window while finding chimeric sequences, default=25.\n";
58                 helpString += "The conservation parameter allows you to enter a frequency file containing the highest bases frequency at each place in the alignment.\n";
59                 helpString += "The quantile parameter allows you to enter a file containing quantiles for a template files sequences, if you use the filter the quantile file generated becomes unique to the fasta file you used.\n";
60                 helpString += "The chimera.pintail command should be in the following format: \n";
61                 helpString += "chimera.pintail(fasta=yourFastaFile, reference=yourTemplate) \n";
62                 helpString += "Example: chimera.pintail(fasta=AD.align, reference=silva.bacteria.fasta) \n";
63                 helpString += "Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFastaFile).\n";       
64                 return helpString;
65         }
66         catch(exception& e) {
67                 m->errorOut(e, "ChimeraPintailCommand", "getHelpString");
68                 exit(1);
69         }
70 }
71 //**********************************************************************************************************************
72 ChimeraPintailCommand::ChimeraPintailCommand(){ 
73         try {
74                 abort = true; calledHelp = true;
75                 setParameters();
76                 vector<string> tempOutNames;
77                 outputTypes["chimera"] = tempOutNames;
78                 outputTypes["accnos"] = tempOutNames;
79         }
80         catch(exception& e) {
81                 m->errorOut(e, "ChimeraPintailCommand", "ChimeraPintailCommand");
82                 exit(1);
83         }
84 }
85 //***************************************************************************************************************
86 ChimeraPintailCommand::ChimeraPintailCommand(string option)  {
87         try {
88                 abort = false; calledHelp = false;   
89                 rdb = ReferenceDB::getInstance();
90                 
91                 //allow user to run help
92                 if(option == "help") { help(); abort = true; calledHelp = true; }
93                 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
94                 
95                 else {
96                         vector<string> myArray = setParameters();
97                         
98                         OptionParser parser(option);
99                         map<string,string> parameters = parser.getParameters();
100                         
101                         ValidParameters validParameter("chimera.pintail");
102                         map<string,string>::iterator it;
103                         
104                         //check to make sure all parameters are valid for command
105                         for (it = parameters.begin(); it != parameters.end(); it++) { 
106                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
107                         }
108                         
109                         vector<string> tempOutNames;
110                         outputTypes["chimera"] = tempOutNames;
111                         outputTypes["accnos"] = tempOutNames;
112                 
113                         //if the user changes the input directory command factory will send this info to us in the output parameter 
114                         inputDir = validParameter.validFile(parameters, "inputdir", false);             
115                         if (inputDir == "not found"){   inputDir = "";          }
116                         else {
117                                 string path;
118                                 it = parameters.find("reference");
119                                 //user has given a template file
120                                 if(it != parameters.end()){ 
121                                         path = m->hasPath(it->second);
122                                         //if the user has not given a path then, add inputdir. else leave path alone.
123                                         if (path == "") {       parameters["reference"] = inputDir + it->second;                }
124                                 }
125                                 
126                                 it = parameters.find("conservation");
127                                 //user has given a template file
128                                 if(it != parameters.end()){ 
129                                         path = m->hasPath(it->second);
130                                         //if the user has not given a path then, add inputdir. else leave path alone.
131                                         if (path == "") {       parameters["conservation"] = inputDir + it->second;             }
132                                 }
133                                 
134                                 it = parameters.find("quantile");
135                                 //user has given a template file
136                                 if(it != parameters.end()){ 
137                                         path = m->hasPath(it->second);
138                                         //if the user has not given a path then, add inputdir. else leave path alone.
139                                         if (path == "") {       parameters["quantile"] = inputDir + it->second;         }
140                                 }
141                         }
142
143                         
144                         //check for required parameters
145                         fastafile = validParameter.validFile(parameters, "fasta", false);
146                         if (fastafile == "not found") {                                 
147                                 //if there is a current fasta file, use it
148                                 string filename = m->getFastaFile(); 
149                                 if (filename != "") { fastaFileNames.push_back(filename); m->mothurOut("Using " + filename + " as input file for the fasta parameter."); m->mothurOutEndLine(); }
150                                 else {  m->mothurOut("You have no current fastafile and the fasta parameter is required."); m->mothurOutEndLine(); abort = true; }
151                         }else { 
152                                 m->splitAtDash(fastafile, fastaFileNames);
153                                 
154                                 //go through files and make sure they are good, if not, then disregard them
155                                 for (int i = 0; i < fastaFileNames.size(); i++) {
156                                         
157                                         bool ignore = false;
158                                         if (fastaFileNames[i] == "current") { 
159                                                 fastaFileNames[i] = m->getFastaFile(); 
160                                                 if (fastaFileNames[i] != "") {  m->mothurOut("Using " + fastaFileNames[i] + " as input file for the fasta parameter where you had given current."); m->mothurOutEndLine(); }
161                                                 else {  
162                                                         m->mothurOut("You have no current fastafile, ignoring current."); m->mothurOutEndLine(); ignore=true; 
163                                                         //erase from file list
164                                                         fastaFileNames.erase(fastaFileNames.begin()+i);
165                                                         i--;
166                                                 }
167                                         }
168                                         
169                                         if (!ignore) {
170                                         
171                                                 if (inputDir != "") {
172                                                         string path = m->hasPath(fastaFileNames[i]);
173                                                         //if the user has not given a path then, add inputdir. else leave path alone.
174                                                         if (path == "") {       fastaFileNames[i] = inputDir + fastaFileNames[i];               }
175                                                 }
176                 
177                                                 int ableToOpen;
178                                                 ifstream in;
179                                                 
180                                                 ableToOpen = m->openInputFile(fastaFileNames[i], in, "noerror");
181                                         
182                                                 //if you can't open it, try default location
183                                                 if (ableToOpen == 1) {
184                                                         if (m->getDefaultPath() != "") { //default path is set
185                                                                 string tryPath = m->getDefaultPath() + m->getSimpleName(fastaFileNames[i]);
186                                                                 m->mothurOut("Unable to open " + fastaFileNames[i] + ". Trying default " + tryPath); m->mothurOutEndLine();
187                                                                 ifstream in2;
188                                                                 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
189                                                                 in2.close();
190                                                                 fastaFileNames[i] = tryPath;
191                                                         }
192                                                 }
193                                                 
194                                                 if (ableToOpen == 1) {
195                                                         if (m->getOutputDir() != "") { //default path is set
196                                                                 string tryPath = m->getOutputDir() + m->getSimpleName(fastaFileNames[i]);
197                                                                 m->mothurOut("Unable to open " + fastaFileNames[i] + ". Trying output directory " + tryPath); m->mothurOutEndLine();
198                                                                 ifstream in2;
199                                                                 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
200                                                                 in2.close();
201                                                                 fastaFileNames[i] = tryPath;
202                                                         }
203                                                 }
204
205                                                 in.close();
206                                                 
207                                                 if (ableToOpen == 1) { 
208                                                         m->mothurOut("Unable to open " + fastaFileNames[i] + ". It will be disregarded."); m->mothurOutEndLine(); 
209                                                         //erase from file list
210                                                         fastaFileNames.erase(fastaFileNames.begin()+i);
211                                                         i--;
212                                                 }else {
213                                                         m->setFastaFile(fastaFileNames[i]);
214                                                 }
215                                         }
216                                 }
217                                 
218                                 //make sure there is at least one valid file left
219                                 if (fastaFileNames.size() == 0) { m->mothurOut("no valid files."); m->mothurOutEndLine(); abort = true; }
220                         }
221                         
222                         string temp;
223                         temp = validParameter.validFile(parameters, "filter", false);                   if (temp == "not found") { temp = "F"; }
224                         filter = m->isTrue(temp);
225                         
226                         temp = validParameter.validFile(parameters, "processors", false);       if (temp == "not found"){       temp = m->getProcessors();      }
227                         m->setProcessors(temp);
228                         convert(temp, processors);
229                         
230                         temp = validParameter.validFile(parameters, "window", false);                   if (temp == "not found") { temp = "0"; }
231                         convert(temp, window);
232                         
233                         temp = validParameter.validFile(parameters, "increment", false);                if (temp == "not found") { temp = "25"; }
234                         convert(temp, increment);
235                         
236                         temp = validParameter.validFile(parameters, "save", false);                     if (temp == "not found"){       temp = "f";                             }
237                         save = m->isTrue(temp); 
238                         rdb->save = save; 
239                         if (save) { //clear out old references
240                                 rdb->clearMemory();     
241                         }
242                         
243                         //this has to go after save so that if the user sets save=t and provides no reference we abort
244                         templatefile = validParameter.validFile(parameters, "reference", true);
245                         if (templatefile == "not found") { 
246                                 //check for saved reference sequences
247                                 if (rdb->referenceSeqs.size() != 0) {
248                                         templatefile = "saved";
249                                 }else {
250                                         m->mothurOut("[ERROR]: You don't have any saved reference sequences and the reference parameter is a required."); 
251                                         m->mothurOutEndLine();
252                                         abort = true; 
253                                 }
254                         }else if (templatefile == "not open") { abort = true; } 
255                         else {  if (save) {     rdb->setSavedReference(templatefile);   }       }
256                         
257                         
258                         maskfile = validParameter.validFile(parameters, "mask", false);
259                         if (maskfile == "not found") { maskfile = "";  }        
260                         else if (maskfile != "default")  { 
261                                 if (inputDir != "") {
262                                         string path = m->hasPath(maskfile);
263                                         //if the user has not given a path then, add inputdir. else leave path alone.
264                                         if (path == "") {       maskfile = inputDir + maskfile;         }
265                                 }
266
267                                 ifstream in;
268                                 int     ableToOpen = m->openInputFile(maskfile, in, "no error");
269                                 if (ableToOpen == 1) { 
270                                         if (m->getDefaultPath() != "") { //default path is set
271                                                         string tryPath = m->getDefaultPath() + m->getSimpleName(maskfile);
272                                                         m->mothurOut("Unable to open " + maskfile + ". Trying default " + tryPath); m->mothurOutEndLine();
273                                                         ifstream in2;
274                                                         ableToOpen = m->openInputFile(tryPath, in2, "noerror");
275                                                         in2.close();
276                                                         maskfile = tryPath;
277                                         }
278                                 }
279                                 
280                                 if (ableToOpen == 1) {
281                                                 if (m->getOutputDir() != "") { //default path is set
282                                                         string tryPath = m->getOutputDir() + m->getSimpleName(maskfile);
283                                                         m->mothurOut("Unable to open " + maskfile + ". Trying output directory " + tryPath); m->mothurOutEndLine();
284                                                         ifstream in2;
285                                                         ableToOpen = m->openInputFile(tryPath, in2, "noerror");
286                                                         in2.close();
287                                                         maskfile = tryPath;
288                                                 }
289                                 }
290                                 
291                                 in.close();
292                                         
293                                 if (ableToOpen == 1) { 
294                                                 m->mothurOut("Unable to open " + maskfile + "."); m->mothurOutEndLine(); 
295                                                 abort = true;
296                                 }
297                         }
298
299                         
300                         //if the user changes the output directory command factory will send this info to us in the output parameter 
301                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = ""; }
302                 
303                         consfile = validParameter.validFile(parameters, "conservation", true);
304                         if (consfile == "not open") { abort = true; }
305                         else if (consfile == "not found") { 
306                                 consfile = "";  
307                                 //check for consfile
308                                 string tempConsFile = m->getRootName(inputDir + m->getSimpleName(templatefile)) + "freq";
309                                 ifstream FileTest(tempConsFile.c_str());
310                                 if(FileTest){   
311                                         bool GoodFile = m->checkReleaseVersion(FileTest, m->getVersion());
312                                         if (GoodFile) {  
313                                                 m->mothurOut("I found " + tempConsFile + " in your input file directory. I will use it to save time."); m->mothurOutEndLine();  consfile = tempConsFile;  FileTest.close();     
314                                         }
315                                 }else {
316                                         string tempConsFile = m->getDefaultPath() + m->getRootName(m->getSimpleName(templatefile)) + "freq";
317                                         ifstream FileTest2(tempConsFile.c_str());
318                                         if(FileTest2){  
319                                                 bool GoodFile = m->checkReleaseVersion(FileTest2, m->getVersion());
320                                                 if (GoodFile) {  
321                                                         m->mothurOut("I found " + tempConsFile + " in your input file directory. I will use it to save time."); m->mothurOutEndLine();  consfile = tempConsFile;  FileTest2.close();    
322                                                 }
323                                         }
324                                 }
325                         }       
326                         
327                         quanfile = validParameter.validFile(parameters, "quantile", true);
328                         if (quanfile == "not open") { abort = true; }
329                         else if (quanfile == "not found") { quanfile = ""; }
330                 }
331         }
332         catch(exception& e) {
333                 m->errorOut(e, "ChimeraPintailCommand", "ChimeraPintailCommand");
334                 exit(1);
335         }
336 }
337 //***************************************************************************************************************
338
339 int ChimeraPintailCommand::execute(){
340         try{
341                 
342                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
343                 
344                 for (int s = 0; s < fastaFileNames.size(); s++) {
345                                 
346                         m->mothurOut("Checking sequences from " + fastaFileNames[s] + " ..." ); m->mothurOutEndLine();
347
348                         int start = time(NULL); 
349                         
350                         //set user options
351                         if (maskfile == "default") { m->mothurOut("I am using the default 236627 EU009184.1 Shigella dysenteriae str. FBD013."); m->mothurOutEndLine();  }
352                         
353                         //check for quantile to save the time
354                         string baseName = templatefile;
355                         if (templatefile == "saved") { baseName = rdb->getSavedReference(); }
356                         
357                         string tempQuan = "";
358                         if ((!filter) && (maskfile == "")) {
359                                 tempQuan = inputDir + m->getRootName(m->getSimpleName(baseName)) + "pintail.quan";
360                         }else if ((!filter) && (maskfile != "")) { 
361                                 tempQuan = inputDir + m->getRootName(m->getSimpleName(baseName)) + "pintail.masked.quan";
362                         }else if ((filter) && (maskfile != "")) { 
363                                 tempQuan = inputDir + m->getRootName(m->getSimpleName(baseName)) + "pintail.filtered." + m->getSimpleName(m->getRootName(fastaFileNames[s])) + "masked.quan";
364                         }else if ((filter) && (maskfile == "")) { 
365                                 tempQuan = inputDir + m->getRootName(m->getSimpleName(baseName)) + "pintail.filtered." + m->getSimpleName(m->getRootName(fastaFileNames[s])) + "quan";
366                         }
367                         
368                         ifstream FileTest(tempQuan.c_str());
369                         if(FileTest){   
370                                 bool GoodFile = m->checkReleaseVersion(FileTest, m->getVersion());
371                                 if (GoodFile) {  
372                                         m->mothurOut("I found " + tempQuan + " in your input file directory. I will use it to save time."); m->mothurOutEndLine();  quanfile = tempQuan;  FileTest.close();     
373                                 }
374                         }else {
375                                 string tryPath = m->getDefaultPath();
376                                 string tempQuan = "";
377                                 if ((!filter) && (maskfile == "")) {
378                                         tempQuan = tryPath + m->getRootName(m->getSimpleName(baseName)) + "pintail.quan";
379                                 }else if ((!filter) && (maskfile != "")) { 
380                                         tempQuan = tryPath + m->getRootName(m->getSimpleName(baseName)) + "pintail.masked.quan";
381                                 }else if ((filter) && (maskfile != "")) { 
382                                         tempQuan = tryPath + m->getRootName(m->getSimpleName(baseName)) + "pintail.filtered." + m->getSimpleName(m->getRootName(fastaFileNames[s])) + "masked.quan";
383                                 }else if ((filter) && (maskfile == "")) { 
384                                         tempQuan = tryPath + m->getRootName(m->getSimpleName(baseName)) + "pintail.filtered." + m->getSimpleName(m->getRootName(fastaFileNames[s])) + "quan";
385                                 }
386                                 
387                                 ifstream FileTest2(tempQuan.c_str());
388                                 if(FileTest2){  
389                                         bool GoodFile = m->checkReleaseVersion(FileTest2, m->getVersion());
390                                         if (GoodFile) {  
391                                                 m->mothurOut("I found " + tempQuan + " in your input file directory. I will use it to save time."); m->mothurOutEndLine();  quanfile = tempQuan;  FileTest2.close();    
392                                         }
393                                 }
394                         }
395                         chimera = new Pintail(fastaFileNames[s], templatefile, filter, processors, maskfile, consfile, quanfile, window, increment, outputDir);
396                         
397                         if (outputDir == "") { outputDir = m->hasPath(fastaFileNames[s]);  }//if user entered a file with a path then preserve it
398                         string outputFileName, accnosFileName;
399                         if (maskfile != "") {
400                                 outputFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + m->getSimpleName(m->getRootName(maskfile)) + ".pintail.chimeras";
401                                 accnosFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + m->getSimpleName(m->getRootName(maskfile)) + ".pintail.accnos";
402                         }else {
403                                 outputFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s]))  + "pintail.chimeras";
404                                 accnosFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s]))  + "pintail.accnos";
405                         }
406                         
407                         if (m->control_pressed) { delete chimera; for (int j = 0; j < outputNames.size(); j++) {        m->mothurRemove(outputNames[j]);        }  return 0;    }
408                         
409                         if (chimera->getUnaligned()) { 
410                                 m->mothurOut("Your template sequences are different lengths, please correct."); m->mothurOutEndLine(); 
411                                 delete chimera;
412                                 return 0; 
413                         }
414                         templateSeqsLength = chimera->getLength();
415                 
416                 #ifdef USE_MPI
417                         int pid, numSeqsPerProcessor; 
418                                 int tag = 2001;
419                                 vector<unsigned long long> MPIPos;
420                                 
421                                 MPI_Status status; 
422                                 MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
423                                 MPI_Comm_size(MPI_COMM_WORLD, &processors); 
424
425                                 MPI_File inMPI;
426                                 MPI_File outMPI;
427                                 MPI_File outMPIAccnos;
428                                 
429                                 int outMode=MPI_MODE_CREATE|MPI_MODE_WRONLY; 
430                                 int inMode=MPI_MODE_RDONLY; 
431                                 
432                                 char outFilename[1024];
433                                 strcpy(outFilename, outputFileName.c_str());
434                                 
435                                 char outAccnosFilename[1024];
436                                 strcpy(outAccnosFilename, accnosFileName.c_str());
437                                 
438                                 char inFileName[1024];
439                                 strcpy(inFileName, fastaFileNames[s].c_str());
440
441                                 MPI_File_open(MPI_COMM_WORLD, inFileName, inMode, MPI_INFO_NULL, &inMPI);  //comm, filename, mode, info, filepointer
442                                 MPI_File_open(MPI_COMM_WORLD, outFilename, outMode, MPI_INFO_NULL, &outMPI);
443                                 MPI_File_open(MPI_COMM_WORLD, outAccnosFilename, outMode, MPI_INFO_NULL, &outMPIAccnos);
444                                 
445                                 if (m->control_pressed) { outputTypes.clear();  MPI_File_close(&inMPI);  MPI_File_close(&outMPI);   MPI_File_close(&outMPIAccnos);  for (int j = 0; j < outputNames.size(); j++) {      m->mothurRemove(outputNames[j]);        }  delete chimera; return 0;  }
446
447                                 if (pid == 0) { //you are the root process 
448                                                                 
449                                         MPIPos = m->setFilePosFasta(fastaFileNames[s], numSeqs); //fills MPIPos, returns numSeqs
450                                         
451                                         //send file positions to all processes
452                                         for(int i = 1; i < processors; i++) { 
453                                                 MPI_Send(&numSeqs, 1, MPI_INT, i, tag, MPI_COMM_WORLD);
454                                                 MPI_Send(&MPIPos[0], (numSeqs+1), MPI_LONG, i, tag, MPI_COMM_WORLD);
455                                         }
456                                         
457                                         //figure out how many sequences you have to align
458                                         numSeqsPerProcessor = numSeqs / processors;
459                                         int startIndex =  pid * numSeqsPerProcessor;
460                                         if(pid == (processors - 1)){    numSeqsPerProcessor = numSeqs - pid * numSeqsPerProcessor;      }
461                                 
462                                         //do your part
463                                         driverMPI(startIndex, numSeqsPerProcessor, inMPI, outMPI, outMPIAccnos, MPIPos);
464                                         
465                                         if (m->control_pressed) { outputTypes.clear();  MPI_File_close(&inMPI);  MPI_File_close(&outMPI);   MPI_File_close(&outMPIAccnos);  m->mothurRemove(outputFileName);  m->mothurRemove(accnosFileName);  for (int j = 0; j < outputNames.size(); j++) {  m->mothurRemove(outputNames[j]);        }  delete chimera; return 0;  }
466                                         
467                                 }else{ //you are a child process
468                                         MPI_Recv(&numSeqs, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
469                                         MPIPos.resize(numSeqs+1);
470                                         MPI_Recv(&MPIPos[0], (numSeqs+1), MPI_LONG, 0, tag, MPI_COMM_WORLD, &status);
471                                         
472                                         //figure out how many sequences you have to align
473                                         numSeqsPerProcessor = numSeqs / processors;
474                                         int startIndex =  pid * numSeqsPerProcessor;
475                                         if(pid == (processors - 1)){    numSeqsPerProcessor = numSeqs - pid * numSeqsPerProcessor;      }
476                                         
477                                         //do your part
478                                         driverMPI(startIndex, numSeqsPerProcessor, inMPI, outMPI, outMPIAccnos, MPIPos);
479                                         
480                                         if (m->control_pressed) { outputTypes.clear();  MPI_File_close(&inMPI);  MPI_File_close(&outMPI);   MPI_File_close(&outMPIAccnos);  for (int j = 0; j < outputNames.size(); j++) {      m->mothurRemove(outputNames[j]);        }  delete chimera; return 0;  }
481                                 }
482                                 
483                                 //close files 
484                                 MPI_File_close(&inMPI);
485                                 MPI_File_close(&outMPI);
486                                 MPI_File_close(&outMPIAccnos);
487                                 MPI_Barrier(MPI_COMM_WORLD); //make everyone wait - just in case
488                 #else
489                                                 
490                         //break up file
491                         #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
492                                 vector<unsigned long long> positions = m->divideFile(fastaFileNames[s], processors);
493                         
494                                 for (int i = 0; i < (positions.size()-1); i++) {
495                                         lines.push_back(new linePair(positions[i], positions[(i+1)]));
496                                 }       
497                         
498                                 if(processors == 1){
499                 
500                                         numSeqs = driver(lines[0], outputFileName, fastaFileNames[s], accnosFileName);
501                                         
502                                         if (m->control_pressed) { outputTypes.clear(); m->mothurRemove(outputFileName); m->mothurRemove(accnosFileName); for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]);        } for (int i = 0; i < lines.size(); i++) {  delete lines[i];  }  lines.clear(); delete chimera; return 0; }
503                                         
504                                 }else{
505                                         processIDS.resize(0);
506                                         
507                                         numSeqs = createProcesses(outputFileName, fastaFileNames[s], accnosFileName); 
508                                 
509                                         rename((outputFileName + toString(processIDS[0]) + ".temp").c_str(), outputFileName.c_str());
510                                         rename((accnosFileName + toString(processIDS[0]) + ".temp").c_str(), accnosFileName.c_str());
511                                                 
512                                         //append output files
513                                         for(int i=1;i<processors;i++){
514                                                 m->appendFiles((outputFileName + toString(processIDS[i]) + ".temp"), outputFileName);
515                                                 m->mothurRemove((outputFileName + toString(processIDS[i]) + ".temp"));
516                                         }
517                                         
518                                         //append output files
519                                         for(int i=1;i<processors;i++){
520                                                 m->appendFiles((accnosFileName + toString(processIDS[i]) + ".temp"), accnosFileName);
521                                                 m->mothurRemove((accnosFileName + toString(processIDS[i]) + ".temp"));
522                                         }
523                                                                                 
524                                         if (m->control_pressed) { 
525                                                 m->mothurRemove(outputFileName); 
526                                                 m->mothurRemove(accnosFileName);
527                                                 for (int j = 0; j < outputNames.size(); j++) {  m->mothurRemove(outputNames[j]);        } outputTypes.clear();
528                                                 for (int i = 0; i < lines.size(); i++) {  delete lines[i];  }  lines.clear();
529                                                 delete chimera;
530                                                 return 0;
531                                         }
532                                 }
533
534                         #else
535                                 lines.push_back(new linePair(0, 1000));
536                                 numSeqs = driver(lines[0], outputFileName, fastaFileNames[s], accnosFileName);
537                                 
538                                 if (m->control_pressed) { outputTypes.clear(); m->mothurRemove(outputFileName); m->mothurRemove(accnosFileName); for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]);        } for (int i = 0; i < lines.size(); i++) {  delete lines[i];  }  lines.clear(); delete chimera; return 0; }
539                         #endif
540                         
541                 #endif  
542                 
543                         delete chimera;
544                         for (int i = 0; i < lines.size(); i++) {  delete lines[i];  }  lines.clear();
545                         
546                         outputNames.push_back(outputFileName); outputTypes["chimera"].push_back(outputFileName);
547                         outputNames.push_back(accnosFileName); outputTypes["accnos"].push_back(accnosFileName);
548                         
549                         m->mothurOutEndLine();
550                         m->mothurOutEndLine(); m->mothurOut("It took " + toString(time(NULL) - start) + " secs to check " + toString(numSeqs) + " sequences."); m->mothurOutEndLine();
551                 }
552                 
553                 //set accnos file as new current accnosfile
554                 string current = "";
555                 itTypes = outputTypes.find("accnos");
556                 if (itTypes != outputTypes.end()) {
557                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setAccnosFile(current); }
558                 }
559                 
560                 m->mothurOutEndLine();
561                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
562                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }       
563                 m->mothurOutEndLine();
564                         
565                 return 0;
566                 
567         }
568         catch(exception& e) {
569                 m->errorOut(e, "ChimeraPintailCommand", "execute");
570                 exit(1);
571         }
572 }
573 //**********************************************************************************************************************
574
575 int ChimeraPintailCommand::driver(linePair* filePos, string outputFName, string filename, string accnos){
576         try {
577                 ofstream out;
578                 m->openOutputFile(outputFName, out);
579                 
580                 ofstream out2;
581                 m->openOutputFile(accnos, out2);
582                 
583                 ifstream inFASTA;
584                 m->openInputFile(filename, inFASTA);
585
586                 inFASTA.seekg(filePos->start);
587
588                 bool done = false;
589                 int count = 0;
590         
591                 while (!done) {
592                                 
593                         if (m->control_pressed) {       return 1;       }
594                 
595                         Sequence* candidateSeq = new Sequence(inFASTA);  m->gobble(inFASTA);
596                                 
597                         if (candidateSeq->getName() != "") { //incase there is a commented sequence at the end of a file
598                                 
599                                 if (candidateSeq->getAligned().length() != templateSeqsLength)  {  //chimeracheck does not require seqs to be aligned
600                                         m->mothurOut(candidateSeq->getName() + " is not the same length as the template sequences. Skipping."); m->mothurOutEndLine();
601                                 }else{
602                                         //find chimeras
603                                         chimera->getChimeras(candidateSeq);
604                                         
605                                         if (m->control_pressed) {       delete candidateSeq; return 1;  }
606                 
607                                         //print results
608                                         chimera->print(out, out2);
609                                 }
610                                 count++;
611                         }
612                         delete candidateSeq;
613                         
614                         #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
615                                 unsigned long long pos = inFASTA.tellg();
616                                 if ((pos == -1) || (pos >= filePos->end)) { break; }
617                         #else
618                                 if (inFASTA.eof()) { break; }
619                         #endif
620                         
621                         //report progress
622                         if((count) % 100 == 0){ m->mothurOut("Processing sequence: " + toString(count)); m->mothurOutEndLine();         }
623                 }
624                 //report progress
625                 if((count) % 100 != 0){ m->mothurOut("Processing sequence: " + toString(count)); m->mothurOutEndLine();         }
626                 
627                 out.close();
628                 out2.close();
629                 inFASTA.close();
630                                 
631                 return count;
632         }
633         catch(exception& e) {
634                 m->errorOut(e, "ChimeraPintailCommand", "driver");
635                 exit(1);
636         }
637 }
638 //**********************************************************************************************************************
639 #ifdef USE_MPI
640 int ChimeraPintailCommand::driverMPI(int start, int num, MPI_File& inMPI, MPI_File& outMPI, MPI_File& outAccMPI, vector<unsigned long long>& MPIPos){
641         try {
642                                 
643                 MPI_Status status; 
644                 int pid;
645                 MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
646                 
647                 for(int i=0;i<num;i++){
648                         
649                         if (m->control_pressed) {       return 1;       }
650                         
651                         //read next sequence
652                         int length = MPIPos[start+i+1] - MPIPos[start+i];
653         
654                         char* buf4 = new char[length];
655                         MPI_File_read_at(inMPI, MPIPos[start+i], buf4, length, MPI_CHAR, &status);
656                         
657                         string tempBuf = buf4;
658                         if (tempBuf.length() > length) { tempBuf = tempBuf.substr(0, length);  }
659                         istringstream iss (tempBuf,istringstream::in);
660                         delete buf4;
661
662                         Sequence* candidateSeq = new Sequence(iss);  m->gobble(iss);
663                                 
664                         if (candidateSeq->getName() != "") { //incase there is a commented sequence at the end of a file
665                                 
666                                 if      (candidateSeq->getAligned().length() != templateSeqsLength) {  //chimeracheck does not require seqs to be aligned
667                                         m->mothurOut(candidateSeq->getName() + " is not the same length as the template sequences. Skipping."); m->mothurOutEndLine();
668                                 }else{
669                                         //find chimeras
670                                         chimera->getChimeras(candidateSeq);
671                                         
672                                         if (m->control_pressed) {       delete candidateSeq; return 1;  }
673                 
674                                         //print results
675                                         chimera->print(outMPI, outAccMPI);
676                                 }
677                         }
678                         delete candidateSeq;
679                         
680                         //report progress
681                         if((i+1) % 100 == 0){  cout << "Processing sequence: " << (i+1) << endl;        m->mothurOutJustToLog("Processing sequence: " + toString(i+1) + "\n");          }
682                 }
683                 //report progress
684                 if(num % 100 != 0){             cout << "Processing sequence: " << num << endl; m->mothurOutJustToLog("Processing sequence: " + toString(num) + "\n");  }
685                 
686                                 
687                 return 0;
688         }
689         catch(exception& e) {
690                 m->errorOut(e, "ChimeraPintailCommand", "driverMPI");
691                 exit(1);
692         }
693 }
694 #endif
695
696 /**************************************************************************************************/
697
698 int ChimeraPintailCommand::createProcesses(string outputFileName, string filename, string accnos) {
699         try {
700 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
701                 int process = 0;
702                 int num = 0;
703                 
704                 //loop through and create all the processes you want
705                 while (process != processors) {
706                         int pid = fork();
707                         
708                         if (pid > 0) {
709                                 processIDS.push_back(pid);  //create map from line number to pid so you can append files in correct order later
710                                 process++;
711                         }else if (pid == 0){
712                                 num = driver(lines[process], outputFileName + toString(getpid()) + ".temp", filename, accnos + toString(getpid()) + ".temp");
713                                 
714                                 //pass numSeqs to parent
715                                 ofstream out;
716                                 string tempFile = outputFileName + toString(getpid()) + ".num.temp";
717                                 m->openOutputFile(tempFile, out);
718                                 out << num << endl;
719                                 out.close();
720
721                                 exit(0);
722                         }else { 
723                                 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine(); 
724                                 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
725                                 exit(0);
726                         }
727                 }
728                 
729                 //force parent to wait until all the processes are done
730                 for (int i=0;i<processors;i++) { 
731                         int temp = processIDS[i];
732                         wait(&temp);
733                 }
734                 
735                 for (int i = 0; i < processIDS.size(); i++) {
736                         ifstream in;
737                         string tempFile =  outputFileName + toString(processIDS[i]) + ".num.temp";
738                         m->openInputFile(tempFile, in);
739                         if (!in.eof()) { int tempNum = 0; in >> tempNum; num += tempNum; }
740                         in.close(); m->mothurRemove(tempFile);
741                 }
742                 
743                 return num;
744 #endif          
745         }
746         catch(exception& e) {
747                 m->errorOut(e, "ChimeraPintailCommand", "createProcesses");
748                 exit(1);
749         }
750 }
751
752 /**************************************************************************************************/
753
754