]> git.donarmstrong.com Git - mothur.git/blob - chimerapintailcommand.cpp
1.21.0
[mothur.git] / chimerapintailcommand.cpp
1 /*
2  *  chimerapintailcommand.cpp
3  *  Mothur
4  *
5  *  Created by westcott on 4/1/10.
6  *  Copyright 2010 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "chimerapintailcommand.h"
11 #include "pintail.h"
12
13
14 //**********************************************************************************************************************
15 vector<string> ChimeraPintailCommand::setParameters(){  
16         try {
17                 CommandParameter ptemplate("reference", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(ptemplate);
18                 CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pfasta);
19                 CommandParameter pconservation("conservation", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pconservation);
20                 CommandParameter pquantile("quantile", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pquantile);
21                 CommandParameter pfilter("filter", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pfilter);
22                 CommandParameter pwindow("window", "Number", "", "0", "", "", "",false,false); parameters.push_back(pwindow);
23                 CommandParameter pincrement("increment", "Number", "", "25", "", "", "",false,false); parameters.push_back(pincrement);
24                 CommandParameter pmask("mask", "String", "", "", "", "", "",false,false); parameters.push_back(pmask);
25                 CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors);
26                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
27                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
28                 CommandParameter psave("save", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(psave);
29
30                 vector<string> myArray;
31                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
32                 return myArray;
33         }
34         catch(exception& e) {
35                 m->errorOut(e, "ChimeraPintailCommand", "setParameters");
36                 exit(1);
37         }
38 }
39 //**********************************************************************************************************************
40 string ChimeraPintailCommand::getHelpString(){  
41         try {
42                 string helpString = "";
43                 helpString += "The chimera.pintail command reads a fastafile and referencefile and outputs potentially chimeric sequences.\n";
44                 helpString += "This command was created using the algorythms described in the 'At Least 1 in 20 16S rRNA Sequence Records Currently Held in the Public Repositories is Estimated To Contain Substantial Anomalies' paper by Kevin E. Ashelford 1, Nadia A. Chuzhanova 3, John C. Fry 1, Antonia J. Jones 2 and Andrew J. Weightman 1.\n";
45                 helpString += "The chimera.pintail command parameters are fasta, reference, filter, mask, processors, window, increment, conservation and quantile.\n";
46                 helpString += "The fasta parameter allows you to enter the fasta file containing your potentially chimeric sequences, and is required unless you have a valid current fasta file. \n";
47                 helpString += "You may enter multiple fasta files by separating their names with dashes. ie. fasta=abrecovery.fasta-amzon.fasta \n";
48                 helpString += "The reference parameter allows you to enter a reference file containing known non-chimeric sequences, and is required. \n";
49                 helpString += "The filter parameter allows you to specify if you would like to apply a vertical and 50% soft filter. \n";
50                 helpString += "The mask parameter allows you to specify a file containing one sequence you wish to use as a mask for the your sequences, by default no mask is applied.  You can apply an ecoli mask by typing, mask=default. \n";
51                 helpString += "The processors parameter allows you to specify how many processors you would like to use.  The default is 1. \n";
52 #ifdef USE_MPI
53                 helpString += "When using MPI, the processors parameter is set to the number of MPI processes running. \n";
54 #endif
55                 helpString += "If the save parameter is set to true the reference sequences will be saved in memory, to clear them later you can use the clear.memory command. Default=f.";
56                 helpString += "The window parameter allows you to specify the window size for searching for chimeras, default=300. \n";
57                 helpString += "The increment parameter allows you to specify how far you move each window while finding chimeric sequences, default=25.\n";
58                 helpString += "The conservation parameter allows you to enter a frequency file containing the highest bases frequency at each place in the alignment.\n";
59                 helpString += "The quantile parameter allows you to enter a file containing quantiles for a template files sequences, if you use the filter the quantile file generated becomes unique to the fasta file you used.\n";
60                 helpString += "The chimera.pintail command should be in the following format: \n";
61                 helpString += "chimera.pintail(fasta=yourFastaFile, reference=yourTemplate) \n";
62                 helpString += "Example: chimera.pintail(fasta=AD.align, reference=silva.bacteria.fasta) \n";
63                 helpString += "Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFastaFile).\n";       
64                 return helpString;
65         }
66         catch(exception& e) {
67                 m->errorOut(e, "ChimeraPintailCommand", "getHelpString");
68                 exit(1);
69         }
70 }
71 //**********************************************************************************************************************
72 ChimeraPintailCommand::ChimeraPintailCommand(){ 
73         try {
74                 abort = true; calledHelp = true;
75                 setParameters();
76                 vector<string> tempOutNames;
77                 outputTypes["chimera"] = tempOutNames;
78                 outputTypes["accnos"] = tempOutNames;
79         }
80         catch(exception& e) {
81                 m->errorOut(e, "ChimeraPintailCommand", "ChimeraPintailCommand");
82                 exit(1);
83         }
84 }
85 //***************************************************************************************************************
86 ChimeraPintailCommand::ChimeraPintailCommand(string option)  {
87         try {
88                 abort = false; calledHelp = false;   
89                 rdb = ReferenceDB::getInstance();
90                 
91                 //allow user to run help
92                 if(option == "help") { help(); abort = true; calledHelp = true; }
93                 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
94                 
95                 else {
96                         vector<string> myArray = setParameters();
97                         
98                         OptionParser parser(option);
99                         map<string,string> parameters = parser.getParameters();
100                         
101                         ValidParameters validParameter("chimera.pintail");
102                         map<string,string>::iterator it;
103                         
104                         //check to make sure all parameters are valid for command
105                         for (it = parameters.begin(); it != parameters.end(); it++) { 
106                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
107                         }
108                         
109                         vector<string> tempOutNames;
110                         outputTypes["chimera"] = tempOutNames;
111                         outputTypes["accnos"] = tempOutNames;
112                 
113                         //if the user changes the input directory command factory will send this info to us in the output parameter 
114                         inputDir = validParameter.validFile(parameters, "inputdir", false);             
115                         if (inputDir == "not found"){   inputDir = "";          }
116                         else {
117                                 string path;
118                                 it = parameters.find("reference");
119                                 //user has given a template file
120                                 if(it != parameters.end()){ 
121                                         path = m->hasPath(it->second);
122                                         //if the user has not given a path then, add inputdir. else leave path alone.
123                                         if (path == "") {       parameters["reference"] = inputDir + it->second;                }
124                                 }
125                                 
126                                 it = parameters.find("conservation");
127                                 //user has given a template file
128                                 if(it != parameters.end()){ 
129                                         path = m->hasPath(it->second);
130                                         //if the user has not given a path then, add inputdir. else leave path alone.
131                                         if (path == "") {       parameters["conservation"] = inputDir + it->second;             }
132                                 }
133                                 
134                                 it = parameters.find("quantile");
135                                 //user has given a template file
136                                 if(it != parameters.end()){ 
137                                         path = m->hasPath(it->second);
138                                         //if the user has not given a path then, add inputdir. else leave path alone.
139                                         if (path == "") {       parameters["quantile"] = inputDir + it->second;         }
140                                 }
141                         }
142
143                         
144                         //check for required parameters
145                         fastafile = validParameter.validFile(parameters, "fasta", false);
146                         if (fastafile == "not found") {                                 
147                                 //if there is a current fasta file, use it
148                                 string filename = m->getFastaFile(); 
149                                 if (filename != "") { fastaFileNames.push_back(filename); m->mothurOut("Using " + filename + " as input file for the fasta parameter."); m->mothurOutEndLine(); }
150                                 else {  m->mothurOut("You have no current fastafile and the fasta parameter is required."); m->mothurOutEndLine(); abort = true; }
151                         }else { 
152                                 m->splitAtDash(fastafile, fastaFileNames);
153                                 
154                                 //go through files and make sure they are good, if not, then disregard them
155                                 for (int i = 0; i < fastaFileNames.size(); i++) {
156                                         
157                                         bool ignore = false;
158                                         if (fastaFileNames[i] == "current") { 
159                                                 fastaFileNames[i] = m->getFastaFile(); 
160                                                 if (fastaFileNames[i] != "") {  m->mothurOut("Using " + fastaFileNames[i] + " as input file for the fasta parameter where you had given current."); m->mothurOutEndLine(); }
161                                                 else {  
162                                                         m->mothurOut("You have no current fastafile, ignoring current."); m->mothurOutEndLine(); ignore=true; 
163                                                         //erase from file list
164                                                         fastaFileNames.erase(fastaFileNames.begin()+i);
165                                                         i--;
166                                                 }
167                                         }
168                                         
169                                         if (!ignore) {
170                                         
171                                                 if (inputDir != "") {
172                                                         string path = m->hasPath(fastaFileNames[i]);
173                                                         //if the user has not given a path then, add inputdir. else leave path alone.
174                                                         if (path == "") {       fastaFileNames[i] = inputDir + fastaFileNames[i];               }
175                                                 }
176                 
177                                                 int ableToOpen;
178                                                 ifstream in;
179                                                 
180                                                 ableToOpen = m->openInputFile(fastaFileNames[i], in, "noerror");
181                                         
182                                                 //if you can't open it, try default location
183                                                 if (ableToOpen == 1) {
184                                                         if (m->getDefaultPath() != "") { //default path is set
185                                                                 string tryPath = m->getDefaultPath() + m->getSimpleName(fastaFileNames[i]);
186                                                                 m->mothurOut("Unable to open " + fastaFileNames[i] + ". Trying default " + tryPath); m->mothurOutEndLine();
187                                                                 ifstream in2;
188                                                                 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
189                                                                 in2.close();
190                                                                 fastaFileNames[i] = tryPath;
191                                                         }
192                                                 }
193                                                 
194                                                 if (ableToOpen == 1) {
195                                                         if (m->getOutputDir() != "") { //default path is set
196                                                                 string tryPath = m->getOutputDir() + m->getSimpleName(fastaFileNames[i]);
197                                                                 m->mothurOut("Unable to open " + fastaFileNames[i] + ". Trying output directory " + tryPath); m->mothurOutEndLine();
198                                                                 ifstream in2;
199                                                                 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
200                                                                 in2.close();
201                                                                 fastaFileNames[i] = tryPath;
202                                                         }
203                                                 }
204
205                                                 in.close();
206                                                 
207                                                 if (ableToOpen == 1) { 
208                                                         m->mothurOut("Unable to open " + fastaFileNames[i] + ". It will be disregarded."); m->mothurOutEndLine(); 
209                                                         //erase from file list
210                                                         fastaFileNames.erase(fastaFileNames.begin()+i);
211                                                         i--;
212                                                 }else {
213                                                         m->setFastaFile(fastaFileNames[i]);
214                                                 }
215                                         }
216                                 }
217                                 
218                                 //make sure there is at least one valid file left
219                                 if (fastaFileNames.size() == 0) { m->mothurOut("no valid files."); m->mothurOutEndLine(); abort = true; }
220                         }
221                         
222                         string temp;
223                         temp = validParameter.validFile(parameters, "filter", false);                   if (temp == "not found") { temp = "F"; }
224                         filter = m->isTrue(temp);
225                         
226                         temp = validParameter.validFile(parameters, "processors", false);       if (temp == "not found"){       temp = m->getProcessors();      }
227                         m->setProcessors(temp);
228                         convert(temp, processors);
229                         
230                         temp = validParameter.validFile(parameters, "window", false);                   if (temp == "not found") { temp = "0"; }
231                         convert(temp, window);
232                         
233                         temp = validParameter.validFile(parameters, "increment", false);                if (temp == "not found") { temp = "25"; }
234                         convert(temp, increment);
235                         
236                         temp = validParameter.validFile(parameters, "save", false);                     if (temp == "not found"){       temp = "f";                             }
237                         save = m->isTrue(temp); 
238                         rdb->save = save; 
239                         if (save) { //clear out old references
240                                 rdb->clearMemory();     
241                         }
242                         
243                         //this has to go after save so that if the user sets save=t and provides no reference we abort
244                         templatefile = validParameter.validFile(parameters, "reference", true);
245                         if (templatefile == "not found") { 
246                                 //check for saved reference sequences
247                                 if (rdb->referenceSeqs.size() != 0) {
248                                         templatefile = "saved";
249                                 }else {
250                                         m->mothurOut("[ERROR]: You don't have any saved reference sequences and the reference parameter is a required."); 
251                                         m->mothurOutEndLine();
252                                         abort = true; 
253                                 }
254                         }else if (templatefile == "not open") { abort = true; } 
255                         else {  if (save) {     rdb->setSavedReference(templatefile);   }       }
256                         
257                         
258                         maskfile = validParameter.validFile(parameters, "mask", false);
259                         if (maskfile == "not found") { maskfile = "";  }        
260                         else if (maskfile != "default")  { 
261                                 if (inputDir != "") {
262                                         string path = m->hasPath(maskfile);
263                                         //if the user has not given a path then, add inputdir. else leave path alone.
264                                         if (path == "") {       maskfile = inputDir + maskfile;         }
265                                 }
266
267                                 ifstream in;
268                                 int     ableToOpen = m->openInputFile(maskfile, in, "no error");
269                                 if (ableToOpen == 1) { 
270                                         if (m->getDefaultPath() != "") { //default path is set
271                                                         string tryPath = m->getDefaultPath() + m->getSimpleName(maskfile);
272                                                         m->mothurOut("Unable to open " + maskfile + ". Trying default " + tryPath); m->mothurOutEndLine();
273                                                         ifstream in2;
274                                                         ableToOpen = m->openInputFile(tryPath, in2, "noerror");
275                                                         in2.close();
276                                                         maskfile = tryPath;
277                                         }
278                                 }
279                                 
280                                 if (ableToOpen == 1) {
281                                                 if (m->getOutputDir() != "") { //default path is set
282                                                         string tryPath = m->getOutputDir() + m->getSimpleName(maskfile);
283                                                         m->mothurOut("Unable to open " + maskfile + ". Trying output directory " + tryPath); m->mothurOutEndLine();
284                                                         ifstream in2;
285                                                         ableToOpen = m->openInputFile(tryPath, in2, "noerror");
286                                                         in2.close();
287                                                         maskfile = tryPath;
288                                                 }
289                                 }
290                                 
291                                 in.close();
292                                         
293                                 if (ableToOpen == 1) { 
294                                                 m->mothurOut("Unable to open " + maskfile + "."); m->mothurOutEndLine(); 
295                                                 abort = true;
296                                 }
297                         }
298
299                         
300                         //if the user changes the output directory command factory will send this info to us in the output parameter 
301                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = ""; }
302                 
303                         consfile = validParameter.validFile(parameters, "conservation", true);
304                         if (consfile == "not open") { abort = true; }
305                         else if (consfile == "not found") { 
306                                 consfile = "";  
307                                 //check for consfile
308                                 string tempConsFile = m->getRootName(inputDir + m->getSimpleName(templatefile)) + "freq";
309                                 ifstream FileTest(tempConsFile.c_str());
310                                 if(FileTest){   
311                                         bool GoodFile = m->checkReleaseVersion(FileTest, m->getVersion());
312                                         if (GoodFile) {  
313                                                 m->mothurOut("I found " + tempConsFile + " in your input file directory. I will use it to save time."); m->mothurOutEndLine();  consfile = tempConsFile;  FileTest.close();     
314                                         }
315                                 }else {
316                                         string tempConsFile = m->getDefaultPath() + m->getRootName(m->getSimpleName(templatefile)) + "freq";
317                                         ifstream FileTest2(tempConsFile.c_str());
318                                         if(FileTest2){  
319                                                 bool GoodFile = m->checkReleaseVersion(FileTest2, m->getVersion());
320                                                 if (GoodFile) {  
321                                                         m->mothurOut("I found " + tempConsFile + " in your input file directory. I will use it to save time."); m->mothurOutEndLine();  consfile = tempConsFile;  FileTest2.close();    
322                                                 }
323                                         }
324                                 }
325                         }       
326                         
327                         quanfile = validParameter.validFile(parameters, "quantile", true);
328                         if (quanfile == "not open") { abort = true; }
329                         else if (quanfile == "not found") { quanfile = ""; }
330                 }
331         }
332         catch(exception& e) {
333                 m->errorOut(e, "ChimeraPintailCommand", "ChimeraPintailCommand");
334                 exit(1);
335         }
336 }
337 //***************************************************************************************************************
338
339 int ChimeraPintailCommand::execute(){
340         try{
341                 
342                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
343                 
344                 for (int s = 0; s < fastaFileNames.size(); s++) {
345                                 
346                         m->mothurOut("Checking sequences from " + fastaFileNames[s] + " ..." ); m->mothurOutEndLine();
347
348                         int start = time(NULL); 
349                         
350                         //set user options
351                         if (maskfile == "default") { m->mothurOut("I am using the default 236627 EU009184.1 Shigella dysenteriae str. FBD013."); m->mothurOutEndLine();  }
352                         
353                         //check for quantile to save the time
354                         string baseName = templatefile;
355                         if (templatefile == "saved") { baseName = rdb->getSavedReference(); }
356                         
357                         string tempQuan = "";
358                         if ((!filter) && (maskfile == "")) {
359                                 tempQuan = inputDir + m->getRootName(m->getSimpleName(baseName)) + "pintail.quan";
360                         }else if ((!filter) && (maskfile != "")) { 
361                                 tempQuan = inputDir + m->getRootName(m->getSimpleName(baseName)) + "pintail.masked.quan";
362                         }else if ((filter) && (maskfile != "")) { 
363                                 tempQuan = inputDir + m->getRootName(m->getSimpleName(baseName)) + "pintail.filtered." + m->getSimpleName(m->getRootName(fastaFileNames[s])) + "masked.quan";
364                         }else if ((filter) && (maskfile == "")) { 
365                                 tempQuan = inputDir + m->getRootName(m->getSimpleName(baseName)) + "pintail.filtered." + m->getSimpleName(m->getRootName(fastaFileNames[s])) + "quan";
366                         }
367                         
368                         ifstream FileTest(tempQuan.c_str());
369                         if(FileTest){   
370                                 bool GoodFile = m->checkReleaseVersion(FileTest, m->getVersion());
371                                 if (GoodFile) {  
372                                         m->mothurOut("I found " + tempQuan + " in your input file directory. I will use it to save time."); m->mothurOutEndLine();  quanfile = tempQuan;  FileTest.close();     
373                                 }
374                         }else {
375                                 string tryPath = m->getDefaultPath();
376                                 string tempQuan = "";
377                                 if ((!filter) && (maskfile == "")) {
378                                         tempQuan = tryPath + m->getRootName(m->getSimpleName(baseName)) + "pintail.quan";
379                                 }else if ((!filter) && (maskfile != "")) { 
380                                         tempQuan = tryPath + m->getRootName(m->getSimpleName(baseName)) + "pintail.masked.quan";
381                                 }else if ((filter) && (maskfile != "")) { 
382                                         tempQuan = tryPath + m->getRootName(m->getSimpleName(baseName)) + "pintail.filtered." + m->getSimpleName(m->getRootName(fastaFileNames[s])) + "masked.quan";
383                                 }else if ((filter) && (maskfile == "")) { 
384                                         tempQuan = tryPath + m->getRootName(m->getSimpleName(baseName)) + "pintail.filtered." + m->getSimpleName(m->getRootName(fastaFileNames[s])) + "quan";
385                                 }
386                                 
387                                 ifstream FileTest2(tempQuan.c_str());
388                                 if(FileTest2){  
389                                         bool GoodFile = m->checkReleaseVersion(FileTest2, m->getVersion());
390                                         if (GoodFile) {  
391                                                 m->mothurOut("I found " + tempQuan + " in your input file directory. I will use it to save time."); m->mothurOutEndLine();  quanfile = tempQuan;  FileTest2.close();    
392                                         }
393                                 }
394                         }
395                         chimera = new Pintail(fastaFileNames[s], templatefile, filter, processors, maskfile, consfile, quanfile, window, increment, outputDir);
396                         
397                         if (outputDir == "") { outputDir = m->hasPath(fastaFileNames[s]);  }//if user entered a file with a path then preserve it
398                         string outputFileName, accnosFileName;
399                         if (maskfile != "") {
400                                 outputFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + m->getSimpleName(m->getRootName(maskfile)) + ".pintail.chimeras";
401                                 accnosFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + m->getSimpleName(m->getRootName(maskfile)) + ".pintail.accnos";
402                         }else {
403                                 outputFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s]))  + "pintail.chimeras";
404                                 accnosFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s]))  + "pintail.accnos";
405                         }
406                         
407                         if (m->control_pressed) { delete chimera; for (int j = 0; j < outputNames.size(); j++) {        m->mothurRemove(outputNames[j]);        }  return 0;    }
408                         
409                         if (chimera->getUnaligned()) { 
410                                 m->mothurOut("Your template sequences are different lengths, please correct."); m->mothurOutEndLine(); 
411                                 delete chimera;
412                                 return 0; 
413                         }
414                         templateSeqsLength = chimera->getLength();
415                 
416                 #ifdef USE_MPI
417                         int pid, numSeqsPerProcessor; 
418                                 int tag = 2001;
419                                 vector<unsigned long int> MPIPos;
420                                 
421                                 MPI_Status status; 
422                                 MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
423                                 MPI_Comm_size(MPI_COMM_WORLD, &processors); 
424
425                                 MPI_File inMPI;
426                                 MPI_File outMPI;
427                                 MPI_File outMPIAccnos;
428                                 
429                                 int outMode=MPI_MODE_CREATE|MPI_MODE_WRONLY; 
430                                 int inMode=MPI_MODE_RDONLY; 
431                                 
432                                 char outFilename[1024];
433                                 strcpy(outFilename, outputFileName.c_str());
434                                 
435                                 char outAccnosFilename[1024];
436                                 strcpy(outAccnosFilename, accnosFileName.c_str());
437                                 
438                                 char inFileName[1024];
439                                 strcpy(inFileName, fastaFileNames[s].c_str());
440
441                                 MPI_File_open(MPI_COMM_WORLD, inFileName, inMode, MPI_INFO_NULL, &inMPI);  //comm, filename, mode, info, filepointer
442                                 MPI_File_open(MPI_COMM_WORLD, outFilename, outMode, MPI_INFO_NULL, &outMPI);
443                                 MPI_File_open(MPI_COMM_WORLD, outAccnosFilename, outMode, MPI_INFO_NULL, &outMPIAccnos);
444                                 
445                                 if (m->control_pressed) { outputTypes.clear();  MPI_File_close(&inMPI);  MPI_File_close(&outMPI);   MPI_File_close(&outMPIAccnos);  for (int j = 0; j < outputNames.size(); j++) {      m->mothurRemove(outputNames[j]);        }  delete chimera; return 0;  }
446
447                                 if (pid == 0) { //you are the root process 
448                                                                 
449                                         MPIPos = m->setFilePosFasta(fastaFileNames[s], numSeqs); //fills MPIPos, returns numSeqs
450                                         
451                                         //send file positions to all processes
452                                         for(int i = 1; i < processors; i++) { 
453                                                 MPI_Send(&numSeqs, 1, MPI_INT, i, tag, MPI_COMM_WORLD);
454                                                 MPI_Send(&MPIPos[0], (numSeqs+1), MPI_LONG, i, tag, MPI_COMM_WORLD);
455                                         }
456                                         
457                                         //figure out how many sequences you have to align
458                                         numSeqsPerProcessor = numSeqs / processors;
459                                         int startIndex =  pid * numSeqsPerProcessor;
460                                         if(pid == (processors - 1)){    numSeqsPerProcessor = numSeqs - pid * numSeqsPerProcessor;      }
461                                 
462                                         //do your part
463                                         driverMPI(startIndex, numSeqsPerProcessor, inMPI, outMPI, outMPIAccnos, MPIPos);
464                                         
465                                         if (m->control_pressed) { outputTypes.clear();  MPI_File_close(&inMPI);  MPI_File_close(&outMPI);   MPI_File_close(&outMPIAccnos);  m->mothurRemove(outputFileName);  m->mothurRemove(accnosFileName);  for (int j = 0; j < outputNames.size(); j++) {  m->mothurRemove(outputNames[j]);        }  delete chimera; return 0;  }
466                                         
467                                 }else{ //you are a child process
468                                         MPI_Recv(&numSeqs, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
469                                         MPIPos.resize(numSeqs+1);
470                                         MPI_Recv(&MPIPos[0], (numSeqs+1), MPI_LONG, 0, tag, MPI_COMM_WORLD, &status);
471                                         
472                                         //figure out how many sequences you have to align
473                                         numSeqsPerProcessor = numSeqs / processors;
474                                         int startIndex =  pid * numSeqsPerProcessor;
475                                         if(pid == (processors - 1)){    numSeqsPerProcessor = numSeqs - pid * numSeqsPerProcessor;      }
476                                         
477                                         //do your part
478                                         driverMPI(startIndex, numSeqsPerProcessor, inMPI, outMPI, outMPIAccnos, MPIPos);
479                                         
480                                         if (m->control_pressed) { outputTypes.clear();  MPI_File_close(&inMPI);  MPI_File_close(&outMPI);   MPI_File_close(&outMPIAccnos);  for (int j = 0; j < outputNames.size(); j++) {      m->mothurRemove(outputNames[j]);        }  delete chimera; return 0;  }
481                                 }
482                                 
483                                 //close files 
484                                 MPI_File_close(&inMPI);
485                                 MPI_File_close(&outMPI);
486                                 MPI_File_close(&outMPIAccnos);
487                                 MPI_Barrier(MPI_COMM_WORLD); //make everyone wait - just in case
488                 #else
489                         vector<unsigned long int> positions = m->divideFile(fastaFileNames[s], processors);
490                                 
491                         for (int i = 0; i < (positions.size()-1); i++) {
492                                 lines.push_back(new linePair(positions[i], positions[(i+1)]));
493                         }       
494                         
495                         //break up file
496                         #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
497                                 if(processors == 1){
498                 
499                                         numSeqs = driver(lines[0], outputFileName, fastaFileNames[s], accnosFileName);
500                                         
501                                         if (m->control_pressed) { outputTypes.clear(); m->mothurRemove(outputFileName); m->mothurRemove(accnosFileName); for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]);        } for (int i = 0; i < lines.size(); i++) {  delete lines[i];  }  lines.clear(); delete chimera; return 0; }
502                                         
503                                 }else{
504                                         processIDS.resize(0);
505                                         
506                                         numSeqs = createProcesses(outputFileName, fastaFileNames[s], accnosFileName); 
507                                 
508                                         rename((outputFileName + toString(processIDS[0]) + ".temp").c_str(), outputFileName.c_str());
509                                         rename((accnosFileName + toString(processIDS[0]) + ".temp").c_str(), accnosFileName.c_str());
510                                                 
511                                         //append output files
512                                         for(int i=1;i<processors;i++){
513                                                 m->appendFiles((outputFileName + toString(processIDS[i]) + ".temp"), outputFileName);
514                                                 m->mothurRemove((outputFileName + toString(processIDS[i]) + ".temp"));
515                                         }
516                                         
517                                         //append output files
518                                         for(int i=1;i<processors;i++){
519                                                 m->appendFiles((accnosFileName + toString(processIDS[i]) + ".temp"), accnosFileName);
520                                                 m->mothurRemove((accnosFileName + toString(processIDS[i]) + ".temp"));
521                                         }
522                                                                                 
523                                         if (m->control_pressed) { 
524                                                 m->mothurRemove(outputFileName); 
525                                                 m->mothurRemove(accnosFileName);
526                                                 for (int j = 0; j < outputNames.size(); j++) {  m->mothurRemove(outputNames[j]);        } outputTypes.clear();
527                                                 for (int i = 0; i < lines.size(); i++) {  delete lines[i];  }  lines.clear();
528                                                 delete chimera;
529                                                 return 0;
530                                         }
531                                 }
532
533                         #else
534                                 numSeqs = driver(lines[0], outputFileName, fastaFileNames[s], accnosFileName);
535                                 
536                                 if (m->control_pressed) { outputTypes.clear(); m->mothurRemove(outputFileName); m->mothurRemove(accnosFileName); for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]);        } for (int i = 0; i < lines.size(); i++) {  delete lines[i];  }  lines.clear(); delete chimera; return 0; }
537                         #endif
538                         
539                 #endif  
540                 
541                         delete chimera;
542                         for (int i = 0; i < lines.size(); i++) {  delete lines[i];  }  lines.clear();
543                         
544                         outputNames.push_back(outputFileName); outputTypes["chimera"].push_back(outputFileName);
545                         outputNames.push_back(accnosFileName); outputTypes["accnos"].push_back(accnosFileName);
546                         
547                         m->mothurOutEndLine();
548                         m->mothurOutEndLine(); m->mothurOut("It took " + toString(time(NULL) - start) + " secs to check " + toString(numSeqs) + " sequences."); m->mothurOutEndLine();
549                 }
550                 
551                 //set accnos file as new current accnosfile
552                 string current = "";
553                 itTypes = outputTypes.find("accnos");
554                 if (itTypes != outputTypes.end()) {
555                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setAccnosFile(current); }
556                 }
557                 
558                 m->mothurOutEndLine();
559                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
560                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }       
561                 m->mothurOutEndLine();
562                         
563                 return 0;
564                 
565         }
566         catch(exception& e) {
567                 m->errorOut(e, "ChimeraPintailCommand", "execute");
568                 exit(1);
569         }
570 }
571 //**********************************************************************************************************************
572
573 int ChimeraPintailCommand::driver(linePair* filePos, string outputFName, string filename, string accnos){
574         try {
575                 ofstream out;
576                 m->openOutputFile(outputFName, out);
577                 
578                 ofstream out2;
579                 m->openOutputFile(accnos, out2);
580                 
581                 ifstream inFASTA;
582                 m->openInputFile(filename, inFASTA);
583
584                 inFASTA.seekg(filePos->start);
585
586                 bool done = false;
587                 int count = 0;
588         
589                 while (!done) {
590                                 
591                         if (m->control_pressed) {       return 1;       }
592                 
593                         Sequence* candidateSeq = new Sequence(inFASTA);  m->gobble(inFASTA);
594                                 
595                         if (candidateSeq->getName() != "") { //incase there is a commented sequence at the end of a file
596                                 
597                                 if (candidateSeq->getAligned().length() != templateSeqsLength)  {  //chimeracheck does not require seqs to be aligned
598                                         m->mothurOut(candidateSeq->getName() + " is not the same length as the template sequences. Skipping."); m->mothurOutEndLine();
599                                 }else{
600                                         //find chimeras
601                                         chimera->getChimeras(candidateSeq);
602                                         
603                                         if (m->control_pressed) {       delete candidateSeq; return 1;  }
604                 
605                                         //print results
606                                         chimera->print(out, out2);
607                                 }
608                                 count++;
609                         }
610                         delete candidateSeq;
611                         
612                         #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
613                                 unsigned long int pos = inFASTA.tellg();
614                                 if ((pos == -1) || (pos >= filePos->end)) { break; }
615                         #else
616                                 if (inFASTA.eof()) { break; }
617                         #endif
618                         
619                         //report progress
620                         if((count) % 100 == 0){ m->mothurOut("Processing sequence: " + toString(count)); m->mothurOutEndLine();         }
621                 }
622                 //report progress
623                 if((count) % 100 != 0){ m->mothurOut("Processing sequence: " + toString(count)); m->mothurOutEndLine();         }
624                 
625                 out.close();
626                 out2.close();
627                 inFASTA.close();
628                                 
629                 return count;
630         }
631         catch(exception& e) {
632                 m->errorOut(e, "ChimeraPintailCommand", "driver");
633                 exit(1);
634         }
635 }
636 //**********************************************************************************************************************
637 #ifdef USE_MPI
638 int ChimeraPintailCommand::driverMPI(int start, int num, MPI_File& inMPI, MPI_File& outMPI, MPI_File& outAccMPI, vector<unsigned long int>& MPIPos){
639         try {
640                                 
641                 MPI_Status status; 
642                 int pid;
643                 MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
644                 
645                 for(int i=0;i<num;i++){
646                         
647                         if (m->control_pressed) {       return 1;       }
648                         
649                         //read next sequence
650                         int length = MPIPos[start+i+1] - MPIPos[start+i];
651         
652                         char* buf4 = new char[length];
653                         MPI_File_read_at(inMPI, MPIPos[start+i], buf4, length, MPI_CHAR, &status);
654                         
655                         string tempBuf = buf4;
656                         if (tempBuf.length() > length) { tempBuf = tempBuf.substr(0, length);  }
657                         istringstream iss (tempBuf,istringstream::in);
658                         delete buf4;
659
660                         Sequence* candidateSeq = new Sequence(iss);  m->gobble(iss);
661                                 
662                         if (candidateSeq->getName() != "") { //incase there is a commented sequence at the end of a file
663                                 
664                                 if      (candidateSeq->getAligned().length() != templateSeqsLength) {  //chimeracheck does not require seqs to be aligned
665                                         m->mothurOut(candidateSeq->getName() + " is not the same length as the template sequences. Skipping."); m->mothurOutEndLine();
666                                 }else{
667                                         //find chimeras
668                                         chimera->getChimeras(candidateSeq);
669                                         
670                                         if (m->control_pressed) {       delete candidateSeq; return 1;  }
671                 
672                                         //print results
673                                         chimera->print(outMPI, outAccMPI);
674                                 }
675                         }
676                         delete candidateSeq;
677                         
678                         //report progress
679                         if((i+1) % 100 == 0){  cout << "Processing sequence: " << (i+1) << endl;        m->mothurOutJustToLog("Processing sequence: " + toString(i+1) + "\n");          }
680                 }
681                 //report progress
682                 if(num % 100 != 0){             cout << "Processing sequence: " << num << endl; m->mothurOutJustToLog("Processing sequence: " + toString(num) + "\n");  }
683                 
684                                 
685                 return 0;
686         }
687         catch(exception& e) {
688                 m->errorOut(e, "ChimeraPintailCommand", "driverMPI");
689                 exit(1);
690         }
691 }
692 #endif
693
694 /**************************************************************************************************/
695
696 int ChimeraPintailCommand::createProcesses(string outputFileName, string filename, string accnos) {
697         try {
698 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
699                 int process = 0;
700                 int num = 0;
701                 
702                 //loop through and create all the processes you want
703                 while (process != processors) {
704                         int pid = fork();
705                         
706                         if (pid > 0) {
707                                 processIDS.push_back(pid);  //create map from line number to pid so you can append files in correct order later
708                                 process++;
709                         }else if (pid == 0){
710                                 num = driver(lines[process], outputFileName + toString(getpid()) + ".temp", filename, accnos + toString(getpid()) + ".temp");
711                                 
712                                 //pass numSeqs to parent
713                                 ofstream out;
714                                 string tempFile = outputFileName + toString(getpid()) + ".num.temp";
715                                 m->openOutputFile(tempFile, out);
716                                 out << num << endl;
717                                 out.close();
718
719                                 exit(0);
720                         }else { 
721                                 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine(); 
722                                 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
723                                 exit(0);
724                         }
725                 }
726                 
727                 //force parent to wait until all the processes are done
728                 for (int i=0;i<processors;i++) { 
729                         int temp = processIDS[i];
730                         wait(&temp);
731                 }
732                 
733                 for (int i = 0; i < processIDS.size(); i++) {
734                         ifstream in;
735                         string tempFile =  outputFileName + toString(processIDS[i]) + ".num.temp";
736                         m->openInputFile(tempFile, in);
737                         if (!in.eof()) { int tempNum = 0; in >> tempNum; num += tempNum; }
738                         in.close(); m->mothurRemove(tempFile);
739                 }
740                 
741                 return num;
742 #endif          
743         }
744         catch(exception& e) {
745                 m->errorOut(e, "ChimeraPintailCommand", "createProcesses");
746                 exit(1);
747         }
748 }
749
750 /**************************************************************************************************/
751
752