]> git.donarmstrong.com Git - mothur.git/blob - chimerapintailcommand.cpp
rewrote metastats command in c++, added mothurRemove function to handle ~ error....
[mothur.git] / chimerapintailcommand.cpp
1 /*
2  *  chimerapintailcommand.cpp
3  *  Mothur
4  *
5  *  Created by westcott on 4/1/10.
6  *  Copyright 2010 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "chimerapintailcommand.h"
11 #include "pintail.h"
12
13
14 //**********************************************************************************************************************
15 vector<string> ChimeraPintailCommand::setParameters(){  
16         try {
17                 CommandParameter ptemplate("reference", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(ptemplate);
18                 CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pfasta);
19                 CommandParameter pconservation("conservation", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pconservation);
20                 CommandParameter pquantile("quantile", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pquantile);
21                 CommandParameter pfilter("filter", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pfilter);
22                 CommandParameter pwindow("window", "Number", "", "0", "", "", "",false,false); parameters.push_back(pwindow);
23                 CommandParameter pincrement("increment", "Number", "", "25", "", "", "",false,false); parameters.push_back(pincrement);
24                 CommandParameter pmask("mask", "String", "", "", "", "", "",false,false); parameters.push_back(pmask);
25                 CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors);
26                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
27                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
28                 CommandParameter psave("save", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(psave);
29
30                 vector<string> myArray;
31                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
32                 return myArray;
33         }
34         catch(exception& e) {
35                 m->errorOut(e, "ChimeraPintailCommand", "setParameters");
36                 exit(1);
37         }
38 }
39 //**********************************************************************************************************************
40 string ChimeraPintailCommand::getHelpString(){  
41         try {
42                 string helpString = "";
43                 helpString += "The chimera.pintail command reads a fastafile and referencefile and outputs potentially chimeric sequences.\n";
44                 helpString += "This command was created using the algorythms described in the 'At Least 1 in 20 16S rRNA Sequence Records Currently Held in the Public Repositories is Estimated To Contain Substantial Anomalies' paper by Kevin E. Ashelford 1, Nadia A. Chuzhanova 3, John C. Fry 1, Antonia J. Jones 2 and Andrew J. Weightman 1.\n";
45                 helpString += "The chimera.pintail command parameters are fasta, reference, filter, mask, processors, window, increment, conservation and quantile.\n";
46                 helpString += "The fasta parameter allows you to enter the fasta file containing your potentially chimeric sequences, and is required unless you have a valid current fasta file. \n";
47                 helpString += "You may enter multiple fasta files by separating their names with dashes. ie. fasta=abrecovery.fasta-amzon.fasta \n";
48                 helpString += "The reference parameter allows you to enter a reference file containing known non-chimeric sequences, and is required. \n";
49                 helpString += "The filter parameter allows you to specify if you would like to apply a vertical and 50% soft filter. \n";
50                 helpString += "The mask parameter allows you to specify a file containing one sequence you wish to use as a mask for the your sequences, by default no mask is applied.  You can apply an ecoli mask by typing, mask=default. \n";
51                 helpString += "The processors parameter allows you to specify how many processors you would like to use.  The default is 1. \n";
52 #ifdef USE_MPI
53                 helpString += "When using MPI, the processors parameter is set to the number of MPI processes running. \n";
54 #endif
55                 helpString += "If the save parameter is set to true the reference sequences will be saved in memory, to clear them later you can use the clear.memory command. Default=f.";
56                 helpString += "The window parameter allows you to specify the window size for searching for chimeras, default=300. \n";
57                 helpString += "The increment parameter allows you to specify how far you move each window while finding chimeric sequences, default=25.\n";
58                 helpString += "The conservation parameter allows you to enter a frequency file containing the highest bases frequency at each place in the alignment.\n";
59                 helpString += "The quantile parameter allows you to enter a file containing quantiles for a template files sequences, if you use the filter the quantile file generated becomes unique to the fasta file you used.\n";
60                 helpString += "The chimera.pintail command should be in the following format: \n";
61                 helpString += "chimera.pintail(fasta=yourFastaFile, reference=yourTemplate) \n";
62                 helpString += "Example: chimera.pintail(fasta=AD.align, reference=silva.bacteria.fasta) \n";
63                 helpString += "Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFastaFile).\n";       
64                 return helpString;
65         }
66         catch(exception& e) {
67                 m->errorOut(e, "ChimeraPintailCommand", "getHelpString");
68                 exit(1);
69         }
70 }
71 //**********************************************************************************************************************
72 ChimeraPintailCommand::ChimeraPintailCommand(){ 
73         try {
74                 abort = true; calledHelp = true;
75                 setParameters();
76                 vector<string> tempOutNames;
77                 outputTypes["chimera"] = tempOutNames;
78                 outputTypes["accnos"] = tempOutNames;
79         }
80         catch(exception& e) {
81                 m->errorOut(e, "ChimeraPintailCommand", "ChimeraPintailCommand");
82                 exit(1);
83         }
84 }
85 //***************************************************************************************************************
86 ChimeraPintailCommand::ChimeraPintailCommand(string option)  {
87         try {
88                 abort = false; calledHelp = false;   
89                 rdb = ReferenceDB::getInstance();
90                 
91                 //allow user to run help
92                 if(option == "help") { help(); abort = true; calledHelp = true; }
93                 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
94                 
95                 else {
96                         vector<string> myArray = setParameters();
97                         
98                         OptionParser parser(option);
99                         map<string,string> parameters = parser.getParameters();
100                         
101                         ValidParameters validParameter("chimera.pintail");
102                         map<string,string>::iterator it;
103                         
104                         //check to make sure all parameters are valid for command
105                         for (it = parameters.begin(); it != parameters.end(); it++) { 
106                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
107                         }
108                         
109                         vector<string> tempOutNames;
110                         outputTypes["chimera"] = tempOutNames;
111                         outputTypes["accnos"] = tempOutNames;
112                 
113                         //if the user changes the input directory command factory will send this info to us in the output parameter 
114                         inputDir = validParameter.validFile(parameters, "inputdir", false);             
115                         if (inputDir == "not found"){   inputDir = "";          }
116                         else {
117                                 string path;
118                                 it = parameters.find("reference");
119                                 //user has given a template file
120                                 if(it != parameters.end()){ 
121                                         path = m->hasPath(it->second);
122                                         //if the user has not given a path then, add inputdir. else leave path alone.
123                                         if (path == "") {       parameters["reference"] = inputDir + it->second;                }
124                                 }
125                                 
126                                 it = parameters.find("conservation");
127                                 //user has given a template file
128                                 if(it != parameters.end()){ 
129                                         path = m->hasPath(it->second);
130                                         //if the user has not given a path then, add inputdir. else leave path alone.
131                                         if (path == "") {       parameters["conservation"] = inputDir + it->second;             }
132                                 }
133                                 
134                                 it = parameters.find("quantile");
135                                 //user has given a template file
136                                 if(it != parameters.end()){ 
137                                         path = m->hasPath(it->second);
138                                         //if the user has not given a path then, add inputdir. else leave path alone.
139                                         if (path == "") {       parameters["quantile"] = inputDir + it->second;         }
140                                 }
141                         }
142
143                         
144                         //check for required parameters
145                         fastafile = validParameter.validFile(parameters, "fasta", false);
146                         if (fastafile == "not found") {                                 
147                                 //if there is a current fasta file, use it
148                                 string filename = m->getFastaFile(); 
149                                 if (filename != "") { fastaFileNames.push_back(filename); m->mothurOut("Using " + filename + " as input file for the fasta parameter."); m->mothurOutEndLine(); }
150                                 else {  m->mothurOut("You have no current fastafile and the fasta parameter is required."); m->mothurOutEndLine(); abort = true; }
151                         }else { 
152                                 m->splitAtDash(fastafile, fastaFileNames);
153                                 
154                                 //go through files and make sure they are good, if not, then disregard them
155                                 for (int i = 0; i < fastaFileNames.size(); i++) {
156                                         
157                                         bool ignore = false;
158                                         if (fastaFileNames[i] == "current") { 
159                                                 fastaFileNames[i] = m->getFastaFile(); 
160                                                 if (fastaFileNames[i] != "") {  m->mothurOut("Using " + fastaFileNames[i] + " as input file for the fasta parameter where you had given current."); m->mothurOutEndLine(); }
161                                                 else {  
162                                                         m->mothurOut("You have no current fastafile, ignoring current."); m->mothurOutEndLine(); ignore=true; 
163                                                         //erase from file list
164                                                         fastaFileNames.erase(fastaFileNames.begin()+i);
165                                                         i--;
166                                                 }
167                                         }
168                                         
169                                         if (!ignore) {
170                                         
171                                                 if (inputDir != "") {
172                                                         string path = m->hasPath(fastaFileNames[i]);
173                                                         //if the user has not given a path then, add inputdir. else leave path alone.
174                                                         if (path == "") {       fastaFileNames[i] = inputDir + fastaFileNames[i];               }
175                                                 }
176                 
177                                                 int ableToOpen;
178                                                 ifstream in;
179                                                 
180                                                 ableToOpen = m->openInputFile(fastaFileNames[i], in, "noerror");
181                                         
182                                                 //if you can't open it, try default location
183                                                 if (ableToOpen == 1) {
184                                                         if (m->getDefaultPath() != "") { //default path is set
185                                                                 string tryPath = m->getDefaultPath() + m->getSimpleName(fastaFileNames[i]);
186                                                                 m->mothurOut("Unable to open " + fastaFileNames[i] + ". Trying default " + tryPath); m->mothurOutEndLine();
187                                                                 ifstream in2;
188                                                                 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
189                                                                 in2.close();
190                                                                 fastaFileNames[i] = tryPath;
191                                                         }
192                                                 }
193                                                 
194                                                 if (ableToOpen == 1) {
195                                                         if (m->getOutputDir() != "") { //default path is set
196                                                                 string tryPath = m->getOutputDir() + m->getSimpleName(fastaFileNames[i]);
197                                                                 m->mothurOut("Unable to open " + fastaFileNames[i] + ". Trying output directory " + tryPath); m->mothurOutEndLine();
198                                                                 ifstream in2;
199                                                                 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
200                                                                 in2.close();
201                                                                 fastaFileNames[i] = tryPath;
202                                                         }
203                                                 }
204
205                                                 in.close();
206                                                 
207                                                 if (ableToOpen == 1) { 
208                                                         m->mothurOut("Unable to open " + fastaFileNames[i] + ". It will be disregarded."); m->mothurOutEndLine(); 
209                                                         //erase from file list
210                                                         fastaFileNames.erase(fastaFileNames.begin()+i);
211                                                         i--;
212                                                 }else {
213                                                         m->setFastaFile(fastaFileNames[i]);
214                                                 }
215                                         }
216                                 }
217                                 
218                                 //make sure there is at least one valid file left
219                                 if (fastaFileNames.size() == 0) { m->mothurOut("no valid files."); m->mothurOutEndLine(); abort = true; }
220                         }
221                         
222                         string temp;
223                         temp = validParameter.validFile(parameters, "filter", false);                   if (temp == "not found") { temp = "F"; }
224                         filter = m->isTrue(temp);
225                         
226                         temp = validParameter.validFile(parameters, "processors", false);       if (temp == "not found"){       temp = m->getProcessors();      }
227                         m->setProcessors(temp);
228                         convert(temp, processors);
229                         
230                         temp = validParameter.validFile(parameters, "window", false);                   if (temp == "not found") { temp = "0"; }
231                         convert(temp, window);
232                         
233                         temp = validParameter.validFile(parameters, "increment", false);                if (temp == "not found") { temp = "25"; }
234                         convert(temp, increment);
235                         
236                         temp = validParameter.validFile(parameters, "save", false);                     if (temp == "not found"){       temp = "f";                             }
237                         save = m->isTrue(temp); 
238                         rdb->save = save; 
239                         if (save) { //clear out old references
240                                 rdb->clearMemory();     
241                         }
242                         
243                         //this has to go after save so that if the user sets save=t and provides no reference we abort
244                         templatefile = validParameter.validFile(parameters, "reference", true);
245                         if (templatefile == "not found") { 
246                                 //check for saved reference sequences
247                                 if (rdb->referenceSeqs.size() != 0) {
248                                         templatefile = "saved";
249                                 }else {
250                                         m->mothurOut("[ERROR]: You don't have any saved reference sequences and the reference parameter is a required."); 
251                                         m->mothurOutEndLine();
252                                         abort = true; 
253                                 }
254                         }else if (templatefile == "not open") { abort = true; } 
255                         else {  if (save) {     rdb->setSavedReference(templatefile);   }       }
256                         
257                         
258                         maskfile = validParameter.validFile(parameters, "mask", false);
259                         if (maskfile == "not found") { maskfile = "";  }        
260                         else if (maskfile != "default")  { 
261                                 if (inputDir != "") {
262                                         string path = m->hasPath(maskfile);
263                                         //if the user has not given a path then, add inputdir. else leave path alone.
264                                         if (path == "") {       maskfile = inputDir + maskfile;         }
265                                 }
266
267                                 ifstream in;
268                                 int     ableToOpen = m->openInputFile(maskfile, in, "no error");
269                                 if (ableToOpen == 1) { 
270                                         if (m->getDefaultPath() != "") { //default path is set
271                                                         string tryPath = m->getDefaultPath() + m->getSimpleName(maskfile);
272                                                         m->mothurOut("Unable to open " + maskfile + ". Trying default " + tryPath); m->mothurOutEndLine();
273                                                         ifstream in2;
274                                                         ableToOpen = m->openInputFile(tryPath, in2, "noerror");
275                                                         in2.close();
276                                                         maskfile = tryPath;
277                                         }
278                                 }
279                                 
280                                 if (ableToOpen == 1) {
281                                                 if (m->getOutputDir() != "") { //default path is set
282                                                         string tryPath = m->getOutputDir() + m->getSimpleName(maskfile);
283                                                         m->mothurOut("Unable to open " + maskfile + ". Trying output directory " + tryPath); m->mothurOutEndLine();
284                                                         ifstream in2;
285                                                         ableToOpen = m->openInputFile(tryPath, in2, "noerror");
286                                                         in2.close();
287                                                         maskfile = tryPath;
288                                                 }
289                                 }
290                                 
291                                 in.close();
292                                         
293                                 if (ableToOpen == 1) { 
294                                                 m->mothurOut("Unable to open " + maskfile + "."); m->mothurOutEndLine(); 
295                                                 abort = true;
296                                 }
297                         }
298
299                         
300                         //if the user changes the output directory command factory will send this info to us in the output parameter 
301                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = ""; }
302                 
303                         consfile = validParameter.validFile(parameters, "conservation", true);
304                         if (consfile == "not open") { abort = true; }
305                         else if (consfile == "not found") { 
306                                 consfile = "";  
307                                 //check for consfile
308                                 string tempConsFile = m->getRootName(inputDir + m->getSimpleName(templatefile)) + "freq";
309                                 ifstream FileTest(tempConsFile.c_str());
310                                 if(FileTest){   
311                                         bool GoodFile = m->checkReleaseVersion(FileTest, m->getVersion());
312                                         if (GoodFile) {  
313                                                 m->mothurOut("I found " + tempConsFile + " in your input file directory. I will use it to save time."); m->mothurOutEndLine();  consfile = tempConsFile;  FileTest.close();     
314                                         }
315                                 }else {
316                                         string tempConsFile = m->getDefaultPath() + m->getRootName(m->getSimpleName(templatefile)) + "freq";
317                                         ifstream FileTest2(tempConsFile.c_str());
318                                         if(FileTest2){  
319                                                 bool GoodFile = m->checkReleaseVersion(FileTest2, m->getVersion());
320                                                 if (GoodFile) {  
321                                                         m->mothurOut("I found " + tempConsFile + " in your input file directory. I will use it to save time."); m->mothurOutEndLine();  consfile = tempConsFile;  FileTest2.close();    
322                                                 }
323                                         }
324                                 }
325                         }       
326                         
327                         quanfile = validParameter.validFile(parameters, "quantile", true);
328                         if (quanfile == "not open") { abort = true; }
329                         else if (quanfile == "not found") { quanfile = ""; }
330                 }
331         }
332         catch(exception& e) {
333                 m->errorOut(e, "ChimeraPintailCommand", "ChimeraPintailCommand");
334                 exit(1);
335         }
336 }
337 //***************************************************************************************************************
338
339 int ChimeraPintailCommand::execute(){
340         try{
341                 
342                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
343                 
344                 for (int s = 0; s < fastaFileNames.size(); s++) {
345                                 
346                         m->mothurOut("Checking sequences from " + fastaFileNames[s] + " ..." ); m->mothurOutEndLine();
347
348                         int start = time(NULL); 
349                         
350                         //set user options
351                         if (maskfile == "default") { m->mothurOut("I am using the default 236627 EU009184.1 Shigella dysenteriae str. FBD013."); m->mothurOutEndLine();  }
352                         
353                         //check for quantile to save the time
354                         string baseName = templatefile;
355                         if (templatefile == "saved") { baseName = rdb->getSavedReference(); }
356                         
357                         string tempQuan = "";
358                         if ((!filter) && (maskfile == "")) {
359                                 tempQuan = inputDir + m->getRootName(m->getSimpleName(baseName)) + "pintail.quan";
360                         }else if ((!filter) && (maskfile != "")) { 
361                                 tempQuan = inputDir + m->getRootName(m->getSimpleName(baseName)) + "pintail.masked.quan";
362                         }else if ((filter) && (maskfile != "")) { 
363                                 tempQuan = inputDir + m->getRootName(m->getSimpleName(baseName)) + "pintail.filtered." + m->getSimpleName(m->getRootName(fastaFileNames[s])) + "masked.quan";
364                         }else if ((filter) && (maskfile == "")) { 
365                                 tempQuan = inputDir + m->getRootName(m->getSimpleName(baseName)) + "pintail.filtered." + m->getSimpleName(m->getRootName(fastaFileNames[s])) + "quan";
366                         }
367                         
368                         ifstream FileTest(tempQuan.c_str());
369                         if(FileTest){   
370                                 bool GoodFile = m->checkReleaseVersion(FileTest, m->getVersion());
371                                 if (GoodFile) {  
372                                         m->mothurOut("I found " + tempQuan + " in your input file directory. I will use it to save time."); m->mothurOutEndLine();  quanfile = tempQuan;  FileTest.close();     
373                                 }
374                         }else {
375                                 string tryPath = m->getDefaultPath();
376                                 string tempQuan = "";
377                                 if ((!filter) && (maskfile == "")) {
378                                         tempQuan = tryPath + m->getRootName(m->getSimpleName(baseName)) + "pintail.quan";
379                                 }else if ((!filter) && (maskfile != "")) { 
380                                         tempQuan = tryPath + m->getRootName(m->getSimpleName(baseName)) + "pintail.masked.quan";
381                                 }else if ((filter) && (maskfile != "")) { 
382                                         tempQuan = tryPath + m->getRootName(m->getSimpleName(baseName)) + "pintail.filtered." + m->getSimpleName(m->getRootName(fastaFileNames[s])) + "masked.quan";
383                                 }else if ((filter) && (maskfile == "")) { 
384                                         tempQuan = tryPath + m->getRootName(m->getSimpleName(baseName)) + "pintail.filtered." + m->getSimpleName(m->getRootName(fastaFileNames[s])) + "quan";
385                                 }
386                                 
387                                 ifstream FileTest2(tempQuan.c_str());
388                                 if(FileTest2){  
389                                         bool GoodFile = m->checkReleaseVersion(FileTest2, m->getVersion());
390                                         if (GoodFile) {  
391                                                 m->mothurOut("I found " + tempQuan + " in your input file directory. I will use it to save time."); m->mothurOutEndLine();  quanfile = tempQuan;  FileTest2.close();    
392                                         }
393                                 }
394                         }
395                         
396                         chimera = new Pintail(fastaFileNames[s], templatefile, filter, processors, maskfile, consfile, quanfile, window, increment, outputDir);
397                         
398                         if (outputDir == "") { outputDir = m->hasPath(fastaFileNames[s]);  }//if user entered a file with a path then preserve it
399                         string outputFileName, accnosFileName;
400                         if (maskfile != "") {
401                                 outputFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + m->getSimpleName(m->getRootName(maskfile)) + ".pintail.chimeras";
402                                 accnosFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + m->getSimpleName(m->getRootName(maskfile)) + ".pintail.accnos";
403                         }else {
404                                 outputFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s]))  + "pintail.chimeras";
405                                 accnosFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s]))  + "pintail.accnos";
406                         }
407                         
408                         if (m->control_pressed) { delete chimera; for (int j = 0; j < outputNames.size(); j++) {        m->mothurRemove(outputNames[j]);        }  return 0;    }
409                         
410                         if (chimera->getUnaligned()) { 
411                                 m->mothurOut("Your template sequences are different lengths, please correct."); m->mothurOutEndLine(); 
412                                 delete chimera;
413                                 return 0; 
414                         }
415                         templateSeqsLength = chimera->getLength();
416                 
417                 #ifdef USE_MPI
418                         int pid, numSeqsPerProcessor; 
419                                 int tag = 2001;
420                                 vector<unsigned long int> MPIPos;
421                                 
422                                 MPI_Status status; 
423                                 MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
424                                 MPI_Comm_size(MPI_COMM_WORLD, &processors); 
425
426                                 MPI_File inMPI;
427                                 MPI_File outMPI;
428                                 MPI_File outMPIAccnos;
429                                 
430                                 int outMode=MPI_MODE_CREATE|MPI_MODE_WRONLY; 
431                                 int inMode=MPI_MODE_RDONLY; 
432                                 
433                                 char outFilename[1024];
434                                 strcpy(outFilename, outputFileName.c_str());
435                                 
436                                 char outAccnosFilename[1024];
437                                 strcpy(outAccnosFilename, accnosFileName.c_str());
438                                 
439                                 char inFileName[1024];
440                                 strcpy(inFileName, fastaFileNames[s].c_str());
441
442                                 MPI_File_open(MPI_COMM_WORLD, inFileName, inMode, MPI_INFO_NULL, &inMPI);  //comm, filename, mode, info, filepointer
443                                 MPI_File_open(MPI_COMM_WORLD, outFilename, outMode, MPI_INFO_NULL, &outMPI);
444                                 MPI_File_open(MPI_COMM_WORLD, outAccnosFilename, outMode, MPI_INFO_NULL, &outMPIAccnos);
445                                 
446                                 if (m->control_pressed) { outputTypes.clear();  MPI_File_close(&inMPI);  MPI_File_close(&outMPI);   MPI_File_close(&outMPIAccnos);  for (int j = 0; j < outputNames.size(); j++) {      m->mothurRemove(outputNames[j]);        }  delete chimera; return 0;  }
447
448                                 if (pid == 0) { //you are the root process 
449                                                                 
450                                         MPIPos = m->setFilePosFasta(fastaFileNames[s], numSeqs); //fills MPIPos, returns numSeqs
451                                         
452                                         //send file positions to all processes
453                                         for(int i = 1; i < processors; i++) { 
454                                                 MPI_Send(&numSeqs, 1, MPI_INT, i, tag, MPI_COMM_WORLD);
455                                                 MPI_Send(&MPIPos[0], (numSeqs+1), MPI_LONG, i, tag, MPI_COMM_WORLD);
456                                         }
457                                         
458                                         //figure out how many sequences you have to align
459                                         numSeqsPerProcessor = numSeqs / processors;
460                                         int startIndex =  pid * numSeqsPerProcessor;
461                                         if(pid == (processors - 1)){    numSeqsPerProcessor = numSeqs - pid * numSeqsPerProcessor;      }
462                                 
463                                         //do your part
464                                         driverMPI(startIndex, numSeqsPerProcessor, inMPI, outMPI, outMPIAccnos, MPIPos);
465                                         
466                                         if (m->control_pressed) { outputTypes.clear();  MPI_File_close(&inMPI);  MPI_File_close(&outMPI);   MPI_File_close(&outMPIAccnos);  m->mothurRemove(outputFileName);  m->mothurRemove(accnosFileName);  for (int j = 0; j < outputNames.size(); j++) {  m->mothurRemove(outputNames[j]);        }  delete chimera; return 0;  }
467                                         
468                                 }else{ //you are a child process
469                                         MPI_Recv(&numSeqs, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
470                                         MPIPos.resize(numSeqs+1);
471                                         MPI_Recv(&MPIPos[0], (numSeqs+1), MPI_LONG, 0, tag, MPI_COMM_WORLD, &status);
472                                         
473                                         //figure out how many sequences you have to align
474                                         numSeqsPerProcessor = numSeqs / processors;
475                                         int startIndex =  pid * numSeqsPerProcessor;
476                                         if(pid == (processors - 1)){    numSeqsPerProcessor = numSeqs - pid * numSeqsPerProcessor;      }
477                                         
478                                         //do your part
479                                         driverMPI(startIndex, numSeqsPerProcessor, inMPI, outMPI, outMPIAccnos, MPIPos);
480                                         
481                                         if (m->control_pressed) { outputTypes.clear();  MPI_File_close(&inMPI);  MPI_File_close(&outMPI);   MPI_File_close(&outMPIAccnos);  for (int j = 0; j < outputNames.size(); j++) {      m->mothurRemove(outputNames[j]);        }  delete chimera; return 0;  }
482                                 }
483                                 
484                                 //close files 
485                                 MPI_File_close(&inMPI);
486                                 MPI_File_close(&outMPI);
487                                 MPI_File_close(&outMPIAccnos);
488                                 MPI_Barrier(MPI_COMM_WORLD); //make everyone wait - just in case
489                 #else
490                         vector<unsigned long int> positions = m->divideFile(fastaFileNames[s], processors);
491                                 
492                         for (int i = 0; i < (positions.size()-1); i++) {
493                                 lines.push_back(new linePair(positions[i], positions[(i+1)]));
494                         }       
495                         
496                         //break up file
497                         #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
498                                 if(processors == 1){
499                 
500                                         numSeqs = driver(lines[0], outputFileName, fastaFileNames[s], accnosFileName);
501                                         
502                                         if (m->control_pressed) { outputTypes.clear(); m->mothurRemove(outputFileName); m->mothurRemove(accnosFileName); for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]);        } for (int i = 0; i < lines.size(); i++) {  delete lines[i];  }  lines.clear(); delete chimera; return 0; }
503                                         
504                                 }else{
505                                         processIDS.resize(0);
506                                         
507                                         numSeqs = createProcesses(outputFileName, fastaFileNames[s], accnosFileName); 
508                                 
509                                         rename((outputFileName + toString(processIDS[0]) + ".temp").c_str(), outputFileName.c_str());
510                                         rename((accnosFileName + toString(processIDS[0]) + ".temp").c_str(), accnosFileName.c_str());
511                                                 
512                                         //append output files
513                                         for(int i=1;i<processors;i++){
514                                                 m->appendFiles((outputFileName + toString(processIDS[i]) + ".temp"), outputFileName);
515                                                 m->mothurRemove((outputFileName + toString(processIDS[i]) + ".temp"));
516                                         }
517                                         
518                                         //append output files
519                                         for(int i=1;i<processors;i++){
520                                                 m->appendFiles((accnosFileName + toString(processIDS[i]) + ".temp"), accnosFileName);
521                                                 m->mothurRemove((accnosFileName + toString(processIDS[i]) + ".temp"));
522                                         }
523                                                                                 
524                                         if (m->control_pressed) { 
525                                                 m->mothurRemove(outputFileName); 
526                                                 m->mothurRemove(accnosFileName);
527                                                 for (int j = 0; j < outputNames.size(); j++) {  m->mothurRemove(outputNames[j]);        } outputTypes.clear();
528                                                 for (int i = 0; i < lines.size(); i++) {  delete lines[i];  }  lines.clear();
529                                                 delete chimera;
530                                                 return 0;
531                                         }
532                                 }
533
534                         #else
535                                 numSeqs = driver(lines[0], outputFileName, fastaFileNames[s], accnosFileName);
536                                 
537                                 if (m->control_pressed) { outputTypes.clear(); m->mothurRemove(outputFileName); m->mothurRemove(accnosFileName); for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]);        } for (int i = 0; i < lines.size(); i++) {  delete lines[i];  }  lines.clear(); delete chimera; return 0; }
538                         #endif
539                         
540                 #endif  
541                 
542                         delete chimera;
543                         for (int i = 0; i < lines.size(); i++) {  delete lines[i];  }  lines.clear();
544                         
545                         outputNames.push_back(outputFileName); outputTypes["chimera"].push_back(outputFileName);
546                         outputNames.push_back(accnosFileName); outputTypes["accnos"].push_back(accnosFileName);
547                         
548                         m->mothurOutEndLine();
549                         m->mothurOutEndLine(); m->mothurOut("It took " + toString(time(NULL) - start) + " secs to check " + toString(numSeqs) + " sequences."); m->mothurOutEndLine();
550                 }
551                 
552                 //set accnos file as new current accnosfile
553                 string current = "";
554                 itTypes = outputTypes.find("accnos");
555                 if (itTypes != outputTypes.end()) {
556                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setAccnosFile(current); }
557                 }
558                 
559                 m->mothurOutEndLine();
560                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
561                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }       
562                 m->mothurOutEndLine();
563                         
564                 return 0;
565                 
566         }
567         catch(exception& e) {
568                 m->errorOut(e, "ChimeraPintailCommand", "execute");
569                 exit(1);
570         }
571 }
572 //**********************************************************************************************************************
573
574 int ChimeraPintailCommand::driver(linePair* filePos, string outputFName, string filename, string accnos){
575         try {
576                 ofstream out;
577                 m->openOutputFile(outputFName, out);
578                 
579                 ofstream out2;
580                 m->openOutputFile(accnos, out2);
581                 
582                 ifstream inFASTA;
583                 m->openInputFile(filename, inFASTA);
584
585                 inFASTA.seekg(filePos->start);
586
587                 bool done = false;
588                 int count = 0;
589         
590                 while (!done) {
591                                 
592                         if (m->control_pressed) {       return 1;       }
593                 
594                         Sequence* candidateSeq = new Sequence(inFASTA);  m->gobble(inFASTA);
595                                 
596                         if (candidateSeq->getName() != "") { //incase there is a commented sequence at the end of a file
597                                 
598                                 if (candidateSeq->getAligned().length() != templateSeqsLength)  {  //chimeracheck does not require seqs to be aligned
599                                         m->mothurOut(candidateSeq->getName() + " is not the same length as the template sequences. Skipping."); m->mothurOutEndLine();
600                                 }else{
601                                         //find chimeras
602                                         chimera->getChimeras(candidateSeq);
603                                         
604                                         if (m->control_pressed) {       delete candidateSeq; return 1;  }
605                 
606                                         //print results
607                                         chimera->print(out, out2);
608                                 }
609                                 count++;
610                         }
611                         delete candidateSeq;
612                         
613                         #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
614                                 unsigned long int pos = inFASTA.tellg();
615                                 if ((pos == -1) || (pos >= filePos->end)) { break; }
616                         #else
617                                 if (inFASTA.eof()) { break; }
618                         #endif
619                         
620                         //report progress
621                         if((count) % 100 == 0){ m->mothurOut("Processing sequence: " + toString(count)); m->mothurOutEndLine();         }
622                 }
623                 //report progress
624                 if((count) % 100 != 0){ m->mothurOut("Processing sequence: " + toString(count)); m->mothurOutEndLine();         }
625                 
626                 out.close();
627                 out2.close();
628                 inFASTA.close();
629                                 
630                 return count;
631         }
632         catch(exception& e) {
633                 m->errorOut(e, "ChimeraPintailCommand", "driver");
634                 exit(1);
635         }
636 }
637 //**********************************************************************************************************************
638 #ifdef USE_MPI
639 int ChimeraPintailCommand::driverMPI(int start, int num, MPI_File& inMPI, MPI_File& outMPI, MPI_File& outAccMPI, vector<unsigned long int>& MPIPos){
640         try {
641                                 
642                 MPI_Status status; 
643                 int pid;
644                 MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
645                 
646                 for(int i=0;i<num;i++){
647                         
648                         if (m->control_pressed) {       return 1;       }
649                         
650                         //read next sequence
651                         int length = MPIPos[start+i+1] - MPIPos[start+i];
652         
653                         char* buf4 = new char[length];
654                         MPI_File_read_at(inMPI, MPIPos[start+i], buf4, length, MPI_CHAR, &status);
655                         
656                         string tempBuf = buf4;
657                         if (tempBuf.length() > length) { tempBuf = tempBuf.substr(0, length);  }
658                         istringstream iss (tempBuf,istringstream::in);
659                         delete buf4;
660
661                         Sequence* candidateSeq = new Sequence(iss);  m->gobble(iss);
662                                 
663                         if (candidateSeq->getName() != "") { //incase there is a commented sequence at the end of a file
664                                 
665                                 if      (candidateSeq->getAligned().length() != templateSeqsLength) {  //chimeracheck does not require seqs to be aligned
666                                         m->mothurOut(candidateSeq->getName() + " is not the same length as the template sequences. Skipping."); m->mothurOutEndLine();
667                                 }else{
668                                         //find chimeras
669                                         chimera->getChimeras(candidateSeq);
670                                         
671                                         if (m->control_pressed) {       delete candidateSeq; return 1;  }
672                 
673                                         //print results
674                                         chimera->print(outMPI, outAccMPI);
675                                 }
676                         }
677                         delete candidateSeq;
678                         
679                         //report progress
680                         if((i+1) % 100 == 0){  cout << "Processing sequence: " << (i+1) << endl;        m->mothurOutJustToLog("Processing sequence: " + toString(i+1) + "\n");          }
681                 }
682                 //report progress
683                 if(num % 100 != 0){             cout << "Processing sequence: " << num << endl; m->mothurOutJustToLog("Processing sequence: " + toString(num) + "\n");  }
684                 
685                                 
686                 return 0;
687         }
688         catch(exception& e) {
689                 m->errorOut(e, "ChimeraPintailCommand", "driverMPI");
690                 exit(1);
691         }
692 }
693 #endif
694
695 /**************************************************************************************************/
696
697 int ChimeraPintailCommand::createProcesses(string outputFileName, string filename, string accnos) {
698         try {
699 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
700                 int process = 0;
701                 int num = 0;
702                 
703                 //loop through and create all the processes you want
704                 while (process != processors) {
705                         int pid = fork();
706                         
707                         if (pid > 0) {
708                                 processIDS.push_back(pid);  //create map from line number to pid so you can append files in correct order later
709                                 process++;
710                         }else if (pid == 0){
711                                 num = driver(lines[process], outputFileName + toString(getpid()) + ".temp", filename, accnos + toString(getpid()) + ".temp");
712                                 
713                                 //pass numSeqs to parent
714                                 ofstream out;
715                                 string tempFile = outputFileName + toString(getpid()) + ".num.temp";
716                                 m->openOutputFile(tempFile, out);
717                                 out << num << endl;
718                                 out.close();
719
720                                 exit(0);
721                         }else { 
722                                 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine(); 
723                                 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
724                                 exit(0);
725                         }
726                 }
727                 
728                 //force parent to wait until all the processes are done
729                 for (int i=0;i<processors;i++) { 
730                         int temp = processIDS[i];
731                         wait(&temp);
732                 }
733                 
734                 for (int i = 0; i < processIDS.size(); i++) {
735                         ifstream in;
736                         string tempFile =  outputFileName + toString(processIDS[i]) + ".num.temp";
737                         m->openInputFile(tempFile, in);
738                         if (!in.eof()) { int tempNum = 0; in >> tempNum; num += tempNum; }
739                         in.close(); m->mothurRemove(tempFile);
740                 }
741                 
742                 return num;
743 #endif          
744         }
745         catch(exception& e) {
746                 m->errorOut(e, "ChimeraPintailCommand", "createProcesses");
747                 exit(1);
748         }
749 }
750
751 /**************************************************************************************************/
752
753