]> git.donarmstrong.com Git - mothur.git/blob - chimerapintailcommand.cpp
added current as option is lists of file names, processors now outputted with current...
[mothur.git] / chimerapintailcommand.cpp
1 /*
2  *  chimerapintailcommand.cpp
3  *  Mothur
4  *
5  *  Created by westcott on 4/1/10.
6  *  Copyright 2010 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "chimerapintailcommand.h"
11 #include "pintail.h"
12
13 //**********************************************************************************************************************
14 vector<string> ChimeraPintailCommand::setParameters(){  
15         try {
16                 CommandParameter ptemplate("reference", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(ptemplate);
17                 CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pfasta);
18                 CommandParameter pconservation("conservation", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pconservation);
19                 CommandParameter pquantile("quantile", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pquantile);
20                 CommandParameter pfilter("filter", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pfilter);
21                 CommandParameter pwindow("window", "Number", "", "0", "", "", "",false,false); parameters.push_back(pwindow);
22                 CommandParameter pincrement("increment", "Number", "", "25", "", "", "",false,false); parameters.push_back(pincrement);
23                 CommandParameter pmask("mask", "String", "", "", "", "", "",false,false); parameters.push_back(pmask);
24                 CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors);
25                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
26                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
27                 
28                 vector<string> myArray;
29                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
30                 return myArray;
31         }
32         catch(exception& e) {
33                 m->errorOut(e, "ChimeraPintailCommand", "setParameters");
34                 exit(1);
35         }
36 }
37 //**********************************************************************************************************************
38 string ChimeraPintailCommand::getHelpString(){  
39         try {
40                 string helpString = "";
41                 helpString += "The chimera.pintail command reads a fastafile and referencefile and outputs potentially chimeric sequences.\n";
42                 helpString += "This command was created using the algorythms described in the 'At Least 1 in 20 16S rRNA Sequence Records Currently Held in the Public Repositories is Estimated To Contain Substantial Anomalies' paper by Kevin E. Ashelford 1, Nadia A. Chuzhanova 3, John C. Fry 1, Antonia J. Jones 2 and Andrew J. Weightman 1.\n";
43                 helpString += "The chimera.pintail command parameters are fasta, reference, filter, mask, processors, window, increment, conservation and quantile.\n";
44                 helpString += "The fasta parameter allows you to enter the fasta file containing your potentially chimeric sequences, and is required unless you have a valid current fasta file. \n";
45                 helpString += "You may enter multiple fasta files by separating their names with dashes. ie. fasta=abrecovery.fasta-amzon.fasta \n";
46                 helpString += "The reference parameter allows you to enter a reference file containing known non-chimeric sequences, and is required. \n";
47                 helpString += "The filter parameter allows you to specify if you would like to apply a vertical and 50% soft filter. \n";
48                 helpString += "The mask parameter allows you to specify a file containing one sequence you wish to use as a mask for the your sequences, by default no mask is applied.  You can apply an ecoli mask by typing, mask=default. \n";
49                 helpString += "The processors parameter allows you to specify how many processors you would like to use.  The default is 1. \n";
50 #ifdef USE_MPI
51                 helpString += "When using MPI, the processors parameter is set to the number of MPI processes running. \n";
52 #endif
53                 helpString += "The window parameter allows you to specify the window size for searching for chimeras, default=300. \n";
54                 helpString += "The increment parameter allows you to specify how far you move each window while finding chimeric sequences, default=25.\n";
55                 helpString += "The conservation parameter allows you to enter a frequency file containing the highest bases frequency at each place in the alignment.\n";
56                 helpString += "The quantile parameter allows you to enter a file containing quantiles for a template files sequences, if you use the filter the quantile file generated becomes unique to the fasta file you used.\n";
57                 helpString += "The chimera.pintail command should be in the following format: \n";
58                 helpString += "chimera.pintail(fasta=yourFastaFile, reference=yourTemplate) \n";
59                 helpString += "Example: chimera.pintail(fasta=AD.align, reference=silva.bacteria.fasta) \n";
60                 helpString += "Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFastaFile).\n";       
61                 return helpString;
62         }
63         catch(exception& e) {
64                 m->errorOut(e, "ChimeraPintailCommand", "getHelpString");
65                 exit(1);
66         }
67 }
68 //**********************************************************************************************************************
69 ChimeraPintailCommand::ChimeraPintailCommand(){ 
70         try {
71                 abort = true; calledHelp = true;
72                 setParameters();
73                 vector<string> tempOutNames;
74                 outputTypes["chimera"] = tempOutNames;
75                 outputTypes["accnos"] = tempOutNames;
76         }
77         catch(exception& e) {
78                 m->errorOut(e, "ChimeraPintailCommand", "ChimeraPintailCommand");
79                 exit(1);
80         }
81 }
82 //***************************************************************************************************************
83 ChimeraPintailCommand::ChimeraPintailCommand(string option)  {
84         try {
85                 abort = false; calledHelp = false;   
86                 
87                 //allow user to run help
88                 if(option == "help") { help(); abort = true; calledHelp = true; }
89                 
90                 else {
91                         vector<string> myArray = setParameters();
92                         
93                         OptionParser parser(option);
94                         map<string,string> parameters = parser.getParameters();
95                         
96                         ValidParameters validParameter("chimera.pintail");
97                         map<string,string>::iterator it;
98                         
99                         //check to make sure all parameters are valid for command
100                         for (it = parameters.begin(); it != parameters.end(); it++) { 
101                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
102                         }
103                         
104                         vector<string> tempOutNames;
105                         outputTypes["chimera"] = tempOutNames;
106                         outputTypes["accnos"] = tempOutNames;
107                 
108                         //if the user changes the input directory command factory will send this info to us in the output parameter 
109                         inputDir = validParameter.validFile(parameters, "inputdir", false);             
110                         if (inputDir == "not found"){   inputDir = "";          }
111                         else {
112                                 string path;
113                                 it = parameters.find("reference");
114                                 //user has given a template file
115                                 if(it != parameters.end()){ 
116                                         path = m->hasPath(it->second);
117                                         //if the user has not given a path then, add inputdir. else leave path alone.
118                                         if (path == "") {       parameters["reference"] = inputDir + it->second;                }
119                                 }
120                                 
121                                 it = parameters.find("conservation");
122                                 //user has given a template file
123                                 if(it != parameters.end()){ 
124                                         path = m->hasPath(it->second);
125                                         //if the user has not given a path then, add inputdir. else leave path alone.
126                                         if (path == "") {       parameters["conservation"] = inputDir + it->second;             }
127                                 }
128                                 
129                                 it = parameters.find("quantile");
130                                 //user has given a template file
131                                 if(it != parameters.end()){ 
132                                         path = m->hasPath(it->second);
133                                         //if the user has not given a path then, add inputdir. else leave path alone.
134                                         if (path == "") {       parameters["quantile"] = inputDir + it->second;         }
135                                 }
136                         }
137
138                         
139                         //check for required parameters
140                         fastafile = validParameter.validFile(parameters, "fasta", false);
141                         if (fastafile == "not found") {                                 
142                                 //if there is a current fasta file, use it
143                                 string filename = m->getFastaFile(); 
144                                 if (filename != "") { fastaFileNames.push_back(filename); m->mothurOut("Using " + filename + " as input file for the fasta parameter."); m->mothurOutEndLine(); }
145                                 else {  m->mothurOut("You have no current fastafile and the fasta parameter is required."); m->mothurOutEndLine(); abort = true; }
146                         }else { 
147                                 m->splitAtDash(fastafile, fastaFileNames);
148                                 
149                                 //go through files and make sure they are good, if not, then disregard them
150                                 for (int i = 0; i < fastaFileNames.size(); i++) {
151                                         
152                                         bool ignore = false;
153                                         if (fastaFileNames[i] == "current") { 
154                                                 fastaFileNames[i] = m->getFastaFile(); 
155                                                 if (fastaFileNames[i] != "") {  m->mothurOut("Using " + fastaFileNames[i] + " as input file for the fasta parameter where you had given current."); m->mothurOutEndLine(); }
156                                                 else {  
157                                                         m->mothurOut("You have no current fastafile, ignoring current."); m->mothurOutEndLine(); ignore=true; 
158                                                         //erase from file list
159                                                         fastaFileNames.erase(fastaFileNames.begin()+i);
160                                                         i--;
161                                                 }
162                                         }
163                                         
164                                         if (!ignore) {
165                                         
166                                                 if (inputDir != "") {
167                                                         string path = m->hasPath(fastaFileNames[i]);
168                                                         //if the user has not given a path then, add inputdir. else leave path alone.
169                                                         if (path == "") {       fastaFileNames[i] = inputDir + fastaFileNames[i];               }
170                                                 }
171                 
172                                                 int ableToOpen;
173                                                 ifstream in;
174                                                 
175                                                 ableToOpen = m->openInputFile(fastaFileNames[i], in, "noerror");
176                                         
177                                                 //if you can't open it, try default location
178                                                 if (ableToOpen == 1) {
179                                                         if (m->getDefaultPath() != "") { //default path is set
180                                                                 string tryPath = m->getDefaultPath() + m->getSimpleName(fastaFileNames[i]);
181                                                                 m->mothurOut("Unable to open " + fastaFileNames[i] + ". Trying default " + tryPath); m->mothurOutEndLine();
182                                                                 ifstream in2;
183                                                                 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
184                                                                 in2.close();
185                                                                 fastaFileNames[i] = tryPath;
186                                                         }
187                                                 }
188                                                 
189                                                 if (ableToOpen == 1) {
190                                                         if (m->getOutputDir() != "") { //default path is set
191                                                                 string tryPath = m->getOutputDir() + m->getSimpleName(fastaFileNames[i]);
192                                                                 m->mothurOut("Unable to open " + fastaFileNames[i] + ". Trying output directory " + tryPath); m->mothurOutEndLine();
193                                                                 ifstream in2;
194                                                                 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
195                                                                 in2.close();
196                                                                 fastaFileNames[i] = tryPath;
197                                                         }
198                                                 }
199
200                                                 in.close();
201                                                 
202                                                 if (ableToOpen == 1) { 
203                                                         m->mothurOut("Unable to open " + fastaFileNames[i] + ". It will be disregarded."); m->mothurOutEndLine(); 
204                                                         //erase from file list
205                                                         fastaFileNames.erase(fastaFileNames.begin()+i);
206                                                         i--;
207                                                 }
208                                         }
209                                 }
210                                 
211                                 //make sure there is at least one valid file left
212                                 if (fastaFileNames.size() == 0) { m->mothurOut("no valid files."); m->mothurOutEndLine(); abort = true; }
213                         }
214                         
215                         string temp;
216                         temp = validParameter.validFile(parameters, "filter", false);                   if (temp == "not found") { temp = "F"; }
217                         filter = m->isTrue(temp);
218                         
219                         temp = validParameter.validFile(parameters, "processors", false);       if (temp == "not found"){       temp = m->getProcessors();      }
220                         m->setProcessors(temp);
221                         convert(temp, processors);
222                         
223                         temp = validParameter.validFile(parameters, "window", false);                   if (temp == "not found") { temp = "0"; }
224                         convert(temp, window);
225                         
226                         temp = validParameter.validFile(parameters, "increment", false);                if (temp == "not found") { temp = "25"; }
227                         convert(temp, increment);
228                         
229                         maskfile = validParameter.validFile(parameters, "mask", false);
230                         if (maskfile == "not found") { maskfile = "";  }        
231                         else if (maskfile != "default")  { 
232                                 if (inputDir != "") {
233                                         string path = m->hasPath(maskfile);
234                                         //if the user has not given a path then, add inputdir. else leave path alone.
235                                         if (path == "") {       maskfile = inputDir + maskfile;         }
236                                 }
237
238                                 ifstream in;
239                                 int     ableToOpen = m->openInputFile(maskfile, in, "no error");
240                                 if (ableToOpen == 1) { 
241                                         if (m->getDefaultPath() != "") { //default path is set
242                                                         string tryPath = m->getDefaultPath() + m->getSimpleName(maskfile);
243                                                         m->mothurOut("Unable to open " + maskfile + ". Trying default " + tryPath); m->mothurOutEndLine();
244                                                         ifstream in2;
245                                                         ableToOpen = m->openInputFile(tryPath, in2, "noerror");
246                                                         in2.close();
247                                                         maskfile = tryPath;
248                                         }
249                                 }
250                                 
251                                 if (ableToOpen == 1) {
252                                                 if (m->getOutputDir() != "") { //default path is set
253                                                         string tryPath = m->getOutputDir() + m->getSimpleName(maskfile);
254                                                         m->mothurOut("Unable to open " + maskfile + ". Trying output directory " + tryPath); m->mothurOutEndLine();
255                                                         ifstream in2;
256                                                         ableToOpen = m->openInputFile(tryPath, in2, "noerror");
257                                                         in2.close();
258                                                         maskfile = tryPath;
259                                                 }
260                                 }
261                                 
262                                 in.close();
263                                         
264                                 if (ableToOpen == 1) { 
265                                                 m->mothurOut("Unable to open " + maskfile + "."); m->mothurOutEndLine(); 
266                                                 abort = true;
267                                 }
268                         }
269
270                         
271                         //if the user changes the output directory command factory will send this info to us in the output parameter 
272                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = ""; }
273                 
274                         templatefile = validParameter.validFile(parameters, "reference", true);
275                         if (templatefile == "not open") { abort = true; }
276                         else if (templatefile == "not found") { templatefile = "";  m->mothurOut("reference is a required parameter for the chimera.pintail command."); m->mothurOutEndLine(); abort = true;  }
277                         
278                         consfile = validParameter.validFile(parameters, "conservation", true);
279                         if (consfile == "not open") { abort = true; }
280                         else if (consfile == "not found") { 
281                                 consfile = "";  
282                                 //check for consfile
283                                 string tempConsFile = m->getRootName(inputDir + m->getSimpleName(templatefile)) + "freq";
284                                 ifstream FileTest(tempConsFile.c_str());
285                                 if(FileTest){   
286                                         bool GoodFile = m->checkReleaseVersion(FileTest, m->getVersion());
287                                         if (GoodFile) {  
288                                                 m->mothurOut("I found " + tempConsFile + " in your input file directory. I will use it to save time."); m->mothurOutEndLine();  consfile = tempConsFile;  FileTest.close();     
289                                         }
290                                 }else {
291                                         string tempConsFile = m->getDefaultPath() + m->getRootName(m->getSimpleName(templatefile)) + "freq";
292                                         ifstream FileTest2(tempConsFile.c_str());
293                                         if(FileTest2){  
294                                                 bool GoodFile = m->checkReleaseVersion(FileTest2, m->getVersion());
295                                                 if (GoodFile) {  
296                                                         m->mothurOut("I found " + tempConsFile + " in your input file directory. I will use it to save time."); m->mothurOutEndLine();  consfile = tempConsFile;  FileTest2.close();    
297                                                 }
298                                         }
299                                 }
300                         }       
301                         
302                         quanfile = validParameter.validFile(parameters, "quantile", true);
303                         if (quanfile == "not open") { abort = true; }
304                         else if (quanfile == "not found") { quanfile = ""; }
305                 }
306         }
307         catch(exception& e) {
308                 m->errorOut(e, "ChimeraPintailCommand", "ChimeraPintailCommand");
309                 exit(1);
310         }
311 }
312 //***************************************************************************************************************
313
314 int ChimeraPintailCommand::execute(){
315         try{
316                 
317                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
318                 
319                 for (int s = 0; s < fastaFileNames.size(); s++) {
320                                 
321                         m->mothurOut("Checking sequences from " + fastaFileNames[s] + " ..." ); m->mothurOutEndLine();
322
323                         int start = time(NULL); 
324                         
325                         //set user options
326                         if (maskfile == "default") { m->mothurOut("I am using the default 236627 EU009184.1 Shigella dysenteriae str. FBD013."); m->mothurOutEndLine();  }
327                         
328                         //check for quantile to save the time
329                         string tempQuan = "";
330                         if ((!filter) && (maskfile == "")) {
331                                 tempQuan = inputDir + m->getRootName(m->getSimpleName(templatefile)) + "pintail.quan";
332                         }else if ((!filter) && (maskfile != "")) { 
333                                 tempQuan = inputDir + m->getRootName(m->getSimpleName(templatefile)) + "pintail.masked.quan";
334                         }else if ((filter) && (maskfile != "")) { 
335                                 tempQuan = inputDir + m->getRootName(m->getSimpleName(templatefile)) + "pintail.filtered." + m->getSimpleName(m->getRootName(fastaFileNames[s])) + "masked.quan";
336                         }else if ((filter) && (maskfile == "")) { 
337                                 tempQuan = inputDir + m->getRootName(m->getSimpleName(templatefile)) + "pintail.filtered." + m->getSimpleName(m->getRootName(fastaFileNames[s])) + "quan";
338                         }
339                         
340                         ifstream FileTest(tempQuan.c_str());
341                         if(FileTest){   
342                                 bool GoodFile = m->checkReleaseVersion(FileTest, m->getVersion());
343                                 if (GoodFile) {  
344                                         m->mothurOut("I found " + tempQuan + " in your input file directory. I will use it to save time."); m->mothurOutEndLine();  quanfile = tempQuan;  FileTest.close();     
345                                 }
346                         }else {
347                                 string tryPath = m->getDefaultPath();
348                                 string tempQuan = "";
349                                 if ((!filter) && (maskfile == "")) {
350                                         tempQuan = tryPath + m->getRootName(m->getSimpleName(templatefile)) + "pintail.quan";
351                                 }else if ((!filter) && (maskfile != "")) { 
352                                         tempQuan = tryPath + m->getRootName(m->getSimpleName(templatefile)) + "pintail.masked.quan";
353                                 }else if ((filter) && (maskfile != "")) { 
354                                         tempQuan = tryPath + m->getRootName(m->getSimpleName(templatefile)) + "pintail.filtered." + m->getSimpleName(m->getRootName(fastaFileNames[s])) + "masked.quan";
355                                 }else if ((filter) && (maskfile == "")) { 
356                                         tempQuan = tryPath + m->getRootName(m->getSimpleName(templatefile)) + "pintail.filtered." + m->getSimpleName(m->getRootName(fastaFileNames[s])) + "quan";
357                                 }
358                                 
359                                 ifstream FileTest2(tempQuan.c_str());
360                                 if(FileTest2){  
361                                         bool GoodFile = m->checkReleaseVersion(FileTest2, m->getVersion());
362                                         if (GoodFile) {  
363                                                 m->mothurOut("I found " + tempQuan + " in your input file directory. I will use it to save time."); m->mothurOutEndLine();  quanfile = tempQuan;  FileTest2.close();    
364                                         }
365                                 }
366                         }
367                         
368                         chimera = new Pintail(fastaFileNames[s], templatefile, filter, processors, maskfile, consfile, quanfile, window, increment, outputDir);
369                         
370                         if (outputDir == "") { outputDir = m->hasPath(fastaFileNames[s]);  }//if user entered a file with a path then preserve it
371                         string outputFileName, accnosFileName;
372                         if (maskfile != "") {
373                                 outputFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + m->getSimpleName(m->getRootName(maskfile)) + ".pintail.chimeras";
374                                 accnosFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + m->getSimpleName(m->getRootName(maskfile)) + ".pintail.accnos";
375                         }else {
376                                 outputFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s]))  + "pintail.chimeras";
377                                 accnosFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s]))  + "pintail.accnos";
378                         }
379                         
380                         if (m->control_pressed) { delete chimera; for (int j = 0; j < outputNames.size(); j++) {        remove(outputNames[j].c_str()); }  return 0;    }
381                         
382                         if (chimera->getUnaligned()) { 
383                                 m->mothurOut("Your template sequences are different lengths, please correct."); m->mothurOutEndLine(); 
384                                 delete chimera;
385                                 return 0; 
386                         }
387                         templateSeqsLength = chimera->getLength();
388                 
389                 #ifdef USE_MPI
390                         int pid, numSeqsPerProcessor; 
391                                 int tag = 2001;
392                                 vector<unsigned long int> MPIPos;
393                                 
394                                 MPI_Status status; 
395                                 MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
396                                 MPI_Comm_size(MPI_COMM_WORLD, &processors); 
397
398                                 MPI_File inMPI;
399                                 MPI_File outMPI;
400                                 MPI_File outMPIAccnos;
401                                 
402                                 int outMode=MPI_MODE_CREATE|MPI_MODE_WRONLY; 
403                                 int inMode=MPI_MODE_RDONLY; 
404                                 
405                                 char outFilename[1024];
406                                 strcpy(outFilename, outputFileName.c_str());
407                                 
408                                 char outAccnosFilename[1024];
409                                 strcpy(outAccnosFilename, accnosFileName.c_str());
410                                 
411                                 char inFileName[1024];
412                                 strcpy(inFileName, fastaFileNames[s].c_str());
413
414                                 MPI_File_open(MPI_COMM_WORLD, inFileName, inMode, MPI_INFO_NULL, &inMPI);  //comm, filename, mode, info, filepointer
415                                 MPI_File_open(MPI_COMM_WORLD, outFilename, outMode, MPI_INFO_NULL, &outMPI);
416                                 MPI_File_open(MPI_COMM_WORLD, outAccnosFilename, outMode, MPI_INFO_NULL, &outMPIAccnos);
417                                 
418                                 if (m->control_pressed) { outputTypes.clear();  MPI_File_close(&inMPI);  MPI_File_close(&outMPI);   MPI_File_close(&outMPIAccnos);  for (int j = 0; j < outputNames.size(); j++) {      remove(outputNames[j].c_str()); }  delete chimera; return 0;  }
419
420                                 if (pid == 0) { //you are the root process 
421                                                                 
422                                         MPIPos = m->setFilePosFasta(fastaFileNames[s], numSeqs); //fills MPIPos, returns numSeqs
423                                         
424                                         //send file positions to all processes
425                                         for(int i = 1; i < processors; i++) { 
426                                                 MPI_Send(&numSeqs, 1, MPI_INT, i, tag, MPI_COMM_WORLD);
427                                                 MPI_Send(&MPIPos[0], (numSeqs+1), MPI_LONG, i, tag, MPI_COMM_WORLD);
428                                         }
429                                         
430                                         //figure out how many sequences you have to align
431                                         numSeqsPerProcessor = numSeqs / processors;
432                                         int startIndex =  pid * numSeqsPerProcessor;
433                                         if(pid == (processors - 1)){    numSeqsPerProcessor = numSeqs - pid * numSeqsPerProcessor;      }
434                                 
435                                         //do your part
436                                         driverMPI(startIndex, numSeqsPerProcessor, inMPI, outMPI, outMPIAccnos, MPIPos);
437                                         
438                                         if (m->control_pressed) { outputTypes.clear();  MPI_File_close(&inMPI);  MPI_File_close(&outMPI);   MPI_File_close(&outMPIAccnos);  remove(outputFileName.c_str());  remove(accnosFileName.c_str());  for (int j = 0; j < outputNames.size(); j++) {    remove(outputNames[j].c_str()); }  delete chimera; return 0;  }
439                                         
440                                 }else{ //you are a child process
441                                         MPI_Recv(&numSeqs, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
442                                         MPIPos.resize(numSeqs+1);
443                                         MPI_Recv(&MPIPos[0], (numSeqs+1), MPI_LONG, 0, tag, MPI_COMM_WORLD, &status);
444                                         
445                                         //figure out how many sequences you have to align
446                                         numSeqsPerProcessor = numSeqs / processors;
447                                         int startIndex =  pid * numSeqsPerProcessor;
448                                         if(pid == (processors - 1)){    numSeqsPerProcessor = numSeqs - pid * numSeqsPerProcessor;      }
449                                         
450                                         //do your part
451                                         driverMPI(startIndex, numSeqsPerProcessor, inMPI, outMPI, outMPIAccnos, MPIPos);
452                                         
453                                         if (m->control_pressed) { outputTypes.clear();  MPI_File_close(&inMPI);  MPI_File_close(&outMPI);   MPI_File_close(&outMPIAccnos);  for (int j = 0; j < outputNames.size(); j++) {      remove(outputNames[j].c_str()); }  delete chimera; return 0;  }
454                                 }
455                                 
456                                 //close files 
457                                 MPI_File_close(&inMPI);
458                                 MPI_File_close(&outMPI);
459                                 MPI_File_close(&outMPIAccnos);
460                                 MPI_Barrier(MPI_COMM_WORLD); //make everyone wait - just in case
461                 #else
462                         vector<unsigned long int> positions = m->divideFile(fastaFileNames[s], processors);
463                                 
464                         for (int i = 0; i < (positions.size()-1); i++) {
465                                 lines.push_back(new linePair(positions[i], positions[(i+1)]));
466                         }       
467                         
468                         //break up file
469                         #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
470                                 if(processors == 1){
471                 
472                                         numSeqs = driver(lines[0], outputFileName, fastaFileNames[s], accnosFileName);
473                                         
474                                         if (m->control_pressed) { outputTypes.clear(); remove(outputFileName.c_str()); remove(accnosFileName.c_str()); for (int j = 0; j < outputNames.size(); j++) {   remove(outputNames[j].c_str()); } for (int i = 0; i < lines.size(); i++) {  delete lines[i];  }  lines.clear(); delete chimera; return 0; }
475                                         
476                                 }else{
477                                         processIDS.resize(0);
478                                         
479                                         numSeqs = createProcesses(outputFileName, fastaFileNames[s], accnosFileName); 
480                                 
481                                         rename((outputFileName + toString(processIDS[0]) + ".temp").c_str(), outputFileName.c_str());
482                                         rename((accnosFileName + toString(processIDS[0]) + ".temp").c_str(), accnosFileName.c_str());
483                                                 
484                                         //append output files
485                                         for(int i=1;i<processors;i++){
486                                                 m->appendFiles((outputFileName + toString(processIDS[i]) + ".temp"), outputFileName);
487                                                 remove((outputFileName + toString(processIDS[i]) + ".temp").c_str());
488                                         }
489                                         
490                                         //append output files
491                                         for(int i=1;i<processors;i++){
492                                                 m->appendFiles((accnosFileName + toString(processIDS[i]) + ".temp"), accnosFileName);
493                                                 remove((accnosFileName + toString(processIDS[i]) + ".temp").c_str());
494                                         }
495                                                                                 
496                                         if (m->control_pressed) { 
497                                                 remove(outputFileName.c_str()); 
498                                                 remove(accnosFileName.c_str());
499                                                 for (int j = 0; j < outputNames.size(); j++) {  remove(outputNames[j].c_str()); } outputTypes.clear();
500                                                 for (int i = 0; i < lines.size(); i++) {  delete lines[i];  }  lines.clear();
501                                                 delete chimera;
502                                                 return 0;
503                                         }
504                                 }
505
506                         #else
507                                 numSeqs = driver(lines[0], outputFileName, fastaFileNames[s], accnosFileName);
508                                 
509                                 if (m->control_pressed) { outputTypes.clear(); remove(outputFileName.c_str()); remove(accnosFileName.c_str()); for (int j = 0; j < outputNames.size(); j++) {   remove(outputNames[j].c_str()); } for (int i = 0; i < lines.size(); i++) {  delete lines[i];  }  lines.clear(); delete chimera; return 0; }
510                         #endif
511                         
512                 #endif  
513                 
514                         delete chimera;
515                         for (int i = 0; i < lines.size(); i++) {  delete lines[i];  }  lines.clear();
516                         
517                         outputNames.push_back(outputFileName); outputTypes["chimera"].push_back(outputFileName);
518                         outputNames.push_back(accnosFileName); outputTypes["accnos"].push_back(accnosFileName);
519                         
520                         m->mothurOutEndLine();
521                         m->mothurOutEndLine(); m->mothurOut("It took " + toString(time(NULL) - start) + " secs to check " + toString(numSeqs) + " sequences."); m->mothurOutEndLine();
522                 }
523                 
524                 //set accnos file as new current accnosfile
525                 string current = "";
526                 itTypes = outputTypes.find("accnos");
527                 if (itTypes != outputTypes.end()) {
528                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setAccnosFile(current); }
529                 }
530                 
531                 m->mothurOutEndLine();
532                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
533                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }       
534                 m->mothurOutEndLine();
535                         
536                 return 0;
537                 
538         }
539         catch(exception& e) {
540                 m->errorOut(e, "ChimeraPintailCommand", "execute");
541                 exit(1);
542         }
543 }
544 //**********************************************************************************************************************
545
546 int ChimeraPintailCommand::driver(linePair* filePos, string outputFName, string filename, string accnos){
547         try {
548                 ofstream out;
549                 m->openOutputFile(outputFName, out);
550                 
551                 ofstream out2;
552                 m->openOutputFile(accnos, out2);
553                 
554                 ifstream inFASTA;
555                 m->openInputFile(filename, inFASTA);
556
557                 inFASTA.seekg(filePos->start);
558
559                 bool done = false;
560                 int count = 0;
561         
562                 while (!done) {
563                                 
564                         if (m->control_pressed) {       return 1;       }
565                 
566                         Sequence* candidateSeq = new Sequence(inFASTA);  m->gobble(inFASTA);
567                                 
568                         if (candidateSeq->getName() != "") { //incase there is a commented sequence at the end of a file
569                                 
570                                 if (candidateSeq->getAligned().length() != templateSeqsLength)  {  //chimeracheck does not require seqs to be aligned
571                                         m->mothurOut(candidateSeq->getName() + " is not the same length as the template sequences. Skipping."); m->mothurOutEndLine();
572                                 }else{
573                                         //find chimeras
574                                         chimera->getChimeras(candidateSeq);
575                                         
576                                         if (m->control_pressed) {       delete candidateSeq; return 1;  }
577                 
578                                         //print results
579                                         chimera->print(out, out2);
580                                 }
581                                 count++;
582                         }
583                         delete candidateSeq;
584                         
585                         #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
586                                 unsigned long int pos = inFASTA.tellg();
587                                 if ((pos == -1) || (pos >= filePos->end)) { break; }
588                         #else
589                                 if (inFASTA.eof()) { break; }
590                         #endif
591                         
592                         //report progress
593                         if((count) % 100 == 0){ m->mothurOut("Processing sequence: " + toString(count)); m->mothurOutEndLine();         }
594                 }
595                 //report progress
596                 if((count) % 100 != 0){ m->mothurOut("Processing sequence: " + toString(count)); m->mothurOutEndLine();         }
597                 
598                 out.close();
599                 out2.close();
600                 inFASTA.close();
601                                 
602                 return count;
603         }
604         catch(exception& e) {
605                 m->errorOut(e, "ChimeraPintailCommand", "driver");
606                 exit(1);
607         }
608 }
609 //**********************************************************************************************************************
610 #ifdef USE_MPI
611 int ChimeraPintailCommand::driverMPI(int start, int num, MPI_File& inMPI, MPI_File& outMPI, MPI_File& outAccMPI, vector<unsigned long int>& MPIPos){
612         try {
613                                 
614                 MPI_Status status; 
615                 int pid;
616                 MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
617                 
618                 for(int i=0;i<num;i++){
619                         
620                         if (m->control_pressed) {       return 1;       }
621                         
622                         //read next sequence
623                         int length = MPIPos[start+i+1] - MPIPos[start+i];
624         
625                         char* buf4 = new char[length];
626                         MPI_File_read_at(inMPI, MPIPos[start+i], buf4, length, MPI_CHAR, &status);
627                         
628                         string tempBuf = buf4;
629                         if (tempBuf.length() > length) { tempBuf = tempBuf.substr(0, length);  }
630                         istringstream iss (tempBuf,istringstream::in);
631                         delete buf4;
632
633                         Sequence* candidateSeq = new Sequence(iss);  m->gobble(iss);
634                                 
635                         if (candidateSeq->getName() != "") { //incase there is a commented sequence at the end of a file
636                                 
637                                 if      (candidateSeq->getAligned().length() != templateSeqsLength) {  //chimeracheck does not require seqs to be aligned
638                                         m->mothurOut(candidateSeq->getName() + " is not the same length as the template sequences. Skipping."); m->mothurOutEndLine();
639                                 }else{
640                                         //find chimeras
641                                         chimera->getChimeras(candidateSeq);
642                                         
643                                         if (m->control_pressed) {       delete candidateSeq; return 1;  }
644                 
645                                         //print results
646                                         chimera->print(outMPI, outAccMPI);
647                                 }
648                         }
649                         delete candidateSeq;
650                         
651                         //report progress
652                         if((i+1) % 100 == 0){  cout << "Processing sequence: " << (i+1) << endl;        m->mothurOutJustToLog("Processing sequence: " + toString(i+1) + "\n");          }
653                 }
654                 //report progress
655                 if(num % 100 != 0){             cout << "Processing sequence: " << num << endl; m->mothurOutJustToLog("Processing sequence: " + toString(num) + "\n");  }
656                 
657                                 
658                 return 0;
659         }
660         catch(exception& e) {
661                 m->errorOut(e, "ChimeraPintailCommand", "driverMPI");
662                 exit(1);
663         }
664 }
665 #endif
666
667 /**************************************************************************************************/
668
669 int ChimeraPintailCommand::createProcesses(string outputFileName, string filename, string accnos) {
670         try {
671 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
672                 int process = 0;
673                 int num = 0;
674                 
675                 //loop through and create all the processes you want
676                 while (process != processors) {
677                         int pid = fork();
678                         
679                         if (pid > 0) {
680                                 processIDS.push_back(pid);  //create map from line number to pid so you can append files in correct order later
681                                 process++;
682                         }else if (pid == 0){
683                                 num = driver(lines[process], outputFileName + toString(getpid()) + ".temp", filename, accnos + toString(getpid()) + ".temp");
684                                 
685                                 //pass numSeqs to parent
686                                 ofstream out;
687                                 string tempFile = outputFileName + toString(getpid()) + ".num.temp";
688                                 m->openOutputFile(tempFile, out);
689                                 out << num << endl;
690                                 out.close();
691
692                                 exit(0);
693                         }else { 
694                                 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine(); 
695                                 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
696                                 exit(0);
697                         }
698                 }
699                 
700                 //force parent to wait until all the processes are done
701                 for (int i=0;i<processors;i++) { 
702                         int temp = processIDS[i];
703                         wait(&temp);
704                 }
705                 
706                 for (int i = 0; i < processIDS.size(); i++) {
707                         ifstream in;
708                         string tempFile =  outputFileName + toString(processIDS[i]) + ".num.temp";
709                         m->openInputFile(tempFile, in);
710                         if (!in.eof()) { int tempNum = 0; in >> tempNum; num += tempNum; }
711                         in.close(); remove(tempFile.c_str());
712                 }
713                 
714                 return num;
715 #endif          
716         }
717         catch(exception& e) {
718                 m->errorOut(e, "ChimeraPintailCommand", "createProcesses");
719                 exit(1);
720         }
721 }
722
723 /**************************************************************************************************/
724
725