]> git.donarmstrong.com Git - mothur.git/blob - chimerapintailcommand.cpp
working on current change
[mothur.git] / chimerapintailcommand.cpp
1 /*
2  *  chimerapintailcommand.cpp
3  *  Mothur
4  *
5  *  Created by westcott on 4/1/10.
6  *  Copyright 2010 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "chimerapintailcommand.h"
11 #include "pintail.h"
12
13 //**********************************************************************************************************************
14 vector<string> ChimeraPintailCommand::setParameters(){  
15         try {
16                 CommandParameter ptemplate("reference", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(ptemplate);
17                 CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pfasta);
18                 CommandParameter pconservation("conservation", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pconservation);
19                 CommandParameter pquantile("quantile", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pquantile);
20                 CommandParameter pfilter("filter", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pfilter);
21                 CommandParameter pwindow("window", "Number", "", "0", "", "", "",false,false); parameters.push_back(pwindow);
22                 CommandParameter pincrement("increment", "Number", "", "25", "", "", "",false,false); parameters.push_back(pincrement);
23                 CommandParameter pmask("mask", "String", "", "", "", "", "",false,false); parameters.push_back(pmask);
24                 CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors);
25                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
26                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
27                 
28                 vector<string> myArray;
29                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
30                 return myArray;
31         }
32         catch(exception& e) {
33                 m->errorOut(e, "ChimeraPintailCommand", "setParameters");
34                 exit(1);
35         }
36 }
37 //**********************************************************************************************************************
38 string ChimeraPintailCommand::getHelpString(){  
39         try {
40                 string helpString = "";
41                 helpString += "The chimera.pintail command reads a fastafile and referencefile and outputs potentially chimeric sequences.\n";
42                 helpString += "This command was created using the algorythms described in the 'At Least 1 in 20 16S rRNA Sequence Records Currently Held in the Public Repositories is Estimated To Contain Substantial Anomalies' paper by Kevin E. Ashelford 1, Nadia A. Chuzhanova 3, John C. Fry 1, Antonia J. Jones 2 and Andrew J. Weightman 1.\n";
43                 helpString += "The chimera.pintail command parameters are fasta, reference, filter, mask, processors, window, increment, conservation and quantile.\n";
44                 helpString += "The fasta parameter allows you to enter the fasta file containing your potentially chimeric sequences, and is required unless you have a valid current fasta file. \n";
45                 helpString += "You may enter multiple fasta files by separating their names with dashes. ie. fasta=abrecovery.fasta-amzon.fasta \n";
46                 helpString += "The reference parameter allows you to enter a reference file containing known non-chimeric sequences, and is required. \n";
47                 helpString += "The filter parameter allows you to specify if you would like to apply a vertical and 50% soft filter. \n";
48                 helpString += "The mask parameter allows you to specify a file containing one sequence you wish to use as a mask for the your sequences, by default no mask is applied.  You can apply an ecoli mask by typing, mask=default. \n";
49                 helpString += "The processors parameter allows you to specify how many processors you would like to use.  The default is 1. \n";
50 #ifdef USE_MPI
51                 helpString += "When using MPI, the processors parameter is set to the number of MPI processes running. \n";
52 #endif
53                 helpString += "The window parameter allows you to specify the window size for searching for chimeras, default=300. \n";
54                 helpString += "The increment parameter allows you to specify how far you move each window while finding chimeric sequences, default=25.\n";
55                 helpString += "The conservation parameter allows you to enter a frequency file containing the highest bases frequency at each place in the alignment.\n";
56                 helpString += "The quantile parameter allows you to enter a file containing quantiles for a template files sequences, if you use the filter the quantile file generated becomes unique to the fasta file you used.\n";
57                 helpString += "The chimera.pintail command should be in the following format: \n";
58                 helpString += "chimera.pintail(fasta=yourFastaFile, reference=yourTemplate) \n";
59                 helpString += "Example: chimera.pintail(fasta=AD.align, reference=silva.bacteria.fasta) \n";
60                 helpString += "Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFastaFile).\n";       
61                 return helpString;
62         }
63         catch(exception& e) {
64                 m->errorOut(e, "ChimeraPintailCommand", "getHelpString");
65                 exit(1);
66         }
67 }
68 //**********************************************************************************************************************
69 ChimeraPintailCommand::ChimeraPintailCommand(){ 
70         try {
71                 abort = true; calledHelp = true;
72                 setParameters();
73                 vector<string> tempOutNames;
74                 outputTypes["chimera"] = tempOutNames;
75                 outputTypes["accnos"] = tempOutNames;
76         }
77         catch(exception& e) {
78                 m->errorOut(e, "ChimeraPintailCommand", "ChimeraPintailCommand");
79                 exit(1);
80         }
81 }
82 //***************************************************************************************************************
83 ChimeraPintailCommand::ChimeraPintailCommand(string option)  {
84         try {
85                 abort = false; calledHelp = false;   
86                 
87                 //allow user to run help
88                 if(option == "help") { help(); abort = true; calledHelp = true; }
89                 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
90                 
91                 else {
92                         vector<string> myArray = setParameters();
93                         
94                         OptionParser parser(option);
95                         map<string,string> parameters = parser.getParameters();
96                         
97                         ValidParameters validParameter("chimera.pintail");
98                         map<string,string>::iterator it;
99                         
100                         //check to make sure all parameters are valid for command
101                         for (it = parameters.begin(); it != parameters.end(); it++) { 
102                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
103                         }
104                         
105                         vector<string> tempOutNames;
106                         outputTypes["chimera"] = tempOutNames;
107                         outputTypes["accnos"] = tempOutNames;
108                 
109                         //if the user changes the input directory command factory will send this info to us in the output parameter 
110                         inputDir = validParameter.validFile(parameters, "inputdir", false);             
111                         if (inputDir == "not found"){   inputDir = "";          }
112                         else {
113                                 string path;
114                                 it = parameters.find("reference");
115                                 //user has given a template file
116                                 if(it != parameters.end()){ 
117                                         path = m->hasPath(it->second);
118                                         //if the user has not given a path then, add inputdir. else leave path alone.
119                                         if (path == "") {       parameters["reference"] = inputDir + it->second;                }
120                                 }
121                                 
122                                 it = parameters.find("conservation");
123                                 //user has given a template file
124                                 if(it != parameters.end()){ 
125                                         path = m->hasPath(it->second);
126                                         //if the user has not given a path then, add inputdir. else leave path alone.
127                                         if (path == "") {       parameters["conservation"] = inputDir + it->second;             }
128                                 }
129                                 
130                                 it = parameters.find("quantile");
131                                 //user has given a template file
132                                 if(it != parameters.end()){ 
133                                         path = m->hasPath(it->second);
134                                         //if the user has not given a path then, add inputdir. else leave path alone.
135                                         if (path == "") {       parameters["quantile"] = inputDir + it->second;         }
136                                 }
137                         }
138
139                         
140                         //check for required parameters
141                         fastafile = validParameter.validFile(parameters, "fasta", false);
142                         if (fastafile == "not found") {                                 
143                                 //if there is a current fasta file, use it
144                                 string filename = m->getFastaFile(); 
145                                 if (filename != "") { fastaFileNames.push_back(filename); m->mothurOut("Using " + filename + " as input file for the fasta parameter."); m->mothurOutEndLine(); }
146                                 else {  m->mothurOut("You have no current fastafile and the fasta parameter is required."); m->mothurOutEndLine(); abort = true; }
147                         }else { 
148                                 m->splitAtDash(fastafile, fastaFileNames);
149                                 
150                                 //go through files and make sure they are good, if not, then disregard them
151                                 for (int i = 0; i < fastaFileNames.size(); i++) {
152                                         
153                                         bool ignore = false;
154                                         if (fastaFileNames[i] == "current") { 
155                                                 fastaFileNames[i] = m->getFastaFile(); 
156                                                 if (fastaFileNames[i] != "") {  m->mothurOut("Using " + fastaFileNames[i] + " as input file for the fasta parameter where you had given current."); m->mothurOutEndLine(); }
157                                                 else {  
158                                                         m->mothurOut("You have no current fastafile, ignoring current."); m->mothurOutEndLine(); ignore=true; 
159                                                         //erase from file list
160                                                         fastaFileNames.erase(fastaFileNames.begin()+i);
161                                                         i--;
162                                                 }
163                                         }
164                                         
165                                         if (!ignore) {
166                                         
167                                                 if (inputDir != "") {
168                                                         string path = m->hasPath(fastaFileNames[i]);
169                                                         //if the user has not given a path then, add inputdir. else leave path alone.
170                                                         if (path == "") {       fastaFileNames[i] = inputDir + fastaFileNames[i];               }
171                                                 }
172                 
173                                                 int ableToOpen;
174                                                 ifstream in;
175                                                 
176                                                 ableToOpen = m->openInputFile(fastaFileNames[i], in, "noerror");
177                                         
178                                                 //if you can't open it, try default location
179                                                 if (ableToOpen == 1) {
180                                                         if (m->getDefaultPath() != "") { //default path is set
181                                                                 string tryPath = m->getDefaultPath() + m->getSimpleName(fastaFileNames[i]);
182                                                                 m->mothurOut("Unable to open " + fastaFileNames[i] + ". Trying default " + tryPath); m->mothurOutEndLine();
183                                                                 ifstream in2;
184                                                                 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
185                                                                 in2.close();
186                                                                 fastaFileNames[i] = tryPath;
187                                                         }
188                                                 }
189                                                 
190                                                 if (ableToOpen == 1) {
191                                                         if (m->getOutputDir() != "") { //default path is set
192                                                                 string tryPath = m->getOutputDir() + m->getSimpleName(fastaFileNames[i]);
193                                                                 m->mothurOut("Unable to open " + fastaFileNames[i] + ". Trying output directory " + tryPath); m->mothurOutEndLine();
194                                                                 ifstream in2;
195                                                                 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
196                                                                 in2.close();
197                                                                 fastaFileNames[i] = tryPath;
198                                                         }
199                                                 }
200
201                                                 in.close();
202                                                 
203                                                 if (ableToOpen == 1) { 
204                                                         m->mothurOut("Unable to open " + fastaFileNames[i] + ". It will be disregarded."); m->mothurOutEndLine(); 
205                                                         //erase from file list
206                                                         fastaFileNames.erase(fastaFileNames.begin()+i);
207                                                         i--;
208                                                 }else {
209                                                         m->setFastaFile(fastaFileNames[i]);
210                                                 }
211                                         }
212                                 }
213                                 
214                                 //make sure there is at least one valid file left
215                                 if (fastaFileNames.size() == 0) { m->mothurOut("no valid files."); m->mothurOutEndLine(); abort = true; }
216                         }
217                         
218                         string temp;
219                         temp = validParameter.validFile(parameters, "filter", false);                   if (temp == "not found") { temp = "F"; }
220                         filter = m->isTrue(temp);
221                         
222                         temp = validParameter.validFile(parameters, "processors", false);       if (temp == "not found"){       temp = m->getProcessors();      }
223                         m->setProcessors(temp);
224                         convert(temp, processors);
225                         
226                         temp = validParameter.validFile(parameters, "window", false);                   if (temp == "not found") { temp = "0"; }
227                         convert(temp, window);
228                         
229                         temp = validParameter.validFile(parameters, "increment", false);                if (temp == "not found") { temp = "25"; }
230                         convert(temp, increment);
231                         
232                         maskfile = validParameter.validFile(parameters, "mask", false);
233                         if (maskfile == "not found") { maskfile = "";  }        
234                         else if (maskfile != "default")  { 
235                                 if (inputDir != "") {
236                                         string path = m->hasPath(maskfile);
237                                         //if the user has not given a path then, add inputdir. else leave path alone.
238                                         if (path == "") {       maskfile = inputDir + maskfile;         }
239                                 }
240
241                                 ifstream in;
242                                 int     ableToOpen = m->openInputFile(maskfile, in, "no error");
243                                 if (ableToOpen == 1) { 
244                                         if (m->getDefaultPath() != "") { //default path is set
245                                                         string tryPath = m->getDefaultPath() + m->getSimpleName(maskfile);
246                                                         m->mothurOut("Unable to open " + maskfile + ". Trying default " + tryPath); m->mothurOutEndLine();
247                                                         ifstream in2;
248                                                         ableToOpen = m->openInputFile(tryPath, in2, "noerror");
249                                                         in2.close();
250                                                         maskfile = tryPath;
251                                         }
252                                 }
253                                 
254                                 if (ableToOpen == 1) {
255                                                 if (m->getOutputDir() != "") { //default path is set
256                                                         string tryPath = m->getOutputDir() + m->getSimpleName(maskfile);
257                                                         m->mothurOut("Unable to open " + maskfile + ". Trying output directory " + tryPath); m->mothurOutEndLine();
258                                                         ifstream in2;
259                                                         ableToOpen = m->openInputFile(tryPath, in2, "noerror");
260                                                         in2.close();
261                                                         maskfile = tryPath;
262                                                 }
263                                 }
264                                 
265                                 in.close();
266                                         
267                                 if (ableToOpen == 1) { 
268                                                 m->mothurOut("Unable to open " + maskfile + "."); m->mothurOutEndLine(); 
269                                                 abort = true;
270                                 }
271                         }
272
273                         
274                         //if the user changes the output directory command factory will send this info to us in the output parameter 
275                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = ""; }
276                 
277                         templatefile = validParameter.validFile(parameters, "reference", true);
278                         if (templatefile == "not open") { abort = true; }
279                         else if (templatefile == "not found") { templatefile = "";  m->mothurOut("reference is a required parameter for the chimera.pintail command."); m->mothurOutEndLine(); abort = true;  }
280                         
281                         consfile = validParameter.validFile(parameters, "conservation", true);
282                         if (consfile == "not open") { abort = true; }
283                         else if (consfile == "not found") { 
284                                 consfile = "";  
285                                 //check for consfile
286                                 string tempConsFile = m->getRootName(inputDir + m->getSimpleName(templatefile)) + "freq";
287                                 ifstream FileTest(tempConsFile.c_str());
288                                 if(FileTest){   
289                                         bool GoodFile = m->checkReleaseVersion(FileTest, m->getVersion());
290                                         if (GoodFile) {  
291                                                 m->mothurOut("I found " + tempConsFile + " in your input file directory. I will use it to save time."); m->mothurOutEndLine();  consfile = tempConsFile;  FileTest.close();     
292                                         }
293                                 }else {
294                                         string tempConsFile = m->getDefaultPath() + m->getRootName(m->getSimpleName(templatefile)) + "freq";
295                                         ifstream FileTest2(tempConsFile.c_str());
296                                         if(FileTest2){  
297                                                 bool GoodFile = m->checkReleaseVersion(FileTest2, m->getVersion());
298                                                 if (GoodFile) {  
299                                                         m->mothurOut("I found " + tempConsFile + " in your input file directory. I will use it to save time."); m->mothurOutEndLine();  consfile = tempConsFile;  FileTest2.close();    
300                                                 }
301                                         }
302                                 }
303                         }       
304                         
305                         quanfile = validParameter.validFile(parameters, "quantile", true);
306                         if (quanfile == "not open") { abort = true; }
307                         else if (quanfile == "not found") { quanfile = ""; }
308                 }
309         }
310         catch(exception& e) {
311                 m->errorOut(e, "ChimeraPintailCommand", "ChimeraPintailCommand");
312                 exit(1);
313         }
314 }
315 //***************************************************************************************************************
316
317 int ChimeraPintailCommand::execute(){
318         try{
319                 
320                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
321                 
322                 for (int s = 0; s < fastaFileNames.size(); s++) {
323                                 
324                         m->mothurOut("Checking sequences from " + fastaFileNames[s] + " ..." ); m->mothurOutEndLine();
325
326                         int start = time(NULL); 
327                         
328                         //set user options
329                         if (maskfile == "default") { m->mothurOut("I am using the default 236627 EU009184.1 Shigella dysenteriae str. FBD013."); m->mothurOutEndLine();  }
330                         
331                         //check for quantile to save the time
332                         string tempQuan = "";
333                         if ((!filter) && (maskfile == "")) {
334                                 tempQuan = inputDir + m->getRootName(m->getSimpleName(templatefile)) + "pintail.quan";
335                         }else if ((!filter) && (maskfile != "")) { 
336                                 tempQuan = inputDir + m->getRootName(m->getSimpleName(templatefile)) + "pintail.masked.quan";
337                         }else if ((filter) && (maskfile != "")) { 
338                                 tempQuan = inputDir + m->getRootName(m->getSimpleName(templatefile)) + "pintail.filtered." + m->getSimpleName(m->getRootName(fastaFileNames[s])) + "masked.quan";
339                         }else if ((filter) && (maskfile == "")) { 
340                                 tempQuan = inputDir + m->getRootName(m->getSimpleName(templatefile)) + "pintail.filtered." + m->getSimpleName(m->getRootName(fastaFileNames[s])) + "quan";
341                         }
342                         
343                         ifstream FileTest(tempQuan.c_str());
344                         if(FileTest){   
345                                 bool GoodFile = m->checkReleaseVersion(FileTest, m->getVersion());
346                                 if (GoodFile) {  
347                                         m->mothurOut("I found " + tempQuan + " in your input file directory. I will use it to save time."); m->mothurOutEndLine();  quanfile = tempQuan;  FileTest.close();     
348                                 }
349                         }else {
350                                 string tryPath = m->getDefaultPath();
351                                 string tempQuan = "";
352                                 if ((!filter) && (maskfile == "")) {
353                                         tempQuan = tryPath + m->getRootName(m->getSimpleName(templatefile)) + "pintail.quan";
354                                 }else if ((!filter) && (maskfile != "")) { 
355                                         tempQuan = tryPath + m->getRootName(m->getSimpleName(templatefile)) + "pintail.masked.quan";
356                                 }else if ((filter) && (maskfile != "")) { 
357                                         tempQuan = tryPath + m->getRootName(m->getSimpleName(templatefile)) + "pintail.filtered." + m->getSimpleName(m->getRootName(fastaFileNames[s])) + "masked.quan";
358                                 }else if ((filter) && (maskfile == "")) { 
359                                         tempQuan = tryPath + m->getRootName(m->getSimpleName(templatefile)) + "pintail.filtered." + m->getSimpleName(m->getRootName(fastaFileNames[s])) + "quan";
360                                 }
361                                 
362                                 ifstream FileTest2(tempQuan.c_str());
363                                 if(FileTest2){  
364                                         bool GoodFile = m->checkReleaseVersion(FileTest2, m->getVersion());
365                                         if (GoodFile) {  
366                                                 m->mothurOut("I found " + tempQuan + " in your input file directory. I will use it to save time."); m->mothurOutEndLine();  quanfile = tempQuan;  FileTest2.close();    
367                                         }
368                                 }
369                         }
370                         
371                         chimera = new Pintail(fastaFileNames[s], templatefile, filter, processors, maskfile, consfile, quanfile, window, increment, outputDir);
372                         
373                         if (outputDir == "") { outputDir = m->hasPath(fastaFileNames[s]);  }//if user entered a file with a path then preserve it
374                         string outputFileName, accnosFileName;
375                         if (maskfile != "") {
376                                 outputFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + m->getSimpleName(m->getRootName(maskfile)) + ".pintail.chimeras";
377                                 accnosFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + m->getSimpleName(m->getRootName(maskfile)) + ".pintail.accnos";
378                         }else {
379                                 outputFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s]))  + "pintail.chimeras";
380                                 accnosFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s]))  + "pintail.accnos";
381                         }
382                         
383                         if (m->control_pressed) { delete chimera; for (int j = 0; j < outputNames.size(); j++) {        remove(outputNames[j].c_str()); }  return 0;    }
384                         
385                         if (chimera->getUnaligned()) { 
386                                 m->mothurOut("Your template sequences are different lengths, please correct."); m->mothurOutEndLine(); 
387                                 delete chimera;
388                                 return 0; 
389                         }
390                         templateSeqsLength = chimera->getLength();
391                 
392                 #ifdef USE_MPI
393                         int pid, numSeqsPerProcessor; 
394                                 int tag = 2001;
395                                 vector<unsigned long int> MPIPos;
396                                 
397                                 MPI_Status status; 
398                                 MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
399                                 MPI_Comm_size(MPI_COMM_WORLD, &processors); 
400
401                                 MPI_File inMPI;
402                                 MPI_File outMPI;
403                                 MPI_File outMPIAccnos;
404                                 
405                                 int outMode=MPI_MODE_CREATE|MPI_MODE_WRONLY; 
406                                 int inMode=MPI_MODE_RDONLY; 
407                                 
408                                 char outFilename[1024];
409                                 strcpy(outFilename, outputFileName.c_str());
410                                 
411                                 char outAccnosFilename[1024];
412                                 strcpy(outAccnosFilename, accnosFileName.c_str());
413                                 
414                                 char inFileName[1024];
415                                 strcpy(inFileName, fastaFileNames[s].c_str());
416
417                                 MPI_File_open(MPI_COMM_WORLD, inFileName, inMode, MPI_INFO_NULL, &inMPI);  //comm, filename, mode, info, filepointer
418                                 MPI_File_open(MPI_COMM_WORLD, outFilename, outMode, MPI_INFO_NULL, &outMPI);
419                                 MPI_File_open(MPI_COMM_WORLD, outAccnosFilename, outMode, MPI_INFO_NULL, &outMPIAccnos);
420                                 
421                                 if (m->control_pressed) { outputTypes.clear();  MPI_File_close(&inMPI);  MPI_File_close(&outMPI);   MPI_File_close(&outMPIAccnos);  for (int j = 0; j < outputNames.size(); j++) {      remove(outputNames[j].c_str()); }  delete chimera; return 0;  }
422
423                                 if (pid == 0) { //you are the root process 
424                                                                 
425                                         MPIPos = m->setFilePosFasta(fastaFileNames[s], numSeqs); //fills MPIPos, returns numSeqs
426                                         
427                                         //send file positions to all processes
428                                         for(int i = 1; i < processors; i++) { 
429                                                 MPI_Send(&numSeqs, 1, MPI_INT, i, tag, MPI_COMM_WORLD);
430                                                 MPI_Send(&MPIPos[0], (numSeqs+1), MPI_LONG, i, tag, MPI_COMM_WORLD);
431                                         }
432                                         
433                                         //figure out how many sequences you have to align
434                                         numSeqsPerProcessor = numSeqs / processors;
435                                         int startIndex =  pid * numSeqsPerProcessor;
436                                         if(pid == (processors - 1)){    numSeqsPerProcessor = numSeqs - pid * numSeqsPerProcessor;      }
437                                 
438                                         //do your part
439                                         driverMPI(startIndex, numSeqsPerProcessor, inMPI, outMPI, outMPIAccnos, MPIPos);
440                                         
441                                         if (m->control_pressed) { outputTypes.clear();  MPI_File_close(&inMPI);  MPI_File_close(&outMPI);   MPI_File_close(&outMPIAccnos);  remove(outputFileName.c_str());  remove(accnosFileName.c_str());  for (int j = 0; j < outputNames.size(); j++) {    remove(outputNames[j].c_str()); }  delete chimera; return 0;  }
442                                         
443                                 }else{ //you are a child process
444                                         MPI_Recv(&numSeqs, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
445                                         MPIPos.resize(numSeqs+1);
446                                         MPI_Recv(&MPIPos[0], (numSeqs+1), MPI_LONG, 0, tag, MPI_COMM_WORLD, &status);
447                                         
448                                         //figure out how many sequences you have to align
449                                         numSeqsPerProcessor = numSeqs / processors;
450                                         int startIndex =  pid * numSeqsPerProcessor;
451                                         if(pid == (processors - 1)){    numSeqsPerProcessor = numSeqs - pid * numSeqsPerProcessor;      }
452                                         
453                                         //do your part
454                                         driverMPI(startIndex, numSeqsPerProcessor, inMPI, outMPI, outMPIAccnos, MPIPos);
455                                         
456                                         if (m->control_pressed) { outputTypes.clear();  MPI_File_close(&inMPI);  MPI_File_close(&outMPI);   MPI_File_close(&outMPIAccnos);  for (int j = 0; j < outputNames.size(); j++) {      remove(outputNames[j].c_str()); }  delete chimera; return 0;  }
457                                 }
458                                 
459                                 //close files 
460                                 MPI_File_close(&inMPI);
461                                 MPI_File_close(&outMPI);
462                                 MPI_File_close(&outMPIAccnos);
463                                 MPI_Barrier(MPI_COMM_WORLD); //make everyone wait - just in case
464                 #else
465                         vector<unsigned long int> positions = m->divideFile(fastaFileNames[s], processors);
466                                 
467                         for (int i = 0; i < (positions.size()-1); i++) {
468                                 lines.push_back(new linePair(positions[i], positions[(i+1)]));
469                         }       
470                         
471                         //break up file
472                         #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
473                                 if(processors == 1){
474                 
475                                         numSeqs = driver(lines[0], outputFileName, fastaFileNames[s], accnosFileName);
476                                         
477                                         if (m->control_pressed) { outputTypes.clear(); remove(outputFileName.c_str()); remove(accnosFileName.c_str()); for (int j = 0; j < outputNames.size(); j++) {   remove(outputNames[j].c_str()); } for (int i = 0; i < lines.size(); i++) {  delete lines[i];  }  lines.clear(); delete chimera; return 0; }
478                                         
479                                 }else{
480                                         processIDS.resize(0);
481                                         
482                                         numSeqs = createProcesses(outputFileName, fastaFileNames[s], accnosFileName); 
483                                 
484                                         rename((outputFileName + toString(processIDS[0]) + ".temp").c_str(), outputFileName.c_str());
485                                         rename((accnosFileName + toString(processIDS[0]) + ".temp").c_str(), accnosFileName.c_str());
486                                                 
487                                         //append output files
488                                         for(int i=1;i<processors;i++){
489                                                 m->appendFiles((outputFileName + toString(processIDS[i]) + ".temp"), outputFileName);
490                                                 remove((outputFileName + toString(processIDS[i]) + ".temp").c_str());
491                                         }
492                                         
493                                         //append output files
494                                         for(int i=1;i<processors;i++){
495                                                 m->appendFiles((accnosFileName + toString(processIDS[i]) + ".temp"), accnosFileName);
496                                                 remove((accnosFileName + toString(processIDS[i]) + ".temp").c_str());
497                                         }
498                                                                                 
499                                         if (m->control_pressed) { 
500                                                 remove(outputFileName.c_str()); 
501                                                 remove(accnosFileName.c_str());
502                                                 for (int j = 0; j < outputNames.size(); j++) {  remove(outputNames[j].c_str()); } outputTypes.clear();
503                                                 for (int i = 0; i < lines.size(); i++) {  delete lines[i];  }  lines.clear();
504                                                 delete chimera;
505                                                 return 0;
506                                         }
507                                 }
508
509                         #else
510                                 numSeqs = driver(lines[0], outputFileName, fastaFileNames[s], accnosFileName);
511                                 
512                                 if (m->control_pressed) { outputTypes.clear(); remove(outputFileName.c_str()); remove(accnosFileName.c_str()); for (int j = 0; j < outputNames.size(); j++) {   remove(outputNames[j].c_str()); } for (int i = 0; i < lines.size(); i++) {  delete lines[i];  }  lines.clear(); delete chimera; return 0; }
513                         #endif
514                         
515                 #endif  
516                 
517                         delete chimera;
518                         for (int i = 0; i < lines.size(); i++) {  delete lines[i];  }  lines.clear();
519                         
520                         outputNames.push_back(outputFileName); outputTypes["chimera"].push_back(outputFileName);
521                         outputNames.push_back(accnosFileName); outputTypes["accnos"].push_back(accnosFileName);
522                         
523                         m->mothurOutEndLine();
524                         m->mothurOutEndLine(); m->mothurOut("It took " + toString(time(NULL) - start) + " secs to check " + toString(numSeqs) + " sequences."); m->mothurOutEndLine();
525                 }
526                 
527                 //set accnos file as new current accnosfile
528                 string current = "";
529                 itTypes = outputTypes.find("accnos");
530                 if (itTypes != outputTypes.end()) {
531                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setAccnosFile(current); }
532                 }
533                 
534                 m->mothurOutEndLine();
535                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
536                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }       
537                 m->mothurOutEndLine();
538                         
539                 return 0;
540                 
541         }
542         catch(exception& e) {
543                 m->errorOut(e, "ChimeraPintailCommand", "execute");
544                 exit(1);
545         }
546 }
547 //**********************************************************************************************************************
548
549 int ChimeraPintailCommand::driver(linePair* filePos, string outputFName, string filename, string accnos){
550         try {
551                 ofstream out;
552                 m->openOutputFile(outputFName, out);
553                 
554                 ofstream out2;
555                 m->openOutputFile(accnos, out2);
556                 
557                 ifstream inFASTA;
558                 m->openInputFile(filename, inFASTA);
559
560                 inFASTA.seekg(filePos->start);
561
562                 bool done = false;
563                 int count = 0;
564         
565                 while (!done) {
566                                 
567                         if (m->control_pressed) {       return 1;       }
568                 
569                         Sequence* candidateSeq = new Sequence(inFASTA);  m->gobble(inFASTA);
570                                 
571                         if (candidateSeq->getName() != "") { //incase there is a commented sequence at the end of a file
572                                 
573                                 if (candidateSeq->getAligned().length() != templateSeqsLength)  {  //chimeracheck does not require seqs to be aligned
574                                         m->mothurOut(candidateSeq->getName() + " is not the same length as the template sequences. Skipping."); m->mothurOutEndLine();
575                                 }else{
576                                         //find chimeras
577                                         chimera->getChimeras(candidateSeq);
578                                         
579                                         if (m->control_pressed) {       delete candidateSeq; return 1;  }
580                 
581                                         //print results
582                                         chimera->print(out, out2);
583                                 }
584                                 count++;
585                         }
586                         delete candidateSeq;
587                         
588                         #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
589                                 unsigned long int pos = inFASTA.tellg();
590                                 if ((pos == -1) || (pos >= filePos->end)) { break; }
591                         #else
592                                 if (inFASTA.eof()) { break; }
593                         #endif
594                         
595                         //report progress
596                         if((count) % 100 == 0){ m->mothurOut("Processing sequence: " + toString(count)); m->mothurOutEndLine();         }
597                 }
598                 //report progress
599                 if((count) % 100 != 0){ m->mothurOut("Processing sequence: " + toString(count)); m->mothurOutEndLine();         }
600                 
601                 out.close();
602                 out2.close();
603                 inFASTA.close();
604                                 
605                 return count;
606         }
607         catch(exception& e) {
608                 m->errorOut(e, "ChimeraPintailCommand", "driver");
609                 exit(1);
610         }
611 }
612 //**********************************************************************************************************************
613 #ifdef USE_MPI
614 int ChimeraPintailCommand::driverMPI(int start, int num, MPI_File& inMPI, MPI_File& outMPI, MPI_File& outAccMPI, vector<unsigned long int>& MPIPos){
615         try {
616                                 
617                 MPI_Status status; 
618                 int pid;
619                 MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
620                 
621                 for(int i=0;i<num;i++){
622                         
623                         if (m->control_pressed) {       return 1;       }
624                         
625                         //read next sequence
626                         int length = MPIPos[start+i+1] - MPIPos[start+i];
627         
628                         char* buf4 = new char[length];
629                         MPI_File_read_at(inMPI, MPIPos[start+i], buf4, length, MPI_CHAR, &status);
630                         
631                         string tempBuf = buf4;
632                         if (tempBuf.length() > length) { tempBuf = tempBuf.substr(0, length);  }
633                         istringstream iss (tempBuf,istringstream::in);
634                         delete buf4;
635
636                         Sequence* candidateSeq = new Sequence(iss);  m->gobble(iss);
637                                 
638                         if (candidateSeq->getName() != "") { //incase there is a commented sequence at the end of a file
639                                 
640                                 if      (candidateSeq->getAligned().length() != templateSeqsLength) {  //chimeracheck does not require seqs to be aligned
641                                         m->mothurOut(candidateSeq->getName() + " is not the same length as the template sequences. Skipping."); m->mothurOutEndLine();
642                                 }else{
643                                         //find chimeras
644                                         chimera->getChimeras(candidateSeq);
645                                         
646                                         if (m->control_pressed) {       delete candidateSeq; return 1;  }
647                 
648                                         //print results
649                                         chimera->print(outMPI, outAccMPI);
650                                 }
651                         }
652                         delete candidateSeq;
653                         
654                         //report progress
655                         if((i+1) % 100 == 0){  cout << "Processing sequence: " << (i+1) << endl;        m->mothurOutJustToLog("Processing sequence: " + toString(i+1) + "\n");          }
656                 }
657                 //report progress
658                 if(num % 100 != 0){             cout << "Processing sequence: " << num << endl; m->mothurOutJustToLog("Processing sequence: " + toString(num) + "\n");  }
659                 
660                                 
661                 return 0;
662         }
663         catch(exception& e) {
664                 m->errorOut(e, "ChimeraPintailCommand", "driverMPI");
665                 exit(1);
666         }
667 }
668 #endif
669
670 /**************************************************************************************************/
671
672 int ChimeraPintailCommand::createProcesses(string outputFileName, string filename, string accnos) {
673         try {
674 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
675                 int process = 0;
676                 int num = 0;
677                 
678                 //loop through and create all the processes you want
679                 while (process != processors) {
680                         int pid = fork();
681                         
682                         if (pid > 0) {
683                                 processIDS.push_back(pid);  //create map from line number to pid so you can append files in correct order later
684                                 process++;
685                         }else if (pid == 0){
686                                 num = driver(lines[process], outputFileName + toString(getpid()) + ".temp", filename, accnos + toString(getpid()) + ".temp");
687                                 
688                                 //pass numSeqs to parent
689                                 ofstream out;
690                                 string tempFile = outputFileName + toString(getpid()) + ".num.temp";
691                                 m->openOutputFile(tempFile, out);
692                                 out << num << endl;
693                                 out.close();
694
695                                 exit(0);
696                         }else { 
697                                 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine(); 
698                                 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
699                                 exit(0);
700                         }
701                 }
702                 
703                 //force parent to wait until all the processes are done
704                 for (int i=0;i<processors;i++) { 
705                         int temp = processIDS[i];
706                         wait(&temp);
707                 }
708                 
709                 for (int i = 0; i < processIDS.size(); i++) {
710                         ifstream in;
711                         string tempFile =  outputFileName + toString(processIDS[i]) + ".num.temp";
712                         m->openInputFile(tempFile, in);
713                         if (!in.eof()) { int tempNum = 0; in >> tempNum; num += tempNum; }
714                         in.close(); remove(tempFile.c_str());
715                 }
716                 
717                 return num;
718 #endif          
719         }
720         catch(exception& e) {
721                 m->errorOut(e, "ChimeraPintailCommand", "createProcesses");
722                 exit(1);
723         }
724 }
725
726 /**************************************************************************************************/
727
728