]> git.donarmstrong.com Git - mothur.git/blob - chimerapintailcommand.cpp
added [ERROR] flag if command aborts
[mothur.git] / chimerapintailcommand.cpp
1 /*
2  *  chimerapintailcommand.cpp
3  *  Mothur
4  *
5  *  Created by westcott on 4/1/10.
6  *  Copyright 2010 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "chimerapintailcommand.h"
11 #include "pintail.h"
12
13 //**********************************************************************************************************************
14 vector<string> ChimeraPintailCommand::getValidParameters(){     
15         try {
16                 string AlignArray[] =  {"fasta","filter","processors","window" ,"increment","template","conservation","quantile","mask","outputdir","inputdir"};
17                 vector<string> myArray (AlignArray, AlignArray+(sizeof(AlignArray)/sizeof(string)));
18                 return myArray;
19         }
20         catch(exception& e) {
21                 m->errorOut(e, "ChimeraPintailCommand", "getValidParameters");
22                 exit(1);
23         }
24 }
25 //**********************************************************************************************************************
26 ChimeraPintailCommand::ChimeraPintailCommand(){ 
27         try {
28                 abort = true; calledHelp = true;
29                 vector<string> tempOutNames;
30                 outputTypes["chimera"] = tempOutNames;
31                 outputTypes["accnos"] = tempOutNames;
32         }
33         catch(exception& e) {
34                 m->errorOut(e, "ChimeraPintailCommand", "ChimeraPintailCommand");
35                 exit(1);
36         }
37 }
38 //**********************************************************************************************************************
39 vector<string> ChimeraPintailCommand::getRequiredParameters(){  
40         try {
41                 string AlignArray[] =  {"template","fasta"};
42                 vector<string> myArray (AlignArray, AlignArray+(sizeof(AlignArray)/sizeof(string)));
43                 return myArray;
44         }
45         catch(exception& e) {
46                 m->errorOut(e, "ChimeraPintailCommand", "getRequiredParameters");
47                 exit(1);
48         }
49 }
50 //**********************************************************************************************************************
51 vector<string> ChimeraPintailCommand::getRequiredFiles(){       
52         try {
53                 vector<string> myArray;
54                 return myArray;
55         }
56         catch(exception& e) {
57                 m->errorOut(e, "ChimeraPintailCommand", "getRequiredFiles");
58                 exit(1);
59         }
60 }
61 //***************************************************************************************************************
62 ChimeraPintailCommand::ChimeraPintailCommand(string option)  {
63         try {
64                 abort = false; calledHelp = false;   
65                 
66                 //allow user to run help
67                 if(option == "help") { help(); abort = true; calledHelp = true; }
68                 
69                 else {
70                         //valid paramters for this command
71                         string Array[] =  {"fasta","filter","processors","window" ,"increment","template","conservation","quantile","mask","outputdir","inputdir"};
72                         vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
73                         
74                         OptionParser parser(option);
75                         map<string,string> parameters = parser.getParameters();
76                         
77                         ValidParameters validParameter("chimera.pintail");
78                         map<string,string>::iterator it;
79                         
80                         //check to make sure all parameters are valid for command
81                         for (it = parameters.begin(); it != parameters.end(); it++) { 
82                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
83                         }
84                         
85                         vector<string> tempOutNames;
86                         outputTypes["chimera"] = tempOutNames;
87                         outputTypes["accnos"] = tempOutNames;
88                 
89                         //if the user changes the input directory command factory will send this info to us in the output parameter 
90                         inputDir = validParameter.validFile(parameters, "inputdir", false);             
91                         if (inputDir == "not found"){   inputDir = "";          }
92                         else {
93                                 string path;
94                                 it = parameters.find("template");
95                                 //user has given a template file
96                                 if(it != parameters.end()){ 
97                                         path = m->hasPath(it->second);
98                                         //if the user has not given a path then, add inputdir. else leave path alone.
99                                         if (path == "") {       parameters["template"] = inputDir + it->second;         }
100                                 }
101                                 
102                                 it = parameters.find("conservation");
103                                 //user has given a template file
104                                 if(it != parameters.end()){ 
105                                         path = m->hasPath(it->second);
106                                         //if the user has not given a path then, add inputdir. else leave path alone.
107                                         if (path == "") {       parameters["conservation"] = inputDir + it->second;             }
108                                 }
109                                 
110                                 it = parameters.find("quantile");
111                                 //user has given a template file
112                                 if(it != parameters.end()){ 
113                                         path = m->hasPath(it->second);
114                                         //if the user has not given a path then, add inputdir. else leave path alone.
115                                         if (path == "") {       parameters["quantile"] = inputDir + it->second;         }
116                                 }
117                         }
118
119                         
120                         //check for required parameters
121                         fastafile = validParameter.validFile(parameters, "fasta", false);
122                         if (fastafile == "not found") { fastafile = ""; m->mothurOut("fasta is a required parameter for the chimera.pintail command."); m->mothurOutEndLine(); abort = true;  }
123                         else { 
124                                 m->splitAtDash(fastafile, fastaFileNames);
125                                 
126                                 //go through files and make sure they are good, if not, then disregard them
127                                 for (int i = 0; i < fastaFileNames.size(); i++) {
128                                         if (inputDir != "") {
129                                                 string path = m->hasPath(fastaFileNames[i]);
130                                                 //if the user has not given a path then, add inputdir. else leave path alone.
131                                                 if (path == "") {       fastaFileNames[i] = inputDir + fastaFileNames[i];               }
132                                         }
133         
134                                         int ableToOpen;
135                                         ifstream in;
136                                         
137                                         ableToOpen = m->openInputFile(fastaFileNames[i], in, "noerror");
138                                 
139                                         //if you can't open it, try default location
140                                         if (ableToOpen == 1) {
141                                                 if (m->getDefaultPath() != "") { //default path is set
142                                                         string tryPath = m->getDefaultPath() + m->getSimpleName(fastaFileNames[i]);
143                                                         m->mothurOut("Unable to open " + fastaFileNames[i] + ". Trying default " + tryPath); m->mothurOutEndLine();
144                                                         ifstream in2;
145                                                         ableToOpen = m->openInputFile(tryPath, in2, "noerror");
146                                                         in2.close();
147                                                         fastaFileNames[i] = tryPath;
148                                                 }
149                                         }
150                                         
151                                         if (ableToOpen == 1) {
152                                                 if (m->getOutputDir() != "") { //default path is set
153                                                         string tryPath = m->getOutputDir() + m->getSimpleName(fastaFileNames[i]);
154                                                         m->mothurOut("Unable to open " + fastaFileNames[i] + ". Trying output directory " + tryPath); m->mothurOutEndLine();
155                                                         ifstream in2;
156                                                         ableToOpen = m->openInputFile(tryPath, in2, "noerror");
157                                                         in2.close();
158                                                         fastaFileNames[i] = tryPath;
159                                                 }
160                                         }
161
162                                         in.close();
163                                         
164                                         if (ableToOpen == 1) { 
165                                                 m->mothurOut("Unable to open " + fastaFileNames[i] + ". It will be disregarded."); m->mothurOutEndLine(); 
166                                                 //erase from file list
167                                                 fastaFileNames.erase(fastaFileNames.begin()+i);
168                                                 i--;
169                                         }
170                                 }
171                                 
172                                 //make sure there is at least one valid file left
173                                 if (fastaFileNames.size() == 0) { m->mothurOut("no valid files."); m->mothurOutEndLine(); abort = true; }
174                         }
175                         
176                         string temp;
177                         temp = validParameter.validFile(parameters, "filter", false);                   if (temp == "not found") { temp = "F"; }
178                         filter = m->isTrue(temp);
179                         
180                         temp = validParameter.validFile(parameters, "processors", false);               if (temp == "not found") { temp = "1"; }
181                         convert(temp, processors);
182                         
183                         temp = validParameter.validFile(parameters, "window", false);                   if (temp == "not found") { temp = "0"; }
184                         convert(temp, window);
185                         
186                         temp = validParameter.validFile(parameters, "increment", false);                if (temp == "not found") { temp = "25"; }
187                         convert(temp, increment);
188                         
189                         maskfile = validParameter.validFile(parameters, "mask", false);
190                         if (maskfile == "not found") { maskfile = "";  }        
191                         else if (maskfile != "default")  { 
192                                 if (inputDir != "") {
193                                         string path = m->hasPath(maskfile);
194                                         //if the user has not given a path then, add inputdir. else leave path alone.
195                                         if (path == "") {       maskfile = inputDir + maskfile;         }
196                                 }
197
198                                 ifstream in;
199                                 int     ableToOpen = m->openInputFile(maskfile, in, "no error");
200                                 if (ableToOpen == 1) { 
201                                         if (m->getDefaultPath() != "") { //default path is set
202                                                         string tryPath = m->getDefaultPath() + m->getSimpleName(maskfile);
203                                                         m->mothurOut("Unable to open " + maskfile + ". Trying default " + tryPath); m->mothurOutEndLine();
204                                                         ifstream in2;
205                                                         ableToOpen = m->openInputFile(tryPath, in2, "noerror");
206                                                         in2.close();
207                                                         maskfile = tryPath;
208                                         }
209                                 }
210                                 
211                                 if (ableToOpen == 1) {
212                                                 if (m->getOutputDir() != "") { //default path is set
213                                                         string tryPath = m->getOutputDir() + m->getSimpleName(maskfile);
214                                                         m->mothurOut("Unable to open " + maskfile + ". Trying output directory " + tryPath); m->mothurOutEndLine();
215                                                         ifstream in2;
216                                                         ableToOpen = m->openInputFile(tryPath, in2, "noerror");
217                                                         in2.close();
218                                                         maskfile = tryPath;
219                                                 }
220                                 }
221                                 
222                                 in.close();
223                                         
224                                 if (ableToOpen == 1) { 
225                                                 m->mothurOut("Unable to open " + maskfile + "."); m->mothurOutEndLine(); 
226                                                 abort = true;
227                                 }
228                         }
229
230                         
231                         //if the user changes the output directory command factory will send this info to us in the output parameter 
232                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = ""; }
233                 
234                         templatefile = validParameter.validFile(parameters, "template", true);
235                         if (templatefile == "not open") { abort = true; }
236                         else if (templatefile == "not found") { templatefile = "";  m->mothurOut("template is a required parameter for the chimera.pintail command."); m->mothurOutEndLine(); abort = true;  }
237                         
238                         consfile = validParameter.validFile(parameters, "conservation", true);
239                         if (consfile == "not open") { abort = true; }
240                         else if (consfile == "not found") { 
241                                 consfile = "";  
242                                 //check for consfile
243                                 string tempConsFile = m->getRootName(inputDir + m->getSimpleName(templatefile)) + "freq";
244                                 ifstream FileTest(tempConsFile.c_str());
245                                 if(FileTest){   
246                                         bool GoodFile = m->checkReleaseVersion(FileTest, m->getVersion());
247                                         if (GoodFile) {  
248                                                 m->mothurOut("I found " + tempConsFile + " in your input file directory. I will use it to save time."); m->mothurOutEndLine();  consfile = tempConsFile;  FileTest.close();     
249                                         }
250                                 }else {
251                                         string tempConsFile = m->getDefaultPath() + m->getRootName(m->getSimpleName(templatefile)) + "freq";
252                                         ifstream FileTest2(tempConsFile.c_str());
253                                         if(FileTest2){  
254                                                 bool GoodFile = m->checkReleaseVersion(FileTest2, m->getVersion());
255                                                 if (GoodFile) {  
256                                                         m->mothurOut("I found " + tempConsFile + " in your input file directory. I will use it to save time."); m->mothurOutEndLine();  consfile = tempConsFile;  FileTest2.close();    
257                                                 }
258                                         }
259                                 }
260                         }       
261                         
262                         quanfile = validParameter.validFile(parameters, "quantile", true);
263                         if (quanfile == "not open") { abort = true; }
264                         else if (quanfile == "not found") { quanfile = ""; }
265                 }
266         }
267         catch(exception& e) {
268                 m->errorOut(e, "ChimeraPintailCommand", "ChimeraPintailCommand");
269                 exit(1);
270         }
271 }
272 //**********************************************************************************************************************
273
274 void ChimeraPintailCommand::help(){
275         try {
276         
277                 m->mothurOut("The chimera.pintail command reads a fastafile and templatefile and outputs potentially chimeric sequences.\n");
278                 m->mothurOut("This command was created using the algorythms described in the 'At Least 1 in 20 16S rRNA Sequence Records Currently Held in the Public Repositories is Estimated To Contain Substantial Anomalies' paper by Kevin E. Ashelford 1, Nadia A. Chuzhanova 3, John C. Fry 1, Antonia J. Jones 2 and Andrew J. Weightman 1.\n");
279                 m->mothurOut("The chimera.pintail command parameters are fasta, template, filter, mask, processors, window, increment, conservation and quantile.\n");
280                 m->mothurOut("The fasta parameter allows you to enter the fasta file containing your potentially chimeric sequences, and is required. \n");
281                 m->mothurOut("You may enter multiple fasta files by separating their names with dashes. ie. fasta=abrecovery.fasta-amzon.fasta \n");
282                 m->mothurOut("The template parameter allows you to enter a template file containing known non-chimeric sequences, and is required. \n");
283                 m->mothurOut("The filter parameter allows you to specify if you would like to apply a vertical and 50% soft filter. \n");
284                 m->mothurOut("The mask parameter allows you to specify a file containing one sequence you wish to use as a mask for the your sequences, by default no mask is applied.  You can apply an ecoli mask by typing, mask=default. \n");
285                 m->mothurOut("The processors parameter allows you to specify how many processors you would like to use.  The default is 1. \n");
286                 #ifdef USE_MPI
287                 m->mothurOut("When using MPI, the processors parameter is set to the number of MPI processes running. \n");
288                 #endif
289                 m->mothurOut("The window parameter allows you to specify the window size for searching for chimeras, default=300. \n");
290                 m->mothurOut("The increment parameter allows you to specify how far you move each window while finding chimeric sequences, default=25.\n");
291                 m->mothurOut("The conservation parameter allows you to enter a frequency file containing the highest bases frequency at each place in the alignment.\n");
292                 m->mothurOut("The quantile parameter allows you to enter a file containing quantiles for a template files sequences, if you use the filter the quantile file generated becomes unique to the fasta file you used.\n");
293                 m->mothurOut("The chimera.pintail command should be in the following format: \n");
294                 m->mothurOut("chimera.pintail(fasta=yourFastaFile, template=yourTemplate) \n");
295                 m->mothurOut("Example: chimera.pintail(fasta=AD.align, template=silva.bacteria.fasta) \n");
296                 m->mothurOut("Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFastaFile).\n\n");     
297         }
298         catch(exception& e) {
299                 m->errorOut(e, "ChimeraPintailCommand", "help");
300                 exit(1);
301         }
302 }
303
304 //***************************************************************************************************************
305
306 ChimeraPintailCommand::~ChimeraPintailCommand(){        /*      do nothing      */      }
307
308 //***************************************************************************************************************
309
310 int ChimeraPintailCommand::execute(){
311         try{
312                 
313                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
314                 
315                 for (int s = 0; s < fastaFileNames.size(); s++) {
316                                 
317                         m->mothurOut("Checking sequences from " + fastaFileNames[s] + " ..." ); m->mothurOutEndLine();
318
319                         int start = time(NULL); 
320                         
321                         //set user options
322                         if (maskfile == "default") { m->mothurOut("I am using the default 236627 EU009184.1 Shigella dysenteriae str. FBD013."); m->mothurOutEndLine();  }
323                         
324                         //check for quantile to save the time
325                         string tempQuan = "";
326                         if ((!filter) && (maskfile == "")) {
327                                 tempQuan = inputDir + m->getRootName(m->getSimpleName(templatefile)) + "pintail.quan";
328                         }else if ((!filter) && (maskfile != "")) { 
329                                 tempQuan = inputDir + m->getRootName(m->getSimpleName(templatefile)) + "pintail.masked.quan";
330                         }else if ((filter) && (maskfile != "")) { 
331                                 tempQuan = inputDir + m->getRootName(m->getSimpleName(templatefile)) + "pintail.filtered." + m->getSimpleName(m->getRootName(fastaFileNames[s])) + "masked.quan";
332                         }else if ((filter) && (maskfile == "")) { 
333                                 tempQuan = inputDir + m->getRootName(m->getSimpleName(templatefile)) + "pintail.filtered." + m->getSimpleName(m->getRootName(fastaFileNames[s])) + "quan";
334                         }
335                         
336                         ifstream FileTest(tempQuan.c_str());
337                         if(FileTest){   
338                                 bool GoodFile = m->checkReleaseVersion(FileTest, m->getVersion());
339                                 if (GoodFile) {  
340                                         m->mothurOut("I found " + tempQuan + " in your input file directory. I will use it to save time."); m->mothurOutEndLine();  quanfile = tempQuan;  FileTest.close();     
341                                 }
342                         }else {
343                                 string tryPath = m->getDefaultPath();
344                                 string tempQuan = "";
345                                 if ((!filter) && (maskfile == "")) {
346                                         tempQuan = tryPath + m->getRootName(m->getSimpleName(templatefile)) + "pintail.quan";
347                                 }else if ((!filter) && (maskfile != "")) { 
348                                         tempQuan = tryPath + m->getRootName(m->getSimpleName(templatefile)) + "pintail.masked.quan";
349                                 }else if ((filter) && (maskfile != "")) { 
350                                         tempQuan = tryPath + m->getRootName(m->getSimpleName(templatefile)) + "pintail.filtered." + m->getSimpleName(m->getRootName(fastaFileNames[s])) + "masked.quan";
351                                 }else if ((filter) && (maskfile == "")) { 
352                                         tempQuan = tryPath + m->getRootName(m->getSimpleName(templatefile)) + "pintail.filtered." + m->getSimpleName(m->getRootName(fastaFileNames[s])) + "quan";
353                                 }
354                                 
355                                 ifstream FileTest2(tempQuan.c_str());
356                                 if(FileTest2){  
357                                         bool GoodFile = m->checkReleaseVersion(FileTest2, m->getVersion());
358                                         if (GoodFile) {  
359                                                 m->mothurOut("I found " + tempQuan + " in your input file directory. I will use it to save time."); m->mothurOutEndLine();  quanfile = tempQuan;  FileTest2.close();    
360                                         }
361                                 }
362                         }
363                         
364                         chimera = new Pintail(fastaFileNames[s], templatefile, filter, processors, maskfile, consfile, quanfile, window, increment, outputDir);
365                         
366                         if (outputDir == "") { outputDir = m->hasPath(fastaFileNames[s]);  }//if user entered a file with a path then preserve it
367                         string outputFileName, accnosFileName;
368                         if (maskfile != "") {
369                                 outputFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + m->getSimpleName(m->getRootName(maskfile)) + ".pintail.chimeras";
370                                 accnosFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + m->getSimpleName(m->getRootName(maskfile)) + ".pintail.accnos";
371                         }else {
372                                 outputFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s]))  + "pintail.chimeras";
373                                 accnosFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s]))  + "pintail.accnos";
374                         }
375                         
376                         if (m->control_pressed) { delete chimera; for (int j = 0; j < outputNames.size(); j++) {        remove(outputNames[j].c_str()); }  return 0;    }
377                         
378                         if (chimera->getUnaligned()) { 
379                                 m->mothurOut("Your template sequences are different lengths, please correct."); m->mothurOutEndLine(); 
380                                 delete chimera;
381                                 return 0; 
382                         }
383                         templateSeqsLength = chimera->getLength();
384                 
385                 #ifdef USE_MPI
386                         int pid, numSeqsPerProcessor; 
387                                 int tag = 2001;
388                                 vector<unsigned long int> MPIPos;
389                                 
390                                 MPI_Status status; 
391                                 MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
392                                 MPI_Comm_size(MPI_COMM_WORLD, &processors); 
393
394                                 MPI_File inMPI;
395                                 MPI_File outMPI;
396                                 MPI_File outMPIAccnos;
397                                 
398                                 int outMode=MPI_MODE_CREATE|MPI_MODE_WRONLY; 
399                                 int inMode=MPI_MODE_RDONLY; 
400                                 
401                                 char outFilename[1024];
402                                 strcpy(outFilename, outputFileName.c_str());
403                                 
404                                 char outAccnosFilename[1024];
405                                 strcpy(outAccnosFilename, accnosFileName.c_str());
406                                 
407                                 char inFileName[1024];
408                                 strcpy(inFileName, fastaFileNames[s].c_str());
409
410                                 MPI_File_open(MPI_COMM_WORLD, inFileName, inMode, MPI_INFO_NULL, &inMPI);  //comm, filename, mode, info, filepointer
411                                 MPI_File_open(MPI_COMM_WORLD, outFilename, outMode, MPI_INFO_NULL, &outMPI);
412                                 MPI_File_open(MPI_COMM_WORLD, outAccnosFilename, outMode, MPI_INFO_NULL, &outMPIAccnos);
413                                 
414                                 if (m->control_pressed) { outputTypes.clear();  MPI_File_close(&inMPI);  MPI_File_close(&outMPI);   MPI_File_close(&outMPIAccnos);  for (int j = 0; j < outputNames.size(); j++) {      remove(outputNames[j].c_str()); }  delete chimera; return 0;  }
415
416                                 if (pid == 0) { //you are the root process 
417                                                                 
418                                         MPIPos = m->setFilePosFasta(fastaFileNames[s], numSeqs); //fills MPIPos, returns numSeqs
419                                         
420                                         //send file positions to all processes
421                                         for(int i = 1; i < processors; i++) { 
422                                                 MPI_Send(&numSeqs, 1, MPI_INT, i, tag, MPI_COMM_WORLD);
423                                                 MPI_Send(&MPIPos[0], (numSeqs+1), MPI_LONG, i, tag, MPI_COMM_WORLD);
424                                         }
425                                         
426                                         //figure out how many sequences you have to align
427                                         numSeqsPerProcessor = numSeqs / processors;
428                                         int startIndex =  pid * numSeqsPerProcessor;
429                                         if(pid == (processors - 1)){    numSeqsPerProcessor = numSeqs - pid * numSeqsPerProcessor;      }
430                                 
431                                         //do your part
432                                         driverMPI(startIndex, numSeqsPerProcessor, inMPI, outMPI, outMPIAccnos, MPIPos);
433                                         
434                                         if (m->control_pressed) { outputTypes.clear();  MPI_File_close(&inMPI);  MPI_File_close(&outMPI);   MPI_File_close(&outMPIAccnos);  remove(outputFileName.c_str());  remove(accnosFileName.c_str());  for (int j = 0; j < outputNames.size(); j++) {    remove(outputNames[j].c_str()); }  delete chimera; return 0;  }
435                                         
436                                 }else{ //you are a child process
437                                         MPI_Recv(&numSeqs, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
438                                         MPIPos.resize(numSeqs+1);
439                                         MPI_Recv(&MPIPos[0], (numSeqs+1), MPI_LONG, 0, tag, MPI_COMM_WORLD, &status);
440                                         
441                                         //figure out how many sequences you have to align
442                                         numSeqsPerProcessor = numSeqs / processors;
443                                         int startIndex =  pid * numSeqsPerProcessor;
444                                         if(pid == (processors - 1)){    numSeqsPerProcessor = numSeqs - pid * numSeqsPerProcessor;      }
445                                         
446                                         //do your part
447                                         driverMPI(startIndex, numSeqsPerProcessor, inMPI, outMPI, outMPIAccnos, MPIPos);
448                                         
449                                         if (m->control_pressed) { outputTypes.clear();  MPI_File_close(&inMPI);  MPI_File_close(&outMPI);   MPI_File_close(&outMPIAccnos);  for (int j = 0; j < outputNames.size(); j++) {      remove(outputNames[j].c_str()); }  delete chimera; return 0;  }
450                                 }
451                                 
452                                 //close files 
453                                 MPI_File_close(&inMPI);
454                                 MPI_File_close(&outMPI);
455                                 MPI_File_close(&outMPIAccnos);
456                                 MPI_Barrier(MPI_COMM_WORLD); //make everyone wait - just in case
457                 #else
458                         vector<unsigned long int> positions = m->divideFile(fastaFileNames[s], processors);
459                                 
460                         for (int i = 0; i < (positions.size()-1); i++) {
461                                 lines.push_back(new linePair(positions[i], positions[(i+1)]));
462                         }       
463                         
464                         //break up file
465                         #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
466                                 if(processors == 1){
467                 
468                                         numSeqs = driver(lines[0], outputFileName, fastaFileNames[s], accnosFileName);
469                                         
470                                         if (m->control_pressed) { outputTypes.clear(); remove(outputFileName.c_str()); remove(accnosFileName.c_str()); for (int j = 0; j < outputNames.size(); j++) {   remove(outputNames[j].c_str()); } for (int i = 0; i < lines.size(); i++) {  delete lines[i];  }  lines.clear(); delete chimera; return 0; }
471                                         
472                                 }else{
473                                         processIDS.resize(0);
474                                         
475                                         numSeqs = createProcesses(outputFileName, fastaFileNames[s], accnosFileName); 
476                                 
477                                         rename((outputFileName + toString(processIDS[0]) + ".temp").c_str(), outputFileName.c_str());
478                                         rename((accnosFileName + toString(processIDS[0]) + ".temp").c_str(), accnosFileName.c_str());
479                                                 
480                                         //append output files
481                                         for(int i=1;i<processors;i++){
482                                                 m->appendFiles((outputFileName + toString(processIDS[i]) + ".temp"), outputFileName);
483                                                 remove((outputFileName + toString(processIDS[i]) + ".temp").c_str());
484                                         }
485                                         
486                                         //append output files
487                                         for(int i=1;i<processors;i++){
488                                                 m->appendFiles((accnosFileName + toString(processIDS[i]) + ".temp"), accnosFileName);
489                                                 remove((accnosFileName + toString(processIDS[i]) + ".temp").c_str());
490                                         }
491                                                                                 
492                                         if (m->control_pressed) { 
493                                                 remove(outputFileName.c_str()); 
494                                                 remove(accnosFileName.c_str());
495                                                 for (int j = 0; j < outputNames.size(); j++) {  remove(outputNames[j].c_str()); } outputTypes.clear();
496                                                 for (int i = 0; i < lines.size(); i++) {  delete lines[i];  }  lines.clear();
497                                                 delete chimera;
498                                                 return 0;
499                                         }
500                                 }
501
502                         #else
503                                 numSeqs = driver(lines[0], outputFileName, fastaFileNames[s], accnosFileName);
504                                 
505                                 if (m->control_pressed) { outputTypes.clear(); remove(outputFileName.c_str()); remove(accnosFileName.c_str()); for (int j = 0; j < outputNames.size(); j++) {   remove(outputNames[j].c_str()); } for (int i = 0; i < lines.size(); i++) {  delete lines[i];  }  lines.clear(); delete chimera; return 0; }
506                         #endif
507                         
508                 #endif  
509                 
510                         delete chimera;
511                         for (int i = 0; i < lines.size(); i++) {  delete lines[i];  }  lines.clear();
512                         
513                         outputNames.push_back(outputFileName); outputTypes["chimera"].push_back(outputFileName);
514                         outputNames.push_back(accnosFileName); outputTypes["accnos"].push_back(accnosFileName);
515                         
516                         m->mothurOutEndLine();
517                         m->mothurOutEndLine(); m->mothurOut("It took " + toString(time(NULL) - start) + " secs to check " + toString(numSeqs) + " sequences."); m->mothurOutEndLine();
518                 }
519                 
520                 m->mothurOutEndLine();
521                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
522                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }       
523                 m->mothurOutEndLine();
524                         
525                 return 0;
526                 
527         }
528         catch(exception& e) {
529                 m->errorOut(e, "ChimeraPintailCommand", "execute");
530                 exit(1);
531         }
532 }
533 //**********************************************************************************************************************
534
535 int ChimeraPintailCommand::driver(linePair* filePos, string outputFName, string filename, string accnos){
536         try {
537                 ofstream out;
538                 m->openOutputFile(outputFName, out);
539                 
540                 ofstream out2;
541                 m->openOutputFile(accnos, out2);
542                 
543                 ifstream inFASTA;
544                 m->openInputFile(filename, inFASTA);
545
546                 inFASTA.seekg(filePos->start);
547
548                 bool done = false;
549                 int count = 0;
550         
551                 while (!done) {
552                                 
553                         if (m->control_pressed) {       return 1;       }
554                 
555                         Sequence* candidateSeq = new Sequence(inFASTA);  m->gobble(inFASTA);
556                                 
557                         if (candidateSeq->getName() != "") { //incase there is a commented sequence at the end of a file
558                                 
559                                 if (candidateSeq->getAligned().length() != templateSeqsLength)  {  //chimeracheck does not require seqs to be aligned
560                                         m->mothurOut(candidateSeq->getName() + " is not the same length as the template sequences. Skipping."); m->mothurOutEndLine();
561                                 }else{
562                                         //find chimeras
563                                         chimera->getChimeras(candidateSeq);
564                                         
565                                         if (m->control_pressed) {       delete candidateSeq; return 1;  }
566                 
567                                         //print results
568                                         chimera->print(out, out2);
569                                 }
570                                 count++;
571                         }
572                         delete candidateSeq;
573                         
574                         #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
575                                 unsigned long int pos = inFASTA.tellg();
576                                 if ((pos == -1) || (pos >= filePos->end)) { break; }
577                         #else
578                                 if (inFASTA.eof()) { break; }
579                         #endif
580                         
581                         //report progress
582                         if((count) % 100 == 0){ m->mothurOut("Processing sequence: " + toString(count)); m->mothurOutEndLine();         }
583                 }
584                 //report progress
585                 if((count) % 100 != 0){ m->mothurOut("Processing sequence: " + toString(count)); m->mothurOutEndLine();         }
586                 
587                 out.close();
588                 out2.close();
589                 inFASTA.close();
590                                 
591                 return count;
592         }
593         catch(exception& e) {
594                 m->errorOut(e, "ChimeraPintailCommand", "driver");
595                 exit(1);
596         }
597 }
598 //**********************************************************************************************************************
599 #ifdef USE_MPI
600 int ChimeraPintailCommand::driverMPI(int start, int num, MPI_File& inMPI, MPI_File& outMPI, MPI_File& outAccMPI, vector<unsigned long int>& MPIPos){
601         try {
602                                 
603                 MPI_Status status; 
604                 int pid;
605                 MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
606                 
607                 for(int i=0;i<num;i++){
608                         
609                         if (m->control_pressed) {       return 1;       }
610                         
611                         //read next sequence
612                         int length = MPIPos[start+i+1] - MPIPos[start+i];
613         
614                         char* buf4 = new char[length];
615                         MPI_File_read_at(inMPI, MPIPos[start+i], buf4, length, MPI_CHAR, &status);
616                         
617                         string tempBuf = buf4;
618                         if (tempBuf.length() > length) { tempBuf = tempBuf.substr(0, length);  }
619                         istringstream iss (tempBuf,istringstream::in);
620                         delete buf4;
621
622                         Sequence* candidateSeq = new Sequence(iss);  m->gobble(iss);
623                                 
624                         if (candidateSeq->getName() != "") { //incase there is a commented sequence at the end of a file
625                                 
626                                 if      (candidateSeq->getAligned().length() != templateSeqsLength) {  //chimeracheck does not require seqs to be aligned
627                                         m->mothurOut(candidateSeq->getName() + " is not the same length as the template sequences. Skipping."); m->mothurOutEndLine();
628                                 }else{
629                                         //find chimeras
630                                         chimera->getChimeras(candidateSeq);
631                                         
632                                         if (m->control_pressed) {       delete candidateSeq; return 1;  }
633                 
634                                         //print results
635                                         chimera->print(outMPI, outAccMPI);
636                                 }
637                         }
638                         delete candidateSeq;
639                         
640                         //report progress
641                         if((i+1) % 100 == 0){  cout << "Processing sequence: " << (i+1) << endl;        m->mothurOutJustToLog("Processing sequence: " + toString(i+1) + "\n");          }
642                 }
643                 //report progress
644                 if(num % 100 != 0){             cout << "Processing sequence: " << num << endl; m->mothurOutJustToLog("Processing sequence: " + toString(num) + "\n");  }
645                 
646                                 
647                 return 0;
648         }
649         catch(exception& e) {
650                 m->errorOut(e, "ChimeraPintailCommand", "driverMPI");
651                 exit(1);
652         }
653 }
654 #endif
655
656 /**************************************************************************************************/
657
658 int ChimeraPintailCommand::createProcesses(string outputFileName, string filename, string accnos) {
659         try {
660 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
661                 int process = 0;
662                 int num = 0;
663                 
664                 //loop through and create all the processes you want
665                 while (process != processors) {
666                         int pid = fork();
667                         
668                         if (pid > 0) {
669                                 processIDS.push_back(pid);  //create map from line number to pid so you can append files in correct order later
670                                 process++;
671                         }else if (pid == 0){
672                                 num = driver(lines[process], outputFileName + toString(getpid()) + ".temp", filename, accnos + toString(getpid()) + ".temp");
673                                 
674                                 //pass numSeqs to parent
675                                 ofstream out;
676                                 string tempFile = outputFileName + toString(getpid()) + ".num.temp";
677                                 m->openOutputFile(tempFile, out);
678                                 out << num << endl;
679                                 out.close();
680
681                                 exit(0);
682                         }else { 
683                                 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine(); 
684                                 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
685                                 exit(0);
686                         }
687                 }
688                 
689                 //force parent to wait until all the processes are done
690                 for (int i=0;i<processors;i++) { 
691                         int temp = processIDS[i];
692                         wait(&temp);
693                 }
694                 
695                 for (int i = 0; i < processIDS.size(); i++) {
696                         ifstream in;
697                         string tempFile =  outputFileName + toString(processIDS[i]) + ".num.temp";
698                         m->openInputFile(tempFile, in);
699                         if (!in.eof()) { int tempNum = 0; in >> tempNum; num += tempNum; }
700                         in.close(); remove(tempFile.c_str());
701                 }
702                 
703                 return num;
704 #endif          
705         }
706         catch(exception& e) {
707                 m->errorOut(e, "ChimeraPintailCommand", "createProcesses");
708                 exit(1);
709         }
710 }
711
712 /**************************************************************************************************/
713
714