]> git.donarmstrong.com Git - mothur.git/blob - chimerapintailcommand.cpp
cluster.split fix
[mothur.git] / chimerapintailcommand.cpp
1 /*
2  *  chimerapintailcommand.cpp
3  *  Mothur
4  *
5  *  Created by westcott on 4/1/10.
6  *  Copyright 2010 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "chimerapintailcommand.h"
11 #include "pintail.h"
12
13 //**********************************************************************************************************************
14 vector<string> ChimeraPintailCommand::getValidParameters(){     
15         try {
16                 string AlignArray[] =  {"fasta","filter","processors","window" ,"increment","template","conservation","quantile","mask","outputdir","inputdir"};
17                 vector<string> myArray (AlignArray, AlignArray+(sizeof(AlignArray)/sizeof(string)));
18                 return myArray;
19         }
20         catch(exception& e) {
21                 m->errorOut(e, "ChimeraPintailCommand", "getValidParameters");
22                 exit(1);
23         }
24 }
25 //**********************************************************************************************************************
26 ChimeraPintailCommand::ChimeraPintailCommand(){ 
27         try {
28                 vector<string> tempOutNames;
29                 outputTypes["chimera"] = tempOutNames;
30                 outputTypes["accnos"] = tempOutNames;
31         }
32         catch(exception& e) {
33                 m->errorOut(e, "ChimeraPintailCommand", "ChimeraPintailCommand");
34                 exit(1);
35         }
36 }
37 //**********************************************************************************************************************
38 vector<string> ChimeraPintailCommand::getRequiredParameters(){  
39         try {
40                 string AlignArray[] =  {"template","fasta"};
41                 vector<string> myArray (AlignArray, AlignArray+(sizeof(AlignArray)/sizeof(string)));
42                 return myArray;
43         }
44         catch(exception& e) {
45                 m->errorOut(e, "ChimeraPintailCommand", "getRequiredParameters");
46                 exit(1);
47         }
48 }
49 //**********************************************************************************************************************
50 vector<string> ChimeraPintailCommand::getRequiredFiles(){       
51         try {
52                 vector<string> myArray;
53                 return myArray;
54         }
55         catch(exception& e) {
56                 m->errorOut(e, "ChimeraPintailCommand", "getRequiredFiles");
57                 exit(1);
58         }
59 }
60 //***************************************************************************************************************
61 ChimeraPintailCommand::ChimeraPintailCommand(string option)  {
62         try {
63                 abort = false;
64                 
65                 //allow user to run help
66                 if(option == "help") { help(); abort = true; }
67                 
68                 else {
69                         //valid paramters for this command
70                         string Array[] =  {"fasta","filter","processors","window" ,"increment","template","conservation","quantile","mask","outputdir","inputdir"};
71                         vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
72                         
73                         OptionParser parser(option);
74                         map<string,string> parameters = parser.getParameters();
75                         
76                         ValidParameters validParameter("chimera.pintail");
77                         map<string,string>::iterator it;
78                         
79                         //check to make sure all parameters are valid for command
80                         for (it = parameters.begin(); it != parameters.end(); it++) { 
81                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
82                         }
83                         
84                         vector<string> tempOutNames;
85                         outputTypes["chimera"] = tempOutNames;
86                         outputTypes["accnos"] = tempOutNames;
87                 
88                         //if the user changes the input directory command factory will send this info to us in the output parameter 
89                         inputDir = validParameter.validFile(parameters, "inputdir", false);             
90                         if (inputDir == "not found"){   inputDir = "";          }
91                         else {
92                                 string path;
93                                 it = parameters.find("template");
94                                 //user has given a template file
95                                 if(it != parameters.end()){ 
96                                         path = m->hasPath(it->second);
97                                         //if the user has not given a path then, add inputdir. else leave path alone.
98                                         if (path == "") {       parameters["template"] = inputDir + it->second;         }
99                                 }
100                                 
101                                 it = parameters.find("conservation");
102                                 //user has given a template file
103                                 if(it != parameters.end()){ 
104                                         path = m->hasPath(it->second);
105                                         //if the user has not given a path then, add inputdir. else leave path alone.
106                                         if (path == "") {       parameters["conservation"] = inputDir + it->second;             }
107                                 }
108                                 
109                                 it = parameters.find("quantile");
110                                 //user has given a template file
111                                 if(it != parameters.end()){ 
112                                         path = m->hasPath(it->second);
113                                         //if the user has not given a path then, add inputdir. else leave path alone.
114                                         if (path == "") {       parameters["quantile"] = inputDir + it->second;         }
115                                 }
116                         }
117
118                         
119                         //check for required parameters
120                         fastafile = validParameter.validFile(parameters, "fasta", false);
121                         if (fastafile == "not found") { fastafile = ""; m->mothurOut("fasta is a required parameter for the chimera.pintail command."); m->mothurOutEndLine(); abort = true;  }
122                         else { 
123                                 m->splitAtDash(fastafile, fastaFileNames);
124                                 
125                                 //go through files and make sure they are good, if not, then disregard them
126                                 for (int i = 0; i < fastaFileNames.size(); i++) {
127                                         if (inputDir != "") {
128                                                 string path = m->hasPath(fastaFileNames[i]);
129                                                 //if the user has not given a path then, add inputdir. else leave path alone.
130                                                 if (path == "") {       fastaFileNames[i] = inputDir + fastaFileNames[i];               }
131                                         }
132         
133                                         int ableToOpen;
134                                         ifstream in;
135                                         
136                                         ableToOpen = m->openInputFile(fastaFileNames[i], in, "noerror");
137                                 
138                                         //if you can't open it, try default location
139                                         if (ableToOpen == 1) {
140                                                 if (m->getDefaultPath() != "") { //default path is set
141                                                         string tryPath = m->getDefaultPath() + m->getSimpleName(fastaFileNames[i]);
142                                                         m->mothurOut("Unable to open " + fastaFileNames[i] + ". Trying default " + tryPath); m->mothurOutEndLine();
143                                                         ifstream in2;
144                                                         ableToOpen = m->openInputFile(tryPath, in2, "noerror");
145                                                         in2.close();
146                                                         fastaFileNames[i] = tryPath;
147                                                 }
148                                         }
149                                         
150                                         if (ableToOpen == 1) {
151                                                 if (m->getOutputDir() != "") { //default path is set
152                                                         string tryPath = m->getOutputDir() + m->getSimpleName(fastaFileNames[i]);
153                                                         m->mothurOut("Unable to open " + fastaFileNames[i] + ". Trying output directory " + tryPath); m->mothurOutEndLine();
154                                                         ifstream in2;
155                                                         ableToOpen = m->openInputFile(tryPath, in2, "noerror");
156                                                         in2.close();
157                                                         fastaFileNames[i] = tryPath;
158                                                 }
159                                         }
160
161                                         in.close();
162                                         
163                                         if (ableToOpen == 1) { 
164                                                 m->mothurOut("Unable to open " + fastaFileNames[i] + ". It will be disregarded."); m->mothurOutEndLine(); 
165                                                 //erase from file list
166                                                 fastaFileNames.erase(fastaFileNames.begin()+i);
167                                                 i--;
168                                         }
169                                 }
170                                 
171                                 //make sure there is at least one valid file left
172                                 if (fastaFileNames.size() == 0) { m->mothurOut("no valid files."); m->mothurOutEndLine(); abort = true; }
173                         }
174                         
175                         string temp;
176                         temp = validParameter.validFile(parameters, "filter", false);                   if (temp == "not found") { temp = "F"; }
177                         filter = m->isTrue(temp);
178                         
179                         temp = validParameter.validFile(parameters, "processors", false);               if (temp == "not found") { temp = "1"; }
180                         convert(temp, processors);
181                         
182                         temp = validParameter.validFile(parameters, "window", false);                   if (temp == "not found") { temp = "0"; }
183                         convert(temp, window);
184                         
185                         temp = validParameter.validFile(parameters, "increment", false);                if (temp == "not found") { temp = "25"; }
186                         convert(temp, increment);
187                         
188                         maskfile = validParameter.validFile(parameters, "mask", false);
189                         if (maskfile == "not found") { maskfile = "";  }        
190                         else if (maskfile != "default")  { 
191                                 if (inputDir != "") {
192                                         string path = m->hasPath(maskfile);
193                                         //if the user has not given a path then, add inputdir. else leave path alone.
194                                         if (path == "") {       maskfile = inputDir + maskfile;         }
195                                 }
196
197                                 ifstream in;
198                                 int     ableToOpen = m->openInputFile(maskfile, in, "no error");
199                                 if (ableToOpen == 1) { 
200                                         if (m->getDefaultPath() != "") { //default path is set
201                                                         string tryPath = m->getDefaultPath() + m->getSimpleName(maskfile);
202                                                         m->mothurOut("Unable to open " + maskfile + ". Trying default " + tryPath); m->mothurOutEndLine();
203                                                         ifstream in2;
204                                                         ableToOpen = m->openInputFile(tryPath, in2, "noerror");
205                                                         in2.close();
206                                                         maskfile = tryPath;
207                                         }
208                                 }
209                                 
210                                 if (ableToOpen == 1) {
211                                                 if (m->getOutputDir() != "") { //default path is set
212                                                         string tryPath = m->getOutputDir() + m->getSimpleName(maskfile);
213                                                         m->mothurOut("Unable to open " + maskfile + ". Trying output directory " + tryPath); m->mothurOutEndLine();
214                                                         ifstream in2;
215                                                         ableToOpen = m->openInputFile(tryPath, in2, "noerror");
216                                                         in2.close();
217                                                         maskfile = tryPath;
218                                                 }
219                                 }
220                                 
221                                 in.close();
222                                         
223                                 if (ableToOpen == 1) { 
224                                                 m->mothurOut("Unable to open " + maskfile + "."); m->mothurOutEndLine(); 
225                                                 abort = true;
226                                 }
227                         }
228
229                         
230                         //if the user changes the output directory command factory will send this info to us in the output parameter 
231                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = ""; }
232                 
233                         templatefile = validParameter.validFile(parameters, "template", true);
234                         if (templatefile == "not open") { abort = true; }
235                         else if (templatefile == "not found") { templatefile = "";  m->mothurOut("template is a required parameter for the chimera.pintail command."); m->mothurOutEndLine(); abort = true;  }
236                         
237                         consfile = validParameter.validFile(parameters, "conservation", true);
238                         if (consfile == "not open") { abort = true; }
239                         else if (consfile == "not found") { 
240                                 consfile = "";  
241                                 //check for consfile
242                                 string tempConsFile = m->getRootName(inputDir + m->getSimpleName(templatefile)) + "freq";
243                                 ifstream FileTest(tempConsFile.c_str());
244                                 if(FileTest){   
245                                         bool GoodFile = m->checkReleaseVersion(FileTest, m->getVersion());
246                                         if (GoodFile) {  
247                                                 m->mothurOut("I found " + tempConsFile + " in your input file directory. I will use it to save time."); m->mothurOutEndLine();  consfile = tempConsFile;  FileTest.close();     
248                                         }
249                                 }else {
250                                         string tempConsFile = m->getDefaultPath() + m->getRootName(m->getSimpleName(templatefile)) + "freq";
251                                         ifstream FileTest2(tempConsFile.c_str());
252                                         if(FileTest2){  
253                                                 bool GoodFile = m->checkReleaseVersion(FileTest2, m->getVersion());
254                                                 if (GoodFile) {  
255                                                         m->mothurOut("I found " + tempConsFile + " in your input file directory. I will use it to save time."); m->mothurOutEndLine();  consfile = tempConsFile;  FileTest2.close();    
256                                                 }
257                                         }
258                                 }
259                         }       
260                         
261                         quanfile = validParameter.validFile(parameters, "quantile", true);
262                         if (quanfile == "not open") { abort = true; }
263                         else if (quanfile == "not found") { quanfile = ""; }
264                 }
265         }
266         catch(exception& e) {
267                 m->errorOut(e, "ChimeraPintailCommand", "ChimeraPintailCommand");
268                 exit(1);
269         }
270 }
271 //**********************************************************************************************************************
272
273 void ChimeraPintailCommand::help(){
274         try {
275         
276                 m->mothurOut("The chimera.pintail command reads a fastafile and templatefile and outputs potentially chimeric sequences.\n");
277                 m->mothurOut("This command was created using the algorythms described in the 'At Least 1 in 20 16S rRNA Sequence Records Currently Held in the Public Repositories is Estimated To Contain Substantial Anomalies' paper by Kevin E. Ashelford 1, Nadia A. Chuzhanova 3, John C. Fry 1, Antonia J. Jones 2 and Andrew J. Weightman 1.\n");
278                 m->mothurOut("The chimera.pintail command parameters are fasta, template, filter, mask, processors, window, increment, conservation and quantile.\n");
279                 m->mothurOut("The fasta parameter allows you to enter the fasta file containing your potentially chimeric sequences, and is required. \n");
280                 m->mothurOut("You may enter multiple fasta files by separating their names with dashes. ie. fasta=abrecovery.fasta-amzon.fasta \n");
281                 m->mothurOut("The template parameter allows you to enter a template file containing known non-chimeric sequences, and is required. \n");
282                 m->mothurOut("The filter parameter allows you to specify if you would like to apply a vertical and 50% soft filter. \n");
283                 m->mothurOut("The mask parameter allows you to specify a file containing one sequence you wish to use as a mask for the your sequences, by default no mask is applied.  You can apply an ecoli mask by typing, mask=default. \n");
284                 m->mothurOut("The processors parameter allows you to specify how many processors you would like to use.  The default is 1. \n");
285                 #ifdef USE_MPI
286                 m->mothurOut("When using MPI, the processors parameter is set to the number of MPI processes running. \n");
287                 #endif
288                 m->mothurOut("The window parameter allows you to specify the window size for searching for chimeras, default=300. \n");
289                 m->mothurOut("The increment parameter allows you to specify how far you move each window while finding chimeric sequences, default=25.\n");
290                 m->mothurOut("The conservation parameter allows you to enter a frequency file containing the highest bases frequency at each place in the alignment.\n");
291                 m->mothurOut("The quantile parameter allows you to enter a file containing quantiles for a template files sequences, if you use the filter the quantile file generated becomes unique to the fasta file you used.\n");
292                 m->mothurOut("The chimera.pintail command should be in the following format: \n");
293                 m->mothurOut("chimera.pintail(fasta=yourFastaFile, template=yourTemplate) \n");
294                 m->mothurOut("Example: chimera.pintail(fasta=AD.align, template=silva.bacteria.fasta) \n");
295                 m->mothurOut("Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFastaFile).\n\n");     
296         }
297         catch(exception& e) {
298                 m->errorOut(e, "ChimeraPintailCommand", "help");
299                 exit(1);
300         }
301 }
302
303 //***************************************************************************************************************
304
305 ChimeraPintailCommand::~ChimeraPintailCommand(){        /*      do nothing      */      }
306
307 //***************************************************************************************************************
308
309 int ChimeraPintailCommand::execute(){
310         try{
311                 
312                 if (abort == true) { return 0; }
313                 
314                 for (int s = 0; s < fastaFileNames.size(); s++) {
315                                 
316                         m->mothurOut("Checking sequences from " + fastaFileNames[s] + " ..." ); m->mothurOutEndLine();
317
318                         int start = time(NULL); 
319                         
320                         //set user options
321                         if (maskfile == "default") { m->mothurOut("I am using the default 236627 EU009184.1 Shigella dysenteriae str. FBD013."); m->mothurOutEndLine();  }
322                         
323                         //check for quantile to save the time
324                         string tempQuan = "";
325                         if ((!filter) && (maskfile == "")) {
326                                 tempQuan = inputDir + m->getRootName(m->getSimpleName(templatefile)) + "pintail.quan";
327                         }else if ((!filter) && (maskfile != "")) { 
328                                 tempQuan = inputDir + m->getRootName(m->getSimpleName(templatefile)) + "pintail.masked.quan";
329                         }else if ((filter) && (maskfile != "")) { 
330                                 tempQuan = inputDir + m->getRootName(m->getSimpleName(templatefile)) + "pintail.filtered." + m->getSimpleName(m->getRootName(fastaFileNames[s])) + "masked.quan";
331                         }else if ((filter) && (maskfile == "")) { 
332                                 tempQuan = inputDir + m->getRootName(m->getSimpleName(templatefile)) + "pintail.filtered." + m->getSimpleName(m->getRootName(fastaFileNames[s])) + "quan";
333                         }
334                         
335                         ifstream FileTest(tempQuan.c_str());
336                         if(FileTest){   
337                                 bool GoodFile = m->checkReleaseVersion(FileTest, m->getVersion());
338                                 if (GoodFile) {  
339                                         m->mothurOut("I found " + tempQuan + " in your input file directory. I will use it to save time."); m->mothurOutEndLine();  quanfile = tempQuan;  FileTest.close();     
340                                 }
341                         }else {
342                                 string tryPath = m->getDefaultPath();
343                                 string tempQuan = "";
344                                 if ((!filter) && (maskfile == "")) {
345                                         tempQuan = tryPath + m->getRootName(m->getSimpleName(templatefile)) + "pintail.quan";
346                                 }else if ((!filter) && (maskfile != "")) { 
347                                         tempQuan = tryPath + m->getRootName(m->getSimpleName(templatefile)) + "pintail.masked.quan";
348                                 }else if ((filter) && (maskfile != "")) { 
349                                         tempQuan = tryPath + m->getRootName(m->getSimpleName(templatefile)) + "pintail.filtered." + m->getSimpleName(m->getRootName(fastaFileNames[s])) + "masked.quan";
350                                 }else if ((filter) && (maskfile == "")) { 
351                                         tempQuan = tryPath + m->getRootName(m->getSimpleName(templatefile)) + "pintail.filtered." + m->getSimpleName(m->getRootName(fastaFileNames[s])) + "quan";
352                                 }
353                                 
354                                 ifstream FileTest2(tempQuan.c_str());
355                                 if(FileTest2){  
356                                         bool GoodFile = m->checkReleaseVersion(FileTest2, m->getVersion());
357                                         if (GoodFile) {  
358                                                 m->mothurOut("I found " + tempQuan + " in your input file directory. I will use it to save time."); m->mothurOutEndLine();  quanfile = tempQuan;  FileTest2.close();    
359                                         }
360                                 }
361                         }
362                         
363                         chimera = new Pintail(fastaFileNames[s], templatefile, filter, processors, maskfile, consfile, quanfile, window, increment, outputDir);
364                         
365                         if (outputDir == "") { outputDir = m->hasPath(fastaFileNames[s]);  }//if user entered a file with a path then preserve it
366                         string outputFileName, accnosFileName;
367                         if (maskfile != "") {
368                                 outputFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + m->getSimpleName(m->getRootName(maskfile)) + ".pintail.chimeras";
369                                 accnosFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + m->getSimpleName(m->getRootName(maskfile)) + ".pintail.accnos";
370                         }else {
371                                 outputFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s]))  + "pintail.chimeras";
372                                 accnosFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s]))  + "pintail.accnos";
373                         }
374                         
375                         if (m->control_pressed) { delete chimera; for (int j = 0; j < outputNames.size(); j++) {        remove(outputNames[j].c_str()); }  return 0;    }
376                         
377                         if (chimera->getUnaligned()) { 
378                                 m->mothurOut("Your template sequences are different lengths, please correct."); m->mothurOutEndLine(); 
379                                 delete chimera;
380                                 return 0; 
381                         }
382                         templateSeqsLength = chimera->getLength();
383                 
384                 #ifdef USE_MPI
385                         int pid, end, numSeqsPerProcessor; 
386                                 int tag = 2001;
387                                 vector<unsigned long int> MPIPos;
388                                 
389                                 MPI_Status status; 
390                                 MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
391                                 MPI_Comm_size(MPI_COMM_WORLD, &processors); 
392
393                                 MPI_File inMPI;
394                                 MPI_File outMPI;
395                                 MPI_File outMPIAccnos;
396                                 
397                                 int outMode=MPI_MODE_CREATE|MPI_MODE_WRONLY; 
398                                 int inMode=MPI_MODE_RDONLY; 
399                                 
400                                 char outFilename[1024];
401                                 strcpy(outFilename, outputFileName.c_str());
402                                 
403                                 char outAccnosFilename[1024];
404                                 strcpy(outAccnosFilename, accnosFileName.c_str());
405                                 
406                                 char inFileName[1024];
407                                 strcpy(inFileName, fastaFileNames[s].c_str());
408
409                                 MPI_File_open(MPI_COMM_WORLD, inFileName, inMode, MPI_INFO_NULL, &inMPI);  //comm, filename, mode, info, filepointer
410                                 MPI_File_open(MPI_COMM_WORLD, outFilename, outMode, MPI_INFO_NULL, &outMPI);
411                                 MPI_File_open(MPI_COMM_WORLD, outAccnosFilename, outMode, MPI_INFO_NULL, &outMPIAccnos);
412                                 
413                                 if (m->control_pressed) { outputTypes.clear();  MPI_File_close(&inMPI);  MPI_File_close(&outMPI);   MPI_File_close(&outMPIAccnos);  for (int j = 0; j < outputNames.size(); j++) {      remove(outputNames[j].c_str()); }  delete chimera; return 0;  }
414
415                                 if (pid == 0) { //you are the root process 
416                                                                 
417                                         MPIPos = m->setFilePosFasta(fastaFileNames[s], numSeqs); //fills MPIPos, returns numSeqs
418                                         
419                                         //send file positions to all processes
420                                         for(int i = 1; i < processors; i++) { 
421                                                 MPI_Send(&numSeqs, 1, MPI_INT, i, tag, MPI_COMM_WORLD);
422                                                 MPI_Send(&MPIPos[0], (numSeqs+1), MPI_LONG, i, tag, MPI_COMM_WORLD);
423                                         }
424                                         
425                                         //figure out how many sequences you have to align
426                                         numSeqsPerProcessor = numSeqs / processors;
427                                         int startIndex =  pid * numSeqsPerProcessor;
428                                         if(pid == (processors - 1)){    numSeqsPerProcessor = numSeqs - pid * numSeqsPerProcessor;      }
429                                 
430                                         //do your part
431                                         driverMPI(startIndex, numSeqsPerProcessor, inMPI, outMPI, outMPIAccnos, MPIPos);
432                                         
433                                         if (m->control_pressed) { outputTypes.clear();  MPI_File_close(&inMPI);  MPI_File_close(&outMPI);   MPI_File_close(&outMPIAccnos);  remove(outputFileName.c_str());  remove(accnosFileName.c_str());  for (int j = 0; j < outputNames.size(); j++) {    remove(outputNames[j].c_str()); }  delete chimera; return 0;  }
434                                         
435                                 }else{ //you are a child process
436                                         MPI_Recv(&numSeqs, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
437                                         MPIPos.resize(numSeqs+1);
438                                         MPI_Recv(&MPIPos[0], (numSeqs+1), MPI_LONG, 0, tag, MPI_COMM_WORLD, &status);
439                                         
440                                         //figure out how many sequences you have to align
441                                         numSeqsPerProcessor = numSeqs / processors;
442                                         int startIndex =  pid * numSeqsPerProcessor;
443                                         if(pid == (processors - 1)){    numSeqsPerProcessor = numSeqs - pid * numSeqsPerProcessor;      }
444                                         
445                                         //do your part
446                                         driverMPI(startIndex, numSeqsPerProcessor, inMPI, outMPI, outMPIAccnos, MPIPos);
447                                         
448                                         if (m->control_pressed) { outputTypes.clear();  MPI_File_close(&inMPI);  MPI_File_close(&outMPI);   MPI_File_close(&outMPIAccnos);  for (int j = 0; j < outputNames.size(); j++) {      remove(outputNames[j].c_str()); }  delete chimera; return 0;  }
449                                 }
450                                 
451                                 //close files 
452                                 MPI_File_close(&inMPI);
453                                 MPI_File_close(&outMPI);
454                                 MPI_File_close(&outMPIAccnos);
455                                 MPI_Barrier(MPI_COMM_WORLD); //make everyone wait - just in case
456                 #else
457                         vector<unsigned long int> positions = m->divideFile(fastaFileNames[s], processors);
458                                 
459                         for (int i = 0; i < (positions.size()-1); i++) {
460                                 lines.push_back(new linePair(positions[i], positions[(i+1)]));
461                         }       
462                         
463                         //break up file
464                         #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
465                                 if(processors == 1){
466                 
467                                         numSeqs = driver(lines[0], outputFileName, fastaFileNames[s], accnosFileName);
468                                         
469                                         if (m->control_pressed) { outputTypes.clear(); remove(outputFileName.c_str()); remove(accnosFileName.c_str()); for (int j = 0; j < outputNames.size(); j++) {   remove(outputNames[j].c_str()); } for (int i = 0; i < lines.size(); i++) {  delete lines[i];  }  lines.clear(); delete chimera; return 0; }
470                                         
471                                 }else{
472                                         processIDS.resize(0);
473                                         
474                                         numSeqs = createProcesses(outputFileName, fastaFileNames[s], accnosFileName); 
475                                 
476                                         rename((outputFileName + toString(processIDS[0]) + ".temp").c_str(), outputFileName.c_str());
477                                         rename((accnosFileName + toString(processIDS[0]) + ".temp").c_str(), accnosFileName.c_str());
478                                                 
479                                         //append output files
480                                         for(int i=1;i<processors;i++){
481                                                 m->appendFiles((outputFileName + toString(processIDS[i]) + ".temp"), outputFileName);
482                                                 remove((outputFileName + toString(processIDS[i]) + ".temp").c_str());
483                                         }
484                                         
485                                         //append output files
486                                         for(int i=1;i<processors;i++){
487                                                 m->appendFiles((accnosFileName + toString(processIDS[i]) + ".temp"), accnosFileName);
488                                                 remove((accnosFileName + toString(processIDS[i]) + ".temp").c_str());
489                                         }
490                                                                                 
491                                         if (m->control_pressed) { 
492                                                 remove(outputFileName.c_str()); 
493                                                 remove(accnosFileName.c_str());
494                                                 for (int j = 0; j < outputNames.size(); j++) {  remove(outputNames[j].c_str()); } outputTypes.clear();
495                                                 for (int i = 0; i < lines.size(); i++) {  delete lines[i];  }  lines.clear();
496                                                 delete chimera;
497                                                 return 0;
498                                         }
499                                 }
500
501                         #else
502                                 numSeqs = driver(lines[0], outputFileName, fastaFileNames[s], accnosFileName);
503                                 
504                                 if (m->control_pressed) { outputTypes.clear(); remove(outputFileName.c_str()); remove(accnosFileName.c_str()); for (int j = 0; j < outputNames.size(); j++) {   remove(outputNames[j].c_str()); } for (int i = 0; i < lines.size(); i++) {  delete lines[i];  }  lines.clear(); delete chimera; return 0; }
505                         #endif
506                         
507                 #endif  
508                 
509                         delete chimera;
510                         for (int i = 0; i < lines.size(); i++) {  delete lines[i];  }  lines.clear();
511                         
512                         outputNames.push_back(outputFileName); outputTypes["chimera"].push_back(outputFileName);
513                         outputNames.push_back(accnosFileName); outputTypes["accnos"].push_back(accnosFileName);
514                         
515                         m->mothurOutEndLine();
516                         m->mothurOutEndLine(); m->mothurOut("It took " + toString(time(NULL) - start) + " secs to check " + toString(numSeqs) + " sequences."); m->mothurOutEndLine();
517                 }
518                 
519                 m->mothurOutEndLine();
520                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
521                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }       
522                 m->mothurOutEndLine();
523                         
524                 return 0;
525                 
526         }
527         catch(exception& e) {
528                 m->errorOut(e, "ChimeraPintailCommand", "execute");
529                 exit(1);
530         }
531 }
532 //**********************************************************************************************************************
533
534 int ChimeraPintailCommand::driver(linePair* filePos, string outputFName, string filename, string accnos){
535         try {
536                 ofstream out;
537                 m->openOutputFile(outputFName, out);
538                 
539                 ofstream out2;
540                 m->openOutputFile(accnos, out2);
541                 
542                 ifstream inFASTA;
543                 m->openInputFile(filename, inFASTA);
544
545                 inFASTA.seekg(filePos->start);
546
547                 bool done = false;
548                 int count = 0;
549         
550                 while (!done) {
551                                 
552                         if (m->control_pressed) {       return 1;       }
553                 
554                         Sequence* candidateSeq = new Sequence(inFASTA);  m->gobble(inFASTA);
555                                 
556                         if (candidateSeq->getName() != "") { //incase there is a commented sequence at the end of a file
557                                 
558                                 if (candidateSeq->getAligned().length() != templateSeqsLength)  {  //chimeracheck does not require seqs to be aligned
559                                         m->mothurOut(candidateSeq->getName() + " is not the same length as the template sequences. Skipping."); m->mothurOutEndLine();
560                                 }else{
561                                         //find chimeras
562                                         chimera->getChimeras(candidateSeq);
563                                         
564                                         if (m->control_pressed) {       delete candidateSeq; return 1;  }
565                 
566                                         //print results
567                                         chimera->print(out, out2);
568                                 }
569                                 count++;
570                         }
571                         delete candidateSeq;
572                         
573                         #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
574                                 unsigned long int pos = inFASTA.tellg();
575                                 if ((pos == -1) || (pos >= filePos->end)) { break; }
576                         #else
577                                 if (inFASTA.eof()) { break; }
578                         #endif
579                         
580                         //report progress
581                         if((count) % 100 == 0){ m->mothurOut("Processing sequence: " + toString(count)); m->mothurOutEndLine();         }
582                 }
583                 //report progress
584                 if((count) % 100 != 0){ m->mothurOut("Processing sequence: " + toString(count)); m->mothurOutEndLine();         }
585                 
586                 out.close();
587                 out2.close();
588                 inFASTA.close();
589                                 
590                 return count;
591         }
592         catch(exception& e) {
593                 m->errorOut(e, "ChimeraPintailCommand", "driver");
594                 exit(1);
595         }
596 }
597 //**********************************************************************************************************************
598 #ifdef USE_MPI
599 int ChimeraPintailCommand::driverMPI(int start, int num, MPI_File& inMPI, MPI_File& outMPI, MPI_File& outAccMPI, vector<unsigned long int>& MPIPos){
600         try {
601                                 
602                 MPI_Status status; 
603                 int pid;
604                 MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
605                 
606                 for(int i=0;i<num;i++){
607                         
608                         if (m->control_pressed) {       return 1;       }
609                         
610                         //read next sequence
611                         int length = MPIPos[start+i+1] - MPIPos[start+i];
612         
613                         char* buf4 = new char[length];
614                         MPI_File_read_at(inMPI, MPIPos[start+i], buf4, length, MPI_CHAR, &status);
615                         
616                         string tempBuf = buf4;
617                         if (tempBuf.length() > length) { tempBuf = tempBuf.substr(0, length);  }
618                         istringstream iss (tempBuf,istringstream::in);
619                         delete buf4;
620
621                         Sequence* candidateSeq = new Sequence(iss);  m->gobble(iss);
622                                 
623                         if (candidateSeq->getName() != "") { //incase there is a commented sequence at the end of a file
624                                 
625                                 if      (candidateSeq->getAligned().length() != templateSeqsLength) {  //chimeracheck does not require seqs to be aligned
626                                         m->mothurOut(candidateSeq->getName() + " is not the same length as the template sequences. Skipping."); m->mothurOutEndLine();
627                                 }else{
628                                         //find chimeras
629                                         chimera->getChimeras(candidateSeq);
630                                         
631                                         if (m->control_pressed) {       delete candidateSeq; return 1;  }
632                 
633                                         //print results
634                                         bool isChimeric = chimera->print(outMPI, outAccMPI);
635                                 }
636                         }
637                         delete candidateSeq;
638                         
639                         //report progress
640                         if((i+1) % 100 == 0){  cout << "Processing sequence: " << (i+1) << endl;        m->mothurOutJustToLog("Processing sequence: " + toString(i+1) + "\n");          }
641                 }
642                 //report progress
643                 if(num % 100 != 0){             cout << "Processing sequence: " << num << endl; m->mothurOutJustToLog("Processing sequence: " + toString(num) + "\n");  }
644                 
645                                 
646                 return 0;
647         }
648         catch(exception& e) {
649                 m->errorOut(e, "ChimeraPintailCommand", "driverMPI");
650                 exit(1);
651         }
652 }
653 #endif
654
655 /**************************************************************************************************/
656
657 int ChimeraPintailCommand::createProcesses(string outputFileName, string filename, string accnos) {
658         try {
659 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
660                 int process = 0;
661                 int num = 0;
662                 
663                 //loop through and create all the processes you want
664                 while (process != processors) {
665                         int pid = fork();
666                         
667                         if (pid > 0) {
668                                 processIDS.push_back(pid);  //create map from line number to pid so you can append files in correct order later
669                                 process++;
670                         }else if (pid == 0){
671                                 num = driver(lines[process], outputFileName + toString(getpid()) + ".temp", filename, accnos + toString(getpid()) + ".temp");
672                                 
673                                 //pass numSeqs to parent
674                                 ofstream out;
675                                 string tempFile = outputFileName + toString(getpid()) + ".num.temp";
676                                 m->openOutputFile(tempFile, out);
677                                 out << num << endl;
678                                 out.close();
679
680                                 exit(0);
681                         }else { 
682                                 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine(); 
683                                 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
684                                 exit(0);
685                         }
686                 }
687                 
688                 //force parent to wait until all the processes are done
689                 for (int i=0;i<processors;i++) { 
690                         int temp = processIDS[i];
691                         wait(&temp);
692                 }
693                 
694                 for (int i = 0; i < processIDS.size(); i++) {
695                         ifstream in;
696                         string tempFile =  outputFileName + toString(processIDS[i]) + ".num.temp";
697                         m->openInputFile(tempFile, in);
698                         if (!in.eof()) { int tempNum = 0; in >> tempNum; num += tempNum; }
699                         in.close(); remove(tempFile.c_str());
700                 }
701                 
702                 return num;
703 #endif          
704         }
705         catch(exception& e) {
706                 m->errorOut(e, "ChimeraPintailCommand", "createProcesses");
707                 exit(1);
708         }
709 }
710
711 /**************************************************************************************************/
712
713