]> git.donarmstrong.com Git - mothur.git/blob - chimerapintailcommand.cpp
added set.current and get.current commands and modified existing commands to update...
[mothur.git] / chimerapintailcommand.cpp
1 /*
2  *  chimerapintailcommand.cpp
3  *  Mothur
4  *
5  *  Created by westcott on 4/1/10.
6  *  Copyright 2010 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "chimerapintailcommand.h"
11 #include "pintail.h"
12
13 //**********************************************************************************************************************
14 vector<string> ChimeraPintailCommand::getValidParameters(){     
15         try {
16                 string AlignArray[] =  {"fasta","filter","processors","window" ,"increment","template","conservation","quantile","mask","outputdir","inputdir"};
17                 vector<string> myArray (AlignArray, AlignArray+(sizeof(AlignArray)/sizeof(string)));
18                 return myArray;
19         }
20         catch(exception& e) {
21                 m->errorOut(e, "ChimeraPintailCommand", "getValidParameters");
22                 exit(1);
23         }
24 }
25 //**********************************************************************************************************************
26 ChimeraPintailCommand::ChimeraPintailCommand(){ 
27         try {
28                 abort = true; calledHelp = true;
29                 vector<string> tempOutNames;
30                 outputTypes["chimera"] = tempOutNames;
31                 outputTypes["accnos"] = tempOutNames;
32         }
33         catch(exception& e) {
34                 m->errorOut(e, "ChimeraPintailCommand", "ChimeraPintailCommand");
35                 exit(1);
36         }
37 }
38 //**********************************************************************************************************************
39 vector<string> ChimeraPintailCommand::getRequiredParameters(){  
40         try {
41                 string AlignArray[] =  {"template","fasta"};
42                 vector<string> myArray (AlignArray, AlignArray+(sizeof(AlignArray)/sizeof(string)));
43                 return myArray;
44         }
45         catch(exception& e) {
46                 m->errorOut(e, "ChimeraPintailCommand", "getRequiredParameters");
47                 exit(1);
48         }
49 }
50 //**********************************************************************************************************************
51 vector<string> ChimeraPintailCommand::getRequiredFiles(){       
52         try {
53                 vector<string> myArray;
54                 return myArray;
55         }
56         catch(exception& e) {
57                 m->errorOut(e, "ChimeraPintailCommand", "getRequiredFiles");
58                 exit(1);
59         }
60 }
61 //***************************************************************************************************************
62 ChimeraPintailCommand::ChimeraPintailCommand(string option)  {
63         try {
64                 abort = false; calledHelp = false;   
65                 
66                 //allow user to run help
67                 if(option == "help") { help(); abort = true; calledHelp = true; }
68                 
69                 else {
70                         //valid paramters for this command
71                         string Array[] =  {"fasta","filter","processors","window" ,"increment","template","conservation","quantile","mask","outputdir","inputdir"};
72                         vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
73                         
74                         OptionParser parser(option);
75                         map<string,string> parameters = parser.getParameters();
76                         
77                         ValidParameters validParameter("chimera.pintail");
78                         map<string,string>::iterator it;
79                         
80                         //check to make sure all parameters are valid for command
81                         for (it = parameters.begin(); it != parameters.end(); it++) { 
82                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
83                         }
84                         
85                         vector<string> tempOutNames;
86                         outputTypes["chimera"] = tempOutNames;
87                         outputTypes["accnos"] = tempOutNames;
88                 
89                         //if the user changes the input directory command factory will send this info to us in the output parameter 
90                         inputDir = validParameter.validFile(parameters, "inputdir", false);             
91                         if (inputDir == "not found"){   inputDir = "";          }
92                         else {
93                                 string path;
94                                 it = parameters.find("template");
95                                 //user has given a template file
96                                 if(it != parameters.end()){ 
97                                         path = m->hasPath(it->second);
98                                         //if the user has not given a path then, add inputdir. else leave path alone.
99                                         if (path == "") {       parameters["template"] = inputDir + it->second;         }
100                                 }
101                                 
102                                 it = parameters.find("conservation");
103                                 //user has given a template file
104                                 if(it != parameters.end()){ 
105                                         path = m->hasPath(it->second);
106                                         //if the user has not given a path then, add inputdir. else leave path alone.
107                                         if (path == "") {       parameters["conservation"] = inputDir + it->second;             }
108                                 }
109                                 
110                                 it = parameters.find("quantile");
111                                 //user has given a template file
112                                 if(it != parameters.end()){ 
113                                         path = m->hasPath(it->second);
114                                         //if the user has not given a path then, add inputdir. else leave path alone.
115                                         if (path == "") {       parameters["quantile"] = inputDir + it->second;         }
116                                 }
117                         }
118
119                         
120                         //check for required parameters
121                         fastafile = validParameter.validFile(parameters, "fasta", false);
122                         if (fastafile == "not found") { fastafile = ""; m->mothurOut("fasta is a required parameter for the chimera.pintail command."); m->mothurOutEndLine(); abort = true;  }
123                         else { 
124                                 m->splitAtDash(fastafile, fastaFileNames);
125                                 
126                                 //go through files and make sure they are good, if not, then disregard them
127                                 for (int i = 0; i < fastaFileNames.size(); i++) {
128                                         if (inputDir != "") {
129                                                 string path = m->hasPath(fastaFileNames[i]);
130                                                 //if the user has not given a path then, add inputdir. else leave path alone.
131                                                 if (path == "") {       fastaFileNames[i] = inputDir + fastaFileNames[i];               }
132                                         }
133         
134                                         int ableToOpen;
135                                         ifstream in;
136                                         
137                                         ableToOpen = m->openInputFile(fastaFileNames[i], in, "noerror");
138                                 
139                                         //if you can't open it, try default location
140                                         if (ableToOpen == 1) {
141                                                 if (m->getDefaultPath() != "") { //default path is set
142                                                         string tryPath = m->getDefaultPath() + m->getSimpleName(fastaFileNames[i]);
143                                                         m->mothurOut("Unable to open " + fastaFileNames[i] + ". Trying default " + tryPath); m->mothurOutEndLine();
144                                                         ifstream in2;
145                                                         ableToOpen = m->openInputFile(tryPath, in2, "noerror");
146                                                         in2.close();
147                                                         fastaFileNames[i] = tryPath;
148                                                 }
149                                         }
150                                         
151                                         if (ableToOpen == 1) {
152                                                 if (m->getOutputDir() != "") { //default path is set
153                                                         string tryPath = m->getOutputDir() + m->getSimpleName(fastaFileNames[i]);
154                                                         m->mothurOut("Unable to open " + fastaFileNames[i] + ". Trying output directory " + tryPath); m->mothurOutEndLine();
155                                                         ifstream in2;
156                                                         ableToOpen = m->openInputFile(tryPath, in2, "noerror");
157                                                         in2.close();
158                                                         fastaFileNames[i] = tryPath;
159                                                 }
160                                         }
161
162                                         in.close();
163                                         
164                                         if (ableToOpen == 1) { 
165                                                 m->mothurOut("Unable to open " + fastaFileNames[i] + ". It will be disregarded."); m->mothurOutEndLine(); 
166                                                 //erase from file list
167                                                 fastaFileNames.erase(fastaFileNames.begin()+i);
168                                                 i--;
169                                         }
170                                 }
171                                 
172                                 //make sure there is at least one valid file left
173                                 if (fastaFileNames.size() == 0) { m->mothurOut("no valid files."); m->mothurOutEndLine(); abort = true; }
174                         }
175                         
176                         string temp;
177                         temp = validParameter.validFile(parameters, "filter", false);                   if (temp == "not found") { temp = "F"; }
178                         filter = m->isTrue(temp);
179                         
180                         temp = validParameter.validFile(parameters, "processors", false);               if (temp == "not found") { temp = "1"; }
181                         convert(temp, processors);
182                         
183                         temp = validParameter.validFile(parameters, "window", false);                   if (temp == "not found") { temp = "0"; }
184                         convert(temp, window);
185                         
186                         temp = validParameter.validFile(parameters, "increment", false);                if (temp == "not found") { temp = "25"; }
187                         convert(temp, increment);
188                         
189                         maskfile = validParameter.validFile(parameters, "mask", false);
190                         if (maskfile == "not found") { maskfile = "";  }        
191                         else if (maskfile != "default")  { 
192                                 if (inputDir != "") {
193                                         string path = m->hasPath(maskfile);
194                                         //if the user has not given a path then, add inputdir. else leave path alone.
195                                         if (path == "") {       maskfile = inputDir + maskfile;         }
196                                 }
197
198                                 ifstream in;
199                                 int     ableToOpen = m->openInputFile(maskfile, in, "no error");
200                                 if (ableToOpen == 1) { 
201                                         if (m->getDefaultPath() != "") { //default path is set
202                                                         string tryPath = m->getDefaultPath() + m->getSimpleName(maskfile);
203                                                         m->mothurOut("Unable to open " + maskfile + ". Trying default " + tryPath); m->mothurOutEndLine();
204                                                         ifstream in2;
205                                                         ableToOpen = m->openInputFile(tryPath, in2, "noerror");
206                                                         in2.close();
207                                                         maskfile = tryPath;
208                                         }
209                                 }
210                                 
211                                 if (ableToOpen == 1) {
212                                                 if (m->getOutputDir() != "") { //default path is set
213                                                         string tryPath = m->getOutputDir() + m->getSimpleName(maskfile);
214                                                         m->mothurOut("Unable to open " + maskfile + ". Trying output directory " + tryPath); m->mothurOutEndLine();
215                                                         ifstream in2;
216                                                         ableToOpen = m->openInputFile(tryPath, in2, "noerror");
217                                                         in2.close();
218                                                         maskfile = tryPath;
219                                                 }
220                                 }
221                                 
222                                 in.close();
223                                         
224                                 if (ableToOpen == 1) { 
225                                                 m->mothurOut("Unable to open " + maskfile + "."); m->mothurOutEndLine(); 
226                                                 abort = true;
227                                 }
228                         }
229
230                         
231                         //if the user changes the output directory command factory will send this info to us in the output parameter 
232                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = ""; }
233                 
234                         templatefile = validParameter.validFile(parameters, "template", true);
235                         if (templatefile == "not open") { abort = true; }
236                         else if (templatefile == "not found") { templatefile = "";  m->mothurOut("template is a required parameter for the chimera.pintail command."); m->mothurOutEndLine(); abort = true;  }
237                         
238                         consfile = validParameter.validFile(parameters, "conservation", true);
239                         if (consfile == "not open") { abort = true; }
240                         else if (consfile == "not found") { 
241                                 consfile = "";  
242                                 //check for consfile
243                                 string tempConsFile = m->getRootName(inputDir + m->getSimpleName(templatefile)) + "freq";
244                                 ifstream FileTest(tempConsFile.c_str());
245                                 if(FileTest){   
246                                         bool GoodFile = m->checkReleaseVersion(FileTest, m->getVersion());
247                                         if (GoodFile) {  
248                                                 m->mothurOut("I found " + tempConsFile + " in your input file directory. I will use it to save time."); m->mothurOutEndLine();  consfile = tempConsFile;  FileTest.close();     
249                                         }
250                                 }else {
251                                         string tempConsFile = m->getDefaultPath() + m->getRootName(m->getSimpleName(templatefile)) + "freq";
252                                         ifstream FileTest2(tempConsFile.c_str());
253                                         if(FileTest2){  
254                                                 bool GoodFile = m->checkReleaseVersion(FileTest2, m->getVersion());
255                                                 if (GoodFile) {  
256                                                         m->mothurOut("I found " + tempConsFile + " in your input file directory. I will use it to save time."); m->mothurOutEndLine();  consfile = tempConsFile;  FileTest2.close();    
257                                                 }
258                                         }
259                                 }
260                         }       
261                         
262                         quanfile = validParameter.validFile(parameters, "quantile", true);
263                         if (quanfile == "not open") { abort = true; }
264                         else if (quanfile == "not found") { quanfile = ""; }
265                 }
266         }
267         catch(exception& e) {
268                 m->errorOut(e, "ChimeraPintailCommand", "ChimeraPintailCommand");
269                 exit(1);
270         }
271 }
272 //**********************************************************************************************************************
273
274 void ChimeraPintailCommand::help(){
275         try {
276         
277                 m->mothurOut("The chimera.pintail command reads a fastafile and templatefile and outputs potentially chimeric sequences.\n");
278                 m->mothurOut("This command was created using the algorythms described in the 'At Least 1 in 20 16S rRNA Sequence Records Currently Held in the Public Repositories is Estimated To Contain Substantial Anomalies' paper by Kevin E. Ashelford 1, Nadia A. Chuzhanova 3, John C. Fry 1, Antonia J. Jones 2 and Andrew J. Weightman 1.\n");
279                 m->mothurOut("The chimera.pintail command parameters are fasta, template, filter, mask, processors, window, increment, conservation and quantile.\n");
280                 m->mothurOut("The fasta parameter allows you to enter the fasta file containing your potentially chimeric sequences, and is required. \n");
281                 m->mothurOut("You may enter multiple fasta files by separating their names with dashes. ie. fasta=abrecovery.fasta-amzon.fasta \n");
282                 m->mothurOut("The template parameter allows you to enter a template file containing known non-chimeric sequences, and is required. \n");
283                 m->mothurOut("The filter parameter allows you to specify if you would like to apply a vertical and 50% soft filter. \n");
284                 m->mothurOut("The mask parameter allows you to specify a file containing one sequence you wish to use as a mask for the your sequences, by default no mask is applied.  You can apply an ecoli mask by typing, mask=default. \n");
285                 m->mothurOut("The processors parameter allows you to specify how many processors you would like to use.  The default is 1. \n");
286                 #ifdef USE_MPI
287                 m->mothurOut("When using MPI, the processors parameter is set to the number of MPI processes running. \n");
288                 #endif
289                 m->mothurOut("The window parameter allows you to specify the window size for searching for chimeras, default=300. \n");
290                 m->mothurOut("The increment parameter allows you to specify how far you move each window while finding chimeric sequences, default=25.\n");
291                 m->mothurOut("The conservation parameter allows you to enter a frequency file containing the highest bases frequency at each place in the alignment.\n");
292                 m->mothurOut("The quantile parameter allows you to enter a file containing quantiles for a template files sequences, if you use the filter the quantile file generated becomes unique to the fasta file you used.\n");
293                 m->mothurOut("The chimera.pintail command should be in the following format: \n");
294                 m->mothurOut("chimera.pintail(fasta=yourFastaFile, template=yourTemplate) \n");
295                 m->mothurOut("Example: chimera.pintail(fasta=AD.align, template=silva.bacteria.fasta) \n");
296                 m->mothurOut("Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFastaFile).\n\n");     
297         }
298         catch(exception& e) {
299                 m->errorOut(e, "ChimeraPintailCommand", "help");
300                 exit(1);
301         }
302 }
303
304 //***************************************************************************************************************
305
306 ChimeraPintailCommand::~ChimeraPintailCommand(){        /*      do nothing      */      }
307
308 //***************************************************************************************************************
309
310 int ChimeraPintailCommand::execute(){
311         try{
312                 
313                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
314                 
315                 for (int s = 0; s < fastaFileNames.size(); s++) {
316                                 
317                         m->mothurOut("Checking sequences from " + fastaFileNames[s] + " ..." ); m->mothurOutEndLine();
318
319                         int start = time(NULL); 
320                         
321                         //set user options
322                         if (maskfile == "default") { m->mothurOut("I am using the default 236627 EU009184.1 Shigella dysenteriae str. FBD013."); m->mothurOutEndLine();  }
323                         
324                         //check for quantile to save the time
325                         string tempQuan = "";
326                         if ((!filter) && (maskfile == "")) {
327                                 tempQuan = inputDir + m->getRootName(m->getSimpleName(templatefile)) + "pintail.quan";
328                         }else if ((!filter) && (maskfile != "")) { 
329                                 tempQuan = inputDir + m->getRootName(m->getSimpleName(templatefile)) + "pintail.masked.quan";
330                         }else if ((filter) && (maskfile != "")) { 
331                                 tempQuan = inputDir + m->getRootName(m->getSimpleName(templatefile)) + "pintail.filtered." + m->getSimpleName(m->getRootName(fastaFileNames[s])) + "masked.quan";
332                         }else if ((filter) && (maskfile == "")) { 
333                                 tempQuan = inputDir + m->getRootName(m->getSimpleName(templatefile)) + "pintail.filtered." + m->getSimpleName(m->getRootName(fastaFileNames[s])) + "quan";
334                         }
335                         
336                         ifstream FileTest(tempQuan.c_str());
337                         if(FileTest){   
338                                 bool GoodFile = m->checkReleaseVersion(FileTest, m->getVersion());
339                                 if (GoodFile) {  
340                                         m->mothurOut("I found " + tempQuan + " in your input file directory. I will use it to save time."); m->mothurOutEndLine();  quanfile = tempQuan;  FileTest.close();     
341                                 }
342                         }else {
343                                 string tryPath = m->getDefaultPath();
344                                 string tempQuan = "";
345                                 if ((!filter) && (maskfile == "")) {
346                                         tempQuan = tryPath + m->getRootName(m->getSimpleName(templatefile)) + "pintail.quan";
347                                 }else if ((!filter) && (maskfile != "")) { 
348                                         tempQuan = tryPath + m->getRootName(m->getSimpleName(templatefile)) + "pintail.masked.quan";
349                                 }else if ((filter) && (maskfile != "")) { 
350                                         tempQuan = tryPath + m->getRootName(m->getSimpleName(templatefile)) + "pintail.filtered." + m->getSimpleName(m->getRootName(fastaFileNames[s])) + "masked.quan";
351                                 }else if ((filter) && (maskfile == "")) { 
352                                         tempQuan = tryPath + m->getRootName(m->getSimpleName(templatefile)) + "pintail.filtered." + m->getSimpleName(m->getRootName(fastaFileNames[s])) + "quan";
353                                 }
354                                 
355                                 ifstream FileTest2(tempQuan.c_str());
356                                 if(FileTest2){  
357                                         bool GoodFile = m->checkReleaseVersion(FileTest2, m->getVersion());
358                                         if (GoodFile) {  
359                                                 m->mothurOut("I found " + tempQuan + " in your input file directory. I will use it to save time."); m->mothurOutEndLine();  quanfile = tempQuan;  FileTest2.close();    
360                                         }
361                                 }
362                         }
363                         
364                         chimera = new Pintail(fastaFileNames[s], templatefile, filter, processors, maskfile, consfile, quanfile, window, increment, outputDir);
365                         
366                         if (outputDir == "") { outputDir = m->hasPath(fastaFileNames[s]);  }//if user entered a file with a path then preserve it
367                         string outputFileName, accnosFileName;
368                         if (maskfile != "") {
369                                 outputFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + m->getSimpleName(m->getRootName(maskfile)) + ".pintail.chimeras";
370                                 accnosFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + m->getSimpleName(m->getRootName(maskfile)) + ".pintail.accnos";
371                         }else {
372                                 outputFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s]))  + "pintail.chimeras";
373                                 accnosFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s]))  + "pintail.accnos";
374                         }
375                         
376                         if (m->control_pressed) { delete chimera; for (int j = 0; j < outputNames.size(); j++) {        remove(outputNames[j].c_str()); }  return 0;    }
377                         
378                         if (chimera->getUnaligned()) { 
379                                 m->mothurOut("Your template sequences are different lengths, please correct."); m->mothurOutEndLine(); 
380                                 delete chimera;
381                                 return 0; 
382                         }
383                         templateSeqsLength = chimera->getLength();
384                 
385                 #ifdef USE_MPI
386                         int pid, numSeqsPerProcessor; 
387                                 int tag = 2001;
388                                 vector<unsigned long int> MPIPos;
389                                 
390                                 MPI_Status status; 
391                                 MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
392                                 MPI_Comm_size(MPI_COMM_WORLD, &processors); 
393
394                                 MPI_File inMPI;
395                                 MPI_File outMPI;
396                                 MPI_File outMPIAccnos;
397                                 
398                                 int outMode=MPI_MODE_CREATE|MPI_MODE_WRONLY; 
399                                 int inMode=MPI_MODE_RDONLY; 
400                                 
401                                 char outFilename[1024];
402                                 strcpy(outFilename, outputFileName.c_str());
403                                 
404                                 char outAccnosFilename[1024];
405                                 strcpy(outAccnosFilename, accnosFileName.c_str());
406                                 
407                                 char inFileName[1024];
408                                 strcpy(inFileName, fastaFileNames[s].c_str());
409
410                                 MPI_File_open(MPI_COMM_WORLD, inFileName, inMode, MPI_INFO_NULL, &inMPI);  //comm, filename, mode, info, filepointer
411                                 MPI_File_open(MPI_COMM_WORLD, outFilename, outMode, MPI_INFO_NULL, &outMPI);
412                                 MPI_File_open(MPI_COMM_WORLD, outAccnosFilename, outMode, MPI_INFO_NULL, &outMPIAccnos);
413                                 
414                                 if (m->control_pressed) { outputTypes.clear();  MPI_File_close(&inMPI);  MPI_File_close(&outMPI);   MPI_File_close(&outMPIAccnos);  for (int j = 0; j < outputNames.size(); j++) {      remove(outputNames[j].c_str()); }  delete chimera; return 0;  }
415
416                                 if (pid == 0) { //you are the root process 
417                                                                 
418                                         MPIPos = m->setFilePosFasta(fastaFileNames[s], numSeqs); //fills MPIPos, returns numSeqs
419                                         
420                                         //send file positions to all processes
421                                         for(int i = 1; i < processors; i++) { 
422                                                 MPI_Send(&numSeqs, 1, MPI_INT, i, tag, MPI_COMM_WORLD);
423                                                 MPI_Send(&MPIPos[0], (numSeqs+1), MPI_LONG, i, tag, MPI_COMM_WORLD);
424                                         }
425                                         
426                                         //figure out how many sequences you have to align
427                                         numSeqsPerProcessor = numSeqs / processors;
428                                         int startIndex =  pid * numSeqsPerProcessor;
429                                         if(pid == (processors - 1)){    numSeqsPerProcessor = numSeqs - pid * numSeqsPerProcessor;      }
430                                 
431                                         //do your part
432                                         driverMPI(startIndex, numSeqsPerProcessor, inMPI, outMPI, outMPIAccnos, MPIPos);
433                                         
434                                         if (m->control_pressed) { outputTypes.clear();  MPI_File_close(&inMPI);  MPI_File_close(&outMPI);   MPI_File_close(&outMPIAccnos);  remove(outputFileName.c_str());  remove(accnosFileName.c_str());  for (int j = 0; j < outputNames.size(); j++) {    remove(outputNames[j].c_str()); }  delete chimera; return 0;  }
435                                         
436                                 }else{ //you are a child process
437                                         MPI_Recv(&numSeqs, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
438                                         MPIPos.resize(numSeqs+1);
439                                         MPI_Recv(&MPIPos[0], (numSeqs+1), MPI_LONG, 0, tag, MPI_COMM_WORLD, &status);
440                                         
441                                         //figure out how many sequences you have to align
442                                         numSeqsPerProcessor = numSeqs / processors;
443                                         int startIndex =  pid * numSeqsPerProcessor;
444                                         if(pid == (processors - 1)){    numSeqsPerProcessor = numSeqs - pid * numSeqsPerProcessor;      }
445                                         
446                                         //do your part
447                                         driverMPI(startIndex, numSeqsPerProcessor, inMPI, outMPI, outMPIAccnos, MPIPos);
448                                         
449                                         if (m->control_pressed) { outputTypes.clear();  MPI_File_close(&inMPI);  MPI_File_close(&outMPI);   MPI_File_close(&outMPIAccnos);  for (int j = 0; j < outputNames.size(); j++) {      remove(outputNames[j].c_str()); }  delete chimera; return 0;  }
450                                 }
451                                 
452                                 //close files 
453                                 MPI_File_close(&inMPI);
454                                 MPI_File_close(&outMPI);
455                                 MPI_File_close(&outMPIAccnos);
456                                 MPI_Barrier(MPI_COMM_WORLD); //make everyone wait - just in case
457                 #else
458                         vector<unsigned long int> positions = m->divideFile(fastaFileNames[s], processors);
459                                 
460                         for (int i = 0; i < (positions.size()-1); i++) {
461                                 lines.push_back(new linePair(positions[i], positions[(i+1)]));
462                         }       
463                         
464                         //break up file
465                         #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
466                                 if(processors == 1){
467                 
468                                         numSeqs = driver(lines[0], outputFileName, fastaFileNames[s], accnosFileName);
469                                         
470                                         if (m->control_pressed) { outputTypes.clear(); remove(outputFileName.c_str()); remove(accnosFileName.c_str()); for (int j = 0; j < outputNames.size(); j++) {   remove(outputNames[j].c_str()); } for (int i = 0; i < lines.size(); i++) {  delete lines[i];  }  lines.clear(); delete chimera; return 0; }
471                                         
472                                 }else{
473                                         processIDS.resize(0);
474                                         
475                                         numSeqs = createProcesses(outputFileName, fastaFileNames[s], accnosFileName); 
476                                 
477                                         rename((outputFileName + toString(processIDS[0]) + ".temp").c_str(), outputFileName.c_str());
478                                         rename((accnosFileName + toString(processIDS[0]) + ".temp").c_str(), accnosFileName.c_str());
479                                                 
480                                         //append output files
481                                         for(int i=1;i<processors;i++){
482                                                 m->appendFiles((outputFileName + toString(processIDS[i]) + ".temp"), outputFileName);
483                                                 remove((outputFileName + toString(processIDS[i]) + ".temp").c_str());
484                                         }
485                                         
486                                         //append output files
487                                         for(int i=1;i<processors;i++){
488                                                 m->appendFiles((accnosFileName + toString(processIDS[i]) + ".temp"), accnosFileName);
489                                                 remove((accnosFileName + toString(processIDS[i]) + ".temp").c_str());
490                                         }
491                                                                                 
492                                         if (m->control_pressed) { 
493                                                 remove(outputFileName.c_str()); 
494                                                 remove(accnosFileName.c_str());
495                                                 for (int j = 0; j < outputNames.size(); j++) {  remove(outputNames[j].c_str()); } outputTypes.clear();
496                                                 for (int i = 0; i < lines.size(); i++) {  delete lines[i];  }  lines.clear();
497                                                 delete chimera;
498                                                 return 0;
499                                         }
500                                 }
501
502                         #else
503                                 numSeqs = driver(lines[0], outputFileName, fastaFileNames[s], accnosFileName);
504                                 
505                                 if (m->control_pressed) { outputTypes.clear(); remove(outputFileName.c_str()); remove(accnosFileName.c_str()); for (int j = 0; j < outputNames.size(); j++) {   remove(outputNames[j].c_str()); } for (int i = 0; i < lines.size(); i++) {  delete lines[i];  }  lines.clear(); delete chimera; return 0; }
506                         #endif
507                         
508                 #endif  
509                 
510                         delete chimera;
511                         for (int i = 0; i < lines.size(); i++) {  delete lines[i];  }  lines.clear();
512                         
513                         outputNames.push_back(outputFileName); outputTypes["chimera"].push_back(outputFileName);
514                         outputNames.push_back(accnosFileName); outputTypes["accnos"].push_back(accnosFileName);
515                         
516                         m->mothurOutEndLine();
517                         m->mothurOutEndLine(); m->mothurOut("It took " + toString(time(NULL) - start) + " secs to check " + toString(numSeqs) + " sequences."); m->mothurOutEndLine();
518                 }
519                 
520                 //set accnos file as new current accnosfile
521                 string current = "";
522                 itTypes = outputTypes.find("accnos");
523                 if (itTypes != outputTypes.end()) {
524                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setAccnosFile(current); }
525                 }
526                 
527                 m->mothurOutEndLine();
528                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
529                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }       
530                 m->mothurOutEndLine();
531                         
532                 return 0;
533                 
534         }
535         catch(exception& e) {
536                 m->errorOut(e, "ChimeraPintailCommand", "execute");
537                 exit(1);
538         }
539 }
540 //**********************************************************************************************************************
541
542 int ChimeraPintailCommand::driver(linePair* filePos, string outputFName, string filename, string accnos){
543         try {
544                 ofstream out;
545                 m->openOutputFile(outputFName, out);
546                 
547                 ofstream out2;
548                 m->openOutputFile(accnos, out2);
549                 
550                 ifstream inFASTA;
551                 m->openInputFile(filename, inFASTA);
552
553                 inFASTA.seekg(filePos->start);
554
555                 bool done = false;
556                 int count = 0;
557         
558                 while (!done) {
559                                 
560                         if (m->control_pressed) {       return 1;       }
561                 
562                         Sequence* candidateSeq = new Sequence(inFASTA);  m->gobble(inFASTA);
563                                 
564                         if (candidateSeq->getName() != "") { //incase there is a commented sequence at the end of a file
565                                 
566                                 if (candidateSeq->getAligned().length() != templateSeqsLength)  {  //chimeracheck does not require seqs to be aligned
567                                         m->mothurOut(candidateSeq->getName() + " is not the same length as the template sequences. Skipping."); m->mothurOutEndLine();
568                                 }else{
569                                         //find chimeras
570                                         chimera->getChimeras(candidateSeq);
571                                         
572                                         if (m->control_pressed) {       delete candidateSeq; return 1;  }
573                 
574                                         //print results
575                                         chimera->print(out, out2);
576                                 }
577                                 count++;
578                         }
579                         delete candidateSeq;
580                         
581                         #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
582                                 unsigned long int pos = inFASTA.tellg();
583                                 if ((pos == -1) || (pos >= filePos->end)) { break; }
584                         #else
585                                 if (inFASTA.eof()) { break; }
586                         #endif
587                         
588                         //report progress
589                         if((count) % 100 == 0){ m->mothurOut("Processing sequence: " + toString(count)); m->mothurOutEndLine();         }
590                 }
591                 //report progress
592                 if((count) % 100 != 0){ m->mothurOut("Processing sequence: " + toString(count)); m->mothurOutEndLine();         }
593                 
594                 out.close();
595                 out2.close();
596                 inFASTA.close();
597                                 
598                 return count;
599         }
600         catch(exception& e) {
601                 m->errorOut(e, "ChimeraPintailCommand", "driver");
602                 exit(1);
603         }
604 }
605 //**********************************************************************************************************************
606 #ifdef USE_MPI
607 int ChimeraPintailCommand::driverMPI(int start, int num, MPI_File& inMPI, MPI_File& outMPI, MPI_File& outAccMPI, vector<unsigned long int>& MPIPos){
608         try {
609                                 
610                 MPI_Status status; 
611                 int pid;
612                 MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
613                 
614                 for(int i=0;i<num;i++){
615                         
616                         if (m->control_pressed) {       return 1;       }
617                         
618                         //read next sequence
619                         int length = MPIPos[start+i+1] - MPIPos[start+i];
620         
621                         char* buf4 = new char[length];
622                         MPI_File_read_at(inMPI, MPIPos[start+i], buf4, length, MPI_CHAR, &status);
623                         
624                         string tempBuf = buf4;
625                         if (tempBuf.length() > length) { tempBuf = tempBuf.substr(0, length);  }
626                         istringstream iss (tempBuf,istringstream::in);
627                         delete buf4;
628
629                         Sequence* candidateSeq = new Sequence(iss);  m->gobble(iss);
630                                 
631                         if (candidateSeq->getName() != "") { //incase there is a commented sequence at the end of a file
632                                 
633                                 if      (candidateSeq->getAligned().length() != templateSeqsLength) {  //chimeracheck does not require seqs to be aligned
634                                         m->mothurOut(candidateSeq->getName() + " is not the same length as the template sequences. Skipping."); m->mothurOutEndLine();
635                                 }else{
636                                         //find chimeras
637                                         chimera->getChimeras(candidateSeq);
638                                         
639                                         if (m->control_pressed) {       delete candidateSeq; return 1;  }
640                 
641                                         //print results
642                                         chimera->print(outMPI, outAccMPI);
643                                 }
644                         }
645                         delete candidateSeq;
646                         
647                         //report progress
648                         if((i+1) % 100 == 0){  cout << "Processing sequence: " << (i+1) << endl;        m->mothurOutJustToLog("Processing sequence: " + toString(i+1) + "\n");          }
649                 }
650                 //report progress
651                 if(num % 100 != 0){             cout << "Processing sequence: " << num << endl; m->mothurOutJustToLog("Processing sequence: " + toString(num) + "\n");  }
652                 
653                                 
654                 return 0;
655         }
656         catch(exception& e) {
657                 m->errorOut(e, "ChimeraPintailCommand", "driverMPI");
658                 exit(1);
659         }
660 }
661 #endif
662
663 /**************************************************************************************************/
664
665 int ChimeraPintailCommand::createProcesses(string outputFileName, string filename, string accnos) {
666         try {
667 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
668                 int process = 0;
669                 int num = 0;
670                 
671                 //loop through and create all the processes you want
672                 while (process != processors) {
673                         int pid = fork();
674                         
675                         if (pid > 0) {
676                                 processIDS.push_back(pid);  //create map from line number to pid so you can append files in correct order later
677                                 process++;
678                         }else if (pid == 0){
679                                 num = driver(lines[process], outputFileName + toString(getpid()) + ".temp", filename, accnos + toString(getpid()) + ".temp");
680                                 
681                                 //pass numSeqs to parent
682                                 ofstream out;
683                                 string tempFile = outputFileName + toString(getpid()) + ".num.temp";
684                                 m->openOutputFile(tempFile, out);
685                                 out << num << endl;
686                                 out.close();
687
688                                 exit(0);
689                         }else { 
690                                 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine(); 
691                                 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
692                                 exit(0);
693                         }
694                 }
695                 
696                 //force parent to wait until all the processes are done
697                 for (int i=0;i<processors;i++) { 
698                         int temp = processIDS[i];
699                         wait(&temp);
700                 }
701                 
702                 for (int i = 0; i < processIDS.size(); i++) {
703                         ifstream in;
704                         string tempFile =  outputFileName + toString(processIDS[i]) + ".num.temp";
705                         m->openInputFile(tempFile, in);
706                         if (!in.eof()) { int tempNum = 0; in >> tempNum; num += tempNum; }
707                         in.close(); remove(tempFile.c_str());
708                 }
709                 
710                 return num;
711 #endif          
712         }
713         catch(exception& e) {
714                 m->errorOut(e, "ChimeraPintailCommand", "createProcesses");
715                 exit(1);
716         }
717 }
718
719 /**************************************************************************************************/
720
721