]> git.donarmstrong.com Git - mothur.git/blob - chimerapintailcommand.cpp
added citation function to commands
[mothur.git] / chimerapintailcommand.cpp
1 /*
2  *  chimerapintailcommand.cpp
3  *  Mothur
4  *
5  *  Created by westcott on 4/1/10.
6  *  Copyright 2010 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "chimerapintailcommand.h"
11 #include "pintail.h"
12
13 //**********************************************************************************************************************
14 vector<string> ChimeraPintailCommand::setParameters(){  
15         try {
16                 CommandParameter ptemplate("reference", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(ptemplate);
17                 CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pfasta);
18                 CommandParameter pconservation("conservation", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pconservation);
19                 CommandParameter pquantile("quantile", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pquantile);
20                 CommandParameter pfilter("filter", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pfilter);
21                 CommandParameter pwindow("window", "Number", "", "0", "", "", "",false,false); parameters.push_back(pwindow);
22                 CommandParameter pincrement("increment", "Number", "", "25", "", "", "",false,false); parameters.push_back(pincrement);
23                 CommandParameter pmask("mask", "String", "", "", "", "", "",false,false); parameters.push_back(pmask);
24                 CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors);
25                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
26                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
27                 
28                 vector<string> myArray;
29                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
30                 return myArray;
31         }
32         catch(exception& e) {
33                 m->errorOut(e, "ChimeraPintailCommand", "setParameters");
34                 exit(1);
35         }
36 }
37 //**********************************************************************************************************************
38 string ChimeraPintailCommand::getHelpString(){  
39         try {
40                 string helpString = "";
41                 helpString += "The chimera.pintail command reads a fastafile and referencefile and outputs potentially chimeric sequences.\n";
42                 helpString += "This command was created using the algorythms described in the 'At Least 1 in 20 16S rRNA Sequence Records Currently Held in the Public Repositories is Estimated To Contain Substantial Anomalies' paper by Kevin E. Ashelford 1, Nadia A. Chuzhanova 3, John C. Fry 1, Antonia J. Jones 2 and Andrew J. Weightman 1.\n";
43                 helpString += "The chimera.pintail command parameters are fasta, reference, filter, mask, processors, window, increment, conservation and quantile.\n";
44                 helpString += "The fasta parameter allows you to enter the fasta file containing your potentially chimeric sequences, and is required unless you have a valid current fasta file. \n";
45                 helpString += "You may enter multiple fasta files by separating their names with dashes. ie. fasta=abrecovery.fasta-amzon.fasta \n";
46                 helpString += "The reference parameter allows you to enter a reference file containing known non-chimeric sequences, and is required. \n";
47                 helpString += "The filter parameter allows you to specify if you would like to apply a vertical and 50% soft filter. \n";
48                 helpString += "The mask parameter allows you to specify a file containing one sequence you wish to use as a mask for the your sequences, by default no mask is applied.  You can apply an ecoli mask by typing, mask=default. \n";
49                 helpString += "The processors parameter allows you to specify how many processors you would like to use.  The default is 1. \n";
50 #ifdef USE_MPI
51                 helpString += "When using MPI, the processors parameter is set to the number of MPI processes running. \n";
52 #endif
53                 helpString += "The window parameter allows you to specify the window size for searching for chimeras, default=300. \n";
54                 helpString += "The increment parameter allows you to specify how far you move each window while finding chimeric sequences, default=25.\n";
55                 helpString += "The conservation parameter allows you to enter a frequency file containing the highest bases frequency at each place in the alignment.\n";
56                 helpString += "The quantile parameter allows you to enter a file containing quantiles for a template files sequences, if you use the filter the quantile file generated becomes unique to the fasta file you used.\n";
57                 helpString += "The chimera.pintail command should be in the following format: \n";
58                 helpString += "chimera.pintail(fasta=yourFastaFile, reference=yourTemplate) \n";
59                 helpString += "Example: chimera.pintail(fasta=AD.align, reference=silva.bacteria.fasta) \n";
60                 helpString += "Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFastaFile).\n";       
61                 return helpString;
62         }
63         catch(exception& e) {
64                 m->errorOut(e, "ChimeraPintailCommand", "getHelpString");
65                 exit(1);
66         }
67 }
68 //**********************************************************************************************************************
69 ChimeraPintailCommand::ChimeraPintailCommand(){ 
70         try {
71                 abort = true; calledHelp = true;
72                 setParameters();
73                 vector<string> tempOutNames;
74                 outputTypes["chimera"] = tempOutNames;
75                 outputTypes["accnos"] = tempOutNames;
76         }
77         catch(exception& e) {
78                 m->errorOut(e, "ChimeraPintailCommand", "ChimeraPintailCommand");
79                 exit(1);
80         }
81 }
82 //***************************************************************************************************************
83 ChimeraPintailCommand::ChimeraPintailCommand(string option)  {
84         try {
85                 abort = false; calledHelp = false;   
86                 
87                 //allow user to run help
88                 if(option == "help") { help(); abort = true; calledHelp = true; }
89                 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
90                 
91                 else {
92                         vector<string> myArray = setParameters();
93                         
94                         OptionParser parser(option);
95                         map<string,string> parameters = parser.getParameters();
96                         
97                         ValidParameters validParameter("chimera.pintail");
98                         map<string,string>::iterator it;
99                         
100                         //check to make sure all parameters are valid for command
101                         for (it = parameters.begin(); it != parameters.end(); it++) { 
102                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
103                         }
104                         
105                         vector<string> tempOutNames;
106                         outputTypes["chimera"] = tempOutNames;
107                         outputTypes["accnos"] = tempOutNames;
108                 
109                         //if the user changes the input directory command factory will send this info to us in the output parameter 
110                         inputDir = validParameter.validFile(parameters, "inputdir", false);             
111                         if (inputDir == "not found"){   inputDir = "";          }
112                         else {
113                                 string path;
114                                 it = parameters.find("reference");
115                                 //user has given a template file
116                                 if(it != parameters.end()){ 
117                                         path = m->hasPath(it->second);
118                                         //if the user has not given a path then, add inputdir. else leave path alone.
119                                         if (path == "") {       parameters["reference"] = inputDir + it->second;                }
120                                 }
121                                 
122                                 it = parameters.find("conservation");
123                                 //user has given a template file
124                                 if(it != parameters.end()){ 
125                                         path = m->hasPath(it->second);
126                                         //if the user has not given a path then, add inputdir. else leave path alone.
127                                         if (path == "") {       parameters["conservation"] = inputDir + it->second;             }
128                                 }
129                                 
130                                 it = parameters.find("quantile");
131                                 //user has given a template file
132                                 if(it != parameters.end()){ 
133                                         path = m->hasPath(it->second);
134                                         //if the user has not given a path then, add inputdir. else leave path alone.
135                                         if (path == "") {       parameters["quantile"] = inputDir + it->second;         }
136                                 }
137                         }
138
139                         
140                         //check for required parameters
141                         fastafile = validParameter.validFile(parameters, "fasta", false);
142                         if (fastafile == "not found") {                                 
143                                 //if there is a current fasta file, use it
144                                 string filename = m->getFastaFile(); 
145                                 if (filename != "") { fastaFileNames.push_back(filename); m->mothurOut("Using " + filename + " as input file for the fasta parameter."); m->mothurOutEndLine(); }
146                                 else {  m->mothurOut("You have no current fastafile and the fasta parameter is required."); m->mothurOutEndLine(); abort = true; }
147                         }else { 
148                                 m->splitAtDash(fastafile, fastaFileNames);
149                                 
150                                 //go through files and make sure they are good, if not, then disregard them
151                                 for (int i = 0; i < fastaFileNames.size(); i++) {
152                                         
153                                         bool ignore = false;
154                                         if (fastaFileNames[i] == "current") { 
155                                                 fastaFileNames[i] = m->getFastaFile(); 
156                                                 if (fastaFileNames[i] != "") {  m->mothurOut("Using " + fastaFileNames[i] + " as input file for the fasta parameter where you had given current."); m->mothurOutEndLine(); }
157                                                 else {  
158                                                         m->mothurOut("You have no current fastafile, ignoring current."); m->mothurOutEndLine(); ignore=true; 
159                                                         //erase from file list
160                                                         fastaFileNames.erase(fastaFileNames.begin()+i);
161                                                         i--;
162                                                 }
163                                         }
164                                         
165                                         if (!ignore) {
166                                         
167                                                 if (inputDir != "") {
168                                                         string path = m->hasPath(fastaFileNames[i]);
169                                                         //if the user has not given a path then, add inputdir. else leave path alone.
170                                                         if (path == "") {       fastaFileNames[i] = inputDir + fastaFileNames[i];               }
171                                                 }
172                 
173                                                 int ableToOpen;
174                                                 ifstream in;
175                                                 
176                                                 ableToOpen = m->openInputFile(fastaFileNames[i], in, "noerror");
177                                         
178                                                 //if you can't open it, try default location
179                                                 if (ableToOpen == 1) {
180                                                         if (m->getDefaultPath() != "") { //default path is set
181                                                                 string tryPath = m->getDefaultPath() + m->getSimpleName(fastaFileNames[i]);
182                                                                 m->mothurOut("Unable to open " + fastaFileNames[i] + ". Trying default " + tryPath); m->mothurOutEndLine();
183                                                                 ifstream in2;
184                                                                 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
185                                                                 in2.close();
186                                                                 fastaFileNames[i] = tryPath;
187                                                         }
188                                                 }
189                                                 
190                                                 if (ableToOpen == 1) {
191                                                         if (m->getOutputDir() != "") { //default path is set
192                                                                 string tryPath = m->getOutputDir() + m->getSimpleName(fastaFileNames[i]);
193                                                                 m->mothurOut("Unable to open " + fastaFileNames[i] + ". Trying output directory " + tryPath); m->mothurOutEndLine();
194                                                                 ifstream in2;
195                                                                 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
196                                                                 in2.close();
197                                                                 fastaFileNames[i] = tryPath;
198                                                         }
199                                                 }
200
201                                                 in.close();
202                                                 
203                                                 if (ableToOpen == 1) { 
204                                                         m->mothurOut("Unable to open " + fastaFileNames[i] + ". It will be disregarded."); m->mothurOutEndLine(); 
205                                                         //erase from file list
206                                                         fastaFileNames.erase(fastaFileNames.begin()+i);
207                                                         i--;
208                                                 }
209                                         }
210                                 }
211                                 
212                                 //make sure there is at least one valid file left
213                                 if (fastaFileNames.size() == 0) { m->mothurOut("no valid files."); m->mothurOutEndLine(); abort = true; }
214                         }
215                         
216                         string temp;
217                         temp = validParameter.validFile(parameters, "filter", false);                   if (temp == "not found") { temp = "F"; }
218                         filter = m->isTrue(temp);
219                         
220                         temp = validParameter.validFile(parameters, "processors", false);       if (temp == "not found"){       temp = m->getProcessors();      }
221                         m->setProcessors(temp);
222                         convert(temp, processors);
223                         
224                         temp = validParameter.validFile(parameters, "window", false);                   if (temp == "not found") { temp = "0"; }
225                         convert(temp, window);
226                         
227                         temp = validParameter.validFile(parameters, "increment", false);                if (temp == "not found") { temp = "25"; }
228                         convert(temp, increment);
229                         
230                         maskfile = validParameter.validFile(parameters, "mask", false);
231                         if (maskfile == "not found") { maskfile = "";  }        
232                         else if (maskfile != "default")  { 
233                                 if (inputDir != "") {
234                                         string path = m->hasPath(maskfile);
235                                         //if the user has not given a path then, add inputdir. else leave path alone.
236                                         if (path == "") {       maskfile = inputDir + maskfile;         }
237                                 }
238
239                                 ifstream in;
240                                 int     ableToOpen = m->openInputFile(maskfile, in, "no error");
241                                 if (ableToOpen == 1) { 
242                                         if (m->getDefaultPath() != "") { //default path is set
243                                                         string tryPath = m->getDefaultPath() + m->getSimpleName(maskfile);
244                                                         m->mothurOut("Unable to open " + maskfile + ". Trying default " + tryPath); m->mothurOutEndLine();
245                                                         ifstream in2;
246                                                         ableToOpen = m->openInputFile(tryPath, in2, "noerror");
247                                                         in2.close();
248                                                         maskfile = tryPath;
249                                         }
250                                 }
251                                 
252                                 if (ableToOpen == 1) {
253                                                 if (m->getOutputDir() != "") { //default path is set
254                                                         string tryPath = m->getOutputDir() + m->getSimpleName(maskfile);
255                                                         m->mothurOut("Unable to open " + maskfile + ". Trying output directory " + tryPath); m->mothurOutEndLine();
256                                                         ifstream in2;
257                                                         ableToOpen = m->openInputFile(tryPath, in2, "noerror");
258                                                         in2.close();
259                                                         maskfile = tryPath;
260                                                 }
261                                 }
262                                 
263                                 in.close();
264                                         
265                                 if (ableToOpen == 1) { 
266                                                 m->mothurOut("Unable to open " + maskfile + "."); m->mothurOutEndLine(); 
267                                                 abort = true;
268                                 }
269                         }
270
271                         
272                         //if the user changes the output directory command factory will send this info to us in the output parameter 
273                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = ""; }
274                 
275                         templatefile = validParameter.validFile(parameters, "reference", true);
276                         if (templatefile == "not open") { abort = true; }
277                         else if (templatefile == "not found") { templatefile = "";  m->mothurOut("reference is a required parameter for the chimera.pintail command."); m->mothurOutEndLine(); abort = true;  }
278                         
279                         consfile = validParameter.validFile(parameters, "conservation", true);
280                         if (consfile == "not open") { abort = true; }
281                         else if (consfile == "not found") { 
282                                 consfile = "";  
283                                 //check for consfile
284                                 string tempConsFile = m->getRootName(inputDir + m->getSimpleName(templatefile)) + "freq";
285                                 ifstream FileTest(tempConsFile.c_str());
286                                 if(FileTest){   
287                                         bool GoodFile = m->checkReleaseVersion(FileTest, m->getVersion());
288                                         if (GoodFile) {  
289                                                 m->mothurOut("I found " + tempConsFile + " in your input file directory. I will use it to save time."); m->mothurOutEndLine();  consfile = tempConsFile;  FileTest.close();     
290                                         }
291                                 }else {
292                                         string tempConsFile = m->getDefaultPath() + m->getRootName(m->getSimpleName(templatefile)) + "freq";
293                                         ifstream FileTest2(tempConsFile.c_str());
294                                         if(FileTest2){  
295                                                 bool GoodFile = m->checkReleaseVersion(FileTest2, m->getVersion());
296                                                 if (GoodFile) {  
297                                                         m->mothurOut("I found " + tempConsFile + " in your input file directory. I will use it to save time."); m->mothurOutEndLine();  consfile = tempConsFile;  FileTest2.close();    
298                                                 }
299                                         }
300                                 }
301                         }       
302                         
303                         quanfile = validParameter.validFile(parameters, "quantile", true);
304                         if (quanfile == "not open") { abort = true; }
305                         else if (quanfile == "not found") { quanfile = ""; }
306                 }
307         }
308         catch(exception& e) {
309                 m->errorOut(e, "ChimeraPintailCommand", "ChimeraPintailCommand");
310                 exit(1);
311         }
312 }
313 //***************************************************************************************************************
314
315 int ChimeraPintailCommand::execute(){
316         try{
317                 
318                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
319                 
320                 for (int s = 0; s < fastaFileNames.size(); s++) {
321                                 
322                         m->mothurOut("Checking sequences from " + fastaFileNames[s] + " ..." ); m->mothurOutEndLine();
323
324                         int start = time(NULL); 
325                         
326                         //set user options
327                         if (maskfile == "default") { m->mothurOut("I am using the default 236627 EU009184.1 Shigella dysenteriae str. FBD013."); m->mothurOutEndLine();  }
328                         
329                         //check for quantile to save the time
330                         string tempQuan = "";
331                         if ((!filter) && (maskfile == "")) {
332                                 tempQuan = inputDir + m->getRootName(m->getSimpleName(templatefile)) + "pintail.quan";
333                         }else if ((!filter) && (maskfile != "")) { 
334                                 tempQuan = inputDir + m->getRootName(m->getSimpleName(templatefile)) + "pintail.masked.quan";
335                         }else if ((filter) && (maskfile != "")) { 
336                                 tempQuan = inputDir + m->getRootName(m->getSimpleName(templatefile)) + "pintail.filtered." + m->getSimpleName(m->getRootName(fastaFileNames[s])) + "masked.quan";
337                         }else if ((filter) && (maskfile == "")) { 
338                                 tempQuan = inputDir + m->getRootName(m->getSimpleName(templatefile)) + "pintail.filtered." + m->getSimpleName(m->getRootName(fastaFileNames[s])) + "quan";
339                         }
340                         
341                         ifstream FileTest(tempQuan.c_str());
342                         if(FileTest){   
343                                 bool GoodFile = m->checkReleaseVersion(FileTest, m->getVersion());
344                                 if (GoodFile) {  
345                                         m->mothurOut("I found " + tempQuan + " in your input file directory. I will use it to save time."); m->mothurOutEndLine();  quanfile = tempQuan;  FileTest.close();     
346                                 }
347                         }else {
348                                 string tryPath = m->getDefaultPath();
349                                 string tempQuan = "";
350                                 if ((!filter) && (maskfile == "")) {
351                                         tempQuan = tryPath + m->getRootName(m->getSimpleName(templatefile)) + "pintail.quan";
352                                 }else if ((!filter) && (maskfile != "")) { 
353                                         tempQuan = tryPath + m->getRootName(m->getSimpleName(templatefile)) + "pintail.masked.quan";
354                                 }else if ((filter) && (maskfile != "")) { 
355                                         tempQuan = tryPath + m->getRootName(m->getSimpleName(templatefile)) + "pintail.filtered." + m->getSimpleName(m->getRootName(fastaFileNames[s])) + "masked.quan";
356                                 }else if ((filter) && (maskfile == "")) { 
357                                         tempQuan = tryPath + m->getRootName(m->getSimpleName(templatefile)) + "pintail.filtered." + m->getSimpleName(m->getRootName(fastaFileNames[s])) + "quan";
358                                 }
359                                 
360                                 ifstream FileTest2(tempQuan.c_str());
361                                 if(FileTest2){  
362                                         bool GoodFile = m->checkReleaseVersion(FileTest2, m->getVersion());
363                                         if (GoodFile) {  
364                                                 m->mothurOut("I found " + tempQuan + " in your input file directory. I will use it to save time."); m->mothurOutEndLine();  quanfile = tempQuan;  FileTest2.close();    
365                                         }
366                                 }
367                         }
368                         
369                         chimera = new Pintail(fastaFileNames[s], templatefile, filter, processors, maskfile, consfile, quanfile, window, increment, outputDir);
370                         
371                         if (outputDir == "") { outputDir = m->hasPath(fastaFileNames[s]);  }//if user entered a file with a path then preserve it
372                         string outputFileName, accnosFileName;
373                         if (maskfile != "") {
374                                 outputFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + m->getSimpleName(m->getRootName(maskfile)) + ".pintail.chimeras";
375                                 accnosFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + m->getSimpleName(m->getRootName(maskfile)) + ".pintail.accnos";
376                         }else {
377                                 outputFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s]))  + "pintail.chimeras";
378                                 accnosFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s]))  + "pintail.accnos";
379                         }
380                         
381                         if (m->control_pressed) { delete chimera; for (int j = 0; j < outputNames.size(); j++) {        remove(outputNames[j].c_str()); }  return 0;    }
382                         
383                         if (chimera->getUnaligned()) { 
384                                 m->mothurOut("Your template sequences are different lengths, please correct."); m->mothurOutEndLine(); 
385                                 delete chimera;
386                                 return 0; 
387                         }
388                         templateSeqsLength = chimera->getLength();
389                 
390                 #ifdef USE_MPI
391                         int pid, numSeqsPerProcessor; 
392                                 int tag = 2001;
393                                 vector<unsigned long int> MPIPos;
394                                 
395                                 MPI_Status status; 
396                                 MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
397                                 MPI_Comm_size(MPI_COMM_WORLD, &processors); 
398
399                                 MPI_File inMPI;
400                                 MPI_File outMPI;
401                                 MPI_File outMPIAccnos;
402                                 
403                                 int outMode=MPI_MODE_CREATE|MPI_MODE_WRONLY; 
404                                 int inMode=MPI_MODE_RDONLY; 
405                                 
406                                 char outFilename[1024];
407                                 strcpy(outFilename, outputFileName.c_str());
408                                 
409                                 char outAccnosFilename[1024];
410                                 strcpy(outAccnosFilename, accnosFileName.c_str());
411                                 
412                                 char inFileName[1024];
413                                 strcpy(inFileName, fastaFileNames[s].c_str());
414
415                                 MPI_File_open(MPI_COMM_WORLD, inFileName, inMode, MPI_INFO_NULL, &inMPI);  //comm, filename, mode, info, filepointer
416                                 MPI_File_open(MPI_COMM_WORLD, outFilename, outMode, MPI_INFO_NULL, &outMPI);
417                                 MPI_File_open(MPI_COMM_WORLD, outAccnosFilename, outMode, MPI_INFO_NULL, &outMPIAccnos);
418                                 
419                                 if (m->control_pressed) { outputTypes.clear();  MPI_File_close(&inMPI);  MPI_File_close(&outMPI);   MPI_File_close(&outMPIAccnos);  for (int j = 0; j < outputNames.size(); j++) {      remove(outputNames[j].c_str()); }  delete chimera; return 0;  }
420
421                                 if (pid == 0) { //you are the root process 
422                                                                 
423                                         MPIPos = m->setFilePosFasta(fastaFileNames[s], numSeqs); //fills MPIPos, returns numSeqs
424                                         
425                                         //send file positions to all processes
426                                         for(int i = 1; i < processors; i++) { 
427                                                 MPI_Send(&numSeqs, 1, MPI_INT, i, tag, MPI_COMM_WORLD);
428                                                 MPI_Send(&MPIPos[0], (numSeqs+1), MPI_LONG, i, tag, MPI_COMM_WORLD);
429                                         }
430                                         
431                                         //figure out how many sequences you have to align
432                                         numSeqsPerProcessor = numSeqs / processors;
433                                         int startIndex =  pid * numSeqsPerProcessor;
434                                         if(pid == (processors - 1)){    numSeqsPerProcessor = numSeqs - pid * numSeqsPerProcessor;      }
435                                 
436                                         //do your part
437                                         driverMPI(startIndex, numSeqsPerProcessor, inMPI, outMPI, outMPIAccnos, MPIPos);
438                                         
439                                         if (m->control_pressed) { outputTypes.clear();  MPI_File_close(&inMPI);  MPI_File_close(&outMPI);   MPI_File_close(&outMPIAccnos);  remove(outputFileName.c_str());  remove(accnosFileName.c_str());  for (int j = 0; j < outputNames.size(); j++) {    remove(outputNames[j].c_str()); }  delete chimera; return 0;  }
440                                         
441                                 }else{ //you are a child process
442                                         MPI_Recv(&numSeqs, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
443                                         MPIPos.resize(numSeqs+1);
444                                         MPI_Recv(&MPIPos[0], (numSeqs+1), MPI_LONG, 0, tag, MPI_COMM_WORLD, &status);
445                                         
446                                         //figure out how many sequences you have to align
447                                         numSeqsPerProcessor = numSeqs / processors;
448                                         int startIndex =  pid * numSeqsPerProcessor;
449                                         if(pid == (processors - 1)){    numSeqsPerProcessor = numSeqs - pid * numSeqsPerProcessor;      }
450                                         
451                                         //do your part
452                                         driverMPI(startIndex, numSeqsPerProcessor, inMPI, outMPI, outMPIAccnos, MPIPos);
453                                         
454                                         if (m->control_pressed) { outputTypes.clear();  MPI_File_close(&inMPI);  MPI_File_close(&outMPI);   MPI_File_close(&outMPIAccnos);  for (int j = 0; j < outputNames.size(); j++) {      remove(outputNames[j].c_str()); }  delete chimera; return 0;  }
455                                 }
456                                 
457                                 //close files 
458                                 MPI_File_close(&inMPI);
459                                 MPI_File_close(&outMPI);
460                                 MPI_File_close(&outMPIAccnos);
461                                 MPI_Barrier(MPI_COMM_WORLD); //make everyone wait - just in case
462                 #else
463                         vector<unsigned long int> positions = m->divideFile(fastaFileNames[s], processors);
464                                 
465                         for (int i = 0; i < (positions.size()-1); i++) {
466                                 lines.push_back(new linePair(positions[i], positions[(i+1)]));
467                         }       
468                         
469                         //break up file
470                         #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
471                                 if(processors == 1){
472                 
473                                         numSeqs = driver(lines[0], outputFileName, fastaFileNames[s], accnosFileName);
474                                         
475                                         if (m->control_pressed) { outputTypes.clear(); remove(outputFileName.c_str()); remove(accnosFileName.c_str()); for (int j = 0; j < outputNames.size(); j++) {   remove(outputNames[j].c_str()); } for (int i = 0; i < lines.size(); i++) {  delete lines[i];  }  lines.clear(); delete chimera; return 0; }
476                                         
477                                 }else{
478                                         processIDS.resize(0);
479                                         
480                                         numSeqs = createProcesses(outputFileName, fastaFileNames[s], accnosFileName); 
481                                 
482                                         rename((outputFileName + toString(processIDS[0]) + ".temp").c_str(), outputFileName.c_str());
483                                         rename((accnosFileName + toString(processIDS[0]) + ".temp").c_str(), accnosFileName.c_str());
484                                                 
485                                         //append output files
486                                         for(int i=1;i<processors;i++){
487                                                 m->appendFiles((outputFileName + toString(processIDS[i]) + ".temp"), outputFileName);
488                                                 remove((outputFileName + toString(processIDS[i]) + ".temp").c_str());
489                                         }
490                                         
491                                         //append output files
492                                         for(int i=1;i<processors;i++){
493                                                 m->appendFiles((accnosFileName + toString(processIDS[i]) + ".temp"), accnosFileName);
494                                                 remove((accnosFileName + toString(processIDS[i]) + ".temp").c_str());
495                                         }
496                                                                                 
497                                         if (m->control_pressed) { 
498                                                 remove(outputFileName.c_str()); 
499                                                 remove(accnosFileName.c_str());
500                                                 for (int j = 0; j < outputNames.size(); j++) {  remove(outputNames[j].c_str()); } outputTypes.clear();
501                                                 for (int i = 0; i < lines.size(); i++) {  delete lines[i];  }  lines.clear();
502                                                 delete chimera;
503                                                 return 0;
504                                         }
505                                 }
506
507                         #else
508                                 numSeqs = driver(lines[0], outputFileName, fastaFileNames[s], accnosFileName);
509                                 
510                                 if (m->control_pressed) { outputTypes.clear(); remove(outputFileName.c_str()); remove(accnosFileName.c_str()); for (int j = 0; j < outputNames.size(); j++) {   remove(outputNames[j].c_str()); } for (int i = 0; i < lines.size(); i++) {  delete lines[i];  }  lines.clear(); delete chimera; return 0; }
511                         #endif
512                         
513                 #endif  
514                 
515                         delete chimera;
516                         for (int i = 0; i < lines.size(); i++) {  delete lines[i];  }  lines.clear();
517                         
518                         outputNames.push_back(outputFileName); outputTypes["chimera"].push_back(outputFileName);
519                         outputNames.push_back(accnosFileName); outputTypes["accnos"].push_back(accnosFileName);
520                         
521                         m->mothurOutEndLine();
522                         m->mothurOutEndLine(); m->mothurOut("It took " + toString(time(NULL) - start) + " secs to check " + toString(numSeqs) + " sequences."); m->mothurOutEndLine();
523                 }
524                 
525                 //set accnos file as new current accnosfile
526                 string current = "";
527                 itTypes = outputTypes.find("accnos");
528                 if (itTypes != outputTypes.end()) {
529                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setAccnosFile(current); }
530                 }
531                 
532                 m->mothurOutEndLine();
533                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
534                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }       
535                 m->mothurOutEndLine();
536                         
537                 return 0;
538                 
539         }
540         catch(exception& e) {
541                 m->errorOut(e, "ChimeraPintailCommand", "execute");
542                 exit(1);
543         }
544 }
545 //**********************************************************************************************************************
546
547 int ChimeraPintailCommand::driver(linePair* filePos, string outputFName, string filename, string accnos){
548         try {
549                 ofstream out;
550                 m->openOutputFile(outputFName, out);
551                 
552                 ofstream out2;
553                 m->openOutputFile(accnos, out2);
554                 
555                 ifstream inFASTA;
556                 m->openInputFile(filename, inFASTA);
557
558                 inFASTA.seekg(filePos->start);
559
560                 bool done = false;
561                 int count = 0;
562         
563                 while (!done) {
564                                 
565                         if (m->control_pressed) {       return 1;       }
566                 
567                         Sequence* candidateSeq = new Sequence(inFASTA);  m->gobble(inFASTA);
568                                 
569                         if (candidateSeq->getName() != "") { //incase there is a commented sequence at the end of a file
570                                 
571                                 if (candidateSeq->getAligned().length() != templateSeqsLength)  {  //chimeracheck does not require seqs to be aligned
572                                         m->mothurOut(candidateSeq->getName() + " is not the same length as the template sequences. Skipping."); m->mothurOutEndLine();
573                                 }else{
574                                         //find chimeras
575                                         chimera->getChimeras(candidateSeq);
576                                         
577                                         if (m->control_pressed) {       delete candidateSeq; return 1;  }
578                 
579                                         //print results
580                                         chimera->print(out, out2);
581                                 }
582                                 count++;
583                         }
584                         delete candidateSeq;
585                         
586                         #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
587                                 unsigned long int pos = inFASTA.tellg();
588                                 if ((pos == -1) || (pos >= filePos->end)) { break; }
589                         #else
590                                 if (inFASTA.eof()) { break; }
591                         #endif
592                         
593                         //report progress
594                         if((count) % 100 == 0){ m->mothurOut("Processing sequence: " + toString(count)); m->mothurOutEndLine();         }
595                 }
596                 //report progress
597                 if((count) % 100 != 0){ m->mothurOut("Processing sequence: " + toString(count)); m->mothurOutEndLine();         }
598                 
599                 out.close();
600                 out2.close();
601                 inFASTA.close();
602                                 
603                 return count;
604         }
605         catch(exception& e) {
606                 m->errorOut(e, "ChimeraPintailCommand", "driver");
607                 exit(1);
608         }
609 }
610 //**********************************************************************************************************************
611 #ifdef USE_MPI
612 int ChimeraPintailCommand::driverMPI(int start, int num, MPI_File& inMPI, MPI_File& outMPI, MPI_File& outAccMPI, vector<unsigned long int>& MPIPos){
613         try {
614                                 
615                 MPI_Status status; 
616                 int pid;
617                 MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
618                 
619                 for(int i=0;i<num;i++){
620                         
621                         if (m->control_pressed) {       return 1;       }
622                         
623                         //read next sequence
624                         int length = MPIPos[start+i+1] - MPIPos[start+i];
625         
626                         char* buf4 = new char[length];
627                         MPI_File_read_at(inMPI, MPIPos[start+i], buf4, length, MPI_CHAR, &status);
628                         
629                         string tempBuf = buf4;
630                         if (tempBuf.length() > length) { tempBuf = tempBuf.substr(0, length);  }
631                         istringstream iss (tempBuf,istringstream::in);
632                         delete buf4;
633
634                         Sequence* candidateSeq = new Sequence(iss);  m->gobble(iss);
635                                 
636                         if (candidateSeq->getName() != "") { //incase there is a commented sequence at the end of a file
637                                 
638                                 if      (candidateSeq->getAligned().length() != templateSeqsLength) {  //chimeracheck does not require seqs to be aligned
639                                         m->mothurOut(candidateSeq->getName() + " is not the same length as the template sequences. Skipping."); m->mothurOutEndLine();
640                                 }else{
641                                         //find chimeras
642                                         chimera->getChimeras(candidateSeq);
643                                         
644                                         if (m->control_pressed) {       delete candidateSeq; return 1;  }
645                 
646                                         //print results
647                                         chimera->print(outMPI, outAccMPI);
648                                 }
649                         }
650                         delete candidateSeq;
651                         
652                         //report progress
653                         if((i+1) % 100 == 0){  cout << "Processing sequence: " << (i+1) << endl;        m->mothurOutJustToLog("Processing sequence: " + toString(i+1) + "\n");          }
654                 }
655                 //report progress
656                 if(num % 100 != 0){             cout << "Processing sequence: " << num << endl; m->mothurOutJustToLog("Processing sequence: " + toString(num) + "\n");  }
657                 
658                                 
659                 return 0;
660         }
661         catch(exception& e) {
662                 m->errorOut(e, "ChimeraPintailCommand", "driverMPI");
663                 exit(1);
664         }
665 }
666 #endif
667
668 /**************************************************************************************************/
669
670 int ChimeraPintailCommand::createProcesses(string outputFileName, string filename, string accnos) {
671         try {
672 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
673                 int process = 0;
674                 int num = 0;
675                 
676                 //loop through and create all the processes you want
677                 while (process != processors) {
678                         int pid = fork();
679                         
680                         if (pid > 0) {
681                                 processIDS.push_back(pid);  //create map from line number to pid so you can append files in correct order later
682                                 process++;
683                         }else if (pid == 0){
684                                 num = driver(lines[process], outputFileName + toString(getpid()) + ".temp", filename, accnos + toString(getpid()) + ".temp");
685                                 
686                                 //pass numSeqs to parent
687                                 ofstream out;
688                                 string tempFile = outputFileName + toString(getpid()) + ".num.temp";
689                                 m->openOutputFile(tempFile, out);
690                                 out << num << endl;
691                                 out.close();
692
693                                 exit(0);
694                         }else { 
695                                 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine(); 
696                                 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
697                                 exit(0);
698                         }
699                 }
700                 
701                 //force parent to wait until all the processes are done
702                 for (int i=0;i<processors;i++) { 
703                         int temp = processIDS[i];
704                         wait(&temp);
705                 }
706                 
707                 for (int i = 0; i < processIDS.size(); i++) {
708                         ifstream in;
709                         string tempFile =  outputFileName + toString(processIDS[i]) + ".num.temp";
710                         m->openInputFile(tempFile, in);
711                         if (!in.eof()) { int tempNum = 0; in >> tempNum; num += tempNum; }
712                         in.close(); remove(tempFile.c_str());
713                 }
714                 
715                 return num;
716 #endif          
717         }
718         catch(exception& e) {
719                 m->errorOut(e, "ChimeraPintailCommand", "createProcesses");
720                 exit(1);
721         }
722 }
723
724 /**************************************************************************************************/
725
726