]> git.donarmstrong.com Git - mothur.git/blob - chimerapintailcommand.cpp
removed read.dist, read.otu, read.tree and globaldata. added current to defaults...
[mothur.git] / chimerapintailcommand.cpp
1 /*
2  *  chimerapintailcommand.cpp
3  *  Mothur
4  *
5  *  Created by westcott on 4/1/10.
6  *  Copyright 2010 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "chimerapintailcommand.h"
11 #include "pintail.h"
12
13 //**********************************************************************************************************************
14 vector<string> ChimeraPintailCommand::setParameters(){  
15         try {
16                 CommandParameter ptemplate("reference", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(ptemplate);
17                 CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pfasta);
18                 CommandParameter pconservation("conservation", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pconservation);
19                 CommandParameter pquantile("quantile", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pquantile);
20                 CommandParameter pfilter("filter", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pfilter);
21                 CommandParameter pwindow("window", "Number", "", "0", "", "", "",false,false); parameters.push_back(pwindow);
22                 CommandParameter pincrement("increment", "Number", "", "25", "", "", "",false,false); parameters.push_back(pincrement);
23                 CommandParameter pmask("mask", "String", "", "", "", "", "",false,false); parameters.push_back(pmask);
24                 CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors);
25                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
26                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
27                 
28                 vector<string> myArray;
29                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
30                 return myArray;
31         }
32         catch(exception& e) {
33                 m->errorOut(e, "ChimeraPintailCommand", "setParameters");
34                 exit(1);
35         }
36 }
37 //**********************************************************************************************************************
38 string ChimeraPintailCommand::getHelpString(){  
39         try {
40                 string helpString = "";
41                 helpString += "The chimera.pintail command reads a fastafile and referencefile and outputs potentially chimeric sequences.\n";
42                 helpString += "This command was created using the algorythms described in the 'At Least 1 in 20 16S rRNA Sequence Records Currently Held in the Public Repositories is Estimated To Contain Substantial Anomalies' paper by Kevin E. Ashelford 1, Nadia A. Chuzhanova 3, John C. Fry 1, Antonia J. Jones 2 and Andrew J. Weightman 1.\n";
43                 helpString += "The chimera.pintail command parameters are fasta, reference, filter, mask, processors, window, increment, conservation and quantile.\n";
44                 helpString += "The fasta parameter allows you to enter the fasta file containing your potentially chimeric sequences, and is required unless you have a valid current fasta file. \n";
45                 helpString += "You may enter multiple fasta files by separating their names with dashes. ie. fasta=abrecovery.fasta-amzon.fasta \n";
46                 helpString += "The reference parameter allows you to enter a reference file containing known non-chimeric sequences, and is required. \n";
47                 helpString += "The filter parameter allows you to specify if you would like to apply a vertical and 50% soft filter. \n";
48                 helpString += "The mask parameter allows you to specify a file containing one sequence you wish to use as a mask for the your sequences, by default no mask is applied.  You can apply an ecoli mask by typing, mask=default. \n";
49                 helpString += "The processors parameter allows you to specify how many processors you would like to use.  The default is 1. \n";
50 #ifdef USE_MPI
51                 helpString += "When using MPI, the processors parameter is set to the number of MPI processes running. \n";
52 #endif
53                 helpString += "The window parameter allows you to specify the window size for searching for chimeras, default=300. \n";
54                 helpString += "The increment parameter allows you to specify how far you move each window while finding chimeric sequences, default=25.\n";
55                 helpString += "The conservation parameter allows you to enter a frequency file containing the highest bases frequency at each place in the alignment.\n";
56                 helpString += "The quantile parameter allows you to enter a file containing quantiles for a template files sequences, if you use the filter the quantile file generated becomes unique to the fasta file you used.\n";
57                 helpString += "The chimera.pintail command should be in the following format: \n";
58                 helpString += "chimera.pintail(fasta=yourFastaFile, reference=yourTemplate) \n";
59                 helpString += "Example: chimera.pintail(fasta=AD.align, reference=silva.bacteria.fasta) \n";
60                 helpString += "Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFastaFile).\n\n";     
61                 return helpString;
62         }
63         catch(exception& e) {
64                 m->errorOut(e, "ChimeraPintailCommand", "getHelpString");
65                 exit(1);
66         }
67 }
68 //**********************************************************************************************************************
69 ChimeraPintailCommand::ChimeraPintailCommand(){ 
70         try {
71                 abort = true; calledHelp = true;
72                 setParameters();
73                 vector<string> tempOutNames;
74                 outputTypes["chimera"] = tempOutNames;
75                 outputTypes["accnos"] = tempOutNames;
76         }
77         catch(exception& e) {
78                 m->errorOut(e, "ChimeraPintailCommand", "ChimeraPintailCommand");
79                 exit(1);
80         }
81 }
82 //***************************************************************************************************************
83 ChimeraPintailCommand::ChimeraPintailCommand(string option)  {
84         try {
85                 abort = false; calledHelp = false;   
86                 
87                 //allow user to run help
88                 if(option == "help") { help(); abort = true; calledHelp = true; }
89                 
90                 else {
91                         vector<string> myArray = setParameters();
92                         
93                         OptionParser parser(option);
94                         map<string,string> parameters = parser.getParameters();
95                         
96                         ValidParameters validParameter("chimera.pintail");
97                         map<string,string>::iterator it;
98                         
99                         //check to make sure all parameters are valid for command
100                         for (it = parameters.begin(); it != parameters.end(); it++) { 
101                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
102                         }
103                         
104                         vector<string> tempOutNames;
105                         outputTypes["chimera"] = tempOutNames;
106                         outputTypes["accnos"] = tempOutNames;
107                 
108                         //if the user changes the input directory command factory will send this info to us in the output parameter 
109                         inputDir = validParameter.validFile(parameters, "inputdir", false);             
110                         if (inputDir == "not found"){   inputDir = "";          }
111                         else {
112                                 string path;
113                                 it = parameters.find("reference");
114                                 //user has given a template file
115                                 if(it != parameters.end()){ 
116                                         path = m->hasPath(it->second);
117                                         //if the user has not given a path then, add inputdir. else leave path alone.
118                                         if (path == "") {       parameters["reference"] = inputDir + it->second;                }
119                                 }
120                                 
121                                 it = parameters.find("conservation");
122                                 //user has given a template file
123                                 if(it != parameters.end()){ 
124                                         path = m->hasPath(it->second);
125                                         //if the user has not given a path then, add inputdir. else leave path alone.
126                                         if (path == "") {       parameters["conservation"] = inputDir + it->second;             }
127                                 }
128                                 
129                                 it = parameters.find("quantile");
130                                 //user has given a template file
131                                 if(it != parameters.end()){ 
132                                         path = m->hasPath(it->second);
133                                         //if the user has not given a path then, add inputdir. else leave path alone.
134                                         if (path == "") {       parameters["quantile"] = inputDir + it->second;         }
135                                 }
136                         }
137
138                         
139                         //check for required parameters
140                         fastafile = validParameter.validFile(parameters, "fasta", false);
141                         if (fastafile == "not found") {                                 
142                                 //if there is a current fasta file, use it
143                                 string filename = m->getFastaFile(); 
144                                 if (filename != "") { fastaFileNames.push_back(filename); m->mothurOut("Using " + filename + " as input file for the fasta parameter."); m->mothurOutEndLine(); }
145                                 else {  m->mothurOut("You have no current fastafile and the fasta parameter is required."); m->mothurOutEndLine(); abort = true; }
146                         }else { 
147                                 m->splitAtDash(fastafile, fastaFileNames);
148                                 
149                                 //go through files and make sure they are good, if not, then disregard them
150                                 for (int i = 0; i < fastaFileNames.size(); i++) {
151                                         if (inputDir != "") {
152                                                 string path = m->hasPath(fastaFileNames[i]);
153                                                 //if the user has not given a path then, add inputdir. else leave path alone.
154                                                 if (path == "") {       fastaFileNames[i] = inputDir + fastaFileNames[i];               }
155                                         }
156         
157                                         int ableToOpen;
158                                         ifstream in;
159                                         
160                                         ableToOpen = m->openInputFile(fastaFileNames[i], in, "noerror");
161                                 
162                                         //if you can't open it, try default location
163                                         if (ableToOpen == 1) {
164                                                 if (m->getDefaultPath() != "") { //default path is set
165                                                         string tryPath = m->getDefaultPath() + m->getSimpleName(fastaFileNames[i]);
166                                                         m->mothurOut("Unable to open " + fastaFileNames[i] + ". Trying default " + tryPath); m->mothurOutEndLine();
167                                                         ifstream in2;
168                                                         ableToOpen = m->openInputFile(tryPath, in2, "noerror");
169                                                         in2.close();
170                                                         fastaFileNames[i] = tryPath;
171                                                 }
172                                         }
173                                         
174                                         if (ableToOpen == 1) {
175                                                 if (m->getOutputDir() != "") { //default path is set
176                                                         string tryPath = m->getOutputDir() + m->getSimpleName(fastaFileNames[i]);
177                                                         m->mothurOut("Unable to open " + fastaFileNames[i] + ". Trying output directory " + tryPath); m->mothurOutEndLine();
178                                                         ifstream in2;
179                                                         ableToOpen = m->openInputFile(tryPath, in2, "noerror");
180                                                         in2.close();
181                                                         fastaFileNames[i] = tryPath;
182                                                 }
183                                         }
184
185                                         in.close();
186                                         
187                                         if (ableToOpen == 1) { 
188                                                 m->mothurOut("Unable to open " + fastaFileNames[i] + ". It will be disregarded."); m->mothurOutEndLine(); 
189                                                 //erase from file list
190                                                 fastaFileNames.erase(fastaFileNames.begin()+i);
191                                                 i--;
192                                         }
193                                 }
194                                 
195                                 //make sure there is at least one valid file left
196                                 if (fastaFileNames.size() == 0) { m->mothurOut("no valid files."); m->mothurOutEndLine(); abort = true; }
197                         }
198                         
199                         string temp;
200                         temp = validParameter.validFile(parameters, "filter", false);                   if (temp == "not found") { temp = "F"; }
201                         filter = m->isTrue(temp);
202                         
203                         temp = validParameter.validFile(parameters, "processors", false);       if (temp == "not found"){       temp = m->getProcessors();      }
204                         m->setProcessors(temp);
205                         convert(temp, processors);
206                         
207                         temp = validParameter.validFile(parameters, "window", false);                   if (temp == "not found") { temp = "0"; }
208                         convert(temp, window);
209                         
210                         temp = validParameter.validFile(parameters, "increment", false);                if (temp == "not found") { temp = "25"; }
211                         convert(temp, increment);
212                         
213                         maskfile = validParameter.validFile(parameters, "mask", false);
214                         if (maskfile == "not found") { maskfile = "";  }        
215                         else if (maskfile != "default")  { 
216                                 if (inputDir != "") {
217                                         string path = m->hasPath(maskfile);
218                                         //if the user has not given a path then, add inputdir. else leave path alone.
219                                         if (path == "") {       maskfile = inputDir + maskfile;         }
220                                 }
221
222                                 ifstream in;
223                                 int     ableToOpen = m->openInputFile(maskfile, in, "no error");
224                                 if (ableToOpen == 1) { 
225                                         if (m->getDefaultPath() != "") { //default path is set
226                                                         string tryPath = m->getDefaultPath() + m->getSimpleName(maskfile);
227                                                         m->mothurOut("Unable to open " + maskfile + ". Trying default " + tryPath); m->mothurOutEndLine();
228                                                         ifstream in2;
229                                                         ableToOpen = m->openInputFile(tryPath, in2, "noerror");
230                                                         in2.close();
231                                                         maskfile = tryPath;
232                                         }
233                                 }
234                                 
235                                 if (ableToOpen == 1) {
236                                                 if (m->getOutputDir() != "") { //default path is set
237                                                         string tryPath = m->getOutputDir() + m->getSimpleName(maskfile);
238                                                         m->mothurOut("Unable to open " + maskfile + ". Trying output directory " + tryPath); m->mothurOutEndLine();
239                                                         ifstream in2;
240                                                         ableToOpen = m->openInputFile(tryPath, in2, "noerror");
241                                                         in2.close();
242                                                         maskfile = tryPath;
243                                                 }
244                                 }
245                                 
246                                 in.close();
247                                         
248                                 if (ableToOpen == 1) { 
249                                                 m->mothurOut("Unable to open " + maskfile + "."); m->mothurOutEndLine(); 
250                                                 abort = true;
251                                 }
252                         }
253
254                         
255                         //if the user changes the output directory command factory will send this info to us in the output parameter 
256                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = ""; }
257                 
258                         templatefile = validParameter.validFile(parameters, "reference", true);
259                         if (templatefile == "not open") { abort = true; }
260                         else if (templatefile == "not found") { templatefile = "";  m->mothurOut("reference is a required parameter for the chimera.pintail command."); m->mothurOutEndLine(); abort = true;  }
261                         
262                         consfile = validParameter.validFile(parameters, "conservation", true);
263                         if (consfile == "not open") { abort = true; }
264                         else if (consfile == "not found") { 
265                                 consfile = "";  
266                                 //check for consfile
267                                 string tempConsFile = m->getRootName(inputDir + m->getSimpleName(templatefile)) + "freq";
268                                 ifstream FileTest(tempConsFile.c_str());
269                                 if(FileTest){   
270                                         bool GoodFile = m->checkReleaseVersion(FileTest, m->getVersion());
271                                         if (GoodFile) {  
272                                                 m->mothurOut("I found " + tempConsFile + " in your input file directory. I will use it to save time."); m->mothurOutEndLine();  consfile = tempConsFile;  FileTest.close();     
273                                         }
274                                 }else {
275                                         string tempConsFile = m->getDefaultPath() + m->getRootName(m->getSimpleName(templatefile)) + "freq";
276                                         ifstream FileTest2(tempConsFile.c_str());
277                                         if(FileTest2){  
278                                                 bool GoodFile = m->checkReleaseVersion(FileTest2, m->getVersion());
279                                                 if (GoodFile) {  
280                                                         m->mothurOut("I found " + tempConsFile + " in your input file directory. I will use it to save time."); m->mothurOutEndLine();  consfile = tempConsFile;  FileTest2.close();    
281                                                 }
282                                         }
283                                 }
284                         }       
285                         
286                         quanfile = validParameter.validFile(parameters, "quantile", true);
287                         if (quanfile == "not open") { abort = true; }
288                         else if (quanfile == "not found") { quanfile = ""; }
289                 }
290         }
291         catch(exception& e) {
292                 m->errorOut(e, "ChimeraPintailCommand", "ChimeraPintailCommand");
293                 exit(1);
294         }
295 }
296 //***************************************************************************************************************
297
298 int ChimeraPintailCommand::execute(){
299         try{
300                 
301                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
302                 
303                 for (int s = 0; s < fastaFileNames.size(); s++) {
304                                 
305                         m->mothurOut("Checking sequences from " + fastaFileNames[s] + " ..." ); m->mothurOutEndLine();
306
307                         int start = time(NULL); 
308                         
309                         //set user options
310                         if (maskfile == "default") { m->mothurOut("I am using the default 236627 EU009184.1 Shigella dysenteriae str. FBD013."); m->mothurOutEndLine();  }
311                         
312                         //check for quantile to save the time
313                         string tempQuan = "";
314                         if ((!filter) && (maskfile == "")) {
315                                 tempQuan = inputDir + m->getRootName(m->getSimpleName(templatefile)) + "pintail.quan";
316                         }else if ((!filter) && (maskfile != "")) { 
317                                 tempQuan = inputDir + m->getRootName(m->getSimpleName(templatefile)) + "pintail.masked.quan";
318                         }else if ((filter) && (maskfile != "")) { 
319                                 tempQuan = inputDir + m->getRootName(m->getSimpleName(templatefile)) + "pintail.filtered." + m->getSimpleName(m->getRootName(fastaFileNames[s])) + "masked.quan";
320                         }else if ((filter) && (maskfile == "")) { 
321                                 tempQuan = inputDir + m->getRootName(m->getSimpleName(templatefile)) + "pintail.filtered." + m->getSimpleName(m->getRootName(fastaFileNames[s])) + "quan";
322                         }
323                         
324                         ifstream FileTest(tempQuan.c_str());
325                         if(FileTest){   
326                                 bool GoodFile = m->checkReleaseVersion(FileTest, m->getVersion());
327                                 if (GoodFile) {  
328                                         m->mothurOut("I found " + tempQuan + " in your input file directory. I will use it to save time."); m->mothurOutEndLine();  quanfile = tempQuan;  FileTest.close();     
329                                 }
330                         }else {
331                                 string tryPath = m->getDefaultPath();
332                                 string tempQuan = "";
333                                 if ((!filter) && (maskfile == "")) {
334                                         tempQuan = tryPath + m->getRootName(m->getSimpleName(templatefile)) + "pintail.quan";
335                                 }else if ((!filter) && (maskfile != "")) { 
336                                         tempQuan = tryPath + m->getRootName(m->getSimpleName(templatefile)) + "pintail.masked.quan";
337                                 }else if ((filter) && (maskfile != "")) { 
338                                         tempQuan = tryPath + m->getRootName(m->getSimpleName(templatefile)) + "pintail.filtered." + m->getSimpleName(m->getRootName(fastaFileNames[s])) + "masked.quan";
339                                 }else if ((filter) && (maskfile == "")) { 
340                                         tempQuan = tryPath + m->getRootName(m->getSimpleName(templatefile)) + "pintail.filtered." + m->getSimpleName(m->getRootName(fastaFileNames[s])) + "quan";
341                                 }
342                                 
343                                 ifstream FileTest2(tempQuan.c_str());
344                                 if(FileTest2){  
345                                         bool GoodFile = m->checkReleaseVersion(FileTest2, m->getVersion());
346                                         if (GoodFile) {  
347                                                 m->mothurOut("I found " + tempQuan + " in your input file directory. I will use it to save time."); m->mothurOutEndLine();  quanfile = tempQuan;  FileTest2.close();    
348                                         }
349                                 }
350                         }
351                         
352                         chimera = new Pintail(fastaFileNames[s], templatefile, filter, processors, maskfile, consfile, quanfile, window, increment, outputDir);
353                         
354                         if (outputDir == "") { outputDir = m->hasPath(fastaFileNames[s]);  }//if user entered a file with a path then preserve it
355                         string outputFileName, accnosFileName;
356                         if (maskfile != "") {
357                                 outputFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + m->getSimpleName(m->getRootName(maskfile)) + ".pintail.chimeras";
358                                 accnosFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + m->getSimpleName(m->getRootName(maskfile)) + ".pintail.accnos";
359                         }else {
360                                 outputFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s]))  + "pintail.chimeras";
361                                 accnosFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s]))  + "pintail.accnos";
362                         }
363                         
364                         if (m->control_pressed) { delete chimera; for (int j = 0; j < outputNames.size(); j++) {        remove(outputNames[j].c_str()); }  return 0;    }
365                         
366                         if (chimera->getUnaligned()) { 
367                                 m->mothurOut("Your template sequences are different lengths, please correct."); m->mothurOutEndLine(); 
368                                 delete chimera;
369                                 return 0; 
370                         }
371                         templateSeqsLength = chimera->getLength();
372                 
373                 #ifdef USE_MPI
374                         int pid, numSeqsPerProcessor; 
375                                 int tag = 2001;
376                                 vector<unsigned long int> MPIPos;
377                                 
378                                 MPI_Status status; 
379                                 MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
380                                 MPI_Comm_size(MPI_COMM_WORLD, &processors); 
381
382                                 MPI_File inMPI;
383                                 MPI_File outMPI;
384                                 MPI_File outMPIAccnos;
385                                 
386                                 int outMode=MPI_MODE_CREATE|MPI_MODE_WRONLY; 
387                                 int inMode=MPI_MODE_RDONLY; 
388                                 
389                                 char outFilename[1024];
390                                 strcpy(outFilename, outputFileName.c_str());
391                                 
392                                 char outAccnosFilename[1024];
393                                 strcpy(outAccnosFilename, accnosFileName.c_str());
394                                 
395                                 char inFileName[1024];
396                                 strcpy(inFileName, fastaFileNames[s].c_str());
397
398                                 MPI_File_open(MPI_COMM_WORLD, inFileName, inMode, MPI_INFO_NULL, &inMPI);  //comm, filename, mode, info, filepointer
399                                 MPI_File_open(MPI_COMM_WORLD, outFilename, outMode, MPI_INFO_NULL, &outMPI);
400                                 MPI_File_open(MPI_COMM_WORLD, outAccnosFilename, outMode, MPI_INFO_NULL, &outMPIAccnos);
401                                 
402                                 if (m->control_pressed) { outputTypes.clear();  MPI_File_close(&inMPI);  MPI_File_close(&outMPI);   MPI_File_close(&outMPIAccnos);  for (int j = 0; j < outputNames.size(); j++) {      remove(outputNames[j].c_str()); }  delete chimera; return 0;  }
403
404                                 if (pid == 0) { //you are the root process 
405                                                                 
406                                         MPIPos = m->setFilePosFasta(fastaFileNames[s], numSeqs); //fills MPIPos, returns numSeqs
407                                         
408                                         //send file positions to all processes
409                                         for(int i = 1; i < processors; i++) { 
410                                                 MPI_Send(&numSeqs, 1, MPI_INT, i, tag, MPI_COMM_WORLD);
411                                                 MPI_Send(&MPIPos[0], (numSeqs+1), MPI_LONG, i, tag, MPI_COMM_WORLD);
412                                         }
413                                         
414                                         //figure out how many sequences you have to align
415                                         numSeqsPerProcessor = numSeqs / processors;
416                                         int startIndex =  pid * numSeqsPerProcessor;
417                                         if(pid == (processors - 1)){    numSeqsPerProcessor = numSeqs - pid * numSeqsPerProcessor;      }
418                                 
419                                         //do your part
420                                         driverMPI(startIndex, numSeqsPerProcessor, inMPI, outMPI, outMPIAccnos, MPIPos);
421                                         
422                                         if (m->control_pressed) { outputTypes.clear();  MPI_File_close(&inMPI);  MPI_File_close(&outMPI);   MPI_File_close(&outMPIAccnos);  remove(outputFileName.c_str());  remove(accnosFileName.c_str());  for (int j = 0; j < outputNames.size(); j++) {    remove(outputNames[j].c_str()); }  delete chimera; return 0;  }
423                                         
424                                 }else{ //you are a child process
425                                         MPI_Recv(&numSeqs, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
426                                         MPIPos.resize(numSeqs+1);
427                                         MPI_Recv(&MPIPos[0], (numSeqs+1), MPI_LONG, 0, tag, MPI_COMM_WORLD, &status);
428                                         
429                                         //figure out how many sequences you have to align
430                                         numSeqsPerProcessor = numSeqs / processors;
431                                         int startIndex =  pid * numSeqsPerProcessor;
432                                         if(pid == (processors - 1)){    numSeqsPerProcessor = numSeqs - pid * numSeqsPerProcessor;      }
433                                         
434                                         //do your part
435                                         driverMPI(startIndex, numSeqsPerProcessor, inMPI, outMPI, outMPIAccnos, MPIPos);
436                                         
437                                         if (m->control_pressed) { outputTypes.clear();  MPI_File_close(&inMPI);  MPI_File_close(&outMPI);   MPI_File_close(&outMPIAccnos);  for (int j = 0; j < outputNames.size(); j++) {      remove(outputNames[j].c_str()); }  delete chimera; return 0;  }
438                                 }
439                                 
440                                 //close files 
441                                 MPI_File_close(&inMPI);
442                                 MPI_File_close(&outMPI);
443                                 MPI_File_close(&outMPIAccnos);
444                                 MPI_Barrier(MPI_COMM_WORLD); //make everyone wait - just in case
445                 #else
446                         vector<unsigned long int> positions = m->divideFile(fastaFileNames[s], processors);
447                                 
448                         for (int i = 0; i < (positions.size()-1); i++) {
449                                 lines.push_back(new linePair(positions[i], positions[(i+1)]));
450                         }       
451                         
452                         //break up file
453                         #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
454                                 if(processors == 1){
455                 
456                                         numSeqs = driver(lines[0], outputFileName, fastaFileNames[s], accnosFileName);
457                                         
458                                         if (m->control_pressed) { outputTypes.clear(); remove(outputFileName.c_str()); remove(accnosFileName.c_str()); for (int j = 0; j < outputNames.size(); j++) {   remove(outputNames[j].c_str()); } for (int i = 0; i < lines.size(); i++) {  delete lines[i];  }  lines.clear(); delete chimera; return 0; }
459                                         
460                                 }else{
461                                         processIDS.resize(0);
462                                         
463                                         numSeqs = createProcesses(outputFileName, fastaFileNames[s], accnosFileName); 
464                                 
465                                         rename((outputFileName + toString(processIDS[0]) + ".temp").c_str(), outputFileName.c_str());
466                                         rename((accnosFileName + toString(processIDS[0]) + ".temp").c_str(), accnosFileName.c_str());
467                                                 
468                                         //append output files
469                                         for(int i=1;i<processors;i++){
470                                                 m->appendFiles((outputFileName + toString(processIDS[i]) + ".temp"), outputFileName);
471                                                 remove((outputFileName + toString(processIDS[i]) + ".temp").c_str());
472                                         }
473                                         
474                                         //append output files
475                                         for(int i=1;i<processors;i++){
476                                                 m->appendFiles((accnosFileName + toString(processIDS[i]) + ".temp"), accnosFileName);
477                                                 remove((accnosFileName + toString(processIDS[i]) + ".temp").c_str());
478                                         }
479                                                                                 
480                                         if (m->control_pressed) { 
481                                                 remove(outputFileName.c_str()); 
482                                                 remove(accnosFileName.c_str());
483                                                 for (int j = 0; j < outputNames.size(); j++) {  remove(outputNames[j].c_str()); } outputTypes.clear();
484                                                 for (int i = 0; i < lines.size(); i++) {  delete lines[i];  }  lines.clear();
485                                                 delete chimera;
486                                                 return 0;
487                                         }
488                                 }
489
490                         #else
491                                 numSeqs = driver(lines[0], outputFileName, fastaFileNames[s], accnosFileName);
492                                 
493                                 if (m->control_pressed) { outputTypes.clear(); remove(outputFileName.c_str()); remove(accnosFileName.c_str()); for (int j = 0; j < outputNames.size(); j++) {   remove(outputNames[j].c_str()); } for (int i = 0; i < lines.size(); i++) {  delete lines[i];  }  lines.clear(); delete chimera; return 0; }
494                         #endif
495                         
496                 #endif  
497                 
498                         delete chimera;
499                         for (int i = 0; i < lines.size(); i++) {  delete lines[i];  }  lines.clear();
500                         
501                         outputNames.push_back(outputFileName); outputTypes["chimera"].push_back(outputFileName);
502                         outputNames.push_back(accnosFileName); outputTypes["accnos"].push_back(accnosFileName);
503                         
504                         m->mothurOutEndLine();
505                         m->mothurOutEndLine(); m->mothurOut("It took " + toString(time(NULL) - start) + " secs to check " + toString(numSeqs) + " sequences."); m->mothurOutEndLine();
506                 }
507                 
508                 //set accnos file as new current accnosfile
509                 string current = "";
510                 itTypes = outputTypes.find("accnos");
511                 if (itTypes != outputTypes.end()) {
512                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setAccnosFile(current); }
513                 }
514                 
515                 m->mothurOutEndLine();
516                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
517                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }       
518                 m->mothurOutEndLine();
519                         
520                 return 0;
521                 
522         }
523         catch(exception& e) {
524                 m->errorOut(e, "ChimeraPintailCommand", "execute");
525                 exit(1);
526         }
527 }
528 //**********************************************************************************************************************
529
530 int ChimeraPintailCommand::driver(linePair* filePos, string outputFName, string filename, string accnos){
531         try {
532                 ofstream out;
533                 m->openOutputFile(outputFName, out);
534                 
535                 ofstream out2;
536                 m->openOutputFile(accnos, out2);
537                 
538                 ifstream inFASTA;
539                 m->openInputFile(filename, inFASTA);
540
541                 inFASTA.seekg(filePos->start);
542
543                 bool done = false;
544                 int count = 0;
545         
546                 while (!done) {
547                                 
548                         if (m->control_pressed) {       return 1;       }
549                 
550                         Sequence* candidateSeq = new Sequence(inFASTA);  m->gobble(inFASTA);
551                                 
552                         if (candidateSeq->getName() != "") { //incase there is a commented sequence at the end of a file
553                                 
554                                 if (candidateSeq->getAligned().length() != templateSeqsLength)  {  //chimeracheck does not require seqs to be aligned
555                                         m->mothurOut(candidateSeq->getName() + " is not the same length as the template sequences. Skipping."); m->mothurOutEndLine();
556                                 }else{
557                                         //find chimeras
558                                         chimera->getChimeras(candidateSeq);
559                                         
560                                         if (m->control_pressed) {       delete candidateSeq; return 1;  }
561                 
562                                         //print results
563                                         chimera->print(out, out2);
564                                 }
565                                 count++;
566                         }
567                         delete candidateSeq;
568                         
569                         #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
570                                 unsigned long int pos = inFASTA.tellg();
571                                 if ((pos == -1) || (pos >= filePos->end)) { break; }
572                         #else
573                                 if (inFASTA.eof()) { break; }
574                         #endif
575                         
576                         //report progress
577                         if((count) % 100 == 0){ m->mothurOut("Processing sequence: " + toString(count)); m->mothurOutEndLine();         }
578                 }
579                 //report progress
580                 if((count) % 100 != 0){ m->mothurOut("Processing sequence: " + toString(count)); m->mothurOutEndLine();         }
581                 
582                 out.close();
583                 out2.close();
584                 inFASTA.close();
585                                 
586                 return count;
587         }
588         catch(exception& e) {
589                 m->errorOut(e, "ChimeraPintailCommand", "driver");
590                 exit(1);
591         }
592 }
593 //**********************************************************************************************************************
594 #ifdef USE_MPI
595 int ChimeraPintailCommand::driverMPI(int start, int num, MPI_File& inMPI, MPI_File& outMPI, MPI_File& outAccMPI, vector<unsigned long int>& MPIPos){
596         try {
597                                 
598                 MPI_Status status; 
599                 int pid;
600                 MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
601                 
602                 for(int i=0;i<num;i++){
603                         
604                         if (m->control_pressed) {       return 1;       }
605                         
606                         //read next sequence
607                         int length = MPIPos[start+i+1] - MPIPos[start+i];
608         
609                         char* buf4 = new char[length];
610                         MPI_File_read_at(inMPI, MPIPos[start+i], buf4, length, MPI_CHAR, &status);
611                         
612                         string tempBuf = buf4;
613                         if (tempBuf.length() > length) { tempBuf = tempBuf.substr(0, length);  }
614                         istringstream iss (tempBuf,istringstream::in);
615                         delete buf4;
616
617                         Sequence* candidateSeq = new Sequence(iss);  m->gobble(iss);
618                                 
619                         if (candidateSeq->getName() != "") { //incase there is a commented sequence at the end of a file
620                                 
621                                 if      (candidateSeq->getAligned().length() != templateSeqsLength) {  //chimeracheck does not require seqs to be aligned
622                                         m->mothurOut(candidateSeq->getName() + " is not the same length as the template sequences. Skipping."); m->mothurOutEndLine();
623                                 }else{
624                                         //find chimeras
625                                         chimera->getChimeras(candidateSeq);
626                                         
627                                         if (m->control_pressed) {       delete candidateSeq; return 1;  }
628                 
629                                         //print results
630                                         chimera->print(outMPI, outAccMPI);
631                                 }
632                         }
633                         delete candidateSeq;
634                         
635                         //report progress
636                         if((i+1) % 100 == 0){  cout << "Processing sequence: " << (i+1) << endl;        m->mothurOutJustToLog("Processing sequence: " + toString(i+1) + "\n");          }
637                 }
638                 //report progress
639                 if(num % 100 != 0){             cout << "Processing sequence: " << num << endl; m->mothurOutJustToLog("Processing sequence: " + toString(num) + "\n");  }
640                 
641                                 
642                 return 0;
643         }
644         catch(exception& e) {
645                 m->errorOut(e, "ChimeraPintailCommand", "driverMPI");
646                 exit(1);
647         }
648 }
649 #endif
650
651 /**************************************************************************************************/
652
653 int ChimeraPintailCommand::createProcesses(string outputFileName, string filename, string accnos) {
654         try {
655 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
656                 int process = 0;
657                 int num = 0;
658                 
659                 //loop through and create all the processes you want
660                 while (process != processors) {
661                         int pid = fork();
662                         
663                         if (pid > 0) {
664                                 processIDS.push_back(pid);  //create map from line number to pid so you can append files in correct order later
665                                 process++;
666                         }else if (pid == 0){
667                                 num = driver(lines[process], outputFileName + toString(getpid()) + ".temp", filename, accnos + toString(getpid()) + ".temp");
668                                 
669                                 //pass numSeqs to parent
670                                 ofstream out;
671                                 string tempFile = outputFileName + toString(getpid()) + ".num.temp";
672                                 m->openOutputFile(tempFile, out);
673                                 out << num << endl;
674                                 out.close();
675
676                                 exit(0);
677                         }else { 
678                                 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine(); 
679                                 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
680                                 exit(0);
681                         }
682                 }
683                 
684                 //force parent to wait until all the processes are done
685                 for (int i=0;i<processors;i++) { 
686                         int temp = processIDS[i];
687                         wait(&temp);
688                 }
689                 
690                 for (int i = 0; i < processIDS.size(); i++) {
691                         ifstream in;
692                         string tempFile =  outputFileName + toString(processIDS[i]) + ".num.temp";
693                         m->openInputFile(tempFile, in);
694                         if (!in.eof()) { int tempNum = 0; in >> tempNum; num += tempNum; }
695                         in.close(); remove(tempFile.c_str());
696                 }
697                 
698                 return num;
699 #endif          
700         }
701         catch(exception& e) {
702                 m->errorOut(e, "ChimeraPintailCommand", "createProcesses");
703                 exit(1);
704         }
705 }
706
707 /**************************************************************************************************/
708
709