]> git.donarmstrong.com Git - mothur.git/blob - chimerapintailcommand.cpp
added pipeline commands which involved change to command factory and command class...
[mothur.git] / chimerapintailcommand.cpp
1 /*
2  *  chimerapintailcommand.cpp
3  *  Mothur
4  *
5  *  Created by westcott on 4/1/10.
6  *  Copyright 2010 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "chimerapintailcommand.h"
11 #include "pintail.h"
12
13 //**********************************************************************************************************************
14 vector<string> ChimeraPintailCommand::getValidParameters(){     
15         try {
16                 string AlignArray[] =  {"fasta","filter","processors","window" ,"increment","template","conservation","quantile","mask","outputdir","inputdir"};
17                 vector<string> myArray (AlignArray, AlignArray+(sizeof(AlignArray)/sizeof(string)));
18                 return myArray;
19         }
20         catch(exception& e) {
21                 m->errorOut(e, "ChimeraPintailCommand", "getValidParameters");
22                 exit(1);
23         }
24 }
25 //**********************************************************************************************************************
26 ChimeraPintailCommand::ChimeraPintailCommand(){ 
27         try {
28                 vector<string> tempOutNames;
29                 outputTypes["chimera"] = tempOutNames;
30                 outputTypes["accnos"] = tempOutNames;
31         }
32         catch(exception& e) {
33                 m->errorOut(e, "ChimeraPintailCommand", "ChimeraPintailCommand");
34                 exit(1);
35         }
36 }
37 //**********************************************************************************************************************
38 vector<string> ChimeraPintailCommand::getRequiredParameters(){  
39         try {
40                 string AlignArray[] =  {"template","fasta"};
41                 vector<string> myArray (AlignArray, AlignArray+(sizeof(AlignArray)/sizeof(string)));
42                 return myArray;
43         }
44         catch(exception& e) {
45                 m->errorOut(e, "ChimeraPintailCommand", "getRequiredParameters");
46                 exit(1);
47         }
48 }
49 //**********************************************************************************************************************
50 vector<string> ChimeraPintailCommand::getRequiredFiles(){       
51         try {
52                 vector<string> myArray;
53                 return myArray;
54         }
55         catch(exception& e) {
56                 m->errorOut(e, "ChimeraPintailCommand", "getRequiredFiles");
57                 exit(1);
58         }
59 }
60 //***************************************************************************************************************
61 ChimeraPintailCommand::ChimeraPintailCommand(string option)  {
62         try {
63                 abort = false;
64                 
65                 //allow user to run help
66                 if(option == "help") { help(); abort = true; }
67                 
68                 else {
69                         //valid paramters for this command
70                         string Array[] =  {"fasta","filter","processors","window" ,"increment","template","conservation","quantile","mask","outputdir","inputdir"};
71                         vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
72                         
73                         OptionParser parser(option);
74                         map<string,string> parameters = parser.getParameters();
75                         
76                         ValidParameters validParameter("chimera.pintail");
77                         map<string,string>::iterator it;
78                         
79                         //check to make sure all parameters are valid for command
80                         for (it = parameters.begin(); it != parameters.end(); it++) { 
81                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
82                         }
83                         
84                         vector<string> tempOutNames;
85                         outputTypes["chimera"] = tempOutNames;
86                         outputTypes["accnos"] = tempOutNames;
87                 
88                         //if the user changes the input directory command factory will send this info to us in the output parameter 
89                         inputDir = validParameter.validFile(parameters, "inputdir", false);             
90                         if (inputDir == "not found"){   inputDir = "";          }
91                         else {
92                                 string path;
93                                 it = parameters.find("template");
94                                 //user has given a template file
95                                 if(it != parameters.end()){ 
96                                         path = m->hasPath(it->second);
97                                         //if the user has not given a path then, add inputdir. else leave path alone.
98                                         if (path == "") {       parameters["template"] = inputDir + it->second;         }
99                                 }
100                                 
101                                 it = parameters.find("conservation");
102                                 //user has given a template file
103                                 if(it != parameters.end()){ 
104                                         path = m->hasPath(it->second);
105                                         //if the user has not given a path then, add inputdir. else leave path alone.
106                                         if (path == "") {       parameters["conservation"] = inputDir + it->second;             }
107                                 }
108                                 
109                                 it = parameters.find("quantile");
110                                 //user has given a template file
111                                 if(it != parameters.end()){ 
112                                         path = m->hasPath(it->second);
113                                         //if the user has not given a path then, add inputdir. else leave path alone.
114                                         if (path == "") {       parameters["quantile"] = inputDir + it->second;         }
115                                 }
116                         }
117
118                         
119                         //check for required parameters
120                         fastafile = validParameter.validFile(parameters, "fasta", false);
121                         if (fastafile == "not found") { fastafile = ""; m->mothurOut("fasta is a required parameter for the chimera.pintail command."); m->mothurOutEndLine(); abort = true;  }
122                         else { 
123                                 m->splitAtDash(fastafile, fastaFileNames);
124                                 
125                                 //go through files and make sure they are good, if not, then disregard them
126                                 for (int i = 0; i < fastaFileNames.size(); i++) {
127                                         if (inputDir != "") {
128                                                 string path = m->hasPath(fastaFileNames[i]);
129                                                 //if the user has not given a path then, add inputdir. else leave path alone.
130                                                 if (path == "") {       fastaFileNames[i] = inputDir + fastaFileNames[i];               }
131                                         }
132         
133                                         int ableToOpen;
134                                         ifstream in;
135                                         
136                                         ableToOpen = m->openInputFile(fastaFileNames[i], in, "noerror");
137                                 
138                                         //if you can't open it, try default location
139                                         if (ableToOpen == 1) {
140                                                 if (m->getDefaultPath() != "") { //default path is set
141                                                         string tryPath = m->getDefaultPath() + m->getSimpleName(fastaFileNames[i]);
142                                                         m->mothurOut("Unable to open " + fastaFileNames[i] + ". Trying default " + tryPath); m->mothurOutEndLine();
143                                                         ableToOpen = m->openInputFile(tryPath, in, "noerror");
144                                                         fastaFileNames[i] = tryPath;
145                                                 }
146                                         }
147                                         
148                                         if (ableToOpen == 1) {
149                                                 if (m->getOutputDir() != "") { //default path is set
150                                                         string tryPath = m->getOutputDir() + m->getSimpleName(fastaFileNames[i]);
151                                                         m->mothurOut("Unable to open " + fastaFileNames[i] + ". Trying output directory " + tryPath); m->mothurOutEndLine();
152                                                         ableToOpen = m->openInputFile(tryPath, in, "noerror");
153                                                         fastaFileNames[i] = tryPath;
154                                                 }
155                                         }
156
157                                         in.close();
158                                         
159                                         if (ableToOpen == 1) { 
160                                                 m->mothurOut("Unable to open " + fastaFileNames[i] + ". It will be disregarded."); m->mothurOutEndLine(); 
161                                                 //erase from file list
162                                                 fastaFileNames.erase(fastaFileNames.begin()+i);
163                                                 i--;
164                                         }
165                                 }
166                                 
167                                 //make sure there is at least one valid file left
168                                 if (fastaFileNames.size() == 0) { m->mothurOut("no valid files."); m->mothurOutEndLine(); abort = true; }
169                         }
170                         
171                         string temp;
172                         temp = validParameter.validFile(parameters, "filter", false);                   if (temp == "not found") { temp = "F"; }
173                         filter = m->isTrue(temp);
174                         
175                         temp = validParameter.validFile(parameters, "processors", false);               if (temp == "not found") { temp = "1"; }
176                         convert(temp, processors);
177                         
178                         temp = validParameter.validFile(parameters, "window", false);                   if (temp == "not found") { temp = "0"; }
179                         convert(temp, window);
180                         
181                         temp = validParameter.validFile(parameters, "increment", false);                if (temp == "not found") { temp = "25"; }
182                         convert(temp, increment);
183                         
184                         maskfile = validParameter.validFile(parameters, "mask", false);
185                         if (maskfile == "not found") { maskfile = "";  }        
186                         else if (maskfile != "default")  { 
187                                 if (inputDir != "") {
188                                         string path = m->hasPath(maskfile);
189                                         //if the user has not given a path then, add inputdir. else leave path alone.
190                                         if (path == "") {       maskfile = inputDir + maskfile;         }
191                                 }
192
193                                 ifstream in;
194                                 int     ableToOpen = m->openInputFile(maskfile, in, "no error");
195                                 if (ableToOpen == 1) { 
196                                         if (m->getDefaultPath() != "") { //default path is set
197                                                         string tryPath = m->getDefaultPath() + m->getSimpleName(maskfile);
198                                                         m->mothurOut("Unable to open " + maskfile + ". Trying default " + tryPath); m->mothurOutEndLine();
199                                                         ableToOpen = m->openInputFile(tryPath, in, "noerror");
200                                                         maskfile = tryPath;
201                                         }
202                                 }
203                                 
204                                 if (ableToOpen == 1) {
205                                                 if (m->getOutputDir() != "") { //default path is set
206                                                         string tryPath = m->getOutputDir() + m->getSimpleName(maskfile);
207                                                         m->mothurOut("Unable to open " + maskfile + ". Trying output directory " + tryPath); m->mothurOutEndLine();
208                                                         ableToOpen = m->openInputFile(tryPath, in, "noerror");
209                                                         maskfile = tryPath;
210                                                 }
211                                 }
212                                 
213                                 in.close();
214                                         
215                                 if (ableToOpen == 1) { 
216                                                 m->mothurOut("Unable to open " + maskfile + "."); m->mothurOutEndLine(); 
217                                                 abort = true;
218                                 }
219                         }
220
221                         
222                         //if the user changes the output directory command factory will send this info to us in the output parameter 
223                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = ""; }
224                 
225                         templatefile = validParameter.validFile(parameters, "template", true);
226                         if (templatefile == "not open") { abort = true; }
227                         else if (templatefile == "not found") { templatefile = "";  m->mothurOut("template is a required parameter for the chimera.pintail command."); m->mothurOutEndLine(); abort = true;  }
228                         
229                         consfile = validParameter.validFile(parameters, "conservation", true);
230                         if (consfile == "not open") { abort = true; }
231                         else if (consfile == "not found") { 
232                                 consfile = "";  
233                                 //check for consfile
234                                 string tempConsFile = m->getRootName(inputDir + m->getSimpleName(templatefile)) + "freq";
235                                 ifstream FileTest(tempConsFile.c_str());
236                                 if(FileTest){   
237                                         bool GoodFile = m->checkReleaseVersion(FileTest, m->getVersion());
238                                         if (GoodFile) {  
239                                                 m->mothurOut("I found " + tempConsFile + " in your input file directory. I will use it to save time."); m->mothurOutEndLine();  consfile = tempConsFile;  FileTest.close();     
240                                         }
241                                 }else {
242                                         string tempConsFile = m->getDefaultPath() + m->getRootName(m->getSimpleName(templatefile)) + "freq";
243                                         ifstream FileTest2(tempConsFile.c_str());
244                                         if(FileTest2){  
245                                                 bool GoodFile = m->checkReleaseVersion(FileTest2, m->getVersion());
246                                                 if (GoodFile) {  
247                                                         m->mothurOut("I found " + tempConsFile + " in your input file directory. I will use it to save time."); m->mothurOutEndLine();  consfile = tempConsFile;  FileTest2.close();    
248                                                 }
249                                         }
250                                 }
251                         }       
252                         
253                         quanfile = validParameter.validFile(parameters, "quantile", true);
254                         if (quanfile == "not open") { abort = true; }
255                         else if (quanfile == "not found") { quanfile = ""; }
256                 }
257         }
258         catch(exception& e) {
259                 m->errorOut(e, "ChimeraPintailCommand", "ChimeraPintailCommand");
260                 exit(1);
261         }
262 }
263 //**********************************************************************************************************************
264
265 void ChimeraPintailCommand::help(){
266         try {
267         
268                 m->mothurOut("The chimera.pintail command reads a fastafile and templatefile and outputs potentially chimeric sequences.\n");
269                 m->mothurOut("This command was created using the algorythms described in the 'At Least 1 in 20 16S rRNA Sequence Records Currently Held in the Public Repositories is Estimated To Contain Substantial Anomalies' paper by Kevin E. Ashelford 1, Nadia A. Chuzhanova 3, John C. Fry 1, Antonia J. Jones 2 and Andrew J. Weightman 1.\n");
270                 m->mothurOut("The chimera.pintail command parameters are fasta, template, filter, mask, processors, window, increment, conservation and quantile.\n");
271                 m->mothurOut("The fasta parameter allows you to enter the fasta file containing your potentially chimeric sequences, and is required. \n");
272                 m->mothurOut("You may enter multiple fasta files by separating their names with dashes. ie. fasta=abrecovery.fasta-amzon.fasta \n");
273                 m->mothurOut("The template parameter allows you to enter a template file containing known non-chimeric sequences, and is required. \n");
274                 m->mothurOut("The filter parameter allows you to specify if you would like to apply a vertical and 50% soft filter. \n");
275                 m->mothurOut("The mask parameter allows you to specify a file containing one sequence you wish to use as a mask for the your sequences, by default no mask is applied.  You can apply an ecoli mask by typing, mask=default. \n");
276                 m->mothurOut("The processors parameter allows you to specify how many processors you would like to use.  The default is 1. \n");
277                 #ifdef USE_MPI
278                 m->mothurOut("When using MPI, the processors parameter is set to the number of MPI processes running. \n");
279                 #endif
280                 m->mothurOut("The window parameter allows you to specify the window size for searching for chimeras, default=300. \n");
281                 m->mothurOut("The increment parameter allows you to specify how far you move each window while finding chimeric sequences, default=25.\n");
282                 m->mothurOut("The conservation parameter allows you to enter a frequency file containing the highest bases frequency at each place in the alignment.\n");
283                 m->mothurOut("The quantile parameter allows you to enter a file containing quantiles for a template files sequences, if you use the filter the quantile file generated becomes unique to the fasta file you used.\n");
284                 m->mothurOut("The chimera.pintail command should be in the following format: \n");
285                 m->mothurOut("chimera.pintail(fasta=yourFastaFile, template=yourTemplate) \n");
286                 m->mothurOut("Example: chimera.pintail(fasta=AD.align, template=silva.bacteria.fasta) \n");
287                 m->mothurOut("Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFastaFile).\n\n");     
288         }
289         catch(exception& e) {
290                 m->errorOut(e, "ChimeraPintailCommand", "help");
291                 exit(1);
292         }
293 }
294
295 //***************************************************************************************************************
296
297 ChimeraPintailCommand::~ChimeraPintailCommand(){        /*      do nothing      */      }
298
299 //***************************************************************************************************************
300
301 int ChimeraPintailCommand::execute(){
302         try{
303                 
304                 if (abort == true) { return 0; }
305                 
306                 for (int s = 0; s < fastaFileNames.size(); s++) {
307                                 
308                         m->mothurOut("Checking sequences from " + fastaFileNames[s] + " ..." ); m->mothurOutEndLine();
309
310                         int start = time(NULL); 
311                         
312                         //set user options
313                         if (maskfile == "default") { m->mothurOut("I am using the default 236627 EU009184.1 Shigella dysenteriae str. FBD013."); m->mothurOutEndLine();  }
314                         
315                         //check for quantile to save the time
316                         string tempQuan = "";
317                         if ((!filter) && (maskfile == "")) {
318                                 tempQuan = inputDir + m->getRootName(m->getSimpleName(templatefile)) + "pintail.quan";
319                         }else if ((!filter) && (maskfile != "")) { 
320                                 tempQuan = inputDir + m->getRootName(m->getSimpleName(templatefile)) + "pintail.masked.quan";
321                         }else if ((filter) && (maskfile != "")) { 
322                                 tempQuan = inputDir + m->getRootName(m->getSimpleName(templatefile)) + "pintail.filtered." + m->getSimpleName(m->getRootName(fastaFileNames[s])) + "masked.quan";
323                         }else if ((filter) && (maskfile == "")) { 
324                                 tempQuan = inputDir + m->getRootName(m->getSimpleName(templatefile)) + "pintail.filtered." + m->getSimpleName(m->getRootName(fastaFileNames[s])) + "quan";
325                         }
326                         
327                         ifstream FileTest(tempQuan.c_str());
328                         if(FileTest){   
329                                 bool GoodFile = m->checkReleaseVersion(FileTest, m->getVersion());
330                                 if (GoodFile) {  
331                                         m->mothurOut("I found " + tempQuan + " in your input file directory. I will use it to save time."); m->mothurOutEndLine();  quanfile = tempQuan;  FileTest.close();     
332                                 }
333                         }else {
334                                 string tryPath = m->getDefaultPath();
335                                 string tempQuan = "";
336                                 if ((!filter) && (maskfile == "")) {
337                                         tempQuan = tryPath + m->getRootName(m->getSimpleName(templatefile)) + "pintail.quan";
338                                 }else if ((!filter) && (maskfile != "")) { 
339                                         tempQuan = tryPath + m->getRootName(m->getSimpleName(templatefile)) + "pintail.masked.quan";
340                                 }else if ((filter) && (maskfile != "")) { 
341                                         tempQuan = tryPath + m->getRootName(m->getSimpleName(templatefile)) + "pintail.filtered." + m->getSimpleName(m->getRootName(fastaFileNames[s])) + "masked.quan";
342                                 }else if ((filter) && (maskfile == "")) { 
343                                         tempQuan = tryPath + m->getRootName(m->getSimpleName(templatefile)) + "pintail.filtered." + m->getSimpleName(m->getRootName(fastaFileNames[s])) + "quan";
344                                 }
345                                 
346                                 ifstream FileTest2(tempQuan.c_str());
347                                 if(FileTest2){  
348                                         bool GoodFile = m->checkReleaseVersion(FileTest2, m->getVersion());
349                                         if (GoodFile) {  
350                                                 m->mothurOut("I found " + tempQuan + " in your input file directory. I will use it to save time."); m->mothurOutEndLine();  quanfile = tempQuan;  FileTest2.close();    
351                                         }
352                                 }
353                         }
354                         
355                         chimera = new Pintail(fastaFileNames[s], templatefile, filter, processors, maskfile, consfile, quanfile, window, increment, outputDir);
356                         
357                         if (outputDir == "") { outputDir = m->hasPath(fastaFileNames[s]);  }//if user entered a file with a path then preserve it
358                         string outputFileName, accnosFileName;
359                         if (maskfile != "") {
360                                 outputFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + m->getSimpleName(m->getRootName(maskfile)) + ".pintail.chimeras";
361                                 accnosFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + m->getSimpleName(m->getRootName(maskfile)) + ".pintail.accnos";
362                         }else {
363                                 outputFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s]))  + "pintail.chimeras";
364                                 accnosFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s]))  + "pintail.accnos";
365                         }
366                         
367                         if (m->control_pressed) { delete chimera; for (int j = 0; j < outputNames.size(); j++) {        remove(outputNames[j].c_str()); }  return 0;    }
368                         
369                         if (chimera->getUnaligned()) { 
370                                 m->mothurOut("Your template sequences are different lengths, please correct."); m->mothurOutEndLine(); 
371                                 delete chimera;
372                                 return 0; 
373                         }
374                         templateSeqsLength = chimera->getLength();
375                 
376                 #ifdef USE_MPI
377                         int pid, end, numSeqsPerProcessor; 
378                                 int tag = 2001;
379                                 vector<unsigned long int> MPIPos;
380                                 
381                                 MPI_Status status; 
382                                 MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
383                                 MPI_Comm_size(MPI_COMM_WORLD, &processors); 
384
385                                 MPI_File inMPI;
386                                 MPI_File outMPI;
387                                 MPI_File outMPIAccnos;
388                                 
389                                 int outMode=MPI_MODE_CREATE|MPI_MODE_WRONLY; 
390                                 int inMode=MPI_MODE_RDONLY; 
391                                 
392                                 char outFilename[1024];
393                                 strcpy(outFilename, outputFileName.c_str());
394                                 
395                                 char outAccnosFilename[1024];
396                                 strcpy(outAccnosFilename, accnosFileName.c_str());
397                                 
398                                 char inFileName[1024];
399                                 strcpy(inFileName, fastaFileNames[s].c_str());
400
401                                 MPI_File_open(MPI_COMM_WORLD, inFileName, inMode, MPI_INFO_NULL, &inMPI);  //comm, filename, mode, info, filepointer
402                                 MPI_File_open(MPI_COMM_WORLD, outFilename, outMode, MPI_INFO_NULL, &outMPI);
403                                 MPI_File_open(MPI_COMM_WORLD, outAccnosFilename, outMode, MPI_INFO_NULL, &outMPIAccnos);
404                                 
405                                 if (m->control_pressed) { outputTypes.clear();  MPI_File_close(&inMPI);  MPI_File_close(&outMPI);   MPI_File_close(&outMPIAccnos);  for (int j = 0; j < outputNames.size(); j++) {      remove(outputNames[j].c_str()); }  delete chimera; return 0;  }
406
407                                 if (pid == 0) { //you are the root process 
408                                                                 
409                                         MPIPos = m->setFilePosFasta(fastaFileNames[s], numSeqs); //fills MPIPos, returns numSeqs
410                                         
411                                         //send file positions to all processes
412                                         for(int i = 1; i < processors; i++) { 
413                                                 MPI_Send(&numSeqs, 1, MPI_INT, i, tag, MPI_COMM_WORLD);
414                                                 MPI_Send(&MPIPos[0], (numSeqs+1), MPI_LONG, i, tag, MPI_COMM_WORLD);
415                                         }
416                                         
417                                         //figure out how many sequences you have to align
418                                         numSeqsPerProcessor = numSeqs / processors;
419                                         int startIndex =  pid * numSeqsPerProcessor;
420                                         if(pid == (processors - 1)){    numSeqsPerProcessor = numSeqs - pid * numSeqsPerProcessor;      }
421                                 
422                                         //do your part
423                                         driverMPI(startIndex, numSeqsPerProcessor, inMPI, outMPI, outMPIAccnos, MPIPos);
424                                         
425                                         if (m->control_pressed) { outputTypes.clear();  MPI_File_close(&inMPI);  MPI_File_close(&outMPI);   MPI_File_close(&outMPIAccnos);  remove(outputFileName.c_str());  remove(accnosFileName.c_str());  for (int j = 0; j < outputNames.size(); j++) {    remove(outputNames[j].c_str()); }  delete chimera; return 0;  }
426                                         
427                                 }else{ //you are a child process
428                                         MPI_Recv(&numSeqs, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
429                                         MPIPos.resize(numSeqs+1);
430                                         MPI_Recv(&MPIPos[0], (numSeqs+1), MPI_LONG, 0, tag, MPI_COMM_WORLD, &status);
431                                         
432                                         //figure out how many sequences you have to align
433                                         numSeqsPerProcessor = numSeqs / processors;
434                                         int startIndex =  pid * numSeqsPerProcessor;
435                                         if(pid == (processors - 1)){    numSeqsPerProcessor = numSeqs - pid * numSeqsPerProcessor;      }
436                                         
437                                         //do your part
438                                         driverMPI(startIndex, numSeqsPerProcessor, inMPI, outMPI, outMPIAccnos, MPIPos);
439                                         
440                                         if (m->control_pressed) { outputTypes.clear();  MPI_File_close(&inMPI);  MPI_File_close(&outMPI);   MPI_File_close(&outMPIAccnos);  for (int j = 0; j < outputNames.size(); j++) {      remove(outputNames[j].c_str()); }  delete chimera; return 0;  }
441                                 }
442                                 
443                                 //close files 
444                                 MPI_File_close(&inMPI);
445                                 MPI_File_close(&outMPI);
446                                 MPI_File_close(&outMPIAccnos);
447                                 MPI_Barrier(MPI_COMM_WORLD); //make everyone wait - just in case
448                 #else
449                         vector<unsigned long int> positions = m->divideFile(fastaFileNames[s], processors);
450                                 
451                         for (int i = 0; i < (positions.size()-1); i++) {
452                                 lines.push_back(new linePair(positions[i], positions[(i+1)]));
453                         }       
454                         
455                         //break up file
456                         #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
457                                 if(processors == 1){
458                 
459                                         numSeqs = driver(lines[0], outputFileName, fastaFileNames[s], accnosFileName);
460                                         
461                                         if (m->control_pressed) { outputTypes.clear(); remove(outputFileName.c_str()); remove(accnosFileName.c_str()); for (int j = 0; j < outputNames.size(); j++) {   remove(outputNames[j].c_str()); } for (int i = 0; i < lines.size(); i++) {  delete lines[i];  }  lines.clear(); delete chimera; return 0; }
462                                         
463                                 }else{
464                                         processIDS.resize(0);
465                                         
466                                         numSeqs = createProcesses(outputFileName, fastaFileNames[s], accnosFileName); 
467                                 
468                                         rename((outputFileName + toString(processIDS[0]) + ".temp").c_str(), outputFileName.c_str());
469                                         rename((accnosFileName + toString(processIDS[0]) + ".temp").c_str(), accnosFileName.c_str());
470                                                 
471                                         //append output files
472                                         for(int i=1;i<processors;i++){
473                                                 m->appendFiles((outputFileName + toString(processIDS[i]) + ".temp"), outputFileName);
474                                                 remove((outputFileName + toString(processIDS[i]) + ".temp").c_str());
475                                         }
476                                         
477                                         //append output files
478                                         for(int i=1;i<processors;i++){
479                                                 m->appendFiles((accnosFileName + toString(processIDS[i]) + ".temp"), accnosFileName);
480                                                 remove((accnosFileName + toString(processIDS[i]) + ".temp").c_str());
481                                         }
482                                                                                 
483                                         if (m->control_pressed) { 
484                                                 remove(outputFileName.c_str()); 
485                                                 remove(accnosFileName.c_str());
486                                                 for (int j = 0; j < outputNames.size(); j++) {  remove(outputNames[j].c_str()); } outputTypes.clear();
487                                                 for (int i = 0; i < lines.size(); i++) {  delete lines[i];  }  lines.clear();
488                                                 delete chimera;
489                                                 return 0;
490                                         }
491                                 }
492
493                         #else
494                                 numSeqs = driver(lines[0], outputFileName, fastaFileNames[s], accnosFileName);
495                                 
496                                 if (m->control_pressed) { outputTypes.clear(); remove(outputFileName.c_str()); remove(accnosFileName.c_str()); for (int j = 0; j < outputNames.size(); j++) {   remove(outputNames[j].c_str()); } for (int i = 0; i < lines.size(); i++) {  delete lines[i];  }  lines.clear(); delete chimera; return 0; }
497                         #endif
498                         
499                 #endif  
500                 
501                         delete chimera;
502                         for (int i = 0; i < lines.size(); i++) {  delete lines[i];  }  lines.clear();
503                         
504                         outputNames.push_back(outputFileName); outputTypes["chimera"].push_back(outputFileName);
505                         outputNames.push_back(accnosFileName); outputTypes["accnos"].push_back(accnosFileName);
506                         
507                         m->mothurOutEndLine();
508                         m->mothurOutEndLine(); m->mothurOut("It took " + toString(time(NULL) - start) + " secs to check " + toString(numSeqs) + " sequences."); m->mothurOutEndLine();
509                 }
510                 
511                 m->mothurOutEndLine();
512                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
513                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }       
514                 m->mothurOutEndLine();
515                         
516                 return 0;
517                 
518         }
519         catch(exception& e) {
520                 m->errorOut(e, "ChimeraPintailCommand", "execute");
521                 exit(1);
522         }
523 }
524 //**********************************************************************************************************************
525
526 int ChimeraPintailCommand::driver(linePair* filePos, string outputFName, string filename, string accnos){
527         try {
528                 ofstream out;
529                 m->openOutputFile(outputFName, out);
530                 
531                 ofstream out2;
532                 m->openOutputFile(accnos, out2);
533                 
534                 ifstream inFASTA;
535                 m->openInputFile(filename, inFASTA);
536
537                 inFASTA.seekg(filePos->start);
538
539                 bool done = false;
540                 int count = 0;
541         
542                 while (!done) {
543                                 
544                         if (m->control_pressed) {       return 1;       }
545                 
546                         Sequence* candidateSeq = new Sequence(inFASTA);  m->gobble(inFASTA);
547                                 
548                         if (candidateSeq->getName() != "") { //incase there is a commented sequence at the end of a file
549                                 
550                                 if (candidateSeq->getAligned().length() != templateSeqsLength)  {  //chimeracheck does not require seqs to be aligned
551                                         m->mothurOut(candidateSeq->getName() + " is not the same length as the template sequences. Skipping."); m->mothurOutEndLine();
552                                 }else{
553                                         //find chimeras
554                                         chimera->getChimeras(candidateSeq);
555                                         
556                                         if (m->control_pressed) {       delete candidateSeq; return 1;  }
557                 
558                                         //print results
559                                         chimera->print(out, out2);
560                                 }
561                                 count++;
562                         }
563                         delete candidateSeq;
564                         
565                         #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
566                                 unsigned long int pos = inFASTA.tellg();
567                                 if ((pos == -1) || (pos >= filePos->end)) { break; }
568                         #else
569                                 if (inFASTA.eof()) { break; }
570                         #endif
571                         
572                         //report progress
573                         if((count) % 100 == 0){ m->mothurOut("Processing sequence: " + toString(count)); m->mothurOutEndLine();         }
574                 }
575                 //report progress
576                 if((count) % 100 != 0){ m->mothurOut("Processing sequence: " + toString(count)); m->mothurOutEndLine();         }
577                 
578                 out.close();
579                 out2.close();
580                 inFASTA.close();
581                                 
582                 return count;
583         }
584         catch(exception& e) {
585                 m->errorOut(e, "ChimeraPintailCommand", "driver");
586                 exit(1);
587         }
588 }
589 //**********************************************************************************************************************
590 #ifdef USE_MPI
591 int ChimeraPintailCommand::driverMPI(int start, int num, MPI_File& inMPI, MPI_File& outMPI, MPI_File& outAccMPI, vector<unsigned long int>& MPIPos){
592         try {
593                                 
594                 MPI_Status status; 
595                 int pid;
596                 MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
597                 
598                 for(int i=0;i<num;i++){
599                         
600                         if (m->control_pressed) {       return 1;       }
601                         
602                         //read next sequence
603                         int length = MPIPos[start+i+1] - MPIPos[start+i];
604         
605                         char* buf4 = new char[length];
606                         MPI_File_read_at(inMPI, MPIPos[start+i], buf4, length, MPI_CHAR, &status);
607                         
608                         string tempBuf = buf4;
609                         if (tempBuf.length() > length) { tempBuf = tempBuf.substr(0, length);  }
610                         istringstream iss (tempBuf,istringstream::in);
611                         delete buf4;
612
613                         Sequence* candidateSeq = new Sequence(iss);  m->gobble(iss);
614                                 
615                         if (candidateSeq->getName() != "") { //incase there is a commented sequence at the end of a file
616                                 
617                                 if      (candidateSeq->getAligned().length() != templateSeqsLength) {  //chimeracheck does not require seqs to be aligned
618                                         m->mothurOut(candidateSeq->getName() + " is not the same length as the template sequences. Skipping."); m->mothurOutEndLine();
619                                 }else{
620                                         //find chimeras
621                                         chimera->getChimeras(candidateSeq);
622                                         
623                                         if (m->control_pressed) {       delete candidateSeq; return 1;  }
624                 
625                                         //print results
626                                         bool isChimeric = chimera->print(outMPI, outAccMPI);
627                                 }
628                         }
629                         delete candidateSeq;
630                         
631                         //report progress
632                         if((i+1) % 100 == 0){  cout << "Processing sequence: " << (i+1) << endl;        m->mothurOutJustToLog("Processing sequence: " + toString(i+1) + "\n");          }
633                 }
634                 //report progress
635                 if(num % 100 != 0){             cout << "Processing sequence: " << num << endl; m->mothurOutJustToLog("Processing sequence: " + toString(num) + "\n");  }
636                 
637                                 
638                 return 0;
639         }
640         catch(exception& e) {
641                 m->errorOut(e, "ChimeraPintailCommand", "driverMPI");
642                 exit(1);
643         }
644 }
645 #endif
646
647 /**************************************************************************************************/
648
649 int ChimeraPintailCommand::createProcesses(string outputFileName, string filename, string accnos) {
650         try {
651 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
652                 int process = 0;
653                 int num = 0;
654                 
655                 //loop through and create all the processes you want
656                 while (process != processors) {
657                         int pid = fork();
658                         
659                         if (pid > 0) {
660                                 processIDS.push_back(pid);  //create map from line number to pid so you can append files in correct order later
661                                 process++;
662                         }else if (pid == 0){
663                                 num = driver(lines[process], outputFileName + toString(getpid()) + ".temp", filename, accnos + toString(getpid()) + ".temp");
664                                 
665                                 //pass numSeqs to parent
666                                 ofstream out;
667                                 string tempFile = outputFileName + toString(getpid()) + ".num.temp";
668                                 m->openOutputFile(tempFile, out);
669                                 out << num << endl;
670                                 out.close();
671
672                                 exit(0);
673                         }else { m->mothurOut("unable to spawn the necessary processes."); m->mothurOutEndLine(); exit(0); }
674                 }
675                 
676                 //force parent to wait until all the processes are done
677                 for (int i=0;i<processors;i++) { 
678                         int temp = processIDS[i];
679                         wait(&temp);
680                 }
681                 
682                 for (int i = 0; i < processIDS.size(); i++) {
683                         ifstream in;
684                         string tempFile =  outputFileName + toString(processIDS[i]) + ".num.temp";
685                         m->openInputFile(tempFile, in);
686                         if (!in.eof()) { int tempNum = 0; in >> tempNum; num += tempNum; }
687                         in.close(); remove(tempFile.c_str());
688                 }
689                 
690                 return num;
691 #endif          
692         }
693         catch(exception& e) {
694                 m->errorOut(e, "ChimeraPintailCommand", "createProcesses");
695                 exit(1);
696         }
697 }
698
699 /**************************************************************************************************/
700
701