]> git.donarmstrong.com Git - mothur.git/blob - chimerapintailcommand.cpp
modified chimera commands to process multiple fasta files and added checks to pintail...
[mothur.git] / chimerapintailcommand.cpp
1 /*
2  *  chimerapintailcommand.cpp
3  *  Mothur
4  *
5  *  Created by westcott on 4/1/10.
6  *  Copyright 2010 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "chimerapintailcommand.h"
11 #include "pintail.h"
12
13 //***************************************************************************************************************
14
15 ChimeraPintailCommand::ChimeraPintailCommand(string option)  {
16         try {
17                 abort = false;
18                 
19                 //allow user to run help
20                 if(option == "help") { help(); abort = true; }
21                 
22                 else {
23                         //valid paramters for this command
24                         string Array[] =  {"fasta","filter","processors","window" ,"increment","template","conservation","quantile","mask","outputdir","inputdir"};
25                         vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
26                         
27                         OptionParser parser(option);
28                         map<string,string> parameters = parser.getParameters();
29                         
30                         ValidParameters validParameter;
31                         map<string,string>::iterator it;
32                         
33                         //check to make sure all parameters are valid for command
34                         for (it = parameters.begin(); it != parameters.end(); it++) { 
35                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
36                         }
37                         
38                         //if the user changes the input directory command factory will send this info to us in the output parameter 
39                         inputDir = validParameter.validFile(parameters, "inputdir", false);             
40                         if (inputDir == "not found"){   inputDir = "";          }
41                         else {
42                                 string path;
43                                 it = parameters.find("template");
44                                 //user has given a template file
45                                 if(it != parameters.end()){ 
46                                         path = hasPath(it->second);
47                                         //if the user has not given a path then, add inputdir. else leave path alone.
48                                         if (path == "") {       parameters["template"] = inputDir + it->second;         }
49                                 }
50                                 
51                                 it = parameters.find("conservation");
52                                 //user has given a template file
53                                 if(it != parameters.end()){ 
54                                         path = hasPath(it->second);
55                                         //if the user has not given a path then, add inputdir. else leave path alone.
56                                         if (path == "") {       parameters["conservation"] = inputDir + it->second;             }
57                                 }
58                                 
59                                 it = parameters.find("quantile");
60                                 //user has given a template file
61                                 if(it != parameters.end()){ 
62                                         path = hasPath(it->second);
63                                         //if the user has not given a path then, add inputdir. else leave path alone.
64                                         if (path == "") {       parameters["quantile"] = inputDir + it->second;         }
65                                 }
66                         }
67
68                         
69                         //check for required parameters
70                         fastafile = validParameter.validFile(parameters, "fasta", false);
71                         if (fastafile == "not found") { fastafile = ""; m->mothurOut("fasta is a required parameter for the chimera.pintail command."); m->mothurOutEndLine(); abort = true;  }
72                         else { 
73                                 splitAtDash(fastafile, fastaFileNames);
74                                 
75                                 //go through files and make sure they are good, if not, then disregard them
76                                 for (int i = 0; i < fastaFileNames.size(); i++) {
77                                         if (inputDir != "") {
78                                                 string path = hasPath(fastaFileNames[i]);
79                                                 //if the user has not given a path then, add inputdir. else leave path alone.
80                                                 if (path == "") {       fastaFileNames[i] = inputDir + fastaFileNames[i];               }
81                                         }
82         
83                                         int ableToOpen;
84                                         ifstream in;
85                                         
86                                         #ifdef USE_MPI  
87                                                 int pid;
88                                                 MPI_Comm_size(MPI_COMM_WORLD, &processors); //set processors to the number of mpi processes running
89                                                 MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
90                                 
91                                                 if (pid == 0) {
92                                         #endif
93
94                                         ableToOpen = openInputFile(fastaFileNames[i], in);
95                                         in.close();
96                                         
97                                         #ifdef USE_MPI  
98                                                         for (int j = 1; j < processors; j++) {
99                                                                 MPI_Send(&ableToOpen, 1, MPI_INT, j, 2001, MPI_COMM_WORLD); 
100                                                         }
101                                                 }else{
102                                                         MPI_Status status;
103                                                         MPI_Recv(&ableToOpen, 1, MPI_INT, 0, 2001, MPI_COMM_WORLD, &status);
104                                                 }
105                                                 
106                                         #endif
107
108                                         if (ableToOpen == 1) { 
109                                                 m->mothurOut(fastaFileNames[i] + " will be disregarded."); m->mothurOutEndLine(); 
110                                                 //erase from file list
111                                                 fastaFileNames.erase(fastaFileNames.begin()+i);
112                                                 i--;
113                                         }
114                                 }
115                                 
116                                 //make sure there is at least one valid file left
117                                 if (fastaFileNames.size() == 0) { m->mothurOut("no valid files."); m->mothurOutEndLine(); abort = true; }
118                         }
119                         
120                         string temp;
121                         temp = validParameter.validFile(parameters, "filter", false);                   if (temp == "not found") { temp = "F"; }
122                         filter = isTrue(temp);
123                         
124                         temp = validParameter.validFile(parameters, "processors", false);               if (temp == "not found") { temp = "1"; }
125                         convert(temp, processors);
126                         
127                         temp = validParameter.validFile(parameters, "window", false);                   if (temp == "not found") { temp = "0"; }
128                         convert(temp, window);
129                         
130                         temp = validParameter.validFile(parameters, "increment", false);                if (temp == "not found") { temp = "25"; }
131                         convert(temp, increment);
132                         
133                         maskfile = validParameter.validFile(parameters, "mask", false);
134                         if (maskfile == "not found") { maskfile = "";  }        
135                         else if (maskfile != "default")  { 
136                                 if (inputDir != "") {
137                                         string path = hasPath(maskfile);
138                                         //if the user has not given a path then, add inputdir. else leave path alone.
139                                         if (path == "") {       maskfile = inputDir + maskfile;         }
140                                 }
141
142                                 ifstream in;
143                                 int     ableToOpen = openInputFile(maskfile, in);
144                                 if (ableToOpen == 1) { abort = true; }
145                                 in.close();
146                         }
147
148                         
149                         //if the user changes the output directory command factory will send this info to us in the output parameter 
150                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  
151                                 outputDir = ""; 
152                                 outputDir += hasPath(fastafile); //if user entered a file with a path then preserve it  
153                         }
154                 
155                         templatefile = validParameter.validFile(parameters, "template", true);
156                         if (templatefile == "not open") { abort = true; }
157                         else if (templatefile == "not found") { templatefile = "";  m->mothurOut("template is a required parameter for the chimera.pintail command."); m->mothurOutEndLine(); abort = true;  }
158                         
159                         consfile = validParameter.validFile(parameters, "conservation", true);
160                         if (consfile == "not open") { abort = true; }
161                         else if (consfile == "not found") { 
162                                 consfile = "";  
163                                 //check for consfile
164                                 string tempConsFile = getRootName(inputDir + getSimpleName(templatefile)) + "freq";
165                                 ifstream FileTest(tempConsFile.c_str());
166                                 if(FileTest){   m->mothurOut("I found " + tempConsFile + " in your input file directory. I will use it to save time."); m->mothurOutEndLine();  consfile = tempConsFile;  FileTest.close();     }
167                         }       
168                         
169                         quanfile = validParameter.validFile(parameters, "quantile", true);
170                         if (quanfile == "not open") { abort = true; }
171                         else if (quanfile == "not found") { quanfile = "";  }
172                 }
173         }
174         catch(exception& e) {
175                 m->errorOut(e, "ChimeraPintailCommand", "ChimeraPintailCommand");
176                 exit(1);
177         }
178 }
179 //**********************************************************************************************************************
180
181 void ChimeraPintailCommand::help(){
182         try {
183         
184                 m->mothurOut("The chimera.pintail command reads a fastafile and templatefile and outputs potentially chimeric sequences.\n");
185                 m->mothurOut("This command was created using the algorythms described in the 'At Least 1 in 20 16S rRNA Sequence Records Currently Held in the Public Repositories is Estimated To Contain Substantial Anomalies' paper by Kevin E. Ashelford 1, Nadia A. Chuzhanova 3, John C. Fry 1, Antonia J. Jones 2 and Andrew J. Weightman 1.\n");
186                 m->mothurOut("The chimera.pintail command parameters are fasta, template, filter, mask, processors, window, increment, conservation and quantile.\n");
187                 m->mothurOut("The fasta parameter allows you to enter the fasta file containing your potentially chimeric sequences, and is required. \n");
188                 m->mothurOut("You may enter multiple fasta files by separating their names with dashes. ie. fasta=abrecovery.fasta-amzon.fasta \n");
189                 m->mothurOut("The template parameter allows you to enter a template file containing known non-chimeric sequences, and is required. \n");
190                 m->mothurOut("The filter parameter allows you to specify if you would like to apply a vertical and 50% soft filter. \n");
191                 m->mothurOut("The mask parameter allows you to specify a file containing one sequence you wish to use as a mask for the your sequences, by default no mask is applied.  You can apply an ecoli mask by typing, mask=default. \n");
192                 m->mothurOut("The processors parameter allows you to specify how many processors you would like to use.  The default is 1. \n");
193                 #ifdef USE_MPI
194                 m->mothurOut("When using MPI, the processors parameter is set to the number of MPI processes running. \n");
195                 #endif
196                 m->mothurOut("The window parameter allows you to specify the window size for searching for chimeras, default=300. \n");
197                 m->mothurOut("The increment parameter allows you to specify how far you move each window while finding chimeric sequences, default=25.\n");
198                 m->mothurOut("The conservation parameter allows you to enter a frequency file containing the highest bases frequency at each place in the alignment.\n");
199                 m->mothurOut("The quantile parameter allows you to enter a file containing quantiles for a template files sequences, if you use the filter the quantile file generated becomes unique to the fasta file you used.\n");
200                 m->mothurOut("The chimera.pintail command should be in the following format: \n");
201                 m->mothurOut("chimera.pintail(fasta=yourFastaFile, template=yourTemplate) \n");
202                 m->mothurOut("Example: chimera.pintail(fasta=AD.align, template=silva.bacteria.fasta) \n");
203                 m->mothurOut("Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFastaFile).\n\n");     
204         }
205         catch(exception& e) {
206                 m->errorOut(e, "ChimeraPintailCommand", "help");
207                 exit(1);
208         }
209 }
210
211 //***************************************************************************************************************
212
213 ChimeraPintailCommand::~ChimeraPintailCommand(){        /*      do nothing      */      }
214
215 //***************************************************************************************************************
216
217 int ChimeraPintailCommand::execute(){
218         try{
219                 
220                 if (abort == true) { return 0; }
221                 
222                 for (int s = 0; s < fastaFileNames.size(); s++) {
223                                 
224                         m->mothurOut("Checking sequences from " + fastaFileNames[s] + " ..." ); m->mothurOutEndLine();
225
226                         int start = time(NULL); 
227                         
228                         //set user options
229                         if (maskfile == "default") { m->mothurOut("I am using the default 236627 EU009184.1 Shigella dysenteriae str. FBD013."); m->mothurOutEndLine();  }
230                         
231                         //check for quantile to save the time
232                         string tempQuan = "";
233                         if ((!filter) && (maskfile == "")) {
234                                 tempQuan = inputDir + getRootName(getSimpleName(templatefile)) + "pintail.quan";
235                         }else if ((!filter) && (maskfile != "")) { 
236                                 tempQuan = inputDir + getRootName(getSimpleName(templatefile)) + "pintail.masked.quan";
237                         }else if ((filter) && (maskfile != "")) { 
238                                 tempQuan = inputDir + getRootName(getSimpleName(templatefile)) + "pintail.filtered." + getSimpleName(getRootName(fastaFileNames[s])) + "masked.quan";
239                         }else if ((filter) && (maskfile == "")) { 
240                                 tempQuan = inputDir + getRootName(getSimpleName(templatefile)) + "pintail.filtered." + getSimpleName(getRootName(fastaFileNames[s])) + "quan";
241                         }
242                         
243                         ifstream FileTest(tempQuan.c_str());
244                         if(FileTest){   m->mothurOut("I found " + tempQuan + " in your input file directory. I will use it to save time."); m->mothurOutEndLine();  quanfile = tempQuan;  FileTest.close();     }
245                         
246                         chimera = new Pintail(fastaFileNames[s], templatefile, filter, processors, maskfile, consfile, quanfile, window, increment, outputDir);
247                         
248                         string outputFileName, accnosFileName;
249                         if (maskfile != "") {
250                                 outputFileName = outputDir + getRootName(getSimpleName(fastaFileNames[s])) + maskfile + ".pintail.chimeras";
251                                 accnosFileName = outputDir + getRootName(getSimpleName(fastaFileNames[s])) + maskfile + ".pintail.accnos";
252                         }else {
253                                 outputFileName = outputDir + getRootName(getSimpleName(fastaFileNames[s]))  + "pintail.chimeras";
254                                 accnosFileName = outputDir + getRootName(getSimpleName(fastaFileNames[s]))  + "pintail.accnos";
255                         }
256                         bool hasAccnos = true;
257                         
258                         if (m->control_pressed) { delete chimera; for (int j = 0; j < outputNames.size(); j++) {        remove(outputNames[j].c_str()); }  return 0;    }
259                         
260                         if (chimera->getUnaligned()) { 
261                                 m->mothurOut("Your template sequences are different lengths, please correct."); m->mothurOutEndLine(); 
262                                 delete chimera;
263                                 return 0; 
264                         }
265                         templateSeqsLength = chimera->getLength();
266                 
267                 #ifdef USE_MPI
268                         int pid, end, numSeqsPerProcessor; 
269                                 int tag = 2001;
270                                 vector<long> MPIPos;
271                                 MPIWroteAccnos = false;
272                                 
273                                 MPI_Status status; 
274                                 MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
275                                 MPI_Comm_size(MPI_COMM_WORLD, &processors); 
276
277                                 MPI_File inMPI;
278                                 MPI_File outMPI;
279                                 MPI_File outMPIAccnos;
280                                 
281                                 int outMode=MPI_MODE_CREATE|MPI_MODE_WRONLY; 
282                                 int inMode=MPI_MODE_RDONLY; 
283                                 
284                                 char outFilename[1024];
285                                 strcpy(outFilename, outputFileName.c_str());
286                                 
287                                 char outAccnosFilename[1024];
288                                 strcpy(outAccnosFilename, accnosFileName.c_str());
289                                 
290                                 char inFileName[1024];
291                                 strcpy(inFileName, fastaFileNames[s].c_str());
292
293                                 MPI_File_open(MPI_COMM_WORLD, inFileName, inMode, MPI_INFO_NULL, &inMPI);  //comm, filename, mode, info, filepointer
294                                 MPI_File_open(MPI_COMM_WORLD, outFilename, outMode, MPI_INFO_NULL, &outMPI);
295                                 MPI_File_open(MPI_COMM_WORLD, outAccnosFilename, outMode, MPI_INFO_NULL, &outMPIAccnos);
296                                 
297                                 if (m->control_pressed) {  MPI_File_close(&inMPI);  MPI_File_close(&outMPI);   MPI_File_close(&outMPIAccnos);  for (int j = 0; j < outputNames.size(); j++) {   remove(outputNames[j].c_str()); }  delete chimera; return 0;  }
298
299                                 if (pid == 0) { //you are the root process 
300                                                                 
301                                         MPIPos = setFilePosFasta(fastaFileNames[s], numSeqs); //fills MPIPos, returns numSeqs
302                                         
303                                         //send file positions to all processes
304                                         for(int i = 1; i < processors; i++) { 
305                                                 MPI_Send(&numSeqs, 1, MPI_INT, i, tag, MPI_COMM_WORLD);
306                                                 MPI_Send(&MPIPos[0], (numSeqs+1), MPI_LONG, i, tag, MPI_COMM_WORLD);
307                                         }
308                                         
309                                         //figure out how many sequences you have to align
310                                         numSeqsPerProcessor = numSeqs / processors;
311                                         int startIndex =  pid * numSeqsPerProcessor;
312                                         if(pid == (processors - 1)){    numSeqsPerProcessor = numSeqs - pid * numSeqsPerProcessor;      }
313                                 
314                                         //do your part
315                                         driverMPI(startIndex, numSeqsPerProcessor, inMPI, outMPI, outMPIAccnos, MPIPos);
316                                         
317                                         if (m->control_pressed) {  MPI_File_close(&inMPI);  MPI_File_close(&outMPI);   MPI_File_close(&outMPIAccnos);  remove(outputFileName.c_str());  remove(accnosFileName.c_str());  for (int j = 0; j < outputNames.size(); j++) { remove(outputNames[j].c_str()); }  delete chimera; return 0;  }
318                                         
319                                         for (int i = 1; i < processors; i++) {
320                                                 bool tempResult;
321                                                 MPI_Recv(&tempResult, 1, MPI_INT, i, tag, MPI_COMM_WORLD, &status);
322                                                 if (tempResult != 0) { MPIWroteAccnos = true; }
323                                         }
324                                 }else{ //you are a child process
325                                         MPI_Recv(&numSeqs, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
326                                         MPIPos.resize(numSeqs+1);
327                                         MPI_Recv(&MPIPos[0], (numSeqs+1), MPI_LONG, 0, tag, MPI_COMM_WORLD, &status);
328                                         
329                                         //figure out how many sequences you have to align
330                                         numSeqsPerProcessor = numSeqs / processors;
331                                         int startIndex =  pid * numSeqsPerProcessor;
332                                         if(pid == (processors - 1)){    numSeqsPerProcessor = numSeqs - pid * numSeqsPerProcessor;      }
333                                         
334                                         //do your part
335                                         driverMPI(startIndex, numSeqsPerProcessor, inMPI, outMPI, outMPIAccnos, MPIPos);
336                                         
337                                         if (m->control_pressed) {  MPI_File_close(&inMPI);  MPI_File_close(&outMPI);   MPI_File_close(&outMPIAccnos);  for (int j = 0; j < outputNames.size(); j++) {   remove(outputNames[j].c_str()); }  delete chimera; return 0;  }
338
339                                         MPI_Send(&MPIWroteAccnos, 1, MPI_INT, 0, tag, MPI_COMM_WORLD); 
340                                 }
341                                 
342                                 //close files 
343                                 MPI_File_close(&inMPI);
344                                 MPI_File_close(&outMPI);
345                                 MPI_File_close(&outMPIAccnos);
346                                 MPI_Barrier(MPI_COMM_WORLD); //make everyone wait - just in case
347                                 
348                                 //delete accnos file if blank
349                                 if (pid == 0) {
350                                         if (!MPIWroteAccnos) { 
351                                                 hasAccnos = false;      
352                                                 remove(accnosFileName.c_str()); 
353                                         }
354                                 }
355
356                 #else
357                 
358                         //break up file
359                         #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
360                                 if(processors == 1){
361                                         ifstream inFASTA;
362                                         openInputFile(fastaFileNames[s], inFASTA);
363                                         getNumSeqs(inFASTA, numSeqs);
364                                         inFASTA.close();
365                                         
366                                         lines.push_back(new linePair(0, numSeqs));
367                                         
368                                         driver(lines[0], outputFileName, fastaFileNames[s], accnosFileName);
369                                         
370                                         if (m->control_pressed) { 
371                                                 remove(outputFileName.c_str()); 
372                                                 remove(accnosFileName.c_str());
373                                                 for (int j = 0; j < outputNames.size(); j++) {  remove(outputNames[j].c_str()); } 
374                                                 for (int i = 0; i < lines.size(); i++) {  delete lines[i];  }  lines.clear();
375                                                 delete chimera;
376                                                 return 0;
377                                         }
378                                         
379                                         //delete accnos file if its blank 
380                                         if (isBlank(accnosFileName)) {  remove(accnosFileName.c_str());  hasAccnos = false; }
381                                                                         
382                                 }else{
383                                         vector<int> positions;
384                                         processIDS.resize(0);
385                                         
386                                         ifstream inFASTA;
387                                         openInputFile(fastaFileNames[s], inFASTA);
388                                         
389                                         string input;
390                                         while(!inFASTA.eof()){
391                                                 input = getline(inFASTA);
392                                                 if (input.length() != 0) {
393                                                         if(input[0] == '>'){    long int pos = inFASTA.tellg(); positions.push_back(pos - input.length() - 1);  }
394                                                 }
395                                         }
396                                         inFASTA.close();
397                                         
398                                         numSeqs = positions.size();
399                                         
400                                         int numSeqsPerProcessor = numSeqs / processors;
401                                         
402                                         for (int i = 0; i < processors; i++) {
403                                                 long int startPos = positions[ i * numSeqsPerProcessor ];
404                                                 if(i == processors - 1){
405                                                         numSeqsPerProcessor = numSeqs - i * numSeqsPerProcessor;
406                                                 }
407                                                 lines.push_back(new linePair(startPos, numSeqsPerProcessor));
408                                         }
409                                         
410                                         createProcesses(outputFileName, fastaFileNames[s], accnosFileName); 
411                                 
412                                         rename((outputFileName + toString(processIDS[0]) + ".temp").c_str(), outputFileName.c_str());
413                                                 
414                                         //append output files
415                                         for(int i=1;i<processors;i++){
416                                                 appendFiles((outputFileName + toString(processIDS[i]) + ".temp"), outputFileName);
417                                                 remove((outputFileName + toString(processIDS[i]) + ".temp").c_str());
418                                         }
419                                         
420                                         vector<string> nonBlankAccnosFiles;
421                                         //delete blank accnos files generated with multiple processes
422                                         for(int i=0;i<processors;i++){  
423                                                 if (!(isBlank(accnosFileName + toString(processIDS[i]) + ".temp"))) {
424                                                         nonBlankAccnosFiles.push_back(accnosFileName + toString(processIDS[i]) + ".temp");
425                                                 }else { remove((accnosFileName + toString(processIDS[i]) + ".temp").c_str());  }
426                                         }
427                                         
428                                         //append accnos files
429                                         if (nonBlankAccnosFiles.size() != 0) { 
430                                                 rename(nonBlankAccnosFiles[0].c_str(), accnosFileName.c_str());
431                                                 
432                                                 for (int h=1; h < nonBlankAccnosFiles.size(); h++) {
433                                                         appendFiles(nonBlankAccnosFiles[h], accnosFileName);
434                                                         remove(nonBlankAccnosFiles[h].c_str());
435                                                 }
436                                         }else{ hasAccnos = false;  }
437                                         
438                                         if (m->control_pressed) { 
439                                                 remove(outputFileName.c_str()); 
440                                                 remove(accnosFileName.c_str());
441                                                 for (int j = 0; j < outputNames.size(); j++) {  remove(outputNames[j].c_str()); } 
442                                                 for (int i = 0; i < lines.size(); i++) {  delete lines[i];  }  lines.clear();
443                                                 delete chimera;
444                                                 return 0;
445                                         }
446                                 }
447
448                         #else
449                                 ifstream inFASTA;
450                                 openInputFile(fastaFileNames[s], inFASTA);
451                                 getNumSeqs(inFASTA, numSeqs);
452                                 inFASTA.close();
453                                 lines.push_back(new linePair(0, numSeqs));
454                                 
455                                 driver(lines[0], outputFileName, fastaFileNames[s], accnosFileName);
456                                 
457                                 if (m->control_pressed) { 
458                                                 remove(outputFileName.c_str()); 
459                                                 remove(accnosFileName.c_str());
460                                                 for (int j = 0; j < outputNames.size(); j++) {  remove(outputNames[j].c_str()); } 
461                                                 for (int i = 0; i < lines.size(); i++) {  delete lines[i];  }  lines.clear();
462                                                 delete chimera;
463                                                 return 0;
464                                 }
465                                 
466                                 //delete accnos file if its blank 
467                                 if (isBlank(accnosFileName)) {  remove(accnosFileName.c_str());  hasAccnos = false; }
468                         #endif
469                         
470                 #endif  
471                 
472                         delete chimera;
473                         for (int i = 0; i < lines.size(); i++) {  delete lines[i];  }  lines.clear();
474                         
475                         outputNames.push_back(outputFileName);
476                         if (hasAccnos) { outputNames.push_back(accnosFileName); }
477                         
478                         m->mothurOutEndLine();
479                         m->mothurOutEndLine(); m->mothurOut("It took " + toString(time(NULL) - start) + " secs to check " + toString(numSeqs) + " sequences."); m->mothurOutEndLine();
480                 }
481                 
482                 m->mothurOutEndLine();
483                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
484                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }       
485                 m->mothurOutEndLine();
486                         
487                 return 0;
488                 
489         }
490         catch(exception& e) {
491                 m->errorOut(e, "ChimeraPintailCommand", "execute");
492                 exit(1);
493         }
494 }
495 //**********************************************************************************************************************
496
497 int ChimeraPintailCommand::driver(linePair* line, string outputFName, string filename, string accnos){
498         try {
499                 ofstream out;
500                 openOutputFile(outputFName, out);
501                 
502                 ofstream out2;
503                 openOutputFile(accnos, out2);
504                 
505                 ifstream inFASTA;
506                 openInputFile(filename, inFASTA);
507
508                 inFASTA.seekg(line->start);
509                 
510                 for(int i=0;i<line->numSeqs;i++){
511                 
512                         if (m->control_pressed) {       return 1;       }
513                 
514                         Sequence* candidateSeq = new Sequence(inFASTA);  gobble(inFASTA);
515                                 
516                         if (candidateSeq->getName() != "") { //incase there is a commented sequence at the end of a file
517                                 
518                                 if (candidateSeq->getAligned().length() != templateSeqsLength)  {  //chimeracheck does not require seqs to be aligned
519                                         m->mothurOut(candidateSeq->getName() + " is not the same length as the template sequences. Skipping."); m->mothurOutEndLine();
520                                 }else{
521                                         //find chimeras
522                                         chimera->getChimeras(candidateSeq);
523                                         
524                                         if (m->control_pressed) {       delete candidateSeq; return 1;  }
525                 
526                                         //print results
527                                         chimera->print(out, out2);
528                                 }
529                         }
530                         delete candidateSeq;
531                         
532                         //report progress
533                         if((i+1) % 100 == 0){   m->mothurOut("Processing sequence: " + toString(i+1)); m->mothurOutEndLine();           }
534                 }
535                 //report progress
536                 if((line->numSeqs) % 100 != 0){ m->mothurOut("Processing sequence: " + toString(line->numSeqs)); m->mothurOutEndLine();         }
537                 
538                 out.close();
539                 out2.close();
540                 inFASTA.close();
541                                 
542                 return 0;
543         }
544         catch(exception& e) {
545                 m->errorOut(e, "ChimeraPintailCommand", "driver");
546                 exit(1);
547         }
548 }
549 //**********************************************************************************************************************
550 #ifdef USE_MPI
551 int ChimeraPintailCommand::driverMPI(int start, int num, MPI_File& inMPI, MPI_File& outMPI, MPI_File& outAccMPI, vector<long>& MPIPos){
552         try {
553                                 
554                 MPI_Status status; 
555                 int pid;
556                 MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
557                 
558                 for(int i=0;i<num;i++){
559                         
560                         if (m->control_pressed) {       return 1;       }
561                         
562                         //read next sequence
563                         int length = MPIPos[start+i+1] - MPIPos[start+i];
564         
565                         char* buf4 = new char[length];
566                         MPI_File_read_at(inMPI, MPIPos[start+i], buf4, length, MPI_CHAR, &status);
567                         
568                         string tempBuf = buf4;
569                         if (tempBuf.length() > length) { tempBuf = tempBuf.substr(0, length);  }
570                         istringstream iss (tempBuf,istringstream::in);
571                         delete buf4;
572
573                         Sequence* candidateSeq = new Sequence(iss);  gobble(iss);
574                                 
575                         if (candidateSeq->getName() != "") { //incase there is a commented sequence at the end of a file
576                                 
577                                 if      (candidateSeq->getAligned().length() != templateSeqsLength) {  //chimeracheck does not require seqs to be aligned
578                                         m->mothurOut(candidateSeq->getName() + " is not the same length as the template sequences. Skipping."); m->mothurOutEndLine();
579                                 }else{
580                                         //find chimeras
581                                         chimera->getChimeras(candidateSeq);
582                                         
583                                         if (m->control_pressed) {       delete candidateSeq; return 1;  }
584                 
585                                         //print results
586                                         bool isChimeric = chimera->print(outMPI, outAccMPI);
587                                         if (isChimeric) { MPIWroteAccnos = true;  }
588                                 }
589                         }
590                         delete candidateSeq;
591                         
592                         //report progress
593                         if((i+1) % 100 == 0){  cout << "Processing sequence: " << (i+1) << endl;        m->mothurOutJustToLog("Processing sequence: " + toString(i+1) + "\n");          }
594                 }
595                 //report progress
596                 if(num % 100 != 0){             cout << "Processing sequence: " << num << endl; m->mothurOutJustToLog("Processing sequence: " + toString(num) + "\n");  }
597                 
598                                 
599                 return 0;
600         }
601         catch(exception& e) {
602                 m->errorOut(e, "ChimeraPintailCommand", "driverMPI");
603                 exit(1);
604         }
605 }
606 #endif
607
608 /**************************************************************************************************/
609
610 int ChimeraPintailCommand::createProcesses(string outputFileName, string filename, string accnos) {
611         try {
612 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
613                 int process = 0;
614                 //              processIDS.resize(0);
615                 
616                 //loop through and create all the processes you want
617                 while (process != processors) {
618                         int pid = fork();
619                         
620                         if (pid > 0) {
621                                 processIDS.push_back(pid);  //create map from line number to pid so you can append files in correct order later
622                                 process++;
623                         }else if (pid == 0){
624                                 driver(lines[process], outputFileName + toString(getpid()) + ".temp", filename, accnos + toString(getpid()) + ".temp");
625                                 exit(0);
626                         }else { m->mothurOut("unable to spawn the necessary processes."); m->mothurOutEndLine(); exit(0); }
627                 }
628                 
629                 //force parent to wait until all the processes are done
630                 for (int i=0;i<processors;i++) { 
631                         int temp = processIDS[i];
632                         wait(&temp);
633                 }
634                 
635                 return 0;
636 #endif          
637         }
638         catch(exception& e) {
639                 m->errorOut(e, "ChimeraPintailCommand", "createProcesses");
640                 exit(1);
641         }
642 }
643
644 /**************************************************************************************************/
645
646