]> git.donarmstrong.com Git - mothur.git/blob - chimerapintailcommand.cpp
moved utilities out of mothur.h and into mothurOut class.
[mothur.git] / chimerapintailcommand.cpp
1 /*
2  *  chimerapintailcommand.cpp
3  *  Mothur
4  *
5  *  Created by westcott on 4/1/10.
6  *  Copyright 2010 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "chimerapintailcommand.h"
11 #include "pintail.h"
12
13 //***************************************************************************************************************
14
15 ChimeraPintailCommand::ChimeraPintailCommand(string option)  {
16         try {
17                 abort = false;
18                 
19                 //allow user to run help
20                 if(option == "help") { help(); abort = true; }
21                 
22                 else {
23                         //valid paramters for this command
24                         string Array[] =  {"fasta","filter","processors","window" ,"increment","template","conservation","quantile","mask","outputdir","inputdir"};
25                         vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
26                         
27                         OptionParser parser(option);
28                         map<string,string> parameters = parser.getParameters();
29                         
30                         ValidParameters validParameter("chimera.pintail");
31                         map<string,string>::iterator it;
32                         
33                         //check to make sure all parameters are valid for command
34                         for (it = parameters.begin(); it != parameters.end(); it++) { 
35                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
36                         }
37                         
38                         //if the user changes the input directory command factory will send this info to us in the output parameter 
39                         inputDir = validParameter.validFile(parameters, "inputdir", false);             
40                         if (inputDir == "not found"){   inputDir = "";          }
41                         else {
42                                 string path;
43                                 it = parameters.find("template");
44                                 //user has given a template file
45                                 if(it != parameters.end()){ 
46                                         path = m->hasPath(it->second);
47                                         //if the user has not given a path then, add inputdir. else leave path alone.
48                                         if (path == "") {       parameters["template"] = inputDir + it->second;         }
49                                 }
50                                 
51                                 it = parameters.find("conservation");
52                                 //user has given a template file
53                                 if(it != parameters.end()){ 
54                                         path = m->hasPath(it->second);
55                                         //if the user has not given a path then, add inputdir. else leave path alone.
56                                         if (path == "") {       parameters["conservation"] = inputDir + it->second;             }
57                                 }
58                                 
59                                 it = parameters.find("quantile");
60                                 //user has given a template file
61                                 if(it != parameters.end()){ 
62                                         path = m->hasPath(it->second);
63                                         //if the user has not given a path then, add inputdir. else leave path alone.
64                                         if (path == "") {       parameters["quantile"] = inputDir + it->second;         }
65                                 }
66                         }
67
68                         
69                         //check for required parameters
70                         fastafile = validParameter.validFile(parameters, "fasta", false);
71                         if (fastafile == "not found") { fastafile = ""; m->mothurOut("fasta is a required parameter for the chimera.pintail command."); m->mothurOutEndLine(); abort = true;  }
72                         else { 
73                                 m->splitAtDash(fastafile, fastaFileNames);
74                                 
75                                 //go through files and make sure they are good, if not, then disregard them
76                                 for (int i = 0; i < fastaFileNames.size(); i++) {
77                                         if (inputDir != "") {
78                                                 string path = m->hasPath(fastaFileNames[i]);
79                                                 //if the user has not given a path then, add inputdir. else leave path alone.
80                                                 if (path == "") {       fastaFileNames[i] = inputDir + fastaFileNames[i];               }
81                                         }
82         
83                                         int ableToOpen;
84                                         ifstream in;
85                                         
86                                         ableToOpen = m->openInputFile(fastaFileNames[i], in, "noerror");
87                                 
88                                         //if you can't open it, try default location
89                                         if (ableToOpen == 1) {
90                                                 if (m->getDefaultPath() != "") { //default path is set
91                                                         string tryPath = m->getDefaultPath() + m->getSimpleName(fastaFileNames[i]);
92                                                         m->mothurOut("Unable to open " + fastaFileNames[i] + ". Trying default " + tryPath); m->mothurOutEndLine();
93                                                         ableToOpen = m->openInputFile(tryPath, in, "noerror");
94                                                         fastaFileNames[i] = tryPath;
95                                                 }
96                                         }
97                                         in.close();
98                                         
99                                         if (ableToOpen == 1) { 
100                                                 m->mothurOut("Unable to open " + fastaFileNames[i] + ". It will be disregarded."); m->mothurOutEndLine(); 
101                                                 //erase from file list
102                                                 fastaFileNames.erase(fastaFileNames.begin()+i);
103                                                 i--;
104                                         }
105                                 }
106                                 
107                                 //make sure there is at least one valid file left
108                                 if (fastaFileNames.size() == 0) { m->mothurOut("no valid files."); m->mothurOutEndLine(); abort = true; }
109                         }
110                         
111                         string temp;
112                         temp = validParameter.validFile(parameters, "filter", false);                   if (temp == "not found") { temp = "F"; }
113                         filter = m->isTrue(temp);
114                         
115                         temp = validParameter.validFile(parameters, "processors", false);               if (temp == "not found") { temp = "1"; }
116                         convert(temp, processors);
117                         
118                         temp = validParameter.validFile(parameters, "window", false);                   if (temp == "not found") { temp = "0"; }
119                         convert(temp, window);
120                         
121                         temp = validParameter.validFile(parameters, "increment", false);                if (temp == "not found") { temp = "25"; }
122                         convert(temp, increment);
123                         
124                         maskfile = validParameter.validFile(parameters, "mask", false);
125                         if (maskfile == "not found") { maskfile = "";  }        
126                         else if (maskfile != "default")  { 
127                                 if (inputDir != "") {
128                                         string path = m->hasPath(maskfile);
129                                         //if the user has not given a path then, add inputdir. else leave path alone.
130                                         if (path == "") {       maskfile = inputDir + maskfile;         }
131                                 }
132
133                                 ifstream in;
134                                 int     ableToOpen = m->openInputFile(maskfile, in);
135                                 if (ableToOpen == 1) { abort = true; }
136                                 in.close();
137                         }
138
139                         
140                         //if the user changes the output directory command factory will send this info to us in the output parameter 
141                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  
142                                 outputDir = ""; 
143                                 outputDir += m->hasPath(fastafile); //if user entered a file with a path then preserve it       
144                         }
145                 
146                         templatefile = validParameter.validFile(parameters, "template", true);
147                         if (templatefile == "not open") { abort = true; }
148                         else if (templatefile == "not found") { templatefile = "";  m->mothurOut("template is a required parameter for the chimera.pintail command."); m->mothurOutEndLine(); abort = true;  }
149                         
150                         consfile = validParameter.validFile(parameters, "conservation", true);
151                         if (consfile == "not open") { abort = true; }
152                         else if (consfile == "not found") { 
153                                 consfile = "";  
154                                 //check for consfile
155                                 string tempConsFile = m->getRootName(inputDir + m->getSimpleName(templatefile)) + "freq";
156                                 ifstream FileTest(tempConsFile.c_str());
157                                 if(FileTest){   
158                                         bool GoodFile = m->checkReleaseVersion(FileTest, m->getVersion());
159                                         if (GoodFile) {  
160                                                 m->mothurOut("I found " + tempConsFile + " in your input file directory. I will use it to save time."); m->mothurOutEndLine();  consfile = tempConsFile;  FileTest.close();     
161                                         }
162                                 }
163                         }       
164                         
165                         quanfile = validParameter.validFile(parameters, "quantile", true);
166                         if (quanfile == "not open") { abort = true; }
167                         else if (quanfile == "not found") { quanfile = ""; }
168                 }
169         }
170         catch(exception& e) {
171                 m->errorOut(e, "ChimeraPintailCommand", "ChimeraPintailCommand");
172                 exit(1);
173         }
174 }
175 //**********************************************************************************************************************
176
177 void ChimeraPintailCommand::help(){
178         try {
179         
180                 m->mothurOut("The chimera.pintail command reads a fastafile and templatefile and outputs potentially chimeric sequences.\n");
181                 m->mothurOut("This command was created using the algorythms described in the 'At Least 1 in 20 16S rRNA Sequence Records Currently Held in the Public Repositories is Estimated To Contain Substantial Anomalies' paper by Kevin E. Ashelford 1, Nadia A. Chuzhanova 3, John C. Fry 1, Antonia J. Jones 2 and Andrew J. Weightman 1.\n");
182                 m->mothurOut("The chimera.pintail command parameters are fasta, template, filter, mask, processors, window, increment, conservation and quantile.\n");
183                 m->mothurOut("The fasta parameter allows you to enter the fasta file containing your potentially chimeric sequences, and is required. \n");
184                 m->mothurOut("You may enter multiple fasta files by separating their names with dashes. ie. fasta=abrecovery.fasta-amzon.fasta \n");
185                 m->mothurOut("The template parameter allows you to enter a template file containing known non-chimeric sequences, and is required. \n");
186                 m->mothurOut("The filter parameter allows you to specify if you would like to apply a vertical and 50% soft filter. \n");
187                 m->mothurOut("The mask parameter allows you to specify a file containing one sequence you wish to use as a mask for the your sequences, by default no mask is applied.  You can apply an ecoli mask by typing, mask=default. \n");
188                 m->mothurOut("The processors parameter allows you to specify how many processors you would like to use.  The default is 1. \n");
189                 #ifdef USE_MPI
190                 m->mothurOut("When using MPI, the processors parameter is set to the number of MPI processes running. \n");
191                 #endif
192                 m->mothurOut("The window parameter allows you to specify the window size for searching for chimeras, default=300. \n");
193                 m->mothurOut("The increment parameter allows you to specify how far you move each window while finding chimeric sequences, default=25.\n");
194                 m->mothurOut("The conservation parameter allows you to enter a frequency file containing the highest bases frequency at each place in the alignment.\n");
195                 m->mothurOut("The quantile parameter allows you to enter a file containing quantiles for a template files sequences, if you use the filter the quantile file generated becomes unique to the fasta file you used.\n");
196                 m->mothurOut("The chimera.pintail command should be in the following format: \n");
197                 m->mothurOut("chimera.pintail(fasta=yourFastaFile, template=yourTemplate) \n");
198                 m->mothurOut("Example: chimera.pintail(fasta=AD.align, template=silva.bacteria.fasta) \n");
199                 m->mothurOut("Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFastaFile).\n\n");     
200         }
201         catch(exception& e) {
202                 m->errorOut(e, "ChimeraPintailCommand", "help");
203                 exit(1);
204         }
205 }
206
207 //***************************************************************************************************************
208
209 ChimeraPintailCommand::~ChimeraPintailCommand(){        /*      do nothing      */      }
210
211 //***************************************************************************************************************
212
213 int ChimeraPintailCommand::execute(){
214         try{
215                 
216                 if (abort == true) { return 0; }
217                 
218                 for (int s = 0; s < fastaFileNames.size(); s++) {
219                                 
220                         m->mothurOut("Checking sequences from " + fastaFileNames[s] + " ..." ); m->mothurOutEndLine();
221
222                         int start = time(NULL); 
223                         
224                         //set user options
225                         if (maskfile == "default") { m->mothurOut("I am using the default 236627 EU009184.1 Shigella dysenteriae str. FBD013."); m->mothurOutEndLine();  }
226                         
227                         //check for quantile to save the time
228                         string tempQuan = "";
229                         if ((!filter) && (maskfile == "")) {
230                                 tempQuan = inputDir + m->getRootName(m->getSimpleName(templatefile)) + "pintail.quan";
231                         }else if ((!filter) && (maskfile != "")) { 
232                                 tempQuan = inputDir + m->getRootName(m->getSimpleName(templatefile)) + "pintail.masked.quan";
233                         }else if ((filter) && (maskfile != "")) { 
234                                 tempQuan = inputDir + m->getRootName(m->getSimpleName(templatefile)) + "pintail.filtered." + m->getSimpleName(m->getRootName(fastaFileNames[s])) + "masked.quan";
235                         }else if ((filter) && (maskfile == "")) { 
236                                 tempQuan = inputDir + m->getRootName(m->getSimpleName(templatefile)) + "pintail.filtered." + m->getSimpleName(m->getRootName(fastaFileNames[s])) + "quan";
237                         }
238                         
239                         ifstream FileTest(tempQuan.c_str());
240                         if(FileTest){   
241                                 bool GoodFile = m->checkReleaseVersion(FileTest, m->getVersion());
242                                 if (GoodFile) {  
243                                         m->mothurOut("I found " + tempQuan + " in your input file directory. I will use it to save time."); m->mothurOutEndLine();  quanfile = tempQuan;  FileTest.close();     
244                                 }
245                         }
246                         
247                         chimera = new Pintail(fastaFileNames[s], templatefile, filter, processors, maskfile, consfile, quanfile, window, increment, outputDir);
248                         
249                         string outputFileName, accnosFileName;
250                         if (maskfile != "") {
251                                 outputFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + maskfile + ".pintail.chimeras";
252                                 accnosFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + maskfile + ".pintail.accnos";
253                         }else {
254                                 outputFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s]))  + "pintail.chimeras";
255                                 accnosFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s]))  + "pintail.accnos";
256                         }
257                         
258                         if (m->control_pressed) { delete chimera; for (int j = 0; j < outputNames.size(); j++) {        remove(outputNames[j].c_str()); }  return 0;    }
259                         
260                         if (chimera->getUnaligned()) { 
261                                 m->mothurOut("Your template sequences are different lengths, please correct."); m->mothurOutEndLine(); 
262                                 delete chimera;
263                                 return 0; 
264                         }
265                         templateSeqsLength = chimera->getLength();
266                 
267                 #ifdef USE_MPI
268                         int pid, end, numSeqsPerProcessor; 
269                                 int tag = 2001;
270                                 vector<unsigned long int> MPIPos;
271                                 
272                                 MPI_Status status; 
273                                 MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
274                                 MPI_Comm_size(MPI_COMM_WORLD, &processors); 
275
276                                 MPI_File inMPI;
277                                 MPI_File outMPI;
278                                 MPI_File outMPIAccnos;
279                                 
280                                 int outMode=MPI_MODE_CREATE|MPI_MODE_WRONLY; 
281                                 int inMode=MPI_MODE_RDONLY; 
282                                 
283                                 char outFilename[1024];
284                                 strcpy(outFilename, outputFileName.c_str());
285                                 
286                                 char outAccnosFilename[1024];
287                                 strcpy(outAccnosFilename, accnosFileName.c_str());
288                                 
289                                 char inFileName[1024];
290                                 strcpy(inFileName, fastaFileNames[s].c_str());
291
292                                 MPI_File_open(MPI_COMM_WORLD, inFileName, inMode, MPI_INFO_NULL, &inMPI);  //comm, filename, mode, info, filepointer
293                                 MPI_File_open(MPI_COMM_WORLD, outFilename, outMode, MPI_INFO_NULL, &outMPI);
294                                 MPI_File_open(MPI_COMM_WORLD, outAccnosFilename, outMode, MPI_INFO_NULL, &outMPIAccnos);
295                                 
296                                 if (m->control_pressed) {  MPI_File_close(&inMPI);  MPI_File_close(&outMPI);   MPI_File_close(&outMPIAccnos);  for (int j = 0; j < outputNames.size(); j++) {   remove(outputNames[j].c_str()); }  delete chimera; return 0;  }
297
298                                 if (pid == 0) { //you are the root process 
299                                                                 
300                                         MPIPos = m->setFilePosFasta(fastaFileNames[s], numSeqs); //fills MPIPos, returns numSeqs
301                                         
302                                         //send file positions to all processes
303                                         for(int i = 1; i < processors; i++) { 
304                                                 MPI_Send(&numSeqs, 1, MPI_INT, i, tag, MPI_COMM_WORLD);
305                                                 MPI_Send(&MPIPos[0], (numSeqs+1), MPI_LONG, i, tag, MPI_COMM_WORLD);
306                                         }
307                                         
308                                         //figure out how many sequences you have to align
309                                         numSeqsPerProcessor = numSeqs / processors;
310                                         int startIndex =  pid * numSeqsPerProcessor;
311                                         if(pid == (processors - 1)){    numSeqsPerProcessor = numSeqs - pid * numSeqsPerProcessor;      }
312                                 
313                                         //do your part
314                                         driverMPI(startIndex, numSeqsPerProcessor, inMPI, outMPI, outMPIAccnos, MPIPos);
315                                         
316                                         if (m->control_pressed) {  MPI_File_close(&inMPI);  MPI_File_close(&outMPI);   MPI_File_close(&outMPIAccnos);  remove(outputFileName.c_str());  remove(accnosFileName.c_str());  for (int j = 0; j < outputNames.size(); j++) { remove(outputNames[j].c_str()); }  delete chimera; return 0;  }
317                                         
318                                 }else{ //you are a child process
319                                         MPI_Recv(&numSeqs, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
320                                         MPIPos.resize(numSeqs+1);
321                                         MPI_Recv(&MPIPos[0], (numSeqs+1), MPI_LONG, 0, tag, MPI_COMM_WORLD, &status);
322                                         
323                                         //figure out how many sequences you have to align
324                                         numSeqsPerProcessor = numSeqs / processors;
325                                         int startIndex =  pid * numSeqsPerProcessor;
326                                         if(pid == (processors - 1)){    numSeqsPerProcessor = numSeqs - pid * numSeqsPerProcessor;      }
327                                         
328                                         //do your part
329                                         driverMPI(startIndex, numSeqsPerProcessor, inMPI, outMPI, outMPIAccnos, MPIPos);
330                                         
331                                         if (m->control_pressed) {  MPI_File_close(&inMPI);  MPI_File_close(&outMPI);   MPI_File_close(&outMPIAccnos);  for (int j = 0; j < outputNames.size(); j++) {   remove(outputNames[j].c_str()); }  delete chimera; return 0;  }
332                                 }
333                                 
334                                 //close files 
335                                 MPI_File_close(&inMPI);
336                                 MPI_File_close(&outMPI);
337                                 MPI_File_close(&outMPIAccnos);
338                                 MPI_Barrier(MPI_COMM_WORLD); //make everyone wait - just in case
339                 #else
340                         vector<unsigned long int> positions = m->divideFile(fastaFileNames[s], processors);
341                                 
342                         for (int i = 0; i < (positions.size()-1); i++) {
343                                 lines.push_back(new linePair(positions[i], positions[(i+1)]));
344                         }       
345                         
346                         //break up file
347                         #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
348                                 if(processors == 1){
349                 
350                                         numSeqs = driver(lines[0], outputFileName, fastaFileNames[s], accnosFileName);
351                                         
352                                         if (m->control_pressed) { remove(outputFileName.c_str()); remove(accnosFileName.c_str()); for (int j = 0; j < outputNames.size(); j++) {        remove(outputNames[j].c_str()); } for (int i = 0; i < lines.size(); i++) {  delete lines[i];  }  lines.clear(); delete chimera; return 0; }
353                                         
354                                 }else{
355                                         processIDS.resize(0);
356                                         
357                                         numSeqs = createProcesses(outputFileName, fastaFileNames[s], accnosFileName); 
358                                 
359                                         rename((outputFileName + toString(processIDS[0]) + ".temp").c_str(), outputFileName.c_str());
360                                         rename((accnosFileName + toString(processIDS[0]) + ".temp").c_str(), accnosFileName.c_str());
361                                                 
362                                         //append output files
363                                         for(int i=1;i<processors;i++){
364                                                 m->appendFiles((outputFileName + toString(processIDS[i]) + ".temp"), outputFileName);
365                                                 remove((outputFileName + toString(processIDS[i]) + ".temp").c_str());
366                                         }
367                                         
368                                         //append output files
369                                         for(int i=1;i<processors;i++){
370                                                 m->appendFiles((accnosFileName + toString(processIDS[i]) + ".temp"), accnosFileName);
371                                                 remove((accnosFileName + toString(processIDS[i]) + ".temp").c_str());
372                                         }
373                                                                                 
374                                         if (m->control_pressed) { 
375                                                 remove(outputFileName.c_str()); 
376                                                 remove(accnosFileName.c_str());
377                                                 for (int j = 0; j < outputNames.size(); j++) {  remove(outputNames[j].c_str()); } 
378                                                 for (int i = 0; i < lines.size(); i++) {  delete lines[i];  }  lines.clear();
379                                                 delete chimera;
380                                                 return 0;
381                                         }
382                                 }
383
384                         #else
385                                 numSeqs = driver(lines[0], outputFileName, fastaFileNames[s], accnosFileName);
386                                 
387                                 if (m->control_pressed) { remove(outputFileName.c_str()); remove(accnosFileName.c_str()); for (int j = 0; j < outputNames.size(); j++) {        remove(outputNames[j].c_str()); } for (int i = 0; i < lines.size(); i++) {  delete lines[i];  }  lines.clear(); delete chimera; return 0; }
388                         #endif
389                         
390                 #endif  
391                 
392                         delete chimera;
393                         for (int i = 0; i < lines.size(); i++) {  delete lines[i];  }  lines.clear();
394                         
395                         outputNames.push_back(outputFileName);
396                         outputNames.push_back(accnosFileName); 
397                         
398                         m->mothurOutEndLine();
399                         m->mothurOutEndLine(); m->mothurOut("It took " + toString(time(NULL) - start) + " secs to check " + toString(numSeqs) + " sequences."); m->mothurOutEndLine();
400                 }
401                 
402                 m->mothurOutEndLine();
403                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
404                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }       
405                 m->mothurOutEndLine();
406                         
407                 return 0;
408                 
409         }
410         catch(exception& e) {
411                 m->errorOut(e, "ChimeraPintailCommand", "execute");
412                 exit(1);
413         }
414 }
415 //**********************************************************************************************************************
416
417 int ChimeraPintailCommand::driver(linePair* filePos, string outputFName, string filename, string accnos){
418         try {
419                 ofstream out;
420                 m->openOutputFile(outputFName, out);
421                 
422                 ofstream out2;
423                 m->openOutputFile(accnos, out2);
424                 
425                 ifstream inFASTA;
426                 m->openInputFile(filename, inFASTA);
427
428                 inFASTA.seekg(filePos->start);
429
430                 bool done = false;
431                 int count = 0;
432         
433                 while (!done) {
434                                 
435                         if (m->control_pressed) {       return 1;       }
436                 
437                         Sequence* candidateSeq = new Sequence(inFASTA);  m->gobble(inFASTA);
438                                 
439                         if (candidateSeq->getName() != "") { //incase there is a commented sequence at the end of a file
440                                 
441                                 if (candidateSeq->getAligned().length() != templateSeqsLength)  {  //chimeracheck does not require seqs to be aligned
442                                         m->mothurOut(candidateSeq->getName() + " is not the same length as the template sequences. Skipping."); m->mothurOutEndLine();
443                                 }else{
444                                         //find chimeras
445                                         chimera->getChimeras(candidateSeq);
446                                         
447                                         if (m->control_pressed) {       delete candidateSeq; return 1;  }
448                 
449                                         //print results
450                                         chimera->print(out, out2);
451                                 }
452                                 count++;
453                         }
454                         delete candidateSeq;
455                         
456                         unsigned long int pos = inFASTA.tellg();
457                         if ((pos == -1) || (pos >= filePos->end)) { break; }
458                         
459                         //report progress
460                         if((count) % 100 == 0){ m->mothurOut("Processing sequence: " + toString(count)); m->mothurOutEndLine();         }
461                 }
462                 //report progress
463                 if((count) % 100 != 0){ m->mothurOut("Processing sequence: " + toString(count)); m->mothurOutEndLine();         }
464                 
465                 out.close();
466                 out2.close();
467                 inFASTA.close();
468                                 
469                 return count;
470         }
471         catch(exception& e) {
472                 m->errorOut(e, "ChimeraPintailCommand", "driver");
473                 exit(1);
474         }
475 }
476 //**********************************************************************************************************************
477 #ifdef USE_MPI
478 int ChimeraPintailCommand::driverMPI(int start, int num, MPI_File& inMPI, MPI_File& outMPI, MPI_File& outAccMPI, vector<unsigned long int>& MPIPos){
479         try {
480                                 
481                 MPI_Status status; 
482                 int pid;
483                 MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
484                 
485                 for(int i=0;i<num;i++){
486                         
487                         if (m->control_pressed) {       return 1;       }
488                         
489                         //read next sequence
490                         int length = MPIPos[start+i+1] - MPIPos[start+i];
491         
492                         char* buf4 = new char[length];
493                         MPI_File_read_at(inMPI, MPIPos[start+i], buf4, length, MPI_CHAR, &status);
494                         
495                         string tempBuf = buf4;
496                         if (tempBuf.length() > length) { tempBuf = tempBuf.substr(0, length);  }
497                         istringstream iss (tempBuf,istringstream::in);
498                         delete buf4;
499
500                         Sequence* candidateSeq = new Sequence(iss);  m->gobble(iss);
501                                 
502                         if (candidateSeq->getName() != "") { //incase there is a commented sequence at the end of a file
503                                 
504                                 if      (candidateSeq->getAligned().length() != templateSeqsLength) {  //chimeracheck does not require seqs to be aligned
505                                         m->mothurOut(candidateSeq->getName() + " is not the same length as the template sequences. Skipping."); m->mothurOutEndLine();
506                                 }else{
507                                         //find chimeras
508                                         chimera->getChimeras(candidateSeq);
509                                         
510                                         if (m->control_pressed) {       delete candidateSeq; return 1;  }
511                 
512                                         //print results
513                                         bool isChimeric = chimera->print(outMPI, outAccMPI);
514                                 }
515                         }
516                         delete candidateSeq;
517                         
518                         //report progress
519                         if((i+1) % 100 == 0){  cout << "Processing sequence: " << (i+1) << endl;        m->mothurOutJustToLog("Processing sequence: " + toString(i+1) + "\n");          }
520                 }
521                 //report progress
522                 if(num % 100 != 0){             cout << "Processing sequence: " << num << endl; m->mothurOutJustToLog("Processing sequence: " + toString(num) + "\n");  }
523                 
524                                 
525                 return 0;
526         }
527         catch(exception& e) {
528                 m->errorOut(e, "ChimeraPintailCommand", "driverMPI");
529                 exit(1);
530         }
531 }
532 #endif
533
534 /**************************************************************************************************/
535
536 int ChimeraPintailCommand::createProcesses(string outputFileName, string filename, string accnos) {
537         try {
538 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
539                 int process = 0;
540                 int num = 0;
541                 
542                 //loop through and create all the processes you want
543                 while (process != processors) {
544                         int pid = fork();
545                         
546                         if (pid > 0) {
547                                 processIDS.push_back(pid);  //create map from line number to pid so you can append files in correct order later
548                                 process++;
549                         }else if (pid == 0){
550                                 num = driver(lines[process], outputFileName + toString(getpid()) + ".temp", filename, accnos + toString(getpid()) + ".temp");
551                                 
552                                 //pass numSeqs to parent
553                                 ofstream out;
554                                 string tempFile = outputFileName + toString(getpid()) + ".num.temp";
555                                 m->openOutputFile(tempFile, out);
556                                 out << num << endl;
557                                 out.close();
558
559                                 exit(0);
560                         }else { m->mothurOut("unable to spawn the necessary processes."); m->mothurOutEndLine(); exit(0); }
561                 }
562                 
563                 //force parent to wait until all the processes are done
564                 for (int i=0;i<processors;i++) { 
565                         int temp = processIDS[i];
566                         wait(&temp);
567                 }
568                 
569                 for (int i = 0; i < processIDS.size(); i++) {
570                         ifstream in;
571                         string tempFile =  outputFileName + toString(processIDS[i]) + ".num.temp";
572                         m->openInputFile(tempFile, in);
573                         if (!in.eof()) { int tempNum = 0; in >> tempNum; num += tempNum; }
574                         in.close(); remove(tempFile.c_str());
575                 }
576                 
577                 return num;
578 #endif          
579         }
580         catch(exception& e) {
581                 m->errorOut(e, "ChimeraPintailCommand", "createProcesses");
582                 exit(1);
583         }
584 }
585
586 /**************************************************************************************************/
587
588