]> git.donarmstrong.com Git - mothur.git/blobdiff - chimerapintailcommand.cpp
modified trim.seqs to split by primer name if primer name is given, and warn if dupli...
[mothur.git] / chimerapintailcommand.cpp
index 62f1f2bae942f1c91ca1ee16bb21b6a4996c790e..6cf581b9308f21159186ece928bcbdf8e4142803 100644 (file)
@@ -36,18 +36,10 @@ ChimeraPintailCommand::ChimeraPintailCommand(string option)  {
                        }
                        
                        //if the user changes the input directory command factory will send this info to us in the output parameter 
-                       string inputDir = validParameter.validFile(parameters, "inputdir", false);              
+                       inputDir = validParameter.validFile(parameters, "inputdir", false);             
                        if (inputDir == "not found"){   inputDir = "";          }
                        else {
                                string path;
-                               it = parameters.find("fasta");
-                               //user has given a template file
-                               if(it != parameters.end()){ 
-                                       path = hasPath(it->second);
-                                       //if the user has not given a path then, add inputdir. else leave path alone.
-                                       if (path == "") {       parameters["fasta"] = inputDir + it->second;            }
-                               }
-                               
                                it = parameters.find("template");
                                //user has given a template file
                                if(it != parameters.end()){ 
@@ -75,27 +67,68 @@ ChimeraPintailCommand::ChimeraPintailCommand(string option)  {
 
                        
                        //check for required parameters
-                       fastafile = validParameter.validFile(parameters, "fasta", true);
-                       if (fastafile == "not open") { abort = true; }
-                       else if (fastafile == "not found") { fastafile = ""; m->mothurOut("fasta is a required parameter for the chimera.pintail command."); m->mothurOutEndLine(); abort = true;  }    
-                       
-                       //if the user changes the output directory command factory will send this info to us in the output parameter 
-                       outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  
-                               outputDir = ""; 
-                               outputDir += hasPath(fastafile); //if user entered a file with a path then preserve it  
-                       }
+                       fastafile = validParameter.validFile(parameters, "fasta", false);
+                       if (fastafile == "not found") { fastafile = ""; m->mothurOut("fasta is a required parameter for the chimera.pintail command."); m->mothurOutEndLine(); abort = true;  }
+                       else { 
+                               splitAtDash(fastafile, fastaFileNames);
+                               
+                               //go through files and make sure they are good, if not, then disregard them
+                               for (int i = 0; i < fastaFileNames.size(); i++) {
+                                       if (inputDir != "") {
+                                               string path = hasPath(fastaFileNames[i]);
+                                               //if the user has not given a path then, add inputdir. else leave path alone.
+                                               if (path == "") {       fastaFileNames[i] = inputDir + fastaFileNames[i];               }
+                                       }
+       
+                                       int ableToOpen;
+                                       ifstream in;
+                                       
+                                       #ifdef USE_MPI  
+                                               int pid;
+                                               MPI_Comm_size(MPI_COMM_WORLD, &processors); //set processors to the number of mpi processes running
+                                               MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
+                               
+                                               if (pid == 0) {
+                                       #endif
 
-                       templatefile = validParameter.validFile(parameters, "template", true);
-                       if (templatefile == "not open") { abort = true; }
-                       else if (templatefile == "not found") { templatefile = "";  m->mothurOut("template is a required parameter for the chimera.pintail command."); m->mothurOutEndLine(); abort = true;  }
+                                       ableToOpen = openInputFile(fastaFileNames[i], in);
+                                       in.close();
+                                       
+                                       #ifdef USE_MPI  
+                                                       for (int j = 1; j < processors; j++) {
+                                                               MPI_Send(&ableToOpen, 1, MPI_INT, j, 2001, MPI_COMM_WORLD); 
+                                                       }
+                                               }else{
+                                                       MPI_Status status;
+                                                       MPI_Recv(&ableToOpen, 1, MPI_INT, 0, 2001, MPI_COMM_WORLD, &status);
+                                               }
+                                               
+                                       #endif
+
+                                       if (ableToOpen == 1) { 
+                                               m->mothurOut(fastaFileNames[i] + " will be disregarded."); m->mothurOutEndLine(); 
+                                               //erase from file list
+                                               fastaFileNames.erase(fastaFileNames.begin()+i);
+                                               i--;
+                                       }
+                               }
+                               
+                               //make sure there is at least one valid file left
+                               if (fastaFileNames.size() == 0) { m->mothurOut("no valid files."); m->mothurOutEndLine(); abort = true; }
+                       }
                        
-                       consfile = validParameter.validFile(parameters, "conservation", true);
-                       if (consfile == "not open") { abort = true; }
-                       else if (consfile == "not found") { consfile = "";  }   
+                       string temp;
+                       temp = validParameter.validFile(parameters, "filter", false);                   if (temp == "not found") { temp = "F"; }
+                       filter = isTrue(temp);
                        
-                       quanfile = validParameter.validFile(parameters, "quantile", true);
-                       if (quanfile == "not open") { abort = true; }
-                       else if (quanfile == "not found") { quanfile = "";  }
+                       temp = validParameter.validFile(parameters, "processors", false);               if (temp == "not found") { temp = "1"; }
+                       convert(temp, processors);
+                       
+                       temp = validParameter.validFile(parameters, "window", false);                   if (temp == "not found") { temp = "0"; }
+                       convert(temp, window);
+                       
+                       temp = validParameter.validFile(parameters, "increment", false);                if (temp == "not found") { temp = "25"; }
+                       convert(temp, increment);
                        
                        maskfile = validParameter.validFile(parameters, "mask", false);
                        if (maskfile == "not found") { maskfile = "";  }        
@@ -111,19 +144,31 @@ ChimeraPintailCommand::ChimeraPintailCommand(string option)  {
                                if (ableToOpen == 1) { abort = true; }
                                in.close();
                        }
-                                               
-                       string temp;
-                       temp = validParameter.validFile(parameters, "filter", false);                   if (temp == "not found") { temp = "F"; }
-                       filter = isTrue(temp);
+
                        
-                       temp = validParameter.validFile(parameters, "processors", false);               if (temp == "not found") { temp = "1"; }
-                       convert(temp, processors);
+                       //if the user changes the output directory command factory will send this info to us in the output parameter 
+                       outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  
+                               outputDir = ""; 
+                               outputDir += hasPath(fastafile); //if user entered a file with a path then preserve it  
+                       }
+               
+                       templatefile = validParameter.validFile(parameters, "template", true);
+                       if (templatefile == "not open") { abort = true; }
+                       else if (templatefile == "not found") { templatefile = "";  m->mothurOut("template is a required parameter for the chimera.pintail command."); m->mothurOutEndLine(); abort = true;  }
                        
-                       temp = validParameter.validFile(parameters, "window", false);                   if (temp == "not found") { temp = "0"; }
-                       convert(temp, window);
+                       consfile = validParameter.validFile(parameters, "conservation", true);
+                       if (consfile == "not open") { abort = true; }
+                       else if (consfile == "not found") { 
+                               consfile = "";  
+                               //check for consfile
+                               string tempConsFile = getRootName(inputDir + getSimpleName(templatefile)) + "freq";
+                               ifstream FileTest(tempConsFile.c_str());
+                               if(FileTest){   m->mothurOut("I found " + tempConsFile + " in your input file directory. I will use it to save time."); m->mothurOutEndLine();  consfile = tempConsFile;  FileTest.close();     }
+                       }       
                        
-                       temp = validParameter.validFile(parameters, "increment", false);                if (temp == "not found") { temp = "25"; }
-                       convert(temp, increment);
+                       quanfile = validParameter.validFile(parameters, "quantile", true);
+                       if (quanfile == "not open") { abort = true; }
+                       else if (quanfile == "not found") { quanfile = "";  }
                }
        }
        catch(exception& e) {
@@ -140,6 +185,7 @@ void ChimeraPintailCommand::help(){
                m->mothurOut("This command was created using the algorythms described in the 'At Least 1 in 20 16S rRNA Sequence Records Currently Held in the Public Repositories is Estimated To Contain Substantial Anomalies' paper by Kevin E. Ashelford 1, Nadia A. Chuzhanova 3, John C. Fry 1, Antonia J. Jones 2 and Andrew J. Weightman 1.\n");
                m->mothurOut("The chimera.pintail command parameters are fasta, template, filter, mask, processors, window, increment, conservation and quantile.\n");
                m->mothurOut("The fasta parameter allows you to enter the fasta file containing your potentially chimeric sequences, and is required. \n");
+               m->mothurOut("You may enter multiple fasta files by separating their names with dashes. ie. fasta=abrecovery.fasta-amzon.fasta \n");
                m->mothurOut("The template parameter allows you to enter a template file containing known non-chimeric sequences, and is required. \n");
                m->mothurOut("The filter parameter allows you to specify if you would like to apply a vertical and 50% soft filter. \n");
                m->mothurOut("The mask parameter allows you to specify a file containing one sequence you wish to use as a mask for the your sequences, by default no mask is applied.  You can apply an ecoli mask by typing, mask=default. \n");
@@ -152,8 +198,8 @@ void ChimeraPintailCommand::help(){
                m->mothurOut("The conservation parameter allows you to enter a frequency file containing the highest bases frequency at each place in the alignment.\n");
                m->mothurOut("The quantile parameter allows you to enter a file containing quantiles for a template files sequences, if you use the filter the quantile file generated becomes unique to the fasta file you used.\n");
                m->mothurOut("The chimera.pintail command should be in the following format: \n");
-               m->mothurOut("chimera.seqs(fasta=yourFastaFile, filter=yourFilter, correction=yourCorrection, processors=yourProcessors, method=bellerophon) \n");
-               m->mothurOut("Example: chimera.seqs(fasta=AD.align, filter=True, correction=true, method=bellerophon, window=200) \n");
+               m->mothurOut("chimera.pintail(fasta=yourFastaFile, template=yourTemplate) \n");
+               m->mothurOut("Example: chimera.pintail(fasta=AD.align, template=silva.bacteria.fasta) \n");
                m->mothurOut("Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFastaFile).\n\n");     
        }
        catch(exception& e) {
@@ -173,257 +219,236 @@ int ChimeraPintailCommand::execute(){
                
                if (abort == true) { return 0; }
                
-               int start = time(NULL); 
-               
-               //set user options
-               if (maskfile == "default") { m->mothurOut("I am using the default 236627 EU009184.1 Shigella dysenteriae str. FBD013."); m->mothurOutEndLine();  }
-               
-               chimera = new Pintail(fastafile, templatefile, filter, processors, maskfile, consfile, quanfile, window, increment, outputDir);
-               
-               string outputFileName, accnosFileName;
-               if (maskfile != "") {
-                       outputFileName = outputDir + getRootName(getSimpleName(fastafile)) + maskfile + ".pintail.chimeras";
-                       accnosFileName = outputDir + getRootName(getSimpleName(fastafile)) + maskfile + ".pintail.accnos";
-               }else {
-                       outputFileName = outputDir + getRootName(getSimpleName(fastafile))  + "pintail.chimeras";
-                       accnosFileName = outputDir + getRootName(getSimpleName(fastafile))  + "pintail.accnos";
-               }
-               bool hasAccnos = true;
-               
-               if (m->control_pressed) { delete chimera;       return 0;       }
-               
-               if (chimera->getUnaligned()) { 
-                       m->mothurOut("Your template sequences are different lengths, please correct."); m->mothurOutEndLine(); 
-                       delete chimera;
-                       return 0; 
-               }
-               templateSeqsLength = chimera->getLength();
-       
-       #ifdef USE_MPI
-               int pid, end, numSeqsPerProcessor; 
-                       int tag = 2001;
-                       vector<long> MPIPos;
-                       MPIWroteAccnos = false;
-                       
-                       MPI_Status status; 
-                       MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
-                       MPI_Comm_size(MPI_COMM_WORLD, &processors); 
-
-                       MPI_File inMPI;
-                       MPI_File outMPI;
-                       MPI_File outMPIAccnos;
-                       
-                       int outMode=MPI_MODE_CREATE|MPI_MODE_WRONLY; 
-                       int inMode=MPI_MODE_RDONLY; 
-                       
-                       //char* outFilename = new char[outputFileName.length()];
-                       //memcpy(outFilename, outputFileName.c_str(), outputFileName.length());
-                       
-                       char outFilename[1024];
-                       strcpy(outFilename, outputFileName.c_str());
-                       
-                       //char* outAccnosFilename = new char[accnosFileName.length()];
-                       //memcpy(outAccnosFilename, accnosFileName.c_str(), accnosFileName.length());
-                       
-                       char outAccnosFilename[1024];
-                       strcpy(outAccnosFilename, accnosFileName.c_str());
+               for (int s = 0; s < fastaFileNames.size(); s++) {
+                               
+                       m->mothurOut("Checking sequences from " + fastaFileNames[s] + " ..." ); m->mothurOutEndLine();
 
-                       //char* inFileName = new char[fastafile.length()];
-                       //memcpy(inFileName, fastafile.c_str(), fastafile.length());
+                       int start = time(NULL); 
+                       
+                       //set user options
+                       if (maskfile == "default") { m->mothurOut("I am using the default 236627 EU009184.1 Shigella dysenteriae str. FBD013."); m->mothurOutEndLine();  }
+                       
+                       //check for quantile to save the time
+                       string tempQuan = "";
+                       if ((!filter) && (maskfile == "")) {
+                               tempQuan = inputDir + getRootName(getSimpleName(templatefile)) + "pintail.quan";
+                       }else if ((!filter) && (maskfile != "")) { 
+                               tempQuan = inputDir + getRootName(getSimpleName(templatefile)) + "pintail.masked.quan";
+                       }else if ((filter) && (maskfile != "")) { 
+                               tempQuan = inputDir + getRootName(getSimpleName(templatefile)) + "pintail.filtered." + getSimpleName(getRootName(fastaFileNames[s])) + "masked.quan";
+                       }else if ((filter) && (maskfile == "")) { 
+                               tempQuan = inputDir + getRootName(getSimpleName(templatefile)) + "pintail.filtered." + getSimpleName(getRootName(fastaFileNames[s])) + "quan";
+                       }
                        
-                       char inFileName[1024];
-                       strcpy(inFileName, fastafile.c_str());
-
-                       MPI_File_open(MPI_COMM_WORLD, inFileName, inMode, MPI_INFO_NULL, &inMPI);  //comm, filename, mode, info, filepointer
-                       MPI_File_open(MPI_COMM_WORLD, outFilename, outMode, MPI_INFO_NULL, &outMPI);
-                       MPI_File_open(MPI_COMM_WORLD, outAccnosFilename, outMode, MPI_INFO_NULL, &outMPIAccnos);
+                       ifstream FileTest(tempQuan.c_str());
+                       if(FileTest){   m->mothurOut("I found " + tempQuan + " in your input file directory. I will use it to save time."); m->mothurOutEndLine();  quanfile = tempQuan;  FileTest.close();     }
                        
-                       //delete inFileName;
-                       //delete outFilename;
-                       //delete outAccnosFilename;
-
-                       if (m->control_pressed) {  MPI_File_close(&inMPI);  MPI_File_close(&outMPI);   MPI_File_close(&outMPIAccnos);  delete chimera; return 0;  }
-
-                       if (pid == 0) { //you are the root process 
-                                                       
-                               MPIPos = setFilePosFasta(fastafile, numSeqs); //fills MPIPos, returns numSeqs
-                               
-                               //send file positions to all processes
-                               MPI_Bcast(&numSeqs, 1, MPI_INT, 0, MPI_COMM_WORLD);  //send numSeqs
-                               MPI_Bcast(&MPIPos[0], (numSeqs+1), MPI_LONG, 0, MPI_COMM_WORLD); //send file pos        
-                               
-                               //figure out how many sequences you have to align
-                               numSeqsPerProcessor = numSeqs / processors;
-                               if(pid == (processors - 1)){    numSeqsPerProcessor = numSeqs - pid * numSeqsPerProcessor;      }
-                               int startIndex =  pid * numSeqsPerProcessor;
+                       chimera = new Pintail(fastaFileNames[s], templatefile, filter, processors, maskfile, consfile, quanfile, window, increment, outputDir);
                        
-                               //align your part
-                               driverMPI(startIndex, numSeqsPerProcessor, inMPI, outMPI, outMPIAccnos, MPIPos);
-                               
-                               if (m->control_pressed) {  MPI_File_close(&inMPI);  MPI_File_close(&outMPI);   MPI_File_close(&outMPIAccnos);  remove(outputFileName.c_str());  remove(accnosFileName.c_str());  delete chimera; return 0;  }
-                               
-                               for (int i = 1; i < processors; i++) {
-                                       bool tempResult;
-                                       MPI_Recv(&tempResult, 1, MPI_INT, i, tag, MPI_COMM_WORLD, &status);
-                                       if (tempResult != 0) { MPIWroteAccnos = true; }
-                               }
-                       }else{ //you are a child process
-                               MPI_Bcast(&numSeqs, 1, MPI_INT, 0, MPI_COMM_WORLD); //get numSeqs
-                               MPIPos.resize(numSeqs+1);
-                               MPI_Bcast(&MPIPos[0], (numSeqs+1), MPI_LONG, 0, MPI_COMM_WORLD); //get file positions
-                               
-                               //figure out how many sequences you have to align
-                               numSeqsPerProcessor = numSeqs / processors;
-                               if(pid == (processors - 1)){    numSeqsPerProcessor = numSeqs - pid * numSeqsPerProcessor;      }
-                               int startIndex =  pid * numSeqsPerProcessor;
-                               
-                               //align your part
-                               driverMPI(startIndex, numSeqsPerProcessor, inMPI, outMPI, outMPIAccnos, MPIPos);
-                               
-                               if (m->control_pressed) {  MPI_File_close(&inMPI);  MPI_File_close(&outMPI);   MPI_File_close(&outMPIAccnos);  delete chimera; return 0;  }
-
-                               MPI_Send(&MPIWroteAccnos, 1, MPI_INT, 0, tag, MPI_COMM_WORLD); 
+                       string outputFileName, accnosFileName;
+                       if (maskfile != "") {
+                               outputFileName = outputDir + getRootName(getSimpleName(fastaFileNames[s])) + maskfile + ".pintail.chimeras";
+                               accnosFileName = outputDir + getRootName(getSimpleName(fastaFileNames[s])) + maskfile + ".pintail.accnos";
+                       }else {
+                               outputFileName = outputDir + getRootName(getSimpleName(fastaFileNames[s]))  + "pintail.chimeras";
+                               accnosFileName = outputDir + getRootName(getSimpleName(fastaFileNames[s]))  + "pintail.accnos";
                        }
                        
-                       //close files 
-                       MPI_File_close(&inMPI);
-                       MPI_File_close(&outMPI);
-                       MPI_File_close(&outMPIAccnos);
+                       if (m->control_pressed) { delete chimera; for (int j = 0; j < outputNames.size(); j++) {        remove(outputNames[j].c_str()); }  return 0;    }
                        
-                       //delete accnos file if blank
-                       if (pid == 0) {
-                               if (!MPIWroteAccnos) { 
-                                       //MPI_Info info;
-                                       //MPI_File_delete(outAccnosFilename, info);
-                                       hasAccnos = false;      
-                                       remove(accnosFileName.c_str()); 
-                               }
+                       if (chimera->getUnaligned()) { 
+                               m->mothurOut("Your template sequences are different lengths, please correct."); m->mothurOutEndLine(); 
+                               delete chimera;
+                               return 0; 
                        }
-
-       #else
-       
-               //break up file
-               #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
-                       if(processors == 1){
-                               ifstream inFASTA;
-                               openInputFile(fastafile, inFASTA);
-                               numSeqs=count(istreambuf_iterator<char>(inFASTA),istreambuf_iterator<char>(), '>');
-                               inFASTA.close();
-                               
-                               lines.push_back(new linePair(0, numSeqs));
+                       templateSeqsLength = chimera->getLength();
+               
+               #ifdef USE_MPI
+                       int pid, end, numSeqsPerProcessor; 
+                               int tag = 2001;
+                               vector<long> MPIPos;
                                
-                               driver(lines[0], outputFileName, fastafile, accnosFileName);
+                               MPI_Status status; 
+                               MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
+                               MPI_Comm_size(MPI_COMM_WORLD, &processors); 
+
+                               MPI_File inMPI;
+                               MPI_File outMPI;
+                               MPI_File outMPIAccnos;
                                
-                               if (m->control_pressed) { 
-                                       remove(outputFileName.c_str()); 
-                                       remove(accnosFileName.c_str());
-                                       for (int i = 0; i < lines.size(); i++) {  delete lines[i];  }  lines.clear();
-                                       delete chimera;
-                                       return 0;
-                               }
+                               int outMode=MPI_MODE_CREATE|MPI_MODE_WRONLY; 
+                               int inMode=MPI_MODE_RDONLY; 
                                
-                               //delete accnos file if its blank 
-                               if (isBlank(accnosFileName)) {  remove(accnosFileName.c_str());  hasAccnos = false; }
-                                                               
-                       }else{
-                               vector<int> positions;
-                               processIDS.resize(0);
+                               char outFilename[1024];
+                               strcpy(outFilename, outputFileName.c_str());
                                
-                               ifstream inFASTA;
-                               openInputFile(fastafile, inFASTA);
-                               
-                               string input;
-                               while(!inFASTA.eof()){
-                                       input = getline(inFASTA);
-                                       if (input.length() != 0) {
-                                               if(input[0] == '>'){    long int pos = inFASTA.tellg(); positions.push_back(pos - input.length() - 1);  }
-                                       }
-                               }
-                               inFASTA.close();
+                               char outAccnosFilename[1024];
+                               strcpy(outAccnosFilename, accnosFileName.c_str());
                                
-                               numSeqs = positions.size();
-                               
-                               int numSeqsPerProcessor = numSeqs / processors;
+                               char inFileName[1024];
+                               strcpy(inFileName, fastaFileNames[s].c_str());
+
+                               MPI_File_open(MPI_COMM_WORLD, inFileName, inMode, MPI_INFO_NULL, &inMPI);  //comm, filename, mode, info, filepointer
+                               MPI_File_open(MPI_COMM_WORLD, outFilename, outMode, MPI_INFO_NULL, &outMPI);
+                               MPI_File_open(MPI_COMM_WORLD, outAccnosFilename, outMode, MPI_INFO_NULL, &outMPIAccnos);
                                
-                               for (int i = 0; i < processors; i++) {
-                                       long int startPos = positions[ i * numSeqsPerProcessor ];
-                                       if(i == processors - 1){
-                                               numSeqsPerProcessor = numSeqs - i * numSeqsPerProcessor;
+                               if (m->control_pressed) {  MPI_File_close(&inMPI);  MPI_File_close(&outMPI);   MPI_File_close(&outMPIAccnos);  for (int j = 0; j < outputNames.size(); j++) {   remove(outputNames[j].c_str()); }  delete chimera; return 0;  }
+
+                               if (pid == 0) { //you are the root process 
+                                                               
+                                       MPIPos = setFilePosFasta(fastaFileNames[s], numSeqs); //fills MPIPos, returns numSeqs
+                                       
+                                       //send file positions to all processes
+                                       for(int i = 1; i < processors; i++) { 
+                                               MPI_Send(&numSeqs, 1, MPI_INT, i, tag, MPI_COMM_WORLD);
+                                               MPI_Send(&MPIPos[0], (numSeqs+1), MPI_LONG, i, tag, MPI_COMM_WORLD);
                                        }
-                                       lines.push_back(new linePair(startPos, numSeqsPerProcessor));
-                               }
-                               
+                                       
+                                       //figure out how many sequences you have to align
+                                       numSeqsPerProcessor = numSeqs / processors;
+                                       int startIndex =  pid * numSeqsPerProcessor;
+                                       if(pid == (processors - 1)){    numSeqsPerProcessor = numSeqs - pid * numSeqsPerProcessor;      }
                                
-                               createProcesses(outputFileName, fastafile, accnosFileName); 
-                       
-                               rename((outputFileName + toString(processIDS[0]) + ".temp").c_str(), outputFileName.c_str());
+                                       //do your part
+                                       driverMPI(startIndex, numSeqsPerProcessor, inMPI, outMPI, outMPIAccnos, MPIPos);
+                                       
+                                       if (m->control_pressed) {  MPI_File_close(&inMPI);  MPI_File_close(&outMPI);   MPI_File_close(&outMPIAccnos);  remove(outputFileName.c_str());  remove(accnosFileName.c_str());  for (int j = 0; j < outputNames.size(); j++) { remove(outputNames[j].c_str()); }  delete chimera; return 0;  }
                                        
-                               //append output files
-                               for(int i=1;i<processors;i++){
-                                       appendFiles((outputFileName + toString(processIDS[i]) + ".temp"), outputFileName);
-                                       remove((outputFileName + toString(processIDS[i]) + ".temp").c_str());
+                               }else{ //you are a child process
+                                       MPI_Recv(&numSeqs, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
+                                       MPIPos.resize(numSeqs+1);
+                                       MPI_Recv(&MPIPos[0], (numSeqs+1), MPI_LONG, 0, tag, MPI_COMM_WORLD, &status);
+                                       
+                                       //figure out how many sequences you have to align
+                                       numSeqsPerProcessor = numSeqs / processors;
+                                       int startIndex =  pid * numSeqsPerProcessor;
+                                       if(pid == (processors - 1)){    numSeqsPerProcessor = numSeqs - pid * numSeqsPerProcessor;      }
+                                       
+                                       //do your part
+                                       driverMPI(startIndex, numSeqsPerProcessor, inMPI, outMPI, outMPIAccnos, MPIPos);
+                                       
+                                       if (m->control_pressed) {  MPI_File_close(&inMPI);  MPI_File_close(&outMPI);   MPI_File_close(&outMPIAccnos);  for (int j = 0; j < outputNames.size(); j++) {   remove(outputNames[j].c_str()); }  delete chimera; return 0;  }
                                }
                                
-                               vector<string> nonBlankAccnosFiles;
-                               //delete blank accnos files generated with multiple processes
-                               for(int i=0;i<processors;i++){  
-                                       if (!(isBlank(accnosFileName + toString(processIDS[i]) + ".temp"))) {
-                                               nonBlankAccnosFiles.push_back(accnosFileName + toString(processIDS[i]) + ".temp");
-                                       }else { remove((accnosFileName + toString(processIDS[i]) + ".temp").c_str());  }
-                               }
+                               //close files 
+                               MPI_File_close(&inMPI);
+                               MPI_File_close(&outMPI);
+                               MPI_File_close(&outMPIAccnos);
+                               MPI_Barrier(MPI_COMM_WORLD); //make everyone wait - just in case
+               #else
+               
+                       //break up file
+                       #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
+                               if(processors == 1){
+                                       ifstream inFASTA;
+                                       openInputFile(fastaFileNames[s], inFASTA);
+                                       getNumSeqs(inFASTA, numSeqs);
+                                       inFASTA.close();
+                                       
+                                       lines.push_back(new linePair(0, numSeqs));
+                                       
+                                       driver(lines[0], outputFileName, fastaFileNames[s], accnosFileName);
+                                       
+                                       if (m->control_pressed) { 
+                                               remove(outputFileName.c_str()); 
+                                               remove(accnosFileName.c_str());
+                                               for (int j = 0; j < outputNames.size(); j++) {  remove(outputNames[j].c_str()); } 
+                                               for (int i = 0; i < lines.size(); i++) {  delete lines[i];  }  lines.clear();
+                                               delete chimera;
+                                               return 0;
+                                       }
+                                       
+                               }else{
+                                       vector<int> positions;
+                                       processIDS.resize(0);
+                                       
+                                       ifstream inFASTA;
+                                       openInputFile(fastaFileNames[s], inFASTA);
+                                       
+                                       string input;
+                                       while(!inFASTA.eof()){
+                                               input = getline(inFASTA);
+                                               if (input.length() != 0) {
+                                                       if(input[0] == '>'){    long int pos = inFASTA.tellg(); positions.push_back(pos - input.length() - 1);  }
+                                               }
+                                       }
+                                       inFASTA.close();
+                                       
+                                       numSeqs = positions.size();
+                                       
+                                       int numSeqsPerProcessor = numSeqs / processors;
+                                       
+                                       for (int i = 0; i < processors; i++) {
+                                               long int startPos = positions[ i * numSeqsPerProcessor ];
+                                               if(i == processors - 1){
+                                                       numSeqsPerProcessor = numSeqs - i * numSeqsPerProcessor;
+                                               }
+                                               lines.push_back(new linePair(startPos, numSeqsPerProcessor));
+                                       }
+                                       
+                                       createProcesses(outputFileName, fastaFileNames[s], accnosFileName); 
                                
-                               //append accnos files
-                               if (nonBlankAccnosFiles.size() != 0) { 
-                                       rename(nonBlankAccnosFiles[0].c_str(), accnosFileName.c_str());
+                                       rename((outputFileName + toString(processIDS[0]) + ".temp").c_str(), outputFileName.c_str());
+                                       rename((accnosFileName + toString(processIDS[0]) + ".temp").c_str(), accnosFileName.c_str());
+                                               
+                                       //append output files
+                                       for(int i=1;i<processors;i++){
+                                               appendFiles((outputFileName + toString(processIDS[i]) + ".temp"), outputFileName);
+                                               remove((outputFileName + toString(processIDS[i]) + ".temp").c_str());
+                                       }
                                        
-                                       for (int h=1; h < nonBlankAccnosFiles.size(); h++) {
-                                               appendFiles(nonBlankAccnosFiles[h], accnosFileName);
-                                               remove(nonBlankAccnosFiles[h].c_str());
+                                       //append output files
+                                       for(int i=1;i<processors;i++){
+                                               appendFiles((accnosFileName + toString(processIDS[i]) + ".temp"), accnosFileName);
+                                               remove((accnosFileName + toString(processIDS[i]) + ".temp").c_str());
+                                       }
+                                                                               
+                                       if (m->control_pressed) { 
+                                               remove(outputFileName.c_str()); 
+                                               remove(accnosFileName.c_str());
+                                               for (int j = 0; j < outputNames.size(); j++) {  remove(outputNames[j].c_str()); } 
+                                               for (int i = 0; i < lines.size(); i++) {  delete lines[i];  }  lines.clear();
+                                               delete chimera;
+                                               return 0;
                                        }
-                               }else{ hasAccnos = false;  }
+                               }
+
+                       #else
+                               ifstream inFASTA;
+                               openInputFile(fastaFileNames[s], inFASTA);
+                               getNumSeqs(inFASTA, numSeqs);
+                               inFASTA.close();
+                               lines.push_back(new linePair(0, numSeqs));
+                               
+                               driver(lines[0], outputFileName, fastaFileNames[s], accnosFileName);
                                
                                if (m->control_pressed) { 
-                                       remove(outputFileName.c_str()); 
-                                       remove(accnosFileName.c_str());
-                                       for (int i = 0; i < lines.size(); i++) {  delete lines[i];  }  lines.clear();
-                                       delete chimera;
-                                       return 0;
+                                               remove(outputFileName.c_str()); 
+                                               remove(accnosFileName.c_str());
+                                               for (int j = 0; j < outputNames.size(); j++) {  remove(outputNames[j].c_str()); } 
+                                               for (int i = 0; i < lines.size(); i++) {  delete lines[i];  }  lines.clear();
+                                               delete chimera;
+                                               return 0;
                                }
-                       }
-
-               #else
-                       ifstream inFASTA;
-                       openInputFile(fastafile, inFASTA);
-                       numSeqs=count(istreambuf_iterator<char>(inFASTA),istreambuf_iterator<char>(), '>');
-                       inFASTA.close();
-                       lines.push_back(new linePair(0, numSeqs));
+                       #endif
                        
-                       driver(lines[0], outputFileName, fastafile, accnosFileName);
+               #endif  
+               
+                       delete chimera;
+                       for (int i = 0; i < lines.size(); i++) {  delete lines[i];  }  lines.clear();
                        
-                       if (m->control_pressed) { 
-                                       remove(outputFileName.c_str()); 
-                                       remove(accnosFileName.c_str());
-                                       for (int i = 0; i < lines.size(); i++) {  delete lines[i];  }  lines.clear();
-                                       delete chimera;
-                                       return 0;
-                       }
+                       outputNames.push_back(outputFileName);
+                       outputNames.push_back(accnosFileName); 
                        
-                       //delete accnos file if its blank 
-                       if (isBlank(accnosFileName)) {  remove(accnosFileName.c_str());  hasAccnos = false; }
-               #endif
-               
-       #endif  
-       
-               delete chimera;
-               for (int i = 0; i < lines.size(); i++) {  delete lines[i];  }  lines.clear();
+                       m->mothurOutEndLine();
+                       m->mothurOutEndLine(); m->mothurOut("It took " + toString(time(NULL) - start) + " secs to check " + toString(numSeqs) + " sequences."); m->mothurOutEndLine();
+               }
                
                m->mothurOutEndLine();
                m->mothurOut("Output File Names: "); m->mothurOutEndLine();
-               m->mothurOut(outputFileName); m->mothurOutEndLine();    
-               if (hasAccnos) {  m->mothurOut(accnosFileName); m->mothurOutEndLine();  }
+               for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }       
                m->mothurOutEndLine();
-               m->mothurOutEndLine(); m->mothurOut("It took " + toString(time(NULL) - start) + " secs to check " + toString(numSeqs) + " sequences."); m->mothurOutEndLine();
-               
+                       
                return 0;
                
        }
@@ -524,7 +549,6 @@ int ChimeraPintailCommand::driverMPI(int start, int num, MPI_File& inMPI, MPI_Fi
                
                                        //print results
                                        bool isChimeric = chimera->print(outMPI, outAccMPI);
-                                       if (isChimeric) { MPIWroteAccnos = true;  }
                                }
                        }
                        delete candidateSeq;