git.donarmstrong.com Git - mothur.git/blobdiff - chimerapintailcommand.cpp
added oldfasta and column parameter to dist.seqs so you can append distances to an...
[mothur.git] / chimerapintailcommand.cpp
index 6cef8bbec330c3d345d332f772970ff33a988f79..5818ab22c0bfc3e0245372de038e4c43d95a2289 100644 (file)
@@ -27,7 +27,7 @@ ChimeraPintailCommand::ChimeraPintailCommand(string option)  {
                        OptionParser parser(option);
                        map<string,string> parameters = parser.getParameters();
                        
-                       ValidParameters validParameter;
+                       ValidParameters validParameter("chimera.pintail");
                        map<string,string>::iterator it;
                        
                        //check to make sure all parameters are valid for command
@@ -36,18 +36,10 @@ ChimeraPintailCommand::ChimeraPintailCommand(string option)  {
                        }
                        
                        //if the user changes the input directory command factory will send this info to us in the output parameter 
-                       string inputDir = validParameter.validFile(parameters, "inputdir", false);              
+                       inputDir = validParameter.validFile(parameters, "inputdir", false);             
                        if (inputDir == "not found"){   inputDir = "";          }
                        else {
                                string path;
-                               it = parameters.find("fasta");
-                               //user has given a template file
-                               if(it != parameters.end()){ 
-                                       path = hasPath(it->second);
-                                       //if the user has not given a path then, add inputdir. else leave path alone.
-                                       if (path == "") {       parameters["fasta"] = inputDir + it->second;            }
-                               }
-                               
                                it = parameters.find("template");
                                //user has given a template file
                                if(it != parameters.end()){ 
@@ -75,27 +67,78 @@ ChimeraPintailCommand::ChimeraPintailCommand(string option)  {
 
                        
                        //check for required parameters
-                       fastafile = validParameter.validFile(parameters, "fasta", true);
-                       if (fastafile == "not open") { abort = true; }
-                       else if (fastafile == "not found") { fastafile = ""; m->mothurOut("fasta is a required parameter for the chimera.pintail command."); m->mothurOutEndLine(); abort = true;  }    
-                       
-                       //if the user changes the output directory command factory will send this info to us in the output parameter 
-                       outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  
-                               outputDir = ""; 
-                               outputDir += hasPath(fastafile); //if user entered a file with a path then preserve it  
-                       }
+                       fastafile = validParameter.validFile(parameters, "fasta", false);
+                       if (fastafile == "not found") { fastafile = ""; m->mothurOut("fasta is a required parameter for the chimera.pintail command."); m->mothurOutEndLine(); abort = true;  }
+                       else { 
+                               splitAtDash(fastafile, fastaFileNames);
+                               
+                               //go through files and make sure they are good, if not, then disregard them
+                               for (int i = 0; i < fastaFileNames.size(); i++) {
+                                       if (inputDir != "") {
+                                               string path = hasPath(fastaFileNames[i]);
+                                               //if the user has not given a path then, add inputdir. else leave path alone.
+                                               if (path == "") {       fastaFileNames[i] = inputDir + fastaFileNames[i];               }
+                                       }
+       
+                                       int ableToOpen;
+                                       ifstream in;
+                                       
+                                       #ifdef USE_MPI  
+                                               int pid;
+                                               MPI_Comm_size(MPI_COMM_WORLD, &processors); //set processors to the number of mpi processes running
+                                               MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
+                               
+                                               if (pid == 0) {
+                                       #endif
 
-                       templatefile = validParameter.validFile(parameters, "template", true);
-                       if (templatefile == "not open") { abort = true; }
-                       else if (templatefile == "not found") { templatefile = "";  m->mothurOut("template is a required parameter for the chimera.pintail command."); m->mothurOutEndLine(); abort = true;  }
+                                       ableToOpen = openInputFile(fastaFileNames[i], in, "noerror");
+                               
+                                       //if you can't open it, try default location
+                                       if (ableToOpen == 1) {
+                                               if (m->getDefaultPath() != "") { //default path is set
+                                                       string tryPath = m->getDefaultPath() + getSimpleName(fastaFileNames[i]);
+                                                       m->mothurOut("Unable to open " + fastaFileNames[i] + ". Trying default " + tryPath); m->mothurOutEndLine();
+                                                       ableToOpen = openInputFile(tryPath, in, "noerror");
+                                                       fastaFileNames[i] = tryPath;
+                                               }
+                                       }
+                                       in.close();
+                                       
+                                       #ifdef USE_MPI  
+                                                       for (int j = 1; j < processors; j++) {
+                                                               MPI_Send(&ableToOpen, 1, MPI_INT, j, 2001, MPI_COMM_WORLD); 
+                                                       }
+                                               }else{
+                                                       MPI_Status status;
+                                                       MPI_Recv(&ableToOpen, 1, MPI_INT, 0, 2001, MPI_COMM_WORLD, &status);
+                                               }
+                                               
+                                       #endif
+
+                                       if (ableToOpen == 1) { 
+                                               m->mothurOut("Unable to open " + fastaFileNames[i] + ". It will be disregarded."); m->mothurOutEndLine(); 
+                                               //erase from file list
+                                               fastaFileNames.erase(fastaFileNames.begin()+i);
+                                               i--;
+                                       }
+                               }
+                               
+                               //make sure there is at least one valid file left
+                               if (fastaFileNames.size() == 0) { m->mothurOut("no valid files."); m->mothurOutEndLine(); abort = true; }
+                       }
                        
-                       consfile = validParameter.validFile(parameters, "conservation", true);
-                       if (consfile == "not open") { abort = true; }
-                       else if (consfile == "not found") { consfile = "";  }   
+                       string temp;
+                       temp = validParameter.validFile(parameters, "filter", false);                   if (temp == "not found") { temp = "F"; }
+                       filter = isTrue(temp);
                        
-                       quanfile = validParameter.validFile(parameters, "quantile", true);
-                       if (quanfile == "not open") { abort = true; }
-                       else if (quanfile == "not found") { quanfile = "";  }
+                       temp = validParameter.validFile(parameters, "processors", false);               if (temp == "not found") { temp = "1"; }
+                       convert(temp, processors);
+                       
+                       temp = validParameter.validFile(parameters, "window", false);                   if (temp == "not found") { temp = "0"; }
+                       convert(temp, window);
+                       
+                       temp = validParameter.validFile(parameters, "increment", false);                if (temp == "not found") { temp = "25"; }
+                       convert(temp, increment);
                        
                        maskfile = validParameter.validFile(parameters, "mask", false);
                        if (maskfile == "not found") { maskfile = "";  }        
@@ -111,19 +154,36 @@ ChimeraPintailCommand::ChimeraPintailCommand(string option)  {
                                if (ableToOpen == 1) { abort = true; }
                                in.close();
                        }
-                                               
-                       string temp;
-                       temp = validParameter.validFile(parameters, "filter", false);                   if (temp == "not found") { temp = "F"; }
-                       filter = isTrue(temp);
+
                        
-                       temp = validParameter.validFile(parameters, "processors", false);               if (temp == "not found") { temp = "1"; }
-                       convert(temp, processors);
+                       //if the user changes the output directory command factory will send this info to us in the output parameter 
+                       outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  
+                               outputDir = ""; 
+                               outputDir += hasPath(fastafile); //if user entered a file with a path then preserve it  
+                       }
+               
+                       templatefile = validParameter.validFile(parameters, "template", true);
+                       if (templatefile == "not open") { abort = true; }
+                       else if (templatefile == "not found") { templatefile = "";  m->mothurOut("template is a required parameter for the chimera.pintail command."); m->mothurOutEndLine(); abort = true;  }
                        
-                       temp = validParameter.validFile(parameters, "window", false);                   if (temp == "not found") { temp = "0"; }
-                       convert(temp, window);
+                       consfile = validParameter.validFile(parameters, "conservation", true);
+                       if (consfile == "not open") { abort = true; }
+                       else if (consfile == "not found") { 
+                               consfile = "";  
+                               //check for consfile
+                               string tempConsFile = getRootName(inputDir + getSimpleName(templatefile)) + "freq";
+                               ifstream FileTest(tempConsFile.c_str());
+                               if(FileTest){   
+                                       bool GoodFile = checkReleaseVersion(FileTest, m->getVersion());
+                                       if (GoodFile) {  
+                                               m->mothurOut("I found " + tempConsFile + " in your input file directory. I will use it to save time."); m->mothurOutEndLine();  consfile = tempConsFile;  FileTest.close();     
+                                       }
+                               }
+                       }       
                        
-                       temp = validParameter.validFile(parameters, "increment", false);                if (temp == "not found") { temp = "25"; }
-                       convert(temp, increment);
+                       quanfile = validParameter.validFile(parameters, "quantile", true);
+                       if (quanfile == "not open") { abort = true; }
+                       else if (quanfile == "not found") { quanfile = ""; }
                }
        }
        catch(exception& e) {
@@ -140,6 +200,7 @@ void ChimeraPintailCommand::help(){
                m->mothurOut("This command was created using the algorythms described in the 'At Least 1 in 20 16S rRNA Sequence Records Currently Held in the Public Repositories is Estimated To Contain Substantial Anomalies' paper by Kevin E. Ashelford 1, Nadia A. Chuzhanova 3, John C. Fry 1, Antonia J. Jones 2 and Andrew J. Weightman 1.\n");
                m->mothurOut("The chimera.pintail command parameters are fasta, template, filter, mask, processors, window, increment, conservation and quantile.\n");
                m->mothurOut("The fasta parameter allows you to enter the fasta file containing your potentially chimeric sequences, and is required. \n");
+               m->mothurOut("You may enter multiple fasta files by separating their names with dashes. ie. fasta=abrecovery.fasta-amzon.fasta \n");
                m->mothurOut("The template parameter allows you to enter a template file containing known non-chimeric sequences, and is required. \n");
                m->mothurOut("The filter parameter allows you to specify if you would like to apply a vertical and 50% soft filter. \n");
                m->mothurOut("The mask parameter allows you to specify a file containing one sequence you wish to use as a mask for the your sequences, by default no mask is applied.  You can apply an ecoli mask by typing, mask=default. \n");
@@ -152,8 +213,8 @@ void ChimeraPintailCommand::help(){
                m->mothurOut("The conservation parameter allows you to enter a frequency file containing the highest bases frequency at each place in the alignment.\n");
                m->mothurOut("The quantile parameter allows you to enter a file containing quantiles for a template files sequences, if you use the filter the quantile file generated becomes unique to the fasta file you used.\n");
                m->mothurOut("The chimera.pintail command should be in the following format: \n");
-               m->mothurOut("chimera.seqs(fasta=yourFastaFile, filter=yourFilter, correction=yourCorrection, processors=yourProcessors, method=bellerophon) \n");
-               m->mothurOut("Example: chimera.seqs(fasta=AD.align, filter=True, correction=true, method=bellerophon, window=200) \n");
+               m->mothurOut("chimera.pintail(fasta=yourFastaFile, template=yourTemplate) \n");
+               m->mothurOut("Example: chimera.pintail(fasta=AD.align, template=silva.bacteria.fasta) \n");
                m->mothurOut("Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFastaFile).\n\n");     
        }
        catch(exception& e) {
@@ -173,262 +234,241 @@ int ChimeraPintailCommand::execute(){
                
                if (abort == true) { return 0; }
                
-               int start = time(NULL); 
-               
-               //set user options
-               if (maskfile == "default") { m->mothurOut("I am using the default 236627 EU009184.1 Shigella dysenteriae str. FBD013."); m->mothurOutEndLine();  }
-               
-               chimera = new Pintail(fastafile, templatefile, filter, processors, maskfile, consfile, quanfile, window, increment, outputDir);
-               
-               string outputFileName, accnosFileName;
-               if (maskfile != "") {
-                       outputFileName = outputDir + getRootName(getSimpleName(fastafile)) + maskfile + ".pintail.chimeras";
-                       accnosFileName = outputDir + getRootName(getSimpleName(fastafile)) + maskfile + ".pintail.accnos";
-               }else {
-                       outputFileName = outputDir + getRootName(getSimpleName(fastafile))  + "pintail.chimeras";
-                       accnosFileName = outputDir + getRootName(getSimpleName(fastafile))  + "pintail.accnos";
-               }
-               bool hasAccnos = true;
-               
-               if (m->control_pressed) { delete chimera;       return 0;       }
-               
-               if (chimera->getUnaligned()) { 
-                       m->mothurOut("Your template sequences are different lengths, please correct."); m->mothurOutEndLine(); 
-                       delete chimera;
-                       return 0; 
-               }
-               templateSeqsLength = chimera->getLength();
-       
-       #ifdef USE_MPI
-               int pid, end, numSeqsPerProcessor; 
-                       int tag = 2001;
-                       vector<long> MPIPos;
-                       MPIWroteAccnos = false;
-                       
-                       MPI_Status status; 
-                       MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
-                       MPI_Comm_size(MPI_COMM_WORLD, &processors); 
-
-                       MPI_File inMPI;
-                       MPI_File outMPI;
-                       MPI_File outMPIAccnos;
-                       
-                       int outMode=MPI_MODE_CREATE|MPI_MODE_WRONLY; 
-                       int inMode=MPI_MODE_RDONLY; 
-                       
-                       //char* outFilename = new char[outputFileName.length()];
-                       //memcpy(outFilename, outputFileName.c_str(), outputFileName.length());
-                       
-                       char outFilename[1024];
-                       strcpy(outFilename, outputFileName.c_str());
-                       
-                       //char* outAccnosFilename = new char[accnosFileName.length()];
-                       //memcpy(outAccnosFilename, accnosFileName.c_str(), accnosFileName.length());
-                       
-                       char outAccnosFilename[1024];
-                       strcpy(outAccnosFilename, accnosFileName.c_str());
-
-                       //char* inFileName = new char[fastafile.length()];
-                       //memcpy(inFileName, fastafile.c_str(), fastafile.length());
-                       
-                       char inFileName[1024];
-                       strcpy(inFileName, fastafile.c_str());
+               for (int s = 0; s < fastaFileNames.size(); s++) {
+                               
+                       m->mothurOut("Checking sequences from " + fastaFileNames[s] + " ..." ); m->mothurOutEndLine();
 
-                       MPI_File_open(MPI_COMM_WORLD, inFileName, inMode, MPI_INFO_NULL, &inMPI);  //comm, filename, mode, info, filepointer
-                       MPI_File_open(MPI_COMM_WORLD, outFilename, outMode, MPI_INFO_NULL, &outMPI);
-                       MPI_File_open(MPI_COMM_WORLD, outAccnosFilename, outMode, MPI_INFO_NULL, &outMPIAccnos);
+                       int start = time(NULL); 
+                       
+                       //set user options
+                       if (maskfile == "default") { m->mothurOut("I am using the default 236627 EU009184.1 Shigella dysenteriae str. FBD013."); m->mothurOutEndLine();  }
+                       
+                       //check for quantile to save the time
+                       string tempQuan = "";
+                       if ((!filter) && (maskfile == "")) {
+                               tempQuan = inputDir + getRootName(getSimpleName(templatefile)) + "pintail.quan";
+                       }else if ((!filter) && (maskfile != "")) { 
+                               tempQuan = inputDir + getRootName(getSimpleName(templatefile)) + "pintail.masked.quan";
+                       }else if ((filter) && (maskfile != "")) { 
+                               tempQuan = inputDir + getRootName(getSimpleName(templatefile)) + "pintail.filtered." + getSimpleName(getRootName(fastaFileNames[s])) + "masked.quan";
+                       }else if ((filter) && (maskfile == "")) { 
+                               tempQuan = inputDir + getRootName(getSimpleName(templatefile)) + "pintail.filtered." + getSimpleName(getRootName(fastaFileNames[s])) + "quan";
+                       }
                        
-                       //delete inFileName;
-                       //delete outFilename;
-                       //delete outAccnosFilename;
-
-                       if (m->control_pressed) {  MPI_File_close(&inMPI);  MPI_File_close(&outMPI);   MPI_File_close(&outMPIAccnos);  delete chimera; return 0;  }
-
-                       if (pid == 0) { //you are the root process 
-                                                       
-                               MPIPos = setFilePosFasta(fastafile, numSeqs); //fills MPIPos, returns numSeqs
-                               
-                               //send file positions to all processes
-                               for(int i = 1; i < processors; i++) { 
-                                       MPI_Send(&numSeqs, 1, MPI_INT, i, tag, MPI_COMM_WORLD);
-                                       MPI_Send(&MPIPos[0], (numSeqs+1), MPI_LONG, i, tag, MPI_COMM_WORLD);
+                       ifstream FileTest(tempQuan.c_str());
+                       if(FileTest){   
+                               bool GoodFile = checkReleaseVersion(FileTest, m->getVersion());
+                               if (GoodFile) {  
+                                       m->mothurOut("I found " + tempQuan + " in your input file directory. I will use it to save time."); m->mothurOutEndLine();  quanfile = tempQuan;  FileTest.close();     
                                }
-                               
-                               //figure out how many sequences you have to align
-                               numSeqsPerProcessor = numSeqs / processors;
-                               int startIndex =  pid * numSeqsPerProcessor;
-                               if(pid == (processors - 1)){    numSeqsPerProcessor = numSeqs - pid * numSeqsPerProcessor;      }
-                               
+                       }
                        
-                               //align your part
-                               driverMPI(startIndex, numSeqsPerProcessor, inMPI, outMPI, outMPIAccnos, MPIPos);
-                               
-                               if (m->control_pressed) {  MPI_File_close(&inMPI);  MPI_File_close(&outMPI);   MPI_File_close(&outMPIAccnos);  remove(outputFileName.c_str());  remove(accnosFileName.c_str());  delete chimera; return 0;  }
-                               
-                               for (int i = 1; i < processors; i++) {
-                                       bool tempResult;
-                                       MPI_Recv(&tempResult, 1, MPI_INT, i, tag, MPI_COMM_WORLD, &status);
-                                       if (tempResult != 0) { MPIWroteAccnos = true; }
-                               }
-                       }else{ //you are a child process
-                               MPI_Recv(&numSeqs, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
-                               MPIPos.resize(numSeqs+1);
-                               MPI_Recv(&MPIPos[0], (numSeqs+1), MPI_LONG, 0, tag, MPI_COMM_WORLD, &status);
-                               
-                               //figure out how many sequences you have to align
-                               numSeqsPerProcessor = numSeqs / processors;
-                               int startIndex =  pid * numSeqsPerProcessor;
-                               if(pid == (processors - 1)){    numSeqsPerProcessor = numSeqs - pid * numSeqsPerProcessor;      }
-                               
-                               
-                               //align your part
-                               driverMPI(startIndex, numSeqsPerProcessor, inMPI, outMPI, outMPIAccnos, MPIPos);
-                               
-                               if (m->control_pressed) {  MPI_File_close(&inMPI);  MPI_File_close(&outMPI);   MPI_File_close(&outMPIAccnos);  delete chimera; return 0;  }
-
-                               MPI_Send(&MPIWroteAccnos, 1, MPI_INT, 0, tag, MPI_COMM_WORLD); 
+                       chimera = new Pintail(fastaFileNames[s], templatefile, filter, processors, maskfile, consfile, quanfile, window, increment, outputDir);
+                       
+                       string outputFileName, accnosFileName;
+                       if (maskfile != "") {
+                               outputFileName = outputDir + getRootName(getSimpleName(fastaFileNames[s])) + maskfile + ".pintail.chimeras";
+                               accnosFileName = outputDir + getRootName(getSimpleName(fastaFileNames[s])) + maskfile + ".pintail.accnos";
+                       }else {
+                               outputFileName = outputDir + getRootName(getSimpleName(fastaFileNames[s]))  + "pintail.chimeras";
+                               accnosFileName = outputDir + getRootName(getSimpleName(fastaFileNames[s]))  + "pintail.accnos";
                        }
                        
-                       //close files 
-                       MPI_File_close(&inMPI);
-                       MPI_File_close(&outMPI);
-                       MPI_File_close(&outMPIAccnos);
-                       MPI_Barrier(MPI_COMM_WORLD); //make everyone wait - just in case
+                       if (m->control_pressed) { delete chimera; for (int j = 0; j < outputNames.size(); j++) {        remove(outputNames[j].c_str()); }  return 0;    }
                        
-                       //delete accnos file if blank
-                       if (pid == 0) {
-                               if (!MPIWroteAccnos) { 
-                                       //MPI_Info info;
-                                       //MPI_File_delete(outAccnosFilename, info);
-                                       hasAccnos = false;      
-                                       remove(accnosFileName.c_str()); 
-                               }
+                       if (chimera->getUnaligned()) { 
+                               m->mothurOut("Your template sequences are different lengths, please correct."); m->mothurOutEndLine(); 
+                               delete chimera;
+                               return 0; 
                        }
-
-       #else
-       
-               //break up file
-               #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
-                       if(processors == 1){
-                               ifstream inFASTA;
-                               openInputFile(fastafile, inFASTA);
-                               numSeqs=count(istreambuf_iterator<char>(inFASTA),istreambuf_iterator<char>(), '>');
-                               inFASTA.close();
+                       templateSeqsLength = chimera->getLength();
+               
+               #ifdef USE_MPI
+                       int pid, end, numSeqsPerProcessor; 
+                               int tag = 2001;
+                               vector<unsigned long int> MPIPos;
                                
-                               lines.push_back(new linePair(0, numSeqs));
+                               MPI_Status status; 
+                               MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
+                               MPI_Comm_size(MPI_COMM_WORLD, &processors); 
+
+                               MPI_File inMPI;
+                               MPI_File outMPI;
+                               MPI_File outMPIAccnos;
                                
-                               driver(lines[0], outputFileName, fastafile, accnosFileName);
+                               int outMode=MPI_MODE_CREATE|MPI_MODE_WRONLY; 
+                               int inMode=MPI_MODE_RDONLY; 
                                
-                               if (m->control_pressed) { 
-                                       remove(outputFileName.c_str()); 
-                                       remove(accnosFileName.c_str());
-                                       for (int i = 0; i < lines.size(); i++) {  delete lines[i];  }  lines.clear();
-                                       delete chimera;
-                                       return 0;
-                               }
+                               char outFilename[1024];
+                               strcpy(outFilename, outputFileName.c_str());
                                
-                               //delete accnos file if its blank 
-                               if (isBlank(accnosFileName)) {  remove(accnosFileName.c_str());  hasAccnos = false; }
-                                                               
-                       }else{
-                               vector<int> positions;
-                               processIDS.resize(0);
-                               
-                               ifstream inFASTA;
-                               openInputFile(fastafile, inFASTA);
+                               char outAccnosFilename[1024];
+                               strcpy(outAccnosFilename, accnosFileName.c_str());
                                
-                               string input;
-                               while(!inFASTA.eof()){
-                                       input = getline(inFASTA);
-                                       if (input.length() != 0) {
-                                               if(input[0] == '>'){    long int pos = inFASTA.tellg(); positions.push_back(pos - input.length() - 1);  }
-                                       }
-                               }
-                               inFASTA.close();
-                               
-                               numSeqs = positions.size();
-                               
-                               int numSeqsPerProcessor = numSeqs / processors;
+                               char inFileName[1024];
+                               strcpy(inFileName, fastaFileNames[s].c_str());
+
+                               MPI_File_open(MPI_COMM_WORLD, inFileName, inMode, MPI_INFO_NULL, &inMPI);  //comm, filename, mode, info, filepointer
+                               MPI_File_open(MPI_COMM_WORLD, outFilename, outMode, MPI_INFO_NULL, &outMPI);
+                               MPI_File_open(MPI_COMM_WORLD, outAccnosFilename, outMode, MPI_INFO_NULL, &outMPIAccnos);
                                
-                               for (int i = 0; i < processors; i++) {
-                                       long int startPos = positions[ i * numSeqsPerProcessor ];
-                                       if(i == processors - 1){
-                                               numSeqsPerProcessor = numSeqs - i * numSeqsPerProcessor;
+                               if (m->control_pressed) {  MPI_File_close(&inMPI);  MPI_File_close(&outMPI);   MPI_File_close(&outMPIAccnos);  for (int j = 0; j < outputNames.size(); j++) {   remove(outputNames[j].c_str()); }  delete chimera; return 0;  }
+
+                               if (pid == 0) { //you are the root process 
+                                                               
+                                       MPIPos = setFilePosFasta(fastaFileNames[s], numSeqs); //fills MPIPos, returns numSeqs
+                                       
+                                       //send file positions to all processes
+                                       for(int i = 1; i < processors; i++) { 
+                                               MPI_Send(&numSeqs, 1, MPI_INT, i, tag, MPI_COMM_WORLD);
+                                               MPI_Send(&MPIPos[0], (numSeqs+1), MPI_LONG, i, tag, MPI_COMM_WORLD);
                                        }
-                                       lines.push_back(new linePair(startPos, numSeqsPerProcessor));
-                               }
-                               
+                                       
+                                       //figure out how many sequences you have to check
+                                       numSeqsPerProcessor = numSeqs / processors;
+                                       int startIndex =  pid * numSeqsPerProcessor;
+                                       if(pid == (processors - 1)){    numSeqsPerProcessor = numSeqs - pid * numSeqsPerProcessor;      }
                                
-                               createProcesses(outputFileName, fastafile, accnosFileName); 
-                       
-                               rename((outputFileName + toString(processIDS[0]) + ".temp").c_str(), outputFileName.c_str());
+                                       //do your part
+                                       driverMPI(startIndex, numSeqsPerProcessor, inMPI, outMPI, outMPIAccnos, MPIPos);
+                                       
+                                       if (m->control_pressed) {  MPI_File_close(&inMPI);  MPI_File_close(&outMPI);   MPI_File_close(&outMPIAccnos);  remove(outputFileName.c_str());  remove(accnosFileName.c_str());  for (int j = 0; j < outputNames.size(); j++) { remove(outputNames[j].c_str()); }  delete chimera; return 0;  }
+                                       
+                               }else{ //you are a child process
+                                       MPI_Recv(&numSeqs, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
+                                       MPIPos.resize(numSeqs+1);
+                                       MPI_Recv(&MPIPos[0], (numSeqs+1), MPI_LONG, 0, tag, MPI_COMM_WORLD, &status);
                                        
-                               //append output files
-                               for(int i=1;i<processors;i++){
-                                       appendFiles((outputFileName + toString(processIDS[i]) + ".temp"), outputFileName);
-                                       remove((outputFileName + toString(processIDS[i]) + ".temp").c_str());
+                                       //figure out how many sequences you have to check
+                                       numSeqsPerProcessor = numSeqs / processors;
+                                       int startIndex =  pid * numSeqsPerProcessor;
+                                       if(pid == (processors - 1)){    numSeqsPerProcessor = numSeqs - pid * numSeqsPerProcessor;      }
+                                       
+                                       //do your part
+                                       driverMPI(startIndex, numSeqsPerProcessor, inMPI, outMPI, outMPIAccnos, MPIPos);
+                                       
+                                       if (m->control_pressed) {  MPI_File_close(&inMPI);  MPI_File_close(&outMPI);   MPI_File_close(&outMPIAccnos);  for (int j = 0; j < outputNames.size(); j++) {   remove(outputNames[j].c_str()); }  delete chimera; return 0;  }
                                }
                                
-                               vector<string> nonBlankAccnosFiles;
-                               //delete blank accnos files generated with multiple processes
-                               for(int i=0;i<processors;i++){  
-                                       if (!(isBlank(accnosFileName + toString(processIDS[i]) + ".temp"))) {
-                                               nonBlankAccnosFiles.push_back(accnosFileName + toString(processIDS[i]) + ".temp");
-                                       }else { remove((accnosFileName + toString(processIDS[i]) + ".temp").c_str());  }
-                               }
+                               //close files 
+                               MPI_File_close(&inMPI);
+                               MPI_File_close(&outMPI);
+                               MPI_File_close(&outMPIAccnos);
+                               MPI_Barrier(MPI_COMM_WORLD); //make everyone wait - just in case
+               #else
+               
+                       //break up file
+                       #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
+                               if(processors == 1){
+                                       ifstream inFASTA;
+                                       openInputFile(fastaFileNames[s], inFASTA);
+                                       getNumSeqs(inFASTA, numSeqs);
+                                       inFASTA.close();
+                                       
+                                       lines.push_back(new linePair(0, numSeqs));
+                                       
+                                       driver(lines[0], outputFileName, fastaFileNames[s], accnosFileName);
+                                       
+                                       if (m->control_pressed) { 
+                                               remove(outputFileName.c_str()); 
+                                               remove(accnosFileName.c_str());
+                                               for (int j = 0; j < outputNames.size(); j++) {  remove(outputNames[j].c_str()); } 
+                                               for (int i = 0; i < lines.size(); i++) {  delete lines[i];  }  lines.clear();
+                                               delete chimera;
+                                               return 0;
+                                       }
+                                       
+                               }else{
+                                       vector<unsigned long int> positions;
+                                       processIDS.resize(0);
+                                       
+                                       ifstream inFASTA;
+                                       openInputFile(fastaFileNames[s], inFASTA);
+                                       
+                                       string input;
+                                       while(!inFASTA.eof()){
+                                               input = getline(inFASTA);
+                                               if (input.length() != 0) {
+                                                       if(input[0] == '>'){    unsigned long int pos = inFASTA.tellg(); positions.push_back(pos - input.length() - 1); }
+                                               }
+                                       }
+                                       inFASTA.close();
+                                       
+                                       numSeqs = positions.size();
+                                       
+                                       int numSeqsPerProcessor = numSeqs / processors;
+                                       
+                                       for (int i = 0; i < processors; i++) {
+                                               unsigned long int startPos = positions[ i * numSeqsPerProcessor ];
+                                               if(i == processors - 1){
+                                                       numSeqsPerProcessor = numSeqs - i * numSeqsPerProcessor;
+                                               }
+                                               lines.push_back(new linePair(startPos, numSeqsPerProcessor));
+                                       }
+                                       
+                                       createProcesses(outputFileName, fastaFileNames[s], accnosFileName); 
                                
-                               //append accnos files
-                               if (nonBlankAccnosFiles.size() != 0) { 
-                                       rename(nonBlankAccnosFiles[0].c_str(), accnosFileName.c_str());
+                                       rename((outputFileName + toString(processIDS[0]) + ".temp").c_str(), outputFileName.c_str());
+                                       rename((accnosFileName + toString(processIDS[0]) + ".temp").c_str(), accnosFileName.c_str());
+                                               
+                                       //append output files
+                                       for(int i=1;i<processors;i++){
+                                               appendFiles((outputFileName + toString(processIDS[i]) + ".temp"), outputFileName);
+                                               remove((outputFileName + toString(processIDS[i]) + ".temp").c_str());
+                                       }
                                        
-                                       for (int h=1; h < nonBlankAccnosFiles.size(); h++) {
-                                               appendFiles(nonBlankAccnosFiles[h], accnosFileName);
-                                               remove(nonBlankAccnosFiles[h].c_str());
+                                       //append accnos files
+                                       for(int i=1;i<processors;i++){
+                                               appendFiles((accnosFileName + toString(processIDS[i]) + ".temp"), accnosFileName);
+                                               remove((accnosFileName + toString(processIDS[i]) + ".temp").c_str());
+                                       }
+                                                                               
+                                       if (m->control_pressed) { 
+                                               remove(outputFileName.c_str()); 
+                                               remove(accnosFileName.c_str());
+                                               for (int j = 0; j < outputNames.size(); j++) {  remove(outputNames[j].c_str()); } 
+                                               for (int i = 0; i < lines.size(); i++) {  delete lines[i];  }  lines.clear();
+                                               delete chimera;
+                                               return 0;
                                        }
-                               }else{ hasAccnos = false;  }
+                               }
+
+                       #else
+                               ifstream inFASTA;
+                               openInputFile(fastaFileNames[s], inFASTA);
+                               getNumSeqs(inFASTA, numSeqs);
+                               inFASTA.close();
+                               lines.push_back(new linePair(0, numSeqs));
+                               
+                               driver(lines[0], outputFileName, fastaFileNames[s], accnosFileName);
                                
                                if (m->control_pressed) { 
-                                       remove(outputFileName.c_str()); 
-                                       remove(accnosFileName.c_str());
-                                       for (int i = 0; i < lines.size(); i++) {  delete lines[i];  }  lines.clear();
-                                       delete chimera;
-                                       return 0;
+                                               remove(outputFileName.c_str()); 
+                                               remove(accnosFileName.c_str());
+                                               for (int j = 0; j < outputNames.size(); j++) {  remove(outputNames[j].c_str()); } 
+                                               for (int i = 0; i < lines.size(); i++) {  delete lines[i];  }  lines.clear();
+                                               delete chimera;
+                                               return 0;
                                }
-                       }
-
-               #else
-                       ifstream inFASTA;
-                       openInputFile(fastafile, inFASTA);
-                       numSeqs=count(istreambuf_iterator<char>(inFASTA),istreambuf_iterator<char>(), '>');
-                       inFASTA.close();
-                       lines.push_back(new linePair(0, numSeqs));
+                       #endif
                        
-                       driver(lines[0], outputFileName, fastafile, accnosFileName);
+               #endif  
+               
+                       delete chimera;
+                       for (int i = 0; i < lines.size(); i++) {  delete lines[i];  }  lines.clear();
                        
-                       if (m->control_pressed) { 
-                                       remove(outputFileName.c_str()); 
-                                       remove(accnosFileName.c_str());
-                                       for (int i = 0; i < lines.size(); i++) {  delete lines[i];  }  lines.clear();
-                                       delete chimera;
-                                       return 0;
-                       }
+                       outputNames.push_back(outputFileName);
+                       outputNames.push_back(accnosFileName); 
                        
-                       //delete accnos file if its blank 
-                       if (isBlank(accnosFileName)) {  remove(accnosFileName.c_str());  hasAccnos = false; }
-               #endif
-               
-       #endif  
-       
-               delete chimera;
-               for (int i = 0; i < lines.size(); i++) {  delete lines[i];  }  lines.clear();
+                       m->mothurOutEndLine();
+                       m->mothurOutEndLine(); m->mothurOut("It took " + toString(time(NULL) - start) + " secs to check " + toString(numSeqs) + " sequences."); m->mothurOutEndLine();
+               }
                
                m->mothurOutEndLine();
                m->mothurOut("Output File Names: "); m->mothurOutEndLine();
-               m->mothurOut(outputFileName); m->mothurOutEndLine();    
-               if (hasAccnos) {  m->mothurOut(accnosFileName); m->mothurOutEndLine();  }
+               for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }       
                m->mothurOutEndLine();
-               m->mothurOutEndLine(); m->mothurOut("It took " + toString(time(NULL) - start) + " secs to check " + toString(numSeqs) + " sequences."); m->mothurOutEndLine();
-               
+                       
                return 0;
                
        }
@@ -493,7 +533,7 @@ int ChimeraPintailCommand::driver(linePair* line, string outputFName, string fil
 }
 //**********************************************************************************************************************
 #ifdef USE_MPI
-int ChimeraPintailCommand::driverMPI(int start, int num, MPI_File& inMPI, MPI_File& outMPI, MPI_File& outAccMPI, vector<long>& MPIPos){
+int ChimeraPintailCommand::driverMPI(int start, int num, MPI_File& inMPI, MPI_File& outMPI, MPI_File& outAccMPI, vector<unsigned long int>& MPIPos){
        try {
                                
                MPI_Status status; 
@@ -529,7 +569,6 @@ int ChimeraPintailCommand::driverMPI(int start, int num, MPI_File& inMPI, MPI_Fi
                
                                        //print results
                                        bool isChimeric = chimera->print(outMPI, outAccMPI);
-                                       if (isChimeric) { MPIWroteAccnos = true;  }
                                }
                        }
                        delete candidateSeq;