git.donarmstrong.com Git - mothur.git/blobdiff - chimerapintailcommand.cpp
added oldfasta and column parameter to dist.seqs so you can append distances to an...
[mothur.git] / chimerapintailcommand.cpp
index adf060f485d862dc93797069a54bb58fbe25adfb..5818ab22c0bfc3e0245372de038e4c43d95a2289 100644 (file)
@@ -27,7 +27,7 @@ ChimeraPintailCommand::ChimeraPintailCommand(string option)  {
                        OptionParser parser(option);
                        map<string,string> parameters = parser.getParameters();
                        
-                       ValidParameters validParameter;
+                       ValidParameters validParameter("chimera.pintail");
                        map<string,string>::iterator it;
                        
                        //check to make sure all parameters are valid for command
@@ -36,18 +36,10 @@ ChimeraPintailCommand::ChimeraPintailCommand(string option)  {
                        }
                        
                        //if the user changes the input directory command factory will send this info to us in the output parameter 
-                       string inputDir = validParameter.validFile(parameters, "inputdir", false);              
+                       inputDir = validParameter.validFile(parameters, "inputdir", false);             
                        if (inputDir == "not found"){   inputDir = "";          }
                        else {
                                string path;
-                               it = parameters.find("fasta");
-                               //user has given a template file
-                               if(it != parameters.end()){ 
-                                       path = hasPath(it->second);
-                                       //if the user has not given a path then, add inputdir. else leave path alone.
-                                       if (path == "") {       parameters["fasta"] = inputDir + it->second;            }
-                               }
-                               
                                it = parameters.find("template");
                                //user has given a template file
                                if(it != parameters.end()){ 
@@ -75,27 +67,78 @@ ChimeraPintailCommand::ChimeraPintailCommand(string option)  {
 
                        
                        //check for required parameters
-                       fastafile = validParameter.validFile(parameters, "fasta", true);
-                       if (fastafile == "not open") { abort = true; }
-                       else if (fastafile == "not found") { fastafile = ""; m->mothurOut("fasta is a required parameter for the chimera.pintail command."); m->mothurOutEndLine(); abort = true;  }    
-                       
-                       //if the user changes the output directory command factory will send this info to us in the output parameter 
-                       outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  
-                               outputDir = ""; 
-                               outputDir += hasPath(fastafile); //if user entered a file with a path then preserve it  
-                       }
+                       fastafile = validParameter.validFile(parameters, "fasta", false);
+                       if (fastafile == "not found") { fastafile = ""; m->mothurOut("fasta is a required parameter for the chimera.pintail command."); m->mothurOutEndLine(); abort = true;  }
+                       else { 
+                               splitAtDash(fastafile, fastaFileNames);
+                               
+                               //go through files and make sure they are good, if not, then disregard them
+                               for (int i = 0; i < fastaFileNames.size(); i++) {
+                                       if (inputDir != "") {
+                                               string path = hasPath(fastaFileNames[i]);
+                                               //if the user has not given a path then, add inputdir. else leave path alone.
+                                               if (path == "") {       fastaFileNames[i] = inputDir + fastaFileNames[i];               }
+                                       }
+       
+                                       int ableToOpen;
+                                       ifstream in;
+                                       
+                                       #ifdef USE_MPI  
+                                               int pid;
+                                               MPI_Comm_size(MPI_COMM_WORLD, &processors); //set processors to the number of mpi processes running
+                                               MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
+                               
+                                               if (pid == 0) {
+                                       #endif
 
-                       templatefile = validParameter.validFile(parameters, "template", true);
-                       if (templatefile == "not open") { abort = true; }
-                       else if (templatefile == "not found") { templatefile = "";  m->mothurOut("template is a required parameter for the chimera.pintail command."); m->mothurOutEndLine(); abort = true;  }
+                                       ableToOpen = openInputFile(fastaFileNames[i], in, "noerror");
+                               
+                                       //if you can't open it, try default location
+                                       if (ableToOpen == 1) {
+                                               if (m->getDefaultPath() != "") { //default path is set
+                                                       string tryPath = m->getDefaultPath() + getSimpleName(fastaFileNames[i]);
+                                                       m->mothurOut("Unable to open " + fastaFileNames[i] + ". Trying default " + tryPath); m->mothurOutEndLine();
+                                                       ableToOpen = openInputFile(tryPath, in, "noerror");
+                                                       fastaFileNames[i] = tryPath;
+                                               }
+                                       }
+                                       in.close();
+                                       
+                                       #ifdef USE_MPI  
+                                                       for (int j = 1; j < processors; j++) {
+                                                               MPI_Send(&ableToOpen, 1, MPI_INT, j, 2001, MPI_COMM_WORLD); 
+                                                       }
+                                               }else{
+                                                       MPI_Status status;
+                                                       MPI_Recv(&ableToOpen, 1, MPI_INT, 0, 2001, MPI_COMM_WORLD, &status);
+                                               }
+                                               
+                                       #endif
+
+                                       if (ableToOpen == 1) { 
+                                               m->mothurOut("Unable to open " + fastaFileNames[i] + ". It will be disregarded."); m->mothurOutEndLine(); 
+                                               //erase from file list
+                                               fastaFileNames.erase(fastaFileNames.begin()+i);
+                                               i--;
+                                       }
+                               }
+                               
+                               //make sure there is at least one valid file left
+                               if (fastaFileNames.size() == 0) { m->mothurOut("no valid files."); m->mothurOutEndLine(); abort = true; }
+                       }
                        
-                       consfile = validParameter.validFile(parameters, "conservation", true);
-                       if (consfile == "not open") { abort = true; }
-                       else if (consfile == "not found") { consfile = "";  }   
+                       string temp;
+                       temp = validParameter.validFile(parameters, "filter", false);                   if (temp == "not found") { temp = "F"; }
+                       filter = isTrue(temp);
                        
-                       quanfile = validParameter.validFile(parameters, "quantile", true);
-                       if (quanfile == "not open") { abort = true; }
-                       else if (quanfile == "not found") { quanfile = "";  }
+                       temp = validParameter.validFile(parameters, "processors", false);               if (temp == "not found") { temp = "1"; }
+                       convert(temp, processors);
+                       
+                       temp = validParameter.validFile(parameters, "window", false);                   if (temp == "not found") { temp = "0"; }
+                       convert(temp, window);
+                       
+                       temp = validParameter.validFile(parameters, "increment", false);                if (temp == "not found") { temp = "25"; }
+                       convert(temp, increment);
                        
                        maskfile = validParameter.validFile(parameters, "mask", false);
                        if (maskfile == "not found") { maskfile = "";  }        
@@ -111,19 +154,36 @@ ChimeraPintailCommand::ChimeraPintailCommand(string option)  {
                                if (ableToOpen == 1) { abort = true; }
                                in.close();
                        }
-                                               
-                       string temp;
-                       temp = validParameter.validFile(parameters, "filter", false);                   if (temp == "not found") { temp = "F"; }
-                       filter = isTrue(temp);
+
                        
-                       temp = validParameter.validFile(parameters, "processors", false);               if (temp == "not found") { temp = "1"; }
-                       convert(temp, processors);
+                       //if the user changes the output directory command factory will send this info to us in the output parameter 
+                       outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  
+                               outputDir = ""; 
+                               outputDir += hasPath(fastafile); //if user entered a file with a path then preserve it  
+                       }
+               
+                       templatefile = validParameter.validFile(parameters, "template", true);
+                       if (templatefile == "not open") { abort = true; }
+                       else if (templatefile == "not found") { templatefile = "";  m->mothurOut("template is a required parameter for the chimera.pintail command."); m->mothurOutEndLine(); abort = true;  }
                        
-                       temp = validParameter.validFile(parameters, "window", false);                   if (temp == "not found") { temp = "0"; }
-                       convert(temp, window);
+                       consfile = validParameter.validFile(parameters, "conservation", true);
+                       if (consfile == "not open") { abort = true; }
+                       else if (consfile == "not found") { 
+                               consfile = "";  
+                               //check for consfile
+                               string tempConsFile = getRootName(inputDir + getSimpleName(templatefile)) + "freq";
+                               ifstream FileTest(tempConsFile.c_str());
+                               if(FileTest){   
+                                       bool GoodFile = checkReleaseVersion(FileTest, m->getVersion());
+                                       if (GoodFile) {  
+                                               m->mothurOut("I found " + tempConsFile + " in your input file directory. I will use it to save time."); m->mothurOutEndLine();  consfile = tempConsFile;  FileTest.close();     
+                                       }
+                               }
+                       }       
                        
-                       temp = validParameter.validFile(parameters, "increment", false);                if (temp == "not found") { temp = "25"; }
-                       convert(temp, increment);
+                       quanfile = validParameter.validFile(parameters, "quantile", true);
+                       if (quanfile == "not open") { abort = true; }
+                       else if (quanfile == "not found") { quanfile = ""; }
                }
        }
        catch(exception& e) {
@@ -140,6 +200,7 @@ void ChimeraPintailCommand::help(){
                m->mothurOut("This command was created using the algorythms described in the 'At Least 1 in 20 16S rRNA Sequence Records Currently Held in the Public Repositories is Estimated To Contain Substantial Anomalies' paper by Kevin E. Ashelford 1, Nadia A. Chuzhanova 3, John C. Fry 1, Antonia J. Jones 2 and Andrew J. Weightman 1.\n");
                m->mothurOut("The chimera.pintail command parameters are fasta, template, filter, mask, processors, window, increment, conservation and quantile.\n");
                m->mothurOut("The fasta parameter allows you to enter the fasta file containing your potentially chimeric sequences, and is required. \n");
+               m->mothurOut("You may enter multiple fasta files by separating their names with dashes. ie. fasta=abrecovery.fasta-amzon.fasta \n");
                m->mothurOut("The template parameter allows you to enter a template file containing known non-chimeric sequences, and is required. \n");
                m->mothurOut("The filter parameter allows you to specify if you would like to apply a vertical and 50% soft filter. \n");
                m->mothurOut("The mask parameter allows you to specify a file containing one sequence you wish to use as a mask for the your sequences, by default no mask is applied.  You can apply an ecoli mask by typing, mask=default. \n");
@@ -152,8 +213,8 @@ void ChimeraPintailCommand::help(){
                m->mothurOut("The conservation parameter allows you to enter a frequency file containing the highest bases frequency at each place in the alignment.\n");
                m->mothurOut("The quantile parameter allows you to enter a file containing quantiles for a template files sequences, if you use the filter the quantile file generated becomes unique to the fasta file you used.\n");
                m->mothurOut("The chimera.pintail command should be in the following format: \n");
-               m->mothurOut("chimera.seqs(fasta=yourFastaFile, filter=yourFilter, correction=yourCorrection, processors=yourProcessors, method=bellerophon) \n");
-               m->mothurOut("Example: chimera.seqs(fasta=AD.align, filter=True, correction=true, method=bellerophon, window=200) \n");
+               m->mothurOut("chimera.pintail(fasta=yourFastaFile, template=yourTemplate) \n");
+               m->mothurOut("Example: chimera.pintail(fasta=AD.align, template=silva.bacteria.fasta) \n");
                m->mothurOut("Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFastaFile).\n\n");     
        }
        catch(exception& e) {
@@ -173,259 +234,241 @@ int ChimeraPintailCommand::execute(){
                
                if (abort == true) { return 0; }
                
-               int start = time(NULL); 
-               
-               //set user options
-               if (maskfile == "default") { m->mothurOut("I am using the default 236627 EU009184.1 Shigella dysenteriae str. FBD013."); m->mothurOutEndLine();  }
-               
-               chimera = new Pintail(fastafile, templatefile, filter, processors, maskfile, consfile, quanfile, window, increment, outputDir);
-               
-               string outputFileName, accnosFileName;
-               if (maskfile != "") {
-                       outputFileName = outputDir + getRootName(getSimpleName(fastafile)) + maskfile + ".pintail.chimeras";
-                       accnosFileName = outputDir + getRootName(getSimpleName(fastafile)) + maskfile + ".pintail.accnos";
-               }else {
-                       outputFileName = outputDir + getRootName(getSimpleName(fastafile))  + "pintail.chimeras";
-                       accnosFileName = outputDir + getRootName(getSimpleName(fastafile))  + "pintail.accnos";
-               }
-               bool hasAccnos = true;
-               
-               if (m->control_pressed) { delete chimera;       return 0;       }
-               
-               if (chimera->getUnaligned()) { 
-                       m->mothurOut("Your template sequences are different lengths, please correct."); m->mothurOutEndLine(); 
-                       delete chimera;
-                       return 0; 
-               }
-               templateSeqsLength = chimera->getLength();
-       
-       #ifdef USE_MPI
-               int pid, end, numSeqsPerProcessor; 
-                       int tag = 2001;
-                       vector<long> MPIPos;
-                       MPIWroteAccnos = false;
-                       
-                       MPI_Status status; 
-                       MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
-                       MPI_Comm_size(MPI_COMM_WORLD, &processors); 
-
-                       MPI_File inMPI;
-                       MPI_File outMPI;
-                       MPI_File outMPIAccnos;
-                       
-                       int outMode=MPI_MODE_CREATE|MPI_MODE_WRONLY; 
-                       int inMode=MPI_MODE_RDONLY; 
-                       
-                       //char* outFilename = new char[outputFileName.length()];
-                       //memcpy(outFilename, outputFileName.c_str(), outputFileName.length());
-                       
-                       char outFilename[1024];
-                       strcpy(outFilename, outputFileName.c_str());
-                       
-                       //char* outAccnosFilename = new char[accnosFileName.length()];
-                       //memcpy(outAccnosFilename, accnosFileName.c_str(), accnosFileName.length());
-                       
-                       char outAccnosFilename[1024];
-                       strcpy(outAccnosFilename, accnosFileName.c_str());
+               for (int s = 0; s < fastaFileNames.size(); s++) {
+                               
+                       m->mothurOut("Checking sequences from " + fastaFileNames[s] + " ..." ); m->mothurOutEndLine();
 
-                       //char* inFileName = new char[fastafile.length()];
-                       //memcpy(inFileName, fastafile.c_str(), fastafile.length());
+                       int start = time(NULL); 
+                       
+                       //set user options
+                       if (maskfile == "default") { m->mothurOut("I am using the default 236627 EU009184.1 Shigella dysenteriae str. FBD013."); m->mothurOutEndLine();  }
+                       
+                       //check for quantile to save the time
+                       string tempQuan = "";
+                       if ((!filter) && (maskfile == "")) {
+                               tempQuan = inputDir + getRootName(getSimpleName(templatefile)) + "pintail.quan";
+                       }else if ((!filter) && (maskfile != "")) { 
+                               tempQuan = inputDir + getRootName(getSimpleName(templatefile)) + "pintail.masked.quan";
+                       }else if ((filter) && (maskfile != "")) { 
+                               tempQuan = inputDir + getRootName(getSimpleName(templatefile)) + "pintail.filtered." + getSimpleName(getRootName(fastaFileNames[s])) + "masked.quan";
+                       }else if ((filter) && (maskfile == "")) { 
+                               tempQuan = inputDir + getRootName(getSimpleName(templatefile)) + "pintail.filtered." + getSimpleName(getRootName(fastaFileNames[s])) + "quan";
+                       }
                        
-                       char inFileName[1024];
-                       strcpy(inFileName, fastafile.c_str());
-
-                       MPI_File_open(MPI_COMM_WORLD, inFileName, inMode, MPI_INFO_NULL, &inMPI);  //comm, filename, mode, info, filepointer
-                       MPI_File_open(MPI_COMM_WORLD, outFilename, outMode, MPI_INFO_NULL, &outMPI);
-                       MPI_File_open(MPI_COMM_WORLD, outAccnosFilename, outMode, MPI_INFO_NULL, &outMPIAccnos);
+                       ifstream FileTest(tempQuan.c_str());
+                       if(FileTest){   
+                               bool GoodFile = checkReleaseVersion(FileTest, m->getVersion());
+                               if (GoodFile) {  
+                                       m->mothurOut("I found " + tempQuan + " in your input file directory. I will use it to save time."); m->mothurOutEndLine();  quanfile = tempQuan;  FileTest.close();     
+                               }
+                       }
                        
-                       //delete inFileName;
-                       //delete outFilename;
-                       //delete outAccnosFilename;
-
-                       if (m->control_pressed) {  MPI_File_close(&inMPI);  MPI_File_close(&outMPI);   MPI_File_close(&outMPIAccnos);  delete chimera; return 0;  }
-
-                       if (pid == 0) { //you are the root process 
-                                                       
-                               MPIPos = setFilePosFasta(fastafile, numSeqs); //fills MPIPos, returns numSeqs
-                               
-                               //send file positions to all processes
-                               MPI_Bcast(&numSeqs, 1, MPI_INT, 0, MPI_COMM_WORLD);  //send numSeqs
-                               MPI_Bcast(&MPIPos[0], (numSeqs+1), MPI_LONG, 0, MPI_COMM_WORLD); //send file pos        
-                               
-                               //figure out how many sequences you have to align
-                               numSeqsPerProcessor = numSeqs / processors;
-                               int startIndex =  pid * numSeqsPerProcessor;
-                               if(pid == (processors - 1)){    numSeqsPerProcessor = numSeqs - pid * numSeqsPerProcessor;      }
-                               
+                       chimera = new Pintail(fastaFileNames[s], templatefile, filter, processors, maskfile, consfile, quanfile, window, increment, outputDir);
                        
-                               //align your part
-                               driverMPI(startIndex, numSeqsPerProcessor, inMPI, outMPI, outMPIAccnos, MPIPos);
-                               
-                               if (m->control_pressed) {  MPI_File_close(&inMPI);  MPI_File_close(&outMPI);   MPI_File_close(&outMPIAccnos);  remove(outputFileName.c_str());  remove(accnosFileName.c_str());  delete chimera; return 0;  }
-                               
-                               for (int i = 1; i < processors; i++) {
-                                       bool tempResult;
-                                       MPI_Recv(&tempResult, 1, MPI_INT, i, tag, MPI_COMM_WORLD, &status);
-                                       if (tempResult != 0) { MPIWroteAccnos = true; }
-                               }
-                       }else{ //you are a child process
-                               MPI_Bcast(&numSeqs, 1, MPI_INT, 0, MPI_COMM_WORLD); //get numSeqs
-                               MPIPos.resize(numSeqs+1);
-                               MPI_Bcast(&MPIPos[0], (numSeqs+1), MPI_LONG, 0, MPI_COMM_WORLD); //get file positions
-                               
-                               //figure out how many sequences you have to align
-                               numSeqsPerProcessor = numSeqs / processors;
-                               int startIndex =  pid * numSeqsPerProcessor;
-                               if(pid == (processors - 1)){    numSeqsPerProcessor = numSeqs - pid * numSeqsPerProcessor;      }
-                               
-                               
-                               //align your part
-                               driverMPI(startIndex, numSeqsPerProcessor, inMPI, outMPI, outMPIAccnos, MPIPos);
-                               
-                               if (m->control_pressed) {  MPI_File_close(&inMPI);  MPI_File_close(&outMPI);   MPI_File_close(&outMPIAccnos);  delete chimera; return 0;  }
-
-                               MPI_Send(&MPIWroteAccnos, 1, MPI_INT, 0, tag, MPI_COMM_WORLD); 
+                       string outputFileName, accnosFileName;
+                       if (maskfile != "") {
+                               outputFileName = outputDir + getRootName(getSimpleName(fastaFileNames[s])) + maskfile + ".pintail.chimeras";
+                               accnosFileName = outputDir + getRootName(getSimpleName(fastaFileNames[s])) + maskfile + ".pintail.accnos";
+                       }else {
+                               outputFileName = outputDir + getRootName(getSimpleName(fastaFileNames[s]))  + "pintail.chimeras";
+                               accnosFileName = outputDir + getRootName(getSimpleName(fastaFileNames[s]))  + "pintail.accnos";
                        }
                        
-                       //close files 
-                       MPI_File_close(&inMPI);
-                       MPI_File_close(&outMPI);
-                       MPI_File_close(&outMPIAccnos);
+                       if (m->control_pressed) { delete chimera; for (int j = 0; j < outputNames.size(); j++) {        remove(outputNames[j].c_str()); }  return 0;    }
                        
-                       //delete accnos file if blank
-                       if (pid == 0) {
-                               if (!MPIWroteAccnos) { 
-                                       //MPI_Info info;
-                                       //MPI_File_delete(outAccnosFilename, info);
-                                       hasAccnos = false;      
-                                       remove(accnosFileName.c_str()); 
-                               }
+                       if (chimera->getUnaligned()) { 
+                               m->mothurOut("Your template sequences are different lengths, please correct."); m->mothurOutEndLine(); 
+                               delete chimera;
+                               return 0; 
                        }
-
-       #else
-       
-               //break up file
-               #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
-                       if(processors == 1){
-                               ifstream inFASTA;
-                               openInputFile(fastafile, inFASTA);
-                               numSeqs=count(istreambuf_iterator<char>(inFASTA),istreambuf_iterator<char>(), '>');
-                               inFASTA.close();
-                               
-                               lines.push_back(new linePair(0, numSeqs));
+                       templateSeqsLength = chimera->getLength();
+               
+               #ifdef USE_MPI
+                       int pid, end, numSeqsPerProcessor; 
+                               int tag = 2001;
+                               vector<unsigned long int> MPIPos;
                                
-                               driver(lines[0], outputFileName, fastafile, accnosFileName);
+                               MPI_Status status; 
+                               MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
+                               MPI_Comm_size(MPI_COMM_WORLD, &processors); 
+
+                               MPI_File inMPI;
+                               MPI_File outMPI;
+                               MPI_File outMPIAccnos;
                                
-                               if (m->control_pressed) { 
-                                       remove(outputFileName.c_str()); 
-                                       remove(accnosFileName.c_str());
-                                       for (int i = 0; i < lines.size(); i++) {  delete lines[i];  }  lines.clear();
-                                       delete chimera;
-                                       return 0;
-                               }
+                               int outMode=MPI_MODE_CREATE|MPI_MODE_WRONLY; 
+                               int inMode=MPI_MODE_RDONLY; 
                                
-                               //delete accnos file if its blank 
-                               if (isBlank(accnosFileName)) {  remove(accnosFileName.c_str());  hasAccnos = false; }
-                                                               
-                       }else{
-                               vector<int> positions;
-                               processIDS.resize(0);
+                               char outFilename[1024];
+                               strcpy(outFilename, outputFileName.c_str());
                                
-                               ifstream inFASTA;
-                               openInputFile(fastafile, inFASTA);
+                               char outAccnosFilename[1024];
+                               strcpy(outAccnosFilename, accnosFileName.c_str());
                                
-                               string input;
-                               while(!inFASTA.eof()){
-                                       input = getline(inFASTA);
-                                       if (input.length() != 0) {
-                                               if(input[0] == '>'){    long int pos = inFASTA.tellg(); positions.push_back(pos - input.length() - 1);  }
-                                       }
-                               }
-                               inFASTA.close();
-                               
-                               numSeqs = positions.size();
-                               
-                               int numSeqsPerProcessor = numSeqs / processors;
+                               char inFileName[1024];
+                               strcpy(inFileName, fastaFileNames[s].c_str());
+
+                               MPI_File_open(MPI_COMM_WORLD, inFileName, inMode, MPI_INFO_NULL, &inMPI);  //comm, filename, mode, info, filepointer
+                               MPI_File_open(MPI_COMM_WORLD, outFilename, outMode, MPI_INFO_NULL, &outMPI);
+                               MPI_File_open(MPI_COMM_WORLD, outAccnosFilename, outMode, MPI_INFO_NULL, &outMPIAccnos);
                                
-                               for (int i = 0; i < processors; i++) {
-                                       long int startPos = positions[ i * numSeqsPerProcessor ];
-                                       if(i == processors - 1){
-                                               numSeqsPerProcessor = numSeqs - i * numSeqsPerProcessor;
+                               if (m->control_pressed) {  MPI_File_close(&inMPI);  MPI_File_close(&outMPI);   MPI_File_close(&outMPIAccnos);  for (int j = 0; j < outputNames.size(); j++) {   remove(outputNames[j].c_str()); }  delete chimera; return 0;  }
+
+                               if (pid == 0) { //you are the root process 
+                                                               
+                                       MPIPos = setFilePosFasta(fastaFileNames[s], numSeqs); //fills MPIPos, returns numSeqs
+                                       
+                                       //send file positions to all processes
+                                       for(int i = 1; i < processors; i++) { 
+                                               MPI_Send(&numSeqs, 1, MPI_INT, i, tag, MPI_COMM_WORLD);
+                                               MPI_Send(&MPIPos[0], (numSeqs+1), MPI_LONG, i, tag, MPI_COMM_WORLD);
                                        }
-                                       lines.push_back(new linePair(startPos, numSeqsPerProcessor));
-                               }
-                               
+                                       
+                                       //figure out how many sequences you have to check
+                                       numSeqsPerProcessor = numSeqs / processors;
+                                       int startIndex =  pid * numSeqsPerProcessor;
+                                       if(pid == (processors - 1)){    numSeqsPerProcessor = numSeqs - pid * numSeqsPerProcessor;      }
                                
-                               createProcesses(outputFileName, fastafile, accnosFileName); 
-                       
-                               rename((outputFileName + toString(processIDS[0]) + ".temp").c_str(), outputFileName.c_str());
+                                       //do your part
+                                       driverMPI(startIndex, numSeqsPerProcessor, inMPI, outMPI, outMPIAccnos, MPIPos);
                                        
-                               //append output files
-                               for(int i=1;i<processors;i++){
-                                       appendFiles((outputFileName + toString(processIDS[i]) + ".temp"), outputFileName);
-                                       remove((outputFileName + toString(processIDS[i]) + ".temp").c_str());
+                                       if (m->control_pressed) {  MPI_File_close(&inMPI);  MPI_File_close(&outMPI);   MPI_File_close(&outMPIAccnos);  remove(outputFileName.c_str());  remove(accnosFileName.c_str());  for (int j = 0; j < outputNames.size(); j++) { remove(outputNames[j].c_str()); }  delete chimera; return 0;  }
+                                       
+                               }else{ //you are a child process
+                                       MPI_Recv(&numSeqs, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
+                                       MPIPos.resize(numSeqs+1);
+                                       MPI_Recv(&MPIPos[0], (numSeqs+1), MPI_LONG, 0, tag, MPI_COMM_WORLD, &status);
+                                       
+                                       //figure out how many sequences you have to check
+                                       numSeqsPerProcessor = numSeqs / processors;
+                                       int startIndex =  pid * numSeqsPerProcessor;
+                                       if(pid == (processors - 1)){    numSeqsPerProcessor = numSeqs - pid * numSeqsPerProcessor;      }
+                                       
+                                       //do your part
+                                       driverMPI(startIndex, numSeqsPerProcessor, inMPI, outMPI, outMPIAccnos, MPIPos);
+                                       
+                                       if (m->control_pressed) {  MPI_File_close(&inMPI);  MPI_File_close(&outMPI);   MPI_File_close(&outMPIAccnos);  for (int j = 0; j < outputNames.size(); j++) {   remove(outputNames[j].c_str()); }  delete chimera; return 0;  }
                                }
                                
-                               vector<string> nonBlankAccnosFiles;
-                               //delete blank accnos files generated with multiple processes
-                               for(int i=0;i<processors;i++){  
-                                       if (!(isBlank(accnosFileName + toString(processIDS[i]) + ".temp"))) {
-                                               nonBlankAccnosFiles.push_back(accnosFileName + toString(processIDS[i]) + ".temp");
-                                       }else { remove((accnosFileName + toString(processIDS[i]) + ".temp").c_str());  }
-                               }
+                               //close files 
+                               MPI_File_close(&inMPI);
+                               MPI_File_close(&outMPI);
+                               MPI_File_close(&outMPIAccnos);
+                               MPI_Barrier(MPI_COMM_WORLD); //make everyone wait - just in case
+               #else
+               
+                       //break up file
+                       #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
+                               if(processors == 1){
+                                       ifstream inFASTA;
+                                       openInputFile(fastaFileNames[s], inFASTA);
+                                       getNumSeqs(inFASTA, numSeqs);
+                                       inFASTA.close();
+                                       
+                                       lines.push_back(new linePair(0, numSeqs));
+                                       
+                                       driver(lines[0], outputFileName, fastaFileNames[s], accnosFileName);
+                                       
+                                       if (m->control_pressed) { 
+                                               remove(outputFileName.c_str()); 
+                                               remove(accnosFileName.c_str());
+                                               for (int j = 0; j < outputNames.size(); j++) {  remove(outputNames[j].c_str()); } 
+                                               for (int i = 0; i < lines.size(); i++) {  delete lines[i];  }  lines.clear();
+                                               delete chimera;
+                                               return 0;
+                                       }
+                                       
+                               }else{
+                                       vector<unsigned long int> positions;
+                                       processIDS.resize(0);
+                                       
+                                       ifstream inFASTA;
+                                       openInputFile(fastaFileNames[s], inFASTA);
+                                       
+                                       string input;
+                                       while(!inFASTA.eof()){
+                                               input = getline(inFASTA);
+                                               if (input.length() != 0) {
+                                                       if(input[0] == '>'){    unsigned long int pos = inFASTA.tellg(); positions.push_back(pos - input.length() - 1); }
+                                               }
+                                       }
+                                       inFASTA.close();
+                                       
+                                       numSeqs = positions.size();
+                                       
+                                       int numSeqsPerProcessor = numSeqs / processors;
+                                       
+                                       for (int i = 0; i < processors; i++) {
+                                               unsigned long int startPos = positions[ i * numSeqsPerProcessor ];
+                                               if(i == processors - 1){
+                                                       numSeqsPerProcessor = numSeqs - i * numSeqsPerProcessor;
+                                               }
+                                               lines.push_back(new linePair(startPos, numSeqsPerProcessor));
+                                       }
+                                       
+                                       createProcesses(outputFileName, fastaFileNames[s], accnosFileName); 
                                
-                               //append accnos files
-                               if (nonBlankAccnosFiles.size() != 0) { 
-                                       rename(nonBlankAccnosFiles[0].c_str(), accnosFileName.c_str());
+                                       rename((outputFileName + toString(processIDS[0]) + ".temp").c_str(), outputFileName.c_str());
+                                       rename((accnosFileName + toString(processIDS[0]) + ".temp").c_str(), accnosFileName.c_str());
+                                               
+                                       //append output files
+                                       for(int i=1;i<processors;i++){
+                                               appendFiles((outputFileName + toString(processIDS[i]) + ".temp"), outputFileName);
+                                               remove((outputFileName + toString(processIDS[i]) + ".temp").c_str());
+                                       }
                                        
-                                       for (int h=1; h < nonBlankAccnosFiles.size(); h++) {
-                                               appendFiles(nonBlankAccnosFiles[h], accnosFileName);
-                                               remove(nonBlankAccnosFiles[h].c_str());
+                                       //append accnos files
+                                       for(int i=1;i<processors;i++){
+                                               appendFiles((accnosFileName + toString(processIDS[i]) + ".temp"), accnosFileName);
+                                               remove((accnosFileName + toString(processIDS[i]) + ".temp").c_str());
+                                       }
+                                                                               
+                                       if (m->control_pressed) { 
+                                               remove(outputFileName.c_str()); 
+                                               remove(accnosFileName.c_str());
+                                               for (int j = 0; j < outputNames.size(); j++) {  remove(outputNames[j].c_str()); } 
+                                               for (int i = 0; i < lines.size(); i++) {  delete lines[i];  }  lines.clear();
+                                               delete chimera;
+                                               return 0;
                                        }
-                               }else{ hasAccnos = false;  }
+                               }
+
+                       #else
+                               ifstream inFASTA;
+                               openInputFile(fastaFileNames[s], inFASTA);
+                               getNumSeqs(inFASTA, numSeqs);
+                               inFASTA.close();
+                               lines.push_back(new linePair(0, numSeqs));
+                               
+                               driver(lines[0], outputFileName, fastaFileNames[s], accnosFileName);
                                
                                if (m->control_pressed) { 
-                                       remove(outputFileName.c_str()); 
-                                       remove(accnosFileName.c_str());
-                                       for (int i = 0; i < lines.size(); i++) {  delete lines[i];  }  lines.clear();
-                                       delete chimera;
-                                       return 0;
+                                               remove(outputFileName.c_str()); 
+                                               remove(accnosFileName.c_str());
+                                               for (int j = 0; j < outputNames.size(); j++) {  remove(outputNames[j].c_str()); } 
+                                               for (int i = 0; i < lines.size(); i++) {  delete lines[i];  }  lines.clear();
+                                               delete chimera;
+                                               return 0;
                                }
-                       }
-
-               #else
-                       ifstream inFASTA;
-                       openInputFile(fastafile, inFASTA);
-                       numSeqs=count(istreambuf_iterator<char>(inFASTA),istreambuf_iterator<char>(), '>');
-                       inFASTA.close();
-                       lines.push_back(new linePair(0, numSeqs));
+                       #endif
                        
-                       driver(lines[0], outputFileName, fastafile, accnosFileName);
+               #endif  
+               
+                       delete chimera;
+                       for (int i = 0; i < lines.size(); i++) {  delete lines[i];  }  lines.clear();
                        
-                       if (m->control_pressed) { 
-                                       remove(outputFileName.c_str()); 
-                                       remove(accnosFileName.c_str());
-                                       for (int i = 0; i < lines.size(); i++) {  delete lines[i];  }  lines.clear();
-                                       delete chimera;
-                                       return 0;
-                       }
+                       outputNames.push_back(outputFileName);
+                       outputNames.push_back(accnosFileName); 
                        
-                       //delete accnos file if its blank 
-                       if (isBlank(accnosFileName)) {  remove(accnosFileName.c_str());  hasAccnos = false; }
-               #endif
-               
-       #endif  
-       
-               delete chimera;
-               for (int i = 0; i < lines.size(); i++) {  delete lines[i];  }  lines.clear();
+                       m->mothurOutEndLine();
+                       m->mothurOutEndLine(); m->mothurOut("It took " + toString(time(NULL) - start) + " secs to check " + toString(numSeqs) + " sequences."); m->mothurOutEndLine();
+               }
                
                m->mothurOutEndLine();
                m->mothurOut("Output File Names: "); m->mothurOutEndLine();
-               m->mothurOut(outputFileName); m->mothurOutEndLine();    
-               if (hasAccnos) {  m->mothurOut(accnosFileName); m->mothurOutEndLine();  }
+               for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }       
                m->mothurOutEndLine();
-               m->mothurOutEndLine(); m->mothurOut("It took " + toString(time(NULL) - start) + " secs to check " + toString(numSeqs) + " sequences."); m->mothurOutEndLine();
-               
+                       
                return 0;
                
        }
@@ -490,7 +533,7 @@ int ChimeraPintailCommand::driver(linePair* line, string outputFName, string fil
 }
 //**********************************************************************************************************************
 #ifdef USE_MPI
-int ChimeraPintailCommand::driverMPI(int start, int num, MPI_File& inMPI, MPI_File& outMPI, MPI_File& outAccMPI, vector<long>& MPIPos){
+int ChimeraPintailCommand::driverMPI(int start, int num, MPI_File& inMPI, MPI_File& outMPI, MPI_File& outAccMPI, vector<unsigned long int>& MPIPos){
        try {
                                
                MPI_Status status; 
@@ -526,7 +569,6 @@ int ChimeraPintailCommand::driverMPI(int start, int num, MPI_File& inMPI, MPI_Fi
                
                                        //print results
                                        bool isChimeric = chimera->print(outMPI, outAccMPI);
-                                       if (isChimeric) { MPIWroteAccnos = true;  }
                                }
                        }
                        delete candidateSeq;