]> git.donarmstrong.com Git - mothur.git/commitdiff
paralellized screen.seqs and added mpi code to it. fixed bug with all mpi commands...
authorwestcott <westcott>
Thu, 29 Apr 2010 11:15:33 +0000 (11:15 +0000)
committerwestcott <westcott>
Thu, 29 Apr 2010 11:15:33 +0000 (11:15 +0000)
15 files changed:
aligncommand.cpp
chimeraccodecommand.cpp
chimeracheckcommand.cpp
chimerapintailcommand.cpp
chimeraslayercommand.cpp
classifyseqscommand.cpp
commandfactory.cpp
distancecommand.cpp
filterseqscommand.cpp
makefile
mothur.h
phylosummary.cpp
phylotree.cpp
screenseqscommand.cpp
screenseqscommand.h

index 37089b669a31e05af705d8be7a003be602c99185..a246ecc5806320efeadda6fa82f67fd2f2d99e21 100644 (file)
@@ -308,8 +308,9 @@ int AlignCommand::execute(){
                                        \r
                                        //figure out how many sequences you have to align\r
                                        numSeqsPerProcessor = numFastaSeqs / processors;\r
-                                       if(pid == (processors - 1)){    numSeqsPerProcessor = numFastaSeqs - pid * numSeqsPerProcessor;         }\r
                                        int startIndex =  pid * numSeqsPerProcessor;\r
+                                       if(pid == (processors - 1)){    numSeqsPerProcessor = numFastaSeqs - pid * numSeqsPerProcessor;         }\r
+                                       \r
                                \r
                                        //align your part\r
                                        driverMPI(startIndex, numSeqsPerProcessor, inMPI, outMPIAlign, outMPIReport, outMPIAccnos, MPIPos);\r
@@ -328,8 +329,9 @@ int AlignCommand::execute(){
                                        \r
                                        //figure out how many sequences you have to align\r
                                        numSeqsPerProcessor = numFastaSeqs / processors;\r
-                                       if(pid == (processors - 1)){    numSeqsPerProcessor = numFastaSeqs - pid * numSeqsPerProcessor;         }\r
                                        int startIndex =  pid * numSeqsPerProcessor;\r
+                                       if(pid == (processors - 1)){    numSeqsPerProcessor = numFastaSeqs - pid * numSeqsPerProcessor;         }\r
+                                       \r
                                        \r
                                        //align your part\r
                                        driverMPI(startIndex, numSeqsPerProcessor, inMPI, outMPIAlign, outMPIReport, outMPIAccnos, MPIPos);\r
index 0b96f2172359efc9b7ed5ab5c9dcc15e7223dbbc..cc046ea3dcd44c5443434d84493025791ab2910e 100644 (file)
@@ -238,8 +238,9 @@ int ChimeraCcodeCommand::execute(){
                                
                                //figure out how many sequences you have to align
                                numSeqsPerProcessor = numSeqs / processors;
-                               if(pid == (processors - 1)){    numSeqsPerProcessor = numSeqs - pid * numSeqsPerProcessor;      }
                                int startIndex =  pid * numSeqsPerProcessor;
+                               if(pid == (processors - 1)){    numSeqsPerProcessor = numSeqs - pid * numSeqsPerProcessor;      }
+                               
                        
                                //align your part
                                driverMPI(startIndex, numSeqsPerProcessor, inMPI, outMPI, outMPIAccnos, MPIPos);
@@ -258,8 +259,9 @@ int ChimeraCcodeCommand::execute(){
                                
                                //figure out how many sequences you have to align
                                numSeqsPerProcessor = numSeqs / processors;
-                               if(pid == (processors - 1)){    numSeqsPerProcessor = numSeqs - pid * numSeqsPerProcessor;      }
                                int startIndex =  pid * numSeqsPerProcessor;
+                               if(pid == (processors - 1)){    numSeqsPerProcessor = numSeqs - pid * numSeqsPerProcessor;      }
+                               
                                
                                //align your part
                                driverMPI(startIndex, numSeqsPerProcessor, inMPI, outMPI, outMPIAccnos, MPIPos);
index 625314a8f6a622618b5af87d8bc5889ee3fe8a3e..1b25861afa3e60dd6d8d614ba2c2c38d0f11dc3e 100644 (file)
@@ -196,8 +196,9 @@ int ChimeraCheckCommand::execute(){
                                
                                //figure out how many sequences you have to align
                                numSeqsPerProcessor = numSeqs / processors;
-                               if(pid == (processors - 1)){    numSeqsPerProcessor = numSeqs - pid * numSeqsPerProcessor;      }
                                int startIndex =  pid * numSeqsPerProcessor;
+                               if(pid == (processors - 1)){    numSeqsPerProcessor = numSeqs - pid * numSeqsPerProcessor;      }
+                               
                        
                                //align your part
                                driverMPI(startIndex, numSeqsPerProcessor, inMPI, outMPI, MPIPos);
@@ -216,8 +217,9 @@ int ChimeraCheckCommand::execute(){
                                
                                //figure out how many sequences you have to align
                                numSeqsPerProcessor = numSeqs / processors;
-                               if(pid == (processors - 1)){    numSeqsPerProcessor = numSeqs - pid * numSeqsPerProcessor;      }
                                int startIndex =  pid * numSeqsPerProcessor;
+                               if(pid == (processors - 1)){    numSeqsPerProcessor = numSeqs - pid * numSeqsPerProcessor;      }
+                               
                                
                                //align your part
                                driverMPI(startIndex, numSeqsPerProcessor, inMPI, outMPI, MPIPos);
index 62f1f2bae942f1c91ca1ee16bb21b6a4996c790e..adf060f485d862dc93797069a54bb58fbe25adfb 100644 (file)
@@ -254,8 +254,9 @@ int ChimeraPintailCommand::execute(){
                                
                                //figure out how many sequences you have to align
                                numSeqsPerProcessor = numSeqs / processors;
-                               if(pid == (processors - 1)){    numSeqsPerProcessor = numSeqs - pid * numSeqsPerProcessor;      }
                                int startIndex =  pid * numSeqsPerProcessor;
+                               if(pid == (processors - 1)){    numSeqsPerProcessor = numSeqs - pid * numSeqsPerProcessor;      }
+                               
                        
                                //align your part
                                driverMPI(startIndex, numSeqsPerProcessor, inMPI, outMPI, outMPIAccnos, MPIPos);
@@ -274,8 +275,9 @@ int ChimeraPintailCommand::execute(){
                                
                                //figure out how many sequences you have to align
                                numSeqsPerProcessor = numSeqs / processors;
-                               if(pid == (processors - 1)){    numSeqsPerProcessor = numSeqs - pid * numSeqsPerProcessor;      }
                                int startIndex =  pid * numSeqsPerProcessor;
+                               if(pid == (processors - 1)){    numSeqsPerProcessor = numSeqs - pid * numSeqsPerProcessor;      }
+                               
                                
                                //align your part
                                driverMPI(startIndex, numSeqsPerProcessor, inMPI, outMPI, outMPIAccnos, MPIPos);
index 58aecf58ccd10354c5aca094d20827c44fc00ff9..435cd301a53c42b0e00765d588c681f98da5dfed 100644 (file)
@@ -266,8 +266,9 @@ int ChimeraSlayerCommand::execute(){
                                
                                //figure out how many sequences you have to align
                                numSeqsPerProcessor = numSeqs / processors;
-                               if(pid == (processors - 1)){    numSeqsPerProcessor = numSeqs - pid * numSeqsPerProcessor;      }
                                int startIndex =  pid * numSeqsPerProcessor;
+                               if(pid == (processors - 1)){    numSeqsPerProcessor = numSeqs - pid * numSeqsPerProcessor;      }
+                               
                        
                                //align your part
                                driverMPI(startIndex, numSeqsPerProcessor, inMPI, outMPI, outMPIAccnos, MPIPos);
@@ -286,8 +287,9 @@ int ChimeraSlayerCommand::execute(){
                                
                                //figure out how many sequences you have to align
                                numSeqsPerProcessor = numSeqs / processors;
-                               if(pid == (processors - 1)){    numSeqsPerProcessor = numSeqs - pid * numSeqsPerProcessor;      }
                                int startIndex =  pid * numSeqsPerProcessor;
+                               if(pid == (processors - 1)){    numSeqsPerProcessor = numSeqs - pid * numSeqsPerProcessor;      }
+                               
                                
                                //align your part
                                driverMPI(startIndex, numSeqsPerProcessor, inMPI, outMPI, outMPIAccnos, MPIPos);
index 38378491c072c1ed33cc257ec954de6a81f3bb7d..0538bff07d99ca1266d488a5844fcde49afaf3bc 100644 (file)
@@ -382,8 +382,9 @@ int ClassifySeqsCommand::execute(){
                                        
                                        //figure out how many sequences you have to align
                                        numSeqsPerProcessor = numFastaSeqs / processors;
-                                       if(pid == (processors - 1)){    numSeqsPerProcessor = numFastaSeqs - pid * numSeqsPerProcessor;         }
                                        int startIndex =  pid * numSeqsPerProcessor;
+                                       if(pid == (processors - 1)){    numSeqsPerProcessor = numFastaSeqs - pid * numSeqsPerProcessor;         }
+                                       
                                
                                        //align your part
                                        driverMPI(startIndex, numSeqsPerProcessor, inMPI, outMPINewTax, outMPITempTax, MPIPos);
@@ -401,8 +402,9 @@ int ClassifySeqsCommand::execute(){
                                        
                                        //figure out how many sequences you have to align
                                        numSeqsPerProcessor = numFastaSeqs / processors;
-                                       if(pid == (processors - 1)){    numSeqsPerProcessor = numFastaSeqs - pid * numSeqsPerProcessor;         }
                                        int startIndex =  pid * numSeqsPerProcessor;
+                                       if(pid == (processors - 1)){    numSeqsPerProcessor = numFastaSeqs - pid * numSeqsPerProcessor;         }
+                                       
                                        
                                        //align your part
                                        driverMPI(startIndex, numSeqsPerProcessor, inMPI, outMPINewTax, outMPITempTax, MPIPos);
index cad0442747f2ff7a5ada7b24d4ba18ad9138643c..8d78d63e529a96e74a5a1205f68caa6ad5d75398 100644 (file)
@@ -132,7 +132,6 @@ CommandFactory::CommandFactory(){
        //commands["consensus"]                 = "consensus";
        commands["help"]                                = "help"; 
        commands["summary.seqs"]                = "summary.seqs";
-       commands["screen.seqs"]                 = "screen.seqs";
        commands["reverse.seqs"]                = "reverse.seqs";
        commands["trim.seqs"]                   = "trim.seqs";
        commands["list.seqs"]                   = "list.seqs";
@@ -163,6 +162,7 @@ CommandFactory::CommandFactory(){
        commands["chimera.slayer"]              = "MPIEnabled";
        commands["chimera.pintail"]             = "MPIEnabled";
        commands["chimera.bellerophon"] = "MPIEnabled";
+       commands["screen.seqs"]                 = "MPIEnabled";
        commands["quit"]                                = "MPIEnabled"; 
 
 }
index da5d0f157390cb1a70c666ac86a9ecae1221f095..ec32ffeb9e2e28b7398f78c5dd5fd858de2326a0 100644 (file)
@@ -342,6 +342,14 @@ int DistanceCommand::execute(){
                
                if (output == "square") {  convertMatrix(outputFile); }
                
+               ifstream fileHandle;
+               fileHandle.open(outputFile.c_str());
+               if(fileHandle) {
+                       gobble(fileHandle);
+                       if (fileHandle.eof()) { m->mothurOut(outputFile + " is blank. This can result if there are no distances below your cutoff.");  m->mothurOutEndLine(); }
+               }
+
+               
                #ifdef USE_MPI
                        }
                #endif
index b64e2f530ff91af78cc69dea71b9d2f53f3f981b..28e137376c51261498a4d616ae929ba33b1ff3be 100644 (file)
@@ -287,8 +287,9 @@ int FilterSeqsCommand::filterSequences() {
                                        
                                        //figure out how many sequences you have to do
                                        numSeqsPerProcessor = num / processors;
-                                       if(pid == (processors - 1)){    numSeqsPerProcessor = num - pid * numSeqsPerProcessor;  }
                                        int startIndex =  pid * numSeqsPerProcessor;
+                                       if(pid == (processors - 1)){    numSeqsPerProcessor = num - pid * numSeqsPerProcessor;  }
+                                       
                                
                                        //do your part
                                        driverMPIRun(startIndex, numSeqsPerProcessor, inMPI, outMPI, MPIPos);
@@ -309,8 +310,9 @@ int FilterSeqsCommand::filterSequences() {
                                        
                                        //figure out how many sequences you have to align
                                        numSeqsPerProcessor = num / processors;
-                                       if(pid == (processors - 1)){    numSeqsPerProcessor = num - pid * numSeqsPerProcessor;  }
                                        int startIndex =  pid * numSeqsPerProcessor;
+                                       if(pid == (processors - 1)){    numSeqsPerProcessor = num - pid * numSeqsPerProcessor;  }
+                                       
                                        
                                        //align your part
                                        driverMPIRun(startIndex, numSeqsPerProcessor, inMPI, outMPI, MPIPos);           
@@ -580,8 +582,9 @@ string FilterSeqsCommand::createFilter() {
                                                                
                                                //figure out how many sequences you have to do
                                                numSeqsPerProcessor = num / processors;
-                                               if(pid == (processors - 1)){    numSeqsPerProcessor = num - pid * numSeqsPerProcessor;  }
                                                int startIndex =  pid * numSeqsPerProcessor;
+                                               if(pid == (processors - 1)){    numSeqsPerProcessor = num - pid * numSeqsPerProcessor;  }
+                                               
                                
                                                //do your part
                                                MPICreateFilter(startIndex, numSeqsPerProcessor, F, inMPI, MPIPos);
@@ -597,8 +600,9 @@ string FilterSeqsCommand::createFilter() {
                                        
                                        //figure out how many sequences you have to align
                                        numSeqsPerProcessor = num / processors;
-                                       if(pid == (processors - 1)){    numSeqsPerProcessor = num - pid * numSeqsPerProcessor;  }
                                        int startIndex =  pid * numSeqsPerProcessor;
+                                       if(pid == (processors - 1)){    numSeqsPerProcessor = num - pid * numSeqsPerProcessor;  }
+                                       
                                        
                                        //do your part
                                        MPICreateFilter(startIndex, numSeqsPerProcessor, F, inMPI,  MPIPos);
index 181b897a16f87b1f5e1cd2689d4c1621dbeac3c1..b15c44bbc235be6b21f7c4c2b7822c7aac0772d2 100644 (file)
--- a/makefile
+++ b/makefile
@@ -26,7 +26,8 @@ ifeq  ($(strip $(USEREADLINE)),yes)
       -L../readline-6.0\r
 endif\r
 \r
-USEMPI ?= no\r
+USEMPI ?= yes
+\r
 \r
 ifeq  ($(strip $(USEMPI)),yes)\r
     CC_OPTIONS += -DUSE_MPI\r
index 062c4814dfa4addeeb923f586c601717daca7c66..7f3608e73ee8770d8d777bde1be399c61027b515 100644 (file)
--- a/mothur.h
+++ b/mothur.h
@@ -865,14 +865,16 @@ inline void appendFiles(string temp, string filename) {
        
                //open output file in append mode
                openOutputFileAppend(filename, output);
-               openInputFile(temp, input);
+               int ableToOpen = openInputFile(temp, input, "no error");
                
-               while(char c = input.get()){
-                       if(input.eof())         {       break;                  }
-                       else                            {       output << c;    }
+               if (ableToOpen == 0) { //you opened it
+                       while(char c = input.get()){
+                               if(input.eof())         {       break;                  }
+                               else                            {       output << c;    }
+                       }
+                       input.close();
                }
                
-               input.close();
                output.close();
        }
        catch(exception& e) {
@@ -964,7 +966,7 @@ inline vector<long> setFilePosFasta(string filename, int& num) {
                
                        num = positions.size();
                
-                       FILE * pFile;
+                       /*FILE * pFile;
                        long size;
                
                        //get num bytes in file
@@ -974,7 +976,19 @@ inline vector<long> setFilePosFasta(string filename, int& num) {
                                fseek (pFile, 0, SEEK_END);
                                size=ftell (pFile);
                                fclose (pFile);
+                       }*/
+                       
+                       long size = positions[(positions.size()-1)];
+                       ifstream in;
+                       openInputFile(filename, in);
+                       
+                       in.seekg(size);
+               
+                       while(char c = in.get()){
+                               if(in.eof())            {       break;  }
+                               else                            {       size++; }
                        }
+                       in.close();
                
                        positions.push_back(size);
                
index a5f67c082945a9955feb2897e16b242d60533ffa..915a05ff74a14e7fac2a32da119c184ee9908293 100644 (file)
@@ -128,7 +128,7 @@ int PhyloSummary::addSeqToTree(string seqName, string seqTaxonomy){
                                currentNode = childPointer->second;
                        }else{  //otherwise, error
                                m->mothurOut("Warning: cannot find taxon " + taxon + " in reference taxonomy tree at level " + toString(tree[currentNode].level) + " for " + seqName + ". This may cause totals of daughter levels not to add up in summary file."); m->mothurOutEndLine();
-                               seqTaxonomy = "";
+                               break;
                        }
                        
                        level++;
@@ -168,7 +168,7 @@ void PhyloSummary::assignRank(int index){
 void PhyloSummary::print(ofstream& out){
        try {
                //print labels
-               out << "taxlevel\t rank ID\t label\t daughterlevels\t total\t";
+               out << "taxlevel\t rankID\t taxon\t daughterlevels\t total\t";
                if (groupmap != NULL) {
                        for (int i = 0; i < groupmap->namesOfGroups.size(); i++) {
                                out << groupmap->namesOfGroups[i] << '\t';
index 8c48e4f1f783c9387c83d4e5d1bfdbb475eb09cf..855eaf968b796424700f957c2ac78fa32db6d9f6 100644 (file)
@@ -373,7 +373,7 @@ void PhyloTree::binUnclassified(string file){
                        
                        //this sequence is unclassified at some levels
                        while(level != maxLevel){
-                       
+               
                                level++;
                        
                                string taxon = "unclassified";  
index 5b3d7d7604c4f0fe5460265fbd611e12dded4629..ab750321b8468670d626f5d5b724aba61a98c071 100644 (file)
@@ -22,7 +22,7 @@ ScreenSeqsCommand::ScreenSeqsCommand(string option)  {
                else {
                        //valid paramters for this command
                        string AlignArray[] =  {"fasta", "start", "end", "maxambig", "maxhomop", "minlength", "maxlength",
-                                                                       "name", "group", "alignreport","outputdir","inputdir"};
+                                                                       "name", "group", "alignreport","processors","outputdir","inputdir"};
                        vector<string> myArray (AlignArray, AlignArray+(sizeof(AlignArray)/sizeof(string)));
                        
                        OptionParser parser(option);
@@ -117,6 +117,10 @@ ScreenSeqsCommand::ScreenSeqsCommand(string option)  {
                        
                        temp = validParameter.validFile(parameters, "maxlength", false);        if (temp == "not found") { temp = "-1"; }
                        convert(temp, maxLength); 
+                       
+                       temp = validParameter.validFile(parameters, "processors", false);       if (temp == "not found"){       temp = "1";                             }
+                       convert(temp, processors); 
+
                }
 
        }
@@ -163,68 +167,280 @@ int ScreenSeqsCommand::execute(){
                
                if (abort == true) { return 0; }
                                
-               ifstream inFASTA;
-               openInputFile(fastafile, inFASTA);
-               
-               set<string> badSeqNames;
-               
                string goodSeqFile = outputDir + getRootName(getSimpleName(fastafile)) + "good" + getExtension(fastafile);
                string badSeqFile =  outputDir + getRootName(getSimpleName(fastafile)) + "bad" + getExtension(fastafile);
+               string badAccnosFile =  outputDir + getRootName(getSimpleName(fastafile)) + "bad.accnos";
                
-               ofstream goodSeqOut;    openOutputFile(goodSeqFile, goodSeqOut);
-               ofstream badSeqOut;             openOutputFile(badSeqFile, badSeqOut);          
+               int numFastaSeqs = 0;
+               set<string> badSeqNames;
+               int start = time(NULL);
                
-               while(!inFASTA.eof()){
-                       if (m->control_pressed) { goodSeqOut.close(); badSeqOut.close(); inFASTA.close(); remove(goodSeqFile.c_str()); remove(badSeqFile.c_str()); return 0; }
+#ifdef USE_MPI 
+                       int pid, end, numSeqsPerProcessor; 
+                       int tag = 2001;
+                       vector<long> MPIPos;
                        
-                       Sequence currSeq(inFASTA);
-                       if (currSeq.getName() != "") {
-                               bool goodSeq = 1;               //      innocent until proven guilty
-                               if(goodSeq == 1 && startPos != -1 && startPos < currSeq.getStartPos())                  {       goodSeq = 0;    }
-                               if(goodSeq == 1 && endPos != -1 && endPos > currSeq.getEndPos())                                {       goodSeq = 0;    }
-                               if(goodSeq == 1 && maxAmbig != -1 && maxAmbig < currSeq.getAmbigBases())                {       goodSeq = 0;    }
-                               if(goodSeq == 1 && maxHomoP != -1 && maxHomoP < currSeq.getLongHomoPolymer())   {       goodSeq = 0;    }
-                               if(goodSeq == 1 && minLength != -1 && minLength > currSeq.getNumBases())                {       goodSeq = 0;    }
-                               if(goodSeq == 1 && maxLength != -1 && maxLength < currSeq.getNumBases())                {       goodSeq = 0;    }
+                       MPI_Status status; 
+                       MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
+                       MPI_Comm_size(MPI_COMM_WORLD, &processors); 
+
+                       MPI_File inMPI;
+                       MPI_File outMPIGood;
+                       MPI_File outMPIBad;
+                       MPI_File outMPIBadAccnos;
+                       
+                       int outMode=MPI_MODE_CREATE|MPI_MODE_WRONLY; 
+                       int inMode=MPI_MODE_RDONLY; 
+                       
+                       char outGoodFilename[1024];
+                       strcpy(outGoodFilename, goodSeqFile.c_str());
+
+                       char outBadFilename[1024];
+                       strcpy(outBadFilename, badSeqFile.c_str());
+                       
+                       char outBadAccnosFilename[1024];
+                       strcpy(outBadAccnosFilename, badAccnosFile.c_str());
+
+                       char inFileName[1024];
+                       strcpy(inFileName, fastafile.c_str());
+                       
+                       MPI_File_open(MPI_COMM_WORLD, inFileName, inMode, MPI_INFO_NULL, &inMPI);  //comm, filename, mode, info, filepointer
+                       MPI_File_open(MPI_COMM_WORLD, outGoodFilename, outMode, MPI_INFO_NULL, &outMPIGood);
+                       MPI_File_open(MPI_COMM_WORLD, outBadFilename, outMode, MPI_INFO_NULL, &outMPIBad);
+                       MPI_File_open(MPI_COMM_WORLD, outBadAccnosFilename, outMode, MPI_INFO_NULL, &outMPIBadAccnos);
+                       
+                       if (m->control_pressed) { MPI_File_close(&inMPI);  MPI_File_close(&outMPIGood);  MPI_File_close(&outMPIBad); MPI_File_close(&outMPIBadAccnos); return 0; }
+                       
+                       if (pid == 0) { //you are the root process 
                                
-                               if(goodSeq == 1){
-                                       currSeq.printSequence(goodSeqOut);      
+                               MPIPos = setFilePosFasta(fastafile, numFastaSeqs); //fills MPIPos, returns numSeqs
+                               
+                               //send file positions to all processes
+                               MPI_Bcast(&numFastaSeqs, 1, MPI_INT, 0, MPI_COMM_WORLD);  //send numSeqs
+                               MPI_Bcast(&MPIPos[0], (numFastaSeqs+1), MPI_LONG, 0, MPI_COMM_WORLD); //send file pos   
+                               
+                               //figure out how many sequences you have to align
+                               numSeqsPerProcessor = numFastaSeqs / processors;
+                               int startIndex =  pid * numSeqsPerProcessor;
+                               if(pid == (processors - 1)){    numSeqsPerProcessor = numFastaSeqs - pid * numSeqsPerProcessor;         }
+                               
+                               //align your part
+                               driverMPI(startIndex, numSeqsPerProcessor, inMPI, outMPIGood, outMPIBad, outMPIBadAccnos, MPIPos, badSeqNames);
+
+                               if (m->control_pressed) { MPI_File_close(&inMPI);  MPI_File_close(&outMPIGood);  MPI_File_close(&outMPIBadAccnos); MPI_File_close(&outMPIBad);  return 0; }
+
+                               for (int i = 1; i < processors; i++) {
+                               
+                                       //get bad lists
+                                       int badSize;
+                                       MPI_Recv(&badSize, 1, MPI_INT, i, tag, MPI_COMM_WORLD, &status);
+                                       /*for (int j = 0; j < badSize; j++) {
+                                               int length;
+                                               MPI_Recv(&length, 1, MPI_INT, i, tag, MPI_COMM_WORLD, &status);  //recv the length of the name
+                                               char* buf2 = new char[length];                                                                          //make space to recieve it
+                                               MPI_Recv(buf2, length, MPI_CHAR, i, tag, MPI_COMM_WORLD, &status);  //get name
+                                               
+                                               string tempBuf = buf2;
+                                               if (tempBuf.length() > length) { tempBuf = tempBuf.substr(0, length); }
+                                               delete buf2;
+                                               
+                                               badSeqNames.insert(tempBuf);
+                                       }*/
                                }
-                               else{
-                                       currSeq.printSequence(badSeqOut);       
-                                       badSeqNames.insert(currSeq.getName());
+                       }else{ //you are a child process
+                               MPI_Bcast(&numFastaSeqs, 1, MPI_INT, 0, MPI_COMM_WORLD); //get numSeqs
+                               MPIPos.resize(numFastaSeqs+1);
+                               MPI_Bcast(&MPIPos[0], (numFastaSeqs+1), MPI_LONG, 0, MPI_COMM_WORLD); //get file positions
+                               
+                               //figure out how many sequences you have to align
+                               numSeqsPerProcessor = numFastaSeqs / processors;
+                               int startIndex =  pid * numSeqsPerProcessor;
+                               if(pid == (processors - 1)){    numSeqsPerProcessor = numFastaSeqs - pid * numSeqsPerProcessor;         }
+                               
+                               //align your part
+                               driverMPI(startIndex, numSeqsPerProcessor, inMPI, outMPIGood, outMPIBad, outMPIBadAccnos, MPIPos, badSeqNames);
+
+                               if (m->control_pressed) { MPI_File_close(&inMPI);  MPI_File_close(&outMPIGood);  MPI_File_close(&outMPIBad); MPI_File_close(&outMPIBadAccnos); return 0; }
+                               
+                               //send bad list 
+                               int badSize = badSeqNames.size();
+                               MPI_Send(&badSize, 1, MPI_INT, 0, tag, MPI_COMM_WORLD);
+                               
+                               /*
+                               set<string>::iterator it;
+                               for (it = badSeqNames.begin(); it != badSeqNames.end(); it++) {
+                                       string name = *it;
+                                       int length = name.length();
+                                       char* buf2 = new char[length];
+                                       memcpy(buf2, name.c_str(), length);
+                                       
+                                       MPI_Send(&length, 1, MPI_INT, 0, tag, MPI_COMM_WORLD);
+                                       MPI_Send(buf2, length, MPI_CHAR, 0, tag, MPI_COMM_WORLD);
+                               }*/
+                       }
+                       
+                       //close files 
+                       MPI_File_close(&inMPI);
+                       MPI_File_close(&outMPIGood);
+                       MPI_File_close(&outMPIBad);
+                       MPI_File_close(&outMPIBadAccnos);
+                                       
+#else
+                                       
+       #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
+                       if(processors == 1){
+                               ifstream inFASTA;
+                               openInputFile(fastafile, inFASTA);
+                               numFastaSeqs=count(istreambuf_iterator<char>(inFASTA),istreambuf_iterator<char>(), '>');
+                               inFASTA.close();
+                               
+                               lines.push_back(new linePair(0, numFastaSeqs));
+                               
+                               driver(lines[0], goodSeqFile, badSeqFile, badAccnosFile, fastafile, badSeqNames);
+                               
+                               if (m->control_pressed) { remove(goodSeqFile.c_str()); remove(badSeqFile.c_str()); return 0; }
+                               
+                       }else{
+                               vector<int> positions;
+                               processIDS.resize(0);
+                               
+                               ifstream inFASTA;
+                               openInputFile(fastafile, inFASTA);
+                               
+                               string input;
+                               while(!inFASTA.eof()){
+                                       input = getline(inFASTA);
+                                       if (input.length() != 0) {
+                                               if(input[0] == '>'){    long int pos = inFASTA.tellg(); positions.push_back(pos - input.length() - 1);  }
+                                       }
+                               }
+                               inFASTA.close();
+                               
+                               numFastaSeqs = positions.size();
+                               
+                               int numSeqsPerProcessor = numFastaSeqs / processors;
+                               
+                               for (int i = 0; i < processors; i++) {
+                                       long int startPos = positions[ i * numSeqsPerProcessor ];
+                                       if(i == processors - 1){
+                                               numSeqsPerProcessor = numFastaSeqs - i * numSeqsPerProcessor;
+                                       }
+                                       lines.push_back(new linePair(startPos, numSeqsPerProcessor));
+                               }
+                               
+                               createProcesses(goodSeqFile, badSeqFile, badAccnosFile, fastafile, badSeqNames); 
+                               
+                               rename((goodSeqFile + toString(processIDS[0]) + ".temp").c_str(), goodSeqFile.c_str());
+                               rename((badSeqFile + toString(processIDS[0]) + ".temp").c_str(), badSeqFile.c_str());
+                               rename((badAccnosFile + toString(processIDS[0]) + ".temp").c_str(), badAccnosFile.c_str());
+                               
+                               //append alignment and report files
+                               for(int i=1;i<processors;i++){
+                                       appendFiles((goodSeqFile + toString(processIDS[i]) + ".temp"), goodSeqFile);
+                                       remove((goodSeqFile + toString(processIDS[i]) + ".temp").c_str());
+                                       
+                                       appendFiles((badSeqFile + toString(processIDS[i]) + ".temp"), badSeqFile);
+                                       remove((badSeqFile + toString(processIDS[i]) + ".temp").c_str());
+                                       
+                                       appendFiles((badAccnosFile + toString(processIDS[i]) + ".temp"), badAccnosFile);
+                                       remove((badAccnosFile + toString(processIDS[i]) + ".temp").c_str());
+                               }
+                               
+                               if (m->control_pressed) { remove(goodSeqFile.c_str()); remove(badSeqFile.c_str()); return 0; }
+                               
+                               //read badSeqs in because root process doesnt know what other "bad" seqs the children found
+                               ifstream inBad;
+                               int ableToOpen = openInputFile(badAccnosFile, inBad, "no error");
+                               
+                               if (ableToOpen == 0) {
+                                       badSeqNames.clear();
+                                       string tempName;
+                                       while (!inBad.eof()) {
+                                               inBad >> tempName; gobble(inBad);
+                                               badSeqNames.insert(tempName);
+                                       }
+                                       inBad.close();
                                }
                        }
-                       gobble(inFASTA);
-               }
+       #else
+                       ifstream inFASTA;
+                       openInputFile(fastafile, inFASTA);
+                       numFastaSeqs=count(istreambuf_iterator<char>(inFASTA),istreambuf_iterator<char>(), '>');
+                       inFASTA.close();
+                       
+                       lines.push_back(new linePair(0, numFastaSeqs));
+                       
+                       driver(lines[0], goodSeqFile, badSeqFile, badAccnosFile, fastafile, badSeqNames);
+                       
+                       if (m->control_pressed) { remove(goodSeqFile.c_str()); remove(badSeqFile.c_str()); return 0; }
+                       
+       #endif
+
+#endif         
+
+               #ifdef USE_MPI
+                       MPI_Comm_rank(MPI_COMM_WORLD, &pid); 
+                                       
+                       if (pid == 0) { //only one process should fix files
+                       
+                               //read accnos file with all names in it, process 0 just has its names
+                               MPI_File inMPIAccnos;
+                               MPI_Offset size;
+                       
+                               char inFileName[1024];
+                               strcpy(inFileName, badAccnosFile.c_str());
+                       
+                               MPI_File_open(MPI_COMM_SELF, inFileName, inMode, MPI_INFO_NULL, &inMPIAccnos);  //comm, filename, mode, info, filepointer
+                               MPI_File_get_size(inMPIAccnos, &size);
+                       
+                               char* buffer = new char[size];
+                               MPI_File_read(inMPIAccnos, buffer, size, MPI_CHAR, &status);
                        
+                               string tempBuf = buffer;
+                               if (tempBuf.length() > size) { tempBuf = tempBuf.substr(0, size);  }
+                               istringstream iss (tempBuf,istringstream::in);
+
+                               delete buffer;
+                               MPI_File_close(&inMPIAccnos);
+                               
+                               badSeqNames.clear();
+                               string tempName;
+                               while (!iss.eof()) {
+                                       iss >> tempName; gobble(iss);
+                                       badSeqNames.insert(tempName);
+                               }
+               #endif
+                                                                                                                                                                       
                if(namefile != "" && groupfile != "")   {       
                        screenNameGroupFile(badSeqNames);       
-                       if (m->control_pressed) { goodSeqOut.close(); badSeqOut.close(); inFASTA.close(); remove(goodSeqFile.c_str()); remove(badSeqFile.c_str()); return 0; }
+                       if (m->control_pressed) {  remove(goodSeqFile.c_str()); remove(badSeqFile.c_str()); return 0; }
                }else if(namefile != "")        {       
                        screenNameGroupFile(badSeqNames);
-                       if (m->control_pressed) { goodSeqOut.close(); badSeqOut.close(); inFASTA.close(); remove(goodSeqFile.c_str()); remove(badSeqFile.c_str()); return 0; }  
+                       if (m->control_pressed) {  remove(goodSeqFile.c_str()); remove(badSeqFile.c_str()); return 0; } 
                }else if(groupfile != "")                               {       screenGroupFile(badSeqNames);           }       // this screens just the group
                
-               if (m->control_pressed) { goodSeqOut.close(); badSeqOut.close(); inFASTA.close(); remove(goodSeqFile.c_str()); remove(badSeqFile.c_str()); return 0; }
+               if (m->control_pressed) { remove(goodSeqFile.c_str()); remove(badSeqFile.c_str()); return 0; }
 
                if(alignreport != "")                                   {       screenAlignReport(badSeqNames);         }
                
-               goodSeqOut.close();
-               badSeqOut.close();
-               inFASTA.close();
-               
-               
                if (m->control_pressed) { remove(goodSeqFile.c_str()); remove(badSeqFile.c_str()); return 0; }
+               
+               #ifdef USE_MPI
+                       }
+               #endif
 
                m->mothurOutEndLine();
                m->mothurOut("Output File Names: "); m->mothurOutEndLine();
                m->mothurOut(goodSeqFile); m->mothurOutEndLine();       
                m->mothurOut(badSeqFile); m->mothurOutEndLine();        
+               m->mothurOut(badAccnosFile); m->mothurOutEndLine();     
                for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); }
                m->mothurOutEndLine();
+               m->mothurOutEndLine();
+
+               m->mothurOut("It took " + toString(time(NULL) - start) + " secs to screen " + toString(numFastaSeqs) + " sequences.");
+               m->mothurOutEndLine();
 
-               
                return 0;
        }
        catch(exception& e) {
@@ -236,63 +452,118 @@ int ScreenSeqsCommand::execute(){
 //***************************************************************************************************************
 
 int ScreenSeqsCommand::screenNameGroupFile(set<string> badSeqNames){
+       try {
+               ifstream inputNames;
+               openInputFile(namefile, inputNames);
+               set<string> badSeqGroups;
+               string seqName, seqList, group;
+               set<string>::iterator it;
+
+               string goodNameFile = outputDir + getRootName(getSimpleName(namefile)) + "good" + getExtension(namefile);
+               string badNameFile = outputDir + getRootName(getSimpleName(namefile)) + "bad" + getExtension(namefile);
+               
+               outputNames.push_back(goodNameFile);  outputNames.push_back(badNameFile);
+               
+               ofstream goodNameOut;   openOutputFile(goodNameFile, goodNameOut);
+               ofstream badNameOut;    openOutputFile(badNameFile, badNameOut);                
+               
+               while(!inputNames.eof()){
+                       if (m->control_pressed) { goodNameOut.close(); badNameOut.close(); inputNames.close(); remove(goodNameFile.c_str()); remove(badNameFile.c_str()); return 0; }
 
-       ifstream inputNames;
-       openInputFile(namefile, inputNames);
-       set<string> badSeqGroups;
-       string seqName, seqList, group;
-       set<string>::iterator it;
-
-       string goodNameFile = outputDir + getRootName(getSimpleName(namefile)) + "good" + getExtension(namefile);
-       string badNameFile = outputDir + getRootName(getSimpleName(namefile)) + "bad" + getExtension(namefile);
-       
-       outputNames.push_back(goodNameFile);  outputNames.push_back(badNameFile);
-       
-       ofstream goodNameOut;   openOutputFile(goodNameFile, goodNameOut);
-       ofstream badNameOut;    openOutputFile(badNameFile, badNameOut);                
-       
-       while(!inputNames.eof()){
-               if (m->control_pressed) { goodNameOut.close(); badNameOut.close(); inputNames.close(); remove(goodNameFile.c_str()); remove(badNameFile.c_str()); return 0; }
-
-               inputNames >> seqName >> seqList;
-               it = badSeqNames.find(seqName);
-               
-               if(it != badSeqNames.end()){
-                       badSeqNames.erase(it);
-                       badNameOut << seqName << '\t' << seqList << endl;
-                       if(namefile != ""){
-                               int start = 0;
-                               for(int i=0;i<seqList.length();i++){
-                                       if(seqList[i] == ','){
-                                               badSeqGroups.insert(seqList.substr(start,i-start));
-                                               start = i+1;
-                                       }                                       
+                       inputNames >> seqName >> seqList;
+                       it = badSeqNames.find(seqName);
+                       
+                       if(it != badSeqNames.end()){
+                               badSeqNames.erase(it);
+                               badNameOut << seqName << '\t' << seqList << endl;
+                               if(namefile != ""){
+                                       int start = 0;
+                                       for(int i=0;i<seqList.length();i++){
+                                               if(seqList[i] == ','){
+                                                       badSeqGroups.insert(seqList.substr(start,i-start));
+                                                       start = i+1;
+                                               }                                       
+                                       }
+                                       badSeqGroups.insert(seqList.substr(start,seqList.length()-start));
                                }
-                               badSeqGroups.insert(seqList.substr(start,seqList.length()-start));
                        }
+                       else{
+                               goodNameOut << seqName << '\t' << seqList << endl;
+                       }
+                       gobble(inputNames);
                }
-               else{
-                       goodNameOut << seqName << '\t' << seqList << endl;
+               inputNames.close();
+               goodNameOut.close();
+               badNameOut.close();
+               
+               //we were unable to remove some of the bad sequences
+               if (badSeqNames.size() != 0) {
+                       for (it = badSeqNames.begin(); it != badSeqNames.end(); it++) {  
+                               m->mothurOut("Your namefile does not include the sequence " + *it + " please correct."); 
+                               m->mothurOutEndLine();
+                       }
                }
-               gobble(inputNames);
-       }
-       inputNames.close();
-       goodNameOut.close();
-       badNameOut.close();
-       
-       //we were unable to remove some of the bad sequences
-       if (badSeqNames.size() != 0) {
-               for (it = badSeqNames.begin(); it != badSeqNames.end(); it++) {  
-                       m->mothurOut("Your namefile does not include the sequence " + *it + " please correct."); 
-                       m->mothurOutEndLine();
+
+               if(groupfile != ""){
+                       
+                       ifstream inputGroups;
+                       openInputFile(groupfile, inputGroups);
+
+                       string goodGroupFile = outputDir + getRootName(getSimpleName(groupfile)) + "good" + getExtension(groupfile);
+                       string badGroupFile = outputDir + getRootName(getSimpleName(groupfile)) + "bad" + getExtension(groupfile);
+                       
+                       outputNames.push_back(goodGroupFile);  outputNames.push_back(badGroupFile);
+                       
+                       ofstream goodGroupOut;  openOutputFile(goodGroupFile, goodGroupOut);
+                       ofstream badGroupOut;   openOutputFile(badGroupFile, badGroupOut);              
+                       
+                       while(!inputGroups.eof()){
+                               if (m->control_pressed) { goodGroupOut.close(); badGroupOut.close(); inputGroups.close(); remove(goodNameFile.c_str()); remove(badNameFile.c_str()); remove(goodGroupFile.c_str()); remove(badGroupFile.c_str()); return 0; }
+
+                               inputGroups >> seqName >> group;
+
+                               it = badSeqGroups.find(seqName);
+                               
+                               if(it != badSeqGroups.end()){
+                                       badSeqGroups.erase(it);
+                                       badGroupOut << seqName << '\t' << group << endl;
+                               }
+                               else{
+                                       goodGroupOut << seqName << '\t' << group << endl;
+                               }
+                               gobble(inputGroups);
+                       }
+                       inputGroups.close();
+                       goodGroupOut.close();
+                       badGroupOut.close();
+                       
+                       //we were unable to remove some of the bad sequences
+                       if (badSeqGroups.size() != 0) {
+                               for (it = badSeqGroups.begin(); it != badSeqGroups.end(); it++) {  
+                                       m->mothurOut("Your namefile does not include the sequence " + *it + " please correct."); 
+                                       m->mothurOutEndLine();
+                               }
+                       }
                }
+                       
+               return 0;
+       
+       }
+       catch(exception& e) {
+               m->errorOut(e, "ScreenSeqsCommand", "screenNameGroupFile");
+               exit(1);
        }
+}
 
-       if(groupfile != ""){
-               
+//***************************************************************************************************************
+
+int ScreenSeqsCommand::screenGroupFile(set<string> badSeqNames){
+       try {
                ifstream inputGroups;
                openInputFile(groupfile, inputGroups);
-
+               string seqName, group;
+               set<string>::iterator it;
+               
                string goodGroupFile = outputDir + getRootName(getSimpleName(groupfile)) + "good" + getExtension(groupfile);
                string badGroupFile = outputDir + getRootName(getSimpleName(groupfile)) + "bad" + getExtension(groupfile);
                
@@ -302,14 +573,13 @@ int ScreenSeqsCommand::screenNameGroupFile(set<string> badSeqNames){
                ofstream badGroupOut;   openOutputFile(badGroupFile, badGroupOut);              
                
                while(!inputGroups.eof()){
-                       if (m->control_pressed) { goodGroupOut.close(); badGroupOut.close(); inputGroups.close(); remove(goodNameFile.c_str()); remove(badNameFile.c_str()); remove(goodGroupFile.c_str()); remove(badGroupFile.c_str()); return 0; }
+                       if (m->control_pressed) { goodGroupOut.close(); badGroupOut.close(); inputGroups.close(); remove(goodGroupFile.c_str()); remove(badGroupFile.c_str()); return 0; }
 
                        inputGroups >> seqName >> group;
-
-                       it = badSeqGroups.find(seqName);
+                       it = badSeqNames.find(seqName);
                        
-                       if(it != badSeqGroups.end()){
-                               badSeqGroups.erase(it);
+                       if(it != badSeqNames.end()){
+                               badSeqNames.erase(it);
                                badGroupOut << seqName << '\t' << group << endl;
                        }
                        else{
@@ -317,142 +587,280 @@ int ScreenSeqsCommand::screenNameGroupFile(set<string> badSeqNames){
                        }
                        gobble(inputGroups);
                }
-               inputGroups.close();
-               goodGroupOut.close();
-               badGroupOut.close();
                
+               if (m->control_pressed) { goodGroupOut.close(); badGroupOut.close(); inputGroups.close(); remove(goodGroupFile.c_str()); remove(badGroupFile.c_str()); return 0; }
+
                //we were unable to remove some of the bad sequences
-               if (badSeqGroups.size() != 0) {
-                       for (it = badSeqGroups.begin(); it != badSeqGroups.end(); it++) {  
-                               m->mothurOut("Your namefile does not include the sequence " + *it + " please correct."); 
+               if (badSeqNames.size() != 0) {
+                       for (it = badSeqNames.begin(); it != badSeqNames.end(); it++) {  
+                               m->mothurOut("Your groupfile does not include the sequence " + *it + " please correct."); 
                                m->mothurOutEndLine();
                        }
                }
-       }
                
-       return 0;
+               inputGroups.close();
+               goodGroupOut.close();
+               badGroupOut.close();
+               
+               if (m->control_pressed) { remove(goodGroupFile.c_str()); remove(badGroupFile.c_str());  }
 
+               
+               return 0;
+       
+       }
+       catch(exception& e) {
+               m->errorOut(e, "ScreenSeqsCommand", "screenGroupFile");
+               exit(1);
+       }
 }
 
 //***************************************************************************************************************
 
-int ScreenSeqsCommand::screenGroupFile(set<string> badSeqNames){
-
-       ifstream inputGroups;
-       openInputFile(groupfile, inputGroups);
-       string seqName, group;
-       set<string>::iterator it;
-       
-       string goodGroupFile = outputDir + getRootName(getSimpleName(groupfile)) + "good" + getExtension(groupfile);
-       string badGroupFile = outputDir + getRootName(getSimpleName(groupfile)) + "bad" + getExtension(groupfile);
-       
-       outputNames.push_back(goodGroupFile);  outputNames.push_back(badGroupFile);
-       
-       ofstream goodGroupOut;  openOutputFile(goodGroupFile, goodGroupOut);
-       ofstream badGroupOut;   openOutputFile(badGroupFile, badGroupOut);              
-       
-       while(!inputGroups.eof()){
-               if (m->control_pressed) { goodGroupOut.close(); badGroupOut.close(); inputGroups.close(); remove(goodGroupFile.c_str()); remove(badGroupFile.c_str()); return 0; }
-
-               inputGroups >> seqName >> group;
-               it = badSeqNames.find(seqName);
+int ScreenSeqsCommand::screenAlignReport(set<string> badSeqNames){
+       try {
+               ifstream inputAlignReport;
+               openInputFile(alignreport, inputAlignReport);
+               string seqName, group;
+               set<string>::iterator it;
+               
+               string goodAlignReportFile = outputDir + getRootName(getSimpleName(alignreport)) + "good" + getExtension(alignreport);
+               string badAlignReportFile = outputDir + getRootName(getSimpleName(alignreport)) + "bad" + getExtension(alignreport);
                
-               if(it != badSeqNames.end()){
-                       badSeqNames.erase(it);
-                       badGroupOut << seqName << '\t' << group << endl;
+               outputNames.push_back(goodAlignReportFile);  outputNames.push_back(badAlignReportFile);
+               
+               ofstream goodAlignReportOut;    openOutputFile(goodAlignReportFile, goodAlignReportOut);
+               ofstream badAlignReportOut;             openOutputFile(badAlignReportFile, badAlignReportOut);          
+
+               while (!inputAlignReport.eof()) {               //      need to copy header
+                       char c = inputAlignReport.get();
+                       goodAlignReportOut << c;
+                       badAlignReportOut << c;
+                       if (c == 10 || c == 13){        break;  }       
                }
-               else{
-                       goodGroupOut << seqName << '\t' << group << endl;
+
+               while(!inputAlignReport.eof()){
+                       if (m->control_pressed) { goodAlignReportOut.close(); badAlignReportOut.close(); inputAlignReport.close(); remove(goodAlignReportFile.c_str()); remove(badAlignReportFile.c_str()); return 0; }
+
+                       inputAlignReport >> seqName;
+                       it = badSeqNames.find(seqName);
+                       string line;            
+                       while (!inputAlignReport.eof()) {               //      need to copy header
+                               char c = inputAlignReport.get();
+                               line += c;
+                               if (c == 10 || c == 13){        break;  }       
+                       }
+                       
+                       if(it != badSeqNames.end()){
+                               badSeqNames.erase(it);
+                               badAlignReportOut << seqName << '\t' << line;
+                       }
+                       else{
+                               goodAlignReportOut << seqName << '\t' << line;
+                       }
+                       gobble(inputAlignReport);
                }
-               gobble(inputGroups);
-       }
-       
-       if (m->control_pressed) { goodGroupOut.close(); badGroupOut.close(); inputGroups.close(); remove(goodGroupFile.c_str()); remove(badGroupFile.c_str()); return 0; }
+               
+               if (m->control_pressed) { goodAlignReportOut.close(); badAlignReportOut.close(); inputAlignReport.close(); remove(goodAlignReportFile.c_str()); remove(badAlignReportFile.c_str()); return 0; }
 
-       //we were unable to remove some of the bad sequences
-       if (badSeqNames.size() != 0) {
-               for (it = badSeqNames.begin(); it != badSeqNames.end(); it++) {  
-                       m->mothurOut("Your groupfile does not include the sequence " + *it + " please correct."); 
-                       m->mothurOutEndLine();
+               //we were unable to remove some of the bad sequences
+               if (badSeqNames.size() != 0) {
+                       for (it = badSeqNames.begin(); it != badSeqNames.end(); it++) {  
+                               m->mothurOut("Your file does not include the sequence " + *it + " please correct."); 
+                               m->mothurOutEndLine();
+                       }
                }
-       }
-       
-       inputGroups.close();
-       goodGroupOut.close();
-       badGroupOut.close();
-       
-       if (m->control_pressed) { remove(goodGroupFile.c_str()); remove(badGroupFile.c_str());  }
 
+               inputAlignReport.close();
+               goodAlignReportOut.close();
+               badAlignReportOut.close();
+                               
+               if (m->control_pressed) {  remove(goodAlignReportFile.c_str()); remove(badAlignReportFile.c_str()); return 0; }
+               
+               return 0;
        
-       return 0;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "ScreenSeqsCommand", "screenAlignReport");
+               exit(1);
+       }
        
 }
+//**********************************************************************************************************************
 
-//***************************************************************************************************************
+int ScreenSeqsCommand::driver(linePair* line, string goodFName, string badFName, string badAccnosFName, string filename, set<string>& badSeqNames){
+       try {
+               ofstream goodFile;
+               openOutputFile(goodFName, goodFile);
+               
+               ofstream badFile;
+               openOutputFile(badFName, badFile);
+               
+               ofstream badAccnosFile;
+               openOutputFile(badAccnosFName, badAccnosFile);
+               
+               ifstream inFASTA;
+               openInputFile(filename, inFASTA);
 
-int ScreenSeqsCommand::screenAlignReport(set<string> badSeqNames){
+               inFASTA.seekg(line->start);
        
-       ifstream inputAlignReport;
-       openInputFile(alignreport, inputAlignReport);
-       string seqName, group;
-       set<string>::iterator it;
-       
-       string goodAlignReportFile = outputDir + getRootName(getSimpleName(alignreport)) + "good" + getExtension(alignreport);
-       string badAlignReportFile = outputDir + getRootName(getSimpleName(alignreport)) + "bad" + getExtension(alignreport);
-       
-       outputNames.push_back(goodAlignReportFile);  outputNames.push_back(badAlignReportFile);
-       
-       ofstream goodAlignReportOut;    openOutputFile(goodAlignReportFile, goodAlignReportOut);
-       ofstream badAlignReportOut;             openOutputFile(badAlignReportFile, badAlignReportOut);          
-
-       while (!inputAlignReport.eof()) {               //      need to copy header
-               char c = inputAlignReport.get();
-               goodAlignReportOut << c;
-               badAlignReportOut << c;
-               if (c == 10 || c == 13){        break;  }       
+               for(int i=0;i<line->numSeqs;i++){
+                       
+                       if (m->control_pressed) {  return 0; }
+                       
+                       Sequence currSeq(inFASTA);
+                       if (currSeq.getName() != "") {
+                               bool goodSeq = 1;               //      innocent until proven guilty
+                               if(goodSeq == 1 && startPos != -1 && startPos < currSeq.getStartPos())                  {       goodSeq = 0;    }
+                               if(goodSeq == 1 && endPos != -1 && endPos > currSeq.getEndPos())                                {       goodSeq = 0;    }
+                               if(goodSeq == 1 && maxAmbig != -1 && maxAmbig < currSeq.getAmbigBases())                {       goodSeq = 0;    }
+                               if(goodSeq == 1 && maxHomoP != -1 && maxHomoP < currSeq.getLongHomoPolymer())   {       goodSeq = 0;    }
+                               if(goodSeq == 1 && minLength != -1 && minLength > currSeq.getNumBases())                {       goodSeq = 0;    }
+                               if(goodSeq == 1 && maxLength != -1 && maxLength < currSeq.getNumBases())                {       goodSeq = 0;    }
+                               
+                               if(goodSeq == 1){
+                                       currSeq.printSequence(goodFile);        
+                               }
+                               else{
+                                       currSeq.printSequence(badFile); 
+                                       badAccnosFile << currSeq.getName() << endl;
+                                       badSeqNames.insert(currSeq.getName());
+                               }
+                       }
+                       gobble(inFASTA);
+               }
+               
+                       
+               goodFile.close();
+               inFASTA.close();
+               badFile.close();
+               badAccnosFile.close();
+               
+               return 1;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "ScreenSeqsCommand", "driver");
+               exit(1);
        }
+}
+//**********************************************************************************************************************
+#ifdef USE_MPI
+int ScreenSeqsCommand::driverMPI(int start, int num, MPI_File& inMPI, MPI_File& goodFile, MPI_File& badFile, MPI_File& badAccnosFile, vector<long>& MPIPos, set<string>& badSeqNames){
+       try {
+               string outputString = "";
+               MPI_Status statusGood; 
+               MPI_Status statusBad; 
+               MPI_Status statusBadAccnos; 
+               MPI_Status status; 
+               int pid;
+               MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
+
+               for(int i=0;i<num;i++){
+               
+                       if (m->control_pressed) {  return 0; }
+                       
+                       //read next sequence
+                       int length = MPIPos[start+i+1] - MPIPos[start+i];
 
-       while(!inputAlignReport.eof()){
-               if (m->control_pressed) { goodAlignReportOut.close(); badAlignReportOut.close(); inputAlignReport.close(); remove(goodAlignReportFile.c_str()); remove(badAlignReportFile.c_str()); return 0; }
+                       char* buf4 = new char[length];
+                       memcpy(buf4, outputString.c_str(), length);
 
-               inputAlignReport >> seqName;
-               it = badSeqNames.find(seqName);
-               string line;            
-               while (!inputAlignReport.eof()) {               //      need to copy header
-                       char c = inputAlignReport.get();
-                       line += c;
-                       if (c == 10 || c == 13){        break;  }       
+                       MPI_File_read_at(inMPI, MPIPos[start+i], buf4, length, MPI_CHAR, &status);
+                       
+                       string tempBuf = buf4;  delete buf4;
+                       if (tempBuf.length() > length) { tempBuf = tempBuf.substr(0, length);  }
+                       istringstream iss (tempBuf,istringstream::in);
+                       
+                       Sequence currSeq(iss);                  
+                       
+                       //process seq
+                       if (currSeq.getName() != "") {
+                               bool goodSeq = 1;               //      innocent until proven guilty
+                               if(goodSeq == 1 && startPos != -1 && startPos < currSeq.getStartPos())                  {       goodSeq = 0;    }
+                               if(goodSeq == 1 && endPos != -1 && endPos > currSeq.getEndPos())                                {       goodSeq = 0;    }
+                               if(goodSeq == 1 && maxAmbig != -1 && maxAmbig < currSeq.getAmbigBases())                {       goodSeq = 0;    }
+                               if(goodSeq == 1 && maxHomoP != -1 && maxHomoP < currSeq.getLongHomoPolymer())   {       goodSeq = 0;    }
+                               if(goodSeq == 1 && minLength != -1 && minLength > currSeq.getNumBases())                {       goodSeq = 0;    }
+                               if(goodSeq == 1 && maxLength != -1 && maxLength < currSeq.getNumBases())                {       goodSeq = 0;    }
+                               
+                               if(goodSeq == 1){
+                                       outputString =  ">" + currSeq.getName() + "\n" + currSeq.getAligned() + "\n";
+                               
+                                       //print good seq
+                                       length = outputString.length();
+                                       char* buf2 = new char[length];
+                                       memcpy(buf2, outputString.c_str(), length);
+                                       
+                                       MPI_File_write_shared(goodFile, buf2, length, MPI_CHAR, &statusGood);
+                                       delete buf2;
+                               }
+                               else{
+                                       outputString =  ">" + currSeq.getName() + "\n" + currSeq.getAligned() + "\n";
+                               
+                                       //print bad seq to fasta
+                                       length = outputString.length();
+                                       char* buf2 = new char[length];
+                                       memcpy(buf2, outputString.c_str(), length);
+                                       
+                                       MPI_File_write_shared(badFile, buf2, length, MPI_CHAR, &statusBad);
+                                       delete buf2;
+
+                                       badSeqNames.insert(currSeq.getName());
+                                       
+                                       //write to bad accnos file
+                                       outputString = currSeq.getName() + "\n";
+                               
+                                       length = outputString.length();
+                                       char* buf3 = new char[length];
+                                       memcpy(buf3, outputString.c_str(), length);
+                                       
+                                       MPI_File_write_shared(badAccnosFile, buf3, length, MPI_CHAR, &statusBadAccnos);
+                                       delete buf3;
+                               }
+                       }
                }
+                               
+               return 1;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "ScreenSeqsCommand", "driverMPI");
+               exit(1);
+       }
+}
+#endif
+/**************************************************************************************************/
+
+int ScreenSeqsCommand::createProcesses(string goodFileName, string badFileName, string badAccnos, string filename, set<string>& badSeqNames) {
+       try {
+#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
+               int process = 0;
+               int exitCommand = 1;
                
-               if(it != badSeqNames.end()){
-                       badSeqNames.erase(it);
-                       badAlignReportOut << seqName << '\t' << line;;
+               //loop through and create all the processes you want
+               while (process != processors) {
+                       int pid = fork();
+                       
+                       if (pid > 0) {
+                               processIDS.push_back(pid);  //create map from line number to pid so you can append files in correct order later
+                               process++;
+                       }else if (pid == 0){
+                               exitCommand = driver(lines[process], goodFileName + toString(getpid()) + ".temp", badFileName + toString(getpid()) + ".temp", badAccnos + toString(getpid()) + ".temp", filename, badSeqNames);
+                               exit(0);
+                       }else { m->mothurOut("unable to spawn the necessary processes."); m->mothurOutEndLine(); exit(0); }
                }
-               else{
-                       goodAlignReportOut << seqName << '\t' << line;
+               
+               //force parent to wait until all the processes are done
+               for (int i=0;i<processors;i++) { 
+                       int temp = processIDS[i];
+                       wait(&temp);
                }
-               gobble(inputAlignReport);
+               
+               return exitCommand;
+#endif         
        }
-       
-       if (m->control_pressed) { goodAlignReportOut.close(); badAlignReportOut.close(); inputAlignReport.close(); remove(goodAlignReportFile.c_str()); remove(badAlignReportFile.c_str()); return 0; }
-
-       //we were unable to remove some of the bad sequences
-       if (badSeqNames.size() != 0) {
-               for (it = badSeqNames.begin(); it != badSeqNames.end(); it++) {  
-                       m->mothurOut("Your file does not include the sequence " + *it + " please correct."); 
-                       m->mothurOutEndLine();
-               }
+       catch(exception& e) {
+               m->errorOut(e, "ScreenSeqsCommand", "createProcesses");
+               exit(1);
        }
-
-       inputAlignReport.close();
-       goodAlignReportOut.close();
-       badAlignReportOut.close();
-                       
-       if (m->control_pressed) {  remove(goodAlignReportFile.c_str()); remove(badAlignReportFile.c_str()); return 0; }
-       
-       return 0;
-
-       
 }
 
 //***************************************************************************************************************
index 54f4bb153d66b89728a1d10fd073cb14f3957779..f1b72051250d07a389ed332b0690cbcda3b807c0 100644 (file)
@@ -21,13 +21,29 @@ public:
        void help();
        
 private:
+
+       struct linePair {
+               int start;
+               int numSeqs;
+               linePair(long int i, int j) : start(i), numSeqs(j) {}
+       };
+       vector<int> processIDS;   //processid
+       vector<linePair*> lines;
+
        int screenNameGroupFile(set<string>);
        int screenGroupFile(set<string>);
        int screenAlignReport(set<string>);
        
+       int driver(linePair*, string, string, string, string, set<string>&);
+       int createProcesses(string, string, string, string, set<string>&);
+       
+       #ifdef USE_MPI
+       int driverMPI(int, int, MPI_File&, MPI_File&, MPI_File&, MPI_File&, vector<long>&, set<string>&);
+       #endif
+
        bool abort;
        string fastafile, namefile, groupfile, alignreport, outputDir;
-       int startPos, endPos, maxAmbig, maxHomoP, minLength, maxLength;
+       int startPos, endPos, maxAmbig, maxHomoP, minLength, maxLength, processors;
        vector<string> outputNames;
 };