]> git.donarmstrong.com Git - mothur.git/commitdiff
testing mpi
authorwestcott <westcott>
Thu, 11 Mar 2010 13:20:55 +0000 (13:20 +0000)
committerwestcott <westcott>
Thu, 11 Mar 2010 13:20:55 +0000 (13:20 +0000)
16 files changed:
bellerophon.cpp
chimeraseqscommand.cpp
commandfactory.cpp
commandfactory.hpp
engine.cpp
filters.h
filterseqscommand.cpp
filterseqscommand.h
getseqscommand.cpp
mothur.cpp
mothur.h
mothurout.h
removeseqscommand.cpp
sequence.cpp
sequence.hpp
sparsematrix.hpp

index 2c545fea5b141eff17bb9bf6aa3a8ea9745f9582..54dfb9b67ecf5e764e3c159dffcc47d6fd7004dc 100644 (file)
@@ -197,6 +197,7 @@ int Bellerophon::getChimeras() {
                                
                                if (m->control_pressed) { delete SparseLeft; delete SparseRight; return 0; }
                                
+                               left.clear(); right.clear();
                                vector<SeqMap> distMapRight;
                                vector<SeqMap> distMapLeft;
                                
@@ -251,6 +252,8 @@ int Bellerophon::getChimeras() {
                //sort Preferences highest to lowest
                sort(pref.begin(), pref.end(), comparePref);
                
+               for (int i = 0; i < seqs.size(); i++) { delete seqs[i];  }  seqs.clear();
+               
                return 0;
                
        }
index 69ce031a50c37caed255347378979e3b94d37b62..65e082b93a6a97e162af3aca3aa576915f22bd18 100644 (file)
@@ -340,7 +340,8 @@ int ChimeraSeqsCommand::execute(){
                        m->mothurOut(outputFileName); m->mothurOutEndLine();    
                        if (hasAccnos) {  m->mothurOut(accnosFileName); m->mothurOutEndLine();  }
                        m->mothurOutEndLine();
-
+                       
+                       delete chimera;
                        return 0;
                }
                
index dd97b2f0d801dc64f4a9f110957c093dab136b04..b4f36549c0c65a22d0b2ad6057b9e07e294a4c65 100644 (file)
@@ -117,7 +117,7 @@ CommandFactory::CommandFactory(){
        commands["bootstrap.shared"]    = "bootstrap.shared";
        //commands["consensus"]                 = "consensus";
        commands["help"]                                = "help"; 
-       commands["filter.seqs"]                 = "filter.seqs";
+       commands["filter.seqs"]                 = "MPIEnabled";
        commands["align.seqs"]                  = "align.seqs";
        commands["summary.seqs"]                = "summary.seqs";
        commands["screen.seqs"]                 = "screen.seqs";
@@ -131,7 +131,7 @@ CommandFactory::CommandFactory(){
        commands["align.check"]                 = "align.check";
        commands["get.sharedseqs"]              = "get.sharedseqs";
        commands["get.otulist"]                 = "get.otulist";
-       commands["quit"]                                = "quit"; 
+       commands["quit"]                                = "MPIEnabled"; 
        commands["hcluster"]                    = "hcluster"; 
        commands["classify.seqs"]               = "classify.seqs"; 
        commands["phylotype"]                   = "phylotype";
@@ -145,6 +145,17 @@ CommandFactory::CommandFactory(){
 }
 /***********************************************************/
 
+/***********************************************************/
+bool CommandFactory::MPIEnabled(string commandName) {
+       bool mpi = false;
+       it = commands.find(commandName);
+       if (it != commands.end()) { 
+               if (it->second == "MPIEnabled") { return true; }
+       }
+       return mpi;
+}
+/***********************************************************/
+
 /***********************************************************/
 CommandFactory::~CommandFactory(){
        _uniqueInstance = 0;
index e1f62da1444022618454df7078180ed488edf325..6f96e90b4813e961222c60f9bb7390188d19c3e5 100644 (file)
@@ -25,6 +25,7 @@ public:
        void setOutputDirectory(string o)       {       outputDir = o;          }
        void setInputDirectory(string i)        {       inputDir = i;           }
        string getOutputDir()                           {       return outputDir;       }
+       bool MPIEnabled(string);
 
 private:
        Command* command;
index 3bdc484ac64a0a1140feee90e299696c2eb292c3..2d5fd75959d479826f5239b364879ff316f4eaca 100644 (file)
@@ -54,8 +54,18 @@ bool InteractEngine::getInput(){
                        mout->mothurOutEndLine();
                        
                        input = getCommand();   
+                       #ifdef USE_MPI
+                                       int pid;
+                                       MPI_Comm_rank(MPI_COMM_WORLD, &pid); 
+                                       
+                                       if (pid == 0) {
+                       #endif
                        mout->mothurOutEndLine();       
                        
+                       #ifdef USE_MPI
+                               }
+                       #endif
+                       
                        if (mout->control_pressed) { input = "quit()"; }
                        
                        //allow user to omit the () on the quit command
@@ -68,11 +78,22 @@ bool InteractEngine::getInput(){
                        
                        if (commandName != "") {
                                mout->executing = true;
+                               
+                               #ifdef USE_MPI
+                                       int pid;
+                                       MPI_Comm_rank(MPI_COMM_WORLD, &pid); 
+                                       
+                                       if ((cFactory->MPIEnabled(commandName)) || (pid == 0)) {
+                               #endif
                                //executes valid command
                                Command* command = cFactory->getCommand(commandName, options);
                                quitCommandCalled = command->execute();
                                mout->control_pressed = 0;
                                mout->executing = false;
+                               
+                               #ifdef USE_MPI
+                                       }
+                               #endif
                        }else {
                                mout->mothurOut("Your input contains errors. Please try again."); 
                                mout->mothurOutEndLine();
@@ -99,21 +120,83 @@ string Engine::getCommand()  {
                                        cout << nextCommand << endl;
                                }       
                                
+                               #ifdef USE_MPI
+                                       int pid;
+                                       MPI_Comm_rank(MPI_COMM_WORLD, &pid); 
+                                       
+                                       if (pid == 0) { //only one process should output to screen
+                               #endif
+
                                mout->mothurOutJustToLog("mothur > " + toString(nextCommand));
+                               
+                               #ifdef USE_MPI
+                                       }
+                               #endif
+                               
                                return nextCommand;
                        #else
                                string nextCommand = "";
+                               #ifdef USE_MPI
+                                       int pid;
+                                       MPI_Comm_rank(MPI_COMM_WORLD, &pid); 
+                                       
+                                       if (pid == 0) { //only one process should output to screen
+                               #endif
+
                                mout->mothurOut("mothur > ");
+                               
+                               #ifdef USE_MPI
+                                       }
+                               #endif
+                               
                                getline(cin, nextCommand);
+                               
+                               #ifdef USE_MPI
+                                       int pid;
+                                       MPI_Comm_rank(MPI_COMM_WORLD, &pid); 
+                                       
+                                       if (pid == 0) { //only one process should output to screen
+                               #endif
+                               
                                mout->mothurOutJustToLog("mothur > " + toString(nextCommand));
+                               
+                               #ifdef USE_MPI
+                                       }
+                               #endif
+
                                return nextCommand;
                        #endif
                #else
                        string nextCommand = "";
-                       mout->mothurOut("mothur > ");
-                       getline(cin, nextCommand);
-                       mout->mothurOutJustToLog("mothur > " + toString(nextCommand));
-                       return nextCommand;
+                               #ifdef USE_MPI
+                                       int pid;
+                                       MPI_Comm_rank(MPI_COMM_WORLD, &pid); 
+                                       
+                                       if (pid == 0) { //only one process should output to screen
+                               #endif
+
+                               mout->mothurOut("mothur > ");
+                               
+                               #ifdef USE_MPI
+                                       }
+                               #endif
+                               
+                               getline(cin, nextCommand);
+                               
+                               #ifdef USE_MPI
+                                       int pid;
+                                       MPI_Comm_rank(MPI_COMM_WORLD, &pid); 
+                                       
+                                       if (pid == 0) { //only one process should output to screen
+                               #endif
+                               
+                               mout->mothurOutJustToLog(toString(nextCommand));
+                               
+                               #ifdef USE_MPI
+                                       }
+                               #endif
+
+                               return nextCommand;
                #endif
                
                mout->mothurOutEndLine();
@@ -169,10 +252,21 @@ bool BatchEngine::getInput(){
                        
                        if (input[0] != '#') {
                                
+                               #ifdef USE_MPI
+                                       int pid;
+                                       MPI_Comm_rank(MPI_COMM_WORLD, &pid); 
+                                       
+                                       if (pid == 0) { //only one process should output to screen
+                               #endif
+
                                mout->mothurOutEndLine();
                                mout->mothurOut("mothur > " + input);
                                mout->mothurOutEndLine();
                                
+                               #ifdef USE_MPI
+                                       }
+                               #endif
+                               
                                if (mout->control_pressed) { input = "quit()"; }
                                
                                //allow user to omit the () on the quit command
@@ -184,11 +278,21 @@ bool BatchEngine::getInput(){
                                                                                
                                if (commandName != "") {
                                        mout->executing = true;
+                                       #ifdef USE_MPI
+                                               int pid;
+                                               MPI_Comm_rank(MPI_COMM_WORLD, &pid); 
+                                       
+                                               if ((cFactory->MPIEnabled(commandName)) || (pid == 0)) {
+                                       #endif
                                        //executes valid command
                                        Command* command = cFactory->getCommand(commandName, options);
                                        quitCommandCalled = command->execute();
                                        mout->control_pressed = 0;
                                        mout->executing = false;
+                               
+                                       #ifdef USE_MPI
+                                               }
+                                       #endif
                                }else {         
                                        mout->mothurOut("Invalid."); 
                                        mout->mothurOutEndLine();
@@ -250,11 +354,21 @@ bool ScriptEngine::getInput(){
                        
                        if (input == "") { input = "quit()"; }
                        
-                       
+                       #ifdef USE_MPI
+                                       int pid;
+                                       MPI_Comm_rank(MPI_COMM_WORLD, &pid); 
+                                       
+                                       if (pid == 0) {
+                       #endif
+
                        mout->mothurOutEndLine();
                        mout->mothurOut("mothur > " + input);
                        mout->mothurOutEndLine();
-
+                       
+                       #ifdef USE_MPI
+                                       }
+                       #endif
+                       
                        if (mout->control_pressed) { input = "quit()"; }
                                
                        //allow user to omit the () on the quit command
@@ -266,11 +380,21 @@ bool ScriptEngine::getInput(){
                                                                                
                        if (commandName != "") {
                                mout->executing = true;
+                               #ifdef USE_MPI
+                                       int pid;
+                                       MPI_Comm_rank(MPI_COMM_WORLD, &pid); 
+                                       
+                                       if ((cFactory->MPIEnabled(commandName)) || (pid == 0)) {
+                               #endif
                                //executes valid command
                                Command* command = cFactory->getCommand(commandName, options);
                                quitCommandCalled = command->execute();
                                mout->control_pressed = 0;
                                mout->executing = false;
+                               
+                               #ifdef USE_MPI
+                                       }
+                               #endif
                        }else {         
                                mout->mothurOut("Invalid."); 
                                mout->mothurOutEndLine();
index 4c396070a911000018710a3b652f2f81439e1db7..94f8d33c0329793f3e1c39c0ea0c467f53493357 100644 (file)
--- a/filters.h
+++ b/filters.h
@@ -28,6 +28,8 @@ public:
        void setSoft(float s)           {               soft = s;               }
        void setTrump(float t)          {               trump = t;              }
        void setNumSeqs(int num)        {       numSeqs = num;          }
+       vector<int> a, t, g, c, gap;
+       
        
        void initialize() {
                a.assign(alignmentLength, 0);
@@ -89,7 +91,6 @@ public:
                
 protected:
        string filter;
-       vector<int> a, t, g, c, gap;
        int alignmentLength, numSeqs;
        float soft;
        char trump;
index 0e33ab2047359b432886384cc41d60a806fbdc7c..326be01ff722fda90a61e0dd887f8d3e57107be4 100644 (file)
@@ -131,6 +131,13 @@ FilterSeqsCommand::FilterSeqsCommand(string option)  {
 
 void FilterSeqsCommand::help(){
        try {
+               #ifdef USE_MPI
+                               int pid;
+                               MPI_Comm_rank(MPI_COMM_WORLD, &pid); 
+                                       
+                               if (pid == 0) {
+               #endif
+               
                m->mothurOut("The filter.seqs command reads a file containing sequences and creates a .filter and .filter.fasta file.\n");
                m->mothurOut("The filter.seqs command parameters are fasta, trump, soft, hard and vertical. \n");
                m->mothurOut("The fasta parameter is required. You may enter several fasta files to build the filter from and filter, by separating their names with -'s.\n");
@@ -144,6 +151,10 @@ void FilterSeqsCommand::help(){
                m->mothurOut("Example filter.seqs(fasta=abrecovery.fasta, trump=..., soft=..., hard=..., vertical=T).\n");
                m->mothurOut("Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFasta).\n\n");
                
+               #ifdef USE_MPI
+                       }
+               #endif
+               
        }
        catch(exception& e) {
                m->errorOut(e, "FilterSeqsCommand", "help");
@@ -221,7 +232,13 @@ int FilterSeqsCommand::execute() {
                
                if (m->control_pressed) {  for(int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); }  return 0; }
 
-               
+               #ifdef USE_MPI
+                               int pid;
+                               MPI_Comm_rank(MPI_COMM_WORLD, &pid); 
+                                       
+                               if (pid == 0) {
+               #endif
+
                m->mothurOutEndLine();
                m->mothurOut("Length of filtered alignment: " + toString(filteredLength)); m->mothurOutEndLine();
                m->mothurOut("Number of columns removed: " + toString((alignmentLength-filteredLength))); m->mothurOutEndLine();
@@ -233,6 +250,10 @@ int FilterSeqsCommand::execute() {
                m->mothurOut("Output File Names: "); m->mothurOutEndLine();
                for(int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();      }
                m->mothurOutEndLine();
+               
+               #ifdef USE_MPI
+                       }
+               #endif
 
                return 0;
                
@@ -267,72 +288,156 @@ string FilterSeqsCommand::createFilter() {
                        for (int s = 0; s < fastafileNames.size(); s++) {
                        
                                for (int i = 0; i < lines.size(); i++) {  delete lines[i];  }  lines.clear();
+                       
+#ifdef USE_MPI 
+                               int pid, rc, ierr; 
+                               char* buf;
+                               int Atag = 1; int Ttag = 2; int Ctag = 3; int Gtag = 4; int Gaptag = 5;
                                
-                               #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
-                                       if(processors == 1){
-                                               ifstream inFASTA;
-                                               openInputFile(fastafileNames[s], inFASTA);
-                                               int numFastaSeqs=count(istreambuf_iterator<char>(inFASTA),istreambuf_iterator<char>(), '>');
-                                               inFASTA.close();
+                               MPI_Status status; 
+                               MPI_File in; 
+                               rc = MPI_Comm_size(MPI_COMM_WORLD, &processors);
+                               rc = MPI_Comm_rank(MPI_COMM_WORLD, &pid); 
                                                
-                                               numSeqs += numFastaSeqs;
-                               
-                                               lines.push_back(new linePair(0, numFastaSeqs));
-                               
-                                               driverCreateFilter(F, fastafileNames[s], lines[0]);
-                                       }else{
-                                               vector<int> positions;
-                               
-                                               ifstream inFASTA;
-                                               openInputFile(fastafileNames[s], inFASTA);
-                               
-                                               string input;
-                                               while(!inFASTA.eof()){
-                                                       input = getline(inFASTA);
-                                                       if (input.length() != 0) {
-                                                               if(input[0] == '>'){    long int pos = inFASTA.tellg(); positions.push_back(pos - input.length() - 1);  }
+                                                       
+                               char* tempFileName = &(fastafileNames[s][0]);
+                               MPI_File_open(MPI_COMM_WORLD, tempFileName, MPI_MODE_RDONLY, MPI_INFO_NULL, &in);  //comm, filename, mode, info, filepointer
+                                                               
+                               if (pid == 0) { //you are the root process
+                                               setLines(fastafileNames[s]);
+                                               
+                                               for (int j = 0; j < lines.size(); j++) { //each process
+                                                       if (j != 0) { //don't send to yourself
+                                                               MPI_Send(&lines[j]->start, 1, MPI_INT, j, 2001, MPI_COMM_WORLD); //start position in file
+                                                               MPI_Send(&lines[j]->numSeqs, 1, MPI_INT, j, 2001, MPI_COMM_WORLD); //how many sequences we are sending
+                                                               MPI_Send(&bufferSizes[j], 1, MPI_INT, j, 2001, MPI_COMM_WORLD); //how bytes for the read
                                                        }
                                                }
-                                               inFASTA.close();
-                               
-                                               int numFastaSeqs = positions.size();
+                                               cout << "done sending" << endl;
+                                               cout << "parent = " << pid << " lines = " << lines[pid]->start << '\t' << lines[pid]->numSeqs << " size = " <<  lines.size() << endl;   
+                                               
+                                               buf = new char(bufferSizes[0]);
+                       cout << pid << '\t' << bufferSizes[0] << " line 1 start pos = " << lines[1]->start   << " buffer size 0 " << bufferSizes[0] << " buffer size 1 " << bufferSizes[1] << endl;                     
+                                               MPI_File_read_at(in, 0, buf, bufferSizes[0], MPI_CHAR, &status);
+                                               
+               cout << pid << " done reading " << endl;
+                                               string tempBuf = buf;
+                       cout << pid << '\t' << (tempBuf.substr(0, 10)) << endl;                                                                 
+                                               //do your part
+                                               MPICreateFilter(F, tempBuf);
+                                               
+                                               vector<int> temp; temp.resize(numSeqs);
+                                               
+                                               //get the frequencies from the child processes
+                                               for(int i = 0; i < ((processors-1)*5); i++) { 
+                               cout << "i = " << i << endl;
+                                                       int ierr = MPI_Recv(&temp, numSeqs, MPI_INT, MPI_ANY_SOURCE, 2001, MPI_COMM_WORLD, &status); 
+                                                       
+                                                       int receiveTag = temp[temp.size()-1];  //child process added a int to the end to indicate what letter count this is for
+                                                       
+                                                       int sender = status.MPI_SOURCE; 
+                                                       
+                                                       if (receiveTag == Atag) { //you are recieveing the A frequencies
+                                                               for (int k = 0; k < alignmentLength; k++) {             F.a[k] += temp[k];      }
+                                                       }else if (receiveTag == Ttag) { //you are recieveing the T frequencies
+                                                               for (int k = 0; k < alignmentLength; k++) {             F.t[k] += temp[k];      }
+                                                       }else if (receiveTag == Ctag) { //you are recieveing the C frequencies
+                                                               for (int k = 0; k < alignmentLength; k++) {             F.c[k] += temp[k];      }
+                                                       }else if (receiveTag == Gtag) { //you are recieveing the G frequencies
+                                                               for (int k = 0; k < alignmentLength; k++) {             F.g[k] += temp[k];      }
+                                                       }else if (receiveTag == Gaptag) { //you are recieveing the gap frequencies
+                                                               for (int k = 0; k < alignmentLength; k++) {             F.gap[k] += temp[k];    }
+                                                       }
+                                                       
+                                                       m->mothurOut("receive tag = " + toString(receiveTag) + " " + toString(sender) + " is complete."); m->mothurOutEndLine();
+                                               } 
+
                                                
-                                               numSeqs += numFastaSeqs;
+                               }else { //i am the child process
+                                       int startPos, numLines, bufferSize;
+                               cout << "child = " << pid << endl;
+                                       ierr = MPI_Recv(&startPos, 1, MPI_INT, 0, 2001, MPI_COMM_WORLD, &status);
+                                       ierr = MPI_Recv(&numLines, 1, MPI_INT, 0, 2001, MPI_COMM_WORLD, &status);
+                                       ierr = MPI_Recv(&bufferSize, 1, MPI_INT, 0, 2001, MPI_COMM_WORLD, &status);
+                               cout << "child = " << pid << " done recv messages startpos = " << startPos << " numLines = " << numLines << " buffersize = " << bufferSize << endl;     
                                
-                                               int numSeqsPerProcessor = numFastaSeqs / processors;
+                                       
+                                       //send freqs
+                                       char* buf2 = new char(bufferSize);
+                                       MPI_File_read_at( in, startPos, buf2, bufferSize, MPI_CHAR, &status);
+                               cout << pid << " done reading " << endl;
+                                       
+                                       string tempBuf = buf2;
+                       cout << pid << '\t' << (tempBuf.substr(0, 10)) << endl;
+                                       MPICreateFilter(F, tempBuf);
                                
-                                               for (int i = 0; i < processors; i++) {
-                                                       long int startPos = positions[ i * numSeqsPerProcessor ];
-                                                       if(i == processors - 1){
-                                                               numSeqsPerProcessor = numFastaSeqs - i * numSeqsPerProcessor;
-                                                       }
-                                                       lines.push_back(new linePair(startPos, numSeqsPerProcessor));
-                                               }
+                                       //send my fequency counts
+                                       F.a.push_back(Atag);
+                                       int ierr = MPI_Send( &F.a[0], alignmentLength, MPI_INT, 0, 2001, MPI_COMM_WORLD);
+                                       F.t.push_back(Ttag);
+                                       ierr = MPI_Send( &F.t[0], alignmentLength, MPI_INT, 0, 2001, MPI_COMM_WORLD);
+                                       F.c.push_back(Ctag);
+                                       ierr = MPI_Send( &F.c[0], alignmentLength, MPI_INT, 0, 2001, MPI_COMM_WORLD);
+                                       F.g.push_back(Gtag);
+                                       ierr = MPI_Send( &F.g[0], alignmentLength, MPI_INT, 0, 2001, MPI_COMM_WORLD);
+                                       F.gap.push_back(Gaptag);
+                                       ierr = MPI_Send( &F.gap[0], alignmentLength, MPI_INT, 0, 2001, MPI_COMM_WORLD);
+                                       
+                                       cout << "child " << pid << " done sending counts" << endl;
+                               }
                                
-                                               createProcessesCreateFilter(F, fastafileNames[s]); 
-                                       }
-                               #else
+                               MPI_Barrier(MPI_COMM_WORLD);
+                               
+#else
+               #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
+                               if(processors == 1){
                                        ifstream inFASTA;
                                        openInputFile(fastafileNames[s], inFASTA);
                                        int numFastaSeqs=count(istreambuf_iterator<char>(inFASTA),istreambuf_iterator<char>(), '>');
                                        inFASTA.close();
-                                               
+                                       
                                        numSeqs += numFastaSeqs;
-                               
+                                       
                                        lines.push_back(new linePair(0, numFastaSeqs));
+                                       
+                                       driverCreateFilter(F, fastafileNames[s], lines[0]);
+                               }else{
+                                       
+                                       setLines(fastafileNames[s]);                                    
+                                       createProcessesCreateFilter(F, fastafileNames[s]); 
+                               }
+               #else
+                               ifstream inFASTA;
+                               openInputFile(fastafileNames[s], inFASTA);
+                               int numFastaSeqs=count(istreambuf_iterator<char>(inFASTA),istreambuf_iterator<char>(), '>');
+                               inFASTA.close();
                                
-                                       driverCreateFilter(F, lines[0], fastafileNames[s]);
-                               #endif
-                       
+                               numSeqs += numFastaSeqs;
+                               
+                               lines.push_back(new linePair(0, numFastaSeqs));
+                               
+                               driverCreateFilter(F, lines[0], fastafileNames[s]);
+               #endif
+#endif
                        
                        }
                }
-               
+
+#ifdef USE_MPI
+
+//merge all frequency data and create filter string
+                                       //int pid;
+                                       //MPI_Comm_rank(MPI_COMM_WORLD, &pid); 
+                                       
+                                       //if (pid == 0) { //only one process should output to screen
+#endif
+
+       cout << "made it here" << endl; 
                F.setNumSeqs(numSeqs);
                                
                if(isTrue(vertical) == 1)       {       F.doVertical(); }
                if(soft != 0)                           {       F.doSoft();             }
-               
+//cout << "Filter String = " << F.getFilter() << endl;                 
                filterString = F.getFilter();
 
                return filterString;
@@ -361,8 +466,14 @@ int FilterSeqsCommand::driverCreateFilter(Filters& F, string filename, linePair*
                                        if(isTrue(vertical) || soft != 0){      F.getFreqs(seq);        }
                                        cout.flush();
                        }
+                       
+                       //report progress
+                       if((i+1) % 100 == 0){   m->mothurOut(toString(i+1)); m->mothurOutEndLine();             }
                }
-                               
+               
+               //report progress
+               if((line->numSeqs) % 100 != 0){ m->mothurOut(toString(line->numSeqs)); m->mothurOutEndLine();           }
+               
                in.close();
                
                return 0;
@@ -372,6 +483,38 @@ int FilterSeqsCommand::driverCreateFilter(Filters& F, string filename, linePair*
                exit(1);
        }
 }
+/**************************************************************************************/
+int FilterSeqsCommand::MPICreateFilter(Filters& F, string temp) {      
+       try {
+               
+               vector<string> seqStrings;
+               parseBuffer(temp, seqStrings);
+               
+               for(int i=0;i<seqStrings.size();i++){
+                               
+                       if (m->control_pressed) { return 1; }
+                       
+                       Sequence seq("", seqStrings[0]);
+                       
+                       if(trump != '*'){       F.doTrump(seq); }
+                       if(isTrue(vertical) || soft != 0){      F.getFreqs(seq);        }
+                       cout.flush();
+                                               
+                       //report progress
+                       if((i+1) % 100 == 0){   m->mothurOut(toString(i+1)); m->mothurOutEndLine();             }
+               }
+               
+               //report progress
+               if((seqStrings.size()) % 100 != 0){     m->mothurOut(toString(seqStrings.size())); m->mothurOutEndLine();               }
+               
+               return 0;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "FilterSeqsCommand", "MPICreateFilter");
+               exit(1);
+       }
+}
+
 /**************************************************************************************************/
 
 int FilterSeqsCommand::createProcessesCreateFilter(Filters& F, string filename) {
@@ -408,4 +551,84 @@ int FilterSeqsCommand::createProcessesCreateFilter(Filters& F, string filename)
                exit(1);
        }
 }
+/**************************************************************************************************/
+
+int FilterSeqsCommand::setLines(string filename) {
+       try {
+               vector<int> positions;
+               map<int, int> buf;
+               bufferSizes.clear();
+               
+               int pid;
+               MPI_Comm_rank(MPI_COMM_WORLD, &pid); 
+       
+               ifstream inFASTA;
+               openInputFile(filename, inFASTA);
+                       
+               string input;
+               int numbuf = 0;
+               while(!inFASTA.eof()){  
+                       input = getline(inFASTA);
+
+                       if (input.length() != 0) {
+                               numbuf += input.length();
+                               if(input[0] == '>'){    long int pos = inFASTA.tellg(); positions.push_back(pos - input.length() - 1);  buf[(pos - input.length() - 1)] = numbuf; }
+                       }
+               }
+
+               inFASTA.close();
+               
+               int numFastaSeqs = positions.size();
+               
+               numSeqs += numFastaSeqs;
+               
+               int numSeqsPerProcessor = numFastaSeqs / processors;
+               
+               for (int i = 0; i < processors; i++) {
+
+                       long int startPos = positions[ i * numSeqsPerProcessor ];
+                       if(i == processors - 1){
+                               numSeqsPerProcessor = numFastaSeqs - i * numSeqsPerProcessor;
+                               bufferSizes.push_back(numbuf-buf[startPos]);
+                       }else{  
+                               int myEnd = positions[ (i+1) * numSeqsPerProcessor ];
+                               bufferSizes.push_back(buf[myEnd]-buf[startPos]);
+                       }
+                       lines.push_back(new linePair(startPos, numSeqsPerProcessor));
+               }
+               
+               return 0;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "FilterSeqsCommand", "setLines");
+               exit(1);
+       }
+}
+/**************************************************************************************************/
+int FilterSeqsCommand::parseBuffer(string file, vector<string>& seqs) {
+       try {
+               
+               istringstream iss (file,istringstream::in);
+               string name, seqstring;
+               
+               while (iss) {
+                       
+                       if (m->control_pressed) { return 0; }
+               cout << "here" << endl;                 
+                       Sequence seq(iss); 
+       cout << "here1" << endl;                        
+                       gobble(iss);
+       cout << seq.getName() << endl;          
+                       if (seq.getName() != "") {
+                               seqs.push_back(seq.getAligned());       
+                       }
+               }
+               
+               return 0;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "FilterSeqsCommand", "parseBuffer");
+               exit(1);
+       }
+}
 /**************************************************************************************/
index 232b0acb9966c19d0dafe9a3875dbc702e925754..3cc007c059c63de5f28eee57ac0afcf78d00dc33 100644 (file)
@@ -35,6 +35,7 @@ private:
        string vertical, filter, fasta, hard, outputDir, filterFileName;
        vector<string> fastafileNames;  
        int alignmentLength, processors;
+       vector<int> bufferSizes;
 
        char trump;
        bool abort;
@@ -44,6 +45,10 @@ private:
        string createFilter();
        int createProcessesCreateFilter(Filters&, string);
        int driverCreateFilter(Filters&, string, linePair*);
+       int MPICreateFilter(Filters&, string);  
+       int setLines(string);
+       int parseBuffer(string, vector<string>&);
+       
 };
 
 #endif
index 7535e65ed39065f3a5788c07a6a2e0580f576aaa..f40c89b3fb3eff837729bc203055d11ef1a3c5a5 100644 (file)
@@ -277,10 +277,13 @@ int GetSeqsCommand::readList(){
                                }
                        
                                //get last name
-                               if (names.count(binnames) == 1) {  newNames += binnames;  }
+                               if (names.count(binnames) == 1) {  newNames += binnames + ",";  }
 
                                //if there are names in this bin add to new list
-                               if (newNames != "") {  newList.push_back(newNames);     }
+                               if (newNames != "") { 
+                                       newNames = newNames.substr(0, newNames.length()-1); //rip off extra comma
+                                       newList.push_back(newNames);    
+                               }
                        }
                                
                        //print new listvector
index a3eef76a2766d5ae69c824b0ee4cf4b5e3f14a51..6631489a1f67fa5ba486108c0421d562a8953fa5 100644 (file)
@@ -12,6 +12,7 @@
 #include "globaldata.hpp"
 #include "mothurout.h"
 
+
 /**************************************************************************************************/
 
 GlobalData* GlobalData::_uniqueInstance = 0;
@@ -99,7 +100,11 @@ int main(int argc, char *argv[]){
                m->mothurOutEndLine();                  
                m->mothurOut("Type 'quit()' to exit program");
                m->mothurOutEndLine();  
-
+               
+               #ifdef USE_MPI
+                       m->mothurOutJustToLog("Using MPI\n");
+                       MPI_Init(&argc, &argv); 
+               #endif
                                
                //srand(54321);
                srand( (unsigned)time( NULL ) );
@@ -130,7 +135,7 @@ int main(int argc, char *argv[]){
                        mothur = new InteractEngine(argv[0]);   
                }
                
-               while(bail == 0)                {       bail = mothur->getInput();                      }
+               while(bail == 0)        {       bail = mothur->getInput();      }
                
                string outputDir = mothur->getOutputDir();
                string newlogFileName = outputDir + logFileName;
@@ -139,7 +144,11 @@ int main(int argc, char *argv[]){
                rename(logFileName.c_str(), newlogFileName.c_str()); //logfile with timestamp
                
                delete mothur;
-
+               
+               #ifdef USE_MPI
+                       MPI_Finalize();
+               #endif
+               
                return 0;
        }
        catch(exception& e) {
index 16a9844b70799c1334e95aabc20c6557688bdaaf..7904425caf46918ef8e79aa34abea972b77b2cd5 100644 (file)
--- a/mothur.h
+++ b/mothur.h
@@ -47,6 +47,9 @@
 #include <ctime>
 #include <limits>
 
+#ifdef USE_MPI
+       #include "mpi.h"
+#endif
 /***********************************************************************/
 
 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
index d9c8e5ddcbd435980dbc4f70bfbfa31799f74233..7b4732b45108b05cca9b17be98e9205b50e1a2ab 100644 (file)
@@ -26,7 +26,6 @@ class MothurOut {
                void errorOut(exception&, string, string);
                int control_pressed;
                bool executing;
-               
 
        private:
                static MothurOut* _uniqueInstance;
index e8565d05819e72968aee30bf6584b1dcb59cc484..07e946b78eb8d7d56c7d09a91fd2fb90fc8564a5 100644 (file)
@@ -279,10 +279,13 @@ int RemoveSeqsCommand::readList(){
                                }
                        
                                //get last name
-                               if (names.count(binnames) == 0) {  newNames += binnames;  }
+                               if (names.count(binnames) == 0) {  newNames += binnames + ",";  }
 
                                //if there are names in this bin add to new list
-                               if (newNames != "") {  newList.push_back(newNames);     }
+                               if (newNames != "") {  
+                                       newNames = newNames.substr(0, newNames.length()-1); //rip off extra comma
+                                       newList.push_back(newNames);    
+                               }
                        }
                                
                        //print new listvector
index 94caf65eafc40564fbbb04e9721cf385222c8e45..b73bfab7caefdb0a97e5f7cc8f3271ec3e51cb57 100644 (file)
@@ -19,46 +19,101 @@ Sequence::Sequence(){
 /***********************************************************************/
 
 Sequence::Sequence(string newName, string sequence) {
-
-       initialize();   
-       name = newName;
-       
-       //setUnaligned removes any gap characters for us
-       setUnaligned(sequence);
-       setAligned(sequence);
-       
+       try {
+               m = MothurOut::getInstance();
+               initialize();   
+               name = newName;
+               
+               //setUnaligned removes any gap characters for us
+               setUnaligned(sequence);
+               setAligned(sequence);
+       }
+       catch(exception& e) {
+               m->errorOut(e, "Sequence", "Sequence");
+               exit(1);
+       }                       
 }
 //********************************************************************************************************************
 //this function will jump over commented out sequences, but if the last sequence in a file is commented out it makes a blank seq
-Sequence::Sequence(ifstream& fastaFile){
-
-       initialize();
-       fastaFile >> name;
-       name = name.substr(1);
-       string sequence;
-       
-       //read comments
-       while ((name[0] == '#') && fastaFile) { 
-           while (!fastaFile.eof())    {       char c = fastaFile.get(); if (c == 10 || c == 13){      break;  }       } // get rest of line if there's any crap there
-               sequence = getCommentString(fastaFile);
+Sequence::Sequence(istringstream& fastaString){
+       try {
+               m = MothurOut::getInstance();
+       int pid;
+       MPI_Comm_rank(MPI_COMM_WORLD, &pid); 
+       cout << pid << " after mothur instance " << &name << endl;
+               initialize();
+       cout << "after mothur initialize" << endl;
+               fastaString >> name;
+       cout << "after name "  << endl;
+               name = name.substr(1);
+               string sequence;
                
-               if (fastaFile) {  
-                       fastaFile >> name;  
-                       name = name.substr(1);  
-               }else { 
-                       name = "";
-                       break;
+               //read comments
+               while ((name[0] == '#') && fastaString) { 
+                       while (fastaString)     {       char c = fastaString.get(); if (c == 10 || c == 13){    break;  }       } // get rest of line if there's any crap there
+                       sequence = getCommentString(fastaString);
+                       
+                       if (fastaString) {  
+                               fastaString >> name;  
+                               name = name.substr(1);  
+                       }else { 
+                               name = "";
+                               break;
+                       }
                }
+       cout << "after mothur comment" << endl; 
+               //read real sequence
+               while (fastaString)     {       char c = fastaString.get(); if (c == 10 || c == 13){    break;  }       } // get rest of line if there's any crap there
+       cout << "after mothur name" << endl;    
+               sequence = getSequenceString(fastaString);              
+       cout << "after mothur sequence" << endl;        
+               setAligned(sequence);   
+               //setUnaligned removes any gap characters for us                                                
+               setUnaligned(sequence);         
        }
-       
-       //read real sequence
-       while (!fastaFile.eof())        {       char c = fastaFile.get(); if (c == 10 || c == 13){      break;  }       } // get rest of line if there's any crap there
+       catch(exception& e) {
+               m->errorOut(e, "Sequence", "Sequence");
+               exit(1);
+       }                                                               
+}
+
+//********************************************************************************************************************
+//this function will jump over commented out sequences, but if the last sequence in a file is commented out it makes a blank seq
+Sequence::Sequence(ifstream& fastaFile){
+       try {
+               m = MothurOut::getInstance();
+               initialize();
+               fastaFile >> name;
+               name = name.substr(1);
+               string sequence;
+               
+               //read comments
+               while ((name[0] == '#') && fastaFile) { 
+                       while (!fastaFile.eof())        {       char c = fastaFile.get(); if (c == 10 || c == 13){      break;  }       } // get rest of line if there's any crap there
+                       sequence = getCommentString(fastaFile);
+                       
+                       if (fastaFile) {  
+                               fastaFile >> name;  
+                               name = name.substr(1);  
+                       }else { 
+                               name = "";
+                               break;
+                       }
+               }
+               
+               //read real sequence
+               while (!fastaFile.eof())        {       char c = fastaFile.get(); if (c == 10 || c == 13){      break;  }       } // get rest of line if there's any crap there
+               
+               sequence = getSequenceString(fastaFile);                
                
-       sequence = getSequenceString(fastaFile);                
-       
-       setAligned(sequence);   
-       //setUnaligned removes any gap characters for us                                                
-       setUnaligned(sequence);                                                         
+               setAligned(sequence);   
+               //setUnaligned removes any gap characters for us                                                
+               setUnaligned(sequence); 
+       }
+       catch(exception& e) {
+               m->errorOut(e, "Sequence", "Sequence");
+               exit(1);
+       }                                                       
 }
 //********************************************************************************************************************
 string Sequence::getSequenceString(ifstream& fastaFile) {
@@ -108,7 +163,54 @@ string Sequence::getCommentString(ifstream& fastaFile) {
                exit(1);
        }
 }
-
+//********************************************************************************************************************
+string Sequence::getSequenceString(istringstream& fastaFile) {
+       try {
+               char letter;
+               string sequence = "";   
+               
+               while(fastaFile){
+                       letter= fastaFile.get();
+                       if(letter == '>'){
+                               fastaFile.putback(letter);
+                               break;
+                       }
+                       else if(isprint(letter)){
+                               letter = toupper(letter);
+                               if(letter == 'U'){letter = 'T';}
+                               sequence += letter;
+                       }
+               }
+               
+               return sequence;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "Sequence", "getSequenceString");
+               exit(1);
+       }
+}
+//********************************************************************************************************************
+//comment can contain '>' so we need to account for that
+string Sequence::getCommentString(istringstream& fastaFile) {
+       try {
+               char letter;
+               string sequence = "";
+               
+               while(fastaFile){
+                       letter=fastaFile.get();
+                       if((letter == '\r') || (letter == '\n')){  
+                               gobble(fastaFile);  //in case its a \r\n situation
+                               break;
+                       }
+               }
+               
+               return sequence;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "Sequence", "getCommentString");
+               exit(1);
+       }
+}
 //********************************************************************************************************************
 
 void Sequence::initialize(){
index f48bf13dc98d034cea520cf78b09124fff30cb33..5f84d441c20d0fd8a0ed8f9284b7a32dfc9b4214 100644 (file)
@@ -24,6 +24,7 @@ public:
        Sequence();
        Sequence(string, string);
        Sequence(ifstream&);
+       Sequence(istringstream&);
        
        void setName(string);
        void setUnaligned(string);
@@ -51,6 +52,8 @@ private:
        void initialize();
        string getSequenceString(ifstream&);
        string getCommentString(ifstream&);
+       string getSequenceString(istringstream&);
+       string getCommentString(istringstream&);
        string name;
        string unaligned;
        string aligned;
index e42fdbb09cc10b00f535003c1c0742ba887d98b9..c8a11d70ddfe1fefa640e0e3412cc6f27ba0e579 100644 (file)
@@ -27,6 +27,7 @@ class SparseMatrix {
        
 public:
        SparseMatrix();
+       ~SparseMatrix(){  while(!mins.empty() && mins.back() == NULL){  mins.pop_back();        }  }
        int getNNodes();
        void print();                                   //Print the contents of the matrix
        void print(ListVector*);                //Print the contents of the matrix