]> git.donarmstrong.com Git - mothur.git/commitdiff
added cluster.split command
authorwestcott <westcott>
Thu, 20 May 2010 15:00:33 +0000 (15:00 +0000)
committerwestcott <westcott>
Thu, 20 May 2010 15:00:33 +0000 (15:00 +0000)
37 files changed:
Mothur.xcodeproj/project.pbxproj
aligncommand.cpp
alignmentdb.cpp
bayesian.cpp
bellerophon.cpp
blastdb.cpp
blastdb.hpp
chimera.cpp
chimeraccodecommand.cpp
chimeracheckcommand.cpp
chimerapintailcommand.cpp
chimeraslayercommand.cpp
classify.cpp
classifyseqscommand.cpp
clustersplitcommand.cpp [new file with mode: 0644]
clustersplitcommand.h [new file with mode: 0644]
commandfactory.cpp
database.hpp
distancecommand.cpp
engine.cpp
filterseqscommand.cpp
hclustercommand.cpp
inputdata.cpp
inputdata.h
kmerdb.cpp
kmerdb.hpp
makefile
phylotree.cpp
readcluster.cpp
readcluster.h
screenseqscommand.cpp
seqsummarycommand.cpp
splitmatrix.cpp [new file with mode: 0644]
splitmatrix.h [new file with mode: 0644]
suffixdb.cpp
suffixdb.hpp
validparameter.cpp

index 2b7d8787c72c15eff10a16e7df12031de5a915c5..d4625478f17442e3bea270a80df246df2193baa8 100644 (file)
                A72B3A63118B37FD004B9F8D /* phylodiversitycommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = phylodiversitycommand.cpp; sourceTree = "<group>"; };
                A72B3A7B118B4D1B004B9F8D /* phylodiversity.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = phylodiversity.h; sourceTree = "<group>"; };
                A72B3A7C118B4D1B004B9F8D /* phylodiversity.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = phylodiversity.cpp; sourceTree = "<group>"; };
+               A730977D11A417BE00117C95 /* splitmatrix.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = splitmatrix.h; sourceTree = SOURCE_ROOT; };
+               A730977E11A417BE00117C95 /* splitmatrix.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = splitmatrix.cpp; sourceTree = SOURCE_ROOT; };
+               A73097B911A43E1300117C95 /* clustersplitcommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = clustersplitcommand.h; sourceTree = "<group>"; };
+               A73097BA11A43E1300117C95 /* clustersplitcommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = clustersplitcommand.cpp; sourceTree = "<group>"; };
                A73953DA11987ED100B0B160 /* chopseqscommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = chopseqscommand.h; sourceTree = "<group>"; };
                A73953DB11987ED100B0B160 /* chopseqscommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = chopseqscommand.cpp; sourceTree = "<group>"; };
                A73F163411A1951D0087CA57 /* splitabundcommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = splitabundcommand.h; sourceTree = "<group>"; };
                                A7DA201D113FECD400BF472F /* classifyseqscommand.cpp */,
                                A7D215C811996C6E00F13F13 /* clearcutcommand.h */,
                                A7D215C911996C6E00F13F13 /* clearcutcommand.cpp */,
-                               A7DA2021113FECD400BF472F /* clustercommand.cpp */,
                                A7DA2022113FECD400BF472F /* clustercommand.h */,
+                               A7DA2021113FECD400BF472F /* clustercommand.cpp */,
+                               A73097B911A43E1300117C95 /* clustersplitcommand.h */,
+                               A73097BA11A43E1300117C95 /* clustersplitcommand.cpp */,
                                A7DA2025113FECD400BF472F /* collectcommand.cpp */,
                                A7DA2026113FECD400BF472F /* collectcommand.h */,
                                A7DA2029113FECD400BF472F /* collectsharedcommand.cpp */,
                                A7DA20ED113FECD400BF472F /* readphylip.h */,
                                A7DA20EE113FECD400BF472F /* readtree.cpp */,
                                A7DA20EF113FECD400BF472F /* readtree.h */,
+                               A730977D11A417BE00117C95 /* splitmatrix.h */,
+                               A730977E11A417BE00117C95 /* splitmatrix.cpp */,
                        );
                        name = read;
                        sourceTree = "<group>";
index a246ecc5806320efeadda6fa82f67fd2f2d99e21..03992dcd04e966f6b0765c9c1a2a0d651b6e283e 100644 (file)
@@ -303,8 +303,10 @@ int AlignCommand::execute(){
                                        MPIPos = setFilePosFasta(candidateFileNames[s], numFastaSeqs); //fills MPIPos, returns numSeqs\r
                                        \r
                                        //send file positions to all processes\r
-                                       MPI_Bcast(&numFastaSeqs, 1, MPI_INT, 0, MPI_COMM_WORLD);  //send numSeqs\r
-                                       MPI_Bcast(&MPIPos[0], (numFastaSeqs+1), MPI_LONG, 0, MPI_COMM_WORLD); //send file pos   \r
+                                       for(int i = 1; i < processors; i++) { \r
+                                               MPI_Send(&numFastaSeqs, 1, MPI_INT, i, tag, MPI_COMM_WORLD);\r
+                                               MPI_Send(&MPIPos[0], (numFastaSeqs+1), MPI_LONG, i, tag, MPI_COMM_WORLD);\r
+                                       }\r
                                        \r
                                        //figure out how many sequences you have to align\r
                                        numSeqsPerProcessor = numFastaSeqs / processors;\r
@@ -323,9 +325,10 @@ int AlignCommand::execute(){
                                                if (tempResult != 0) { MPIWroteAccnos = true; }\r
                                        }\r
                                }else{ //you are a child process\r
-                                       MPI_Bcast(&numFastaSeqs, 1, MPI_INT, 0, MPI_COMM_WORLD); //get numSeqs\r
+                                       MPI_Recv(&numFastaSeqs, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);\r
                                        MPIPos.resize(numFastaSeqs+1);\r
-                                       MPI_Bcast(&MPIPos[0], (numFastaSeqs+1), MPI_LONG, 0, MPI_COMM_WORLD); //get file positions\r
+                                       MPI_Recv(&MPIPos[0], (numFastaSeqs+1), MPI_LONG, 0, tag, MPI_COMM_WORLD, &status);\r
+\r
                                        \r
                                        //figure out how many sequences you have to align\r
                                        numSeqsPerProcessor = numFastaSeqs / processors;\r
index 364ef12588223c0300eccd767aac6f17e6630aa1..9b2977dd91d11604b990860f1b85299aff50b147 100644 (file)
@@ -25,12 +25,14 @@ AlignmentDB::AlignmentDB(string fastaFileName, string s, int kmerSize, float gap
                m->mothurOut("Reading in the " + fastaFileName + " template sequences...\t");   cout.flush();\r
                \r
                #ifdef USE_MPI  \r
-                       int pid;\r
+                       int pid, processors;\r
                        vector<long> positions;\r
                \r
                        MPI_Status status; \r
                        MPI_File inMPI;\r
                        MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are\r
+                       MPI_Comm_size(MPI_COMM_WORLD, &processors);\r
+                       int tag = 2001;\r
        \r
                        char inFileName[1024];\r
                        strcpy(inFileName, fastaFileName.c_str());\r
@@ -41,12 +43,14 @@ AlignmentDB::AlignmentDB(string fastaFileName, string s, int kmerSize, float gap
                                positions = setFilePosFasta(fastaFileName, numSeqs); //fills MPIPos, returns numSeqs\r
 \r
                                //send file positions to all processes\r
-                               MPI_Bcast(&numSeqs, 1, MPI_INT, 0, MPI_COMM_WORLD);  //send numSeqs\r
-                               MPI_Bcast(&positions[0], (numSeqs+1), MPI_LONG, 0, MPI_COMM_WORLD); //send file pos     \r
+                               for(int i = 1; i < processors; i++) { \r
+                                       MPI_Send(&numSeqs, 1, MPI_INT, i, tag, MPI_COMM_WORLD);\r
+                                       MPI_Send(&positions[0], (numSeqs+1), MPI_LONG, i, tag, MPI_COMM_WORLD);\r
+                               }\r
                        }else{\r
-                               MPI_Bcast(&numSeqs, 1, MPI_INT, 0, MPI_COMM_WORLD); //get numSeqs\r
+                               MPI_Recv(&numSeqs, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);\r
                                positions.resize(numSeqs+1);\r
-                               MPI_Bcast(&positions[0], (numSeqs+1), MPI_LONG, 0, MPI_COMM_WORLD); //get file positions\r
+                               MPI_Recv(&positions[0], (numSeqs+1), MPI_LONG, 0, tag, MPI_COMM_WORLD, &status);\r
                        }\r
                \r
                        //read file \r
@@ -73,7 +77,9 @@ AlignmentDB::AlignmentDB(string fastaFileName, string s, int kmerSize, float gap
                                        if (temp.getUnaligned().length() > longest)  { longest = temp.getUnaligned().length()+1; }\r
                                }\r
                        }\r
-               \r
+                       \r
+                       MPI_Barrier(MPI_COMM_WORLD); //make everyone wait - just in case\r
+                       \r
                        MPI_File_close(&inMPI);\r
                \r
        #else\r
index 22eab72b60574679f61eca1f02990a3178aecd75..63f8716dc0ee4fd243a21c06dd41c30f648228c9 100644 (file)
@@ -362,7 +362,7 @@ void Bayesian::readProbFile(ifstream& in, ifstream& inNum, string inName, string
                
                #ifdef USE_MPI
                        
-                       int pid, num, num2;
+                       int pid, num, num2, processors;
                        vector<long> positions;
                        vector<long> positions2;
                        
@@ -370,6 +370,8 @@ void Bayesian::readProbFile(ifstream& in, ifstream& inNum, string inName, string
                        MPI_File inMPI;
                        MPI_File inMPI2;
                        MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
+                       MPI_Comm_size(MPI_COMM_WORLD, &processors);
+                       int tag = 2001;
 
                        char inFileName[1024];
                        strcpy(inFileName, inNumName.c_str());
@@ -382,26 +384,24 @@ void Bayesian::readProbFile(ifstream& in, ifstream& inNum, string inName, string
 
                        if (pid == 0) {
                                positions = setFilePosEachLine(inNumName, num);
-                               
-                               //send file positions to all processes
-                               MPI_Bcast(&num, 1, MPI_INT, 0, MPI_COMM_WORLD);  //send numSeqs
-                               MPI_Bcast(&positions[0], (num+1), MPI_LONG, 0, MPI_COMM_WORLD); //send file pos 
-                               
                                positions2 = setFilePosEachLine(inName, num2);
                                
-                               //send file positions to all processes
-                               MPI_Bcast(&num2, 1, MPI_INT, 0, MPI_COMM_WORLD);  //send numSeqs
-                               MPI_Bcast(&positions2[0], (num2+1), MPI_LONG, 0, MPI_COMM_WORLD); //send file pos       
+                               for(int i = 1; i < processors; i++) { 
+                                       MPI_Send(&num, 1, MPI_INT, i, tag, MPI_COMM_WORLD);
+                                       MPI_Send(&positions[0], (num+1), MPI_LONG, i, tag, MPI_COMM_WORLD);
+                                       
+                                       MPI_Send(&num2, 1, MPI_INT, i, tag, MPI_COMM_WORLD);
+                                       MPI_Send(&positions2[0], (num2+1), MPI_LONG, i, tag, MPI_COMM_WORLD);
+                               }
 
                        }else{
-                               MPI_Bcast(&num, 1, MPI_INT, 0, MPI_COMM_WORLD); //get numSeqs
-                               positions.resize(num);
-                               MPI_Bcast(&positions[0], (num+1), MPI_LONG, 0, MPI_COMM_WORLD); //get file positions
+                               MPI_Recv(&num, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
+                               positions.resize(num+1);
+                               MPI_Recv(&positions[0], (num+1), MPI_LONG, 0, tag, MPI_COMM_WORLD, &status);
                                
-                               MPI_Bcast(&num2, 1, MPI_INT, 0, MPI_COMM_WORLD); //get numSeqs
-                               positions2.resize(num2);
-                               MPI_Bcast(&positions2[0], (num2+1), MPI_LONG, 0, MPI_COMM_WORLD); //get file positions
-
+                               MPI_Recv(&num2, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
+                               positions2.resize(num2+1);
+                               MPI_Recv(&positions2[0], (num2+1), MPI_LONG, 0, tag, MPI_COMM_WORLD, &status);
                        }
                
                        //read numKmers
@@ -473,6 +473,7 @@ void Bayesian::readProbFile(ifstream& in, ifstream& inNum, string inName, string
                                
                        }
                        MPI_File_close(&inMPI2);
+                       MPI_Barrier(MPI_COMM_WORLD); //make everyone wait - just in case
                #else
                
                        in >> numKmers; gobble(in);
index 54025eb9285c87be35039a3051454cfd763a3c4d..aa26d296e614adf04d69a2d73fb341dd798205a0 100644 (file)
@@ -310,7 +310,7 @@ int Bellerophon::getChimeras() {
                        
                        MPIBestSend.clear();
                }
-               
+               MPI_Barrier(MPI_COMM_WORLD); //make everyone wait - just in case
        #else
        
                //divide breakpoints between processors
index b52c82e58d0bb7c441e9b9df324461b4b9a8df60..35a19f485816c87db85b71c74ca138f00595b180 100644 (file)
@@ -177,55 +177,6 @@ void BlastDB::generateDB() {
                exit(1);
        }
 }
-#ifdef USE_MPI 
-/**************************************************************************************************/
-int BlastDB::MPISend(int receiver) {
-       try {
-               
-               //send gapOpen - float
-               MPI_Send(&gapOpen, 1, MPI_FLOAT, receiver, 2001, MPI_COMM_WORLD); 
-
-               //send gapExtend - float
-               MPI_Send(&gapExtend, 1, MPI_FLOAT, receiver, 2001, MPI_COMM_WORLD); 
-               
-               //send match - float
-               MPI_Send(&match, 1, MPI_FLOAT, receiver, 2001, MPI_COMM_WORLD); 
-               
-               //send mismatch - float
-               MPI_Send(&misMatch, 1, MPI_FLOAT, receiver, 2001, MPI_COMM_WORLD); 
-                                                                       
-               return 0;
-       }
-       catch(exception& e) {
-               m->errorOut(e, "BlastDB", "MPISend");
-               exit(1);
-       }
-}
-/**************************************************************************************************/
-int BlastDB::MPIRecv(int sender) {
-       try {
-               MPI_Status status;
-               
-               //receive gapOpen - float
-               MPI_Recv(&gapOpen, 1, MPI_FLOAT, sender, 2001, MPI_COMM_WORLD, &status);
-               
-               //receive gapExtend - float
-               MPI_Recv(&gapExtend, 1, MPI_FLOAT, sender, 2001, MPI_COMM_WORLD, &status);
-                               
-               //receive match - float
-               MPI_Recv(&match, 1, MPI_FLOAT, sender, 2001, MPI_COMM_WORLD, &status);
-               
-               //receive mismatch - float
-               MPI_Recv(&misMatch, 1, MPI_FLOAT, sender, 2001, MPI_COMM_WORLD, &status);               
-               
-               return 0;
-       }
-       catch(exception& e) {
-               m->errorOut(e, "BlastDB", "MPIRecv");
-               exit(1);
-       }
-}
-#endif 
 /**************************************************************************************************/
 
 /**************************************************************************************************/
index d61aaecb5e0712b47458e000416b881719cc7e65..2e7423dcba612f332d7ef2f2657fac9aa16f1b79 100644 (file)
@@ -26,11 +26,6 @@ public:
        vector<int> findClosestSequences(Sequence*, int);
        vector<int> findClosestMegaBlast(Sequence*, int);
        
-       #ifdef USE_MPI  
-       int MPISend(int); //just sends gapOpen, gapExtend, match and mismatch
-       int MPIRecv(int);
-       #endif
-       
 private:
        string dbFileName;
        string queryFileName;
index b513075b4782167ce3f14b74df4f31a6f76b9529..483e5530b094ce011a1e8894a6160a9ea60f2361 100644 (file)
@@ -101,13 +101,15 @@ vector<Sequence*> Chimera::readSeqs(string file) {
                m->mothurOut("Reading sequences from " + file + "..."); cout.flush();
                
                #ifdef USE_MPI  
-                       int pid;
+                       int pid, processors;
                        vector<long> positions;
                        int numSeqs;
+                       int tag = 2001;
                
                        MPI_Status status; 
                        MPI_File inMPI;
                        MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
+                       MPI_Comm_size(MPI_COMM_WORLD, &processors);
 
                        //char* inFileName = new char[file.length()];
                        //memcpy(inFileName, file.c_str(), file.length());
@@ -122,12 +124,14 @@ vector<Sequence*> Chimera::readSeqs(string file) {
                                positions = setFilePosFasta(file, numSeqs); //fills MPIPos, returns numSeqs
 
                                //send file positions to all processes
-                               MPI_Bcast(&numSeqs, 1, MPI_INT, 0, MPI_COMM_WORLD);  //send numSeqs
-                               MPI_Bcast(&positions[0], (numSeqs+1), MPI_LONG, 0, MPI_COMM_WORLD); //send file pos     
+                               for(int i = 1; i < processors; i++) { 
+                                       MPI_Send(&numSeqs, 1, MPI_INT, i, tag, MPI_COMM_WORLD);
+                                       MPI_Send(&positions[0], (numSeqs+1), MPI_LONG, i, tag, MPI_COMM_WORLD);
+                               }
                        }else{
-                               MPI_Bcast(&numSeqs, 1, MPI_INT, 0, MPI_COMM_WORLD); //get numSeqs
+                               MPI_Recv(&numSeqs, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
                                positions.resize(numSeqs+1);
-                               MPI_Bcast(&positions[0], (numSeqs+1), MPI_LONG, 0, MPI_COMM_WORLD); //get file positions
+                               MPI_Recv(&positions[0], (numSeqs+1), MPI_LONG, 0, tag, MPI_COMM_WORLD, &status);
                        }
                        
                        //read file 
@@ -157,6 +161,7 @@ vector<Sequence*> Chimera::readSeqs(string file) {
                        }
                        
                        MPI_File_close(&inMPI);
+                       MPI_Barrier(MPI_COMM_WORLD); //make everyone wait - just in case
        #else
 
                ifstream in;
index cc046ea3dcd44c5443434d84493025791ab2910e..45f7f545f78d3d239592ccc60c12b2561393e869 100644 (file)
@@ -233,8 +233,10 @@ int ChimeraCcodeCommand::execute(){
                                MPIPos = setFilePosFasta(fastafile, numSeqs); //fills MPIPos, returns numSeqs
                                
                                //send file positions to all processes
-                               MPI_Bcast(&numSeqs, 1, MPI_INT, 0, MPI_COMM_WORLD);  //send numSeqs
-                               MPI_Bcast(&MPIPos[0], (numSeqs+1), MPI_LONG, 0, MPI_COMM_WORLD); //send file pos        
+                               for(int i = 1; i < processors; i++) { 
+                                       MPI_Send(&numSeqs, 1, MPI_INT, i, tag, MPI_COMM_WORLD);
+                                       MPI_Send(&MPIPos[0], (numSeqs+1), MPI_LONG, i, tag, MPI_COMM_WORLD);
+                               }
                                
                                //figure out how many sequences you have to align
                                numSeqsPerProcessor = numSeqs / processors;
@@ -253,9 +255,9 @@ int ChimeraCcodeCommand::execute(){
                                        if (tempResult != 0) { MPIWroteAccnos = true; }
                                }
                        }else{ //you are a child process
-                               MPI_Bcast(&numSeqs, 1, MPI_INT, 0, MPI_COMM_WORLD); //get numSeqs
+                               MPI_Recv(&numSeqs, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
                                MPIPos.resize(numSeqs+1);
-                               MPI_Bcast(&MPIPos[0], (numSeqs+1), MPI_LONG, 0, MPI_COMM_WORLD); //get file positions
+                               MPI_Recv(&MPIPos[0], (numSeqs+1), MPI_LONG, 0, tag, MPI_COMM_WORLD, &status);
                                
                                //figure out how many sequences you have to align
                                numSeqsPerProcessor = numSeqs / processors;
@@ -276,6 +278,8 @@ int ChimeraCcodeCommand::execute(){
                        MPI_File_close(&outMPI);
                        MPI_File_close(&outMPIAccnos);
                        
+                       MPI_Barrier(MPI_COMM_WORLD); //make everyone wait - just in case
+                       
                        //delete accnos file if blank
                        if (pid == 0) {
                                if (!MPIWroteAccnos) { 
index 1b25861afa3e60dd6d8d614ba2c2c38d0f11dc3e..66cf12fb8768bcc9c4995256544761e2d8baf820 100644 (file)
@@ -191,8 +191,10 @@ int ChimeraCheckCommand::execute(){
                                MPIPos = setFilePosFasta(fastafile, numSeqs); //fills MPIPos, returns numSeqs
                                
                                //send file positions to all processes
-                               MPI_Bcast(&numSeqs, 1, MPI_INT, 0, MPI_COMM_WORLD);  //send numSeqs
-                               MPI_Bcast(&MPIPos[0], (numSeqs+1), MPI_LONG, 0, MPI_COMM_WORLD); //send file pos        
+                               for(int i = 1; i < processors; i++) { 
+                                       MPI_Send(&numSeqs, 1, MPI_INT, i, tag, MPI_COMM_WORLD);
+                                       MPI_Send(&MPIPos[0], (numSeqs+1), MPI_LONG, i, tag, MPI_COMM_WORLD);
+                               }       
                                
                                //figure out how many sequences you have to align
                                numSeqsPerProcessor = numSeqs / processors;
@@ -211,9 +213,9 @@ int ChimeraCheckCommand::execute(){
                                        MPI_Recv(buf, 4, MPI_CHAR, i, tag, MPI_COMM_WORLD, &status); 
                                }
                        }else{ //you are a child process
-                               MPI_Bcast(&numSeqs, 1, MPI_INT, 0, MPI_COMM_WORLD); //get numSeqs
+                               MPI_Recv(&numSeqs, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
                                MPIPos.resize(numSeqs+1);
-                               MPI_Bcast(&MPIPos[0], (numSeqs+1), MPI_LONG, 0, MPI_COMM_WORLD); //get file positions
+                               MPI_Recv(&MPIPos[0], (numSeqs+1), MPI_LONG, 0, tag, MPI_COMM_WORLD, &status);
                                
                                //figure out how many sequences you have to align
                                numSeqsPerProcessor = numSeqs / processors;
@@ -235,6 +237,7 @@ int ChimeraCheckCommand::execute(){
                        //close files 
                        MPI_File_close(&inMPI);
                        MPI_File_close(&outMPI);
+                       MPI_Barrier(MPI_COMM_WORLD); //make everyone wait - just in case
        #else
                
                //break up file
index adf060f485d862dc93797069a54bb58fbe25adfb..6cef8bbec330c3d345d332f772970ff33a988f79 100644 (file)
@@ -249,8 +249,10 @@ int ChimeraPintailCommand::execute(){
                                MPIPos = setFilePosFasta(fastafile, numSeqs); //fills MPIPos, returns numSeqs
                                
                                //send file positions to all processes
-                               MPI_Bcast(&numSeqs, 1, MPI_INT, 0, MPI_COMM_WORLD);  //send numSeqs
-                               MPI_Bcast(&MPIPos[0], (numSeqs+1), MPI_LONG, 0, MPI_COMM_WORLD); //send file pos        
+                               for(int i = 1; i < processors; i++) { 
+                                       MPI_Send(&numSeqs, 1, MPI_INT, i, tag, MPI_COMM_WORLD);
+                                       MPI_Send(&MPIPos[0], (numSeqs+1), MPI_LONG, i, tag, MPI_COMM_WORLD);
+                               }
                                
                                //figure out how many sequences you have to align
                                numSeqsPerProcessor = numSeqs / processors;
@@ -269,9 +271,9 @@ int ChimeraPintailCommand::execute(){
                                        if (tempResult != 0) { MPIWroteAccnos = true; }
                                }
                        }else{ //you are a child process
-                               MPI_Bcast(&numSeqs, 1, MPI_INT, 0, MPI_COMM_WORLD); //get numSeqs
+                               MPI_Recv(&numSeqs, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
                                MPIPos.resize(numSeqs+1);
-                               MPI_Bcast(&MPIPos[0], (numSeqs+1), MPI_LONG, 0, MPI_COMM_WORLD); //get file positions
+                               MPI_Recv(&MPIPos[0], (numSeqs+1), MPI_LONG, 0, tag, MPI_COMM_WORLD, &status);
                                
                                //figure out how many sequences you have to align
                                numSeqsPerProcessor = numSeqs / processors;
@@ -291,6 +293,7 @@ int ChimeraPintailCommand::execute(){
                        MPI_File_close(&inMPI);
                        MPI_File_close(&outMPI);
                        MPI_File_close(&outMPIAccnos);
+                       MPI_Barrier(MPI_COMM_WORLD); //make everyone wait - just in case
                        
                        //delete accnos file if blank
                        if (pid == 0) {
index 435cd301a53c42b0e00765d588c681f98da5dfed..c41e026497191d8218f90ef8ddb5754aa92cfee0 100644 (file)
@@ -261,8 +261,10 @@ int ChimeraSlayerCommand::execute(){
                                MPIPos = setFilePosFasta(fastafile, numSeqs); //fills MPIPos, returns numSeqs
                                
                                //send file positions to all processes
-                               MPI_Bcast(&numSeqs, 1, MPI_INT, 0, MPI_COMM_WORLD);  //send numSeqs
-                               MPI_Bcast(&MPIPos[0], (numSeqs+1), MPI_LONG, 0, MPI_COMM_WORLD); //send file pos        
+                               for(int i = 1; i < processors; i++) { 
+                                       MPI_Send(&numSeqs, 1, MPI_INT, i, tag, MPI_COMM_WORLD);
+                                       MPI_Send(&MPIPos[0], (numSeqs+1), MPI_LONG, i, tag, MPI_COMM_WORLD);
+                               }
                                
                                //figure out how many sequences you have to align
                                numSeqsPerProcessor = numSeqs / processors;
@@ -281,9 +283,9 @@ int ChimeraSlayerCommand::execute(){
                                        if (tempResult != 0) { MPIWroteAccnos = true; }
                                }
                        }else{ //you are a child process
-                               MPI_Bcast(&numSeqs, 1, MPI_INT, 0, MPI_COMM_WORLD); //get numSeqs
+                               MPI_Recv(&numSeqs, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
                                MPIPos.resize(numSeqs+1);
-                               MPI_Bcast(&MPIPos[0], (numSeqs+1), MPI_LONG, 0, MPI_COMM_WORLD); //get file positions
+                               MPI_Recv(&MPIPos[0], (numSeqs+1), MPI_LONG, 0, tag, MPI_COMM_WORLD, &status);
                                
                                //figure out how many sequences you have to align
                                numSeqsPerProcessor = numSeqs / processors;
@@ -303,6 +305,7 @@ int ChimeraSlayerCommand::execute(){
                        MPI_File_close(&inMPI);
                        MPI_File_close(&outMPI);
                        MPI_File_close(&outMPIAccnos);
+                       MPI_Barrier(MPI_COMM_WORLD); //make everyone wait - just in case
                        
                        //delete accnos file if blank
                        if (pid == 0) {
index 147f499b87357f5782b98fc882db80f3233f2aee..e07344d4678a0ca263008e4b10d4d125ad8ef122 100644 (file)
@@ -27,12 +27,14 @@ void Classify::generateDatabaseAndNames(string tfile, string tempFile, string me
                
                m->mothurOut("Generating search database...    "); cout.flush();
 #ifdef USE_MPI 
-                       int pid;
+                       int pid, processors;
                        vector<long> positions;
+                       int tag = 2001;
                
                        MPI_Status status; 
                        MPI_File inMPI;
                        MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
+                       MPI_Comm_size(MPI_COMM_WORLD, &processors);
 
                        //char* inFileName = new char[tempFile.length()];
                        //memcpy(inFileName, tempFile.c_str(), tempFile.length());
@@ -47,12 +49,14 @@ void Classify::generateDatabaseAndNames(string tfile, string tempFile, string me
                                positions = setFilePosFasta(tempFile, numSeqs); //fills MPIPos, returns numSeqs
 
                                //send file positions to all processes
-                               MPI_Bcast(&numSeqs, 1, MPI_INT, 0, MPI_COMM_WORLD);  //send numSeqs
-                               MPI_Bcast(&positions[0], (numSeqs+1), MPI_LONG, 0, MPI_COMM_WORLD); //send file pos     
+                               for(int i = 1; i < processors; i++) { 
+                                       MPI_Send(&numSeqs, 1, MPI_INT, i, tag, MPI_COMM_WORLD);
+                                       MPI_Send(&positions[0], (numSeqs+1), MPI_LONG, i, tag, MPI_COMM_WORLD);
+                               }
                        }else{
-                               MPI_Bcast(&numSeqs, 1, MPI_INT, 0, MPI_COMM_WORLD); //get numSeqs
-                               positions.resize(numSeqs);
-                               MPI_Bcast(&positions[0], (numSeqs+1), MPI_LONG, 0, MPI_COMM_WORLD); //get file positions
+                               MPI_Recv(&numSeqs, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
+                               positions.resize(numSeqs+1);
+                               MPI_Recv(&positions[0], (numSeqs+1), MPI_LONG, 0, tag, MPI_COMM_WORLD, &status);
                        }
                        
                        //create database
@@ -86,6 +90,7 @@ void Classify::generateDatabaseAndNames(string tfile, string tempFile, string me
                        
                        database->generateDB();
                        MPI_File_close(&inMPI);
+                       MPI_Barrier(MPI_COMM_WORLD); //make everyone wait - just in case
        #else
                
                //need to know number of template seqs for suffixdb
@@ -171,12 +176,14 @@ int Classify::readTaxonomy(string file) {
                m->mothurOut("Reading in the " + file + " taxonomy...\t");      cout.flush();
 
 #ifdef USE_MPI 
-               int pid, num;
+               int pid, num, processors;
                vector<long> positions;
+               int tag = 2001;
                
                MPI_Status status; 
                MPI_File inMPI;
                MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
+               MPI_Comm_size(MPI_COMM_WORLD, &processors);
 
                //char* inFileName = new char[file.length()];
                //memcpy(inFileName, file.c_str(), file.length());
@@ -191,12 +198,14 @@ int Classify::readTaxonomy(string file) {
                        positions = setFilePosEachLine(file, num);
                        
                        //send file positions to all processes
-                       MPI_Bcast(&num, 1, MPI_INT, 0, MPI_COMM_WORLD);  //send numSeqs
-                       MPI_Bcast(&positions[0], (num+1), MPI_LONG, 0, MPI_COMM_WORLD); //send file pos 
+                       for(int i = 1; i < processors; i++) { 
+                               MPI_Send(&num, 1, MPI_INT, i, tag, MPI_COMM_WORLD);
+                               MPI_Send(&positions[0], (num+1), MPI_LONG, i, tag, MPI_COMM_WORLD);
+                       }
                }else{
-                       MPI_Bcast(&num, 1, MPI_INT, 0, MPI_COMM_WORLD); //get numSeqs
-                       positions.resize(num);
-                       MPI_Bcast(&positions[0], (num+1), MPI_LONG, 0, MPI_COMM_WORLD); //get file positions
+                       MPI_Recv(&num, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
+                       positions.resize(num+1);
+                       MPI_Recv(&positions[0], (num+1), MPI_LONG, 0, tag, MPI_COMM_WORLD, &status);
                }
        
                //read file 
@@ -218,6 +227,7 @@ int Classify::readTaxonomy(string file) {
                }
                
                MPI_File_close(&inMPI);
+               MPI_Barrier(MPI_COMM_WORLD); //make everyone wait - just in case
 #else                          
                ifstream inTax;
                openInputFile(file, inTax);
index 7ae2ee5041af97ff462909095aa2af0ec24e5b0d..eee316093d81b9385cac7ff428ed2671cf3aee1e 100644 (file)
@@ -419,8 +419,10 @@ int ClassifySeqsCommand::execute(){
                                        MPIPos = setFilePosFasta(fastaFileNames[s], numFastaSeqs); //fills MPIPos, returns numSeqs
                                        
                                        //send file positions to all processes
-                                       MPI_Bcast(&numFastaSeqs, 1, MPI_INT, 0, MPI_COMM_WORLD);  //send numSeqs
-                                       MPI_Bcast(&MPIPos[0], (numFastaSeqs+1), MPI_LONG, 0, MPI_COMM_WORLD); //send file pos   
+                                       for(int i = 1; i < processors; i++) { 
+                                               MPI_Send(&numFastaSeqs, 1, MPI_INT, i, tag, MPI_COMM_WORLD);
+                                               MPI_Send(&MPIPos[0], (numFastaSeqs+1), MPI_LONG, i, tag, MPI_COMM_WORLD);
+                                       }
                                        
                                        //figure out how many sequences you have to align
                                        numSeqsPerProcessor = numFastaSeqs / processors;
@@ -438,9 +440,9 @@ int ClassifySeqsCommand::execute(){
                                                MPI_Recv(&done, 1, MPI_INT, i, tag, MPI_COMM_WORLD, &status);
                                        }
                                }else{ //you are a child process
-                                       MPI_Bcast(&numFastaSeqs, 1, MPI_INT, 0, MPI_COMM_WORLD); //get numSeqs
+                                       MPI_Recv(&numFastaSeqs, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
                                        MPIPos.resize(numFastaSeqs+1);
-                                       MPI_Bcast(&MPIPos[0], (numFastaSeqs+1), MPI_LONG, 0, MPI_COMM_WORLD); //get file positions
+                                       MPI_Recv(&MPIPos[0], (numFastaSeqs+1), MPI_LONG, 0, tag, MPI_COMM_WORLD, &status);
                                        
                                        //figure out how many sequences you have to align
                                        numSeqsPerProcessor = numFastaSeqs / processors;
@@ -461,6 +463,7 @@ int ClassifySeqsCommand::execute(){
                                MPI_File_close(&inMPI);
                                MPI_File_close(&outMPINewTax);
                                MPI_File_close(&outMPITempTax);
+                               MPI_Barrier(MPI_COMM_WORLD); //make everyone wait - just in case
                                
 #else
                #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
diff --git a/clustersplitcommand.cpp b/clustersplitcommand.cpp
new file mode 100644 (file)
index 0000000..a1e6db0
--- /dev/null
@@ -0,0 +1,632 @@
+/*
+ *  clustersplitcommand.cpp
+ *  Mothur
+ *
+ *  Created by westcott on 5/19/10.
+ *  Copyright 2010 Schloss Lab. All rights reserved.
+ *
+ */
+
+#include "clustersplitcommand.h"
+#include "readcluster.h"
+#include "splitmatrix.h"
+#include "readphylip.h"
+#include "readcolumn.h"
+#include "readmatrix.hpp"
+#include "inputdata.h"
+
+//**********************************************************************************************************************
+//This function checks to make sure the cluster command has no errors and then clusters based on the method chosen.
+ClusterSplitCommand::ClusterSplitCommand(string option)  {
+       try{
+               globaldata = GlobalData::getInstance();
+               abort = false;
+               
+               //allow user to run help
+               if(option == "help") { help(); abort = true; }
+               
+               else {
+                       //valid paramters for this command
+                       string Array[] =  {"phylip","column","name","cutoff","precision","method","showabund","timing","hard","processors","splitcutoff","outputdir","inputdir"};
+                       vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
+                       
+                       OptionParser parser(option);
+                       map<string,string> parameters = parser.getParameters();
+                       
+                       ValidParameters validParameter;
+               
+                       //check to make sure all parameters are valid for command
+                       map<string,string>::iterator it;
+                       for (it = parameters.begin(); it != parameters.end(); it++) { 
+                               if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {
+                                       abort = true;
+                               }
+                       }
+                       
+                       globaldata->newRead();
+                       
+                       //if the user changes the output directory command factory will send this info to us in the output parameter 
+                       outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = "";         }
+                       
+                               //if the user changes the input directory command factory will send this info to us in the output parameter 
+                       string inputDir = validParameter.validFile(parameters, "inputdir", false);              
+                       if (inputDir == "not found"){   inputDir = "";          }
+                       else {
+                               string path;
+                               it = parameters.find("phylip");
+                               //user has given a template file
+                               if(it != parameters.end()){ 
+                                       path = hasPath(it->second);
+                                       //if the user has not given a path then, add inputdir. else leave path alone.
+                                       if (path == "") {       parameters["phylip"] = inputDir + it->second;           }
+                               }
+                               
+                               it = parameters.find("column");
+                               //user has given a template file
+                               if(it != parameters.end()){ 
+                                       path = hasPath(it->second);
+                                       //if the user has not given a path then, add inputdir. else leave path alone.
+                                       if (path == "") {       parameters["column"] = inputDir + it->second;           }
+                               }
+                               
+                               it = parameters.find("name");
+                               //user has given a template file
+                               if(it != parameters.end()){ 
+                                       path = hasPath(it->second);
+                                       //if the user has not given a path then, add inputdir. else leave path alone.
+                                       if (path == "") {       parameters["name"] = inputDir + it->second;             }
+                               }
+                       }
+                       
+                       //check for required parameters
+                       phylipfile = validParameter.validFile(parameters, "phylip", true);
+                       if (phylipfile == "not open") { abort = true; }
+                       else if (phylipfile == "not found") { phylipfile = ""; }        
+                       else {  distfile = phylipfile;  format = "phylip";      }
+                       
+                       columnfile = validParameter.validFile(parameters, "column", true);
+                       if (columnfile == "not open") { abort = true; } 
+                       else if (columnfile == "not found") { columnfile = ""; }
+                       else {  distfile = columnfile; format = "column";       }
+                       
+                       namefile = validParameter.validFile(parameters, "name", true);
+                       if (namefile == "not open") { abort = true; }   
+                       else if (namefile == "not found") { namefile = ""; }
+                       
+                       if ((phylipfile == "") && (columnfile == "")) { m->mothurOut("When executing a hcluster command you must enter a phylip or a column."); m->mothurOutEndLine(); abort = true; }
+                       else if ((phylipfile != "") && (columnfile != "")) { m->mothurOut("When executing a hcluster command you must enter ONLY ONE of the following: phylip or column."); m->mothurOutEndLine(); abort = true; }
+               
+                       if (columnfile != "") {
+                               if (namefile == "") {  cout << "You need to provide a namefile if you are going to use the column format." << endl; abort = true; }
+                       }
+                                       
+                       //check for optional parameter and set defaults
+                       // ...at some point should added some additional type checking...
+                       //get user cutoff and precision or use defaults
+                       string temp;
+                       temp = validParameter.validFile(parameters, "precision", false);
+                       if (temp == "not found") { temp = "100"; }
+                       //saves precision legnth for formatting below
+                       length = temp.length();
+                       convert(temp, precision); 
+                       
+                       temp = validParameter.validFile(parameters, "hard", false);                     if (temp == "not found") { temp = "F"; }
+                       hard = isTrue(temp);
+                       
+                       temp = validParameter.validFile(parameters, "processors", false);       if (temp == "not found"){       temp = "1";                             }
+                       convert(temp, processors); 
+                       
+                       temp = validParameter.validFile(parameters, "cutoff", false);
+                       if (temp == "not found") { temp = "10"; }
+                       convert(temp, cutoff); 
+                       if (!hard) {    cutoff += (5 / (precision * 10.0));  }
+                       
+                       temp = validParameter.validFile(parameters, "splitcutoff", false);
+                       if (temp == "not found") { temp = "0.10"; }
+                       convert(temp, splitcutoff); 
+                       if (!hard) {    splitcutoff += (5 / (precision * 10.0));  }
+                       
+                       method = validParameter.validFile(parameters, "method", false);
+                       if (method == "not found") { method = "furthest"; }
+                       
+                       if ((method == "furthest") || (method == "nearest") || (method == "average")) { }
+                       else { m->mothurOut("Not a valid clustering method.  Valid clustering algorithms are furthest, nearest or average."); m->mothurOutEndLine(); abort = true; }
+
+                       showabund = validParameter.validFile(parameters, "showabund", false);
+                       if (showabund == "not found") { showabund = "T"; }
+
+                       timing = validParameter.validFile(parameters, "timing", false);
+                       if (timing == "not found") { timing = "F"; }
+                       
+               }
+       }
+       catch(exception& e) {
+               m->errorOut(e, "ClusterSplitCommand", "ClusterSplitCommand");
+               exit(1);
+       }
+}
+
+//**********************************************************************************************************************
+
+void ClusterSplitCommand::help(){
+       try {
+               m->mothurOut("The cluster command can only be executed after a successful read.dist command.\n");
+               m->mothurOut("The cluster command parameter options are method, cuttoff, hard, precision, showabund and timing. No parameters are required.\n");
+               m->mothurOut("The cluster command should be in the following format: \n");
+               m->mothurOut("cluster(method=yourMethod, cutoff=yourCutoff, precision=yourPrecision) \n");
+               m->mothurOut("The acceptable cluster methods are furthest, nearest and average.  If no method is provided then furthest is assumed.\n\n");      
+       }
+       catch(exception& e) {
+               m->errorOut(e, "ClusterSplitCommand", "help");
+               exit(1);
+       }
+}
+
+//**********************************************************************************************************************
+
+ClusterSplitCommand::~ClusterSplitCommand(){}
+
+//**********************************************************************************************************************
+
+int ClusterSplitCommand::execute(){
+       try {
+       
+               if (abort == true) {    return 0;       }
+               
+               //****************** file prep work ******************************//
+               
+               //if user gave a phylip file convert to column file
+               if (format == "phylip") {
+                       ReadCluster* convert = new ReadCluster(distfile, cutoff, outputDir, false);
+                       
+                       NameAssignment* nameMap = NULL;
+                       convert->read(nameMap);
+                       
+                       if (m->control_pressed) {  delete convert;  return 0;  }
+                       
+                       string distfile = convert->getOutputFile();
+                       
+                       //if no names file given with phylip file, create it
+                       ListVector* listToMakeNameFile =  convert->getListVector();
+                       if (namefile == "") {  //you need to make a namefile for split matrix
+                               ofstream out;
+                               namefile = phylipfile + ".names";
+                               openOutputFile(namefile, out);
+                               for (int i = 0; i < listToMakeNameFile->getNumBins(); i++) {
+                                       string bin = listToMakeNameFile->get(i);
+                                       out << bin << '\t' << bin << endl;
+                               }
+                               out.close();
+                       }
+                       delete listToMakeNameFile;
+                       delete convert;
+               }
+               
+               if (m->control_pressed) { return 0; }
+               
+               time_t estart = time(NULL);
+               
+               //split matrix into non-overlapping groups
+               SplitMatrix* split = new SplitMatrix(distfile, namefile, splitcutoff);
+               split->split();
+               
+               if (m->control_pressed) { delete split; return 0; }
+               
+               string singletonName = split->getSingletonNames();
+               vector< map<string, string> > distName = split->getDistanceFiles();  //returns map of distance files -> namefile sorted by distance file size
+               delete split;
+               
+               m->mothurOut("It took " + toString(time(NULL) - estart) + " seconds to split the distance file."); m->mothurOutEndLine();
+               estart = time(NULL);
+               
+               if (m->control_pressed) { return 0; }
+               
+               //****************** break up files between processes and cluster each file set ******************************//
+               vector<string> listFileNames;
+               set<string> labels;
+               #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
+                               if(processors == 1){
+                                       listFileNames = cluster(distName, labels); //clusters individual files and returns names of list files
+                               }else{
+                                       vector < vector < map<string, string> > > dividedNames; //distNames[1] = vector of filenames for process 1...
+                                       dividedNames.resize(processors);
+                                       
+                                       //for each file group figure out which process will complete it
+                                       //want to divide the load intelligently so the big files are spread between processes
+                                       int count = 1;
+                                       for (int i = 0; i < distName.size(); i++) { 
+                                               int processToAssign = (i+1) % processors; 
+                                               if (processToAssign == 0) { processToAssign = processors; }
+                                               
+                                               dividedNames[(processToAssign-1)].push_back(distName[i]);
+                                       }
+                                       
+                                       createProcesses(dividedNames);
+                                                       
+                                       if (m->control_pressed) { return 0; }
+
+                                       //get list of list file names from each process
+                                       for(int i=0;i<processors;i++){
+                                               string filename = toString(processIDS[i]) + ".temp";
+                                               ifstream in;
+                                               openInputFile(filename, in);
+                                               
+                                               while(!in.eof()) {
+                                                       string tempName;
+                                                       in >> tempName; gobble(in);
+                                                       listFileNames.push_back(tempName);
+                                               }
+                                               in.close();
+                                               remove((toString(processIDS[i]) + ".temp").c_str());
+                                               
+                                               //get labels
+                                               filename = toString(processIDS[i]) + ".temp.labels";
+                                               ifstream in2;
+                                               openInputFile(filename, in2);
+                                               
+                                               while(!in2.eof()) {
+                                                       string tempName;
+                                                       in2 >> tempName; gobble(in);
+                                                       if (labels.count(tempName) == 0) { labels.insert(tempName); }
+                                               }
+                                               in2.close();
+                                               remove((toString(processIDS[i]) + ".temp.labels").c_str());
+                                       }
+                               }
+               #else
+                               listFileNames = cluster(distName, labels); //clusters individual files and returns names of list files
+               #endif
+               
+               if (m->control_pressed) { for (int i = 0; i < listFileNames.size(); i++) { remove(listFileNames[i].c_str()); } return 0; }
+               
+               //****************** merge list file and create rabund and sabund files ******************************//
+                               
+               mergeLists(listFileNames, singletonName, labels);
+
+               if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; }
+               
+               m->mothurOut("It took " + toString(time(NULL) - estart) + " seconds to cluster"); m->mothurOutEndLine();
+               
+               m->mothurOutEndLine();
+               m->mothurOut("Output File Names: "); m->mothurOutEndLine();
+               for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
+               m->mothurOutEndLine();
+
+               return 0;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "ClusterSplitCommand", "execute");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+int ClusterSplitCommand::mergeLists(vector<string> listNames, string singleton, set<string> userLabels){
+       try {
+               if (outputDir == "") { outputDir += hasPath(distfile); }
+               fileroot = outputDir + getRootName(getSimpleName(distfile));
+               
+               openOutputFile(fileroot+ tag + ".sabund",       outSabund);
+               openOutputFile(fileroot+ tag + ".rabund",       outRabund);
+               openOutputFile(fileroot+ tag + ".list",         outList);
+                               
+               outputNames.push_back(fileroot+ tag + ".sabund");
+               outputNames.push_back(fileroot+ tag + ".rabund");
+               outputNames.push_back(fileroot+ tag + ".list");
+                       
+               //read in singletons
+               ListVector* listSingle = NULL;
+               if (singleton != "none") {
+                       ifstream in;
+                       openInputFile(singleton, in);
+                               
+                       string firstCol, secondCol;
+                       listSingle = new ListVector();
+                       while (!in.eof()) {
+                               in >> firstCol >> secondCol; gobble(in);
+                               listSingle->push_back(secondCol);
+                       }
+                       in.close();
+               }
+               
+               vector<float> orderFloat;
+       
+               //go through users set and make them floats so we can sort them 
+               for(set<string>::iterator it = userLabels.begin(); it != userLabels.end(); ++it) {
+                       float temp;
+
+                       if ((*it != "unique") && (convertTestFloat(*it, temp) == true)){
+                               convert(*it, temp);
+                               orderFloat.push_back(temp);
+                       }else if (*it == "unique") { orderFloat.push_back(-1.0); }
+                       else {
+                               userLabels.erase(*it); 
+                               it--;
+                       }
+               }
+               
+               //sort order
+               sort(orderFloat.begin(), orderFloat.end());
+
+               vector<InputData*> inputs;
+               vector<string> lastLabels;
+               for (int i = 0; i < listNames.size(); i++) {
+                       InputData* input = new InputData(listNames[i], "list");
+                       inputs.push_back(input);
+                       
+                       ifstream in;
+                       openInputFile(listNames[i], in);
+                       ListVector tempList(in);
+                       lastLabels.push_back(tempList.getLabel());
+                       in.close();
+               }
+               
+               ListVector* merged = NULL;
+                               
+               //for each label needed
+               for(int l = 0; l < orderFloat.size(); l++){
+                       
+                       string thisLabel;
+                       if (orderFloat[l] == -1) { thisLabel = "unique"; }
+                       else { thisLabel = toString(orderFloat[l],  length-1);  } 
+       
+                       //get the list info from each file
+                       for (int k = 0; k < listNames.size(); k++) {
+       
+                               ListVector* list = inputs[k]->getListVector();
+                               
+                               //this file has reached the end
+                               if (list == NULL) { list = inputs[k]->getListVector(lastLabels[k], true); }     
+                                               
+                               float labelFloat;
+                               if (list->getLabel() == "unique") {  labelFloat = -1.0;  }
+                               else { convert(list->getLabel(), labelFloat); }
+
+                               //check for missing labels
+                               if (labelFloat > orderFloat[l]) { //you are missing the label, get the next smallest one
+                                       //if its bigger get last label, otherwise keep it
+                                       delete list;
+                                       list = inputs[k]->getListVector(lastLabels[k], true); //get last list vector to use, you actually want to move back in the file
+                               }
+                               lastLabels[k] = list->getLabel();
+
+                               //is this the first file
+                               if (merged == NULL) {  merged = new ListVector();  merged->setLabel(thisLabel); }
+                               
+                               for (int j = 0; j < list->getNumBins(); j++) {
+                                       merged->push_back(list->get(j));
+                               }
+                               
+                               delete list;
+                       }
+                       
+                       //add in singletons
+                       for (int j = 0; j < listSingle->getNumBins(); j++) {
+                               merged->push_back(listSingle->get(j));
+                       }
+
+                       //print to files
+                       printData(merged);
+                       
+                       delete merged; merged = NULL;
+               }
+               
+               if (listSingle != NULL) { delete listSingle; remove(singleton.c_str());  }
+               
+               for (int i = 0; i < listNames.size(); i++) {  delete inputs[i];  remove(listNames[i].c_str());  }
+       }
+       catch(exception& e) {
+               m->errorOut(e, "ClusterSplitCommand", "mergeLists");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+
+void ClusterSplitCommand::printData(ListVector* oldList){
+       try {
+               string label = oldList->getLabel();
+               RAbundVector oldRAbund = oldList->getRAbundVector();
+               
+               oldRAbund.setLabel(label);
+               if (isTrue(showabund)) {
+                       oldRAbund.getSAbundVector().print(cout);
+               }
+               oldRAbund.print(outRabund);
+               oldRAbund.getSAbundVector().print(outSabund);
+       
+               oldList->print(outList);
+       }
+       catch(exception& e) {
+               m->errorOut(e, "ClusterSplitCommand", "printData");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+int ClusterSplitCommand::createProcesses(vector < vector < map<string, string> > > dividedNames){
+       try {
+       
+       #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
+               int process = 0;
+               int exitCommand = 1;
+               processIDS.clear();
+               
+               //loop through and create all the processes you want
+               while (process != processors) {
+                       int pid = fork();
+                       
+                       if (pid > 0) {
+                               processIDS.push_back(pid);  //create map from line number to pid so you can append files in correct order later
+                               process++;
+                       }else if (pid == 0){
+                               set<string> labels;
+                               vector<string> listFileNames = cluster(dividedNames[process], labels);
+                               
+                               //write out names to file
+                               string filename = toString(getpid()) + ".temp";
+                               ofstream out;
+                               openOutputFile(filename, out);
+                               for (int j = 0; j < listFileNames.size(); j++) { out << listFileNames[j] << endl;  }
+                               out.close();
+                               
+                               //print out labels
+                               ofstream outLabels;
+                               filename = toString(getpid()) + ".temp.labels";
+                               openOutputFile(filename, outLabels);
+               
+                               for (set<string>::iterator it = labels.begin(); it != labels.end(); it++) {
+                                       outLabels << (*it) << endl;
+                               }
+                               outLabels.close();
+
+                               exit(0);
+                       }else { m->mothurOut("unable to spawn the necessary processes."); m->mothurOutEndLine(); exit(0); }
+               }
+               
+               //force parent to wait until all the processes are done
+               for (int i=0;i<processors;i++) { 
+                       int temp = processIDS[i];
+                       wait(&temp);
+               }
+               
+               return exitCommand;
+       #endif          
+       
+       }
+       catch(exception& e) {
+               m->errorOut(e, "ClusterSplitCommand", "createProcesses");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+
+vector<string> ClusterSplitCommand::cluster(vector< map<string, string> > distNames, set<string>& labels){
+       try {
+               Cluster* cluster;
+               SparseMatrix* matrix;
+               ListVector* list;
+               ListVector oldList;
+               RAbundVector* rabund;
+               
+               vector<string> listFileNames;
+               
+               //cluster each distance file
+               for (int i = 0; i < distNames.size(); i++) {
+                       
+                       string thisNamefile = distNames[i].begin()->second;
+                       string thisDistFile = distNames[i].begin()->first;
+                       
+                       //read in distance file
+                       globaldata->setNameFile(thisNamefile);
+                       globaldata->setColumnFile(thisDistFile); globaldata->setFormat("column");
+                       
+                       ReadMatrix* read = new ReadColumnMatrix(thisDistFile);  
+                       read->setCutoff(cutoff);
+
+                       NameAssignment* nameMap = new NameAssignment(thisNamefile);
+                       nameMap->readMap();
+                       read->read(nameMap);
+                       
+                       if (m->control_pressed) {  delete read; delete nameMap; return listFileNames; }
+                       
+                       list = read->getListVector();
+                       oldList = *list;
+                       matrix = read->getMatrix();
+                       
+                       delete read; 
+                       delete nameMap; 
+                       
+                       m->mothurOutEndLine(); m->mothurOut("Clustering " + thisDistFile); m->mothurOutEndLine();
+               
+                       rabund = new RAbundVector(list->getRAbundVector());
+                       
+                       //create cluster
+                       if (method == "furthest")       {       cluster = new CompleteLinkage(rabund, list, matrix, cutoff, method); }
+                       else if(method == "nearest"){   cluster = new SingleLinkage(rabund, list, matrix, cutoff, method); }
+                       else if(method == "average"){   cluster = new AverageLinkage(rabund, list, matrix, cutoff, method);     }
+                       tag = cluster->getTag();
+               
+                       if (outputDir == "") { outputDir += hasPath(thisDistFile); }
+                       fileroot = outputDir + getRootName(getSimpleName(thisDistFile));
+                       
+                       ofstream listFile;
+                       openOutputFile(fileroot+ tag + ".list", listFile);
+               
+                       listFileNames.push_back(fileroot+ tag + ".list");
+               
+                       time_t estart = time(NULL);
+                       
+                       float previousDist = 0.00000;
+                       float rndPreviousDist = 0.00000;
+                       
+                       oldList = *list;
+
+                       print_start = true;
+                       start = time(NULL);
+                       double saveCutoff = cutoff;
+               
+                       while (matrix->getSmallDist() < cutoff && matrix->getNNodes() > 0){
+               
+                               if (m->control_pressed) { //clean up
+                                       delete matrix; delete list;     delete cluster; delete rabund;
+                                       listFile.close();
+                                       for (int i = 0; i < listFileNames.size(); i++) {        remove(listFileNames[i].c_str());       }
+                                       listFileNames.clear(); return listFileNames;
+                               }
+               
+                               cluster->update(cutoff);
+       
+                               float dist = matrix->getSmallDist();
+                               float rndDist = roundDist(dist, precision);
+
+                               if(previousDist <= 0.0000 && dist != previousDist){
+                                       oldList.setLabel("unique");
+                                       oldList.print(listFile);
+                                       if (labels.count("unique") == 0) {  labels.insert("unique");  }
+                               }
+                               else if(rndDist != rndPreviousDist){
+                                       oldList.setLabel(toString(rndPreviousDist,  length-1));
+                                       oldList.print(listFile);
+                                       if (labels.count(toString(rndPreviousDist,  length-1)) == 0) { labels.insert(toString(rndPreviousDist,  length-1)); }
+                               }
+               
+                               previousDist = dist;
+                               rndPreviousDist = rndDist;
+                               oldList = *list;
+                       }
+
+               
+                       if(previousDist <= 0.0000){
+                               oldList.setLabel("unique");
+                               oldList.print(listFile);
+                               if (labels.count("unique") == 0) { labels.insert("unique"); }
+                       }
+                       else if(rndPreviousDist<cutoff){
+                               oldList.setLabel(toString(rndPreviousDist,  length-1));
+                               oldList.print(listFile);
+                               if (labels.count(toString(rndPreviousDist,  length-1)) == 0) { labels.insert(toString(rndPreviousDist,  length-1)); }
+                       }
+               
+                       delete matrix; delete list;     delete cluster; delete rabund; 
+                       listFile.close();
+                       
+                       if (m->control_pressed) { //clean up
+                               for (int i = 0; i < listFileNames.size(); i++) {        remove(listFileNames[i].c_str());       }
+                               listFileNames.clear(); return listFileNames;
+                       }
+                       
+                       remove(thisDistFile.c_str());
+                       remove(thisNamefile.c_str());
+               }
+               
+                               
+               return listFileNames;
+       
+       }
+       catch(exception& e) {
+               m->errorOut(e, "ClusterSplitCommand", "cluster");
+               exit(1);
+       }
+
+
+}
+
+//**********************************************************************************************************************
diff --git a/clustersplitcommand.h b/clustersplitcommand.h
new file mode 100644 (file)
index 0000000..4d1f435
--- /dev/null
@@ -0,0 +1,49 @@
+#ifndef CLUSTERSPLITCOMMAND_H
+#define CLUSTERSPLITCOMMAND_H
+
+/*
+ *  clustersplitcommand.h
+ *  Mothur
+ *
+ *  Created by westcott on 5/19/10.
+ *  Copyright 2010 Schloss Lab. All rights reserved.
+ *
+ */
+#include "command.hpp"
+#include "rabundvector.hpp"
+#include "sabundvector.hpp"
+#include "listvector.hpp"
+#include "cluster.hpp"
+#include "sparsematrix.hpp"
+#include "globaldata.hpp"
+
+
+class ClusterSplitCommand : public Command {
+       
+public:
+       ClusterSplitCommand(string);
+       ~ClusterSplitCommand();
+       int execute();  
+       void help();
+       
+private:
+       GlobalData* globaldata;
+       vector<int> processIDS;   //processid
+       vector<string> outputNames;
+
+       string method, fileroot, tag, outputDir, phylipfile, columnfile, namefile, distfile, format, showabund, timing;
+       double cutoff, splitcutoff;
+       int precision, length, processors;
+       bool print_start, abort, hard;
+       time_t start;
+       ofstream outList, outRabund, outSabund;
+       
+       void printData(ListVector*);
+       int createProcesses(vector < vector < map<string, string> > >);
+       vector<string> cluster(vector< map<string, string> >, set<string>&);
+       int mergeLists(vector<string>, string, set<string>);
+};
+
+#endif
+
index f358797f7d94bf557c8bf986dd16faaed03743f3..0bca8c0ee3db3706404b3fc5c4ff42b8c8f99388 100644 (file)
@@ -78,6 +78,7 @@
 #include "clearcutcommand.h"
 #include "catchallcommand.h"
 #include "splitabundcommand.h"
+#include "clustersplitcommand.h"
 
 /*******************************************************/
 
@@ -163,6 +164,7 @@ CommandFactory::CommandFactory(){
        commands["clearcut"]                    = "clearcut";
        commands["catchall"]                    = "catchall";
        commands["split.abund"]                 = "split.abund";
+       commands["cluster.split"]               = "cluster.split";
        commands["classify.seqs"]               = "MPIEnabled"; 
        commands["dist.seqs"]                   = "MPIEnabled";
        commands["filter.seqs"]                 = "MPIEnabled";
@@ -285,6 +287,7 @@ Command* CommandFactory::getCommand(string commandName, string optionString){
                else if(commandName == "clearcut")                              {       command = new ClearcutCommand(optionString);                            }
                else if(commandName == "catchall")                              {       command = new CatchAllCommand(optionString);                            }
                else if(commandName == "split.abund")                   {       command = new SplitAbundCommand(optionString);                          }
+               else if(commandName == "cluster.split")                 {       command = new ClusterSplitCommand(optionString);                        }
                else                                                                                    {       command = new NoCommand(optionString);                                          }
 
                return command;
index efc7ba7f111d956c31d20626d825c079cf7963c8..3191fdfb02d5323129af949cf1eb50aea50ff3c8 100644 (file)
@@ -56,11 +56,6 @@ public:
        virtual vector<int> getSequencesWithKmer(int){ vector<int> filler; return filler; };  
        virtual int getMaxKmer(){       return 1;       };
        
-       #ifdef USE_MPI  
-       virtual int MPISend(int) = 0;
-       virtual int MPIRecv(int) = 0;
-       #endif
-       
 protected:
        MothurOut* m;
        int numSeqs, longest;
index ec32ffeb9e2e28b7398f78c5dd5fd858de2326a0..503f64b21cfd0ae64ec7413c7bfc3dc993e7e2d3 100644 (file)
@@ -300,6 +300,7 @@ int DistanceCommand::execute(){
                                MPI_Send(&size, 1, MPI_LONG, 0, tag, MPI_COMM_WORLD);
                        }
                }
+               MPI_Barrier(MPI_COMM_WORLD); //make everyone wait - just in case
 #else          
                                
        #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
index 9e5801b1b057b2f2a1cc4a986e105ae3e73f14b5..90103eae920da6cd225e03606fcb2bf3d6198e2e 100644 (file)
@@ -73,6 +73,8 @@ bool InteractEngine::getInput(){
                                        int pid;
                                        MPI_Comm_rank(MPI_COMM_WORLD, &pid); 
                                        
+                                       MPI_Barrier(MPI_COMM_WORLD); //make everyone wait - just in case
+                               
                                        if ((cFactory->MPIEnabled(commandName)) || (pid == 0)) {
                                #endif
                                //executes valid command
@@ -200,7 +202,9 @@ bool BatchEngine::getInput(){
                                        #ifdef USE_MPI
                                                int pid;
                                                MPI_Comm_rank(MPI_COMM_WORLD, &pid); 
-                                       
+cout << pid << " is waiting " << commandName << endl;                                          
+                                               MPI_Barrier(MPI_COMM_WORLD); //make everyone wait - just in case
+cout << pid << " is here " << commandName << endl;
                                                if ((cFactory->MPIEnabled(commandName)) || (pid == 0)) {
                                        #endif
                                        //executes valid command
@@ -292,6 +296,8 @@ bool ScriptEngine::getInput(){
                                        int pid;
                                        MPI_Comm_rank(MPI_COMM_WORLD, &pid); 
                                        
+                                       MPI_Barrier(MPI_COMM_WORLD); //make everyone wait - just in case
+                                       
                                        if ((cFactory->MPIEnabled(commandName)) || (pid == 0)) {
                                #endif
                                //executes valid command
index 41c321933754ebb0a86dc279985a31711b31f629..1c70b3e493fd2c3bf407746f8f97ddb276b801ea 100644 (file)
@@ -282,8 +282,10 @@ int FilterSeqsCommand::filterSequences() {
                                        numSeqs += num;
                                        
                                        //send file positions to all processes
-                                       MPI_Bcast(&num, 1, MPI_INT, 0, MPI_COMM_WORLD);  //send numSeqs
-                                       MPI_Bcast(&MPIPos[0], (num+1), MPI_LONG, 0, MPI_COMM_WORLD); //send file pos    
+                                       for(int i = 1; i < processors; i++) { 
+                                               MPI_Send(&num, 1, MPI_INT, i, tag, MPI_COMM_WORLD);
+                                               MPI_Send(&MPIPos[0], (num+1), MPI_LONG, i, tag, MPI_COMM_WORLD);
+                                       }
                                        
                                        //figure out how many sequences you have to do
                                        numSeqsPerProcessor = num / processors;
@@ -303,10 +305,10 @@ int FilterSeqsCommand::filterSequences() {
                                        }
                                        
                                }else { //you are a child process
-                                       MPI_Bcast(&num, 1, MPI_INT, 0, MPI_COMM_WORLD); //get numSeqs
-                                       numSeqs += num;
+                                       MPI_Recv(&num, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
                                        MPIPos.resize(num+1);
-                                       MPI_Bcast(&MPIPos[0], (num+1), MPI_LONG, 0, MPI_COMM_WORLD); //get file positions
+                                       numSeqs += num;
+                                       MPI_Recv(&MPIPos[0], (num+1), MPI_LONG, 0, tag, MPI_COMM_WORLD, &status);
                                        
                                        //figure out how many sequences you have to align
                                        numSeqsPerProcessor = num / processors;
@@ -328,6 +330,7 @@ int FilterSeqsCommand::filterSequences() {
                                
                                MPI_File_close(&outMPI);
                                MPI_File_close(&inMPI);
+                               MPI_Barrier(MPI_COMM_WORLD); //make everyone wait - just in case
                                
 #else
                #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
@@ -543,7 +546,7 @@ string FilterSeqsCommand::createFilter() {
                
                F.setLength(alignmentLength);
                
-               if(soft != 0 || isTrue(vertical)){
+               if(trump != '*' || isTrue(vertical) || soft != 0){
                        F.initialize();
                }
                
@@ -581,8 +584,10 @@ string FilterSeqsCommand::createFilter() {
                                                numSeqs += num;
                                                
                                                //send file positions to all processes
-                                               MPI_Bcast(&num, 1, MPI_INT, 0, MPI_COMM_WORLD);  //send numSeqs
-                                               MPI_Bcast(&MPIPos[0], (num+1), MPI_LONG, 0, MPI_COMM_WORLD); //send file pos    
+                                               for(int i = 1; i < processors; i++) { 
+                                                       MPI_Send(&num, 1, MPI_INT, i, tag, MPI_COMM_WORLD);
+                                                       MPI_Send(&MPIPos[0], (num+1), MPI_LONG, i, tag, MPI_COMM_WORLD);
+                                               }
                                                                
                                                //figure out how many sequences you have to do
                                                numSeqsPerProcessor = num / processors;
@@ -596,11 +601,10 @@ string FilterSeqsCommand::createFilter() {
                                                if (m->control_pressed) {  MPI_File_close(&inMPI);  return 0;  }
                                                                                                
                                }else { //i am the child process
-                       
-                                       MPI_Bcast(&num, 1, MPI_INT, 0, MPI_COMM_WORLD); //get numSeqs
+                                       MPI_Recv(&num, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
                                        MPIPos.resize(num+1);
                                        numSeqs += num;
-                                       MPI_Bcast(&MPIPos[0], (num+1), MPI_LONG, 0, MPI_COMM_WORLD); //get file positions
+                                       MPI_Recv(&MPIPos[0], (num+1), MPI_LONG, 0, tag, MPI_COMM_WORLD, &status);
                                        
                                        //figure out how many sequences you have to align
                                        numSeqsPerProcessor = num / processors;
@@ -615,6 +619,7 @@ string FilterSeqsCommand::createFilter() {
                                }
                                
                                MPI_File_close(&inMPI);
+                               MPI_Barrier(MPI_COMM_WORLD); //make everyone wait - just in case
                                
 #else
                #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
@@ -668,20 +673,24 @@ string FilterSeqsCommand::createFilter() {
                                vector<int> temp; temp.resize(alignmentLength+1);
                                                                
                                //get the frequencies from the child processes
-                               for(int i = 0; i < ((processors-1)*5); i++) { 
-                                       MPI_Recv(&temp[0], (alignmentLength+1), MPI_INT, MPI_ANY_SOURCE, 2001, MPI_COMM_WORLD, &status); 
-                                       int receiveTag = temp[temp.size()-1];  //child process added a int to the end to indicate what letter count this is for
-                                       
-                                       if (receiveTag == Atag) { //you are recieveing the A frequencies
-                                               for (int k = 0; k < alignmentLength; k++) {             F.a[k] += temp[k];      }
-                                       }else if (receiveTag == Ttag) { //you are recieveing the T frequencies
-                                               for (int k = 0; k < alignmentLength; k++) {             F.t[k] += temp[k];      }
-                                       }else if (receiveTag == Ctag) { //you are recieveing the C frequencies
-                                               for (int k = 0; k < alignmentLength; k++) {             F.c[k] += temp[k];      }
-                                       }else if (receiveTag == Gtag) { //you are recieveing the G frequencies
-                                               for (int k = 0; k < alignmentLength; k++) {             F.g[k] += temp[k];      }
-                                       }else if (receiveTag == Gaptag) { //you are recieveing the gap frequencies
-                                               for (int k = 0; k < alignmentLength; k++) {             F.gap[k] += temp[k];    }
+                               for(int i = 1; i < processors; i++) { 
+                               
+                                       for (int j = 0; j < 5; j++) {
+                                       
+                                               MPI_Recv(&temp[0], (alignmentLength+1), MPI_INT, i, 2001, MPI_COMM_WORLD, &status); 
+                                               int receiveTag = temp[temp.size()-1];  //child process added a int to the end to indicate what letter count this is for
+                                               
+                                               if (receiveTag == Atag) { //you are recieveing the A frequencies
+                                                       for (int k = 0; k < alignmentLength; k++) {             F.a[k] += temp[k];      }
+                                               }else if (receiveTag == Ttag) { //you are recieveing the T frequencies
+                                                       for (int k = 0; k < alignmentLength; k++) {             F.t[k] += temp[k];      }
+                                               }else if (receiveTag == Ctag) { //you are recieveing the C frequencies
+                                                       for (int k = 0; k < alignmentLength; k++) {             F.c[k] += temp[k];      }
+                                               }else if (receiveTag == Gtag) { //you are recieveing the G frequencies
+                                                       for (int k = 0; k < alignmentLength; k++) {             F.g[k] += temp[k];      }
+                                               }else if (receiveTag == Gaptag) { //you are recieveing the gap frequencies
+                                                       for (int k = 0; k < alignmentLength; k++) {             F.gap[k] += temp[k];    }
+                                               }
                                        }
                                } 
                        }else{
@@ -701,6 +710,8 @@ string FilterSeqsCommand::createFilter() {
                        
                }
                
+               MPI_Barrier(MPI_COMM_WORLD); //make everyone wait - just in case
+               
                if (pid == 0) { //only one process should output the filter
 #endif
                F.setNumSeqs(numSeqs);
@@ -712,10 +723,14 @@ string FilterSeqsCommand::createFilter() {
                
 #ifdef USE_MPI
                //send filter string to kids
-               MPI_Bcast(&filterString[0], alignmentLength, MPI_CHAR, 0, MPI_COMM_WORLD); 
+               //for(int i = 1; i < processors; i++) { 
+               //      MPI_Send(&filterString[0], alignmentLength, MPI_CHAR, i, 2001, MPI_COMM_WORLD);
+               //}
+               MPI_Bcast(&filterString[0], alignmentLength, MPI_CHAR, 0, MPI_COMM_WORLD);
        }else{
                //recieve filterString
                char* tempBuf = new char[alignmentLength];
+               //MPI_Recv(&tempBuf[0], alignmentLength, MPI_CHAR, 0, 2001, MPI_COMM_WORLD, &status);
                MPI_Bcast(tempBuf, alignmentLength, MPI_CHAR, 0, MPI_COMM_WORLD);
                
                filterString = tempBuf;
@@ -725,8 +740,7 @@ string FilterSeqsCommand::createFilter() {
        
        MPI_Barrier(MPI_COMM_WORLD);
 #endif
-               
-               
+                               
                return filterString;
        }
        catch(exception& e) {
index 89c6dbd4d27d61ade30c0668a2376281298bd37d..dc14e1670c5221599dd14929dd6d08fde6c6b766 100644 (file)
@@ -192,7 +192,7 @@ int HClusterCommand::execute(){
                time_t estart = time(NULL);
                
                if (!sorted) {
-                       read = new ReadCluster(distfile, cutoff, outputDir);    
+                       read = new ReadCluster(distfile, cutoff, outputDir, true);      
                        read->setFormat(format);
                        read->read(globaldata->nameMap);
                        
index 8e54ab787c683535dd132caf0881aab3399945b9..65685d6a7e13c09f6563ebb3c2ea3f78dd3fd937 100644 (file)
@@ -60,7 +60,7 @@ InputData::InputData(string fName, string orderFileName, string f) : format(f){
 
 ListVector* InputData::getListVector(){
        try {
-               if(fileHandle){
+               if(!fileHandle.eof()){
                        if(format == "list") {
                                list = new ListVector(fileHandle);
                        }else{ list = NULL;  }
@@ -79,7 +79,6 @@ ListVector* InputData::getListVector(){
 }
 
 /***********************************************************************/
-
 ListVector* InputData::getListVector(string label){
        try {
                ifstream in;
@@ -115,7 +114,40 @@ ListVector* InputData::getListVector(string label){
                exit(1);
        }
 }
+/***********************************************************************/
+ListVector* InputData::getListVector(string label, bool resetFP){
+       try {
+               string  thisLabel;
+               fileHandle.clear();
+               fileHandle.seekg(0);
+               
+               if(fileHandle){
 
+                       if (format == "list") {
+                       
+                               while (fileHandle.eof() != true) {
+                                       
+                                       list = new ListVector(fileHandle); gobble(fileHandle);
+                                       thisLabel = list->getLabel();
+                                       
+                                       //if you are at the last label
+                                       if (thisLabel == label) {  break;  }
+                                       //so you don't loose this memory
+                                       else {  delete list;    }
+                               }
+                       }else{ list = NULL;  }
+               
+                       return list;
+               }
+               else{
+                       return NULL;
+               }
+       }
+       catch(exception& e) {
+               m->errorOut(e, "InputData", "getListVector");
+               exit(1);
+       }
+}
 
 /***********************************************************************/
 
index 251df5bf9a11d230eaae79103c8546e7def900a7..c91720b567838d38ff62379bfbc0fc381356d6de 100644 (file)
@@ -16,6 +16,7 @@ public:
        ~InputData();
        ListVector* getListVector();
        ListVector* getListVector(string);  //pass the label you want
+       ListVector* getListVector(string, bool);  //pass the label you want, reset filepointer
        SharedListVector* getSharedListVector();
        SharedListVector* getSharedListVector(string);  //pass the label you want
        OrderVector* getOrderVector(); 
index 77d93daf4b6c8cf18a8be546fcd4eb41c0dc7301..33761a8416524fa3a0fd10b7136ea5a097d3fd08 100644 (file)
@@ -206,43 +206,6 @@ vector<int> KmerDB::getSequencesWithKmer(int kmer) {
                exit(1);
        }       
 }
-#ifdef USE_MPI 
-/**************************************************************************************************/
-int KmerDB::MPISend(int receiver) {
-       try {
-               
-               //send kmerSize - int
-               MPI_Send(&kmerSize, 1, MPI_INT, receiver, 2001, MPI_COMM_WORLD); 
-               
-               return 0;
-       }
-       catch(exception& e) {
-               m->errorOut(e, "KmerDB", "MPISend");
-               exit(1);
-       }
-}
-/**************************************************************************************************/
-int KmerDB::MPIRecv(int sender) {
-       try {
-               MPI_Status status;
-               
-               //receive kmerSize - int
-               MPI_Recv(&kmerSize, 1, MPI_INT, sender, 2001, MPI_COMM_WORLD, &status);
-               
-               //set maxKmer 
-               int power4s[14] = { 1, 4, 16, 64, 256, 1024, 4096, 16384, 65536, 262144, 1048576, 4194304, 16777216, 67108864 };
-               count = 0;
-               maxKmer = power4s[kmerSize];
-               kmerLocations.resize(maxKmer+1);
-               
-               return 0;
-       }
-       catch(exception& e) {
-               m->errorOut(e, "KmerDB", "MPIRecv");
-               exit(1);
-       }
-}
-#endif
 /**************************************************************************************************/
 
 
index 513f3f07d64bbd267e11bd16777fae5aadb97a5c..becdeda7fa5f343b9d5d54e81c0e5df62a1661b6 100644 (file)
@@ -37,11 +37,6 @@ public:
        vector<int> getSequencesWithKmer(int);  //returns vector of sequences that contain kmer passed in
        int getMaxKmer() { return maxKmer; }
        
-       #ifdef USE_MPI  
-       int MPISend(int); //just sends kmersize
-       int MPIRecv(int);
-       #endif
-       
 private:
        
        int kmerSize;
index 6b10078955f99b73fe5680d34a6bc0234d7bc046..f4af8e9b02a92cc0c9bf1d282ec5cacf9d0514f8 100644 (file)
--- a/makefile
+++ b/makefile
@@ -26,10 +26,10 @@ ifeq  ($(strip $(USEREADLINE)),yes)
       -L../readline-6.0
 endif
 
-USEMPI ?= no
-
+USEMPI ?= yes
 
 ifeq  ($(strip $(USEMPI)),yes)
+       CC = mpic++
     CC_OPTIONS += -DUSE_MPI
 endif
 
@@ -151,6 +151,8 @@ mothur : \
                ./clearcutcommand.o\
                ./catchallcommand.o\
                ./splitabundcommand.o\
+               ./splitmatrix.o\
+               ./clustersplitcommand.o\
                ./inputdata.o\
                ./jackknife.o\
                ./kmer.o\
@@ -357,6 +359,8 @@ mothur : \
                ./clearcutcommand.o\
                ./catchallcommand.o\
                ./splitabundcommand.o\
+               ./splitmatrix.o\
+               ./clustersplitcommand.o\
                ./inputdata.o\
                ./jackknife.o\
                ./kmer.o\
@@ -566,6 +570,8 @@ clean :
                ./clearcutcommand.o\
                ./catchallcommand.o\
                ./splitabundcommand.o\
+               ./splitmatrix.o\
+               ./clustersplitcommand.o\
                ./inputdata.o\
                ./jackknife.o\
                ./kmer.o\
@@ -1680,8 +1686,16 @@ install : mothur
 ./catchallcommand.o : catchallcommand.cpp
        $(CC) $(CC_OPTIONS) catchallcommand.cpp -c $(INCLUDE) -o ./catchallcommand.o
 
-# Item # 205 -- catchallcommand --
+# Item # 205 -- splitabundcommand --
 ./splitabundcommand : splitabundcommand
        $(CC) $(CC_OPTIONS) splitabundcommand -c $(INCLUDE) -o ./splitabundcommand
+       
+# Item # 206 -- splitmatrix --
+./splitmatrix : splitmatrix
+       $(CC) $(CC_OPTIONS) splitmatrix -c $(INCLUDE) -o ./splitmatrix
+       
+# Item # 207 -- splitmatrix --
+./clustersplitcommand : clustersplitcommand
+       $(CC) $(CC_OPTIONS) clustersplitcommand -c $(INCLUDE) -o ./clustersplitcommand
 
 ##### END RUN ####
index 399a4bd39007f4c94802cdcda45099699ed5010d..bfac59c5a694db4ec644ffedace6a02f920f2067 100644 (file)
@@ -122,12 +122,13 @@ PhyloTree::PhyloTree(string tfile){
 
                
                #ifdef USE_MPI
-                       int pid, num;
+                       int pid, num, processors;
                        vector<long> positions;
                        
                        MPI_Status status; 
                        MPI_File inMPI;
                        MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
+                       MPI_Comm_size(MPI_COMM_WORLD, &processors);
 
                        char inFileName[1024];
                        strcpy(inFileName, tfile.c_str());
@@ -138,12 +139,14 @@ PhyloTree::PhyloTree(string tfile){
                                positions = setFilePosEachLine(tfile, num);
                                
                                //send file positions to all processes
-                               MPI_Bcast(&num, 1, MPI_INT, 0, MPI_COMM_WORLD);  //send numSeqs
-                               MPI_Bcast(&positions[0], (num+1), MPI_LONG, 0, MPI_COMM_WORLD); //send file pos 
+                               for(int i = 1; i < processors; i++) { 
+                                       MPI_Send(&num, 1, MPI_INT, i, 2001, MPI_COMM_WORLD);
+                                       MPI_Send(&positions[0], (num+1), MPI_LONG, i, 2001, MPI_COMM_WORLD);
+                               }
                        }else{
-                               MPI_Bcast(&num, 1, MPI_INT, 0, MPI_COMM_WORLD); //get numSeqs
-                               positions.resize(num);
-                               MPI_Bcast(&positions[0], (num+1), MPI_LONG, 0, MPI_COMM_WORLD); //get file positions
+                               MPI_Recv(&num, 1, MPI_INT, 0, 2001, MPI_COMM_WORLD, &status);
+                               positions.resize(num+1);
+                               MPI_Recv(&positions[0], (num+1), MPI_LONG, 0, 2001, MPI_COMM_WORLD, &status);
                        }
                
                        //read file 
@@ -164,6 +167,7 @@ PhyloTree::PhyloTree(string tfile){
                        }
                        
                        MPI_File_close(&inMPI);
+                       MPI_Barrier(MPI_COMM_WORLD); //make everyone wait - just in case
                
                #else
                        ifstream in;
index c8a8bc7b5074bf91ea46e2c30f2364a509d2c5ff..0893a146a84c9e71eb95475eabe66c2b80314c93 100644 (file)
 
 /***********************************************************************/
 
-ReadCluster::ReadCluster(string distfile, float c, string o){
+ReadCluster::ReadCluster(string distfile, float c, string o, bool s){
                globaldata = GlobalData::getInstance();
                m = MothurOut::getInstance();
         distFile = distfile;
                cutoff = c;
                outputDir = o;
+               sortWanted = s;
+               list = NULL;
 }
 
 /***********************************************************************/
@@ -29,7 +31,8 @@ int ReadCluster::read(NameAssignment* nameMap){
                
                if (m->control_pressed) { return 0; }
                
-               OutPutFile = sortFile(distFile, outputDir);
+               if (sortWanted) {  OutPutFile = sortFile(distFile, outputDir);  }
+               else {  OutPutFile = distFile;   } //for use by clusters splitMatrix to convert a phylip matrix to column
                
                return 0;
                        
index fae7c2653890c01bdaed816a61202b80fda7da80..9ca2a1b14010bf518e2bc366e4df9d77285361eb 100644 (file)
@@ -21,7 +21,7 @@
 class ReadCluster {
        
 public:
-       ReadCluster(string, float, string);
+       ReadCluster(string, float, string, bool);
        ~ReadCluster();
        int read(NameAssignment*);
        string getOutputFile() { return OutPutFile; }
@@ -35,6 +35,7 @@ private:
        ListVector* list;
        float cutoff;
        MothurOut* m;
+       bool sortWanted;
        
        int convertPhylip2Column(NameAssignment*);
 };
index f67dba7223eded3e55cc104841657ee229bc0db8..878a06bfb3dd91262eccce5197c54e5324e46452 100644 (file)
@@ -217,8 +217,10 @@ int ScreenSeqsCommand::execute(){
                                MPIPos = setFilePosFasta(fastafile, numFastaSeqs); //fills MPIPos, returns numSeqs
                                
                                //send file positions to all processes
-                               MPI_Bcast(&numFastaSeqs, 1, MPI_INT, 0, MPI_COMM_WORLD);  //send numSeqs
-                               MPI_Bcast(&MPIPos[0], (numFastaSeqs+1), MPI_LONG, 0, MPI_COMM_WORLD); //send file pos   
+                               for(int i = 1; i < processors; i++) { 
+                                       MPI_Send(&numFastaSeqs, 1, MPI_INT, i, tag, MPI_COMM_WORLD);
+                                       MPI_Send(&MPIPos[0], (numFastaSeqs+1), MPI_LONG, i, tag, MPI_COMM_WORLD);
+                               }
                                
                                //figure out how many sequences you have to align
                                numSeqsPerProcessor = numFastaSeqs / processors;
@@ -249,10 +251,10 @@ int ScreenSeqsCommand::execute(){
                                        }*/
                                }
                        }else{ //you are a child process
-                               MPI_Bcast(&numFastaSeqs, 1, MPI_INT, 0, MPI_COMM_WORLD); //get numSeqs
+                               MPI_Recv(&numFastaSeqs, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
                                MPIPos.resize(numFastaSeqs+1);
-                               MPI_Bcast(&MPIPos[0], (numFastaSeqs+1), MPI_LONG, 0, MPI_COMM_WORLD); //get file positions
-                               
+                               MPI_Recv(&MPIPos[0], (numFastaSeqs+1), MPI_LONG, 0, tag, MPI_COMM_WORLD, &status);
+
                                //figure out how many sequences you have to align
                                numSeqsPerProcessor = numFastaSeqs / processors;
                                int startIndex =  pid * numSeqsPerProcessor;
@@ -285,6 +287,7 @@ int ScreenSeqsCommand::execute(){
                        MPI_File_close(&outMPIGood);
                        MPI_File_close(&outMPIBad);
                        MPI_File_close(&outMPIBadAccnos);
+                       MPI_Barrier(MPI_COMM_WORLD); //make everyone wait - just in case
                                        
 #else
                                        
index f551eddacd00dade4aff87526f0b5aca59ad2b58..f225d50a15670d23f4b3d484901cfb7e30cd2c29 100644 (file)
@@ -144,11 +144,12 @@ int SeqSummaryCommand::execute(){
                                                delete buf2;
                                                
                                                MPIPos = setFilePosFasta(fastafile, numSeqs); //fills MPIPos, returns numSeqs
+                                       
+                                               for(int i = 1; i < processors; i++) { 
+                                                       MPI_Send(&numSeqs, 1, MPI_INT, i, tag, MPI_COMM_WORLD);
+                                                       MPI_Send(&MPIPos[0], (numSeqs+1), MPI_LONG, i, tag, MPI_COMM_WORLD);
+                                               }
                                                
-                                               //send file positions to all processes
-                                               MPI_Bcast(&numSeqs, 1, MPI_INT, 0, MPI_COMM_WORLD);  //send numSeqs
-                                               MPI_Bcast(&MPIPos[0], (numSeqs+1), MPI_LONG, 0, MPI_COMM_WORLD); //send file pos        
-                                                               
                                                //figure out how many sequences you have to do
                                                numSeqsPerProcessor = numSeqs / processors;
                                                int startIndex =  pid * numSeqsPerProcessor;
@@ -156,54 +157,55 @@ int SeqSummaryCommand::execute(){
                                                
                                                //do your part
                                                MPICreateSummary(startIndex, numSeqsPerProcessor, startPosition, endPosition, seqLength, ambigBases, longHomoPolymer, inMPI, outMPI, MPIPos);
-                                       
-                                               if (m->control_pressed) {  MPI_File_close(&inMPI); MPI_File_close(&outMPI);  return 0;  }
-                                               
-                                               //get the info from the child processes
-                                               for(int i = 1; i < processors; i++) { 
-                                                       
-                                                       int size;
-                                                       MPI_Recv(&size, 1, MPI_INT, i, tag, MPI_COMM_WORLD, &status);
-
-                                                       vector<int> temp; temp.resize(size+1);
-                                                       
-                                                       for(int j = 0; j < 5; j++) { 
-                                                       
-                                                               MPI_Recv(&temp[0], (size+1), MPI_INT, i, 2001, MPI_COMM_WORLD, &status); 
-                                                               int receiveTag = temp[temp.size()-1];  //child process added a int to the end to indicate what count this is for
-                                                               
-                                                               if (receiveTag == startTag) { 
-                                                                       for (int k = 0; k < size; k++) {                startPosition.push_back(temp[k]);       }
-                                                               }else if (receiveTag == endTag) { 
-                                                                       for (int k = 0; k < size; k++) {                endPosition.push_back(temp[k]); }
-                                                               }else if (receiveTag == lengthTag) { 
-                                                                       for (int k = 0; k < size; k++) {                seqLength.push_back(temp[k]);   }
-                                                               }else if (receiveTag == baseTag) { 
-                                                                       for (int k = 0; k < size; k++) {                ambigBases.push_back(temp[k]);  }
-                                                               }else if (receiveTag == lhomoTag) { 
-                                                                       for (int k = 0; k < size; k++) {                longHomoPolymer.push_back(temp[k]);     }
-                                                               }
-                                                       } 
                                                
-                                               }
-
-                                                                                               
                                }else { //i am the child process
                        
-                                       MPI_Bcast(&numSeqs, 1, MPI_INT, 0, MPI_COMM_WORLD); //get numSeqs
+                                       MPI_Recv(&numSeqs, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
                                        MPIPos.resize(numSeqs+1);
-                                       MPI_Bcast(&MPIPos[0], (numSeqs+1), MPI_LONG, 0, MPI_COMM_WORLD); //get file positions
-                                       
+                                       MPI_Recv(&MPIPos[0], (numSeqs+1), MPI_LONG, 0, tag, MPI_COMM_WORLD, &status);
+                               
                                        //figure out how many sequences you have to align
                                        numSeqsPerProcessor = numSeqs / processors;
                                        int startIndex =  pid * numSeqsPerProcessor;
                                        if(pid == (processors - 1)){    numSeqsPerProcessor = numSeqs - pid * numSeqsPerProcessor;      }
-                                       
+                               
                                        //do your part
                                        MPICreateSummary(startIndex, numSeqsPerProcessor, startPosition, endPosition, seqLength, ambigBases, longHomoPolymer, inMPI, outMPI, MPIPos);
-                                       
-                                       if (m->control_pressed) {  MPI_File_close(&inMPI);  MPI_File_close(&outMPI); return 0;  }
-                                       
+                               }
+                               
+                               MPI_File_close(&inMPI);
+                               MPI_File_close(&outMPI);
+                               MPI_Barrier(MPI_COMM_WORLD); //make everyone wait - just in case
+                               
+                               if (pid == 0) {
+                                       //get the info from the child processes
+                                       for(int i = 1; i < processors; i++) { 
+                                               int size;
+                                               MPI_Recv(&size, 1, MPI_INT, i, tag, MPI_COMM_WORLD, &status);
+
+                                               vector<int> temp; temp.resize(size+1);
+                                               
+                                               for(int j = 0; j < 5; j++) { 
+                                               
+                                                       MPI_Recv(&temp[0], (size+1), MPI_INT, i, 2001, MPI_COMM_WORLD, &status); 
+                                                       int receiveTag = temp[temp.size()-1];  //child process added a int to the end to indicate what count this is for
+                                                       
+                                                       if (receiveTag == startTag) { 
+                                                               for (int k = 0; k < size; k++) {                startPosition.push_back(temp[k]);       }
+                                                       }else if (receiveTag == endTag) { 
+                                                               for (int k = 0; k < size; k++) {                endPosition.push_back(temp[k]); }
+                                                       }else if (receiveTag == lengthTag) { 
+                                                               for (int k = 0; k < size; k++) {                seqLength.push_back(temp[k]);   }
+                                                       }else if (receiveTag == baseTag) { 
+                                                               for (int k = 0; k < size; k++) {                ambigBases.push_back(temp[k]);  }
+                                                       }else if (receiveTag == lhomoTag) { 
+                                                               for (int k = 0; k < size; k++) {                longHomoPolymer.push_back(temp[k]);     }
+                                                       }
+                                               } 
+                                       }
+
+                               }else{
+                               
                                        //send my counts
                                        int size = startPosition.size();
                                        MPI_Send(&size, 1, MPI_INT, 0, tag, MPI_COMM_WORLD);
@@ -218,12 +220,9 @@ int SeqSummaryCommand::execute(){
                                        ierr = MPI_Send(&(ambigBases[0]), (size+1), MPI_INT, 0, 2001, MPI_COMM_WORLD);
                                        longHomoPolymer.push_back(lhomoTag);
                                        ierr = MPI_Send(&(longHomoPolymer[0]), (size+1), MPI_INT, 0, 2001, MPI_COMM_WORLD);
-
                                }
                                
-                               MPI_File_close(&inMPI);
-                               MPI_File_close(&outMPI);
-                               
+                               MPI_Barrier(MPI_COMM_WORLD); //make everyone wait - just in case
 #else
                #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
                                if(processors == 1){
@@ -363,7 +362,10 @@ int SeqSummaryCommand::driverCreateSummary(vector<int>& startPosition, vector<in
 int SeqSummaryCommand::MPICreateSummary(int start, int num, vector<int>& startPosition, vector<int>& endPosition, vector<int>& seqLength, vector<int>& ambigBases, vector<int>& longHomoPolymer, MPI_File& inMPI, MPI_File& outMPI, vector<long>& MPIPos) {    
        try {
                
+               int pid;
                MPI_Status status; 
+               MPI_Comm_rank(MPI_COMM_WORLD, &pid); 
+
                
                for(int i=0;i<num;i++){
                        
@@ -399,7 +401,7 @@ int SeqSummaryCommand::MPICreateSummary(int start, int num, vector<int>& startPo
                                        
                                MPI_File_write_shared(outMPI, buf3, length, MPI_CHAR, &status);
                                delete buf3;
-                       }                                               
+                       }       
                }
                
                return 0;
diff --git a/splitmatrix.cpp b/splitmatrix.cpp
new file mode 100644 (file)
index 0000000..c52c287
--- /dev/null
@@ -0,0 +1,256 @@
+/*
+ *  splitmatrix.cpp
+ *  Mothur
+ *
+ *  Created by westcott on 5/19/10.
+ *  Copyright 2010 Schloss Lab. All rights reserved.
+ *
+ */
+
+#include "splitmatrix.h"
+
+/***********************************************************************/
+
+SplitMatrix::SplitMatrix(string distfile, string name, float c){
+       m = MothurOut::getInstance();
+       distFile = distfile;
+       cutoff = c;
+       namefile = name;
+}
+
+/***********************************************************************/
+
+int SplitMatrix::split(){
+       try {
+        
+               vector<set<string> > groups;
+               int numGroups = 0;
+
+               ofstream outFile;
+               ifstream dFile;
+               openInputFile(distFile, dFile);
+       
+               while(dFile){
+                       string seqA, seqB;
+                       float dist;
+
+                       dFile >> seqA >> seqB >> dist;
+                       
+                       if(dist < cutoff){
+                               //cout << "in cutoff: " << dist << endl;
+                               int groupIDA = -1;
+                               int groupIDB = -1;
+                               int groupID = -1;
+                               int prevGroupID = -1;
+                               
+                               for(int i=0;i<numGroups;i++){
+                                       set<string>::iterator aIt = groups[i].find(seqA);
+                                       set<string>::iterator bIt = groups[i].find(seqB);
+                                       
+                                       if(groupIDA == -1 && aIt != groups[i].end()){//seqA is not already assigned to a group and is in group[i], so assign seqB to group[i]
+                                               groups[i].insert(seqB);
+                                               groupIDA = i;
+                                               groupID = groupIDA;
+
+                                               //cout << "in aIt: " << groupID << endl;
+       //                                      break;
+                                       }
+                                       else if(groupIDB == -1 && bIt != groups[i].end()){//seqB is not already assigned to a group and is in group[i], so assign seqA to group[i]
+                                               groups[i].insert(seqA);
+                                               groupIDB = i;
+                                               groupID = groupIDB;
+
+                                       //      cout << "in bIt: " << groupID << endl;
+       //                                      break;
+                                       }
+                               
+                                       if(groupIDA != -1 && groupIDB != -1){//both ifs above have been executed, so we need to decide who to assign them to
+                                               if(groupIDA < groupIDB){
+                                               //      cout << "A: " << groupIDA << "\t" << groupIDB << endl;
+                                                       groups[groupIDA].insert(groups[groupIDB].begin(), groups[groupIDB].end()); //merge two groups into groupIDA
+                                                       groups[groupIDB].clear(); 
+                                                       groupID = groupIDA;
+                                               }
+                                               else{
+                                               //      cout << "B: " << groupIDA << "\t" << groupIDB << endl;
+                                                       groups[groupIDB].insert(groups[groupIDA].begin(), groups[groupIDA].end()); //merge two groups into groupIDB
+                                                       groups[groupIDA].clear();  
+                                                       groupID = groupIDB;
+                                               }
+                                               break;
+                                       }
+                               }
+                               
+       //windows is gonna gag on the reuse of outFile, will need to make it local...
+                               
+                               if(groupIDA == -1 && groupIDB == -1){ //we need a new group
+                                       set<string> newGroup;
+                                       newGroup.insert(seqA);
+                                       newGroup.insert(seqB);
+                                       groups.push_back(newGroup);
+                                                                       
+                                       outFile.close();
+                                       string fileName = distFile + "." + toString(numGroups) + ".temp";
+                                       outFile.open(fileName.c_str(), ios::ate);
+
+                                       outFile << seqA << '\t' << seqB << '\t' << dist << endl;
+                                       numGroups++;
+                               }
+                               else{
+                                       string fileName = distFile + "." + toString(groupID) + ".temp";
+                                       if(groupID != prevGroupID){
+                                               outFile.close();
+                                               outFile.open(fileName.c_str(), ios::app);
+                                               prevGroupID     = groupID;
+                                       }
+                                       outFile << seqA << '\t' << seqB << '\t' << dist << endl;
+                                       
+                                       if(groupIDA != -1 && groupIDB != -1){ //merge distance files of two groups you merged above
+                                               string row, column, distance;
+                                               if(groupIDA<groupIDB){
+                                                       string fileName = distFile + "." + toString(groupIDB) + ".temp";
+                                                       ifstream fileB(fileName.c_str());
+                                                       while(fileB){
+                                                               fileB >> row >> column >> distance;
+                                                               outFile << row << '\t' << column << '\t' << distance << endl;
+                                                               gobble(fileB);
+                                                       }
+                                                       fileB.close();
+                                                       remove(fileName.c_str());
+                                               }
+                                               else{
+                                                       string fileName = distFile + "." + toString(groupIDA) + ".temp";
+                                                       ifstream fileA(fileName.c_str());
+                                                       while(fileA){
+                                                               fileA >> row >> column >> distance;
+                                                               outFile << row << '\t' << column << '\t' << distance << endl;
+                                                               gobble(fileA);
+                                                       }
+                                                       fileA.close();
+                                                       remove(fileName.c_str());
+                                               }                                       
+                                       }
+                               }
+                       }
+                       gobble(dFile);
+               }
+               outFile.close();
+               dFile.close();
+       
+               ifstream bigNameFile(namefile.c_str());
+               if(!bigNameFile){
+                       cerr << "Error: We can't open the name file\n";
+                       exit(1);
+               }
+               
+               map<string, string> nameMap;
+               string name, nameList;
+               while(bigNameFile){
+                       bigNameFile >> name >> nameList;
+                       nameMap[name] = nameList;
+                       gobble(bigNameFile);
+               }
+               bigNameFile.close();
+                       
+               for(int i=0;i<numGroups;i++){  //parse names file to match distance files
+                       int numSeqsInGroup = groups[i].size();
+                       
+                       if(numSeqsInGroup > 0){
+                               string fileName = namefile + "." + toString(i) + ".temp";
+                               ofstream smallNameFile(fileName.c_str(), ios::ate);
+                               
+                               for(set<string>::iterator gIt=groups[i].begin();gIt!=groups[i].end();gIt++){
+                                       map<string,string>::iterator nIt = nameMap.find(*gIt);
+                                       
+                                       if (nIt != nameMap.end()) {
+                                               smallNameFile << nIt->first << '\t' << nIt->second << endl;
+                                               nameMap.erase(nIt);
+                                       }else{
+                                               m->mothurOut((*gIt) + " is in your distance file and not in your namefile.  Please correct."); m->mothurOutEndLine(); exit(1);
+                                       }
+                               }
+                               smallNameFile.close();
+                       }
+               }
+                       
+               //names of singletons
+               if (nameMap.size() != 0) {
+                       singleton = namefile + ".extra.temp";
+                       ofstream remainingNames(singleton.c_str(), ios::ate);
+                       for(map<string,string>::iterator nIt=nameMap.begin();nIt!=nameMap.end();nIt++){
+                               remainingNames << nIt->first << '\t' << nIt->second << endl;
+                       }
+                       remainingNames.close();
+               }else { singleton = "none"; }
+                       
+               for(int i=0;i<numGroups;i++){
+                       if(groups[i].size() > 0){
+                               string tempNameFile = namefile + "." + toString(i) + ".temp";
+                               string tempDistFile = distFile + "." + toString(i) + ".temp";
+                               
+                               map<string, string> temp;
+                               temp[tempDistFile] = tempNameFile;
+                               dists.push_back(temp);
+                       }
+               }
+                               
+               return 0;
+                       
+       }
+       catch(exception& e) {
+               m->errorOut(e, "SplitMatrix", "split");
+               exit(1);
+       }
+}
+//********************************************************************************************************************
+//sorts biggest to smallest
+inline bool compareFileSizes(map<string, string> left, map<string, string> right){
+       
+       FILE * pFile;
+       long leftsize = 0;
+               
+       //get num bytes in file
+       string filename = left.begin()->first;
+       pFile = fopen (filename.c_str(),"rb");
+       string error = "Error opening " + filename;
+       if (pFile==NULL) perror (error.c_str());
+       else{
+               fseek (pFile, 0, SEEK_END);
+               leftsize=ftell (pFile);
+               fclose (pFile);
+       }
+
+       FILE * pFile2;
+       long rightsize = 0;
+               
+       //get num bytes in file
+       filename = right.begin()->first;
+       pFile2 = fopen (filename.c_str(),"rb");
+       error = "Error opening " + filename;
+       if (pFile2==NULL) perror (error.c_str());
+       else{
+               fseek (pFile2, 0, SEEK_END);
+               rightsize=ftell (pFile2);
+               fclose (pFile2);
+       }
+
+       return (leftsize > rightsize);  
+} 
+/***********************************************************************/
+//returns map of distance files -> namefile sorted by distance file size
+vector< map< string, string> > SplitMatrix::getDistanceFiles(){
+       try {   
+               
+               sort(dists.begin(), dists.end(), compareFileSizes);
+               
+               return dists;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "SplitMatrix", "getDistanceFiles");
+               exit(1);
+       }
+}
+/***********************************************************************/
+SplitMatrix::~SplitMatrix(){}
+/***********************************************************************/
+
diff --git a/splitmatrix.h b/splitmatrix.h
new file mode 100644 (file)
index 0000000..5974ff1
--- /dev/null
@@ -0,0 +1,39 @@
+#ifndef SPLITMATRIX_H
+#define SPLITMATRIX_H
+/*
+ *  splitmatrix.h
+ *  Mothur
+ *
+ *  Created by westcott on 5/19/10.
+ *  Copyright 2010 Schloss Lab. All rights reserved.
+ *
+ */
+
+
+#include "mothur.h"
+#include "mothurout.h"
+
+/******************************************************/
+
+class SplitMatrix  {
+       
+       public:
+
+               SplitMatrix(string, string, float); //column formatted distance file, namesfile, cutoff
+               ~SplitMatrix();
+               int split();
+               vector< map<string, string> > getDistanceFiles();  //returns map of distance files -> namefile sorted by distance file size
+               string getSingletonNames() { return singleton; } //returns namesfile containing singletons
+       
+       private:
+               string distFile, namefile, singleton;
+               vector< map< string, string> > dists;
+               float cutoff;
+               
+               MothurOut* m;
+};
+
+/******************************************************/
+
+#endif
+
index b3496f60703ae6cadf09ad8159fdd4fa1fbf27e6..4fc342dd8bd26f7809ef93ce57da1b5563ee4ad1 100644 (file)
@@ -82,36 +82,4 @@ void SuffixDB::addSequence(Sequence seq) {
 SuffixDB::~SuffixDB(){                                                                                                         
        for (int i = (suffixForest.size()-1); i >= 0; i--) {  suffixForest.pop_back();  }
 }
-#ifdef USE_MPI 
-/**************************************************************************************************/
-int SuffixDB::MPISend(int receiver) {
-       try {
-               
-               //send numSeqs - int
-               MPI_Send(&numSeqs, 1, MPI_INT, receiver, 2001, MPI_COMM_WORLD); 
-                                                                       
-               return 0;
-       }
-       catch(exception& e) {
-               m->errorOut(e, "SuffixDB", "MPISend");
-               exit(1);
-       }
-}
-/**************************************************************************************************/
-int SuffixDB::MPIRecv(int sender) {
-       try {
-               MPI_Status status;
-               //receive numSeqs - int
-               MPI_Recv(&numSeqs, 1, MPI_INT, sender, 2001, MPI_COMM_WORLD, &status);
-               
-               suffixForest.resize(numSeqs);
-               
-               return 0;
-       }
-       catch(exception& e) {
-               m->errorOut(e, "SuffixDB", "MPIRecv");
-               exit(1);
-       }
-}
-#endif 
 /**************************************************************************************************/
index 4dc7e0fadd0cc1575250da05029d3ef32b388894..393b4a52f3fde135a1dd80009b6dc44dc5340737 100644 (file)
@@ -33,11 +33,6 @@ public:
        void generateDB() {}; //adding sequences generates the db
        void addSequence(Sequence);
        vector<int> findClosestSequences(Sequence*, int);
-       
-       #ifdef USE_MPI  
-       int MPISend(int); //just sends numSeqs
-       int MPIRecv(int);
-       #endif
 
 private:
        vector<SuffixTree> suffixForest;
index 4ea677f4df472014321ee8d00fc7bf45d5e322f3..163423b79dddc49d99ab38aa197291d0b1b254e7 100644 (file)
@@ -209,8 +209,10 @@ string ValidParameters::validFile(map<string, string> container, string paramete
                        if(isFile == true) {
                        
                        #ifdef USE_MPI  
-                               int pid;
+                               int pid, processors;
+                               MPI_Status status;
                                MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
+                               MPI_Comm_size(MPI_COMM_WORLD, &processors);
                                
                                if (pid == 0) {
                        #endif
@@ -219,10 +221,14 @@ string ValidParameters::validFile(map<string, string> container, string paramete
                                in.close();
 
                        #ifdef USE_MPI  
-                                       MPI_Bcast(&ableToOpen, 1, MPI_INT, 0, MPI_COMM_WORLD);  //send ableToOPen
+                                       for(int i = 1; i < processors; i++) { 
+                                               MPI_Send(&ableToOpen, 1, MPI_INT, i, 2001, MPI_COMM_WORLD);
+                                       }
                                }else {
-                                       MPI_Bcast(&ableToOpen, 1, MPI_INT, 0, MPI_COMM_WORLD); //get ableToOPen
+                                       MPI_Recv(&ableToOpen, 1, MPI_INT, 0, 2001, MPI_COMM_WORLD, &status);
                                }
+                               
+                               MPI_Barrier(MPI_COMM_WORLD); //make everyone wait - just in case
                        #endif
                        
                                if (ableToOpen == 1) { return "not open"; }