A72B3A63118B37FD004B9F8D /* phylodiversitycommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = phylodiversitycommand.cpp; sourceTree = "<group>"; };
A72B3A7B118B4D1B004B9F8D /* phylodiversity.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = phylodiversity.h; sourceTree = "<group>"; };
A72B3A7C118B4D1B004B9F8D /* phylodiversity.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = phylodiversity.cpp; sourceTree = "<group>"; };
+ A730977D11A417BE00117C95 /* splitmatrix.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = splitmatrix.h; sourceTree = SOURCE_ROOT; };
+ A730977E11A417BE00117C95 /* splitmatrix.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = splitmatrix.cpp; sourceTree = SOURCE_ROOT; };
+ A73097B911A43E1300117C95 /* clustersplitcommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = clustersplitcommand.h; sourceTree = "<group>"; };
+ A73097BA11A43E1300117C95 /* clustersplitcommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = clustersplitcommand.cpp; sourceTree = "<group>"; };
A73953DA11987ED100B0B160 /* chopseqscommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = chopseqscommand.h; sourceTree = "<group>"; };
A73953DB11987ED100B0B160 /* chopseqscommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = chopseqscommand.cpp; sourceTree = "<group>"; };
A73F163411A1951D0087CA57 /* splitabundcommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = splitabundcommand.h; sourceTree = "<group>"; };
A7DA201D113FECD400BF472F /* classifyseqscommand.cpp */,
A7D215C811996C6E00F13F13 /* clearcutcommand.h */,
A7D215C911996C6E00F13F13 /* clearcutcommand.cpp */,
- A7DA2021113FECD400BF472F /* clustercommand.cpp */,
A7DA2022113FECD400BF472F /* clustercommand.h */,
+ A7DA2021113FECD400BF472F /* clustercommand.cpp */,
+ A73097B911A43E1300117C95 /* clustersplitcommand.h */,
+ A73097BA11A43E1300117C95 /* clustersplitcommand.cpp */,
A7DA2025113FECD400BF472F /* collectcommand.cpp */,
A7DA2026113FECD400BF472F /* collectcommand.h */,
A7DA2029113FECD400BF472F /* collectsharedcommand.cpp */,
A7DA20ED113FECD400BF472F /* readphylip.h */,
A7DA20EE113FECD400BF472F /* readtree.cpp */,
A7DA20EF113FECD400BF472F /* readtree.h */,
+ A730977D11A417BE00117C95 /* splitmatrix.h */,
+ A730977E11A417BE00117C95 /* splitmatrix.cpp */,
);
name = read;
sourceTree = "<group>";
MPIPos = setFilePosFasta(candidateFileNames[s], numFastaSeqs); //fills MPIPos, returns numSeqs\r
\r
//send file positions to all processes\r
- MPI_Bcast(&numFastaSeqs, 1, MPI_INT, 0, MPI_COMM_WORLD); //send numSeqs\r
- MPI_Bcast(&MPIPos[0], (numFastaSeqs+1), MPI_LONG, 0, MPI_COMM_WORLD); //send file pos \r
+ for(int i = 1; i < processors; i++) { \r
+ MPI_Send(&numFastaSeqs, 1, MPI_INT, i, tag, MPI_COMM_WORLD);\r
+ MPI_Send(&MPIPos[0], (numFastaSeqs+1), MPI_LONG, i, tag, MPI_COMM_WORLD);\r
+ }\r
\r
//figure out how many sequences you have to align\r
numSeqsPerProcessor = numFastaSeqs / processors;\r
if (tempResult != 0) { MPIWroteAccnos = true; }\r
}\r
}else{ //you are a child process\r
- MPI_Bcast(&numFastaSeqs, 1, MPI_INT, 0, MPI_COMM_WORLD); //get numSeqs\r
+ MPI_Recv(&numFastaSeqs, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);\r
MPIPos.resize(numFastaSeqs+1);\r
- MPI_Bcast(&MPIPos[0], (numFastaSeqs+1), MPI_LONG, 0, MPI_COMM_WORLD); //get file positions\r
+ MPI_Recv(&MPIPos[0], (numFastaSeqs+1), MPI_LONG, 0, tag, MPI_COMM_WORLD, &status);\r
+\r
\r
//figure out how many sequences you have to align\r
numSeqsPerProcessor = numFastaSeqs / processors;\r
m->mothurOut("Reading in the " + fastaFileName + " template sequences...\t"); cout.flush();\r
\r
#ifdef USE_MPI \r
- int pid;\r
+ int pid, processors;\r
vector<long> positions;\r
\r
MPI_Status status; \r
MPI_File inMPI;\r
MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are\r
+ MPI_Comm_size(MPI_COMM_WORLD, &processors);\r
+ int tag = 2001;\r
\r
char inFileName[1024];\r
strcpy(inFileName, fastaFileName.c_str());\r
positions = setFilePosFasta(fastaFileName, numSeqs); //fills MPIPos, returns numSeqs\r
\r
//send file positions to all processes\r
- MPI_Bcast(&numSeqs, 1, MPI_INT, 0, MPI_COMM_WORLD); //send numSeqs\r
- MPI_Bcast(&positions[0], (numSeqs+1), MPI_LONG, 0, MPI_COMM_WORLD); //send file pos \r
+ for(int i = 1; i < processors; i++) { \r
+ MPI_Send(&numSeqs, 1, MPI_INT, i, tag, MPI_COMM_WORLD);\r
+ MPI_Send(&positions[0], (numSeqs+1), MPI_LONG, i, tag, MPI_COMM_WORLD);\r
+ }\r
}else{\r
- MPI_Bcast(&numSeqs, 1, MPI_INT, 0, MPI_COMM_WORLD); //get numSeqs\r
+ MPI_Recv(&numSeqs, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);\r
positions.resize(numSeqs+1);\r
- MPI_Bcast(&positions[0], (numSeqs+1), MPI_LONG, 0, MPI_COMM_WORLD); //get file positions\r
+ MPI_Recv(&positions[0], (numSeqs+1), MPI_LONG, 0, tag, MPI_COMM_WORLD, &status);\r
}\r
\r
//read file \r
if (temp.getUnaligned().length() > longest) { longest = temp.getUnaligned().length()+1; }\r
}\r
}\r
- \r
+ \r
+ MPI_Barrier(MPI_COMM_WORLD); //make everyone wait - just in case\r
+ \r
MPI_File_close(&inMPI);\r
\r
#else\r
#ifdef USE_MPI
- int pid, num, num2;
+ int pid, num, num2, processors;
vector<long> positions;
vector<long> positions2;
MPI_File inMPI;
MPI_File inMPI2;
MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
+ MPI_Comm_size(MPI_COMM_WORLD, &processors);
+ int tag = 2001;
char inFileName[1024];
strcpy(inFileName, inNumName.c_str());
if (pid == 0) {
positions = setFilePosEachLine(inNumName, num);
-
- //send file positions to all processes
- MPI_Bcast(&num, 1, MPI_INT, 0, MPI_COMM_WORLD); //send numSeqs
- MPI_Bcast(&positions[0], (num+1), MPI_LONG, 0, MPI_COMM_WORLD); //send file pos
-
positions2 = setFilePosEachLine(inName, num2);
- //send file positions to all processes
- MPI_Bcast(&num2, 1, MPI_INT, 0, MPI_COMM_WORLD); //send numSeqs
- MPI_Bcast(&positions2[0], (num2+1), MPI_LONG, 0, MPI_COMM_WORLD); //send file pos
+ for(int i = 1; i < processors; i++) {
+ MPI_Send(&num, 1, MPI_INT, i, tag, MPI_COMM_WORLD);
+ MPI_Send(&positions[0], (num+1), MPI_LONG, i, tag, MPI_COMM_WORLD);
+
+ MPI_Send(&num2, 1, MPI_INT, i, tag, MPI_COMM_WORLD);
+ MPI_Send(&positions2[0], (num2+1), MPI_LONG, i, tag, MPI_COMM_WORLD);
+ }
}else{
- MPI_Bcast(&num, 1, MPI_INT, 0, MPI_COMM_WORLD); //get numSeqs
- positions.resize(num);
- MPI_Bcast(&positions[0], (num+1), MPI_LONG, 0, MPI_COMM_WORLD); //get file positions
+ MPI_Recv(&num, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
+ positions.resize(num+1);
+ MPI_Recv(&positions[0], (num+1), MPI_LONG, 0, tag, MPI_COMM_WORLD, &status);
- MPI_Bcast(&num2, 1, MPI_INT, 0, MPI_COMM_WORLD); //get numSeqs
- positions2.resize(num2);
- MPI_Bcast(&positions2[0], (num2+1), MPI_LONG, 0, MPI_COMM_WORLD); //get file positions
-
+ MPI_Recv(&num2, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
+ positions2.resize(num2+1);
+ MPI_Recv(&positions2[0], (num2+1), MPI_LONG, 0, tag, MPI_COMM_WORLD, &status);
}
//read numKmers
}
MPI_File_close(&inMPI2);
+ MPI_Barrier(MPI_COMM_WORLD); //make everyone wait - just in case
#else
in >> numKmers; gobble(in);
MPIBestSend.clear();
}
-
+ MPI_Barrier(MPI_COMM_WORLD); //make everyone wait - just in case
#else
//divide breakpoints between processors
exit(1);
}
}
-#ifdef USE_MPI
-/**************************************************************************************************/
-int BlastDB::MPISend(int receiver) {
- try {
-
- //send gapOpen - float
- MPI_Send(&gapOpen, 1, MPI_FLOAT, receiver, 2001, MPI_COMM_WORLD);
-
- //send gapExtend - float
- MPI_Send(&gapExtend, 1, MPI_FLOAT, receiver, 2001, MPI_COMM_WORLD);
-
- //send match - float
- MPI_Send(&match, 1, MPI_FLOAT, receiver, 2001, MPI_COMM_WORLD);
-
- //send mismatch - float
- MPI_Send(&misMatch, 1, MPI_FLOAT, receiver, 2001, MPI_COMM_WORLD);
-
- return 0;
- }
- catch(exception& e) {
- m->errorOut(e, "BlastDB", "MPISend");
- exit(1);
- }
-}
-/**************************************************************************************************/
-int BlastDB::MPIRecv(int sender) {
- try {
- MPI_Status status;
-
- //receive gapOpen - float
- MPI_Recv(&gapOpen, 1, MPI_FLOAT, sender, 2001, MPI_COMM_WORLD, &status);
-
- //receive gapExtend - float
- MPI_Recv(&gapExtend, 1, MPI_FLOAT, sender, 2001, MPI_COMM_WORLD, &status);
-
- //receive match - float
- MPI_Recv(&match, 1, MPI_FLOAT, sender, 2001, MPI_COMM_WORLD, &status);
-
- //receive mismatch - float
- MPI_Recv(&misMatch, 1, MPI_FLOAT, sender, 2001, MPI_COMM_WORLD, &status);
-
- return 0;
- }
- catch(exception& e) {
- m->errorOut(e, "BlastDB", "MPIRecv");
- exit(1);
- }
-}
-#endif
/**************************************************************************************************/
/**************************************************************************************************/
vector<int> findClosestSequences(Sequence*, int);
vector<int> findClosestMegaBlast(Sequence*, int);
- #ifdef USE_MPI
- int MPISend(int); //just sends gapOpen, gapExtend, match and mismatch
- int MPIRecv(int);
- #endif
-
private:
string dbFileName;
string queryFileName;
m->mothurOut("Reading sequences from " + file + "..."); cout.flush();
#ifdef USE_MPI
- int pid;
+ int pid, processors;
vector<long> positions;
int numSeqs;
+ int tag = 2001;
MPI_Status status;
MPI_File inMPI;
MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
+ MPI_Comm_size(MPI_COMM_WORLD, &processors);
//char* inFileName = new char[file.length()];
//memcpy(inFileName, file.c_str(), file.length());
positions = setFilePosFasta(file, numSeqs); //fills MPIPos, returns numSeqs
//send file positions to all processes
- MPI_Bcast(&numSeqs, 1, MPI_INT, 0, MPI_COMM_WORLD); //send numSeqs
- MPI_Bcast(&positions[0], (numSeqs+1), MPI_LONG, 0, MPI_COMM_WORLD); //send file pos
+ for(int i = 1; i < processors; i++) {
+ MPI_Send(&numSeqs, 1, MPI_INT, i, tag, MPI_COMM_WORLD);
+ MPI_Send(&positions[0], (numSeqs+1), MPI_LONG, i, tag, MPI_COMM_WORLD);
+ }
}else{
- MPI_Bcast(&numSeqs, 1, MPI_INT, 0, MPI_COMM_WORLD); //get numSeqs
+ MPI_Recv(&numSeqs, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
positions.resize(numSeqs+1);
- MPI_Bcast(&positions[0], (numSeqs+1), MPI_LONG, 0, MPI_COMM_WORLD); //get file positions
+ MPI_Recv(&positions[0], (numSeqs+1), MPI_LONG, 0, tag, MPI_COMM_WORLD, &status);
}
//read file
}
MPI_File_close(&inMPI);
+ MPI_Barrier(MPI_COMM_WORLD); //make everyone wait - just in case
#else
ifstream in;
MPIPos = setFilePosFasta(fastafile, numSeqs); //fills MPIPos, returns numSeqs
//send file positions to all processes
- MPI_Bcast(&numSeqs, 1, MPI_INT, 0, MPI_COMM_WORLD); //send numSeqs
- MPI_Bcast(&MPIPos[0], (numSeqs+1), MPI_LONG, 0, MPI_COMM_WORLD); //send file pos
+ for(int i = 1; i < processors; i++) {
+ MPI_Send(&numSeqs, 1, MPI_INT, i, tag, MPI_COMM_WORLD);
+ MPI_Send(&MPIPos[0], (numSeqs+1), MPI_LONG, i, tag, MPI_COMM_WORLD);
+ }
//figure out how many sequences you have to align
numSeqsPerProcessor = numSeqs / processors;
if (tempResult != 0) { MPIWroteAccnos = true; }
}
}else{ //you are a child process
- MPI_Bcast(&numSeqs, 1, MPI_INT, 0, MPI_COMM_WORLD); //get numSeqs
+ MPI_Recv(&numSeqs, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
MPIPos.resize(numSeqs+1);
- MPI_Bcast(&MPIPos[0], (numSeqs+1), MPI_LONG, 0, MPI_COMM_WORLD); //get file positions
+ MPI_Recv(&MPIPos[0], (numSeqs+1), MPI_LONG, 0, tag, MPI_COMM_WORLD, &status);
//figure out how many sequences you have to align
numSeqsPerProcessor = numSeqs / processors;
MPI_File_close(&outMPI);
MPI_File_close(&outMPIAccnos);
+ MPI_Barrier(MPI_COMM_WORLD); //make everyone wait - just in case
+
//delete accnos file if blank
if (pid == 0) {
if (!MPIWroteAccnos) {
MPIPos = setFilePosFasta(fastafile, numSeqs); //fills MPIPos, returns numSeqs
//send file positions to all processes
- MPI_Bcast(&numSeqs, 1, MPI_INT, 0, MPI_COMM_WORLD); //send numSeqs
- MPI_Bcast(&MPIPos[0], (numSeqs+1), MPI_LONG, 0, MPI_COMM_WORLD); //send file pos
+ for(int i = 1; i < processors; i++) {
+ MPI_Send(&numSeqs, 1, MPI_INT, i, tag, MPI_COMM_WORLD);
+ MPI_Send(&MPIPos[0], (numSeqs+1), MPI_LONG, i, tag, MPI_COMM_WORLD);
+ }
//figure out how many sequences you have to align
numSeqsPerProcessor = numSeqs / processors;
MPI_Recv(buf, 4, MPI_CHAR, i, tag, MPI_COMM_WORLD, &status);
}
}else{ //you are a child process
- MPI_Bcast(&numSeqs, 1, MPI_INT, 0, MPI_COMM_WORLD); //get numSeqs
+ MPI_Recv(&numSeqs, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
MPIPos.resize(numSeqs+1);
- MPI_Bcast(&MPIPos[0], (numSeqs+1), MPI_LONG, 0, MPI_COMM_WORLD); //get file positions
+ MPI_Recv(&MPIPos[0], (numSeqs+1), MPI_LONG, 0, tag, MPI_COMM_WORLD, &status);
//figure out how many sequences you have to align
numSeqsPerProcessor = numSeqs / processors;
//close files
MPI_File_close(&inMPI);
MPI_File_close(&outMPI);
+ MPI_Barrier(MPI_COMM_WORLD); //make everyone wait - just in case
#else
//break up file
MPIPos = setFilePosFasta(fastafile, numSeqs); //fills MPIPos, returns numSeqs
//send file positions to all processes
- MPI_Bcast(&numSeqs, 1, MPI_INT, 0, MPI_COMM_WORLD); //send numSeqs
- MPI_Bcast(&MPIPos[0], (numSeqs+1), MPI_LONG, 0, MPI_COMM_WORLD); //send file pos
+ for(int i = 1; i < processors; i++) {
+ MPI_Send(&numSeqs, 1, MPI_INT, i, tag, MPI_COMM_WORLD);
+ MPI_Send(&MPIPos[0], (numSeqs+1), MPI_LONG, i, tag, MPI_COMM_WORLD);
+ }
//figure out how many sequences you have to align
numSeqsPerProcessor = numSeqs / processors;
if (tempResult != 0) { MPIWroteAccnos = true; }
}
}else{ //you are a child process
- MPI_Bcast(&numSeqs, 1, MPI_INT, 0, MPI_COMM_WORLD); //get numSeqs
+ MPI_Recv(&numSeqs, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
MPIPos.resize(numSeqs+1);
- MPI_Bcast(&MPIPos[0], (numSeqs+1), MPI_LONG, 0, MPI_COMM_WORLD); //get file positions
+ MPI_Recv(&MPIPos[0], (numSeqs+1), MPI_LONG, 0, tag, MPI_COMM_WORLD, &status);
//figure out how many sequences you have to align
numSeqsPerProcessor = numSeqs / processors;
MPI_File_close(&inMPI);
MPI_File_close(&outMPI);
MPI_File_close(&outMPIAccnos);
+ MPI_Barrier(MPI_COMM_WORLD); //make everyone wait - just in case
//delete accnos file if blank
if (pid == 0) {
MPIPos = setFilePosFasta(fastafile, numSeqs); //fills MPIPos, returns numSeqs
//send file positions to all processes
- MPI_Bcast(&numSeqs, 1, MPI_INT, 0, MPI_COMM_WORLD); //send numSeqs
- MPI_Bcast(&MPIPos[0], (numSeqs+1), MPI_LONG, 0, MPI_COMM_WORLD); //send file pos
+ for(int i = 1; i < processors; i++) {
+ MPI_Send(&numSeqs, 1, MPI_INT, i, tag, MPI_COMM_WORLD);
+ MPI_Send(&MPIPos[0], (numSeqs+1), MPI_LONG, i, tag, MPI_COMM_WORLD);
+ }
//figure out how many sequences you have to align
numSeqsPerProcessor = numSeqs / processors;
if (tempResult != 0) { MPIWroteAccnos = true; }
}
}else{ //you are a child process
- MPI_Bcast(&numSeqs, 1, MPI_INT, 0, MPI_COMM_WORLD); //get numSeqs
+ MPI_Recv(&numSeqs, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
MPIPos.resize(numSeqs+1);
- MPI_Bcast(&MPIPos[0], (numSeqs+1), MPI_LONG, 0, MPI_COMM_WORLD); //get file positions
+ MPI_Recv(&MPIPos[0], (numSeqs+1), MPI_LONG, 0, tag, MPI_COMM_WORLD, &status);
//figure out how many sequences you have to align
numSeqsPerProcessor = numSeqs / processors;
MPI_File_close(&inMPI);
MPI_File_close(&outMPI);
MPI_File_close(&outMPIAccnos);
+ MPI_Barrier(MPI_COMM_WORLD); //make everyone wait - just in case
//delete accnos file if blank
if (pid == 0) {
m->mothurOut("Generating search database... "); cout.flush();
#ifdef USE_MPI
- int pid;
+ int pid, processors;
vector<long> positions;
+ int tag = 2001;
MPI_Status status;
MPI_File inMPI;
MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
+ MPI_Comm_size(MPI_COMM_WORLD, &processors);
//char* inFileName = new char[tempFile.length()];
//memcpy(inFileName, tempFile.c_str(), tempFile.length());
positions = setFilePosFasta(tempFile, numSeqs); //fills MPIPos, returns numSeqs
//send file positions to all processes
- MPI_Bcast(&numSeqs, 1, MPI_INT, 0, MPI_COMM_WORLD); //send numSeqs
- MPI_Bcast(&positions[0], (numSeqs+1), MPI_LONG, 0, MPI_COMM_WORLD); //send file pos
+ for(int i = 1; i < processors; i++) {
+ MPI_Send(&numSeqs, 1, MPI_INT, i, tag, MPI_COMM_WORLD);
+ MPI_Send(&positions[0], (numSeqs+1), MPI_LONG, i, tag, MPI_COMM_WORLD);
+ }
}else{
- MPI_Bcast(&numSeqs, 1, MPI_INT, 0, MPI_COMM_WORLD); //get numSeqs
- positions.resize(numSeqs);
- MPI_Bcast(&positions[0], (numSeqs+1), MPI_LONG, 0, MPI_COMM_WORLD); //get file positions
+ MPI_Recv(&numSeqs, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
+ positions.resize(numSeqs+1);
+ MPI_Recv(&positions[0], (numSeqs+1), MPI_LONG, 0, tag, MPI_COMM_WORLD, &status);
}
//create database
database->generateDB();
MPI_File_close(&inMPI);
+ MPI_Barrier(MPI_COMM_WORLD); //make everyone wait - just in case
#else
//need to know number of template seqs for suffixdb
m->mothurOut("Reading in the " + file + " taxonomy...\t"); cout.flush();
#ifdef USE_MPI
- int pid, num;
+ int pid, num, processors;
vector<long> positions;
+ int tag = 2001;
MPI_Status status;
MPI_File inMPI;
MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
+ MPI_Comm_size(MPI_COMM_WORLD, &processors);
//char* inFileName = new char[file.length()];
//memcpy(inFileName, file.c_str(), file.length());
positions = setFilePosEachLine(file, num);
//send file positions to all processes
- MPI_Bcast(&num, 1, MPI_INT, 0, MPI_COMM_WORLD); //send numSeqs
- MPI_Bcast(&positions[0], (num+1), MPI_LONG, 0, MPI_COMM_WORLD); //send file pos
+ for(int i = 1; i < processors; i++) {
+ MPI_Send(&num, 1, MPI_INT, i, tag, MPI_COMM_WORLD);
+ MPI_Send(&positions[0], (num+1), MPI_LONG, i, tag, MPI_COMM_WORLD);
+ }
}else{
- MPI_Bcast(&num, 1, MPI_INT, 0, MPI_COMM_WORLD); //get numSeqs
- positions.resize(num);
- MPI_Bcast(&positions[0], (num+1), MPI_LONG, 0, MPI_COMM_WORLD); //get file positions
+ MPI_Recv(&num, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
+ positions.resize(num+1);
+ MPI_Recv(&positions[0], (num+1), MPI_LONG, 0, tag, MPI_COMM_WORLD, &status);
}
//read file
}
MPI_File_close(&inMPI);
+ MPI_Barrier(MPI_COMM_WORLD); //make everyone wait - just in case
#else
ifstream inTax;
openInputFile(file, inTax);
MPIPos = setFilePosFasta(fastaFileNames[s], numFastaSeqs); //fills MPIPos, returns numSeqs
//send file positions to all processes
- MPI_Bcast(&numFastaSeqs, 1, MPI_INT, 0, MPI_COMM_WORLD); //send numSeqs
- MPI_Bcast(&MPIPos[0], (numFastaSeqs+1), MPI_LONG, 0, MPI_COMM_WORLD); //send file pos
+ for(int i = 1; i < processors; i++) {
+ MPI_Send(&numFastaSeqs, 1, MPI_INT, i, tag, MPI_COMM_WORLD);
+ MPI_Send(&MPIPos[0], (numFastaSeqs+1), MPI_LONG, i, tag, MPI_COMM_WORLD);
+ }
//figure out how many sequences you have to align
numSeqsPerProcessor = numFastaSeqs / processors;
MPI_Recv(&done, 1, MPI_INT, i, tag, MPI_COMM_WORLD, &status);
}
}else{ //you are a child process
- MPI_Bcast(&numFastaSeqs, 1, MPI_INT, 0, MPI_COMM_WORLD); //get numSeqs
+ MPI_Recv(&numFastaSeqs, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
MPIPos.resize(numFastaSeqs+1);
- MPI_Bcast(&MPIPos[0], (numFastaSeqs+1), MPI_LONG, 0, MPI_COMM_WORLD); //get file positions
+ MPI_Recv(&MPIPos[0], (numFastaSeqs+1), MPI_LONG, 0, tag, MPI_COMM_WORLD, &status);
//figure out how many sequences you have to align
numSeqsPerProcessor = numFastaSeqs / processors;
MPI_File_close(&inMPI);
MPI_File_close(&outMPINewTax);
MPI_File_close(&outMPITempTax);
+ MPI_Barrier(MPI_COMM_WORLD); //make everyone wait - just in case
#else
#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
--- /dev/null
+/*
+ * clustersplitcommand.cpp
+ * Mothur
+ *
+ * Created by westcott on 5/19/10.
+ * Copyright 2010 Schloss Lab. All rights reserved.
+ *
+ */
+
+#include "clustersplitcommand.h"
+#include "readcluster.h"
+#include "splitmatrix.h"
+#include "readphylip.h"
+#include "readcolumn.h"
+#include "readmatrix.hpp"
+#include "inputdata.h"
+
+//**********************************************************************************************************************
+//This function checks to make sure the cluster command has no errors and then clusters based on the method chosen.
+ClusterSplitCommand::ClusterSplitCommand(string option) {
+ try{
+ globaldata = GlobalData::getInstance();
+ abort = false;
+
+ //allow user to run help
+ if(option == "help") { help(); abort = true; }
+
+ else {
+ //valid paramters for this command
+ string Array[] = {"phylip","column","name","cutoff","precision","method","showabund","timing","hard","processors","splitcutoff","outputdir","inputdir"};
+ vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
+
+ OptionParser parser(option);
+ map<string,string> parameters = parser.getParameters();
+
+ ValidParameters validParameter;
+
+ //check to make sure all parameters are valid for command
+ map<string,string>::iterator it;
+ for (it = parameters.begin(); it != parameters.end(); it++) {
+ if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {
+ abort = true;
+ }
+ }
+
+ globaldata->newRead();
+
+ //if the user changes the output directory command factory will send this info to us in the output parameter
+ outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = ""; }
+
+ //if the user changes the input directory command factory will send this info to us in the output parameter
+ string inputDir = validParameter.validFile(parameters, "inputdir", false);
+ if (inputDir == "not found"){ inputDir = ""; }
+ else {
+ string path;
+ it = parameters.find("phylip");
+ //user has given a template file
+ if(it != parameters.end()){
+ path = hasPath(it->second);
+ //if the user has not given a path then, add inputdir. else leave path alone.
+ if (path == "") { parameters["phylip"] = inputDir + it->second; }
+ }
+
+ it = parameters.find("column");
+ //user has given a template file
+ if(it != parameters.end()){
+ path = hasPath(it->second);
+ //if the user has not given a path then, add inputdir. else leave path alone.
+ if (path == "") { parameters["column"] = inputDir + it->second; }
+ }
+
+ it = parameters.find("name");
+ //user has given a template file
+ if(it != parameters.end()){
+ path = hasPath(it->second);
+ //if the user has not given a path then, add inputdir. else leave path alone.
+ if (path == "") { parameters["name"] = inputDir + it->second; }
+ }
+ }
+
+ //check for required parameters
+ phylipfile = validParameter.validFile(parameters, "phylip", true);
+ if (phylipfile == "not open") { abort = true; }
+ else if (phylipfile == "not found") { phylipfile = ""; }
+ else { distfile = phylipfile; format = "phylip"; }
+
+ columnfile = validParameter.validFile(parameters, "column", true);
+ if (columnfile == "not open") { abort = true; }
+ else if (columnfile == "not found") { columnfile = ""; }
+ else { distfile = columnfile; format = "column"; }
+
+ namefile = validParameter.validFile(parameters, "name", true);
+ if (namefile == "not open") { abort = true; }
+ else if (namefile == "not found") { namefile = ""; }
+
+ if ((phylipfile == "") && (columnfile == "")) { m->mothurOut("When executing a hcluster command you must enter a phylip or a column."); m->mothurOutEndLine(); abort = true; }
+ else if ((phylipfile != "") && (columnfile != "")) { m->mothurOut("When executing a hcluster command you must enter ONLY ONE of the following: phylip or column."); m->mothurOutEndLine(); abort = true; }
+
+ if (columnfile != "") {
+ if (namefile == "") { cout << "You need to provide a namefile if you are going to use the column format." << endl; abort = true; }
+ }
+
+ //check for optional parameter and set defaults
+ // ...at some point should added some additional type checking...
+ //get user cutoff and precision or use defaults
+ string temp;
+ temp = validParameter.validFile(parameters, "precision", false);
+ if (temp == "not found") { temp = "100"; }
+ //saves precision legnth for formatting below
+ length = temp.length();
+ convert(temp, precision);
+
+ temp = validParameter.validFile(parameters, "hard", false); if (temp == "not found") { temp = "F"; }
+ hard = isTrue(temp);
+
+ temp = validParameter.validFile(parameters, "processors", false); if (temp == "not found"){ temp = "1"; }
+ convert(temp, processors);
+
+ temp = validParameter.validFile(parameters, "cutoff", false);
+ if (temp == "not found") { temp = "10"; }
+ convert(temp, cutoff);
+ if (!hard) { cutoff += (5 / (precision * 10.0)); }
+
+ temp = validParameter.validFile(parameters, "splitcutoff", false);
+ if (temp == "not found") { temp = "0.10"; }
+ convert(temp, splitcutoff);
+ if (!hard) { splitcutoff += (5 / (precision * 10.0)); }
+
+ method = validParameter.validFile(parameters, "method", false);
+ if (method == "not found") { method = "furthest"; }
+
+ if ((method == "furthest") || (method == "nearest") || (method == "average")) { }
+ else { m->mothurOut("Not a valid clustering method. Valid clustering algorithms are furthest, nearest or average."); m->mothurOutEndLine(); abort = true; }
+
+ showabund = validParameter.validFile(parameters, "showabund", false);
+ if (showabund == "not found") { showabund = "T"; }
+
+ timing = validParameter.validFile(parameters, "timing", false);
+ if (timing == "not found") { timing = "F"; }
+
+ }
+ }
+ catch(exception& e) {
+ m->errorOut(e, "ClusterSplitCommand", "ClusterSplitCommand");
+ exit(1);
+ }
+}
+
+//**********************************************************************************************************************
+
+void ClusterSplitCommand::help(){
+ try {
+ m->mothurOut("The cluster command can only be executed after a successful read.dist command.\n");
+ m->mothurOut("The cluster command parameter options are method, cuttoff, hard, precision, showabund and timing. No parameters are required.\n");
+ m->mothurOut("The cluster command should be in the following format: \n");
+ m->mothurOut("cluster(method=yourMethod, cutoff=yourCutoff, precision=yourPrecision) \n");
+ m->mothurOut("The acceptable cluster methods are furthest, nearest and average. If no method is provided then furthest is assumed.\n\n");
+ }
+ catch(exception& e) {
+ m->errorOut(e, "ClusterSplitCommand", "help");
+ exit(1);
+ }
+}
+
+//**********************************************************************************************************************
+
+ClusterSplitCommand::~ClusterSplitCommand(){}
+
+//**********************************************************************************************************************
+
+int ClusterSplitCommand::execute(){
+ try {
+
+ if (abort == true) { return 0; }
+
+ //****************** file prep work ******************************//
+
+ //if user gave a phylip file convert to column file
+ if (format == "phylip") {
+ ReadCluster* convert = new ReadCluster(distfile, cutoff, outputDir, false);
+
+ NameAssignment* nameMap = NULL;
+ convert->read(nameMap);
+
+ if (m->control_pressed) { delete convert; return 0; }
+
+ string distfile = convert->getOutputFile();
+
+ //if no names file given with phylip file, create it
+ ListVector* listToMakeNameFile = convert->getListVector();
+ if (namefile == "") { //you need to make a namefile for split matrix
+ ofstream out;
+ namefile = phylipfile + ".names";
+ openOutputFile(namefile, out);
+ for (int i = 0; i < listToMakeNameFile->getNumBins(); i++) {
+ string bin = listToMakeNameFile->get(i);
+ out << bin << '\t' << bin << endl;
+ }
+ out.close();
+ }
+ delete listToMakeNameFile;
+ delete convert;
+ }
+
+ if (m->control_pressed) { return 0; }
+
+ time_t estart = time(NULL);
+
+ //split matrix into non-overlapping groups
+ SplitMatrix* split = new SplitMatrix(distfile, namefile, splitcutoff);
+ split->split();
+
+ if (m->control_pressed) { delete split; return 0; }
+
+ string singletonName = split->getSingletonNames();
+ vector< map<string, string> > distName = split->getDistanceFiles(); //returns map of distance files -> namefile sorted by distance file size
+ delete split;
+
+ m->mothurOut("It took " + toString(time(NULL) - estart) + " seconds to split the distance file."); m->mothurOutEndLine();
+ estart = time(NULL);
+
+ if (m->control_pressed) { return 0; }
+
+ //****************** break up files between processes and cluster each file set ******************************//
+ vector<string> listFileNames;
+ set<string> labels;
+ #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
+ if(processors == 1){
+ listFileNames = cluster(distName, labels); //clusters individual files and returns names of list files
+ }else{
+ vector < vector < map<string, string> > > dividedNames; //distNames[1] = vector of filenames for process 1...
+ dividedNames.resize(processors);
+
+ //for each file group figure out which process will complete it
+ //want to divide the load intelligently so the big files are spread between processes
+ int count = 1;
+ for (int i = 0; i < distName.size(); i++) {
+ int processToAssign = (i+1) % processors;
+ if (processToAssign == 0) { processToAssign = processors; }
+
+ dividedNames[(processToAssign-1)].push_back(distName[i]);
+ }
+
+ createProcesses(dividedNames);
+
+ if (m->control_pressed) { return 0; }
+
+ //get list of list file names from each process
+ for(int i=0;i<processors;i++){
+ string filename = toString(processIDS[i]) + ".temp";
+ ifstream in;
+ openInputFile(filename, in);
+
+ while(!in.eof()) {
+ string tempName;
+ in >> tempName; gobble(in);
+ listFileNames.push_back(tempName);
+ }
+ in.close();
+ remove((toString(processIDS[i]) + ".temp").c_str());
+
+ //get labels
+ filename = toString(processIDS[i]) + ".temp.labels";
+ ifstream in2;
+ openInputFile(filename, in2);
+
+ while(!in2.eof()) {
+ string tempName;
+ in2 >> tempName; gobble(in);
+ if (labels.count(tempName) == 0) { labels.insert(tempName); }
+ }
+ in2.close();
+ remove((toString(processIDS[i]) + ".temp.labels").c_str());
+ }
+ }
+ #else
+ listFileNames = cluster(distName, labels); //clusters individual files and returns names of list files
+ #endif
+
+ if (m->control_pressed) { for (int i = 0; i < listFileNames.size(); i++) { remove(listFileNames[i].c_str()); } return 0; }
+
+ //****************** merge list file and create rabund and sabund files ******************************//
+
+ mergeLists(listFileNames, singletonName, labels);
+
+ if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; }
+
+ m->mothurOut("It took " + toString(time(NULL) - estart) + " seconds to cluster"); m->mothurOutEndLine();
+
+ m->mothurOutEndLine();
+ m->mothurOut("Output File Names: "); m->mothurOutEndLine();
+ for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); }
+ m->mothurOutEndLine();
+
+ return 0;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "ClusterSplitCommand", "execute");
+ exit(1);
+ }
+}
+//**********************************************************************************************************************
+int ClusterSplitCommand::mergeLists(vector<string> listNames, string singleton, set<string> userLabels){
+ try {
+ if (outputDir == "") { outputDir += hasPath(distfile); }
+ fileroot = outputDir + getRootName(getSimpleName(distfile));
+
+ openOutputFile(fileroot+ tag + ".sabund", outSabund);
+ openOutputFile(fileroot+ tag + ".rabund", outRabund);
+ openOutputFile(fileroot+ tag + ".list", outList);
+
+ outputNames.push_back(fileroot+ tag + ".sabund");
+ outputNames.push_back(fileroot+ tag + ".rabund");
+ outputNames.push_back(fileroot+ tag + ".list");
+
+ //read in singletons
+ ListVector* listSingle = NULL;
+ if (singleton != "none") {
+ ifstream in;
+ openInputFile(singleton, in);
+
+ string firstCol, secondCol;
+ listSingle = new ListVector();
+ while (!in.eof()) {
+ in >> firstCol >> secondCol; gobble(in);
+ listSingle->push_back(secondCol);
+ }
+ in.close();
+ }
+
+ vector<float> orderFloat;
+
+ //go through users set and make them floats so we can sort them
+ for(set<string>::iterator it = userLabels.begin(); it != userLabels.end(); ++it) {
+ float temp;
+
+ if ((*it != "unique") && (convertTestFloat(*it, temp) == true)){
+ convert(*it, temp);
+ orderFloat.push_back(temp);
+ }else if (*it == "unique") { orderFloat.push_back(-1.0); }
+ else {
+ userLabels.erase(*it);
+ it--;
+ }
+ }
+
+ //sort order
+ sort(orderFloat.begin(), orderFloat.end());
+
+ vector<InputData*> inputs;
+ vector<string> lastLabels;
+ for (int i = 0; i < listNames.size(); i++) {
+ InputData* input = new InputData(listNames[i], "list");
+ inputs.push_back(input);
+
+ ifstream in;
+ openInputFile(listNames[i], in);
+ ListVector tempList(in);
+ lastLabels.push_back(tempList.getLabel());
+ in.close();
+ }
+
+ ListVector* merged = NULL;
+
+ //for each label needed
+ for(int l = 0; l < orderFloat.size(); l++){
+
+ string thisLabel;
+ if (orderFloat[l] == -1) { thisLabel = "unique"; }
+ else { thisLabel = toString(orderFloat[l], length-1); }
+
+ //get the list info from each file
+ for (int k = 0; k < listNames.size(); k++) {
+
+ ListVector* list = inputs[k]->getListVector();
+
+ //this file has reached the end
+ if (list == NULL) { list = inputs[k]->getListVector(lastLabels[k], true); }
+
+ float labelFloat;
+ if (list->getLabel() == "unique") { labelFloat = -1.0; }
+ else { convert(list->getLabel(), labelFloat); }
+
+ //check for missing labels
+ if (labelFloat > orderFloat[l]) { //you are missing the label, get the next smallest one
+ //if its bigger get last label, otherwise keep it
+ delete list;
+ list = inputs[k]->getListVector(lastLabels[k], true); //get last list vector to use, you actually want to move back in the file
+ }
+ lastLabels[k] = list->getLabel();
+
+ //is this the first file
+ if (merged == NULL) { merged = new ListVector(); merged->setLabel(thisLabel); }
+
+ for (int j = 0; j < list->getNumBins(); j++) {
+ merged->push_back(list->get(j));
+ }
+
+ delete list;
+ }
+
+ //add in singletons
+ for (int j = 0; j < listSingle->getNumBins(); j++) {
+ merged->push_back(listSingle->get(j));
+ }
+
+ //print to files
+ printData(merged);
+
+ delete merged; merged = NULL;
+ }
+
+ if (listSingle != NULL) { delete listSingle; remove(singleton.c_str()); }
+
+ for (int i = 0; i < listNames.size(); i++) { delete inputs[i]; remove(listNames[i].c_str()); }
+ }
+ catch(exception& e) {
+ m->errorOut(e, "ClusterSplitCommand", "mergeLists");
+ exit(1);
+ }
+}
+//**********************************************************************************************************************
+
+void ClusterSplitCommand::printData(ListVector* oldList){
+ try {
+ string label = oldList->getLabel();
+ RAbundVector oldRAbund = oldList->getRAbundVector();
+
+ oldRAbund.setLabel(label);
+ if (isTrue(showabund)) {
+ oldRAbund.getSAbundVector().print(cout);
+ }
+ oldRAbund.print(outRabund);
+ oldRAbund.getSAbundVector().print(outSabund);
+
+ oldList->print(outList);
+ }
+ catch(exception& e) {
+ m->errorOut(e, "ClusterSplitCommand", "printData");
+ exit(1);
+ }
+}
+//**********************************************************************************************************************
+int ClusterSplitCommand::createProcesses(vector < vector < map<string, string> > > dividedNames){
+ try {
+
+ #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
+ int process = 0;
+ int exitCommand = 1;
+ processIDS.clear();
+
+ //loop through and create all the processes you want
+ while (process != processors) {
+ int pid = fork();
+
+ if (pid > 0) {
+ processIDS.push_back(pid); //create map from line number to pid so you can append files in correct order later
+ process++;
+ }else if (pid == 0){
+ set<string> labels;
+ vector<string> listFileNames = cluster(dividedNames[process], labels);
+
+ //write out names to file
+ string filename = toString(getpid()) + ".temp";
+ ofstream out;
+ openOutputFile(filename, out);
+ for (int j = 0; j < listFileNames.size(); j++) { out << listFileNames[j] << endl; }
+ out.close();
+
+ //print out labels
+ ofstream outLabels;
+ filename = toString(getpid()) + ".temp.labels";
+ openOutputFile(filename, outLabels);
+
+ for (set<string>::iterator it = labels.begin(); it != labels.end(); it++) {
+ outLabels << (*it) << endl;
+ }
+ outLabels.close();
+
+ exit(0);
+ }else { m->mothurOut("unable to spawn the necessary processes."); m->mothurOutEndLine(); exit(0); }
+ }
+
+ //force parent to wait until all the processes are done
+ for (int i=0;i<processors;i++) {
+ int temp = processIDS[i];
+ wait(&temp);
+ }
+
+ return exitCommand;
+ #endif
+
+ }
+ catch(exception& e) {
+ m->errorOut(e, "ClusterSplitCommand", "createProcesses");
+ exit(1);
+ }
+}
+//**********************************************************************************************************************
+
+vector<string> ClusterSplitCommand::cluster(vector< map<string, string> > distNames, set<string>& labels){
+ try {
+ Cluster* cluster;
+ SparseMatrix* matrix;
+ ListVector* list;
+ ListVector oldList;
+ RAbundVector* rabund;
+
+ vector<string> listFileNames;
+
+ //cluster each distance file
+ for (int i = 0; i < distNames.size(); i++) {
+
+ string thisNamefile = distNames[i].begin()->second;
+ string thisDistFile = distNames[i].begin()->first;
+
+ //read in distance file
+ globaldata->setNameFile(thisNamefile);
+ globaldata->setColumnFile(thisDistFile); globaldata->setFormat("column");
+
+ ReadMatrix* read = new ReadColumnMatrix(thisDistFile);
+ read->setCutoff(cutoff);
+
+ NameAssignment* nameMap = new NameAssignment(thisNamefile);
+ nameMap->readMap();
+ read->read(nameMap);
+
+ if (m->control_pressed) { delete read; delete nameMap; return listFileNames; }
+
+ list = read->getListVector();
+ oldList = *list;
+ matrix = read->getMatrix();
+
+ delete read;
+ delete nameMap;
+
+ m->mothurOutEndLine(); m->mothurOut("Clustering " + thisDistFile); m->mothurOutEndLine();
+
+ rabund = new RAbundVector(list->getRAbundVector());
+
+ //create cluster
+ if (method == "furthest") { cluster = new CompleteLinkage(rabund, list, matrix, cutoff, method); }
+ else if(method == "nearest"){ cluster = new SingleLinkage(rabund, list, matrix, cutoff, method); }
+ else if(method == "average"){ cluster = new AverageLinkage(rabund, list, matrix, cutoff, method); }
+ tag = cluster->getTag();
+
+ if (outputDir == "") { outputDir += hasPath(thisDistFile); }
+ fileroot = outputDir + getRootName(getSimpleName(thisDistFile));
+
+ ofstream listFile;
+ openOutputFile(fileroot+ tag + ".list", listFile);
+
+ listFileNames.push_back(fileroot+ tag + ".list");
+
+ time_t estart = time(NULL);
+
+ float previousDist = 0.00000;
+ float rndPreviousDist = 0.00000;
+
+ oldList = *list;
+
+ print_start = true;
+ start = time(NULL);
+ double saveCutoff = cutoff;
+
+ while (matrix->getSmallDist() < cutoff && matrix->getNNodes() > 0){
+
+ if (m->control_pressed) { //clean up
+ delete matrix; delete list; delete cluster; delete rabund;
+ listFile.close();
+ for (int i = 0; i < listFileNames.size(); i++) { remove(listFileNames[i].c_str()); }
+ listFileNames.clear(); return listFileNames;
+ }
+
+ cluster->update(cutoff);
+
+ float dist = matrix->getSmallDist();
+ float rndDist = roundDist(dist, precision);
+
+ if(previousDist <= 0.0000 && dist != previousDist){
+ oldList.setLabel("unique");
+ oldList.print(listFile);
+ if (labels.count("unique") == 0) { labels.insert("unique"); }
+ }
+ else if(rndDist != rndPreviousDist){
+ oldList.setLabel(toString(rndPreviousDist, length-1));
+ oldList.print(listFile);
+ if (labels.count(toString(rndPreviousDist, length-1)) == 0) { labels.insert(toString(rndPreviousDist, length-1)); }
+ }
+
+ previousDist = dist;
+ rndPreviousDist = rndDist;
+ oldList = *list;
+ }
+
+
+ if(previousDist <= 0.0000){
+ oldList.setLabel("unique");
+ oldList.print(listFile);
+ if (labels.count("unique") == 0) { labels.insert("unique"); }
+ }
+ else if(rndPreviousDist<cutoff){
+ oldList.setLabel(toString(rndPreviousDist, length-1));
+ oldList.print(listFile);
+ if (labels.count(toString(rndPreviousDist, length-1)) == 0) { labels.insert(toString(rndPreviousDist, length-1)); }
+ }
+
+ delete matrix; delete list; delete cluster; delete rabund;
+ listFile.close();
+
+ if (m->control_pressed) { //clean up
+ for (int i = 0; i < listFileNames.size(); i++) { remove(listFileNames[i].c_str()); }
+ listFileNames.clear(); return listFileNames;
+ }
+
+ remove(thisDistFile.c_str());
+ remove(thisNamefile.c_str());
+ }
+
+
+ return listFileNames;
+
+ }
+ catch(exception& e) {
+ m->errorOut(e, "ClusterSplitCommand", "cluster");
+ exit(1);
+ }
+
+
+}
+
+//**********************************************************************************************************************
--- /dev/null
+#ifndef CLUSTERSPLITCOMMAND_H
+#define CLUSTERSPLITCOMMAND_H
+
+/*
+ * clustersplitcommand.h
+ * Mothur
+ *
+ * Created by westcott on 5/19/10.
+ * Copyright 2010 Schloss Lab. All rights reserved.
+ *
+ */
+
+#include "command.hpp"
+#include "rabundvector.hpp"
+#include "sabundvector.hpp"
+#include "listvector.hpp"
+#include "cluster.hpp"
+#include "sparsematrix.hpp"
+#include "globaldata.hpp"
+
+
+class ClusterSplitCommand : public Command {
+
+public:
+ ClusterSplitCommand(string);
+ ~ClusterSplitCommand();
+ int execute();
+ void help();
+
+private:
+ GlobalData* globaldata;
+ vector<int> processIDS; //processid
+ vector<string> outputNames;
+
+ string method, fileroot, tag, outputDir, phylipfile, columnfile, namefile, distfile, format, showabund, timing;
+ double cutoff, splitcutoff;
+ int precision, length, processors;
+ bool print_start, abort, hard;
+ time_t start;
+ ofstream outList, outRabund, outSabund;
+
+ void printData(ListVector*);
+ int createProcesses(vector < vector < map<string, string> > >);
+ vector<string> cluster(vector< map<string, string> >, set<string>&);
+ int mergeLists(vector<string>, string, set<string>);
+};
+
+#endif
+
#include "clearcutcommand.h"
#include "catchallcommand.h"
#include "splitabundcommand.h"
+#include "clustersplitcommand.h"
/*******************************************************/
commands["clearcut"] = "clearcut";
commands["catchall"] = "catchall";
commands["split.abund"] = "split.abund";
+ commands["cluster.split"] = "cluster.split";
commands["classify.seqs"] = "MPIEnabled";
commands["dist.seqs"] = "MPIEnabled";
commands["filter.seqs"] = "MPIEnabled";
else if(commandName == "clearcut") { command = new ClearcutCommand(optionString); }
else if(commandName == "catchall") { command = new CatchAllCommand(optionString); }
else if(commandName == "split.abund") { command = new SplitAbundCommand(optionString); }
+ else if(commandName == "cluster.split") { command = new ClusterSplitCommand(optionString); }
else { command = new NoCommand(optionString); }
return command;
virtual vector<int> getSequencesWithKmer(int){ vector<int> filler; return filler; };
virtual int getMaxKmer(){ return 1; };
- #ifdef USE_MPI
- virtual int MPISend(int) = 0;
- virtual int MPIRecv(int) = 0;
- #endif
-
protected:
MothurOut* m;
int numSeqs, longest;
MPI_Send(&size, 1, MPI_LONG, 0, tag, MPI_COMM_WORLD);
}
}
+ MPI_Barrier(MPI_COMM_WORLD); //make everyone wait - just in case
#else
#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
int pid;
MPI_Comm_rank(MPI_COMM_WORLD, &pid);
+ MPI_Barrier(MPI_COMM_WORLD); //make everyone wait - just in case
+
if ((cFactory->MPIEnabled(commandName)) || (pid == 0)) {
#endif
//executes valid command
#ifdef USE_MPI
int pid;
MPI_Comm_rank(MPI_COMM_WORLD, &pid);
-
+cout << pid << " is waiting " << commandName << endl;
+ MPI_Barrier(MPI_COMM_WORLD); //make everyone wait - just in case
+cout << pid << " is here " << commandName << endl;
if ((cFactory->MPIEnabled(commandName)) || (pid == 0)) {
#endif
//executes valid command
int pid;
MPI_Comm_rank(MPI_COMM_WORLD, &pid);
+ MPI_Barrier(MPI_COMM_WORLD); //make everyone wait - just in case
+
if ((cFactory->MPIEnabled(commandName)) || (pid == 0)) {
#endif
//executes valid command
numSeqs += num;
//send file positions to all processes
- MPI_Bcast(&num, 1, MPI_INT, 0, MPI_COMM_WORLD); //send numSeqs
- MPI_Bcast(&MPIPos[0], (num+1), MPI_LONG, 0, MPI_COMM_WORLD); //send file pos
+ for(int i = 1; i < processors; i++) {
+ MPI_Send(&num, 1, MPI_INT, i, tag, MPI_COMM_WORLD);
+ MPI_Send(&MPIPos[0], (num+1), MPI_LONG, i, tag, MPI_COMM_WORLD);
+ }
//figure out how many sequences you have to do
numSeqsPerProcessor = num / processors;
}
}else { //you are a child process
- MPI_Bcast(&num, 1, MPI_INT, 0, MPI_COMM_WORLD); //get numSeqs
- numSeqs += num;
+ MPI_Recv(&num, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
MPIPos.resize(num+1);
- MPI_Bcast(&MPIPos[0], (num+1), MPI_LONG, 0, MPI_COMM_WORLD); //get file positions
+ numSeqs += num;
+ MPI_Recv(&MPIPos[0], (num+1), MPI_LONG, 0, tag, MPI_COMM_WORLD, &status);
//figure out how many sequences you have to align
numSeqsPerProcessor = num / processors;
MPI_File_close(&outMPI);
MPI_File_close(&inMPI);
+ MPI_Barrier(MPI_COMM_WORLD); //make everyone wait - just in case
#else
#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
F.setLength(alignmentLength);
- if(soft != 0 || isTrue(vertical)){
+ if(trump != '*' || isTrue(vertical) || soft != 0){
F.initialize();
}
numSeqs += num;
//send file positions to all processes
- MPI_Bcast(&num, 1, MPI_INT, 0, MPI_COMM_WORLD); //send numSeqs
- MPI_Bcast(&MPIPos[0], (num+1), MPI_LONG, 0, MPI_COMM_WORLD); //send file pos
+ for(int i = 1; i < processors; i++) {
+ MPI_Send(&num, 1, MPI_INT, i, tag, MPI_COMM_WORLD);
+ MPI_Send(&MPIPos[0], (num+1), MPI_LONG, i, tag, MPI_COMM_WORLD);
+ }
//figure out how many sequences you have to do
numSeqsPerProcessor = num / processors;
if (m->control_pressed) { MPI_File_close(&inMPI); return 0; }
}else { //i am the child process
-
- MPI_Bcast(&num, 1, MPI_INT, 0, MPI_COMM_WORLD); //get numSeqs
+ MPI_Recv(&num, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
MPIPos.resize(num+1);
numSeqs += num;
- MPI_Bcast(&MPIPos[0], (num+1), MPI_LONG, 0, MPI_COMM_WORLD); //get file positions
+ MPI_Recv(&MPIPos[0], (num+1), MPI_LONG, 0, tag, MPI_COMM_WORLD, &status);
//figure out how many sequences you have to align
numSeqsPerProcessor = num / processors;
}
MPI_File_close(&inMPI);
+ MPI_Barrier(MPI_COMM_WORLD); //make everyone wait - just in case
#else
#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
vector<int> temp; temp.resize(alignmentLength+1);
//get the frequencies from the child processes
- for(int i = 0; i < ((processors-1)*5); i++) {
- MPI_Recv(&temp[0], (alignmentLength+1), MPI_INT, MPI_ANY_SOURCE, 2001, MPI_COMM_WORLD, &status);
- int receiveTag = temp[temp.size()-1]; //child process added a int to the end to indicate what letter count this is for
-
- if (receiveTag == Atag) { //you are recieveing the A frequencies
- for (int k = 0; k < alignmentLength; k++) { F.a[k] += temp[k]; }
- }else if (receiveTag == Ttag) { //you are recieveing the T frequencies
- for (int k = 0; k < alignmentLength; k++) { F.t[k] += temp[k]; }
- }else if (receiveTag == Ctag) { //you are recieveing the C frequencies
- for (int k = 0; k < alignmentLength; k++) { F.c[k] += temp[k]; }
- }else if (receiveTag == Gtag) { //you are recieveing the G frequencies
- for (int k = 0; k < alignmentLength; k++) { F.g[k] += temp[k]; }
- }else if (receiveTag == Gaptag) { //you are recieveing the gap frequencies
- for (int k = 0; k < alignmentLength; k++) { F.gap[k] += temp[k]; }
+ for(int i = 1; i < processors; i++) {
+
+ for (int j = 0; j < 5; j++) {
+
+ MPI_Recv(&temp[0], (alignmentLength+1), MPI_INT, i, 2001, MPI_COMM_WORLD, &status);
+ int receiveTag = temp[temp.size()-1]; //child process added a int to the end to indicate what letter count this is for
+
+ if (receiveTag == Atag) { //you are recieveing the A frequencies
+ for (int k = 0; k < alignmentLength; k++) { F.a[k] += temp[k]; }
+ }else if (receiveTag == Ttag) { //you are recieveing the T frequencies
+ for (int k = 0; k < alignmentLength; k++) { F.t[k] += temp[k]; }
+ }else if (receiveTag == Ctag) { //you are recieveing the C frequencies
+ for (int k = 0; k < alignmentLength; k++) { F.c[k] += temp[k]; }
+ }else if (receiveTag == Gtag) { //you are recieveing the G frequencies
+ for (int k = 0; k < alignmentLength; k++) { F.g[k] += temp[k]; }
+ }else if (receiveTag == Gaptag) { //you are recieveing the gap frequencies
+ for (int k = 0; k < alignmentLength; k++) { F.gap[k] += temp[k]; }
+ }
}
}
}else{
}
+ MPI_Barrier(MPI_COMM_WORLD); //make everyone wait - just in case
+
if (pid == 0) { //only one process should output the filter
#endif
F.setNumSeqs(numSeqs);
#ifdef USE_MPI
//send filter string to kids
- MPI_Bcast(&filterString[0], alignmentLength, MPI_CHAR, 0, MPI_COMM_WORLD);
+ //for(int i = 1; i < processors; i++) {
+ // MPI_Send(&filterString[0], alignmentLength, MPI_CHAR, i, 2001, MPI_COMM_WORLD);
+ //}
+ MPI_Bcast(&filterString[0], alignmentLength, MPI_CHAR, 0, MPI_COMM_WORLD);
}else{
//recieve filterString
char* tempBuf = new char[alignmentLength];
+ //MPI_Recv(&tempBuf[0], alignmentLength, MPI_CHAR, 0, 2001, MPI_COMM_WORLD, &status);
MPI_Bcast(tempBuf, alignmentLength, MPI_CHAR, 0, MPI_COMM_WORLD);
filterString = tempBuf;
MPI_Barrier(MPI_COMM_WORLD);
#endif
-
-
+
return filterString;
}
catch(exception& e) {
time_t estart = time(NULL);
if (!sorted) {
- read = new ReadCluster(distfile, cutoff, outputDir);
+ read = new ReadCluster(distfile, cutoff, outputDir, true);
read->setFormat(format);
read->read(globaldata->nameMap);
ListVector* InputData::getListVector(){
try {
- if(fileHandle){
+ if(!fileHandle.eof()){
if(format == "list") {
list = new ListVector(fileHandle);
}else{ list = NULL; }
}
/***********************************************************************/
-
ListVector* InputData::getListVector(string label){
try {
ifstream in;
exit(1);
}
}
+/***********************************************************************/
+ListVector* InputData::getListVector(string label, bool resetFP){
+ try {
+ string thisLabel;
+ fileHandle.clear();
+ fileHandle.seekg(0);
+
+ if(fileHandle){
+ if (format == "list") {
+
+ while (fileHandle.eof() != true) {
+
+ list = new ListVector(fileHandle); gobble(fileHandle);
+ thisLabel = list->getLabel();
+
+ //if you are at the last label
+ if (thisLabel == label) { break; }
+ //so you don't loose this memory
+ else { delete list; }
+ }
+ }else{ list = NULL; }
+
+ return list;
+ }
+ else{
+ return NULL;
+ }
+ }
+ catch(exception& e) {
+ m->errorOut(e, "InputData", "getListVector");
+ exit(1);
+ }
+}
/***********************************************************************/
~InputData();
ListVector* getListVector();
ListVector* getListVector(string); //pass the label you want
+ ListVector* getListVector(string, bool); //pass the label you want, reset filepointer
SharedListVector* getSharedListVector();
SharedListVector* getSharedListVector(string); //pass the label you want
OrderVector* getOrderVector();
exit(1);
}
}
-#ifdef USE_MPI
-/**************************************************************************************************/
-int KmerDB::MPISend(int receiver) {
- try {
-
- //send kmerSize - int
- MPI_Send(&kmerSize, 1, MPI_INT, receiver, 2001, MPI_COMM_WORLD);
-
- return 0;
- }
- catch(exception& e) {
- m->errorOut(e, "KmerDB", "MPISend");
- exit(1);
- }
-}
-/**************************************************************************************************/
-int KmerDB::MPIRecv(int sender) {
- try {
- MPI_Status status;
-
- //receive kmerSize - int
- MPI_Recv(&kmerSize, 1, MPI_INT, sender, 2001, MPI_COMM_WORLD, &status);
-
- //set maxKmer
- int power4s[14] = { 1, 4, 16, 64, 256, 1024, 4096, 16384, 65536, 262144, 1048576, 4194304, 16777216, 67108864 };
- count = 0;
- maxKmer = power4s[kmerSize];
- kmerLocations.resize(maxKmer+1);
-
- return 0;
- }
- catch(exception& e) {
- m->errorOut(e, "KmerDB", "MPIRecv");
- exit(1);
- }
-}
-#endif
/**************************************************************************************************/
vector<int> getSequencesWithKmer(int); //returns vector of sequences that contain kmer passed in
int getMaxKmer() { return maxKmer; }
- #ifdef USE_MPI
- int MPISend(int); //just sends kmersize
- int MPIRecv(int);
- #endif
-
private:
int kmerSize;
-L../readline-6.0
endif
-USEMPI ?= no
-
+USEMPI ?= yes
ifeq ($(strip $(USEMPI)),yes)
+ CC = mpic++
CC_OPTIONS += -DUSE_MPI
endif
./clearcutcommand.o\
./catchallcommand.o\
./splitabundcommand.o\
+ ./splitmatrix.o\
+ ./clustersplitcommand.o\
./inputdata.o\
./jackknife.o\
./kmer.o\
./clearcutcommand.o\
./catchallcommand.o\
./splitabundcommand.o\
+ ./splitmatrix.o\
+ ./clustersplitcommand.o\
./inputdata.o\
./jackknife.o\
./kmer.o\
./clearcutcommand.o\
./catchallcommand.o\
./splitabundcommand.o\
+ ./splitmatrix.o\
+ ./clustersplitcommand.o\
./inputdata.o\
./jackknife.o\
./kmer.o\
./catchallcommand.o : catchallcommand.cpp
$(CC) $(CC_OPTIONS) catchallcommand.cpp -c $(INCLUDE) -o ./catchallcommand.o
-# Item # 205 -- catchallcommand --
+# Item # 205 -- splitabundcommand --
./splitabundcommand : splitabundcommand
$(CC) $(CC_OPTIONS) splitabundcommand -c $(INCLUDE) -o ./splitabundcommand
+
+# Item # 206 -- splitmatrix --
+./splitmatrix : splitmatrix
+ $(CC) $(CC_OPTIONS) splitmatrix -c $(INCLUDE) -o ./splitmatrix
+
+# Item # 207 -- splitmatrix --
+./clustersplitcommand : clustersplitcommand
+ $(CC) $(CC_OPTIONS) clustersplitcommand -c $(INCLUDE) -o ./clustersplitcommand
##### END RUN ####
#ifdef USE_MPI
- int pid, num;
+ int pid, num, processors;
vector<long> positions;
MPI_Status status;
MPI_File inMPI;
MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
+ MPI_Comm_size(MPI_COMM_WORLD, &processors);
char inFileName[1024];
strcpy(inFileName, tfile.c_str());
positions = setFilePosEachLine(tfile, num);
//send file positions to all processes
- MPI_Bcast(&num, 1, MPI_INT, 0, MPI_COMM_WORLD); //send numSeqs
- MPI_Bcast(&positions[0], (num+1), MPI_LONG, 0, MPI_COMM_WORLD); //send file pos
+ for(int i = 1; i < processors; i++) {
+ MPI_Send(&num, 1, MPI_INT, i, 2001, MPI_COMM_WORLD);
+ MPI_Send(&positions[0], (num+1), MPI_LONG, i, 2001, MPI_COMM_WORLD);
+ }
}else{
- MPI_Bcast(&num, 1, MPI_INT, 0, MPI_COMM_WORLD); //get numSeqs
- positions.resize(num);
- MPI_Bcast(&positions[0], (num+1), MPI_LONG, 0, MPI_COMM_WORLD); //get file positions
+ MPI_Recv(&num, 1, MPI_INT, 0, 2001, MPI_COMM_WORLD, &status);
+ positions.resize(num+1);
+ MPI_Recv(&positions[0], (num+1), MPI_LONG, 0, 2001, MPI_COMM_WORLD, &status);
}
//read file
}
MPI_File_close(&inMPI);
+ MPI_Barrier(MPI_COMM_WORLD); //make everyone wait - just in case
#else
ifstream in;
/***********************************************************************/
-ReadCluster::ReadCluster(string distfile, float c, string o){
+ReadCluster::ReadCluster(string distfile, float c, string o, bool s){
globaldata = GlobalData::getInstance();
m = MothurOut::getInstance();
distFile = distfile;
cutoff = c;
outputDir = o;
+ sortWanted = s;
+ list = NULL;
}
/***********************************************************************/
if (m->control_pressed) { return 0; }
- OutPutFile = sortFile(distFile, outputDir);
+ if (sortWanted) { OutPutFile = sortFile(distFile, outputDir); }
+ else { OutPutFile = distFile; } //for use by clusters splitMatrix to convert a phylip matrix to column
return 0;
class ReadCluster {
public:
- ReadCluster(string, float, string);
+ ReadCluster(string, float, string, bool);
~ReadCluster();
int read(NameAssignment*);
string getOutputFile() { return OutPutFile; }
ListVector* list;
float cutoff;
MothurOut* m;
+ bool sortWanted;
int convertPhylip2Column(NameAssignment*);
};
MPIPos = setFilePosFasta(fastafile, numFastaSeqs); //fills MPIPos, returns numSeqs
//send file positions to all processes
- MPI_Bcast(&numFastaSeqs, 1, MPI_INT, 0, MPI_COMM_WORLD); //send numSeqs
- MPI_Bcast(&MPIPos[0], (numFastaSeqs+1), MPI_LONG, 0, MPI_COMM_WORLD); //send file pos
+ for(int i = 1; i < processors; i++) {
+ MPI_Send(&numFastaSeqs, 1, MPI_INT, i, tag, MPI_COMM_WORLD);
+ MPI_Send(&MPIPos[0], (numFastaSeqs+1), MPI_LONG, i, tag, MPI_COMM_WORLD);
+ }
//figure out how many sequences you have to align
numSeqsPerProcessor = numFastaSeqs / processors;
}*/
}
}else{ //you are a child process
- MPI_Bcast(&numFastaSeqs, 1, MPI_INT, 0, MPI_COMM_WORLD); //get numSeqs
+ MPI_Recv(&numFastaSeqs, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
MPIPos.resize(numFastaSeqs+1);
- MPI_Bcast(&MPIPos[0], (numFastaSeqs+1), MPI_LONG, 0, MPI_COMM_WORLD); //get file positions
-
+ MPI_Recv(&MPIPos[0], (numFastaSeqs+1), MPI_LONG, 0, tag, MPI_COMM_WORLD, &status);
+
//figure out how many sequences you have to align
numSeqsPerProcessor = numFastaSeqs / processors;
int startIndex = pid * numSeqsPerProcessor;
MPI_File_close(&outMPIGood);
MPI_File_close(&outMPIBad);
MPI_File_close(&outMPIBadAccnos);
+ MPI_Barrier(MPI_COMM_WORLD); //make everyone wait - just in case
#else
delete buf2;
MPIPos = setFilePosFasta(fastafile, numSeqs); //fills MPIPos, returns numSeqs
+
+ for(int i = 1; i < processors; i++) {
+ MPI_Send(&numSeqs, 1, MPI_INT, i, tag, MPI_COMM_WORLD);
+ MPI_Send(&MPIPos[0], (numSeqs+1), MPI_LONG, i, tag, MPI_COMM_WORLD);
+ }
- //send file positions to all processes
- MPI_Bcast(&numSeqs, 1, MPI_INT, 0, MPI_COMM_WORLD); //send numSeqs
- MPI_Bcast(&MPIPos[0], (numSeqs+1), MPI_LONG, 0, MPI_COMM_WORLD); //send file pos
-
//figure out how many sequences you have to do
numSeqsPerProcessor = numSeqs / processors;
int startIndex = pid * numSeqsPerProcessor;
//do your part
MPICreateSummary(startIndex, numSeqsPerProcessor, startPosition, endPosition, seqLength, ambigBases, longHomoPolymer, inMPI, outMPI, MPIPos);
-
- if (m->control_pressed) { MPI_File_close(&inMPI); MPI_File_close(&outMPI); return 0; }
-
- //get the info from the child processes
- for(int i = 1; i < processors; i++) {
-
- int size;
- MPI_Recv(&size, 1, MPI_INT, i, tag, MPI_COMM_WORLD, &status);
-
- vector<int> temp; temp.resize(size+1);
-
- for(int j = 0; j < 5; j++) {
-
- MPI_Recv(&temp[0], (size+1), MPI_INT, i, 2001, MPI_COMM_WORLD, &status);
- int receiveTag = temp[temp.size()-1]; //child process added a int to the end to indicate what count this is for
-
- if (receiveTag == startTag) {
- for (int k = 0; k < size; k++) { startPosition.push_back(temp[k]); }
- }else if (receiveTag == endTag) {
- for (int k = 0; k < size; k++) { endPosition.push_back(temp[k]); }
- }else if (receiveTag == lengthTag) {
- for (int k = 0; k < size; k++) { seqLength.push_back(temp[k]); }
- }else if (receiveTag == baseTag) {
- for (int k = 0; k < size; k++) { ambigBases.push_back(temp[k]); }
- }else if (receiveTag == lhomoTag) {
- for (int k = 0; k < size; k++) { longHomoPolymer.push_back(temp[k]); }
- }
- }
- }
-
-
}else { //i am the child process
- MPI_Bcast(&numSeqs, 1, MPI_INT, 0, MPI_COMM_WORLD); //get numSeqs
+ MPI_Recv(&numSeqs, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
MPIPos.resize(numSeqs+1);
- MPI_Bcast(&MPIPos[0], (numSeqs+1), MPI_LONG, 0, MPI_COMM_WORLD); //get file positions
-
+ MPI_Recv(&MPIPos[0], (numSeqs+1), MPI_LONG, 0, tag, MPI_COMM_WORLD, &status);
+
//figure out how many sequences you have to align
numSeqsPerProcessor = numSeqs / processors;
int startIndex = pid * numSeqsPerProcessor;
if(pid == (processors - 1)){ numSeqsPerProcessor = numSeqs - pid * numSeqsPerProcessor; }
-
+
//do your part
MPICreateSummary(startIndex, numSeqsPerProcessor, startPosition, endPosition, seqLength, ambigBases, longHomoPolymer, inMPI, outMPI, MPIPos);
-
- if (m->control_pressed) { MPI_File_close(&inMPI); MPI_File_close(&outMPI); return 0; }
-
+ }
+
+ MPI_File_close(&inMPI);
+ MPI_File_close(&outMPI);
+ MPI_Barrier(MPI_COMM_WORLD); //make everyone wait - just in case
+
+ if (pid == 0) {
+ //get the info from the child processes
+ for(int i = 1; i < processors; i++) {
+ int size;
+ MPI_Recv(&size, 1, MPI_INT, i, tag, MPI_COMM_WORLD, &status);
+
+ vector<int> temp; temp.resize(size+1);
+
+ for(int j = 0; j < 5; j++) {
+
+ MPI_Recv(&temp[0], (size+1), MPI_INT, i, 2001, MPI_COMM_WORLD, &status);
+ int receiveTag = temp[temp.size()-1]; //child process added a int to the end to indicate what count this is for
+
+ if (receiveTag == startTag) {
+ for (int k = 0; k < size; k++) { startPosition.push_back(temp[k]); }
+ }else if (receiveTag == endTag) {
+ for (int k = 0; k < size; k++) { endPosition.push_back(temp[k]); }
+ }else if (receiveTag == lengthTag) {
+ for (int k = 0; k < size; k++) { seqLength.push_back(temp[k]); }
+ }else if (receiveTag == baseTag) {
+ for (int k = 0; k < size; k++) { ambigBases.push_back(temp[k]); }
+ }else if (receiveTag == lhomoTag) {
+ for (int k = 0; k < size; k++) { longHomoPolymer.push_back(temp[k]); }
+ }
+ }
+ }
+
+ }else{
+
//send my counts
int size = startPosition.size();
MPI_Send(&size, 1, MPI_INT, 0, tag, MPI_COMM_WORLD);
ierr = MPI_Send(&(ambigBases[0]), (size+1), MPI_INT, 0, 2001, MPI_COMM_WORLD);
longHomoPolymer.push_back(lhomoTag);
ierr = MPI_Send(&(longHomoPolymer[0]), (size+1), MPI_INT, 0, 2001, MPI_COMM_WORLD);
-
}
- MPI_File_close(&inMPI);
- MPI_File_close(&outMPI);
-
+ MPI_Barrier(MPI_COMM_WORLD); //make everyone wait - just in case
#else
#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
if(processors == 1){
int SeqSummaryCommand::MPICreateSummary(int start, int num, vector<int>& startPosition, vector<int>& endPosition, vector<int>& seqLength, vector<int>& ambigBases, vector<int>& longHomoPolymer, MPI_File& inMPI, MPI_File& outMPI, vector<long>& MPIPos) {
try {
+ int pid;
MPI_Status status;
+ MPI_Comm_rank(MPI_COMM_WORLD, &pid);
+
for(int i=0;i<num;i++){
MPI_File_write_shared(outMPI, buf3, length, MPI_CHAR, &status);
delete buf3;
- }
+ }
}
return 0;
--- /dev/null
+/*
+ * splitmatrix.cpp
+ * Mothur
+ *
+ * Created by westcott on 5/19/10.
+ * Copyright 2010 Schloss Lab. All rights reserved.
+ *
+ */
+
+#include "splitmatrix.h"
+
+/***********************************************************************/
+
+SplitMatrix::SplitMatrix(string distfile, string name, float c){
+ m = MothurOut::getInstance();
+ distFile = distfile;
+ cutoff = c;
+ namefile = name;
+}
+
+/***********************************************************************/
+
+int SplitMatrix::split(){
+ try {
+
+ vector<set<string> > groups;
+ int numGroups = 0;
+
+ ofstream outFile;
+ ifstream dFile;
+ openInputFile(distFile, dFile);
+
+ while(dFile){
+ string seqA, seqB;
+ float dist;
+
+ dFile >> seqA >> seqB >> dist;
+
+ if(dist < cutoff){
+ //cout << "in cutoff: " << dist << endl;
+ int groupIDA = -1;
+ int groupIDB = -1;
+ int groupID = -1;
+ int prevGroupID = -1;
+
+ for(int i=0;i<numGroups;i++){
+ set<string>::iterator aIt = groups[i].find(seqA);
+ set<string>::iterator bIt = groups[i].find(seqB);
+
+ if(groupIDA == -1 && aIt != groups[i].end()){//seqA is not already assigned to a group and is in group[i], so assign seqB to group[i]
+ groups[i].insert(seqB);
+ groupIDA = i;
+ groupID = groupIDA;
+
+ //cout << "in aIt: " << groupID << endl;
+ // break;
+ }
+ else if(groupIDB == -1 && bIt != groups[i].end()){//seqB is not already assigned to a group and is in group[i], so assign seqA to group[i]
+ groups[i].insert(seqA);
+ groupIDB = i;
+ groupID = groupIDB;
+
+ // cout << "in bIt: " << groupID << endl;
+ // break;
+ }
+
+ if(groupIDA != -1 && groupIDB != -1){//both ifs above have been executed, so we need to decide who to assign them to
+ if(groupIDA < groupIDB){
+ // cout << "A: " << groupIDA << "\t" << groupIDB << endl;
+ groups[groupIDA].insert(groups[groupIDB].begin(), groups[groupIDB].end()); //merge two groups into groupIDA
+ groups[groupIDB].clear();
+ groupID = groupIDA;
+ }
+ else{
+ // cout << "B: " << groupIDA << "\t" << groupIDB << endl;
+ groups[groupIDB].insert(groups[groupIDA].begin(), groups[groupIDA].end()); //merge two groups into groupIDB
+ groups[groupIDA].clear();
+ groupID = groupIDB;
+ }
+ break;
+ }
+ }
+
+ //windows is gonna gag on the reuse of outFile, will need to make it local...
+
+ if(groupIDA == -1 && groupIDB == -1){ //we need a new group
+ set<string> newGroup;
+ newGroup.insert(seqA);
+ newGroup.insert(seqB);
+ groups.push_back(newGroup);
+
+ outFile.close();
+ string fileName = distFile + "." + toString(numGroups) + ".temp";
+ outFile.open(fileName.c_str(), ios::ate);
+
+ outFile << seqA << '\t' << seqB << '\t' << dist << endl;
+ numGroups++;
+ }
+ else{
+ string fileName = distFile + "." + toString(groupID) + ".temp";
+ if(groupID != prevGroupID){
+ outFile.close();
+ outFile.open(fileName.c_str(), ios::app);
+ prevGroupID = groupID;
+ }
+ outFile << seqA << '\t' << seqB << '\t' << dist << endl;
+
+ if(groupIDA != -1 && groupIDB != -1){ //merge distance files of two groups you merged above
+ string row, column, distance;
+ if(groupIDA<groupIDB){
+ string fileName = distFile + "." + toString(groupIDB) + ".temp";
+ ifstream fileB(fileName.c_str());
+ while(fileB){
+ fileB >> row >> column >> distance;
+ outFile << row << '\t' << column << '\t' << distance << endl;
+ gobble(fileB);
+ }
+ fileB.close();
+ remove(fileName.c_str());
+ }
+ else{
+ string fileName = distFile + "." + toString(groupIDA) + ".temp";
+ ifstream fileA(fileName.c_str());
+ while(fileA){
+ fileA >> row >> column >> distance;
+ outFile << row << '\t' << column << '\t' << distance << endl;
+ gobble(fileA);
+ }
+ fileA.close();
+ remove(fileName.c_str());
+ }
+ }
+ }
+ }
+ gobble(dFile);
+ }
+ outFile.close();
+ dFile.close();
+
+ ifstream bigNameFile(namefile.c_str());
+ if(!bigNameFile){
+ cerr << "Error: We can't open the name file\n";
+ exit(1);
+ }
+
+ map<string, string> nameMap;
+ string name, nameList;
+ while(bigNameFile){
+ bigNameFile >> name >> nameList;
+ nameMap[name] = nameList;
+ gobble(bigNameFile);
+ }
+ bigNameFile.close();
+
+ for(int i=0;i<numGroups;i++){ //parse names file to match distance files
+ int numSeqsInGroup = groups[i].size();
+
+ if(numSeqsInGroup > 0){
+ string fileName = namefile + "." + toString(i) + ".temp";
+ ofstream smallNameFile(fileName.c_str(), ios::ate);
+
+ for(set<string>::iterator gIt=groups[i].begin();gIt!=groups[i].end();gIt++){
+ map<string,string>::iterator nIt = nameMap.find(*gIt);
+
+ if (nIt != nameMap.end()) {
+ smallNameFile << nIt->first << '\t' << nIt->second << endl;
+ nameMap.erase(nIt);
+ }else{
+ m->mothurOut((*gIt) + " is in your distance file and not in your namefile. Please correct."); m->mothurOutEndLine(); exit(1);
+ }
+ }
+ smallNameFile.close();
+ }
+ }
+
+ //names of singletons
+ if (nameMap.size() != 0) {
+ singleton = namefile + ".extra.temp";
+ ofstream remainingNames(singleton.c_str(), ios::ate);
+ for(map<string,string>::iterator nIt=nameMap.begin();nIt!=nameMap.end();nIt++){
+ remainingNames << nIt->first << '\t' << nIt->second << endl;
+ }
+ remainingNames.close();
+ }else { singleton = "none"; }
+
+ for(int i=0;i<numGroups;i++){
+ if(groups[i].size() > 0){
+ string tempNameFile = namefile + "." + toString(i) + ".temp";
+ string tempDistFile = distFile + "." + toString(i) + ".temp";
+
+ map<string, string> temp;
+ temp[tempDistFile] = tempNameFile;
+ dists.push_back(temp);
+ }
+ }
+
+ return 0;
+
+ }
+ catch(exception& e) {
+ m->errorOut(e, "SplitMatrix", "split");
+ exit(1);
+ }
+}
+//********************************************************************************************************************
+//sorts biggest to smallest
+inline bool compareFileSizes(map<string, string> left, map<string, string> right){
+
+ FILE * pFile;
+ long leftsize = 0;
+
+ //get num bytes in file
+ string filename = left.begin()->first;
+ pFile = fopen (filename.c_str(),"rb");
+ string error = "Error opening " + filename;
+ if (pFile==NULL) perror (error.c_str());
+ else{
+ fseek (pFile, 0, SEEK_END);
+ leftsize=ftell (pFile);
+ fclose (pFile);
+ }
+
+ FILE * pFile2;
+ long rightsize = 0;
+
+ //get num bytes in file
+ filename = right.begin()->first;
+ pFile2 = fopen (filename.c_str(),"rb");
+ error = "Error opening " + filename;
+ if (pFile2==NULL) perror (error.c_str());
+ else{
+ fseek (pFile2, 0, SEEK_END);
+ rightsize=ftell (pFile2);
+ fclose (pFile2);
+ }
+
+ return (leftsize > rightsize);
+}
+/***********************************************************************/
+//returns map of distance files -> namefile sorted by distance file size
+vector< map< string, string> > SplitMatrix::getDistanceFiles(){
+ try {
+
+ sort(dists.begin(), dists.end(), compareFileSizes);
+
+ return dists;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "SplitMatrix", "getDistanceFiles");
+ exit(1);
+ }
+}
+/***********************************************************************/
+SplitMatrix::~SplitMatrix(){}
+/***********************************************************************/
+
--- /dev/null
+#ifndef SPLITMATRIX_H
+#define SPLITMATRIX_H
+/*
+ * splitmatrix.h
+ * Mothur
+ *
+ * Created by westcott on 5/19/10.
+ * Copyright 2010 Schloss Lab. All rights reserved.
+ *
+ */
+
+
+#include "mothur.h"
+#include "mothurout.h"
+
+/******************************************************/
+
+class SplitMatrix {
+
+ public:
+
+ SplitMatrix(string, string, float); //column formatted distance file, namesfile, cutoff
+ ~SplitMatrix();
+ int split();
+ vector< map<string, string> > getDistanceFiles(); //returns map of distance files -> namefile sorted by distance file size
+ string getSingletonNames() { return singleton; } //returns namesfile containing singletons
+
+ private:
+ string distFile, namefile, singleton;
+ vector< map< string, string> > dists;
+ float cutoff;
+
+ MothurOut* m;
+};
+
+/******************************************************/
+
+#endif
+
SuffixDB::~SuffixDB(){
for (int i = (suffixForest.size()-1); i >= 0; i--) { suffixForest.pop_back(); }
}
-#ifdef USE_MPI
-/**************************************************************************************************/
-int SuffixDB::MPISend(int receiver) {
- try {
-
- //send numSeqs - int
- MPI_Send(&numSeqs, 1, MPI_INT, receiver, 2001, MPI_COMM_WORLD);
-
- return 0;
- }
- catch(exception& e) {
- m->errorOut(e, "SuffixDB", "MPISend");
- exit(1);
- }
-}
-/**************************************************************************************************/
-int SuffixDB::MPIRecv(int sender) {
- try {
- MPI_Status status;
- //receive numSeqs - int
- MPI_Recv(&numSeqs, 1, MPI_INT, sender, 2001, MPI_COMM_WORLD, &status);
-
- suffixForest.resize(numSeqs);
-
- return 0;
- }
- catch(exception& e) {
- m->errorOut(e, "SuffixDB", "MPIRecv");
- exit(1);
- }
-}
-#endif
/**************************************************************************************************/
void generateDB() {}; //adding sequences generates the db
void addSequence(Sequence);
vector<int> findClosestSequences(Sequence*, int);
-
- #ifdef USE_MPI
- int MPISend(int); //just sends numSeqs
- int MPIRecv(int);
- #endif
private:
vector<SuffixTree> suffixForest;
if(isFile == true) {
#ifdef USE_MPI
- int pid;
+ int pid, processors;
+ MPI_Status status;
MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
+ MPI_Comm_size(MPI_COMM_WORLD, &processors);
if (pid == 0) {
#endif
in.close();
#ifdef USE_MPI
- MPI_Bcast(&ableToOpen, 1, MPI_INT, 0, MPI_COMM_WORLD); //send ableToOPen
+ for(int i = 1; i < processors; i++) {
+ MPI_Send(&ableToOpen, 1, MPI_INT, i, 2001, MPI_COMM_WORLD);
+ }
}else {
- MPI_Bcast(&ableToOpen, 1, MPI_INT, 0, MPI_COMM_WORLD); //get ableToOPen
+ MPI_Recv(&ableToOpen, 1, MPI_INT, 0, 2001, MPI_COMM_WORLD, &status);
}
+
+ MPI_Barrier(MPI_COMM_WORLD); //make everyone wait - just in case
#endif
if (ableToOpen == 1) { return "not open"; }