From 17a6a53298a907c005fa93fb82af9e533adcda09 Mon Sep 17 00:00:00 2001 From: westcott Date: Thu, 11 Mar 2010 13:20:55 +0000 Subject: [PATCH] testing mpi --- bellerophon.cpp | 3 + chimeraseqscommand.cpp | 3 +- commandfactory.cpp | 15 +- commandfactory.hpp | 1 + engine.cpp | 136 +++++++++++++++++- filters.h | 3 +- filterseqscommand.cpp | 315 +++++++++++++++++++++++++++++++++++------ filterseqscommand.h | 5 + getseqscommand.cpp | 7 +- mothur.cpp | 15 +- mothur.h | 3 + mothurout.h | 1 - removeseqscommand.cpp | 7 +- sequence.cpp | 170 +++++++++++++++++----- sequence.hpp | 3 + sparsematrix.hpp | 1 + 16 files changed, 590 insertions(+), 98 deletions(-) diff --git a/bellerophon.cpp b/bellerophon.cpp index 2c545fe..54dfb9b 100644 --- a/bellerophon.cpp +++ b/bellerophon.cpp @@ -197,6 +197,7 @@ int Bellerophon::getChimeras() { if (m->control_pressed) { delete SparseLeft; delete SparseRight; return 0; } + left.clear(); right.clear(); vector distMapRight; vector distMapLeft; @@ -251,6 +252,8 @@ int Bellerophon::getChimeras() { //sort Preferences highest to lowest sort(pref.begin(), pref.end(), comparePref); + for (int i = 0; i < seqs.size(); i++) { delete seqs[i]; } seqs.clear(); + return 0; } diff --git a/chimeraseqscommand.cpp b/chimeraseqscommand.cpp index 69ce031..65e082b 100644 --- a/chimeraseqscommand.cpp +++ b/chimeraseqscommand.cpp @@ -340,7 +340,8 @@ int ChimeraSeqsCommand::execute(){ m->mothurOut(outputFileName); m->mothurOutEndLine(); if (hasAccnos) { m->mothurOut(accnosFileName); m->mothurOutEndLine(); } m->mothurOutEndLine(); - + + delete chimera; return 0; } diff --git a/commandfactory.cpp b/commandfactory.cpp index dd97b2f..b4f3654 100644 --- a/commandfactory.cpp +++ b/commandfactory.cpp @@ -117,7 +117,7 @@ CommandFactory::CommandFactory(){ commands["bootstrap.shared"] = "bootstrap.shared"; //commands["consensus"] = "consensus"; commands["help"] = "help"; - commands["filter.seqs"] = "filter.seqs"; + commands["filter.seqs"] = "MPIEnabled"; commands["align.seqs"] = "align.seqs"; commands["summary.seqs"] = "summary.seqs"; commands["screen.seqs"] = "screen.seqs"; @@ -131,7 +131,7 @@ CommandFactory::CommandFactory(){ commands["align.check"] = "align.check"; commands["get.sharedseqs"] = "get.sharedseqs"; commands["get.otulist"] = "get.otulist"; - commands["quit"] = "quit"; + commands["quit"] = "MPIEnabled"; commands["hcluster"] = "hcluster"; commands["classify.seqs"] = "classify.seqs"; commands["phylotype"] = "phylotype"; @@ -145,6 +145,17 @@ CommandFactory::CommandFactory(){ } /***********************************************************/ +/***********************************************************/ +bool CommandFactory::MPIEnabled(string commandName) { + bool mpi = false; + it = commands.find(commandName); + if (it != commands.end()) { + if (it->second == "MPIEnabled") { return true; } + } + return mpi; +} +/***********************************************************/ + /***********************************************************/ CommandFactory::~CommandFactory(){ _uniqueInstance = 0; diff --git a/commandfactory.hpp b/commandfactory.hpp index e1f62da..6f96e90 100644 --- a/commandfactory.hpp +++ b/commandfactory.hpp @@ -25,6 +25,7 @@ public: void setOutputDirectory(string o) { outputDir = o; } void setInputDirectory(string i) { inputDir = i; } string getOutputDir() { return outputDir; } + bool MPIEnabled(string); private: Command* command; diff --git a/engine.cpp b/engine.cpp index 3bdc484..2d5fd75 100644 --- a/engine.cpp +++ b/engine.cpp @@ -54,8 +54,18 @@ bool InteractEngine::getInput(){ mout->mothurOutEndLine(); input = getCommand(); + #ifdef USE_MPI + int pid; + MPI_Comm_rank(MPI_COMM_WORLD, &pid); + + if (pid == 0) { + #endif mout->mothurOutEndLine(); + #ifdef USE_MPI + } + #endif + if (mout->control_pressed) { input = "quit()"; } //allow user to omit the () on the quit command @@ -68,11 +78,22 @@ bool InteractEngine::getInput(){ if (commandName != "") { mout->executing = true; + + #ifdef USE_MPI + int pid; + MPI_Comm_rank(MPI_COMM_WORLD, &pid); + + if ((cFactory->MPIEnabled(commandName)) || (pid == 0)) { + #endif //executes valid command Command* command = cFactory->getCommand(commandName, options); quitCommandCalled = command->execute(); mout->control_pressed = 0; mout->executing = false; + + #ifdef USE_MPI + } + #endif }else { mout->mothurOut("Your input contains errors. Please try again."); mout->mothurOutEndLine(); @@ -99,21 +120,83 @@ string Engine::getCommand() { cout << nextCommand << endl; } + #ifdef USE_MPI + int pid; + MPI_Comm_rank(MPI_COMM_WORLD, &pid); + + if (pid == 0) { //only one process should output to screen + #endif + mout->mothurOutJustToLog("mothur > " + toString(nextCommand)); + + #ifdef USE_MPI + } + #endif + return nextCommand; #else string nextCommand = ""; + #ifdef USE_MPI + int pid; + MPI_Comm_rank(MPI_COMM_WORLD, &pid); + + if (pid == 0) { //only one process should output to screen + #endif + mout->mothurOut("mothur > "); + + #ifdef USE_MPI + } + #endif + getline(cin, nextCommand); + + #ifdef USE_MPI + int pid; + MPI_Comm_rank(MPI_COMM_WORLD, &pid); + + if (pid == 0) { //only one process should output to screen + #endif + mout->mothurOutJustToLog("mothur > " + toString(nextCommand)); + + #ifdef USE_MPI + } + #endif + return nextCommand; #endif #else string nextCommand = ""; - mout->mothurOut("mothur > "); - getline(cin, nextCommand); - mout->mothurOutJustToLog("mothur > " + toString(nextCommand)); - return nextCommand; + #ifdef USE_MPI + int pid; + MPI_Comm_rank(MPI_COMM_WORLD, &pid); + + if (pid == 0) { //only one process should output to screen + #endif + + mout->mothurOut("mothur > "); + + #ifdef USE_MPI + } + #endif + + getline(cin, nextCommand); + + #ifdef USE_MPI + int pid; + MPI_Comm_rank(MPI_COMM_WORLD, &pid); + + if (pid == 0) { //only one process should output to screen + #endif + + mout->mothurOutJustToLog(toString(nextCommand)); + + #ifdef USE_MPI + } + #endif + + return nextCommand; #endif mout->mothurOutEndLine(); @@ -169,10 +252,21 @@ bool BatchEngine::getInput(){ if (input[0] != '#') { + #ifdef USE_MPI + int pid; + MPI_Comm_rank(MPI_COMM_WORLD, &pid); + + if (pid == 0) { //only one process should output to screen + #endif + mout->mothurOutEndLine(); mout->mothurOut("mothur > " + input); mout->mothurOutEndLine(); + #ifdef USE_MPI + } + #endif + if (mout->control_pressed) { input = "quit()"; } //allow user to omit the () on the quit command @@ -184,11 +278,21 @@ bool BatchEngine::getInput(){ if (commandName != "") { mout->executing = true; + #ifdef USE_MPI + int pid; + MPI_Comm_rank(MPI_COMM_WORLD, &pid); + + if ((cFactory->MPIEnabled(commandName)) || (pid == 0)) { + #endif //executes valid command Command* command = cFactory->getCommand(commandName, options); quitCommandCalled = command->execute(); mout->control_pressed = 0; mout->executing = false; + + #ifdef USE_MPI + } + #endif }else { mout->mothurOut("Invalid."); mout->mothurOutEndLine(); @@ -250,11 +354,21 @@ bool ScriptEngine::getInput(){ if (input == "") { input = "quit()"; } - + #ifdef USE_MPI + int pid; + MPI_Comm_rank(MPI_COMM_WORLD, &pid); + + if (pid == 0) { + #endif + mout->mothurOutEndLine(); mout->mothurOut("mothur > " + input); mout->mothurOutEndLine(); - + + #ifdef USE_MPI + } + #endif + if (mout->control_pressed) { input = "quit()"; } //allow user to omit the () on the quit command @@ -266,11 +380,21 @@ bool ScriptEngine::getInput(){ if (commandName != "") { mout->executing = true; + #ifdef USE_MPI + int pid; + MPI_Comm_rank(MPI_COMM_WORLD, &pid); + + if ((cFactory->MPIEnabled(commandName)) || (pid == 0)) { + #endif //executes valid command Command* command = cFactory->getCommand(commandName, options); quitCommandCalled = command->execute(); mout->control_pressed = 0; mout->executing = false; + + #ifdef USE_MPI + } + #endif }else { mout->mothurOut("Invalid."); mout->mothurOutEndLine(); diff --git a/filters.h b/filters.h index 4c39607..94f8d33 100644 --- a/filters.h +++ b/filters.h @@ -28,6 +28,8 @@ public: void setSoft(float s) { soft = s; } void setTrump(float t) { trump = t; } void setNumSeqs(int num) { numSeqs = num; } + vector a, t, g, c, gap; + void initialize() { a.assign(alignmentLength, 0); @@ -89,7 +91,6 @@ public: protected: string filter; - vector a, t, g, c, gap; int alignmentLength, numSeqs; float soft; char trump; diff --git a/filterseqscommand.cpp b/filterseqscommand.cpp index 0e33ab2..326be01 100644 --- a/filterseqscommand.cpp +++ b/filterseqscommand.cpp @@ -131,6 +131,13 @@ FilterSeqsCommand::FilterSeqsCommand(string option) { void FilterSeqsCommand::help(){ try { + #ifdef USE_MPI + int pid; + MPI_Comm_rank(MPI_COMM_WORLD, &pid); + + if (pid == 0) { + #endif + m->mothurOut("The filter.seqs command reads a file containing sequences and creates a .filter and .filter.fasta file.\n"); m->mothurOut("The filter.seqs command parameters are fasta, trump, soft, hard and vertical. \n"); m->mothurOut("The fasta parameter is required. You may enter several fasta files to build the filter from and filter, by separating their names with -'s.\n"); @@ -144,6 +151,10 @@ void FilterSeqsCommand::help(){ m->mothurOut("Example filter.seqs(fasta=abrecovery.fasta, trump=..., soft=..., hard=..., vertical=T).\n"); m->mothurOut("Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFasta).\n\n"); + #ifdef USE_MPI + } + #endif + } catch(exception& e) { m->errorOut(e, "FilterSeqsCommand", "help"); @@ -221,7 +232,13 @@ int FilterSeqsCommand::execute() { if (m->control_pressed) { for(int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; } - + #ifdef USE_MPI + int pid; + MPI_Comm_rank(MPI_COMM_WORLD, &pid); + + if (pid == 0) { + #endif + m->mothurOutEndLine(); m->mothurOut("Length of filtered alignment: " + toString(filteredLength)); m->mothurOutEndLine(); m->mothurOut("Number of columns removed: " + toString((alignmentLength-filteredLength))); m->mothurOutEndLine(); @@ -233,6 +250,10 @@ int FilterSeqsCommand::execute() { m->mothurOut("Output File Names: "); m->mothurOutEndLine(); for(int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); } m->mothurOutEndLine(); + + #ifdef USE_MPI + } + #endif return 0; @@ -267,72 +288,156 @@ string FilterSeqsCommand::createFilter() { for (int s = 0; s < fastafileNames.size(); s++) { for (int i = 0; i < lines.size(); i++) { delete lines[i]; } lines.clear(); + +#ifdef USE_MPI + int pid, rc, ierr; + char* buf; + int Atag = 1; int Ttag = 2; int Ctag = 3; int Gtag = 4; int Gaptag = 5; - #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) - if(processors == 1){ - ifstream inFASTA; - openInputFile(fastafileNames[s], inFASTA); - int numFastaSeqs=count(istreambuf_iterator(inFASTA),istreambuf_iterator(), '>'); - inFASTA.close(); + MPI_Status status; + MPI_File in; + rc = MPI_Comm_size(MPI_COMM_WORLD, &processors); + rc = MPI_Comm_rank(MPI_COMM_WORLD, &pid); - numSeqs += numFastaSeqs; - - lines.push_back(new linePair(0, numFastaSeqs)); - - driverCreateFilter(F, fastafileNames[s], lines[0]); - }else{ - vector positions; - - ifstream inFASTA; - openInputFile(fastafileNames[s], inFASTA); - - string input; - while(!inFASTA.eof()){ - input = getline(inFASTA); - if (input.length() != 0) { - if(input[0] == '>'){ long int pos = inFASTA.tellg(); positions.push_back(pos - input.length() - 1); } + + char* tempFileName = &(fastafileNames[s][0]); + MPI_File_open(MPI_COMM_WORLD, tempFileName, MPI_MODE_RDONLY, MPI_INFO_NULL, &in); //comm, filename, mode, info, filepointer + + if (pid == 0) { //you are the root process + setLines(fastafileNames[s]); + + for (int j = 0; j < lines.size(); j++) { //each process + if (j != 0) { //don't send to yourself + MPI_Send(&lines[j]->start, 1, MPI_INT, j, 2001, MPI_COMM_WORLD); //start position in file + MPI_Send(&lines[j]->numSeqs, 1, MPI_INT, j, 2001, MPI_COMM_WORLD); //how many sequences we are sending + MPI_Send(&bufferSizes[j], 1, MPI_INT, j, 2001, MPI_COMM_WORLD); //how bytes for the read } } - inFASTA.close(); - - int numFastaSeqs = positions.size(); + cout << "done sending" << endl; + cout << "parent = " << pid << " lines = " << lines[pid]->start << '\t' << lines[pid]->numSeqs << " size = " << lines.size() << endl; + + buf = new char(bufferSizes[0]); + cout << pid << '\t' << bufferSizes[0] << " line 1 start pos = " << lines[1]->start << " buffer size 0 " << bufferSizes[0] << " buffer size 1 " << bufferSizes[1] << endl; + MPI_File_read_at(in, 0, buf, bufferSizes[0], MPI_CHAR, &status); + + cout << pid << " done reading " << endl; + string tempBuf = buf; + cout << pid << '\t' << (tempBuf.substr(0, 10)) << endl; + //do your part + MPICreateFilter(F, tempBuf); + + vector temp; temp.resize(numSeqs); + + //get the frequencies from the child processes + for(int i = 0; i < ((processors-1)*5); i++) { + cout << "i = " << i << endl; + int ierr = MPI_Recv(&temp, numSeqs, MPI_INT, MPI_ANY_SOURCE, 2001, MPI_COMM_WORLD, &status); + + int receiveTag = temp[temp.size()-1]; //child process added a int to the end to indicate what letter count this is for + + int sender = status.MPI_SOURCE; + + if (receiveTag == Atag) { //you are recieveing the A frequencies + for (int k = 0; k < alignmentLength; k++) { F.a[k] += temp[k]; } + }else if (receiveTag == Ttag) { //you are recieveing the T frequencies + for (int k = 0; k < alignmentLength; k++) { F.t[k] += temp[k]; } + }else if (receiveTag == Ctag) { //you are recieveing the C frequencies + for (int k = 0; k < alignmentLength; k++) { F.c[k] += temp[k]; } + }else if (receiveTag == Gtag) { //you are recieveing the G frequencies + for (int k = 0; k < alignmentLength; k++) { F.g[k] += temp[k]; } + }else if (receiveTag == Gaptag) { //you are recieveing the gap frequencies + for (int k = 0; k < alignmentLength; k++) { F.gap[k] += temp[k]; } + } + + m->mothurOut("receive tag = " + toString(receiveTag) + " " + toString(sender) + " is complete."); m->mothurOutEndLine(); + } + - numSeqs += numFastaSeqs; + }else { //i am the child process + int startPos, numLines, bufferSize; + cout << "child = " << pid << endl; + ierr = MPI_Recv(&startPos, 1, MPI_INT, 0, 2001, MPI_COMM_WORLD, &status); + ierr = MPI_Recv(&numLines, 1, MPI_INT, 0, 2001, MPI_COMM_WORLD, &status); + ierr = MPI_Recv(&bufferSize, 1, MPI_INT, 0, 2001, MPI_COMM_WORLD, &status); + cout << "child = " << pid << " done recv messages startpos = " << startPos << " numLines = " << numLines << " buffersize = " << bufferSize << endl; - int numSeqsPerProcessor = numFastaSeqs / processors; + + //send freqs + char* buf2 = new char(bufferSize); + MPI_File_read_at( in, startPos, buf2, bufferSize, MPI_CHAR, &status); + cout << pid << " done reading " << endl; + + string tempBuf = buf2; + cout << pid << '\t' << (tempBuf.substr(0, 10)) << endl; + MPICreateFilter(F, tempBuf); - for (int i = 0; i < processors; i++) { - long int startPos = positions[ i * numSeqsPerProcessor ]; - if(i == processors - 1){ - numSeqsPerProcessor = numFastaSeqs - i * numSeqsPerProcessor; - } - lines.push_back(new linePair(startPos, numSeqsPerProcessor)); - } + //send my fequency counts + F.a.push_back(Atag); + int ierr = MPI_Send( &F.a[0], alignmentLength, MPI_INT, 0, 2001, MPI_COMM_WORLD); + F.t.push_back(Ttag); + ierr = MPI_Send( &F.t[0], alignmentLength, MPI_INT, 0, 2001, MPI_COMM_WORLD); + F.c.push_back(Ctag); + ierr = MPI_Send( &F.c[0], alignmentLength, MPI_INT, 0, 2001, MPI_COMM_WORLD); + F.g.push_back(Gtag); + ierr = MPI_Send( &F.g[0], alignmentLength, MPI_INT, 0, 2001, MPI_COMM_WORLD); + F.gap.push_back(Gaptag); + ierr = MPI_Send( &F.gap[0], alignmentLength, MPI_INT, 0, 2001, MPI_COMM_WORLD); + + cout << "child " << pid << " done sending counts" << endl; + } - createProcessesCreateFilter(F, fastafileNames[s]); - } - #else + MPI_Barrier(MPI_COMM_WORLD); + +#else + #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) + if(processors == 1){ ifstream inFASTA; openInputFile(fastafileNames[s], inFASTA); int numFastaSeqs=count(istreambuf_iterator(inFASTA),istreambuf_iterator(), '>'); inFASTA.close(); - + numSeqs += numFastaSeqs; - + lines.push_back(new linePair(0, numFastaSeqs)); + + driverCreateFilter(F, fastafileNames[s], lines[0]); + }else{ + + setLines(fastafileNames[s]); + createProcessesCreateFilter(F, fastafileNames[s]); + } + #else + ifstream inFASTA; + openInputFile(fastafileNames[s], inFASTA); + int numFastaSeqs=count(istreambuf_iterator(inFASTA),istreambuf_iterator(), '>'); + inFASTA.close(); - driverCreateFilter(F, lines[0], fastafileNames[s]); - #endif - + numSeqs += numFastaSeqs; + + lines.push_back(new linePair(0, numFastaSeqs)); + + driverCreateFilter(F, lines[0], fastafileNames[s]); + #endif +#endif } } - + +#ifdef USE_MPI + +//merge all frequency data and create filter string + //int pid; + //MPI_Comm_rank(MPI_COMM_WORLD, &pid); + + //if (pid == 0) { //only one process should output to screen +#endif + + cout << "made it here" << endl; F.setNumSeqs(numSeqs); if(isTrue(vertical) == 1) { F.doVertical(); } if(soft != 0) { F.doSoft(); } - +//cout << "Filter String = " << F.getFilter() << endl; filterString = F.getFilter(); return filterString; @@ -361,8 +466,14 @@ int FilterSeqsCommand::driverCreateFilter(Filters& F, string filename, linePair* if(isTrue(vertical) || soft != 0){ F.getFreqs(seq); } cout.flush(); } + + //report progress + if((i+1) % 100 == 0){ m->mothurOut(toString(i+1)); m->mothurOutEndLine(); } } - + + //report progress + if((line->numSeqs) % 100 != 0){ m->mothurOut(toString(line->numSeqs)); m->mothurOutEndLine(); } + in.close(); return 0; @@ -372,6 +483,38 @@ int FilterSeqsCommand::driverCreateFilter(Filters& F, string filename, linePair* exit(1); } } +/**************************************************************************************/ +int FilterSeqsCommand::MPICreateFilter(Filters& F, string temp) { + try { + + vector seqStrings; + parseBuffer(temp, seqStrings); + + for(int i=0;icontrol_pressed) { return 1; } + + Sequence seq("", seqStrings[0]); + + if(trump != '*'){ F.doTrump(seq); } + if(isTrue(vertical) || soft != 0){ F.getFreqs(seq); } + cout.flush(); + + //report progress + if((i+1) % 100 == 0){ m->mothurOut(toString(i+1)); m->mothurOutEndLine(); } + } + + //report progress + if((seqStrings.size()) % 100 != 0){ m->mothurOut(toString(seqStrings.size())); m->mothurOutEndLine(); } + + return 0; + } + catch(exception& e) { + m->errorOut(e, "FilterSeqsCommand", "MPICreateFilter"); + exit(1); + } +} + /**************************************************************************************************/ int FilterSeqsCommand::createProcessesCreateFilter(Filters& F, string filename) { @@ -408,4 +551,84 @@ int FilterSeqsCommand::createProcessesCreateFilter(Filters& F, string filename) exit(1); } } +/**************************************************************************************************/ + +int FilterSeqsCommand::setLines(string filename) { + try { + vector positions; + map buf; + bufferSizes.clear(); + + int pid; + MPI_Comm_rank(MPI_COMM_WORLD, &pid); + + ifstream inFASTA; + openInputFile(filename, inFASTA); + + string input; + int numbuf = 0; + while(!inFASTA.eof()){ + input = getline(inFASTA); + + if (input.length() != 0) { + numbuf += input.length(); + if(input[0] == '>'){ long int pos = inFASTA.tellg(); positions.push_back(pos - input.length() - 1); buf[(pos - input.length() - 1)] = numbuf; } + } + } + + inFASTA.close(); + + int numFastaSeqs = positions.size(); + + numSeqs += numFastaSeqs; + + int numSeqsPerProcessor = numFastaSeqs / processors; + + for (int i = 0; i < processors; i++) { + + long int startPos = positions[ i * numSeqsPerProcessor ]; + if(i == processors - 1){ + numSeqsPerProcessor = numFastaSeqs - i * numSeqsPerProcessor; + bufferSizes.push_back(numbuf-buf[startPos]); + }else{ + int myEnd = positions[ (i+1) * numSeqsPerProcessor ]; + bufferSizes.push_back(buf[myEnd]-buf[startPos]); + } + lines.push_back(new linePair(startPos, numSeqsPerProcessor)); + } + + return 0; + } + catch(exception& e) { + m->errorOut(e, "FilterSeqsCommand", "setLines"); + exit(1); + } +} +/**************************************************************************************************/ +int FilterSeqsCommand::parseBuffer(string file, vector& seqs) { + try { + + istringstream iss (file,istringstream::in); + string name, seqstring; + + while (iss) { + + if (m->control_pressed) { return 0; } + cout << "here" << endl; + Sequence seq(iss); + cout << "here1" << endl; + gobble(iss); + cout << seq.getName() << endl; + if (seq.getName() != "") { + seqs.push_back(seq.getAligned()); + } + } + + return 0; + } + catch(exception& e) { + m->errorOut(e, "FilterSeqsCommand", "parseBuffer"); + exit(1); + } +} /**************************************************************************************/ diff --git a/filterseqscommand.h b/filterseqscommand.h index 232b0ac..3cc007c 100644 --- a/filterseqscommand.h +++ b/filterseqscommand.h @@ -35,6 +35,7 @@ private: string vertical, filter, fasta, hard, outputDir, filterFileName; vector fastafileNames; int alignmentLength, processors; + vector bufferSizes; char trump; bool abort; @@ -44,6 +45,10 @@ private: string createFilter(); int createProcessesCreateFilter(Filters&, string); int driverCreateFilter(Filters&, string, linePair*); + int MPICreateFilter(Filters&, string); + int setLines(string); + int parseBuffer(string, vector&); + }; #endif diff --git a/getseqscommand.cpp b/getseqscommand.cpp index 7535e65..f40c89b 100644 --- a/getseqscommand.cpp +++ b/getseqscommand.cpp @@ -277,10 +277,13 @@ int GetSeqsCommand::readList(){ } //get last name - if (names.count(binnames) == 1) { newNames += binnames; } + if (names.count(binnames) == 1) { newNames += binnames + ","; } //if there are names in this bin add to new list - if (newNames != "") { newList.push_back(newNames); } + if (newNames != "") { + newNames = newNames.substr(0, newNames.length()-1); //rip off extra comma + newList.push_back(newNames); + } } //print new listvector diff --git a/mothur.cpp b/mothur.cpp index a3eef76..6631489 100644 --- a/mothur.cpp +++ b/mothur.cpp @@ -12,6 +12,7 @@ #include "globaldata.hpp" #include "mothurout.h" + /**************************************************************************************************/ GlobalData* GlobalData::_uniqueInstance = 0; @@ -99,7 +100,11 @@ int main(int argc, char *argv[]){ m->mothurOutEndLine(); m->mothurOut("Type 'quit()' to exit program"); m->mothurOutEndLine(); - + + #ifdef USE_MPI + m->mothurOutJustToLog("Using MPI\n"); + MPI_Init(&argc, &argv); + #endif //srand(54321); srand( (unsigned)time( NULL ) ); @@ -130,7 +135,7 @@ int main(int argc, char *argv[]){ mothur = new InteractEngine(argv[0]); } - while(bail == 0) { bail = mothur->getInput(); } + while(bail == 0) { bail = mothur->getInput(); } string outputDir = mothur->getOutputDir(); string newlogFileName = outputDir + logFileName; @@ -139,7 +144,11 @@ int main(int argc, char *argv[]){ rename(logFileName.c_str(), newlogFileName.c_str()); //logfile with timestamp delete mothur; - + + #ifdef USE_MPI + MPI_Finalize(); + #endif + return 0; } catch(exception& e) { diff --git a/mothur.h b/mothur.h index 16a9844..7904425 100644 --- a/mothur.h +++ b/mothur.h @@ -47,6 +47,9 @@ #include #include +#ifdef USE_MPI + #include "mpi.h" +#endif /***********************************************************************/ #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) diff --git a/mothurout.h b/mothurout.h index d9c8e5d..7b4732b 100644 --- a/mothurout.h +++ b/mothurout.h @@ -26,7 +26,6 @@ class MothurOut { void errorOut(exception&, string, string); int control_pressed; bool executing; - private: static MothurOut* _uniqueInstance; diff --git a/removeseqscommand.cpp b/removeseqscommand.cpp index e8565d0..07e946b 100644 --- a/removeseqscommand.cpp +++ b/removeseqscommand.cpp @@ -279,10 +279,13 @@ int RemoveSeqsCommand::readList(){ } //get last name - if (names.count(binnames) == 0) { newNames += binnames; } + if (names.count(binnames) == 0) { newNames += binnames + ","; } //if there are names in this bin add to new list - if (newNames != "") { newList.push_back(newNames); } + if (newNames != "") { + newNames = newNames.substr(0, newNames.length()-1); //rip off extra comma + newList.push_back(newNames); + } } //print new listvector diff --git a/sequence.cpp b/sequence.cpp index 94caf65..b73bfab 100644 --- a/sequence.cpp +++ b/sequence.cpp @@ -19,46 +19,101 @@ Sequence::Sequence(){ /***********************************************************************/ Sequence::Sequence(string newName, string sequence) { - - initialize(); - name = newName; - - //setUnaligned removes any gap characters for us - setUnaligned(sequence); - setAligned(sequence); - + try { + m = MothurOut::getInstance(); + initialize(); + name = newName; + + //setUnaligned removes any gap characters for us + setUnaligned(sequence); + setAligned(sequence); + } + catch(exception& e) { + m->errorOut(e, "Sequence", "Sequence"); + exit(1); + } } //******************************************************************************************************************** //this function will jump over commented out sequences, but if the last sequence in a file is commented out it makes a blank seq -Sequence::Sequence(ifstream& fastaFile){ - - initialize(); - fastaFile >> name; - name = name.substr(1); - string sequence; - - //read comments - while ((name[0] == '#') && fastaFile) { - while (!fastaFile.eof()) { char c = fastaFile.get(); if (c == 10 || c == 13){ break; } } // get rest of line if there's any crap there - sequence = getCommentString(fastaFile); +Sequence::Sequence(istringstream& fastaString){ + try { + m = MothurOut::getInstance(); + int pid; + MPI_Comm_rank(MPI_COMM_WORLD, &pid); + cout << pid << " after mothur instance " << &name << endl; + initialize(); + cout << "after mothur initialize" << endl; + fastaString >> name; + cout << "after name " << endl; + name = name.substr(1); + string sequence; - if (fastaFile) { - fastaFile >> name; - name = name.substr(1); - }else { - name = ""; - break; + //read comments + while ((name[0] == '#') && fastaString) { + while (fastaString) { char c = fastaString.get(); if (c == 10 || c == 13){ break; } } // get rest of line if there's any crap there + sequence = getCommentString(fastaString); + + if (fastaString) { + fastaString >> name; + name = name.substr(1); + }else { + name = ""; + break; + } } + cout << "after mothur comment" << endl; + //read real sequence + while (fastaString) { char c = fastaString.get(); if (c == 10 || c == 13){ break; } } // get rest of line if there's any crap there + cout << "after mothur name" << endl; + sequence = getSequenceString(fastaString); + cout << "after mothur sequence" << endl; + setAligned(sequence); + //setUnaligned removes any gap characters for us + setUnaligned(sequence); } - - //read real sequence - while (!fastaFile.eof()) { char c = fastaFile.get(); if (c == 10 || c == 13){ break; } } // get rest of line if there's any crap there + catch(exception& e) { + m->errorOut(e, "Sequence", "Sequence"); + exit(1); + } +} + +//******************************************************************************************************************** +//this function will jump over commented out sequences, but if the last sequence in a file is commented out it makes a blank seq +Sequence::Sequence(ifstream& fastaFile){ + try { + m = MothurOut::getInstance(); + initialize(); + fastaFile >> name; + name = name.substr(1); + string sequence; + + //read comments + while ((name[0] == '#') && fastaFile) { + while (!fastaFile.eof()) { char c = fastaFile.get(); if (c == 10 || c == 13){ break; } } // get rest of line if there's any crap there + sequence = getCommentString(fastaFile); + + if (fastaFile) { + fastaFile >> name; + name = name.substr(1); + }else { + name = ""; + break; + } + } + + //read real sequence + while (!fastaFile.eof()) { char c = fastaFile.get(); if (c == 10 || c == 13){ break; } } // get rest of line if there's any crap there + + sequence = getSequenceString(fastaFile); - sequence = getSequenceString(fastaFile); - - setAligned(sequence); - //setUnaligned removes any gap characters for us - setUnaligned(sequence); + setAligned(sequence); + //setUnaligned removes any gap characters for us + setUnaligned(sequence); + } + catch(exception& e) { + m->errorOut(e, "Sequence", "Sequence"); + exit(1); + } } //******************************************************************************************************************** string Sequence::getSequenceString(ifstream& fastaFile) { @@ -108,7 +163,54 @@ string Sequence::getCommentString(ifstream& fastaFile) { exit(1); } } - +//******************************************************************************************************************** +string Sequence::getSequenceString(istringstream& fastaFile) { + try { + char letter; + string sequence = ""; + + while(fastaFile){ + letter= fastaFile.get(); + if(letter == '>'){ + fastaFile.putback(letter); + break; + } + else if(isprint(letter)){ + letter = toupper(letter); + if(letter == 'U'){letter = 'T';} + sequence += letter; + } + } + + return sequence; + } + catch(exception& e) { + m->errorOut(e, "Sequence", "getSequenceString"); + exit(1); + } +} +//******************************************************************************************************************** +//comment can contain '>' so we need to account for that +string Sequence::getCommentString(istringstream& fastaFile) { + try { + char letter; + string sequence = ""; + + while(fastaFile){ + letter=fastaFile.get(); + if((letter == '\r') || (letter == '\n')){ + gobble(fastaFile); //in case its a \r\n situation + break; + } + } + + return sequence; + } + catch(exception& e) { + m->errorOut(e, "Sequence", "getCommentString"); + exit(1); + } +} //******************************************************************************************************************** void Sequence::initialize(){ diff --git a/sequence.hpp b/sequence.hpp index f48bf13..5f84d44 100644 --- a/sequence.hpp +++ b/sequence.hpp @@ -24,6 +24,7 @@ public: Sequence(); Sequence(string, string); Sequence(ifstream&); + Sequence(istringstream&); void setName(string); void setUnaligned(string); @@ -51,6 +52,8 @@ private: void initialize(); string getSequenceString(ifstream&); string getCommentString(ifstream&); + string getSequenceString(istringstream&); + string getCommentString(istringstream&); string name; string unaligned; string aligned; diff --git a/sparsematrix.hpp b/sparsematrix.hpp index e42fdbb..c8a11d7 100644 --- a/sparsematrix.hpp +++ b/sparsematrix.hpp @@ -27,6 +27,7 @@ class SparseMatrix { public: SparseMatrix(); + ~SparseMatrix(){ while(!mins.empty() && mins.back() == NULL){ mins.pop_back(); } } int getNNodes(); void print(); //Print the contents of the matrix void print(ListVector*); //Print the contents of the matrix -- 2.39.2