if (m->control_pressed) { delete SparseLeft; delete SparseRight; return 0; }
+ left.clear(); right.clear();
vector<SeqMap> distMapRight;
vector<SeqMap> distMapLeft;
//sort Preferences highest to lowest
sort(pref.begin(), pref.end(), comparePref);
+ for (int i = 0; i < seqs.size(); i++) { delete seqs[i]; } seqs.clear();
+
return 0;
}
m->mothurOut(outputFileName); m->mothurOutEndLine();
if (hasAccnos) { m->mothurOut(accnosFileName); m->mothurOutEndLine(); }
m->mothurOutEndLine();
-
+
+ delete chimera;
return 0;
}
commands["bootstrap.shared"] = "bootstrap.shared";
//commands["consensus"] = "consensus";
commands["help"] = "help";
- commands["filter.seqs"] = "filter.seqs";
+ commands["filter.seqs"] = "MPIEnabled";
commands["align.seqs"] = "align.seqs";
commands["summary.seqs"] = "summary.seqs";
commands["screen.seqs"] = "screen.seqs";
commands["align.check"] = "align.check";
commands["get.sharedseqs"] = "get.sharedseqs";
commands["get.otulist"] = "get.otulist";
- commands["quit"] = "quit";
+ commands["quit"] = "MPIEnabled";
commands["hcluster"] = "hcluster";
commands["classify.seqs"] = "classify.seqs";
commands["phylotype"] = "phylotype";
}
/***********************************************************/
+/***********************************************************/
+bool CommandFactory::MPIEnabled(string commandName) {
+ bool mpi = false;
+ it = commands.find(commandName);
+ if (it != commands.end()) {
+ if (it->second == "MPIEnabled") { return true; }
+ }
+ return mpi;
+}
+/***********************************************************/
+
/***********************************************************/
CommandFactory::~CommandFactory(){
_uniqueInstance = 0;
void setOutputDirectory(string o) { outputDir = o; }
void setInputDirectory(string i) { inputDir = i; }
string getOutputDir() { return outputDir; }
+ bool MPIEnabled(string);
private:
Command* command;
mout->mothurOutEndLine();
input = getCommand();
+ #ifdef USE_MPI
+ int pid;
+ MPI_Comm_rank(MPI_COMM_WORLD, &pid);
+
+ if (pid == 0) {
+ #endif
mout->mothurOutEndLine();
+ #ifdef USE_MPI
+ }
+ #endif
+
if (mout->control_pressed) { input = "quit()"; }
//allow user to omit the () on the quit command
if (commandName != "") {
mout->executing = true;
+
+ #ifdef USE_MPI
+ int pid;
+ MPI_Comm_rank(MPI_COMM_WORLD, &pid);
+
+ if ((cFactory->MPIEnabled(commandName)) || (pid == 0)) {
+ #endif
//executes valid command
Command* command = cFactory->getCommand(commandName, options);
quitCommandCalled = command->execute();
mout->control_pressed = 0;
mout->executing = false;
+
+ #ifdef USE_MPI
+ }
+ #endif
}else {
mout->mothurOut("Your input contains errors. Please try again.");
mout->mothurOutEndLine();
cout << nextCommand << endl;
}
+ #ifdef USE_MPI
+ int pid;
+ MPI_Comm_rank(MPI_COMM_WORLD, &pid);
+
+ if (pid == 0) { //only one process should output to screen
+ #endif
+
mout->mothurOutJustToLog("mothur > " + toString(nextCommand));
+
+ #ifdef USE_MPI
+ }
+ #endif
+
return nextCommand;
#else
string nextCommand = "";
+ #ifdef USE_MPI
+ int pid;
+ MPI_Comm_rank(MPI_COMM_WORLD, &pid);
+
+ if (pid == 0) { //only one process should output to screen
+ #endif
+
mout->mothurOut("mothur > ");
+
+ #ifdef USE_MPI
+ }
+ #endif
+
getline(cin, nextCommand);
+
+ #ifdef USE_MPI
+ int pid;
+ MPI_Comm_rank(MPI_COMM_WORLD, &pid);
+
+ if (pid == 0) { //only one process should output to screen
+ #endif
+
mout->mothurOutJustToLog("mothur > " + toString(nextCommand));
+
+ #ifdef USE_MPI
+ }
+ #endif
+
return nextCommand;
#endif
#else
string nextCommand = "";
- mout->mothurOut("mothur > ");
- getline(cin, nextCommand);
- mout->mothurOutJustToLog("mothur > " + toString(nextCommand));
- return nextCommand;
+ #ifdef USE_MPI
+ int pid;
+ MPI_Comm_rank(MPI_COMM_WORLD, &pid);
+
+ if (pid == 0) { //only one process should output to screen
+ #endif
+
+ mout->mothurOut("mothur > ");
+
+ #ifdef USE_MPI
+ }
+ #endif
+
+ getline(cin, nextCommand);
+
+ #ifdef USE_MPI
+ int pid;
+ MPI_Comm_rank(MPI_COMM_WORLD, &pid);
+
+ if (pid == 0) { //only one process should output to screen
+ #endif
+
+ mout->mothurOutJustToLog(toString(nextCommand));
+
+ #ifdef USE_MPI
+ }
+ #endif
+
+ return nextCommand;
#endif
mout->mothurOutEndLine();
if (input[0] != '#') {
+ #ifdef USE_MPI
+ int pid;
+ MPI_Comm_rank(MPI_COMM_WORLD, &pid);
+
+ if (pid == 0) { //only one process should output to screen
+ #endif
+
mout->mothurOutEndLine();
mout->mothurOut("mothur > " + input);
mout->mothurOutEndLine();
+ #ifdef USE_MPI
+ }
+ #endif
+
if (mout->control_pressed) { input = "quit()"; }
//allow user to omit the () on the quit command
if (commandName != "") {
mout->executing = true;
+ #ifdef USE_MPI
+ int pid;
+ MPI_Comm_rank(MPI_COMM_WORLD, &pid);
+
+ if ((cFactory->MPIEnabled(commandName)) || (pid == 0)) {
+ #endif
//executes valid command
Command* command = cFactory->getCommand(commandName, options);
quitCommandCalled = command->execute();
mout->control_pressed = 0;
mout->executing = false;
+
+ #ifdef USE_MPI
+ }
+ #endif
}else {
mout->mothurOut("Invalid.");
mout->mothurOutEndLine();
if (input == "") { input = "quit()"; }
-
+ #ifdef USE_MPI
+ int pid;
+ MPI_Comm_rank(MPI_COMM_WORLD, &pid);
+
+ if (pid == 0) {
+ #endif
+
mout->mothurOutEndLine();
mout->mothurOut("mothur > " + input);
mout->mothurOutEndLine();
-
+
+ #ifdef USE_MPI
+ }
+ #endif
+
if (mout->control_pressed) { input = "quit()"; }
//allow user to omit the () on the quit command
if (commandName != "") {
mout->executing = true;
+ #ifdef USE_MPI
+ int pid;
+ MPI_Comm_rank(MPI_COMM_WORLD, &pid);
+
+ if ((cFactory->MPIEnabled(commandName)) || (pid == 0)) {
+ #endif
//executes valid command
Command* command = cFactory->getCommand(commandName, options);
quitCommandCalled = command->execute();
mout->control_pressed = 0;
mout->executing = false;
+
+ #ifdef USE_MPI
+ }
+ #endif
}else {
mout->mothurOut("Invalid.");
mout->mothurOutEndLine();
void setSoft(float s) { soft = s; }
void setTrump(float t) { trump = t; }
void setNumSeqs(int num) { numSeqs = num; }
+ vector<int> a, t, g, c, gap;
+
void initialize() {
a.assign(alignmentLength, 0);
protected:
string filter;
- vector<int> a, t, g, c, gap;
int alignmentLength, numSeqs;
float soft;
char trump;
void FilterSeqsCommand::help(){
try {
+ #ifdef USE_MPI
+ int pid;
+ MPI_Comm_rank(MPI_COMM_WORLD, &pid);
+
+ if (pid == 0) {
+ #endif
+
m->mothurOut("The filter.seqs command reads a file containing sequences and creates a .filter and .filter.fasta file.\n");
m->mothurOut("The filter.seqs command parameters are fasta, trump, soft, hard and vertical. \n");
m->mothurOut("The fasta parameter is required. You may enter several fasta files to build the filter from and filter, by separating their names with -'s.\n");
m->mothurOut("Example filter.seqs(fasta=abrecovery.fasta, trump=..., soft=..., hard=..., vertical=T).\n");
m->mothurOut("Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFasta).\n\n");
+ #ifdef USE_MPI
+ }
+ #endif
+
}
catch(exception& e) {
m->errorOut(e, "FilterSeqsCommand", "help");
if (m->control_pressed) { for(int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; }
-
+ #ifdef USE_MPI
+ int pid;
+ MPI_Comm_rank(MPI_COMM_WORLD, &pid);
+
+ if (pid == 0) {
+ #endif
+
m->mothurOutEndLine();
m->mothurOut("Length of filtered alignment: " + toString(filteredLength)); m->mothurOutEndLine();
m->mothurOut("Number of columns removed: " + toString((alignmentLength-filteredLength))); m->mothurOutEndLine();
m->mothurOut("Output File Names: "); m->mothurOutEndLine();
for(int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); }
m->mothurOutEndLine();
+
+ #ifdef USE_MPI
+ }
+ #endif
return 0;
for (int s = 0; s < fastafileNames.size(); s++) {
for (int i = 0; i < lines.size(); i++) { delete lines[i]; } lines.clear();
+
+#ifdef USE_MPI
+ int pid, rc, ierr;
+ char* buf;
+ int Atag = 1; int Ttag = 2; int Ctag = 3; int Gtag = 4; int Gaptag = 5;
- #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
- if(processors == 1){
- ifstream inFASTA;
- openInputFile(fastafileNames[s], inFASTA);
- int numFastaSeqs=count(istreambuf_iterator<char>(inFASTA),istreambuf_iterator<char>(), '>');
- inFASTA.close();
+ MPI_Status status;
+ MPI_File in;
+ rc = MPI_Comm_size(MPI_COMM_WORLD, &processors);
+ rc = MPI_Comm_rank(MPI_COMM_WORLD, &pid);
- numSeqs += numFastaSeqs;
-
- lines.push_back(new linePair(0, numFastaSeqs));
-
- driverCreateFilter(F, fastafileNames[s], lines[0]);
- }else{
- vector<int> positions;
-
- ifstream inFASTA;
- openInputFile(fastafileNames[s], inFASTA);
-
- string input;
- while(!inFASTA.eof()){
- input = getline(inFASTA);
- if (input.length() != 0) {
- if(input[0] == '>'){ long int pos = inFASTA.tellg(); positions.push_back(pos - input.length() - 1); }
+
+ char* tempFileName = &(fastafileNames[s][0]);
+ MPI_File_open(MPI_COMM_WORLD, tempFileName, MPI_MODE_RDONLY, MPI_INFO_NULL, &in); //comm, filename, mode, info, filepointer
+
+ if (pid == 0) { //you are the root process
+ setLines(fastafileNames[s]);
+
+ for (int j = 0; j < lines.size(); j++) { //each process
+ if (j != 0) { //don't send to yourself
+ MPI_Send(&lines[j]->start, 1, MPI_INT, j, 2001, MPI_COMM_WORLD); //start position in file
+ MPI_Send(&lines[j]->numSeqs, 1, MPI_INT, j, 2001, MPI_COMM_WORLD); //how many sequences we are sending
+ MPI_Send(&bufferSizes[j], 1, MPI_INT, j, 2001, MPI_COMM_WORLD); //how bytes for the read
}
}
- inFASTA.close();
-
- int numFastaSeqs = positions.size();
+ cout << "done sending" << endl;
+ cout << "parent = " << pid << " lines = " << lines[pid]->start << '\t' << lines[pid]->numSeqs << " size = " << lines.size() << endl;
+
+ buf = new char(bufferSizes[0]);
+ cout << pid << '\t' << bufferSizes[0] << " line 1 start pos = " << lines[1]->start << " buffer size 0 " << bufferSizes[0] << " buffer size 1 " << bufferSizes[1] << endl;
+ MPI_File_read_at(in, 0, buf, bufferSizes[0], MPI_CHAR, &status);
+
+ cout << pid << " done reading " << endl;
+ string tempBuf = buf;
+ cout << pid << '\t' << (tempBuf.substr(0, 10)) << endl;
+ //do your part
+ MPICreateFilter(F, tempBuf);
+
+ vector<int> temp; temp.resize(numSeqs);
+
+ //get the frequencies from the child processes
+ for(int i = 0; i < ((processors-1)*5); i++) {
+ cout << "i = " << i << endl;
+ int ierr = MPI_Recv(&temp, numSeqs, MPI_INT, MPI_ANY_SOURCE, 2001, MPI_COMM_WORLD, &status);
+
+ int receiveTag = temp[temp.size()-1]; //child process added a int to the end to indicate what letter count this is for
+
+ int sender = status.MPI_SOURCE;
+
+ if (receiveTag == Atag) { //you are recieveing the A frequencies
+ for (int k = 0; k < alignmentLength; k++) { F.a[k] += temp[k]; }
+ }else if (receiveTag == Ttag) { //you are recieveing the T frequencies
+ for (int k = 0; k < alignmentLength; k++) { F.t[k] += temp[k]; }
+ }else if (receiveTag == Ctag) { //you are recieveing the C frequencies
+ for (int k = 0; k < alignmentLength; k++) { F.c[k] += temp[k]; }
+ }else if (receiveTag == Gtag) { //you are recieveing the G frequencies
+ for (int k = 0; k < alignmentLength; k++) { F.g[k] += temp[k]; }
+ }else if (receiveTag == Gaptag) { //you are recieveing the gap frequencies
+ for (int k = 0; k < alignmentLength; k++) { F.gap[k] += temp[k]; }
+ }
+
+ m->mothurOut("receive tag = " + toString(receiveTag) + " " + toString(sender) + " is complete."); m->mothurOutEndLine();
+ }
+
- numSeqs += numFastaSeqs;
+ }else { //i am the child process
+ int startPos, numLines, bufferSize;
+ cout << "child = " << pid << endl;
+ ierr = MPI_Recv(&startPos, 1, MPI_INT, 0, 2001, MPI_COMM_WORLD, &status);
+ ierr = MPI_Recv(&numLines, 1, MPI_INT, 0, 2001, MPI_COMM_WORLD, &status);
+ ierr = MPI_Recv(&bufferSize, 1, MPI_INT, 0, 2001, MPI_COMM_WORLD, &status);
+ cout << "child = " << pid << " done recv messages startpos = " << startPos << " numLines = " << numLines << " buffersize = " << bufferSize << endl;
- int numSeqsPerProcessor = numFastaSeqs / processors;
+
+ //send freqs
+ char* buf2 = new char(bufferSize);
+ MPI_File_read_at( in, startPos, buf2, bufferSize, MPI_CHAR, &status);
+ cout << pid << " done reading " << endl;
+
+ string tempBuf = buf2;
+ cout << pid << '\t' << (tempBuf.substr(0, 10)) << endl;
+ MPICreateFilter(F, tempBuf);
- for (int i = 0; i < processors; i++) {
- long int startPos = positions[ i * numSeqsPerProcessor ];
- if(i == processors - 1){
- numSeqsPerProcessor = numFastaSeqs - i * numSeqsPerProcessor;
- }
- lines.push_back(new linePair(startPos, numSeqsPerProcessor));
- }
+ //send my fequency counts
+ F.a.push_back(Atag);
+ int ierr = MPI_Send( &F.a[0], alignmentLength, MPI_INT, 0, 2001, MPI_COMM_WORLD);
+ F.t.push_back(Ttag);
+ ierr = MPI_Send( &F.t[0], alignmentLength, MPI_INT, 0, 2001, MPI_COMM_WORLD);
+ F.c.push_back(Ctag);
+ ierr = MPI_Send( &F.c[0], alignmentLength, MPI_INT, 0, 2001, MPI_COMM_WORLD);
+ F.g.push_back(Gtag);
+ ierr = MPI_Send( &F.g[0], alignmentLength, MPI_INT, 0, 2001, MPI_COMM_WORLD);
+ F.gap.push_back(Gaptag);
+ ierr = MPI_Send( &F.gap[0], alignmentLength, MPI_INT, 0, 2001, MPI_COMM_WORLD);
+
+ cout << "child " << pid << " done sending counts" << endl;
+ }
- createProcessesCreateFilter(F, fastafileNames[s]);
- }
- #else
+ MPI_Barrier(MPI_COMM_WORLD);
+
+#else
+ #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
+ if(processors == 1){
ifstream inFASTA;
openInputFile(fastafileNames[s], inFASTA);
int numFastaSeqs=count(istreambuf_iterator<char>(inFASTA),istreambuf_iterator<char>(), '>');
inFASTA.close();
-
+
numSeqs += numFastaSeqs;
-
+
lines.push_back(new linePair(0, numFastaSeqs));
+
+ driverCreateFilter(F, fastafileNames[s], lines[0]);
+ }else{
+
+ setLines(fastafileNames[s]);
+ createProcessesCreateFilter(F, fastafileNames[s]);
+ }
+ #else
+ ifstream inFASTA;
+ openInputFile(fastafileNames[s], inFASTA);
+ int numFastaSeqs=count(istreambuf_iterator<char>(inFASTA),istreambuf_iterator<char>(), '>');
+ inFASTA.close();
- driverCreateFilter(F, lines[0], fastafileNames[s]);
- #endif
-
+ numSeqs += numFastaSeqs;
+
+ lines.push_back(new linePair(0, numFastaSeqs));
+
+ driverCreateFilter(F, lines[0], fastafileNames[s]);
+ #endif
+#endif
}
}
-
+
+#ifdef USE_MPI
+
+//merge all frequency data and create filter string
+ //int pid;
+ //MPI_Comm_rank(MPI_COMM_WORLD, &pid);
+
+ //if (pid == 0) { //only one process should output to screen
+#endif
+
+ cout << "made it here" << endl;
F.setNumSeqs(numSeqs);
if(isTrue(vertical) == 1) { F.doVertical(); }
if(soft != 0) { F.doSoft(); }
-
+//cout << "Filter String = " << F.getFilter() << endl;
filterString = F.getFilter();
return filterString;
if(isTrue(vertical) || soft != 0){ F.getFreqs(seq); }
cout.flush();
}
+
+ //report progress
+ if((i+1) % 100 == 0){ m->mothurOut(toString(i+1)); m->mothurOutEndLine(); }
}
-
+
+ //report progress
+ if((line->numSeqs) % 100 != 0){ m->mothurOut(toString(line->numSeqs)); m->mothurOutEndLine(); }
+
in.close();
return 0;
exit(1);
}
}
+/**************************************************************************************/
+int FilterSeqsCommand::MPICreateFilter(Filters& F, string temp) {
+ try {
+
+ vector<string> seqStrings;
+ parseBuffer(temp, seqStrings);
+
+ for(int i=0;i<seqStrings.size();i++){
+
+ if (m->control_pressed) { return 1; }
+
+ Sequence seq("", seqStrings[0]);
+
+ if(trump != '*'){ F.doTrump(seq); }
+ if(isTrue(vertical) || soft != 0){ F.getFreqs(seq); }
+ cout.flush();
+
+ //report progress
+ if((i+1) % 100 == 0){ m->mothurOut(toString(i+1)); m->mothurOutEndLine(); }
+ }
+
+ //report progress
+ if((seqStrings.size()) % 100 != 0){ m->mothurOut(toString(seqStrings.size())); m->mothurOutEndLine(); }
+
+ return 0;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "FilterSeqsCommand", "MPICreateFilter");
+ exit(1);
+ }
+}
+
/**************************************************************************************************/
int FilterSeqsCommand::createProcessesCreateFilter(Filters& F, string filename) {
exit(1);
}
}
+/**************************************************************************************************/
+
+int FilterSeqsCommand::setLines(string filename) {
+ try {
+ vector<int> positions;
+ map<int, int> buf;
+ bufferSizes.clear();
+
+ int pid;
+ MPI_Comm_rank(MPI_COMM_WORLD, &pid);
+
+ ifstream inFASTA;
+ openInputFile(filename, inFASTA);
+
+ string input;
+ int numbuf = 0;
+ while(!inFASTA.eof()){
+ input = getline(inFASTA);
+
+ if (input.length() != 0) {
+ numbuf += input.length();
+ if(input[0] == '>'){ long int pos = inFASTA.tellg(); positions.push_back(pos - input.length() - 1); buf[(pos - input.length() - 1)] = numbuf; }
+ }
+ }
+
+ inFASTA.close();
+
+ int numFastaSeqs = positions.size();
+
+ numSeqs += numFastaSeqs;
+
+ int numSeqsPerProcessor = numFastaSeqs / processors;
+
+ for (int i = 0; i < processors; i++) {
+
+ long int startPos = positions[ i * numSeqsPerProcessor ];
+ if(i == processors - 1){
+ numSeqsPerProcessor = numFastaSeqs - i * numSeqsPerProcessor;
+ bufferSizes.push_back(numbuf-buf[startPos]);
+ }else{
+ int myEnd = positions[ (i+1) * numSeqsPerProcessor ];
+ bufferSizes.push_back(buf[myEnd]-buf[startPos]);
+ }
+ lines.push_back(new linePair(startPos, numSeqsPerProcessor));
+ }
+
+ return 0;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "FilterSeqsCommand", "setLines");
+ exit(1);
+ }
+}
+/**************************************************************************************************/
+int FilterSeqsCommand::parseBuffer(string file, vector<string>& seqs) {
+ try {
+
+ istringstream iss (file,istringstream::in);
+ string name, seqstring;
+
+ while (iss) {
+
+ if (m->control_pressed) { return 0; }
+ cout << "here" << endl;
+ Sequence seq(iss);
+ cout << "here1" << endl;
+ gobble(iss);
+ cout << seq.getName() << endl;
+ if (seq.getName() != "") {
+ seqs.push_back(seq.getAligned());
+ }
+ }
+
+ return 0;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "FilterSeqsCommand", "parseBuffer");
+ exit(1);
+ }
+}
/**************************************************************************************/
string vertical, filter, fasta, hard, outputDir, filterFileName;
vector<string> fastafileNames;
int alignmentLength, processors;
+ vector<int> bufferSizes;
char trump;
bool abort;
string createFilter();
int createProcessesCreateFilter(Filters&, string);
int driverCreateFilter(Filters&, string, linePair*);
+ int MPICreateFilter(Filters&, string);
+ int setLines(string);
+ int parseBuffer(string, vector<string>&);
+
};
#endif
}
//get last name
- if (names.count(binnames) == 1) { newNames += binnames; }
+ if (names.count(binnames) == 1) { newNames += binnames + ","; }
//if there are names in this bin add to new list
- if (newNames != "") { newList.push_back(newNames); }
+ if (newNames != "") {
+ newNames = newNames.substr(0, newNames.length()-1); //rip off extra comma
+ newList.push_back(newNames);
+ }
}
//print new listvector
#include "globaldata.hpp"
#include "mothurout.h"
+
/**************************************************************************************************/
GlobalData* GlobalData::_uniqueInstance = 0;
m->mothurOutEndLine();
m->mothurOut("Type 'quit()' to exit program");
m->mothurOutEndLine();
-
+
+ #ifdef USE_MPI
+ m->mothurOutJustToLog("Using MPI\n");
+ MPI_Init(&argc, &argv);
+ #endif
//srand(54321);
srand( (unsigned)time( NULL ) );
mothur = new InteractEngine(argv[0]);
}
- while(bail == 0) { bail = mothur->getInput(); }
+ while(bail == 0) { bail = mothur->getInput(); }
string outputDir = mothur->getOutputDir();
string newlogFileName = outputDir + logFileName;
rename(logFileName.c_str(), newlogFileName.c_str()); //logfile with timestamp
delete mothur;
-
+
+ #ifdef USE_MPI
+ MPI_Finalize();
+ #endif
+
return 0;
}
catch(exception& e) {
#include <ctime>
#include <limits>
+#ifdef USE_MPI
+ #include "mpi.h"
+#endif
/***********************************************************************/
#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
void errorOut(exception&, string, string);
int control_pressed;
bool executing;
-
private:
static MothurOut* _uniqueInstance;
}
//get last name
- if (names.count(binnames) == 0) { newNames += binnames; }
+ if (names.count(binnames) == 0) { newNames += binnames + ","; }
//if there are names in this bin add to new list
- if (newNames != "") { newList.push_back(newNames); }
+ if (newNames != "") {
+ newNames = newNames.substr(0, newNames.length()-1); //rip off extra comma
+ newList.push_back(newNames);
+ }
}
//print new listvector
/***********************************************************************/
Sequence::Sequence(string newName, string sequence) {
-
- initialize();
- name = newName;
-
- //setUnaligned removes any gap characters for us
- setUnaligned(sequence);
- setAligned(sequence);
-
+ try {
+ m = MothurOut::getInstance();
+ initialize();
+ name = newName;
+
+ //setUnaligned removes any gap characters for us
+ setUnaligned(sequence);
+ setAligned(sequence);
+ }
+ catch(exception& e) {
+ m->errorOut(e, "Sequence", "Sequence");
+ exit(1);
+ }
}
//********************************************************************************************************************
//this function will jump over commented out sequences, but if the last sequence in a file is commented out it makes a blank seq
-Sequence::Sequence(ifstream& fastaFile){
-
- initialize();
- fastaFile >> name;
- name = name.substr(1);
- string sequence;
-
- //read comments
- while ((name[0] == '#') && fastaFile) {
- while (!fastaFile.eof()) { char c = fastaFile.get(); if (c == 10 || c == 13){ break; } } // get rest of line if there's any crap there
- sequence = getCommentString(fastaFile);
+Sequence::Sequence(istringstream& fastaString){
+ try {
+ m = MothurOut::getInstance();
+ int pid;
+ MPI_Comm_rank(MPI_COMM_WORLD, &pid);
+ cout << pid << " after mothur instance " << &name << endl;
+ initialize();
+ cout << "after mothur initialize" << endl;
+ fastaString >> name;
+ cout << "after name " << endl;
+ name = name.substr(1);
+ string sequence;
- if (fastaFile) {
- fastaFile >> name;
- name = name.substr(1);
- }else {
- name = "";
- break;
+ //read comments
+ while ((name[0] == '#') && fastaString) {
+ while (fastaString) { char c = fastaString.get(); if (c == 10 || c == 13){ break; } } // get rest of line if there's any crap there
+ sequence = getCommentString(fastaString);
+
+ if (fastaString) {
+ fastaString >> name;
+ name = name.substr(1);
+ }else {
+ name = "";
+ break;
+ }
}
+ cout << "after mothur comment" << endl;
+ //read real sequence
+ while (fastaString) { char c = fastaString.get(); if (c == 10 || c == 13){ break; } } // get rest of line if there's any crap there
+ cout << "after mothur name" << endl;
+ sequence = getSequenceString(fastaString);
+ cout << "after mothur sequence" << endl;
+ setAligned(sequence);
+ //setUnaligned removes any gap characters for us
+ setUnaligned(sequence);
}
-
- //read real sequence
- while (!fastaFile.eof()) { char c = fastaFile.get(); if (c == 10 || c == 13){ break; } } // get rest of line if there's any crap there
+ catch(exception& e) {
+ m->errorOut(e, "Sequence", "Sequence");
+ exit(1);
+ }
+}
+
+//********************************************************************************************************************
+//this function will jump over commented out sequences, but if the last sequence in a file is commented out it makes a blank seq
+Sequence::Sequence(ifstream& fastaFile){
+ try {
+ m = MothurOut::getInstance();
+ initialize();
+ fastaFile >> name;
+ name = name.substr(1);
+ string sequence;
+
+ //read comments
+ while ((name[0] == '#') && fastaFile) {
+ while (!fastaFile.eof()) { char c = fastaFile.get(); if (c == 10 || c == 13){ break; } } // get rest of line if there's any crap there
+ sequence = getCommentString(fastaFile);
+
+ if (fastaFile) {
+ fastaFile >> name;
+ name = name.substr(1);
+ }else {
+ name = "";
+ break;
+ }
+ }
+
+ //read real sequence
+ while (!fastaFile.eof()) { char c = fastaFile.get(); if (c == 10 || c == 13){ break; } } // get rest of line if there's any crap there
+
+ sequence = getSequenceString(fastaFile);
- sequence = getSequenceString(fastaFile);
-
- setAligned(sequence);
- //setUnaligned removes any gap characters for us
- setUnaligned(sequence);
+ setAligned(sequence);
+ //setUnaligned removes any gap characters for us
+ setUnaligned(sequence);
+ }
+ catch(exception& e) {
+ m->errorOut(e, "Sequence", "Sequence");
+ exit(1);
+ }
}
//********************************************************************************************************************
string Sequence::getSequenceString(ifstream& fastaFile) {
exit(1);
}
}
-
+//********************************************************************************************************************
+string Sequence::getSequenceString(istringstream& fastaFile) {
+ try {
+ char letter;
+ string sequence = "";
+
+ while(fastaFile){
+ letter= fastaFile.get();
+ if(letter == '>'){
+ fastaFile.putback(letter);
+ break;
+ }
+ else if(isprint(letter)){
+ letter = toupper(letter);
+ if(letter == 'U'){letter = 'T';}
+ sequence += letter;
+ }
+ }
+
+ return sequence;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "Sequence", "getSequenceString");
+ exit(1);
+ }
+}
+//********************************************************************************************************************
+//comment can contain '>' so we need to account for that
+string Sequence::getCommentString(istringstream& fastaFile) {
+ try {
+ char letter;
+ string sequence = "";
+
+ while(fastaFile){
+ letter=fastaFile.get();
+ if((letter == '\r') || (letter == '\n')){
+ gobble(fastaFile); //in case its a \r\n situation
+ break;
+ }
+ }
+
+ return sequence;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "Sequence", "getCommentString");
+ exit(1);
+ }
+}
//********************************************************************************************************************
void Sequence::initialize(){
Sequence();
Sequence(string, string);
Sequence(ifstream&);
+ Sequence(istringstream&);
void setName(string);
void setUnaligned(string);
void initialize();
string getSequenceString(ifstream&);
string getCommentString(ifstream&);
+ string getSequenceString(istringstream&);
+ string getCommentString(istringstream&);
string name;
string unaligned;
string aligned;
public:
SparseMatrix();
+ ~SparseMatrix(){ while(!mins.empty() && mins.back() == NULL){ mins.pop_back(); } }
int getNNodes();
void print(); //Print the contents of the matrix
void print(ListVector*); //Print the contents of the matrix