m->mothurOut("Generating search database... "); cout.flush();
#ifdef USE_MPI
- int pid;
- vector<long> positions;
+ int pid, processors;
+ vector<unsigned long int> positions;
+ int tag = 2001;
MPI_Status status;
MPI_File inMPI;
MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
+ MPI_Comm_size(MPI_COMM_WORLD, &processors);
//char* inFileName = new char[tempFile.length()];
//memcpy(inFileName, tempFile.c_str(), tempFile.length());
//delete inFileName;
if (pid == 0) { //only one process needs to scan file
- positions = setFilePosFasta(tempFile, numSeqs); //fills MPIPos, returns numSeqs
+ positions = m->setFilePosFasta(tempFile, numSeqs); //fills MPIPos, returns numSeqs
//send file positions to all processes
- MPI_Bcast(&numSeqs, 1, MPI_INT, 0, MPI_COMM_WORLD); //send numSeqs
- MPI_Bcast(&positions[0], (numSeqs+1), MPI_LONG, 0, MPI_COMM_WORLD); //send file pos
+ for(int i = 1; i < processors; i++) { //rank 0 sends to every other rank in turn
+ MPI_Send(&numSeqs, 1, MPI_INT, i, tag, MPI_COMM_WORLD); //count goes first so the receiver can size its buffer
+ MPI_Send(&positions[0], (numSeqs+1), MPI_UNSIGNED_LONG, i, tag, MPI_COMM_WORLD); //positions holds unsigned long int, so use the matching MPI datatype
+ }
}else{
- MPI_Bcast(&numSeqs, 1, MPI_INT, 0, MPI_COMM_WORLD); //get numSeqs
- positions.resize(numSeqs);
- MPI_Bcast(&positions[0], (numSeqs+1), MPI_LONG, 0, MPI_COMM_WORLD); //get file positions
+ MPI_Recv(&numSeqs, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &status); //get numSeqs from rank 0
+ positions.resize(numSeqs+1); //room for the numSeqs+1 entries that rank 0 sends
+ MPI_Recv(&positions[0], (numSeqs+1), MPI_UNSIGNED_LONG, 0, tag, MPI_COMM_WORLD, &status); //get file positions; datatype must match the sender's
}
//create database
database->generateDB();
MPI_File_close(&inMPI);
+ MPI_Barrier(MPI_COMM_WORLD); //make everyone wait - just in case
#else
//need to know number of template seqs for suffixdb
if (method == "suffix") {
ifstream inFASTA;
- openInputFile(tempFile, inFASTA);
- numSeqs = count(istreambuf_iterator<char>(inFASTA),istreambuf_iterator<char>(), '>');
+ m->openInputFile(tempFile, inFASTA);
+ m->getNumSeqs(inFASTA, numSeqs);
inFASTA.close();
}
kmerDBName = tempFile.substr(0,tempFile.find_last_of(".")+1) + char('0'+ kmerSize) + "mer";
ifstream kmerFileTest(kmerDBName.c_str());
- if(kmerFileTest){ needToGenerate = false; }
+ if(kmerFileTest){
+ bool GoodFile = m->checkReleaseVersion(kmerFileTest, m->getVersion());
+ if (GoodFile) { needToGenerate = false; }
+ }
}
else if(method == "suffix") { database = new SuffixDB(numSeqs); }
else if(method == "blast") { database = new BlastDB(gapOpen, gapExtend, match, misMatch); }
if (needToGenerate) {
ifstream fastaFile;
- openInputFile(tempFile, fastaFile);
+ m->openInputFile(tempFile, fastaFile);
while (!fastaFile.eof()) {
Sequence temp(fastaFile);
- gobble(fastaFile);
+ m->gobble(fastaFile);
names.push_back(temp.getName());
-
+
database->addSequence(temp);
}
fastaFile.close();
}else if ((method == "kmer") && (!needToGenerate)) {
ifstream kmerFileTest(kmerDBName.c_str());
database->readKmerDB(kmerFileTest);
-
+
ifstream fastaFile;
- openInputFile(tempFile, fastaFile);
+ m->openInputFile(tempFile, fastaFile);
while (!fastaFile.eof()) {
Sequence temp(fastaFile);
- gobble(fastaFile);
+ m->gobble(fastaFile);
names.push_back(temp.getName());
}
fastaFile.close();
}
-#endif
+#endif
+
database->setNumSeqs(names.size());
+ //sanity check
+ bool okay = phyloTree->ErrorCheck(names);
+
+ if (!okay) { m->control_pressed = true; }
+
m->mothurOut("DONE."); m->mothurOutEndLine();
m->mothurOut("It took " + toString(time(NULL) - start) + " seconds generate search database. "); m->mothurOutEndLine();
m->mothurOut("Reading in the " + file + " taxonomy...\t"); cout.flush();
#ifdef USE_MPI
- int pid, num;
- vector<long> positions;
+ int pid, num, processors;
+ vector<unsigned long int> positions;
+ int tag = 2001;
MPI_Status status;
MPI_File inMPI;
MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
+ MPI_Comm_size(MPI_COMM_WORLD, &processors);
//char* inFileName = new char[file.length()];
//memcpy(inFileName, file.c_str(), file.length());
//delete inFileName;
if (pid == 0) {
- positions = setFilePosEachLine(file, num);
+ positions = m->setFilePosEachLine(file, num);
//send file positions to all processes
- MPI_Bcast(&num, 1, MPI_INT, 0, MPI_COMM_WORLD); //send numSeqs
- MPI_Bcast(&positions[0], (num+1), MPI_LONG, 0, MPI_COMM_WORLD); //send file pos
+ for(int i = 1; i < processors; i++) { //rank 0 sends to every other rank in turn
+ MPI_Send(&num, 1, MPI_INT, i, tag, MPI_COMM_WORLD); //count goes first so the receiver can size its buffer
+ MPI_Send(&positions[0], (num+1), MPI_UNSIGNED_LONG, i, tag, MPI_COMM_WORLD); //positions holds unsigned long int, so use the matching MPI datatype
+ }
}else{
- MPI_Bcast(&num, 1, MPI_INT, 0, MPI_COMM_WORLD); //get numSeqs
- positions.resize(num);
- MPI_Bcast(&positions[0], (num+1), MPI_LONG, 0, MPI_COMM_WORLD); //get file positions
+ MPI_Recv(&num, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &status); //get num from rank 0
+ positions.resize(num+1); //room for the num+1 entries that rank 0 sends
+ MPI_Recv(&positions[0], (num+1), MPI_UNSIGNED_LONG, 0, tag, MPI_COMM_WORLD, &status); //get file positions; datatype must match the sender's
}
//read file
}
MPI_File_close(&inMPI);
+ MPI_Barrier(MPI_COMM_WORLD); //make everyone wait - just in case
#else
ifstream inTax;
- openInputFile(file, inTax);
+ m->openInputFile(file, inTax);
//read template seqs and save
while (!inTax.eof()) {
phyloTree->addSeqToTree(name, taxInfo);
- gobble(inTax);
+ m->gobble(inTax);
}
inTax.close();
#endif
phyloTree->assignHeirarchyIDs(0);
+ phyloTree->setUp(file);
+
m->mothurOut("DONE.");
m->mothurOutEndLine(); cout.flush();