X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=classify.cpp;h=22728eb7a3ee52a57926e5457a33d73c9a1d7d07;hb=cd985cf388dcc4c7de8251339206aec5f7e12f1e;hp=be287a05069c394def3f1cf48257f25b863db3be;hpb=6f4b9401f7deb8aaf0d87659298308f4138cc3b0;p=mothur.git diff --git a/classify.cpp b/classify.cpp index be287a0..22728eb 100644 --- a/classify.cpp +++ b/classify.cpp @@ -27,12 +27,14 @@ void Classify::generateDatabaseAndNames(string tfile, string tempFile, string me m->mothurOut("Generating search database... "); cout.flush(); #ifdef USE_MPI - int pid; + int pid, processors; vector positions; + int tag = 2001; MPI_Status status; MPI_File inMPI; MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are + MPI_Comm_size(MPI_COMM_WORLD, &processors); //char* inFileName = new char[tempFile.length()]; //memcpy(inFileName, tempFile.c_str(), tempFile.length()); @@ -47,12 +49,14 @@ void Classify::generateDatabaseAndNames(string tfile, string tempFile, string me positions = setFilePosFasta(tempFile, numSeqs); //fills MPIPos, returns numSeqs //send file positions to all processes - MPI_Bcast(&numSeqs, 1, MPI_INT, 0, MPI_COMM_WORLD); //send numSeqs - MPI_Bcast(&positions[0], (numSeqs+1), MPI_LONG, 0, MPI_COMM_WORLD); //send file pos + for(int i = 1; i < processors; i++) { + MPI_Send(&numSeqs, 1, MPI_INT, i, tag, MPI_COMM_WORLD); + MPI_Send(&positions[0], (numSeqs+1), MPI_LONG, i, tag, MPI_COMM_WORLD); + } }else{ - MPI_Bcast(&numSeqs, 1, MPI_INT, 0, MPI_COMM_WORLD); //get numSeqs - positions.resize(numSeqs); - MPI_Bcast(&positions[0], (numSeqs+1), MPI_LONG, 0, MPI_COMM_WORLD); //get file positions + MPI_Recv(&numSeqs, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &status); + positions.resize(numSeqs+1); + MPI_Recv(&positions[0], (numSeqs+1), MPI_LONG, 0, tag, MPI_COMM_WORLD, &status); } //create database @@ -86,13 +90,14 @@ void Classify::generateDatabaseAndNames(string tfile, string tempFile, string me database->generateDB(); MPI_File_close(&inMPI); + MPI_Barrier(MPI_COMM_WORLD); //make everyone wait - just in case #else //need to know number of template seqs for suffixdb if (method == "suffix") { ifstream inFASTA; openInputFile(tempFile, inFASTA); - numSeqs = count(istreambuf_iterator(inFASTA),istreambuf_iterator(), '>'); + getNumSeqs(inFASTA, numSeqs); inFASTA.close(); } @@ -123,7 +128,7 @@ void Classify::generateDatabaseAndNames(string tfile, string tempFile, string me gobble(fastaFile); names.push_back(temp.getName()); - + database->addSequence(temp); } fastaFile.close(); @@ -148,6 +153,11 @@ void Classify::generateDatabaseAndNames(string tfile, string tempFile, string me #endif database->setNumSeqs(names.size()); + //sanity check + bool okay = phyloTree->ErrorCheck(names); + + if (!okay) { m->control_pressed = true; } + m->mothurOut("DONE."); m->mothurOutEndLine(); m->mothurOut("It took " + toString(time(NULL) - start) + " seconds generate search database. "); m->mothurOutEndLine(); @@ -171,12 +181,14 @@ int Classify::readTaxonomy(string file) { m->mothurOut("Reading in the " + file + " taxonomy...\t"); cout.flush(); #ifdef USE_MPI - int pid, num; + int pid, num, processors; vector positions; + int tag = 2001; MPI_Status status; MPI_File inMPI; MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are + MPI_Comm_size(MPI_COMM_WORLD, &processors); //char* inFileName = new char[file.length()]; //memcpy(inFileName, file.c_str(), file.length()); @@ -191,12 +203,14 @@ int Classify::readTaxonomy(string file) { positions = setFilePosEachLine(file, num); //send file positions to all processes - MPI_Bcast(&num, 1, MPI_INT, 0, MPI_COMM_WORLD); //send numSeqs - MPI_Bcast(&positions[0], (num+1), MPI_LONG, 0, MPI_COMM_WORLD); //send file pos + for(int i = 1; i < processors; i++) { + MPI_Send(&num, 1, MPI_INT, i, tag, MPI_COMM_WORLD); + MPI_Send(&positions[0], (num+1), MPI_LONG, i, tag, MPI_COMM_WORLD); + } }else{ - MPI_Bcast(&num, 1, MPI_INT, 0, MPI_COMM_WORLD); //get numSeqs - positions.resize(num); - MPI_Bcast(&positions[0], (num+1), MPI_LONG, 0, MPI_COMM_WORLD); //get file positions + MPI_Recv(&num, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &status); + positions.resize(num+1); + MPI_Recv(&positions[0], (num+1), MPI_LONG, 0, tag, MPI_COMM_WORLD, &status); } //read file @@ -218,6 +232,7 @@ int Classify::readTaxonomy(string file) { } MPI_File_close(&inMPI); + MPI_Barrier(MPI_COMM_WORLD); //make everyone wait - just in case #else ifstream inTax; openInputFile(file, inTax); @@ -237,6 +252,8 @@ int Classify::readTaxonomy(string file) { phyloTree->assignHeirarchyIDs(0); + phyloTree->setUp(file); + m->mothurOut("DONE."); m->mothurOutEndLine(); cout.flush();