X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=classify.cpp;h=557f17c6b85c7eb61865591ad3a8481ffa38ed38;hb=fdc1f6eaf544f695fc1511f24bddd7e6069c33ba;hp=346c76478eefdf896fe6f04659174321bc721063;hpb=c3f0a9c8f932b923f3a6fbbf143e8f4b85fd6f5f;p=mothur.git diff --git a/classify.cpp b/classify.cpp index 346c764..557f17c 100644 --- a/classify.cpp +++ b/classify.cpp @@ -17,11 +17,71 @@ /**************************************************************************************************/ Classify::Classify(string tfile, string tempFile, string method, int kmerSize, float gapOpen, float gapExtend, float match, float misMatch) : taxFile(tfile), templateFile(tempFile) { try { - + m = MothurOut::getInstance(); readTaxonomy(taxFile); int start = time(NULL); int numSeqs = 0; + + m->mothurOut("Generating search database... "); cout.flush(); +#ifdef USE_MPI + int pid; + vector positions; + + MPI_Status status; + MPI_File inMPI; + MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are + + char inFileName[tempFile.length()]; + strcpy(inFileName, tempFile.c_str()); + + MPI_File_open(MPI_COMM_WORLD, inFileName, MPI_MODE_RDONLY, MPI_INFO_NULL, &inMPI); //comm, filename, mode, info, filepointer + + if (pid == 0) { //only one process needs to scan file + positions = setFilePosFasta(tempFile, numSeqs); //fills MPIPos, returns numSeqs + + //send file positions to all processes + MPI_Bcast(&numSeqs, 1, MPI_INT, 0, MPI_COMM_WORLD); //send numSeqs + MPI_Bcast(&positions[0], (numSeqs+1), MPI_LONG, 0, MPI_COMM_WORLD); //send file pos + }else{ + MPI_Bcast(&numSeqs, 1, MPI_INT, 0, MPI_COMM_WORLD); //get numSeqs + positions.resize(numSeqs); + MPI_Bcast(&positions[0], (numSeqs+1), MPI_LONG, 0, MPI_COMM_WORLD); //get file positions + } + + //create database + if(method == "kmer") { database = new KmerDB(tempFile, kmerSize); } + else if(method == "suffix") { database = new SuffixDB(numSeqs); } + else if(method == "blast") { database = new BlastDB(gapOpen, gapExtend, match, misMatch); } + else if(method == "distance") { database = new DistanceDB(); } + else { + m->mothurOut(method + " is not a valid search option. I will run the command using kmer, ksize=8."); m->mothurOutEndLine(); + database = new KmerDB(tempFile, 8); + } + + //read file + for(int i=0;i length) { tempBuf = tempBuf.substr(0, length); } + + istringstream iss (tempBuf,istringstream::in); + + Sequence temp(iss); + if (temp.getName() != "") { + names.push_back(temp.getName()); + database->addSequence(temp); + } + } + + database->generateDB(); + MPI_File_close(&inMPI); + #else + //need to know number of template seqs for suffixdb if (method == "suffix") { ifstream inFASTA; @@ -30,8 +90,6 @@ Classify::Classify(string tfile, string tempFile, string method, int kmerSize, f inFASTA.close(); } - mothurOut("Generating search database... "); cout.flush(); - bool needToGenerate = true; string kmerDBName; if(method == "kmer") { @@ -45,8 +103,8 @@ Classify::Classify(string tfile, string tempFile, string method, int kmerSize, f else if(method == "blast") { database = new BlastDB(gapOpen, gapExtend, match, misMatch); } else if(method == "distance") { database = new DistanceDB(); } else { - mothurOut(method + " is not a valid search option. I will run the command using kmer, ksize=8."); - mothurOutEndLine(); + m->mothurOut(method + " is not a valid search option. I will run the command using kmer, ksize=8."); + m->mothurOutEndLine(); database = new KmerDB(tempFile, 8); } @@ -81,15 +139,15 @@ Classify::Classify(string tfile, string tempFile, string method, int kmerSize, f } fastaFile.close(); } - +#endif database->setNumSeqs(names.size()); - mothurOut("DONE."); mothurOutEndLine(); - mothurOut("It took " + toString(time(NULL) - start) + " seconds generate search database. "); mothurOutEndLine(); + m->mothurOut("DONE."); m->mothurOutEndLine(); + m->mothurOut("It took " + toString(time(NULL) - start) + " seconds generate search database. "); m->mothurOutEndLine(); } catch(exception& e) { - errorOut(e, "Classify", "Classify"); + m->errorOut(e, "Classify", "Classify"); exit(1); } } @@ -99,14 +157,58 @@ void Classify::readTaxonomy(string file) { try { phyloTree = new PhyloTree(); + string name, taxInfo; + m->mothurOutEndLine(); + m->mothurOut("Reading in the " + file + " taxonomy...\t"); cout.flush(); + +#ifdef USE_MPI + int pid, num; + vector positions; + + MPI_Status status; + MPI_File inMPI; + MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are + + char inFileName[file.length()]; + strcpy(inFileName, file.c_str()); + + MPI_File_open(MPI_COMM_WORLD, inFileName, MPI_MODE_RDONLY, MPI_INFO_NULL, &inMPI); //comm, filename, mode, info, filepointer + + if (pid == 0) { + positions = setFilePosEachLine(file, num); + + //send file positions to all processes + MPI_Bcast(&num, 1, MPI_INT, 0, MPI_COMM_WORLD); //send numSeqs + MPI_Bcast(&positions[0], (num+1), MPI_LONG, 0, MPI_COMM_WORLD); //send file pos + }else{ + MPI_Bcast(&num, 1, MPI_INT, 0, MPI_COMM_WORLD); //get numSeqs + positions.resize(num); + MPI_Bcast(&positions[0], (num+1), MPI_LONG, 0, MPI_COMM_WORLD); //get file positions + } + + //read file + for(int i=0;i length) { tempBuf = tempBuf.substr(0, length); } + + istringstream iss (tempBuf,istringstream::in); + iss >> name >> taxInfo; + taxonomy[name] = taxInfo; + phyloTree->addSeqToTree(name, taxInfo); + } + + MPI_File_close(&inMPI); +#else ifstream inTax; openInputFile(file, inTax); - mothurOutEndLine(); - mothurOut("Reading in the " + file + " taxonomy...\t"); cout.flush(); - - string name, taxInfo; //read template seqs and save while (!inTax.eof()) { inTax >> name >> taxInfo; @@ -117,16 +219,17 @@ void Classify::readTaxonomy(string file) { gobble(inTax); } - - phyloTree->assignHeirarchyIDs(0); inTax.close(); +#endif - mothurOut("DONE."); - mothurOutEndLine(); cout.flush(); + phyloTree->assignHeirarchyIDs(0); + + m->mothurOut("DONE."); + m->mothurOutEndLine(); cout.flush(); } catch(exception& e) { - errorOut(e, "Classify", "readTaxonomy"); + m->errorOut(e, "Classify", "readTaxonomy"); exit(1); } } @@ -152,7 +255,7 @@ vector Classify::parseTax(string tax) { return taxons; } catch(exception& e) { - errorOut(e, "Classify", "parseTax"); + m->errorOut(e, "Classify", "parseTax"); exit(1); } }