X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=classify.cpp;h=8aa3cdb381ed7a389667ce61962d47cefac15ddd;hb=e8e13c129ba8184ec5932a090773f353f3ae3406;hp=7770999e779917996a916e87836a03c3a2a8f26b;hpb=65b6a38d00b3a72021611211e7c25392022c69ed;p=mothur.git diff --git a/classify.cpp b/classify.cpp index 7770999..8aa3cdb 100644 --- a/classify.cpp +++ b/classify.cpp @@ -47,7 +47,7 @@ void Classify::generateDatabaseAndNames(string tfile, string tempFile, string me } } else if(method == "suffix") { database = new SuffixDB(numSeqs); } - else if(method == "blast") { database = new BlastDB(tempFile.substr(0,tempFile.find_last_of(".")+1), gapOpen, gapExtend, match, misMatch); } + else if(method == "blast") { database = new BlastDB(tempFile.substr(0,tempFile.find_last_of(".")+1), gapOpen, gapExtend, match, misMatch, "", threadID); } else if(method == "distance") { database = new DistanceDB(); } else { m->mothurOut(method + " is not a valid search option. I will run the command using kmer, ksize=8."); @@ -61,7 +61,8 @@ void Classify::generateDatabaseAndNames(string tfile, string tempFile, string me names.push_back(temp.getName()); database->addSequence(temp); } - database->generateDB(); + if ((method == "kmer") && (!shortcuts)) {;} //don't print + else {database->generateDB(); } }else if ((method == "kmer") && (!needToGenerate)) { ifstream kmerFileTest(kmerDBName.c_str()); database->readKmerDB(kmerFileTest); @@ -89,7 +90,7 @@ void Classify::generateDatabaseAndNames(string tfile, string tempFile, string me m->mothurOut("Generating search database... "); cout.flush(); #ifdef USE_MPI int pid, processors; - vector positions; + vector positions; int tag = 2001; MPI_Status status; @@ -123,7 +124,7 @@ void Classify::generateDatabaseAndNames(string tfile, string tempFile, string me //create database if(method == "kmer") { database = new KmerDB(tempFile, kmerSize); } else if(method == "suffix") { database = new SuffixDB(numSeqs); } - else if(method == "blast") { database = new BlastDB(tempFile.substr(0,tempFile.find_last_of(".")+1), gapOpen, gapExtend, match, misMatch); } + else if(method == "blast") { database = new BlastDB(tempFile.substr(0,tempFile.find_last_of(".")+1), gapOpen, gapExtend, match, misMatch, "", pid); } else if(method == "distance") { database = new DistanceDB(); } else { m->mothurOut(method + " is not a valid search option. I will run the command using kmer, ksize=8."); m->mothurOutEndLine(); @@ -176,7 +177,7 @@ void Classify::generateDatabaseAndNames(string tfile, string tempFile, string me } } else if(method == "suffix") { database = new SuffixDB(numSeqs); } - else if(method == "blast") { database = new BlastDB(tempFile.substr(0,tempFile.find_last_of(".")+1), gapOpen, gapExtend, match, misMatch); } + else if(method == "blast") { database = new BlastDB(tempFile.substr(0,tempFile.find_last_of(".")+1), gapOpen, gapExtend, match, misMatch, "", threadID); } else if(method == "distance") { database = new DistanceDB(); } else { m->mothurOut(method + " is not a valid search option. I will run the command using kmer, ksize=8."); @@ -200,7 +201,8 @@ void Classify::generateDatabaseAndNames(string tfile, string tempFile, string me } fastaFile.close(); - database->generateDB(); + if ((method == "kmer") && (!shortcuts)) {;} //don't print + else {database->generateDB(); } }else if ((method == "kmer") && (!needToGenerate)) { ifstream kmerFileTest(kmerDBName.c_str()); @@ -238,7 +240,7 @@ void Classify::generateDatabaseAndNames(string tfile, string tempFile, string me } } /**************************************************************************************************/ -Classify::Classify() { m = MothurOut::getInstance(); database = NULL; } +Classify::Classify() { m = MothurOut::getInstance(); database = NULL; flipped=false; } /**************************************************************************************************/ int Classify::readTaxonomy(string file) { @@ -249,19 +251,17 @@ int Classify::readTaxonomy(string file) { m->mothurOutEndLine(); m->mothurOut("Reading in the " + file + " taxonomy...\t"); cout.flush(); - + if (m->debug) { m->mothurOut("[DEBUG]: Taxonomies read in...\n"); } + #ifdef USE_MPI int pid, num, processors; - vector positions; + vector positions; int tag = 2001; MPI_Status status; MPI_File inMPI; MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are MPI_Comm_size(MPI_COMM_WORLD, &processors); - - //char* inFileName = new char[file.length()]; - //memcpy(inFileName, file.c_str(), file.length()); char inFileName[1024]; strcpy(inFileName, file.c_str()); @@ -296,30 +296,21 @@ int Classify::readTaxonomy(string file) { delete buf4; istringstream iss (tempBuf,istringstream::in); - iss >> name >> taxInfo; + iss >> name; m->gobble(iss); + iss >> taxInfo; + if (m->debug) { m->mothurOut("[DEBUG]: name = " + name + " tax = " + taxInfo + "\n"); } taxonomy[name] = taxInfo; phyloTree->addSeqToTree(name, taxInfo); } MPI_File_close(&inMPI); MPI_Barrier(MPI_COMM_WORLD); //make everyone wait - just in case -#else - ifstream inTax; - m->openInputFile(file, inTax); - - //read template seqs and save - while (!inTax.eof()) { - inTax >> name >> taxInfo; - - taxonomy[name] = taxInfo; - - phyloTree->addSeqToTree(name, taxInfo); - - m->gobble(inTax); - } - inTax.close(); +#else + + taxonomy.clear(); + m->readTax(file, taxonomy); + for (map::iterator itTax = taxonomy.begin(); itTax != taxonomy.end(); itTax++) { phyloTree->addSeqToTree(itTax->first, itTax->second); } #endif - phyloTree->assignHeirarchyIDs(0); phyloTree->setUp(file); @@ -363,3 +354,37 @@ vector Classify::parseTax(string tax) { } /**************************************************************************************************/ +double Classify::getLogExpSum(vector probabilities, int& maxIndex){ + try { + // http://jblevins.org/notes/log-sum-exp + + double maxProb = probabilities[0]; + maxIndex = 0; + + int numProbs = (int)probabilities.size(); + + for(int i=1;i= maxProb){ + maxProb = probabilities[i]; + maxIndex = i; + } + } + + double probSum = 0.0000; + + for(int i=0;ierrorOut(e, "Classify", "getLogExpSum"); + exit(1); + } +} + +/**************************************************************************************************/ +