X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=classify.cpp;h=5f97c7ec693a97997554683188a013b1054eb3d7;hb=372fb21ea66ced432b109225851a1b80ef0491a3;hp=f44e66c359873f6830dac72d99fb1727a73c27ad;hpb=5b72d1cf3fa48730e5bb70d59cced1e43e1fe424;p=mothur.git diff --git a/classify.cpp b/classify.cpp index f44e66c..5f97c7e 100644 --- a/classify.cpp +++ b/classify.cpp @@ -23,7 +23,7 @@ void Classify::generateDatabaseAndNames(string tfile, string tempFile, string me if (tfile == "saved") { tfile = rdb->getSavedTaxonomy(); } taxFile = tfile; - readTaxonomy(taxFile); + int numSeqs = 0; if (tempFile == "saved") { @@ -61,7 +61,8 @@ void Classify::generateDatabaseAndNames(string tfile, string tempFile, string me names.push_back(temp.getName()); database->addSequence(temp); } - database->generateDB(); + if ((method == "kmer") && (!shortcuts)) {;} //don't print + else {database->generateDB(); } }else if ((method == "kmer") && (!needToGenerate)) { ifstream kmerFileTest(kmerDBName.c_str()); database->readKmerDB(kmerFileTest); @@ -73,11 +74,6 @@ void Classify::generateDatabaseAndNames(string tfile, string tempFile, string me database->setNumSeqs(numSeqs); - //sanity check - bool okay = phyloTree->ErrorCheck(names); - - if (!okay) { m->control_pressed = true; } - m->mothurOut("It took " + toString(time(NULL) - start) + " to load " + toString(rdb->referenceSeqs.size()) + " sequences and generate the search databases.");m->mothurOutEndLine(); }else { @@ -201,7 +197,7 @@ void Classify::generateDatabaseAndNames(string tfile, string tempFile, string me fastaFile.close(); if ((method == "kmer") && (!shortcuts)) {;} //don't print - else {database->generateDB(); } + else {database->generateDB(); } }else if ((method == "kmer") && (!needToGenerate)) { ifstream kmerFileTest(kmerDBName.c_str()); @@ -220,18 +216,19 @@ void Classify::generateDatabaseAndNames(string tfile, string tempFile, string me fastaFile.close(); } #endif - + database->setNumSeqs(names.size()); - //sanity check - bool okay = phyloTree->ErrorCheck(names); - - if (!okay) { m->control_pressed = true; } - m->mothurOut("DONE."); m->mothurOutEndLine(); m->mothurOut("It took " + toString(time(NULL) - start) + " seconds generate search database. "); m->mothurOutEndLine(); } - + + readTaxonomy(taxFile); + + //sanity check + bool okay = phyloTree->ErrorCheck(names); + + if (!okay) { m->control_pressed = true; } } catch(exception& e) { m->errorOut(e, "Classify", "generateDatabaseAndNames"); @@ -239,7 +236,7 @@ void Classify::generateDatabaseAndNames(string tfile, string tempFile, string me } } /**************************************************************************************************/ -Classify::Classify() { m = MothurOut::getInstance(); database = NULL; flipped=false; } +Classify::Classify() { m = MothurOut::getInstance(); database = NULL; phyloTree=NULL; flipped=false; } /**************************************************************************************************/ int Classify::readTaxonomy(string file) { @@ -298,9 +295,13 @@ int Classify::readTaxonomy(string file) { iss >> name; m->gobble(iss); iss >> taxInfo; if (m->debug) { m->mothurOut("[DEBUG]: name = " + name + " tax = " + taxInfo + "\n"); } - taxonomy[name] = taxInfo; - phyloTree->addSeqToTree(name, taxInfo); - } + if (m->inUsersGroups(name, names)) { + taxonomy[name] = taxInfo; + phyloTree->addSeqToTree(name, taxInfo); + }else { + m->mothurOut("[WARNING]: " + name + " is in your taxonomy file and not in your reference file, ignoring.\n"); + } + } MPI_File_close(&inMPI); MPI_Barrier(MPI_COMM_WORLD); //make everyone wait - just in case @@ -308,7 +309,19 @@ int Classify::readTaxonomy(string file) { taxonomy.clear(); m->readTax(file, taxonomy); - for (map::iterator itTax = taxonomy.begin(); itTax != taxonomy.end(); itTax++) { phyloTree->addSeqToTree(itTax->first, itTax->second); } + + //commented out to save time with large templates. 6/12/13 + //map tempTaxonomy; + for (map::iterator itTax = taxonomy.begin(); itTax != taxonomy.end(); itTax++) { + //if (m->inUsersGroups(itTax->first, names)) { + phyloTree->addSeqToTree(itTax->first, itTax->second); + if (m->control_pressed) { break; } + //tempTaxonomy[itTax->first] = itTax->second; + // }else { + // m->mothurOut("[WARNING]: " + itTax->first + " is in your taxonomy file and not in your reference file, ignoring.\n"); + //} + } + //taxonomy = tempTaxonomy; #endif phyloTree->assignHeirarchyIDs(0); @@ -330,21 +343,8 @@ int Classify::readTaxonomy(string file) { vector Classify::parseTax(string tax) { try { vector taxons; - - tax = tax.substr(0, tax.length()-1); //get rid of last ';' - - //parse taxonomy - string individual; - while (tax.find_first_of(';') != -1) { - individual = tax.substr(0,tax.find_first_of(';')); - tax = tax.substr(tax.find_first_of(';')+1, tax.length()); - taxons.push_back(individual); - - } - //get last one - taxons.push_back(tax); - - return taxons; + m->splitAtChar(tax, taxons, ';'); + return taxons; } catch(exception& e) { m->errorOut(e, "Classify", "parseTax"); @@ -353,3 +353,37 @@ vector Classify::parseTax(string tax) { } /**************************************************************************************************/ +double Classify::getLogExpSum(vector probabilities, int& maxIndex){ + try { + // http://jblevins.org/notes/log-sum-exp + + double maxProb = probabilities[0]; + maxIndex = 0; + + int numProbs = (int)probabilities.size(); + + for(int i=1;i= maxProb){ + maxProb = probabilities[i]; + maxIndex = i; + } + } + + double probSum = 0.0000; + + for(int i=0;ierrorOut(e, "Classify", "getLogExpSum"); + exit(1); + } +} + +/**************************************************************************************************/ +