X-Git-Url: https://git.donarmstrong.com/?p=mothur.git;a=blobdiff_plain;f=classify.cpp;h=36179f471da4ac5d40b563e609feb2f8d3d32e6c;hp=7726b3e00cca9de19971b70656f9581e43e97c8a;hb=1a20e24ee786195ab0e1cccd4f5aede7a88f3f4e;hpb=05c52893c6c2467381fe7e7b769d86b6209af2e1 diff --git a/classify.cpp b/classify.cpp index 7726b3e..36179f4 100644 --- a/classify.cpp +++ b/classify.cpp @@ -23,7 +23,7 @@ void Classify::generateDatabaseAndNames(string tfile, string tempFile, string me if (tfile == "saved") { tfile = rdb->getSavedTaxonomy(); } taxFile = tfile; - readTaxonomy(taxFile); + int numSeqs = 0; if (tempFile == "saved") { @@ -61,7 +61,8 @@ void Classify::generateDatabaseAndNames(string tfile, string tempFile, string me names.push_back(temp.getName()); database->addSequence(temp); } - database->generateDB(); + if ((method == "kmer") && (!shortcuts)) {;} //don't print + else {database->generateDB(); } }else if ((method == "kmer") && (!needToGenerate)) { ifstream kmerFileTest(kmerDBName.c_str()); database->readKmerDB(kmerFileTest); @@ -73,11 +74,6 @@ void Classify::generateDatabaseAndNames(string tfile, string tempFile, string me database->setNumSeqs(numSeqs); - //sanity check - bool okay = phyloTree->ErrorCheck(names); - - if (!okay) { m->control_pressed = true; } - m->mothurOut("It took " + toString(time(NULL) - start) + " to load " + toString(rdb->referenceSeqs.size()) + " sequences and generate the search databases.");m->mothurOutEndLine(); }else { @@ -200,7 +196,8 @@ void Classify::generateDatabaseAndNames(string tfile, string tempFile, string me } fastaFile.close(); - database->generateDB(); + if ((method == "kmer") && (!shortcuts)) {;} //don't print + else {database->generateDB(); } }else if ((method == "kmer") && (!needToGenerate)) { ifstream kmerFileTest(kmerDBName.c_str()); @@ -219,18 +216,19 @@ void Classify::generateDatabaseAndNames(string tfile, string tempFile, string me fastaFile.close(); } #endif - + database->setNumSeqs(names.size()); - //sanity check - bool okay = phyloTree->ErrorCheck(names); - - if (!okay) { m->control_pressed = true; } - m->mothurOut("DONE."); m->mothurOutEndLine(); m->mothurOut("It took " + toString(time(NULL) - start) + " seconds generate search database. "); m->mothurOutEndLine(); } - + + readTaxonomy(taxFile); + + //sanity check + bool okay = phyloTree->ErrorCheck(names); + + if (!okay) { m->control_pressed = true; } } catch(exception& e) { m->errorOut(e, "Classify", "generateDatabaseAndNames"); @@ -238,7 +236,7 @@ void Classify::generateDatabaseAndNames(string tfile, string tempFile, string me } } /**************************************************************************************************/ -Classify::Classify() { m = MothurOut::getInstance(); database = NULL; flipped=false; } +Classify::Classify() { m = MothurOut::getInstance(); database = NULL; phyloTree=NULL; flipped=false; } /**************************************************************************************************/ int Classify::readTaxonomy(string file) { @@ -260,9 +258,6 @@ int Classify::readTaxonomy(string file) { MPI_File inMPI; MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are MPI_Comm_size(MPI_COMM_WORLD, &processors); - - //char* inFileName = new char[file.length()]; - //memcpy(inFileName, file.c_str(), file.length()); char inFileName[1024]; strcpy(inFileName, file.c_str()); @@ -300,34 +295,31 @@ int Classify::readTaxonomy(string file) { iss >> name; m->gobble(iss); iss >> taxInfo; if (m->debug) { m->mothurOut("[DEBUG]: name = " + name + " tax = " + taxInfo + "\n"); } - taxonomy[name] = taxInfo; - phyloTree->addSeqToTree(name, taxInfo); - } + if (m->inUsersGroups(name, names)) { + taxonomy[name] = taxInfo; + phyloTree->addSeqToTree(name, taxInfo); + }else { + m->mothurOut("[WARNING]: " + name + " is in your taxonomy file and not in your reference file, ignoring.\n"); + } + } MPI_File_close(&inMPI); MPI_Barrier(MPI_COMM_WORLD); //make everyone wait - just in case -#else - ifstream inTax; - m->openInputFile(file, inTax); - - //read template seqs and save - while (!inTax.eof()) { - inTax >> name; m->gobble(inTax); - inTax >> taxInfo; - - if (m->debug) { m->mothurOut("[DEBUG]: name = '" + name + "' tax = '" + taxInfo + "'\n"); } - - taxonomy[name] = taxInfo; - - phyloTree->addSeqToTree(name, taxInfo); - - m->gobble(inTax); - } - inTax.close(); -#endif +#else - - + taxonomy.clear(); + m->readTax(file, taxonomy); + map tempTaxonomy; + for (map::iterator itTax = taxonomy.begin(); itTax != taxonomy.end(); itTax++) { + if (m->inUsersGroups(itTax->first, names)) { + phyloTree->addSeqToTree(itTax->first, itTax->second); + tempTaxonomy[itTax->first] = itTax->second; + }else { + m->mothurOut("[WARNING]: " + itTax->first + " is in your taxonomy file and not in your reference file, ignoring.\n"); + } + } + taxonomy = tempTaxonomy; +#endif phyloTree->assignHeirarchyIDs(0); phyloTree->setUp(file); @@ -371,3 +363,37 @@ vector Classify::parseTax(string tax) { } /**************************************************************************************************/ +double Classify::getLogExpSum(vector probabilities, int& maxIndex){ + try { + // http://jblevins.org/notes/log-sum-exp + + double maxProb = probabilities[0]; + maxIndex = 0; + + int numProbs = (int)probabilities.size(); + + for(int i=1;i= maxProb){ + maxProb = probabilities[i]; + maxIndex = i; + } + } + + double probSum = 0.0000; + + for(int i=0;ierrorOut(e, "Classify", "getLogExpSum"); + exit(1); + } +} + +/**************************************************************************************************/ +