X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=classify.cpp;h=71a9b258e91bf2f527e2e1a8469f2e1e8037d76d;hb=6b32d112bb60e9f7eb6d4407a4eed4c49b67bced;hp=8aa3cdb381ed7a389667ce61962d47cefac15ddd;hpb=e8e13c129ba8184ec5932a090773f353f3ae3406;p=mothur.git diff --git a/classify.cpp b/classify.cpp index 8aa3cdb..71a9b25 100644 --- a/classify.cpp +++ b/classify.cpp @@ -23,7 +23,7 @@ void Classify::generateDatabaseAndNames(string tfile, string tempFile, string me if (tfile == "saved") { tfile = rdb->getSavedTaxonomy(); } taxFile = tfile; - readTaxonomy(taxFile); + int numSeqs = 0; if (tempFile == "saved") { @@ -74,11 +74,6 @@ void Classify::generateDatabaseAndNames(string tfile, string tempFile, string me database->setNumSeqs(numSeqs); - //sanity check - bool okay = phyloTree->ErrorCheck(names); - - if (!okay) { m->control_pressed = true; } - m->mothurOut("It took " + toString(time(NULL) - start) + " to load " + toString(rdb->referenceSeqs.size()) + " sequences and generate the search databases.");m->mothurOutEndLine(); }else { @@ -221,18 +216,19 @@ void Classify::generateDatabaseAndNames(string tfile, string tempFile, string me fastaFile.close(); } #endif - + database->setNumSeqs(names.size()); - //sanity check - bool okay = phyloTree->ErrorCheck(names); - - if (!okay) { m->control_pressed = true; } - m->mothurOut("DONE."); m->mothurOutEndLine(); m->mothurOut("It took " + toString(time(NULL) - start) + " seconds generate search database. "); m->mothurOutEndLine(); } - + + readTaxonomy(taxFile); + + //sanity check + bool okay = phyloTree->ErrorCheck(names); + + if (!okay) { m->control_pressed = true; } } catch(exception& e) { m->errorOut(e, "Classify", "generateDatabaseAndNames"); @@ -240,7 +236,7 @@ void Classify::generateDatabaseAndNames(string tfile, string tempFile, string me } } /**************************************************************************************************/ -Classify::Classify() { m = MothurOut::getInstance(); database = NULL; flipped=false; } +Classify::Classify() { m = MothurOut::getInstance(); database = NULL; phyloTree=NULL; flipped=false; } /**************************************************************************************************/ int Classify::readTaxonomy(string file) { @@ -299,9 +295,14 @@ int Classify::readTaxonomy(string file) { iss >> name; m->gobble(iss); iss >> taxInfo; if (m->debug) { m->mothurOut("[DEBUG]: name = " + name + " tax = " + taxInfo + "\n"); } - taxonomy[name] = taxInfo; - phyloTree->addSeqToTree(name, taxInfo); - } + //commented out to save time with large templates. 10/7/13 + //if (m->inUsersGroups(name, names)) { + taxonomy[name] = taxInfo; + phyloTree->addSeqToTree(name, taxInfo); + //}else { + // m->mothurOut("[WARNING]: " + name + " is in your taxonomy file and not in your reference file, ignoring.\n"); + //} + } MPI_File_close(&inMPI); MPI_Barrier(MPI_COMM_WORLD); //make everyone wait - just in case @@ -309,7 +310,19 @@ int Classify::readTaxonomy(string file) { taxonomy.clear(); m->readTax(file, taxonomy); - for (map::iterator itTax = taxonomy.begin(); itTax != taxonomy.end(); itTax++) { phyloTree->addSeqToTree(itTax->first, itTax->second); } + + //commented out to save time with large templates. 6/12/13 + //map tempTaxonomy; + for (map::iterator itTax = taxonomy.begin(); itTax != taxonomy.end(); itTax++) { + //if (m->inUsersGroups(itTax->first, names)) { + phyloTree->addSeqToTree(itTax->first, itTax->second); + if (m->control_pressed) { break; } + //tempTaxonomy[itTax->first] = itTax->second; + // }else { + // m->mothurOut("[WARNING]: " + itTax->first + " is in your taxonomy file and not in your reference file, ignoring.\n"); + //} + } + //taxonomy = tempTaxonomy; #endif phyloTree->assignHeirarchyIDs(0); @@ -331,21 +344,8 @@ int Classify::readTaxonomy(string file) { vector Classify::parseTax(string tax) { try { vector taxons; - - tax = tax.substr(0, tax.length()-1); //get rid of last ';' - - //parse taxonomy - string individual; - while (tax.find_first_of(';') != -1) { - individual = tax.substr(0,tax.find_first_of(';')); - tax = tax.substr(tax.find_first_of(';')+1, tax.length()); - taxons.push_back(individual); - - } - //get last one - taxons.push_back(tax); - - return taxons; + m->splitAtChar(tax, taxons, ';'); + return taxons; } catch(exception& e) { m->errorOut(e, "Classify", "parseTax");