}
saveIn.close();
}
-FilesGood = false;
+
if(probFileTest && probFileTest2 && phyloTreeTest && probFileTest3 && FilesGood){
if (tempFile == "saved") { m->mothurOutEndLine(); m->mothurOut("Using sequences from " + rdb->getSavedReference() + " that are saved in memory."); m->mothurOutEndLine(); }
}
#endif
-
//for each word
for (int i = 0; i < numKmers; i++) {
if (m->control_pressed) { break; }
vector<int> seqsWithWordi = database->getSequencesWithKmer(i);
- map<int, int> count;
- for (int k = 0; k < genusNodes.size(); k++) { count[genusNodes[k]] = 0; }
-
//for each sequence with that word
+ vector<int> count; count.resize(genusNodes.size(), 0);
for (int j = 0; j < seqsWithWordi.size(); j++) {
- int temp = phyloTree->getIndex(names[seqsWithWordi[j]]);
+ int temp = phyloTree->getGenusIndex(names[seqsWithWordi[j]]);
count[temp]++; //increment the count of seqs in this genus that have this word
}
//probabilityInThisTaxonomy = (# of seqs with that word in this taxonomy + probabilityInTemplate) / (total number of seqs in this taxonomy + 1);
- wordGenusProb[i][k] = log((count[genusNodes[k]] + probabilityInTemplate) / (float) (genusTotals[k] + 1));
+ wordGenusProb[i][k] = log((count[k] + probabilityInTemplate) / (float) (genusTotals[k] + 1));
- if (count[genusNodes[k]] != 0) {
+ if (count[k] != 0) {
#ifdef USE_MPI
int pid;
MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
for (int i = 0; i < numGenus; i++) {
iss >> gnode >> gsize; m->gobble(iss);
- uniqueTaxonomies[gnode] = gnode;
+ uniqueTaxonomies.insert(gnode);
totals.push_back(gsize);
}
for (int i = 0; i < numGenus; i++) {
in >> gnode >> gsize; m->gobble(in);
- uniqueTaxonomies[gnode] = gnode;
+ uniqueTaxonomies.insert(gnode);
totals.push_back(gsize);
}
//use print to reassign the taxa id
taxon = getNextTaxon(seqTaxonomy, seqName);
- if (taxon == "") { m->mothurOut(seqName + " has an error in the taxonomy. This may be due to a ;;"); m->mothurOutEndLine(); if (currentNode != 0) { uniqueTaxonomies[currentNode] = currentNode; } break; }
+ if (taxon == "") { m->mothurOut(seqName + " has an error in the taxonomy. This may be due to a ;;"); m->mothurOutEndLine(); if (currentNode != 0) { uniqueTaxonomies.insert(currentNode); } break; }
childPointer = tree[currentNode].children.find(taxon);
name2Taxonomy[seqName] = currentNode;
}
- if (seqTaxonomy == "") { uniqueTaxonomies[currentNode] = currentNode; }
+ if (seqTaxonomy == "") { uniqueTaxonomies.insert(currentNode); }
}
return 0;
try {
genusIndex.clear();
//generate genusIndexes
- map<int, int>::iterator it2;
- for (it2=uniqueTaxonomies.begin(); it2!=uniqueTaxonomies.end(); it2++) { genusIndex.push_back(it2->first); }
-
+ set<int>::iterator it2;
+ map<int, int> temp;
+ for (it2=uniqueTaxonomies.begin(); it2!=uniqueTaxonomies.end(); it2++) { genusIndex.push_back(*it2); temp[*it2] = genusIndex.size()-1; }
+
+ for (map<string, int>::iterator itName = name2Taxonomy.begin(); itName != name2Taxonomy.end(); itName++) {
+ map<int, int>::iterator itTemp = temp.find(itName->second);
+ if (itTemp != temp.end()) { name2GenusNodeIndex[itName->first] = itTemp->second; }
+ else { m->mothurOut("[ERROR]: trouble making name2GenusNodeIndex, aborting.\n"); m->control_pressed = true; }
+ }
+
return genusIndex;
}
catch(exception& e) {
//print genus nodes
outTree << endl << uniqueTaxonomies.size() << endl;
- map<int, int>::iterator it2;
- for (it2=uniqueTaxonomies.begin(); it2!=uniqueTaxonomies.end(); it2++) { outTree << it2->first << '\t' << tree[it2->first].accessions.size() << endl; }
+ set<int>::iterator it2;
+ for (it2=uniqueTaxonomies.begin(); it2!=uniqueTaxonomies.end(); it2++) { outTree << *it2 << '\t' << tree[*it2].accessions.size() << endl; }
outTree << endl;
outTree.close();
}
}
/**************************************************************************************************/
-int PhyloTree::getIndex(string seqName){
+int PhyloTree::getGenusIndex(string seqName){
try {
- map<string, int>::iterator itFind = name2Taxonomy.find(seqName);
+ map<string, int>::iterator itFind = name2GenusNodeIndex.find(seqName);
- if (itFind != name2Taxonomy.end()) { return itFind->second; }
- else { m->mothurOut("Cannot find " + seqName + ". Mismatch with taxonomy and template files. Cannot continue."); m->mothurOutEndLine(); exit(1);}
+ if (itFind != name2GenusNodeIndex.end()) { return itFind->second; }
+ else { m->mothurOut("Cannot find " + seqName + ". Could be a mismatch with taxonomy and template files. Cannot continue."); m->mothurOutEndLine(); exit(1);}
}
catch(exception& e) {
m->errorOut(e, "PhyloTree", "get");
TaxNode get(int i);
TaxNode get(string seqName);
string getName(int i);
- int getIndex(string seqName);
+ int getGenusIndex(string seqName);
string getFullTaxonomy(string); //pass a sequence name return taxonomy
int getMaxLevel() { return maxLevel; }
vector<int> genusIndex; //holds the indexes in tree where the genus level taxonomies are stored
vector<int> totals; //holds the numSeqs at each genus level taxonomy
map<string, int> name2Taxonomy; //maps name to index in tree
- map<int, int> uniqueTaxonomies; //map of unique taxonomies
+ map<string, int> name2GenusNodeIndex;
+ set<int> uniqueTaxonomies; //set of unique taxonomies (indexes of nodes in tree)
map<int, int> leafNodes; //used to create static reference taxonomy file
//void print(int, ofstream&);
int numNodes;