X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=phylodiversitycommand.cpp;h=29e2ef0ba39ba4fbe73bf88949bf7a4756d50578;hb=0c64d4590ecba2cb19475ca0af206d85bfb2195c;hp=3db101a89f3c60e93ebb7961ac75bdea5524f630;hpb=72e0be6b9c80009d4dbee24e8d690ad9514dc6fb;p=mothur.git diff --git a/phylodiversitycommand.cpp b/phylodiversitycommand.cpp index 3db101a..29e2ef0 100644 --- a/phylodiversitycommand.cpp +++ b/phylodiversitycommand.cpp @@ -212,6 +212,8 @@ int PhyloDiversityCommand::execute(){ if (abort == true) { if (calledHelp) { return 0; } return 2; } + int start = time(NULL); + m->setTreeFile(treefile); TreeReader* reader = new TreeReader(treefile, groupfile, namefile); vector trees = reader->getTrees(); @@ -323,6 +325,9 @@ int PhyloDiversityCommand::execute(){ if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; } + m->mothurOut("It took " + toString(time(NULL) - start) + " secs to run unifrac.unweighted."); m->mothurOutEndLine(); + + m->mothurOutEndLine(); m->mothurOut("Output File Names: "); m->mothurOutEndLine(); for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); } @@ -429,39 +434,50 @@ int PhyloDiversityCommand::driver(Tree* t, map< string, vector >& div, ma try { int numLeafNodes = randomLeaf.size(); vector mGroups = m->getGroups(); - + + map rootForGroup = getRootForGroups(t); //maps groupName to root node in tree. "root" for group may not be the trees root and we don't want to include the extra branches. + for (int l = 0; l < numIters; l++) { random_shuffle(randomLeaf.begin(), randomLeaf.end()); - + cout << l << endl; //initialize counts map counts; - map< string, set > countedBranch; - for (int j = 0; j < mGroups.size(); j++) { counts[mGroups[j]] = 0; countedBranch[mGroups[j]].insert(-2); } //add dummy index to initialize countedBranch sets + vector< map > countedBranch; + for (int i = 0; i < t->getNumNodes(); i++) { + map temp; + for (int j = 0; j < mGroups.size(); j++) { temp[mGroups[j]] = false; } + countedBranch.push_back(temp); + } + + for (int j = 0; j < mGroups.size(); j++) { counts[mGroups[j]] = 0; } for(int k = 0; k < numLeafNodes; k++){ if (m->control_pressed) { return 0; } //calc branch length of randomLeaf k - vector br = calcBranchLength(t, randomLeaf[k], countedBranch); + vector br = calcBranchLength(t, randomLeaf[k], countedBranch, rootForGroup); //for each group in the groups update the total branch length accounting for the names file vector groups = t->tree[randomLeaf[k]].getGroup(); for (int j = 0; j < groups.size(); j++) { - int numSeqsInGroupJ = 0; - map::iterator it; - it = t->tree[randomLeaf[k]].pcount.find(groups[j]); - if (it != t->tree[randomLeaf[k]].pcount.end()) { //this leaf node contains seqs from group j - numSeqsInGroupJ = it->second; - } - - if (numSeqsInGroupJ != 0) { div[groups[j]][(counts[groups[j]]+1)] = div[groups[j]][counts[groups[j]]] + br[j]; } - - for (int s = (counts[groups[j]]+2); s <= (counts[groups[j]]+numSeqsInGroupJ); s++) { - div[groups[j]][s] = div[groups[j]][s-1]; //update counts, but don't add in redundant branch lengths - } - counts[groups[j]] += numSeqsInGroupJ; + + if (m->inUsersGroups(groups[j], mGroups)) { + int numSeqsInGroupJ = 0; + map::iterator it; + it = t->tree[randomLeaf[k]].pcount.find(groups[j]); + if (it != t->tree[randomLeaf[k]].pcount.end()) { //this leaf node contains seqs from group j + numSeqsInGroupJ = it->second; + } + + if (numSeqsInGroupJ != 0) { div[groups[j]][(counts[groups[j]]+1)] = div[groups[j]][counts[groups[j]]] + br[j]; } + + for (int s = (counts[groups[j]]+2); s <= (counts[groups[j]]+numSeqsInGroupJ); s++) { + div[groups[j]][s] = div[groups[j]][s-1]; //update counts, but don't add in redundant branch lengths + } + counts[groups[j]] += numSeqsInGroupJ; + } } } @@ -556,9 +572,9 @@ void PhyloDiversityCommand::printData(set& num, map< string, vector } //********************************************************************************************************************** //need a vector of floats one branch length for every group the node represents. -vector PhyloDiversityCommand::calcBranchLength(Tree* t, int leaf, map< string, set >& counted){ +vector PhyloDiversityCommand::calcBranchLength(Tree* t, int leaf, vector< map >& counted, map roots){ try { - + //calc the branch length //while you aren't at root vector sums; @@ -567,80 +583,38 @@ vector PhyloDiversityCommand::calcBranchLength(Tree* t, int leaf, map< st vector groups = t->tree[leaf].getGroup(); sums.resize(groups.size(), 0.0); - map > tempTotals; //maps node to total Branch Length - map< string, set > tempCounted; - set::iterator it; - - //you are a leaf + + //you are a leaf if(t->tree[index].getBranchLength() != -1){ for (int k = 0; k < groups.size(); k++) { sums[k] += abs(t->tree[index].getBranchLength()); - counted[groups[k]].insert(index); } } - - for (int k = 0; k < groups.size(); k++) { - tempTotals[groups[k]][index] = 0.0; - } - - index = t->tree[index].getParent(); - + + + index = t->tree[index].getParent(); + //while you aren't at root while(t->tree[index].getParent() != -1){ - + if (m->control_pressed) { return sums; } - int pcountSize = 0; for (int k = 0; k < groups.size(); k++) { - map::iterator itGroup = t->tree[index].pcount.find(groups[k]); - if (itGroup != t->tree[index].pcount.end()) { pcountSize++; } - - //do both your chidren have have descendants from the users groups? - int lc = t->tree[index].getLChild(); - int rc = t->tree[index].getRChild(); - - int LpcountSize = 0; - itGroup = t->tree[lc].pcount.find(groups[k]); - if (itGroup != t->tree[lc].pcount.end()) { LpcountSize++; } - - int RpcountSize = 0; - itGroup = t->tree[rc].pcount.find(groups[k]); - if (itGroup != t->tree[rc].pcount.end()) { RpcountSize++; } - - //if yes, add your childrens tempTotals - if ((LpcountSize != 0) && (RpcountSize != 0)) { - sums[k] += tempTotals[groups[k]][lc] + tempTotals[groups[k]][rc]; - - for (it = tempCounted[groups[k]].begin(); it != tempCounted[groups[k]].end(); it++) { counted[groups[k]].insert(*it); } - - //cout << "added to total " << tempTotals[lc] << '\t' << tempTotals[rc] << endl; - if (t->tree[index].getBranchLength() != -1) { - if (counted[groups[k]].count(index) == 0) { - tempTotals[groups[k]][index] = abs(t->tree[index].getBranchLength()); - tempCounted[groups[k]].insert(index); - }else{ - tempTotals[groups[k]][index] = 0.0; - } - }else { - tempTotals[groups[k]][index] = 0.0; - } - }else { //if no, your tempTotal is your childrens temp totals + your branch length - tempTotals[groups[k]][index] = tempTotals[groups[k]][lc] + tempTotals[groups[k]][rc]; - - if (counted[groups[k]].count(index) == 0) { - tempTotals[groups[k]][index] += abs(t->tree[index].getBranchLength()); - tempCounted[groups[k]].insert(index); - } - - } - //cout << "temptotal = "<< tempTotals[i] << endl; - } - - index = t->tree[index].getParent(); - } - + + if (index >= roots[groups[k]]) { counted[index][groups[k]] = true; } //if you are at this groups "root", then say we are done + + if (!counted[index][groups[k]]){ //if counted[index][groups[k] is true this groups has already added all br from here to root, so quit early + if (t->tree[index].getBranchLength() != -1) { + sums[k] += abs(t->tree[index].getBranchLength()); + } + counted[index][groups[k]] = true; + } + } + index = t->tree[index].getParent(); + } + return sums; - + } catch(exception& e) { m->errorOut(e, "PhyloDiversityCommand", "calcBranchLength"); @@ -648,6 +622,63 @@ vector PhyloDiversityCommand::calcBranchLength(Tree* t, int leaf, map< st } } //********************************************************************************************************************** +map PhyloDiversityCommand::getRootForGroups(Tree* t){ + try { + map roots; //maps group to root for group, may not be root of tree + map done; + + //initialize root for all groups to -1 + for (int k = 0; k < (t->getTreeMap())->getNamesOfGroups().size(); k++) { done[(t->getTreeMap())->getNamesOfGroups()[k]] = false; } + + for (int i = 0; i < t->getNumLeaves(); i++) { + + vector groups = t->tree[i].getGroup(); + + int index = t->tree[i].getParent(); + + for (int j = 0; j < groups.size(); j++) { + + if (done[groups[j]] == false) { //we haven't found the root for this group yet + + done[groups[j]] = true; + roots[groups[j]] = i; //set root to self to start + + //while you aren't at root + while(t->tree[index].getParent() != -1){ + + if (m->control_pressed) { return roots; } + + //do both your chidren have have descendants from the users groups? + int lc = t->tree[index].getLChild(); + int rc = t->tree[index].getRChild(); + + int LpcountSize = 0; + map:: iterator itGroup = t->tree[lc].pcount.find(groups[j]); + if (itGroup != t->tree[lc].pcount.end()) { LpcountSize++; } + + int RpcountSize = 0; + itGroup = t->tree[rc].pcount.find(groups[j]); + if (itGroup != t->tree[rc].pcount.end()) { RpcountSize++; } + + if ((LpcountSize != 0) && (RpcountSize != 0)) { //possible root + roots[groups[j]] = index; + }else { ;} + + index = t->tree[index].getParent(); + } + } + } + } + + return roots; + + } + catch(exception& e) { + m->errorOut(e, "PhyloDiversityCommand", "getRootForGroups"); + exit(1); + } +} +//**********************************************************************************************************************