From: Sarah Westcott Date: Fri, 11 May 2012 17:47:32 +0000 (-0400) Subject: added some debug stuff to chimera.uchime. optimized phylo.diversity to improve raref... X-Git-Url: https://git.donarmstrong.com/?p=mothur.git;a=commitdiff_plain;h=0c64d4590ecba2cb19475ca0af206d85bfb2195c added some debug stuff to chimera.uchime. optimized phylo.diversity to improve rarefy speed. --- diff --git a/chimerauchimecommand.cpp b/chimerauchimecommand.cpp index 026e91b..e28775f 100644 --- a/chimerauchimecommand.cpp +++ b/chimerauchimecommand.cpp @@ -12,7 +12,7 @@ //#include "uc.h" #include "sequence.hpp" #include "referencedb.h" - +#include "systemcommand.h" //********************************************************************************************************************** vector ChimeraUchimeCommand::setParameters(){ @@ -463,10 +463,20 @@ ChimeraUchimeCommand::ChimeraUchimeCommand(string option) { string uchimeCommand; #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix) uchimeCommand = path + "uchime"; // format the database, -o option gives us the ability + if (m->debug) { + m->mothurOut("[DEBUG]: Uchime location using \"which uchime\" = "); + Command* newCommand = new SystemCommand("which uchime"); m->mothurOutEndLine(); + newCommand->execute(); + delete newCommand; + m->mothurOut("[DEBUG]: Mothur's location using \"which mothur\" = "); + newCommand = new SystemCommand("which mothur"); m->mothurOutEndLine(); + newCommand->execute(); + delete newCommand; + } #else uchimeCommand = path + "uchime.exe"; #endif - + //test to make sure uchime exists ifstream in; uchimeCommand = m->getFullPathName(uchimeCommand); diff --git a/phylodiversitycommand.cpp b/phylodiversitycommand.cpp index 3db101a..29e2ef0 100644 --- a/phylodiversitycommand.cpp +++ b/phylodiversitycommand.cpp @@ -212,6 +212,8 @@ int PhyloDiversityCommand::execute(){ if (abort == true) { if (calledHelp) { return 0; } return 2; } + int start = time(NULL); + m->setTreeFile(treefile); TreeReader* reader = new TreeReader(treefile, groupfile, namefile); vector trees = reader->getTrees(); @@ -323,6 +325,9 @@ int PhyloDiversityCommand::execute(){ if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; } + m->mothurOut("It took " + toString(time(NULL) - start) + " secs to run unifrac.unweighted."); m->mothurOutEndLine(); + + m->mothurOutEndLine(); m->mothurOut("Output File Names: "); m->mothurOutEndLine(); for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); } @@ -429,39 +434,50 @@ int PhyloDiversityCommand::driver(Tree* t, map< string, vector >& div, ma try { int numLeafNodes = randomLeaf.size(); vector mGroups = m->getGroups(); - + + map rootForGroup = getRootForGroups(t); //maps groupName to root node in tree. "root" for group may not be the trees root and we don't want to include the extra branches. + for (int l = 0; l < numIters; l++) { random_shuffle(randomLeaf.begin(), randomLeaf.end()); - + cout << l << endl; //initialize counts map counts; - map< string, set > countedBranch; - for (int j = 0; j < mGroups.size(); j++) { counts[mGroups[j]] = 0; countedBranch[mGroups[j]].insert(-2); } //add dummy index to initialize countedBranch sets + vector< map > countedBranch; + for (int i = 0; i < t->getNumNodes(); i++) { + map temp; + for (int j = 0; j < mGroups.size(); j++) { temp[mGroups[j]] = false; } + countedBranch.push_back(temp); + } + + for (int j = 0; j < mGroups.size(); j++) { counts[mGroups[j]] = 0; } for(int k = 0; k < numLeafNodes; k++){ if (m->control_pressed) { return 0; } //calc branch length of randomLeaf k - vector br = calcBranchLength(t, randomLeaf[k], countedBranch); + vector br = calcBranchLength(t, randomLeaf[k], countedBranch, rootForGroup); //for each group in the groups update the total branch length accounting for the names file vector groups = t->tree[randomLeaf[k]].getGroup(); for (int j = 0; j < groups.size(); j++) { - int numSeqsInGroupJ = 0; - map::iterator it; - it = t->tree[randomLeaf[k]].pcount.find(groups[j]); - if (it != t->tree[randomLeaf[k]].pcount.end()) { //this leaf node contains seqs from group j - numSeqsInGroupJ = it->second; - } - - if (numSeqsInGroupJ != 0) { div[groups[j]][(counts[groups[j]]+1)] = div[groups[j]][counts[groups[j]]] + br[j]; } - - for (int s = (counts[groups[j]]+2); s <= (counts[groups[j]]+numSeqsInGroupJ); s++) { - div[groups[j]][s] = div[groups[j]][s-1]; //update counts, but don't add in redundant branch lengths - } - counts[groups[j]] += numSeqsInGroupJ; + + if (m->inUsersGroups(groups[j], mGroups)) { + int numSeqsInGroupJ = 0; + map::iterator it; + it = t->tree[randomLeaf[k]].pcount.find(groups[j]); + if (it != t->tree[randomLeaf[k]].pcount.end()) { //this leaf node contains seqs from group j + numSeqsInGroupJ = it->second; + } + + if (numSeqsInGroupJ != 0) { div[groups[j]][(counts[groups[j]]+1)] = div[groups[j]][counts[groups[j]]] + br[j]; } + + for (int s = (counts[groups[j]]+2); s <= (counts[groups[j]]+numSeqsInGroupJ); s++) { + div[groups[j]][s] = div[groups[j]][s-1]; //update counts, but don't add in redundant branch lengths + } + counts[groups[j]] += numSeqsInGroupJ; + } } } @@ -556,9 +572,9 @@ void PhyloDiversityCommand::printData(set& num, map< string, vector } //********************************************************************************************************************** //need a vector of floats one branch length for every group the node represents. -vector PhyloDiversityCommand::calcBranchLength(Tree* t, int leaf, map< string, set >& counted){ +vector PhyloDiversityCommand::calcBranchLength(Tree* t, int leaf, vector< map >& counted, map roots){ try { - + //calc the branch length //while you aren't at root vector sums; @@ -567,80 +583,38 @@ vector PhyloDiversityCommand::calcBranchLength(Tree* t, int leaf, map< st vector groups = t->tree[leaf].getGroup(); sums.resize(groups.size(), 0.0); - map > tempTotals; //maps node to total Branch Length - map< string, set > tempCounted; - set::iterator it; - - //you are a leaf + + //you are a leaf if(t->tree[index].getBranchLength() != -1){ for (int k = 0; k < groups.size(); k++) { sums[k] += abs(t->tree[index].getBranchLength()); - counted[groups[k]].insert(index); } } - - for (int k = 0; k < groups.size(); k++) { - tempTotals[groups[k]][index] = 0.0; - } - - index = t->tree[index].getParent(); - + + + index = t->tree[index].getParent(); + //while you aren't at root while(t->tree[index].getParent() != -1){ - + if (m->control_pressed) { return sums; } - int pcountSize = 0; for (int k = 0; k < groups.size(); k++) { - map::iterator itGroup = t->tree[index].pcount.find(groups[k]); - if (itGroup != t->tree[index].pcount.end()) { pcountSize++; } - - //do both your chidren have have descendants from the users groups? - int lc = t->tree[index].getLChild(); - int rc = t->tree[index].getRChild(); - - int LpcountSize = 0; - itGroup = t->tree[lc].pcount.find(groups[k]); - if (itGroup != t->tree[lc].pcount.end()) { LpcountSize++; } - - int RpcountSize = 0; - itGroup = t->tree[rc].pcount.find(groups[k]); - if (itGroup != t->tree[rc].pcount.end()) { RpcountSize++; } - - //if yes, add your childrens tempTotals - if ((LpcountSize != 0) && (RpcountSize != 0)) { - sums[k] += tempTotals[groups[k]][lc] + tempTotals[groups[k]][rc]; - - for (it = tempCounted[groups[k]].begin(); it != tempCounted[groups[k]].end(); it++) { counted[groups[k]].insert(*it); } - - //cout << "added to total " << tempTotals[lc] << '\t' << tempTotals[rc] << endl; - if (t->tree[index].getBranchLength() != -1) { - if (counted[groups[k]].count(index) == 0) { - tempTotals[groups[k]][index] = abs(t->tree[index].getBranchLength()); - tempCounted[groups[k]].insert(index); - }else{ - tempTotals[groups[k]][index] = 0.0; - } - }else { - tempTotals[groups[k]][index] = 0.0; - } - }else { //if no, your tempTotal is your childrens temp totals + your branch length - tempTotals[groups[k]][index] = tempTotals[groups[k]][lc] + tempTotals[groups[k]][rc]; - - if (counted[groups[k]].count(index) == 0) { - tempTotals[groups[k]][index] += abs(t->tree[index].getBranchLength()); - tempCounted[groups[k]].insert(index); - } - - } - //cout << "temptotal = "<< tempTotals[i] << endl; - } - - index = t->tree[index].getParent(); - } - + + if (index >= roots[groups[k]]) { counted[index][groups[k]] = true; } //if you are at this groups "root", then say we are done + + if (!counted[index][groups[k]]){ //if counted[index][groups[k] is true this groups has already added all br from here to root, so quit early + if (t->tree[index].getBranchLength() != -1) { + sums[k] += abs(t->tree[index].getBranchLength()); + } + counted[index][groups[k]] = true; + } + } + index = t->tree[index].getParent(); + } + return sums; - + } catch(exception& e) { m->errorOut(e, "PhyloDiversityCommand", "calcBranchLength"); @@ -648,6 +622,63 @@ vector PhyloDiversityCommand::calcBranchLength(Tree* t, int leaf, map< st } } //********************************************************************************************************************** +map PhyloDiversityCommand::getRootForGroups(Tree* t){ + try { + map roots; //maps group to root for group, may not be root of tree + map done; + + //initialize root for all groups to -1 + for (int k = 0; k < (t->getTreeMap())->getNamesOfGroups().size(); k++) { done[(t->getTreeMap())->getNamesOfGroups()[k]] = false; } + + for (int i = 0; i < t->getNumLeaves(); i++) { + + vector groups = t->tree[i].getGroup(); + + int index = t->tree[i].getParent(); + + for (int j = 0; j < groups.size(); j++) { + + if (done[groups[j]] == false) { //we haven't found the root for this group yet + + done[groups[j]] = true; + roots[groups[j]] = i; //set root to self to start + + //while you aren't at root + while(t->tree[index].getParent() != -1){ + + if (m->control_pressed) { return roots; } + + //do both your chidren have have descendants from the users groups? + int lc = t->tree[index].getLChild(); + int rc = t->tree[index].getRChild(); + + int LpcountSize = 0; + map:: iterator itGroup = t->tree[lc].pcount.find(groups[j]); + if (itGroup != t->tree[lc].pcount.end()) { LpcountSize++; } + + int RpcountSize = 0; + itGroup = t->tree[rc].pcount.find(groups[j]); + if (itGroup != t->tree[rc].pcount.end()) { RpcountSize++; } + + if ((LpcountSize != 0) && (RpcountSize != 0)) { //possible root + roots[groups[j]] = index; + }else { ;} + + index = t->tree[index].getParent(); + } + } + } + } + + return roots; + + } + catch(exception& e) { + m->errorOut(e, "PhyloDiversityCommand", "getRootForGroups"); + exit(1); + } +} +//********************************************************************************************************************** diff --git a/phylodiversitycommand.h b/phylodiversitycommand.h index 5d0cccf..52cd3e2 100644 --- a/phylodiversitycommand.h +++ b/phylodiversitycommand.h @@ -39,10 +39,11 @@ private: string groups, outputDir, treefile, groupfile, namefile; vector Groups, outputNames; //holds groups to be used, and outputFile names + map getRootForGroups(Tree* t); int readNamesFile(); void printData(set&, map< string, vector >&, ofstream&, int); void printSumData(map< string, vector >&, ofstream&, int); - vector calcBranchLength(Tree*, int, map< string, set >&); + vector calcBranchLength(Tree*, int, vector< map >&, map); int driver(Tree*, map< string, vector >&, map >&, int, int, vector&, set&, ofstream&, ofstream&, bool); int createProcesses(vector&, Tree*, map< string, vector >&, map >&, int, int, vector&, set&, ofstream&, ofstream&);