X-Git-Url: https://git.donarmstrong.com/?p=mothur.git;a=blobdiff_plain;f=phylodiversitycommand.h;h=045d2067c8bc12a0b3482b5cd50c8ea4a3fd9556;hp=df04b35d66f1c0dc36893074f6c840711395c49b;hb=a8e2df1b96a57f5f29576b08361b86a96a8eff4f;hpb=ca9ac1d80c62f57270b0dcd49410ebe08a8aecd6 diff --git a/phylodiversitycommand.h b/phylodiversitycommand.h index df04b35..045d206 100644 --- a/phylodiversitycommand.h +++ b/phylodiversitycommand.h @@ -11,10 +11,9 @@ */ #include "command.hpp" -#include "treemap.h" -#include "readtree.h" +#include "counttable.h" #include "sharedutilities.h" - +#include "tree.h" class PhyloDiversityCommand : public Command { @@ -26,28 +25,170 @@ class PhyloDiversityCommand : public Command { vector setParameters(); string getCommandName() { return "phylo.diversity"; } string getCommandCategory() { return "Hypothesis Testing"; } - string getHelpString(); - + + string getHelpString(); + string getOutputPattern(string); + string getCitation() { return "Faith DP (1994). Phylogenetic pattern and the quantification of organismal biodiversity. Philos Trans R Soc Lond B Biol Sci 345: 45-58. \nhttp://www.mothur.org/wiki/Phylo.diversity"; } + string getDescription() { return "phylo.diversity"; } + int execute(); void help() { m->mothurOut(getHelpString()); } private: - ReadTree* read; - TreeMap* tmap; + CountTable* ct; float freq; int iters, processors, numUniquesInName; bool abort, rarefy, summary, collect, scale; - string groups, outputDir, treefile, groupfile, namefile; + string groups, outputDir, treefile, groupfile, namefile, countfile; vector Groups, outputNames; //holds groups to be used, and outputFile names - map nameMap; + map getRootForGroups(Tree* t); int readNamesFile(); void printData(set&, map< string, vector >&, ofstream&, int); void printSumData(map< string, vector >&, ofstream&, int); - vector calcBranchLength(Tree*, int, map< string, set >&); + vector calcBranchLength(Tree*, int, vector< map >&, map); int driver(Tree*, map< string, vector >&, map >&, int, int, vector&, set&, ofstream&, ofstream&, bool); int createProcesses(vector&, Tree*, map< string, vector >&, map >&, int, int, vector&, set&, ofstream&, ofstream&); }; +/***********************************************************************/ +struct phylodivData { + int numIters; + MothurOut* m; + map< string, vector > div; + map > sumDiv; + map rootForGroup; + vector randomLeaf; + set numSampledList; + int increment; + Tree* t; + CountTable* ct; + bool includeRoot; + + + phylodivData(){} + phylodivData(MothurOut* mout, int ni, map< string, vector > cd, map< string, vector > csd, Tree* tree, CountTable* count, int incre, vector crl, set nsl, map rfg) { + m = mout; + t = tree; + ct = count; + div = cd; + numIters = ni; + sumDiv = csd; + increment = incre; + randomLeaf = crl; + numSampledList = nsl; + rootForGroup = rfg; + } +}; + +/**************************************************************************************************/ +#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix) +#else +static DWORD WINAPI MyPhyloDivThreadFunction(LPVOID lpParam){ + phylodivData* pDataArray; + pDataArray = (phylodivData*)lpParam; + try { + int numLeafNodes = pDataArray->randomLeaf.size(); + vector mGroups = pDataArray->m->getGroups(); + + for (int l = 0; l < pDataArray->numIters; l++) { + random_shuffle(pDataArray->randomLeaf.begin(), pDataArray->randomLeaf.end()); + + //initialize counts + map counts; + vector< map > countedBranch; + for (int i = 0; i < pDataArray->t->getNumNodes(); i++) { + map temp; + for (int j = 0; j < mGroups.size(); j++) { temp[mGroups[j]] = false; } + countedBranch.push_back(temp); + } + + for (int j = 0; j < mGroups.size(); j++) { counts[mGroups[j]] = 0; } + + for(int k = 0; k < numLeafNodes; k++){ + + if (pDataArray->m->control_pressed) { return 0; } + + //calc branch length of randomLeaf k + //vector br = calcBranchLength(t, randomLeaf[k], countedBranch, rootForGroup); + //(Tree* t, int leaf, vector< map >& counted, map roots + ///////////////////////////////////////////////////////////////////////////////////// + vector br; + int index = pDataArray->randomLeaf[k]; + + vector groups = pDataArray->t->tree[pDataArray->randomLeaf[k]].getGroup(); + br.resize(groups.size(), 0.0); + + //you are a leaf + if(pDataArray->t->tree[index].getBranchLength() != -1){ + for (int k = 0; k < groups.size(); k++) { + br[k] += abs(pDataArray->t->tree[index].getBranchLength()); + } + } + + index = pDataArray->t->tree[index].getParent(); + + //while you aren't at root + while(pDataArray->t->tree[index].getParent() != -1){ + + if (pDataArray->m->control_pressed) { return 0; } + + for (int k = 0; k < groups.size(); k++) { + + if (index >= pDataArray->rootForGroup[groups[k]]) { countedBranch[index][groups[k]] = true; } //if you are at this groups "root", then say we are done + + if (!countedBranch[index][groups[k]]){ //if counted[index][groups[k] is true this groups has already added all br from here to root, so quit early + if (pDataArray->t->tree[index].getBranchLength() != -1) { + br[k] += abs(pDataArray->t->tree[index].getBranchLength()); + } + countedBranch[index][groups[k]] = true; + } + } + index = pDataArray->t->tree[index].getParent(); + } + ///////////////////////////////////////////////////////////////////////////////////// + + //for each group in the groups update the total branch length accounting for the names file + groups = pDataArray->t->tree[pDataArray->randomLeaf[k]].getGroup(); + + for (int j = 0; j < groups.size(); j++) { + + if (pDataArray->m->inUsersGroups(groups[j], mGroups)) { + int numSeqsInGroupJ = 0; + map::iterator it; + it = pDataArray->t->tree[pDataArray->randomLeaf[k]].pcount.find(groups[j]); + if (it != pDataArray->t->tree[pDataArray->randomLeaf[k]].pcount.end()) { //this leaf node contains seqs from group j + numSeqsInGroupJ = it->second; + } + + if (numSeqsInGroupJ != 0) { pDataArray->div[groups[j]][(counts[groups[j]]+1)] = pDataArray->div[groups[j]][counts[groups[j]]] + br[j]; } + + for (int s = (counts[groups[j]]+2); s <= (counts[groups[j]]+numSeqsInGroupJ); s++) { + pDataArray->div[groups[j]][s] = pDataArray->div[groups[j]][s-1]; //update counts, but don't add in redundant branch lengths + } + counts[groups[j]] += numSeqsInGroupJ; + } + } + } + + + //add this diversity to the sum + for (int j = 0; j < mGroups.size(); j++) { + for (int g = 0; g < pDataArray->div[mGroups[j]].size(); g++) { + pDataArray->sumDiv[mGroups[j]][g] += pDataArray->div[mGroups[j]][g]; + } + } + + } + + return 0; + } + catch(exception& e) { + pDataArray->m->errorOut(e, "PhyloDiversityCommand", "MyPhyloDivThreadFunction"); + exit(1); + } +} +#endif + #endif