X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=phylodiversitycommand.cpp;h=ddd2b316d507b8477d84d529ae9fc4d88c69e518;hb=c7e8c2d15bd7cedcfdf18675cb0ea1a0dcd0e3c0;hp=bf502abe715baadd43112377c374ba218414d3d8;hpb=e150b0b0664caec517485ee6d69dcdade6dcae77;p=mothur.git diff --git a/phylodiversitycommand.cpp b/phylodiversitycommand.cpp index bf502ab..ddd2b31 100644 --- a/phylodiversitycommand.cpp +++ b/phylodiversitycommand.cpp @@ -8,6 +8,7 @@ */ #include "phylodiversitycommand.h" +#include "treereader.h" //********************************************************************************************************************** vector PhyloDiversityCommand::setParameters(){ @@ -60,7 +61,28 @@ string PhyloDiversityCommand::getHelpString(){ exit(1); } } - +//********************************************************************************************************************** +string PhyloDiversityCommand::getOutputFileNameTag(string type, string inputName=""){ + try { + string outputFileName = ""; + map >::iterator it; + + //is this a type this command creates + it = outputTypes.find(type); + if (it == outputTypes.end()) { m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n"); } + else { + if (type == "phylodiv") { outputFileName = "phylodiv"; } + else if (type == "rarefy") { outputFileName = "phylodiv.rarefaction"; } + else if (type == "summary") { outputFileName = "phylodiv.summary"; } + else { m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n"); m->control_pressed = true; } + } + return outputFileName; + } + catch(exception& e) { + m->errorOut(e, "PhyloDiversityCommand", "getOutputFileNameTag"); + exit(1); + } +} //********************************************************************************************************************** PhyloDiversityCommand::PhyloDiversityCommand(){ @@ -136,44 +158,35 @@ PhyloDiversityCommand::PhyloDiversityCommand(string option) { } } - m->runParse = true; - m->Groups.clear(); - m->namesOfGroups.clear(); - m->Treenames.clear(); - m->names.clear(); - //check for required parameters treefile = validParameter.validFile(parameters, "tree", true); - if (treefile == "not open") { abort = true; } + if (treefile == "not open") { treefile = ""; abort = true; } else if (treefile == "not found") { //if there is a current design file, use it treefile = m->getTreeFile(); if (treefile != "") { m->mothurOut("Using " + treefile + " as input file for the tree parameter."); m->mothurOutEndLine(); } else { m->mothurOut("You have no current tree file and the tree parameter is required."); m->mothurOutEndLine(); abort = true; } - } + }else { m->setTreeFile(treefile); } //check for required parameters groupfile = validParameter.validFile(parameters, "group", true); - if (groupfile == "not open") { abort = true; } - else if (groupfile == "not found") { - //if there is a current design file, use it - groupfile = m->getGroupFile(); - if (groupfile != "") { m->mothurOut("Using " + groupfile + " as input file for the group parameter."); m->mothurOutEndLine(); } - else { m->mothurOut("You have no current group file and the group parameter is required."); m->mothurOutEndLine(); abort = true; } - } + if (groupfile == "not open") { groupfile = ""; abort = true; } + else if (groupfile == "not found") { groupfile = ""; } + else { m->setGroupFile(groupfile); } namefile = validParameter.validFile(parameters, "name", true); - if (namefile == "not open") { abort = true; } + if (namefile == "not open") { namefile = ""; abort = true; } else if (namefile == "not found") { namefile = ""; } + else { m->setNameFile(namefile); } outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = m->hasPath(treefile); } string temp; temp = validParameter.validFile(parameters, "freq", false); if (temp == "not found") { temp = "100"; } - convert(temp, freq); + m->mothurConvert(temp, freq); temp = validParameter.validFile(parameters, "iters", false); if (temp == "not found") { temp = "1000"; } - convert(temp, iters); + m->mothurConvert(temp, iters); temp = validParameter.validFile(parameters, "rarefy", false); if (temp == "not found") { temp = "F"; } rarefy = m->isTrue(temp); @@ -190,16 +203,21 @@ PhyloDiversityCommand::PhyloDiversityCommand(string option) { temp = validParameter.validFile(parameters, "processors", false); if (temp == "not found"){ temp = m->getProcessors(); } m->setProcessors(temp); - convert(temp, processors); + m->mothurConvert(temp, processors); groups = validParameter.validFile(parameters, "groups", false); if (groups == "not found") { groups = ""; } else { m->splitAtDash(groups, Groups); - m->Groups = Groups; + m->setGroups(Groups); } if ((!collect) && (!rarefy) && (!summary)) { m->mothurOut("No outputs selected. You must set either collect, rarefy or summary to true, summary=T by default."); m->mothurOutEndLine(); abort=true; } + + if (namefile == "") { + vector files; files.push_back(treefile); + parser.getNameFile(files); + } } } @@ -215,81 +233,34 @@ int PhyloDiversityCommand::execute(){ if (abort == true) { if (calledHelp) { return 0; } return 2; } + int start = time(NULL); + m->setTreeFile(treefile); - - //read in group map info. - tmap = new TreeMap(groupfile); - tmap->readMap(); - - if (namefile != "") { readNamesFile(); } - - read = new ReadNewickTree(treefile); - int readOk = read->read(tmap); - - if (readOk != 0) { m->mothurOut("Read Terminated."); m->mothurOutEndLine(); delete tmap; delete read; return 0; } - - read->AssembleTrees(); - vector trees = read->getTrees(); - delete read; - - //make sure all files match - //if you provide a namefile we will use the numNames in the namefile as long as the number of unique match the tree names size. - int numNamesInTree; - if (namefile != "") { - if (numUniquesInName == m->Treenames.size()) { numNamesInTree = nameMap.size(); } - else { numNamesInTree = m->Treenames.size(); } - }else { numNamesInTree = m->Treenames.size(); } - - - //output any names that are in group file but not in tree - if (numNamesInTree < tmap->getNumSeqs()) { - for (int i = 0; i < tmap->namesOfSeqs.size(); i++) { - //is that name in the tree? - int count = 0; - for (int j = 0; j < m->Treenames.size(); j++) { - if (tmap->namesOfSeqs[i] == m->Treenames[j]) { break; } //found it - count++; - } - - if (m->control_pressed) { - delete tmap; for (int i = 0; i < trees.size(); i++) { delete trees[i]; } - for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } outputTypes.clear(); - m->Groups.clear(); - return 0; - } - - //then you did not find it so report it - if (count == m->Treenames.size()) { - //if it is in your namefile then don't remove - map::iterator it = nameMap.find(tmap->namesOfSeqs[i]); - - if (it == nameMap.end()) { - m->mothurOut(tmap->namesOfSeqs[i] + " is in your groupfile and not in your tree. It will be disregarded."); m->mothurOutEndLine(); - tmap->removeSeq(tmap->namesOfSeqs[i]); - i--; //need this because removeSeq removes name from namesOfSeqs - } - } - } - } - - SharedUtil* util = new SharedUtil(); - util->setGroups(m->Groups, tmap->namesOfGroups, "treegroup"); //sets the groups the user wants to analyze - delete util; + TreeReader* reader = new TreeReader(treefile, groupfile, namefile); + vector trees = reader->getTrees(); + tmap = trees[0]->getTreeMap(); + delete reader; + + SharedUtil util; + vector mGroups = m->getGroups(); + vector tGroups = tmap->getNamesOfGroups(); + util.setGroups(mGroups, tGroups, "phylo.diversity"); //sets the groups the user wants to analyze //incase the user had some mismatches between the tree and group files we don't want group xxx to be analyzed - for (int i = 0; i < m->Groups.size(); i++) { if (m->Groups[i] == "xxx") { m->Groups.erase(m->Groups.begin()+i); break; } } + for (int i = 0; i < mGroups.size(); i++) { if (mGroups[i] == "xxx") { mGroups.erase(mGroups.begin()+i); break; } } + m->setGroups(mGroups); vector outputNames; //for each of the users trees for(int i = 0; i < trees.size(); i++) { - if (m->control_pressed) { delete tmap; for (int j = 0; j < trees.size(); j++) { delete trees[j]; } for (int j = 0; j < outputNames.size(); j++) { remove(outputNames[j].c_str()); } return 0; } + if (m->control_pressed) { delete tmap; for (int j = 0; j < trees.size(); j++) { delete trees[j]; } for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } return 0; } ofstream outSum, outRare, outCollect; - string outSumFile = outputDir + m->getRootName(m->getSimpleName(treefile)) + toString(i+1) + ".phylodiv.summary"; - string outRareFile = outputDir + m->getRootName(m->getSimpleName(treefile)) + toString(i+1) + ".phylodiv.rarefaction"; - string outCollectFile = outputDir + m->getRootName(m->getSimpleName(treefile)) + toString(i+1) + ".phylodiv"; + string outSumFile = outputDir + m->getRootName(m->getSimpleName(treefile)) + toString(i+1) + "." + getOutputFileNameTag("summary"); + string outRareFile = outputDir + m->getRootName(m->getSimpleName(treefile)) + toString(i+1) + "." + getOutputFileNameTag("rarefy"); + string outCollectFile = outputDir + m->getRootName(m->getSimpleName(treefile)) + toString(i+1) + "." + getOutputFileNameTag("phylodiv"); if (summary) { m->openOutputFile(outSumFile, outSum); outputNames.push_back(outSumFile); outputTypes["summary"].push_back(outSumFile); } if (rarefy) { m->openOutputFile(outRareFile, outRare); outputNames.push_back(outRareFile); outputTypes["rarefy"].push_back(outRareFile); } @@ -300,7 +271,7 @@ int PhyloDiversityCommand::execute(){ //create a vector containing indexes of leaf nodes, randomize it, select nodes to send to calculator vector randomLeaf; for (int j = 0; j < numLeafNodes; j++) { - if (m->inUsersGroups(trees[i]->tree[j].getGroup(), m->Groups) == true) { //is this a node from the group the user selected. + if (m->inUsersGroups(trees[i]->tree[j].getGroup(), mGroups) == true) { //is this a node from the group the user selected. randomLeaf.push_back(j); } } @@ -315,15 +286,15 @@ int PhyloDiversityCommand::execute(){ //find largest group total int largestGroup = 0; - for (int j = 0; j < m->Groups.size(); j++) { - if (tmap->seqsPerGroup[m->Groups[j]] > largestGroup) { largestGroup = tmap->seqsPerGroup[m->Groups[j]]; } + for (int j = 0; j < mGroups.size(); j++) { + if (tmap->seqsPerGroup[mGroups[j]] > largestGroup) { largestGroup = tmap->seqsPerGroup[mGroups[j]]; } //initialize diversity - diversity[m->Groups[j]].resize(tmap->seqsPerGroup[m->Groups[j]]+1, 0.0); //numSampled + diversity[mGroups[j]].resize(tmap->seqsPerGroup[mGroups[j]]+1, 0.0); //numSampled //groupA 0.0 0.0 //initialize sumDiversity - sumDiversity[m->Groups[j]].resize(tmap->seqsPerGroup[m->Groups[j]]+1, 0.0); + sumDiversity[mGroups[j]].resize(tmap->seqsPerGroup[mGroups[j]]+1, 0.0); } //convert freq percentage to number @@ -337,11 +308,11 @@ int PhyloDiversityCommand::execute(){ if(largestGroup % increment != 0){ numSampledList.insert(largestGroup); } //add other groups ending points - for (int j = 0; j < m->Groups.size(); j++) { - if (numSampledList.count(diversity[m->Groups[j]].size()-1) == 0) { numSampledList.insert(diversity[m->Groups[j]].size()-1); } + for (int j = 0; j < mGroups.size(); j++) { + if (numSampledList.count(diversity[mGroups[j]].size()-1) == 0) { numSampledList.insert(diversity[mGroups[j]].size()-1); } } - #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) + #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix) if(processors == 1){ driver(trees[i], diversity, sumDiversity, iters, increment, randomLeaf, numSampledList, outCollect, outSum, true); }else{ @@ -373,8 +344,11 @@ int PhyloDiversityCommand::execute(){ } - if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; } + if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; } + + m->mothurOut("It took " + toString(time(NULL) - start) + " secs to run phylo.diversity."); m->mothurOutEndLine(); + m->mothurOutEndLine(); m->mothurOut("Output File Names: "); m->mothurOutEndLine(); for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); } @@ -391,7 +365,7 @@ int PhyloDiversityCommand::execute(){ //********************************************************************************************************************** int PhyloDiversityCommand::createProcesses(vector& procIters, Tree* t, map< string, vector >& div, map >& sumDiv, int numIters, int increment, vector& randomLeaf, set& numSampledList, ofstream& outCollect, ofstream& outSum){ try { - #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) + #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix) int process = 1; vector processIDS; @@ -463,7 +437,7 @@ int PhyloDiversityCommand::createProcesses(vector& procIters, Tree* t, map< } in.close(); - remove(inTemp.c_str()); + m->mothurRemove(inTemp); } #endif @@ -480,47 +454,59 @@ int PhyloDiversityCommand::createProcesses(vector& procIters, Tree* t, map< int PhyloDiversityCommand::driver(Tree* t, map< string, vector >& div, map >& sumDiv, int numIters, int increment, vector& randomLeaf, set& numSampledList, ofstream& outCollect, ofstream& outSum, bool doSumCollect){ try { int numLeafNodes = randomLeaf.size(); - + vector mGroups = m->getGroups(); + + map rootForGroup = getRootForGroups(t); //maps groupName to root node in tree. "root" for group may not be the trees root and we don't want to include the extra branches. + for (int l = 0; l < numIters; l++) { random_shuffle(randomLeaf.begin(), randomLeaf.end()); - + //initialize counts map counts; - map< string, set > countedBranch; - for (int j = 0; j < m->Groups.size(); j++) { counts[m->Groups[j]] = 0; countedBranch[m->Groups[j]].insert(-2); } //add dummy index to initialize countedBranch sets + vector< map > countedBranch; + for (int i = 0; i < t->getNumNodes(); i++) { + map temp; + for (int j = 0; j < mGroups.size(); j++) { temp[mGroups[j]] = false; } + countedBranch.push_back(temp); + } + + for (int j = 0; j < mGroups.size(); j++) { counts[mGroups[j]] = 0; } for(int k = 0; k < numLeafNodes; k++){ if (m->control_pressed) { return 0; } //calc branch length of randomLeaf k - vector br = calcBranchLength(t, randomLeaf[k], countedBranch); + vector br = calcBranchLength(t, randomLeaf[k], countedBranch, rootForGroup); //for each group in the groups update the total branch length accounting for the names file vector groups = t->tree[randomLeaf[k]].getGroup(); for (int j = 0; j < groups.size(); j++) { - int numSeqsInGroupJ = 0; - map::iterator it; - it = t->tree[randomLeaf[k]].pcount.find(groups[j]); - if (it != t->tree[randomLeaf[k]].pcount.end()) { //this leaf node contains seqs from group j - numSeqsInGroupJ = it->second; - } - - if (numSeqsInGroupJ != 0) { div[groups[j]][(counts[groups[j]]+1)] = div[groups[j]][counts[groups[j]]] + br[j]; } - - for (int s = (counts[groups[j]]+2); s <= (counts[groups[j]]+numSeqsInGroupJ); s++) { - div[groups[j]][s] = div[groups[j]][s-1]; //update counts, but don't add in redundant branch lengths - } - counts[groups[j]] += numSeqsInGroupJ; + + if (m->inUsersGroups(groups[j], mGroups)) { + int numSeqsInGroupJ = 0; + map::iterator it; + it = t->tree[randomLeaf[k]].pcount.find(groups[j]); + if (it != t->tree[randomLeaf[k]].pcount.end()) { //this leaf node contains seqs from group j + numSeqsInGroupJ = it->second; + } + + if (numSeqsInGroupJ != 0) { div[groups[j]][(counts[groups[j]]+1)] = div[groups[j]][counts[groups[j]]] + br[j]; } + + for (int s = (counts[groups[j]]+2); s <= (counts[groups[j]]+numSeqsInGroupJ); s++) { + div[groups[j]][s] = div[groups[j]][s-1]; //update counts, but don't add in redundant branch lengths + } + counts[groups[j]] += numSeqsInGroupJ; + } } } if (rarefy) { //add this diversity to the sum - for (int j = 0; j < m->Groups.size(); j++) { - for (int g = 0; g < div[m->Groups[j]].size(); g++) { - sumDiv[m->Groups[j]][g] += div[m->Groups[j]][g]; + for (int j = 0; j < mGroups.size(); j++) { + for (int g = 0; g < div[mGroups[j]].size(); g++) { + sumDiv[mGroups[j]][g] += div[mGroups[j]][g]; } } } @@ -546,15 +532,16 @@ void PhyloDiversityCommand::printSumData(map< string, vector >& div, ofst out << "Groups\tnumSampled\tphyloDiversity" << endl; out.setf(ios::fixed, ios::floatfield); out.setf(ios::showpoint); - - for (int j = 0; j < m->Groups.size(); j++) { - int numSampled = (div[m->Groups[j]].size()-1); - out << m->Groups[j] << '\t' << numSampled << '\t'; + + vector mGroups = m->getGroups(); + for (int j = 0; j < mGroups.size(); j++) { + int numSampled = (div[mGroups[j]].size()-1); + out << mGroups[j] << '\t' << numSampled << '\t'; float score; - if (scale) { score = (div[m->Groups[j]][numSampled] / (float)numIters) / (float)numSampled; } - else { score = div[m->Groups[j]][numSampled] / (float)numIters; } + if (scale) { score = (div[mGroups[j]][numSampled] / (float)numIters) / (float)numSampled; } + else { score = div[mGroups[j]][numSampled] / (float)numIters; } out << setprecision(4) << score << endl; } @@ -573,7 +560,8 @@ void PhyloDiversityCommand::printData(set& num, map< string, vector try { out << "numSampled\t"; - for (int i = 0; i < m->Groups.size(); i++) { out << m->Groups[i] << '\t'; } + vector mGroups = m->getGroups(); + for (int i = 0; i < mGroups.size(); i++) { out << mGroups[i] << '\t'; } out << endl; out.setf(ios::fixed, ios::floatfield); out.setf(ios::showpoint); @@ -582,12 +570,12 @@ void PhyloDiversityCommand::printData(set& num, map< string, vector int numSampled = *it; out << numSampled << '\t'; - - for (int j = 0; j < m->Groups.size(); j++) { - if (numSampled < div[m->Groups[j]].size()) { + + for (int j = 0; j < mGroups.size(); j++) { + if (numSampled < div[mGroups[j]].size()) { float score; - if (scale) { score = (div[m->Groups[j]][numSampled] / (float)numIters) / (float)numSampled; } - else { score = div[m->Groups[j]][numSampled] / (float)numIters; } + if (scale) { score = (div[mGroups[j]][numSampled] / (float)numIters) / (float)numSampled; } + else { score = div[mGroups[j]][numSampled] / (float)numIters; } out << setprecision(4) << score << '\t'; }else { out << "NA" << '\t'; } @@ -605,9 +593,9 @@ void PhyloDiversityCommand::printData(set& num, map< string, vector } //********************************************************************************************************************** //need a vector of floats one branch length for every group the node represents. -vector PhyloDiversityCommand::calcBranchLength(Tree* t, int leaf, map< string, set >& counted){ +vector PhyloDiversityCommand::calcBranchLength(Tree* t, int leaf, vector< map >& counted, map roots){ try { - + //calc the branch length //while you aren't at root vector sums; @@ -616,127 +604,101 @@ vector PhyloDiversityCommand::calcBranchLength(Tree* t, int leaf, map< st vector groups = t->tree[leaf].getGroup(); sums.resize(groups.size(), 0.0); - map > tempTotals; //maps node to total Branch Length - map< string, set > tempCounted; - set::iterator it; - - //you are a leaf + + //you are a leaf if(t->tree[index].getBranchLength() != -1){ for (int k = 0; k < groups.size(); k++) { sums[k] += abs(t->tree[index].getBranchLength()); - counted[groups[k]].insert(index); } } - - for (int k = 0; k < groups.size(); k++) { - tempTotals[groups[k]][index] = 0.0; - } - - index = t->tree[index].getParent(); - + + + index = t->tree[index].getParent(); + //while you aren't at root while(t->tree[index].getParent() != -1){ - + if (m->control_pressed) { return sums; } - int pcountSize = 0; for (int k = 0; k < groups.size(); k++) { - map::iterator itGroup = t->tree[index].pcount.find(groups[k]); - if (itGroup != t->tree[index].pcount.end()) { pcountSize++; } - - //do both your chidren have have descendants from the users groups? - int lc = t->tree[index].getLChild(); - int rc = t->tree[index].getRChild(); - - int LpcountSize = 0; - itGroup = t->tree[lc].pcount.find(groups[k]); - if (itGroup != t->tree[lc].pcount.end()) { LpcountSize++; } - - int RpcountSize = 0; - itGroup = t->tree[rc].pcount.find(groups[k]); - if (itGroup != t->tree[rc].pcount.end()) { RpcountSize++; } - - //if yes, add your childrens tempTotals - if ((LpcountSize != 0) && (RpcountSize != 0)) { - sums[k] += tempTotals[groups[k]][lc] + tempTotals[groups[k]][rc]; - - for (it = tempCounted[groups[k]].begin(); it != tempCounted[groups[k]].end(); it++) { counted[groups[k]].insert(*it); } - - //cout << "added to total " << tempTotals[lc] << '\t' << tempTotals[rc] << endl; - if (t->tree[index].getBranchLength() != -1) { - if (counted[groups[k]].count(index) == 0) { - tempTotals[groups[k]][index] = abs(t->tree[index].getBranchLength()); - tempCounted[groups[k]].insert(index); - }else{ - tempTotals[groups[k]][index] = 0.0; - } - }else { - tempTotals[groups[k]][index] = 0.0; - } - }else { //if no, your tempTotal is your childrens temp totals + your branch length - tempTotals[groups[k]][index] = tempTotals[groups[k]][lc] + tempTotals[groups[k]][rc]; - - if (counted[groups[k]].count(index) == 0) { - tempTotals[groups[k]][index] += abs(t->tree[index].getBranchLength()); - tempCounted[groups[k]].insert(index); - } - - } - //cout << "temptotal = "<< tempTotals[i] << endl; - } - - index = t->tree[index].getParent(); - } - + + if (index >= roots[groups[k]]) { counted[index][groups[k]] = true; } //if you are at this groups "root", then say we are done + + if (!counted[index][groups[k]]){ //if counted[index][groups[k] is true this groups has already added all br from here to root, so quit early + if (t->tree[index].getBranchLength() != -1) { + sums[k] += abs(t->tree[index].getBranchLength()); + } + counted[index][groups[k]] = true; + } + } + index = t->tree[index].getParent(); + } + return sums; - + } catch(exception& e) { m->errorOut(e, "PhyloDiversityCommand", "calcBranchLength"); exit(1); } } -/*****************************************************************/ -int PhyloDiversityCommand::readNamesFile() { +//********************************************************************************************************************** +map PhyloDiversityCommand::getRootForGroups(Tree* t){ try { - m->names.clear(); - numUniquesInName = 0; - - ifstream in; - m->openInputFile(namefile, in); - - string first, second; - map::iterator itNames; - - while(!in.eof()) { - in >> first >> second; m->gobble(in); - - numUniquesInName++; - - itNames = m->names.find(first); - if (itNames == m->names.end()) { - m->names[first] = second; - - //we need a list of names in your namefile to use above when removing extra seqs above so we don't remove them - vector dupNames; - m->splitAtComma(second, dupNames); - - for (int i = 0; i < dupNames.size(); i++) { - nameMap[dupNames[i]] = dupNames[i]; - if ((groupfile == "") && (i != 0)) { tmap->addSeq(dupNames[i], "Group1"); } - } - }else { m->mothurOut(first + " has already been seen in namefile, disregarding names file."); m->mothurOutEndLine(); in.close(); m->names.clear(); namefile = ""; return 1; } - } - in.close(); - - return 0; + map roots; //maps group to root for group, may not be root of tree + map done; + + //initialize root for all groups to -1 + for (int k = 0; k < (t->getTreeMap())->getNamesOfGroups().size(); k++) { done[(t->getTreeMap())->getNamesOfGroups()[k]] = false; } + + for (int i = 0; i < t->getNumLeaves(); i++) { + + vector groups = t->tree[i].getGroup(); + + int index = t->tree[i].getParent(); + + for (int j = 0; j < groups.size(); j++) { + + if (done[groups[j]] == false) { //we haven't found the root for this group yet + + done[groups[j]] = true; + roots[groups[j]] = i; //set root to self to start + + //while you aren't at root + while(t->tree[index].getParent() != -1){ + + if (m->control_pressed) { return roots; } + + //do both your chidren have have descendants from the users groups? + int lc = t->tree[index].getLChild(); + int rc = t->tree[index].getRChild(); + + int LpcountSize = 0; + map:: iterator itGroup = t->tree[lc].pcount.find(groups[j]); + if (itGroup != t->tree[lc].pcount.end()) { LpcountSize++; } + + int RpcountSize = 0; + itGroup = t->tree[rc].pcount.find(groups[j]); + if (itGroup != t->tree[rc].pcount.end()) { RpcountSize++; } + + if ((LpcountSize != 0) && (RpcountSize != 0)) { //possible root + roots[groups[j]] = index; + }else { ;} + + index = t->tree[index].getParent(); + } + } + } + } + + return roots; + } catch(exception& e) { - m->errorOut(e, "PhyloDiversityCommand", "readNamesFile"); + m->errorOut(e, "PhyloDiversityCommand", "getRootForGroups"); exit(1); } } - //**********************************************************************************************************************