if(f.peek() == ':'){
readSpecialChar(f,':',"colon");
- if(n >= numNodes){ m->mothurOut("Error: Too many nodes in input tree\n"); readOk = -1; return -1; }
+ if(n >= numNodes){ m->mothurOut("Error: Too many nodes in input tree\n"); readOk = -1; return -1; }
T->tree[n].setBranchLength(readBranchLength(f));
}else{
numNodes = 2*numLeaves - 1;
tree.resize(numNodes);
+
+ //initialize groupNodeInfo
+ for (int i = 0; i < globaldata->gTreemap->namesOfGroups.size(); i++) {
+ groupNodeInfo[globaldata->gTreemap->namesOfGroups[i]].resize(0);
+ }
//initialize tree with correct number of nodes, name and group info.
for (int i = 0; i < numNodes; i++) {
//initialize leaf nodes
if (i <= (numLeaves-1)) {
tree[i].setName(globaldata->Treenames[i]);
- vector<string> tempGroups; tempGroups.push_back(globaldata->gTreemap->getGroup(globaldata->Treenames[i]));
+
+ //save group info
+ string group = globaldata->gTreemap->getGroup(globaldata->Treenames[i]);
+ vector<string> tempGroups; tempGroups.push_back(group);
tree[i].setGroup(tempGroups);
+ groupNodeInfo[group].push_back(i);
+
//set pcount and pGroup for groupname to 1.
- tree[i].pcount[globaldata->gTreemap->getGroup(globaldata->Treenames[i])] = 1;
- tree[i].pGroups[globaldata->gTreemap->getGroup(globaldata->Treenames[i])] = 1;
+ tree[i].pcount[group] = 1;
+ tree[i].pGroups[group] = 1;
+
//Treemap knows name, group and index to speed up search
globaldata->gTreemap->setIndex(globaldata->Treenames[i], i);
map<string, int>::iterator itCounts;
int maxPars = 1;
+ set<string> groupsAddedForThisNode;
for (int j = 0; j < dupNames.size(); j++) {
-
+
+ string group = globaldata->gTreemap->getGroup(dupNames[j]);
+
if (dupNames[j] != name) {//you already added yourself in the constructor
- string group = globaldata->gTreemap->getGroup(dupNames[j]);
+
+ if (groupsAddedForThisNode.count(group) == 0) { groupNodeInfo[group].push_back(i); groupsAddedForThisNode.insert(group); } //if you have not already added this node for this group, then add it
//update pcounts
itCounts = tree[i].pcount.find(group);
if(tree[i].pGroups[group] > maxPars){
maxPars = tree[i].pGroups[group];
}
- }//end if
+ }else { groupsAddedForThisNode.insert(group); } //add it so you don't add it to groupNodeInfo again
}//end for
if (maxPars > 1) { //then we have some more dominant groups
exit(1);
}
}
+/*****************************************************************/
+int Tree::assembleTree(string n) {
+ try {
+
+ //build the pGroups in non leaf nodes to be used in the parsimony calcs.
+ for (int i = numLeaves; i < numNodes; i++) {
+ if (m->control_pressed) { return 1; }
+
+ tree[i].pGroups = (mergeGroups(i));
+ tree[i].pcount = (mergeGcounts(i));
+ }
+ //float B = clock();
+ //cout << "assembleTree\t" << (B-A) / CLOCKS_PER_SEC << endl;
+ return 0;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "Tree", "assembleTree");
+ exit(1);
+ }
+}
+
/*****************************************************************/
void Tree::getCopy(Tree* copy) {
try {
tree[i].pcount = copy->tree[i].pcount;
}
+ groupNodeInfo = copy->groupNodeInfo;
+
}
catch(exception& e) {
m->errorOut(e, "Tree", "getCopy");
void Tree::randomLabels(vector<string> g) {
try {
+
+ //initialize groupNodeInfo
+ for (int i = 0; i < globaldata->gTreemap->namesOfGroups.size(); i++) {
+ groupNodeInfo[globaldata->gTreemap->namesOfGroups[i]].resize(0);
+ }
for(int i = 0; i < numLeaves; i++){
int z;
tree[z].pcount = (tree[i].pcount);
tree[i].pcount = (gcount_hold);
}
+
+ for (int k = 0; k < (tree[i].getGroup()).size(); k++) { groupNodeInfo[(tree[i].getGroup())[k]].push_back(i); }
+ for (int k = 0; k < (tree[z].getGroup()).size(); k++) { groupNodeInfo[(tree[z].getGroup())[k]].push_back(z); }
}
}
catch(exception& e) {
/*************************************************************************************************/
void Tree::assembleRandomUnifracTree(vector<string> g) {
randomLabels(g);
- assembleTree();
+ assembleTree("noNameCounts");
}
/*************************************************************************************************/
void Tree::assembleRandomUnifracTree(string groupA, string groupB) {
vector<string> temp; temp.push_back(groupA); temp.push_back(groupB);
randomLabels(temp);
- assembleTree();
+ assembleTree("noNameCounts");
}
/*************************************************************************************************/
//this function takes the leaf info and populates the non leaf nodes
int assembleTree();
+ int assembleTree(string);
vector<Node> tree; //the first n nodes are the leaves, where n is the number of sequences.
-
+ map< string, vector<int> > groupNodeInfo; //maps group to indexes of leaf nodes with that group, different groups may contain same node because of names file.
private:
GlobalData* globaldata;
vector<ofstream*> fastaFileNames;
vector<ofstream*> qualFileNames;
-
+ cout << "here" << endl;
if (oligoFile != "") {
m->openOutputFile(groupFile, outGroups);
for (int i = 0; i < fastaNames.size(); i++) {
#endif
}
}
-
+cout << "here " << filename << endl;
ifstream inFASTA;
m->openInputFile(filename, inFASTA);
inFASTA.seekg(line->start);
-
+ cout << "here " << qFileName << endl;
ifstream qFile;
if(qFileName != "") { m->openInputFile(qFileName, qFile); qFile.seekg(qline->start); }
bool done = false;
int count = 0;
-
+ cout << "here" << endl;
while (!done) {
if (m->control_pressed) {
if (abort == true) { return 0; }
+ int start = time(NULL);
+
userData.resize(numComp,0); //data[0] = unweightedscore
randomData.resize(numComp,0); //data[0] = unweightedscore
//create new tree with same num nodes and leaves as users
if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; }
+ m->mothurOut("It took " + toString(time(NULL) - start) + " secs to run unifrac.unweighted."); m->mothurOutEndLine();
+
m->mothurOutEndLine();
m->mothurOut("Output File Names: "); m->mothurOutEndLine();
for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); }
if (abort == true) { return 0; }
+ int start = time(NULL);
+
Progress* reading;
if (random) { reading = new Progress("Comparing to random:", iters); }
return 0;
}
+ m->mothurOut("It took " + toString(time(NULL) - start) + " secs to run unifrac.weighted."); m->mothurOutEndLine();
+
m->mothurOutEndLine();
m->mothurOut("Output File Names: "); m->mothurOutEndLine();
for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); }
vector<double> D;
numGroups = globaldata->Groups.size();
+
+ vector<double> sums = getBranchLengthSums(t);
+
+ if (m->control_pressed) { return data; }
//calculate number of comparisons i.e. with groups A,B,C = AB, AC, BC = 3;
int count = 0;
vector<string> groups; groups.push_back(globaldata->Groups[i]); groups.push_back(globaldata->Groups[l]);
D.push_back(0.0000); //initialize a spot in D for each combination
+
+ //adding the wieghted sums from group i
+ for (int j = 0; j < t->groupNodeInfo[groups[0]].size(); j++) { //the leaf nodes that have seqs from group i
+ map<string, int>::iterator it = t->tree[t->groupNodeInfo[groups[0]][j]].pcount.find(groups[0]);
+ int numSeqsInGroupI = it->second;
+
+ double weightedSum = ((numSeqsInGroupI * sums[t->groupNodeInfo[groups[0]][j]]) / (double)tmap->seqsPerGroup[groups[0]]);
- /********************************************************/
+ D[count] += weightedSum;
+ }
+
+ //adding the wieghted sums from group l
+ for (int j = 0; j < t->groupNodeInfo[groups[1]].size(); j++) { //the leaf nodes that have seqs from group l
+ map<string, int>::iterator it = t->tree[t->groupNodeInfo[groups[1]][j]].pcount.find(groups[1]);
+ int numSeqsInGroupL = it->second;
+
+ double weightedSum = ((numSeqsInGroupL * sums[t->groupNodeInfo[groups[1]][j]]) / (double)tmap->seqsPerGroup[groups[1]]);
+
+ D[count] += weightedSum;
+ }
+
+ /********************************************************
//calculate a D value for each group combo
for(int v=0;v<t->getNumLeaves();v++){
if (m->control_pressed) { return data; }
- int index = v;
- double sum = 0.0000;
-
- //while you aren't at root
- while(t->tree[index].getParent() != -1){
-
- //if you have a BL
- if(t->tree[index].getBranchLength() != -1){
- sum += abs(t->tree[index].getBranchLength());
- }
- index = t->tree[index].getParent();
- }
-
- //get last breanch length added
- if(t->tree[index].getBranchLength() != -1){
- sum += abs(t->tree[index].getBranchLength());
- }
-
//is this sum from a sequence which is in one of the users groups
if (m->inUsersGroups(t->tree[v].getGroup(), globaldata->Groups) == true) {
//is this sum from a sequence which is in this groupCombo
data.clear(); //clear out old values
+ vector<double> sums = getBranchLengthSums(t);
+
+ if (m->control_pressed) { return data; }
+
//initialize weighted score
WScore[(groupA+groupB)] = 0.0;
- float D = 0.0;
+ double D = 0.0;
vector<string> groups; groups.push_back(groupA); groups.push_back(groupB);
-
- /********************************************************/
+
+ //adding the wieghted sums from groupA
+ for (int j = 0; j < t->groupNodeInfo[groupA].size(); j++) { //the leaf nodes that have seqs from group i
+ map<string, int>::iterator it = t->tree[j].pcount.find(groupA);
+ int numSeqsInGroupA = it->second;
+
+ double weightedSum = ((numSeqsInGroupA * sums[j]) / (double)tmap->seqsPerGroup[groupA]);
+
+ D += weightedSum;
+ }
+
+ //adding the wieghted sums from groupB
+ for (int j = 0; j < t->groupNodeInfo[groupB].size(); j++) { //the leaf nodes that have seqs from group l
+ map<string, int>::iterator it = t->tree[j].pcount.find(groupB);
+ int numSeqsInGroupB = it->second;
+
+ double weightedSum = ((numSeqsInGroupB * sums[j]) / (double)tmap->seqsPerGroup[groupB]);
+
+ D += weightedSum;
+ }
+
+
+ /********************************************************
//calculate a D value for the group combo
for(int v=0;v<t->getNumLeaves();v++){
if (m->control_pressed) { return data; }
exit(1);
}
}
-
-
-
-
-
-
+/**************************************************************************************************/
+vector<double> Weighted::getBranchLengthSums(Tree* t) {
+ try {
+
+ vector<double> sums;
+
+ for(int v=0;v<t->getNumLeaves();v++){
+
+ if (m->control_pressed) { return sums; }
+
+ int index = v;
+ double sum = 0.0000;
+
+ //while you aren't at root
+ while(t->tree[index].getParent() != -1){
+
+ //if you have a BL
+ if(t->tree[index].getBranchLength() != -1){
+ sum += abs(t->tree[index].getBranchLength());
+ }
+ index = t->tree[index].getParent();
+ }
+
+ //get last breanch length added
+ if(t->tree[index].getBranchLength() != -1){
+ sum += abs(t->tree[index].getBranchLength());
+ }
+
+ sums.push_back(sum);
+ }
+
+ return sums;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "Weighted", "getBranchLengthSums");
+ exit(1);
+ }
+}
+/**************************************************************************************************/
TreeMap* tmap;
map<string, int>::iterator it;
map<string, double> WScore; //a score for each group combination i.e. AB, AC, BC.
+
+ vector<double> getBranchLengthSums(Tree*);
};
/***********************************************************************/