X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=weighted.cpp;h=30e4a7682dc4ae4ad840fcdf2f7a31921fc8c351;hb=5694c92fbf646fe01abc87bde2af59e14a9a56b6;hp=d068369cca57d73c260e56e522b51fc5eef97fea;hpb=f099fdc1e3a0d7b75d780a164e5bdb93496a7a1d;p=mothur.git diff --git a/weighted.cpp b/weighted.cpp index d068369..30e4a76 100644 --- a/weighted.cpp +++ b/weighted.cpp @@ -22,15 +22,13 @@ EstOutput Weighted::getValues(Tree* t, int p, string o) { outputDir = o; numGroups = globaldata->Groups.size(); - - vector sums = getBranchLengthSums(t); if (m->control_pressed) { return data; } //calculate number of comparisons i.e. with groups A,B,C = AB, AC, BC = 3; vector< vector > namesOfGroupCombos; for (int i=0; iGroups[i]+globaldata->Groups[l]] = 0.0; vector groups; groups.push_back(globaldata->Groups[i]); groups.push_back(globaldata->Groups[l]); @@ -40,7 +38,7 @@ EstOutput Weighted::getValues(Tree* t, int p, string o) { #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) if(processors == 1){ - data = driver(t, namesOfGroupCombos, 0, namesOfGroupCombos.size(), sums); + data = driver(t, namesOfGroupCombos, 0, namesOfGroupCombos.size()); }else{ int numPairs = namesOfGroupCombos.size(); @@ -51,15 +49,15 @@ EstOutput Weighted::getValues(Tree* t, int p, string o) { if(i == processors - 1){ numPairsPerProcessor = numPairs - i * numPairsPerProcessor; } - lines.push_back(new linePair(startPos, numPairsPerProcessor)); + lines.push_back(linePair(startPos, numPairsPerProcessor)); } - data = createProcesses(t, namesOfGroupCombos, sums); + data = createProcesses(t, namesOfGroupCombos); - for (int i = 0; i < lines.size(); i++) { delete lines[i]; } lines.clear(); + lines.clear(); } #else - data = driver(t, namesOfGroupCombos, 0, namesOfGroupCombos.size(), sums); + data = driver(t, namesOfGroupCombos, 0, namesOfGroupCombos.size()); #endif return data; @@ -71,11 +69,10 @@ EstOutput Weighted::getValues(Tree* t, int p, string o) { } /**************************************************************************************************/ -EstOutput Weighted::createProcesses(Tree* t, vector< vector > namesOfGroupCombos, vector& sums) { +EstOutput Weighted::createProcesses(Tree* t, vector< vector > namesOfGroupCombos) { try { #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) - int process = 0; - int num = 0; + int process = 1; vector processIDS; EstOutput results; @@ -88,64 +85,63 @@ EstOutput Weighted::createProcesses(Tree* t, vector< vector > namesOfGro processIDS.push_back(pid); //create map from line number to pid so you can append files in correct order later process++; }else if (pid == 0){ - results = driver(t, namesOfGroupCombos, lines[process]->start, lines[process]->num, sums); - - if (m->control_pressed) { exit(0); } - + + EstOutput Myresults; + Myresults = driver(t, namesOfGroupCombos, lines[process].start, lines[process].num); + m->mothurOut("Merging results."); m->mothurOutEndLine(); //pass numSeqs to parent ofstream out; - string tempFile = outputDir + toString(getpid()) + ".results.temp"; + + string tempFile = outputDir + toString(getpid()) + ".weighted.results.temp"; + m->openOutputFile(tempFile, out); - out << results.size() << endl; - for (int i = 0; i < results.size(); i++) { out << results[i] << '\t'; } out << endl; + + out << Myresults.size() << endl; + for (int i = 0; i < Myresults.size(); i++) { out << Myresults[i] << '\t'; } out << endl; out.close(); exit(0); - }else { m->mothurOut("unable to spawn the necessary processes."); m->mothurOutEndLine(); exit(0); } + }else { + m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine(); + for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); } + exit(0); + } } - + + results = driver(t, namesOfGroupCombos, lines[0].start, lines[0].num); + //force parent to wait until all the processes are done - for (int i=0;icontrol_pressed) { return results; } //get data created by processes - for (int i=0;iopenInputFile(s, in); - vector r; - //get quantiles while (!in.eof()) { int num; - in >> num; + in >> num; m->gobble(in); if (m->control_pressed) { break; } - - m->gobble(in); double w; for (int j = 0; j < num; j++) { in >> w; - r.push_back(w); + results.push_back(w); } m->gobble(in); } in.close(); remove(s.c_str()); - - //save quan in quantiles - for (int j = 0; j < r.size(); j++) { - //put all values of r into results - results.push_back(r[j]); - } } m->mothurOut("DONE."); m->mothurOutEndLine(); m->mothurOutEndLine(); @@ -159,17 +155,12 @@ EstOutput Weighted::createProcesses(Tree* t, vector< vector > namesOfGro } } /**************************************************************************************************/ -EstOutput Weighted::driver(Tree* t, vector< vector > namesOfGroupCombos, int start, int num, vector& sums) { +EstOutput Weighted::driver(Tree* t, vector< vector > namesOfGroupCombos, int start, int num) { try { - globaldata = GlobalData::getInstance(); - EstOutput results; vector D; int count = 0; - int total = start+num; - int twentyPercent = (total * 0.20); - for (int h = start; h < (start+num); h++) { if (m->control_pressed) { return results; } @@ -178,6 +169,7 @@ EstOutput Weighted::driver(Tree* t, vector< vector > namesOfGroupCombos, string groupA = namesOfGroupCombos[h][0]; string groupB = namesOfGroupCombos[h][1]; + set validBranches; WScore[groupA+groupB] = 0.0; D.push_back(0.0000); //initialize a spot in D for each combination @@ -186,62 +178,67 @@ EstOutput Weighted::driver(Tree* t, vector< vector > namesOfGroupCombos, map::iterator it = t->tree[t->groupNodeInfo[groupA][j]].pcount.find(groupA); int numSeqsInGroupI = it->second; - double weightedSum = ((numSeqsInGroupI * sums[t->groupNodeInfo[groupA][j]]) / (double)tmap->seqsPerGroup[groupA]); + double sum = getLengthToRoot(t, t->groupNodeInfo[groupA][j], groupA, groupB); + double weightedSum = ((numSeqsInGroupI * sum) / (double)tmap->seqsPerGroup[groupA]); D[count] += weightedSum; } - + //adding the wieghted sums from group l for (int j = 0; j < t->groupNodeInfo[groupB].size(); j++) { //the leaf nodes that have seqs from group l map::iterator it = t->tree[t->groupNodeInfo[groupB][j]].pcount.find(groupB); int numSeqsInGroupL = it->second; - double weightedSum = ((numSeqsInGroupL * sums[t->groupNodeInfo[groupB][j]]) / (double)tmap->seqsPerGroup[groupB]); + double sum = getLengthToRoot(t, t->groupNodeInfo[groupB][j], groupA, groupB); + double weightedSum = ((numSeqsInGroupL * sum) / (double)tmap->seqsPerGroup[groupB]); D[count] += weightedSum; } - count++; - //report progress - if((h) % twentyPercent == 0){ m->mothurOut("Percentage complete: " + toString(int((h / (float)total) * 100.0))); m->mothurOutEndLine(); } + count++; } - m->mothurOut("Percentage complete: 100"); m->mothurOutEndLine(); - //calculate u for the group comb - for(int i=0;igetNumNodes();i++){ - for (int h = start; h < (start+num); h++) { + + for (int h = start; h < (start+num); h++) { + //report progress + m->mothurOut("Processing combo: " + toString(h)); m->mothurOutEndLine(); + + string groupA = namesOfGroupCombos[h][0]; + string groupB = namesOfGroupCombos[h][1]; + + //calculate u for the group comb + for(int i=0;igetNumNodes();i++){ - string groupA = namesOfGroupCombos[h][0]; - string groupB = namesOfGroupCombos[h][1]; - - if (m->control_pressed) { return results; } + if (m->control_pressed) { return data; } double u; + //int pcountSize = 0; //does this node have descendants from groupA it = t->tree[i].pcount.find(groupA); //if it does u = # of its descendants with a certain group / total number in tree with a certain group if (it != t->tree[i].pcount.end()) { u = (double) t->tree[i].pcount[groupA] / (double) tmap->seqsPerGroup[groupA]; }else { u = 0.00; } - - + + //does this node have descendants from group l it = t->tree[i].pcount.find(groupB); //if it does subtract their percentage from u if (it != t->tree[i].pcount.end()) { u -= (double) t->tree[i].pcount[groupB] / (double) tmap->seqsPerGroup[groupB]; } - - u = abs(u * t->tree[i].getBranchLength()); - - //save groupcombs u value - WScore[(groupA+groupB)] += u; + + //if this is not the root then add it + if (rootForGrouping[namesOfGroupCombos[h]].count(i) == 0) { + u = abs(u * t->tree[i].getBranchLength()); + WScore[(groupA+groupB)] += u; + } + } } /********************************************************/ - //calculate weighted score for the group combination double UN; count = 0; @@ -261,7 +258,7 @@ EstOutput Weighted::driver(Tree* t, vector< vector > namesOfGroupCombos, } } /**************************************************************************************************/ -EstOutput Weighted::getValues(Tree* t, string groupA, string groupB, vector& sums) { +EstOutput Weighted::getValues(Tree* t, string groupA, string groupB) { try { globaldata = GlobalData::getInstance(); @@ -272,6 +269,7 @@ EstOutput Weighted::getValues(Tree* t, string groupA, string groupB, vector validBranches; vector groups; groups.push_back(groupA); groups.push_back(groupB); @@ -280,7 +278,8 @@ EstOutput Weighted::getValues(Tree* t, string groupA, string groupB, vector::iterator it = t->tree[t->groupNodeInfo[groups[0]][j]].pcount.find(groups[0]); int numSeqsInGroupI = it->second; - double weightedSum = ((numSeqsInGroupI * sums[t->groupNodeInfo[groups[0]][j]]) / (double)tmap->seqsPerGroup[groups[0]]); + double sum = getLengthToRoot(t, t->groupNodeInfo[groups[0]][j], groups[0], groups[1]); + double weightedSum = ((numSeqsInGroupI * sum) / (double)tmap->seqsPerGroup[groups[0]]); D += weightedSum; } @@ -290,38 +289,40 @@ EstOutput Weighted::getValues(Tree* t, string groupA, string groupB, vector::iterator it = t->tree[t->groupNodeInfo[groups[1]][j]].pcount.find(groups[1]); int numSeqsInGroupL = it->second; - double weightedSum = ((numSeqsInGroupL * sums[t->groupNodeInfo[groups[1]][j]]) / (double)tmap->seqsPerGroup[groups[1]]); + double sum = getLengthToRoot(t, t->groupNodeInfo[groups[1]][j], groups[0], groups[1]); + double weightedSum = ((numSeqsInGroupL * sum) / (double)tmap->seqsPerGroup[groups[1]]); D += weightedSum; } - + //calculate u for the group comb for(int i=0;igetNumNodes();i++){ - + if (m->control_pressed) { return data; } double u; + //int pcountSize = 0; //does this node have descendants from groupA it = t->tree[i].pcount.find(groupA); //if it does u = # of its descendants with a certain group / total number in tree with a certain group if (it != t->tree[i].pcount.end()) { u = (double) t->tree[i].pcount[groupA] / (double) tmap->seqsPerGroup[groupA]; }else { u = 0.00; } - - + + //does this node have descendants from group l it = t->tree[i].pcount.find(groupB); //if it does subtract their percentage from u if (it != t->tree[i].pcount.end()) { u -= (double) t->tree[i].pcount[groupB] / (double) tmap->seqsPerGroup[groupB]; } - - u = abs(u * t->tree[i].getBranchLength()); - - //save groupcombs u value - WScore[(groupA+groupB)] += u; - } - + + //if this is not the root then add it + if (rootForGrouping[groups].count(i) == 0) { + u = abs(u * t->tree[i].getBranchLength()); + WScore[(groupA+groupB)] += u; + } + } /********************************************************/ //calculate weighted score for the group combination @@ -339,37 +340,71 @@ EstOutput Weighted::getValues(Tree* t, string groupA, string groupB, vector Weighted::getBranchLengthSums(Tree* t) { +double Weighted::getLengthToRoot(Tree* t, int v, string groupA, string groupB) { try { - vector sums; + double sum = 0.0; + int index = v; + + //you are a leaf + if(t->tree[index].getBranchLength() != -1){ sum += abs(t->tree[index].getBranchLength()); } + double tempTotal = 0.0; + index = t->tree[index].getParent(); + + vector grouping; grouping.push_back(groupA); grouping.push_back(groupB); - for(int v=0;vgetNumLeaves();v++){ + rootForGrouping[grouping].insert(index); - if (m->control_pressed) { return sums; } + //while you aren't at root + while(t->tree[index].getParent() != -1){ + + if (m->control_pressed) { return sum; } - int index = v; - double sum = 0.0000; - - //while you aren't at root - while(t->tree[index].getParent() != -1){ - - //if you have a BL - if(t->tree[index].getBranchLength() != -1){ - sum += abs(t->tree[index].getBranchLength()); + //am I the root for this grouping? if so I want to stop "early" + //does my sibling have descendants from the users groups? + int parent = t->tree[index].getParent(); + int lc = t->tree[parent].getLChild(); + int rc = t->tree[parent].getRChild(); + + int sib = lc; + if (lc == index) { sib = rc; } + + map::iterator itGroup; + int pcountSize = 0; + itGroup = t->tree[sib].pcount.find(groupA); + if (itGroup != t->tree[sib].pcount.end()) { pcountSize++; } + itGroup = t->tree[sib].pcount.find(groupB); + if (itGroup != t->tree[sib].pcount.end()) { pcountSize++; } + + //if yes, I am not the root so add me + if (pcountSize != 0) { + if (t->tree[index].getBranchLength() != -1) { + sum += abs(t->tree[index].getBranchLength()) + tempTotal; + tempTotal = 0.0; + }else { + sum += tempTotal; + tempTotal = 0.0; + } + rootForGrouping[grouping].clear(); + rootForGrouping[grouping].insert(parent); + }else { //if no, I may be the root so add my br to tempTotal until I am proven innocent + if (t->tree[index].getBranchLength() != -1) { + tempTotal += abs(t->tree[index].getBranchLength()); } - index = t->tree[index].getParent(); - } - - //get last breanch length added - if(t->tree[index].getBranchLength() != -1){ - sum += abs(t->tree[index].getBranchLength()); } - sums.push_back(sum); + index = parent; + } + + //get all nodes above the root to add so we don't add their u values above + index = *(rootForGrouping[grouping].begin()); + while(t->tree[index].getParent() != -1){ + int parent = t->tree[index].getParent(); + rootForGrouping[grouping].insert(parent); + index = parent; } - return sums; + return sum; } catch(exception& e) { m->errorOut(e, "Weighted", "getBranchLengthSums"); @@ -379,4 +414,3 @@ vector Weighted::getBranchLengthSums(Tree* t) { /**************************************************************************************************/ -