X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=weighted.cpp;h=b0d06fb0078e0201975e7738d1321e7932230253;hb=372fb21ea66ced432b109225851a1b80ef0491a3;hp=6cbeae8572010f36d943121e0aceb689a460e444;hpb=257eaa172451ede9d63a0715f6cdb7336a52996b;p=mothur.git diff --git a/weighted.cpp b/weighted.cpp index 6cbeae8..b0d06fb 100644 --- a/weighted.cpp +++ b/weighted.cpp @@ -13,15 +13,15 @@ EstOutput Weighted::getValues(Tree* t, int p, string o) { try { - globaldata = GlobalData::getInstance(); - data.clear(); //clear out old values int numGroups; vector D; processors = p; outputDir = o; + + CountTable* ct = t->getCountTable(); - numGroups = globaldata->Groups.size(); + numGroups = m->getNumGroups(); if (m->control_pressed) { return data; } @@ -31,35 +31,24 @@ EstOutput Weighted::getValues(Tree* t, int p, string o) { for (int l = 0; l < i; l++) { //initialize weighted scores //WScore[globaldata->Groups[i]+globaldata->Groups[l]] = 0.0; - vector groups; groups.push_back(globaldata->Groups[i]); groups.push_back(globaldata->Groups[l]); + vector groups; groups.push_back((m->getGroups())[i]); groups.push_back((m->getGroups())[l]); namesOfGroupCombos.push_back(groups); } } - #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) - if(processors == 1){ - data = driver(t, namesOfGroupCombos, 0, namesOfGroupCombos.size()); - }else{ - int numPairs = namesOfGroupCombos.size(); - - int numPairsPerProcessor = numPairs / processors; - - for (int i = 0; i < processors; i++) { - int startPos = i * numPairsPerProcessor; - if(i == processors - 1){ - numPairsPerProcessor = numPairs - i * numPairsPerProcessor; - } - lines.push_back(linePair(startPos, numPairsPerProcessor)); - } + int numPairs = namesOfGroupCombos.size(); + int numPairsPerProcessor = numPairs / processors; + + for (int i = 0; i < processors; i++) { + int startPos = i * numPairsPerProcessor; + if(i == processors - 1){ numPairsPerProcessor = numPairs - i * numPairsPerProcessor; } + lines.push_back(linePair(startPos, numPairsPerProcessor)); + } + + data = createProcesses(t, namesOfGroupCombos, ct); + + lines.clear(); - data = createProcesses(t, namesOfGroupCombos); - - lines.clear(); - } - #else - data = driver(t, namesOfGroupCombos, 0, namesOfGroupCombos.size()); - #endif - return data; } catch(exception& e) { @@ -69,14 +58,12 @@ EstOutput Weighted::getValues(Tree* t, int p, string o) { } /**************************************************************************************************/ -EstOutput Weighted::createProcesses(Tree* t, vector< vector > namesOfGroupCombos) { +EstOutput Weighted::createProcesses(Tree* t, vector< vector > namesOfGroupCombos, CountTable* ct) { try { -#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) - int process = 1; - int num = 0; - vector processIDS; - + vector processIDS; EstOutput results; +#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix) + int process = 1; //loop through and create all the processes you want while (process != processors) { @@ -86,17 +73,12 @@ EstOutput Weighted::createProcesses(Tree* t, vector< vector > namesOfGro processIDS.push_back(pid); //create map from line number to pid so you can append files in correct order later process++; }else if (pid == 0){ - EstOutput Myresults; - Myresults = driver(t, namesOfGroupCombos, lines[process].start, lines[process].num); + Myresults = driver(t, namesOfGroupCombos, lines[process].start, lines[process].num, ct); - m->mothurOut("Merging results."); m->mothurOutEndLine(); - //pass numSeqs to parent ofstream out; - string tempFile = outputDir + toString(getpid()) + ".weighted.results.temp"; - m->openOutputFile(tempFile, out); out << Myresults.size() << endl; @@ -104,10 +86,14 @@ EstOutput Weighted::createProcesses(Tree* t, vector< vector > namesOfGro out.close(); exit(0); - }else { m->mothurOut("unable to spawn the necessary processes."); m->mothurOutEndLine(); exit(0); } + }else { + m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine(); + for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); } + exit(0); + } } - results = driver(t, namesOfGroupCombos, lines[0].start, lines[0].num); + results = driver(t, namesOfGroupCombos, lines[0].start, lines[0].num, ct); //force parent to wait until all the processes are done for (int i=0;i<(processors-1);i++) { @@ -138,13 +124,50 @@ EstOutput Weighted::createProcesses(Tree* t, vector< vector > namesOfGro m->gobble(in); } in.close(); - remove(s.c_str()); + m->mothurRemove(s); + } +#else + + //fill in functions + vector pDataArray; + DWORD dwThreadIdArray[processors-1]; + HANDLE hThreadArray[processors-1]; + vector cts; + vector trees; + + //Create processor worker threads. + for( int i=1; icopy(ct); + Tree* copyTree = new Tree(copyCount); + copyTree->getCopy(t); + + cts.push_back(copyCount); + trees.push_back(copyTree); + + weightedData* tempweighted = new weightedData(m, lines[i].start, lines[i].num, namesOfGroupCombos, copyTree, copyCount, includeRoot); + pDataArray.push_back(tempweighted); + processIDS.push_back(i); + + hThreadArray[i-1] = CreateThread(NULL, 0, MyWeightedThreadFunction, pDataArray[i-1], 0, &dwThreadIdArray[i-1]); } - m->mothurOut("DONE."); m->mothurOutEndLine(); m->mothurOutEndLine(); - - return results; -#endif + results = driver(t, namesOfGroupCombos, lines[0].start, lines[0].num, ct); + + //Wait until all threads have terminated. + WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE); + + //Close all thread handles and free memory allocations. + for(int i=0; i < pDataArray.size(); i++){ + for (int j = 0; j < pDataArray[i]->results.size(); j++) { results.push_back(pDataArray[i]->results[j]); } + delete cts[i]; + delete trees[i]; + CloseHandle(hThreadArray[i]); + delete pDataArray[i]; + } +#endif + + return results; } catch(exception& e) { m->errorOut(e, "Weighted", "createProcesses"); @@ -152,7 +175,7 @@ EstOutput Weighted::createProcesses(Tree* t, vector< vector > namesOfGro } } /**************************************************************************************************/ -EstOutput Weighted::driver(Tree* t, vector< vector > namesOfGroupCombos, int start, int num) { +EstOutput Weighted::driver(Tree* t, vector< vector > namesOfGroupCombos, int start, int num, CountTable* ct) { try { EstOutput results; vector D; @@ -176,7 +199,7 @@ EstOutput Weighted::driver(Tree* t, vector< vector > namesOfGroupCombos, int numSeqsInGroupI = it->second; double sum = getLengthToRoot(t, t->groupNodeInfo[groupA][j], groupA, groupB); - double weightedSum = ((numSeqsInGroupI * sum) / (double)tmap->seqsPerGroup[groupA]); + double weightedSum = ((numSeqsInGroupI * sum) / (double)ct->getGroupCount(groupA)); D[count] += weightedSum; } @@ -187,23 +210,18 @@ EstOutput Weighted::driver(Tree* t, vector< vector > namesOfGroupCombos, int numSeqsInGroupL = it->second; double sum = getLengthToRoot(t, t->groupNodeInfo[groupB][j], groupA, groupB); - double weightedSum = ((numSeqsInGroupL * sum) / (double)tmap->seqsPerGroup[groupB]); + double weightedSum = ((numSeqsInGroupL * sum) / (double)ct->getGroupCount(groupB)); D[count] += weightedSum; } count++; } - + //calculate u for the group comb - for (int h = start; h < (start+num); h++) { //report progress - m->mothurOut("Processing combo: " + toString(h)); m->mothurOutEndLine(); - - int numLeaves = t->getNumLeaves(); - map tempTotals; //maps node to total Branch Length - map nodePcountSize; //maps node to pcountSize - + //m->mothurOut("Processing combo: " + toString(h)); m->mothurOutEndLine(); + string groupA = namesOfGroupCombos[h][0]; string groupB = namesOfGroupCombos[h][1]; @@ -213,56 +231,39 @@ EstOutput Weighted::driver(Tree* t, vector< vector > namesOfGroupCombos, if (m->control_pressed) { return data; } double u; - int pcountSize = 0; + //int pcountSize = 0; //does this node have descendants from groupA it = t->tree[i].pcount.find(groupA); //if it does u = # of its descendants with a certain group / total number in tree with a certain group if (it != t->tree[i].pcount.end()) { - u = (double) t->tree[i].pcount[groupA] / (double) tmap->seqsPerGroup[groupA]; - pcountSize++; + u = (double) t->tree[i].pcount[groupA] / (double) ct->getGroupCount(groupA); }else { u = 0.00; } //does this node have descendants from group l it = t->tree[i].pcount.find(groupB); + //if it does subtract their percentage from u if (it != t->tree[i].pcount.end()) { - u -= (double) t->tree[i].pcount[groupB] / (double) tmap->seqsPerGroup[groupB]; - pcountSize++; + u -= (double) t->tree[i].pcount[groupB] / (double) ct->getGroupCount(groupB); } - u = abs(u * t->tree[i].getBranchLength()); - - nodePcountSize[i] = pcountSize; - - //if you are a leaf from a users group add to total - if (i < numLeaves) { - if ((t->tree[i].getBranchLength() != -1) && pcountSize != 0) { - //cout << "added to total" << endl; + if (includeRoot) { + if (t->tree[i].getBranchLength() != -1) { + u = abs(u * t->tree[i].getBranchLength()); WScore[(groupA+groupB)] += u; } - tempTotals[i] = 0.0; //we don't care about you, or we have already added you - }else{ //if you are not a leaf - //do both your chidren have have descendants from the users groups? - int lc = t->tree[i].getLChild(); - int rc = t->tree[i].getRChild(); - - //if yes, add your childrens tempTotals - if ((nodePcountSize[lc] != 0) && (nodePcountSize[rc] != 0)) { - WScore[(groupA+groupB)] += tempTotals[lc] + tempTotals[rc]; - //cout << "added to total " << tempTotals[lc] << '\t' << tempTotals[rc] << endl; + }else { + //if this is not the root then add it + if (rootForGrouping[namesOfGroupCombos[h]].count(i) == 0) { if (t->tree[i].getBranchLength() != -1) { - tempTotals[i] = u; - }else { - tempTotals[i] = 0.0; + u = abs(u * t->tree[i].getBranchLength()); + WScore[(groupA+groupB)] += u; } - }else if ((nodePcountSize[lc] == 0) && (nodePcountSize[rc] == 0)) { tempTotals[i] = 0.0; //we don't care about you - }else { //if no, your tempTotal is your childrens temp totals + your branch length - tempTotals[i] = tempTotals[lc] + tempTotals[rc] + u; } - //cout << "temptotal = "<< tempTotals[i] << endl; } } + } /********************************************************/ @@ -271,7 +272,6 @@ EstOutput Weighted::driver(Tree* t, vector< vector > namesOfGroupCombos, count = 0; for (int h = start; h < (start+num); h++) { UN = (WScore[namesOfGroupCombos[h][0]+namesOfGroupCombos[h][1]] / D[count]); - if (isnan(UN) || isinf(UN)) { UN = 0; } results.push_back(UN); count++; @@ -287,9 +287,10 @@ EstOutput Weighted::driver(Tree* t, vector< vector > namesOfGroupCombos, /**************************************************************************************************/ EstOutput Weighted::getValues(Tree* t, string groupA, string groupB) { try { - globaldata = GlobalData::getInstance(); data.clear(); //clear out old values + + CountTable* ct = t->getCountTable(); if (m->control_pressed) { return data; } @@ -306,7 +307,7 @@ EstOutput Weighted::getValues(Tree* t, string groupA, string groupB) { int numSeqsInGroupI = it->second; double sum = getLengthToRoot(t, t->groupNodeInfo[groups[0]][j], groups[0], groups[1]); - double weightedSum = ((numSeqsInGroupI * sum) / (double)tmap->seqsPerGroup[groups[0]]); + double weightedSum = ((numSeqsInGroupI * sum) / (double)ct->getGroupCount(groups[0])); D += weightedSum; } @@ -317,74 +318,50 @@ EstOutput Weighted::getValues(Tree* t, string groupA, string groupB) { int numSeqsInGroupL = it->second; double sum = getLengthToRoot(t, t->groupNodeInfo[groups[1]][j], groups[0], groups[1]); - double weightedSum = ((numSeqsInGroupL * sum) / (double)tmap->seqsPerGroup[groups[1]]); + double weightedSum = ((numSeqsInGroupL * sum) / (double)ct->getGroupCount(groups[1])); D += weightedSum; } - - int numLeaves = t->getNumLeaves(); - map tempTotals; //maps node to total Branch Length - map nodePcountSize; //maps node to pcountSize - + //calculate u for the group comb for(int i=0;igetNumNodes();i++){ - + if (m->control_pressed) { return data; } double u; - int pcountSize = 0; + //int pcountSize = 0; //does this node have descendants from groupA it = t->tree[i].pcount.find(groupA); //if it does u = # of its descendants with a certain group / total number in tree with a certain group if (it != t->tree[i].pcount.end()) { - u = (double) t->tree[i].pcount[groupA] / (double) tmap->seqsPerGroup[groupA]; - pcountSize++; + u = (double) t->tree[i].pcount[groupA] / (double) ct->getGroupCount(groupA); }else { u = 0.00; } - - + + //does this node have descendants from group l it = t->tree[i].pcount.find(groupB); //if it does subtract their percentage from u if (it != t->tree[i].pcount.end()) { - u -= (double) t->tree[i].pcount[groupB] / (double) tmap->seqsPerGroup[groupB]; - pcountSize++; + u -= (double) t->tree[i].pcount[groupB] / (double) ct->getGroupCount(groupB); } - - u = abs(u * t->tree[i].getBranchLength()); - nodePcountSize[i] = pcountSize; - - //if you are a leaf from a users group add to total - if (i < numLeaves) { - if ((t->tree[i].getBranchLength() != -1) && pcountSize != 0) { - //cout << "added to total" << endl; - WScore[(groupA+groupB)] += u; + if (includeRoot) { + if (t->tree[i].getBranchLength() != -1) { + u = abs(u * t->tree[i].getBranchLength()); + WScore[(groupA+groupB)] += u; } - tempTotals[i] = 0.0; //we don't care about you, or we have already added you - }else{ //if you are not a leaf - //do both your chidren have have descendants from the users groups? - int lc = t->tree[i].getLChild(); - int rc = t->tree[i].getRChild(); - - //if yes, add your childrens tempTotals - if ((nodePcountSize[lc] != 0) && (nodePcountSize[rc] != 0)) { - WScore[(groupA+groupB)] += tempTotals[lc] + tempTotals[rc]; - //cout << "added to total " << tempTotals[lc] << '\t' << tempTotals[rc] << endl; + }else{ + //if this is not the root then add it + if (rootForGrouping[groups].count(i) == 0) { if (t->tree[i].getBranchLength() != -1) { - tempTotals[i] = u; - }else { - tempTotals[i] = 0.0; + u = abs(u * t->tree[i].getBranchLength()); + WScore[(groupA+groupB)] += u; } - }else if ((nodePcountSize[lc] == 0) && (nodePcountSize[rc] == 0)) { tempTotals[i] = 0.0; //we don't care about you - }else { //if no, your tempTotal is your childrens temp totals + your branch length - tempTotals[i] = tempTotals[lc] + tempTotals[rc] + u; } - //cout << "temptotal = "<< tempTotals[i] << endl; } - } - + } /********************************************************/ - + //calculate weighted score for the group combination double UN; UN = (WScore[(groupA+groupB)] / D); @@ -404,72 +381,72 @@ double Weighted::getLengthToRoot(Tree* t, int v, string groupA, string groupB) { try { double sum = 0.0; - map tempTotals; //maps node to total Branch Length - map nodePcountSize; //maps node to pcountSize - map::iterator itCount; - int index = v; //you are a leaf if(t->tree[index].getBranchLength() != -1){ sum += abs(t->tree[index].getBranchLength()); } - tempTotals[index] = 0.0; + double tempTotal = 0.0; index = t->tree[index].getParent(); + + vector grouping; grouping.push_back(groupA); grouping.push_back(groupB); + + rootForGrouping[grouping].insert(index); //while you aren't at root while(t->tree[index].getParent() != -1){ if (m->control_pressed) { return sum; } - - int pcountSize = 0; - map::iterator itGroup = t->tree[index].pcount.find(groupA); - if (itGroup != t->tree[index].pcount.end()) { pcountSize++; } - itGroup = t->tree[index].pcount.find(groupB); - if (itGroup != t->tree[index].pcount.end()) { pcountSize++; } - - nodePcountSize[index] = pcountSize; - //do both your chidren have have descendants from the users groups? - int lc = t->tree[index].getLChild(); - int rc = t->tree[index].getRChild(); + int parent = t->tree[index].getParent(); - itCount = nodePcountSize.find(lc); - if (itCount == nodePcountSize.end()) { - int LpcountSize = 0; - itGroup = t->tree[lc].pcount.find(groupA); - if (itGroup != t->tree[lc].pcount.end()) { LpcountSize++; } - itGroup = t->tree[lc].pcount.find(groupB); - if (itGroup != t->tree[lc].pcount.end()) { LpcountSize++; } - nodePcountSize[lc] = LpcountSize; - } - - itCount = nodePcountSize.find(rc); - if (itCount == nodePcountSize.end()) { - int RpcountSize = 0; - itGroup = t->tree[rc].pcount.find(groupA); - if (itGroup != t->tree[rc].pcount.end()) { RpcountSize++; } - itGroup = t->tree[rc].pcount.find(groupB); - if (itGroup != t->tree[rc].pcount.end()) { RpcountSize++; } - nodePcountSize[rc] = RpcountSize; - } + if (includeRoot) { //add everyone + if(t->tree[index].getBranchLength() != -1){ sum += abs(t->tree[index].getBranchLength()); } + }else { - //if yes, add your childrens tempTotals - if ((nodePcountSize[lc] != 0) && (nodePcountSize[rc] != 0)) { - sum += tempTotals[lc] + tempTotals[rc]; - - //cout << "added to total " << tempTotals[lc] << '\t' << tempTotals[rc] << endl; - if (t->tree[index].getBranchLength() != -1) { - tempTotals[index] = abs(t->tree[index].getBranchLength()); - }else { - tempTotals[index] = 0.0; + //am I the root for this grouping? if so I want to stop "early" + //does my sibling have descendants from the users groups? + int lc = t->tree[parent].getLChild(); + int rc = t->tree[parent].getRChild(); + + int sib = lc; + if (lc == index) { sib = rc; } + + map::iterator itGroup; + int pcountSize = 0; + itGroup = t->tree[sib].pcount.find(groupA); + if (itGroup != t->tree[sib].pcount.end()) { pcountSize++; } + itGroup = t->tree[sib].pcount.find(groupB); + if (itGroup != t->tree[sib].pcount.end()) { pcountSize++; } + + //if yes, I am not the root so add me + if (pcountSize != 0) { + if (t->tree[index].getBranchLength() != -1) { + sum += abs(t->tree[index].getBranchLength()) + tempTotal; + tempTotal = 0.0; + }else { + sum += tempTotal; + tempTotal = 0.0; + } + rootForGrouping[grouping].clear(); + rootForGrouping[grouping].insert(parent); + }else { //if no, I may be the root so add my br to tempTotal until I am proven innocent + if (t->tree[index].getBranchLength() != -1) { + tempTotal += abs(t->tree[index].getBranchLength()); + } } - }else { //if no, your tempTotal is your childrens temp totals + your branch length - tempTotals[index] = tempTotals[lc] + tempTotals[rc] + abs(t->tree[index].getBranchLength()); } - //cout << "temptotal = "<< tempTotals[i] << endl; - index = t->tree[index].getParent(); + index = parent; + } + + //get all nodes above the root to add so we don't add their u values above + index = *(rootForGrouping[grouping].begin()); + + while(t->tree[index].getParent() != -1){ + int parent = t->tree[index].getParent(); + rootForGrouping[grouping].insert(parent); + index = parent; } - return sum; } catch(exception& e) { @@ -480,4 +457,3 @@ double Weighted::getLengthToRoot(Tree* t, int v, string groupA, string groupB) { /**************************************************************************************************/ -