X-Git-Url: https://git.donarmstrong.com/?p=mothur.git;a=blobdiff_plain;f=unweighted.cpp;h=a845f9bc2fa54a5c27b0d74aed20929f1aeb756f;hp=b224996efe0bb9a2bbdb517502b93ca6d62e7edf;hb=df7e3ff9f68ef157b0328a2d353c3258c5d45d89;hpb=99d8e5dde3ee76ae28ead4070d321da195c595c9 diff --git a/unweighted.cpp b/unweighted.cpp index b224996..a845f9b 100644 --- a/unweighted.cpp +++ b/unweighted.cpp @@ -11,83 +11,601 @@ /**************************************************************************************************/ -EstOutput Unweighted::getValues(Tree* t) { +EstOutput Unweighted::getValues(Tree* t, int p, string o) { try { - globaldata = GlobalData::getInstance(); + processors = p; + outputDir = o; + + CountTable* ct = t->getCountTable(); + + //if the users enters no groups then give them the score of all groups + int numGroups = m->getNumGroups(); - //clear out old values - data.resize(1,0); + //calculate number of comparsions + int numComp = 0; + vector< vector > namesOfGroupCombos; + for (int r=0; r groups; groups.push_back((m->getGroups())[r]); groups.push_back((m->getGroups())[l]); + namesOfGroupCombos.push_back(groups); + } + } - double UniqueBL=0.0000; //a branch length is unique if it's chidren are from the same group - double totalBL = 0.00; //all branch lengths - double UW = 0.00; //Unweighted Value = UniqueBL / totalBL; + if (numComp != 1) { + vector groups; + if (numGroups == 0) { + //get score for all users groups + for (int i = 0; i < (ct->getNamesOfGroups()).size(); i++) { + if ((ct->getNamesOfGroups())[i] != "xxx") { + groups.push_back((ct->getNamesOfGroups())[i]); + } + } + namesOfGroupCombos.push_back(groups); + }else { + for (int i = 0; i < m->getNumGroups(); i++) { + groups.push_back((m->getGroups())[i]); + } + namesOfGroupCombos.push_back(groups); + } + } + + lines.clear(); + int numPairs = namesOfGroupCombos.size(); + int numPairsPerProcessor = numPairs / processors; + + for (int i = 0; i < processors; i++) { + int startPos = i * numPairsPerProcessor; + if(i == processors - 1){ numPairsPerProcessor = numPairs - i * numPairsPerProcessor; } + lines.push_back(linePair(startPos, numPairsPerProcessor)); + } + + data = createProcesses(t, namesOfGroupCombos, ct); + + lines.clear(); + + return data; + } + catch(exception& e) { + m->errorOut(e, "Unweighted", "getValues"); + exit(1); + } +} +/**************************************************************************************************/ + +EstOutput Unweighted::createProcesses(Tree* t, vector< vector > namesOfGroupCombos, CountTable* ct) { + try { + int process = 1; + vector processIDS; - map::iterator it; //iterator to traverse pgroups - map copyIpcount; + EstOutput results; +#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix) - for(int i=t->getNumLeaves();igetNumNodes();i++){ - int lc = t->tree[i].getLChild(); //lc = vector index of left child - int rc = t->tree[i].getRChild(); //rc = vector index of right child + //loop through and create all the processes you want + while (process != processors) { + int pid = fork(); - /**********************************************************************/ - //This section adds in all lengths that are non leaf - - copyIpcount = t->tree[i].pcount; - for (it = copyIpcount.begin(); it != copyIpcount.end(); it++) { - if (inUsersGroups(it->first, globaldata->Groups) != true) { copyIpcount.erase(it->first); } + if (pid > 0) { + processIDS.push_back(pid); //create map from line number to pid so you can append files in correct order later + process++; + }else if (pid == 0){ + EstOutput myresults; + myresults = driver(t, namesOfGroupCombos, lines[process].start, lines[process].num, ct); + + if (m->control_pressed) { exit(0); } + + //m->mothurOut("Merging results."); m->mothurOutEndLine(); + + //pass numSeqs to parent + ofstream out; + string tempFile = outputDir + toString(getpid()) + ".unweighted.results.temp"; + m->openOutputFile(tempFile, out); + out << myresults.size() << endl; + for (int i = 0; i < myresults.size(); i++) { out << myresults[i] << '\t'; } out << endl; + out.close(); + + exit(0); + }else { + m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine(); + for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); } + exit(0); } + } + + results = driver(t, namesOfGroupCombos, lines[0].start, lines[0].num, ct); + + //force parent to wait until all the processes are done + for (int i=0;i<(processors-1);i++) { + int temp = processIDS[i]; + wait(&temp); + } + + if (m->control_pressed) { return results; } + + //get data created by processes + for (int i=0;i<(processors-1);i++) { + ifstream in; + string s = outputDir + toString(processIDS[i]) + ".unweighted.results.temp"; + m->openInputFile(s, in); - //if i's children are from the same group then i's pcount size will be 1 - //if copyIpcount.size() = 0 they are from a branch that is entirely from a group the user doesn't want - if (copyIpcount.size() == 0) { } - else if ((t->tree[i].getBranchLength() != -1) && (copyIpcount.size() == 1)) { UniqueBL += t->tree[i].getBranchLength(); } + //get quantiles + if (!in.eof()) { + int num; + in >> num; m->gobble(in); + + if (m->control_pressed) { break; } + + double w; + for (int j = 0; j < num; j++) { + in >> w; + results.push_back(w); + } + m->gobble(in); + } + in.close(); + m->mothurRemove(s); + } +#else + //fill in functions + vector pDataArray; + DWORD dwThreadIdArray[processors-1]; + HANDLE hThreadArray[processors-1]; + vector cts; + vector trees; + + //Create processor worker threads. + for( int i=1; icopy(ct); + Tree* copyTree = new Tree(copyCount); + copyTree->getCopy(t); + + cts.push_back(copyCount); + trees.push_back(copyTree); + + unweightedData* tempweighted = new unweightedData(m, lines[i].start, lines[i].num, namesOfGroupCombos, copyTree, copyCount, includeRoot); + pDataArray.push_back(tempweighted); + processIDS.push_back(i); + + hThreadArray[i-1] = CreateThread(NULL, 0, MyUnWeightedThreadFunction, pDataArray[i-1], 0, &dwThreadIdArray[i-1]); + } + + results = driver(t, namesOfGroupCombos, lines[0].start, lines[0].num, ct); + + //Wait until all threads have terminated. + WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE); + + //Close all thread handles and free memory allocations. + for(int i=0; i < pDataArray.size(); i++){ + for (int j = 0; j < pDataArray[i]->results.size(); j++) { results.push_back(pDataArray[i]->results[j]); } + delete cts[i]; + delete trees[i]; + CloseHandle(hThreadArray[i]); + delete pDataArray[i]; + } + +#endif + return results; + } + catch(exception& e) { + m->errorOut(e, "Unweighted", "createProcesses"); + exit(1); + } +} +/**************************************************************************************************/ +EstOutput Unweighted::driver(Tree* t, vector< vector > namesOfGroupCombos, int start, int num, CountTable* ct) { + try { + + + EstOutput results; results.resize(num); + + int count = 0; + int total = num; + + for (int h = start; h < (start+num); h++) { + + if (m->control_pressed) { return results; } + + double UniqueBL=0.0000; //a branch length is unique if it's chidren are from the same group + double totalBL = 0.00; //all branch lengths + double UW = 0.00; //Unweighted Value = UniqueBL / totalBL; + + //find a node that belongs to one of the groups in this combo + int nodeBelonging = -1; + for (int g = 0; g < namesOfGroupCombos[h].size(); g++) { + if (t->groupNodeInfo[namesOfGroupCombos[h][g]].size() != 0) { nodeBelonging = t->groupNodeInfo[namesOfGroupCombos[h][g]][0]; break; } + } - //add i's BL to total if it is from the groups the user wants - if ((t->tree[i].getBranchLength() != -1) && (copyIpcount.size() != 0)) { - totalBL += t->tree[i].getBranchLength(); + //sanity check + if (nodeBelonging == -1) { + m->mothurOut("[WARNING]: cannot find a nodes in the tree from grouping "); + for (int g = 0; g < namesOfGroupCombos[h].size()-1; g++) { m->mothurOut(namesOfGroupCombos[h][g] + "-"); } + m->mothurOut(namesOfGroupCombos[h][namesOfGroupCombos[h].size()-1]); + m->mothurOut(", skipping."); m->mothurOutEndLine(); results[count] = UW; + }else{ + //cout << "trying to get root" << endl; + //if including the root this clears rootForGrouping[namesOfGroupCombos[h]] + getRoot(t, nodeBelonging, namesOfGroupCombos[h]); + //cout << "here" << endl; + for(int i=0;igetNumNodes();i++){ + + if (m->control_pressed) { return data; } + //cout << i << endl; + //pcountSize = 0, they are from a branch that is entirely from a group the user doesn't want + //pcountSize = 2, not unique to one group + //pcountSize = 1, unique to one group + + int pcountSize = 0; + for (int j = 0; j < namesOfGroupCombos[h].size(); j++) { + map::iterator itGroup = t->tree[i].pcount.find(namesOfGroupCombos[h][j]); + if (itGroup != t->tree[i].pcount.end()) { pcountSize++; if (pcountSize > 1) { break; } } + } + + + //unique calc + if (pcountSize == 0) { } + else if ((t->tree[i].getBranchLength() != -1) && (pcountSize == 1) && (rootForGrouping[namesOfGroupCombos[h]].count(i) == 0)) { //you have a unique branch length and you are not the root + UniqueBL += abs(t->tree[i].getBranchLength()); + } + + //total calc + if (pcountSize == 0) { } + else if ((t->tree[i].getBranchLength() != -1) && (pcountSize != 0) && (rootForGrouping[namesOfGroupCombos[h]].count(i) == 0)) { //you have a branch length and you are not the root + totalBL += abs(t->tree[i].getBranchLength()); + } + } + //cout << UniqueBL << '\t' << totalBL << endl; + UW = (UniqueBL / totalBL); + + if (isnan(UW) || isinf(UW)) { UW = 0; } + + results[count] = UW; + } + count++; + + } + + return results; + } + catch(exception& e) { + m->errorOut(e, "Unweighted", "driver"); + exit(1); + } +} +/**************************************************************************************************/ + +EstOutput Unweighted::getValues(Tree* t, string groupA, string groupB, int p, string o) { + try { + processors = p; + outputDir = o; + + CountTable* ct = t->getCountTable(); + + //if the users enters no groups then give them the score of all groups + int numGroups = m->getNumGroups(); + + //calculate number of comparsions + int numComp = 0; + vector< vector > namesOfGroupCombos; + for (int r=0; r groups; groups.push_back((m->getGroups())[r]); groups.push_back((m->getGroups())[l]); + namesOfGroupCombos.push_back(groups); + } + } + + if (numComp != 1) { + vector groups; + if (numGroups == 0) { + //get score for all users groups + for (int i = 0; i < (ct->getNamesOfGroups()).size(); i++) { + if ((ct->getNamesOfGroups())[i] != "xxx") { + groups.push_back((ct->getNamesOfGroups())[i]); + } + } + namesOfGroupCombos.push_back(groups); + }else { + for (int i = 0; i < m->getNumGroups(); i++) { + groups.push_back((m->getGroups())[i]); + } + namesOfGroupCombos.push_back(groups); } + } + + lines.clear(); + int numPairs = namesOfGroupCombos.size(); + int numPairsPerProcessor = numPairs / processors; + + for (int i = 0; i < processors; i++) { + int startPos = i * numPairsPerProcessor; + if(i == processors - 1){ numPairsPerProcessor = numPairs - i * numPairsPerProcessor; } + lines.push_back(linePair(startPos, numPairsPerProcessor)); + } + + data = createProcesses(t, namesOfGroupCombos, true, ct); + lines.clear(); + + return data; + } + catch(exception& e) { + m->errorOut(e, "Unweighted", "getValues"); + exit(1); + } +} +/**************************************************************************************************/ + +EstOutput Unweighted::createProcesses(Tree* t, vector< vector > namesOfGroupCombos, bool usingGroups, CountTable* ct) { + try { + int process = 1; + vector processIDS; + + EstOutput results; +#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix) + + //loop through and create all the processes you want + while (process != processors) { + int pid = fork(); - /**********************************************************************/ - //This section adds in all lengths that are leaf + if (pid > 0) { + processIDS.push_back(pid); //create map from line number to pid so you can append files in correct order later + process++; + }else if (pid == 0){ + EstOutput myresults; + myresults = driver(t, namesOfGroupCombos, lines[process].start, lines[process].num, usingGroups, ct); + + if (m->control_pressed) { exit(0); } + + //pass numSeqs to parent + ofstream out; + string tempFile = outputDir + toString(getpid()) + ".unweighted.results.temp"; + m->openOutputFile(tempFile, out); + out << myresults.size() << endl; + for (int i = 0; i < myresults.size(); i++) { out << myresults[i] << '\t'; } out << endl; + out.close(); + + exit(0); + }else { + m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine(); + for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); } + exit(0); + } + } + + results = driver(t, namesOfGroupCombos, lines[0].start, lines[0].num, usingGroups, ct); + + //force parent to wait until all the processes are done + for (int i=0;i<(processors-1);i++) { + int temp = processIDS[i]; + wait(&temp); + } + + if (m->control_pressed) { return results; } + + //get data created by processes + for (int i=0;i<(processors-1);i++) { + ifstream in; + string s = outputDir + toString(processIDS[i]) + ".unweighted.results.temp"; + m->openInputFile(s, in); - //if i's chidren are leaves - if (t->tree[rc].getRChild() == -1) { - //if rc is a valid group and rc has a BL - if ((inUsersGroups(t->tree[rc].getGroup(), globaldata->Groups) == true) && (t->tree[rc].getBranchLength() != -1)) { - UniqueBL += t->tree[rc].getBranchLength(); - totalBL += t->tree[rc].getBranchLength(); + //get quantiles + if (!in.eof()) { + int num; + in >> num; m->gobble(in); + + if (m->control_pressed) { break; } + + double w; + for (int j = 0; j < num; j++) { + in >> w; + + results.push_back(w); } + m->gobble(in); } + in.close(); + m->mothurRemove(s); + } +#else + //for some reason it doesn't seem to be calculating hte random trees scores. all scores are the same even though copytree appears to be randomized. + + /* + //fill in functions + vector pDataArray; + DWORD dwThreadIdArray[processors-1]; + HANDLE hThreadArray[processors-1]; + vector cts; + vector trees; + + //Create processor worker threads. + for( int i=1; icopy(ct); + Tree* copyTree = new Tree(copyCount); + copyTree->getCopy(t); + + cts.push_back(copyCount); + trees.push_back(copyTree); + + unweightedData* tempweighted = new unweightedData(m, lines[i].start, lines[i].num, namesOfGroupCombos, copyTree, copyCount, includeRoot); + pDataArray.push_back(tempweighted); + processIDS.push_back(i); + + hThreadArray[i-1] = CreateThread(NULL, 0, MyUnWeightedRandomThreadFunction, pDataArray[i-1], 0, &dwThreadIdArray[i-1]); + } + + results = driver(t, namesOfGroupCombos, lines[0].start, lines[0].num, usingGroups, ct); + + //Wait until all threads have terminated. + WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE); + + //Close all thread handles and free memory allocations. + for(int i=0; i < pDataArray.size(); i++){ + for (int j = 0; j < pDataArray[i]->results.size(); j++) { results.push_back(pDataArray[i]->results[j]); } + delete cts[i]; + delete trees[i]; + CloseHandle(hThreadArray[i]); + delete pDataArray[i]; + } */ + + results = driver(t, namesOfGroupCombos, 0, namesOfGroupCombos.size(), usingGroups, ct); +#endif + return results; + } + catch(exception& e) { + m->errorOut(e, "Unweighted", "createProcesses"); + exit(1); + } +} +/**************************************************************************************************/ +EstOutput Unweighted::driver(Tree* t, vector< vector > namesOfGroupCombos, int start, int num, bool usingGroups, CountTable* ct) { + try { + + EstOutput results; results.resize(num); + + int count = 0; + + Tree* copyTree = new Tree(ct); + + for (int h = start; h < (start+num); h++) { + + if (m->control_pressed) { return results; } + + //copy random tree passed in + copyTree->getCopy(t); + + //swap labels in the groups you want to compare + copyTree->assembleRandomUnifracTree(namesOfGroupCombos[h]); - if (t->tree[lc].getLChild() == -1) { - //if lc is a valid group and lc has a BL - if ((inUsersGroups(t->tree[lc].getGroup(), globaldata->Groups) == true) && (t->tree[lc].getBranchLength() != -1)) { - UniqueBL += t->tree[lc].getBranchLength(); - totalBL += t->tree[lc].getBranchLength(); - } + double UniqueBL=0.0000; //a branch length is unique if it's chidren are from the same group + double totalBL = 0.00; //all branch lengths + double UW = 0.00; //Unweighted Value = UniqueBL / totalBL; + //find a node that belongs to one of the groups in this combo + int nodeBelonging = -1; + for (int g = 0; g < namesOfGroupCombos[h].size(); g++) { + if (copyTree->groupNodeInfo[namesOfGroupCombos[h][g]].size() != 0) { nodeBelonging = copyTree->groupNodeInfo[namesOfGroupCombos[h][g]][0]; break; } } - /**********************************************************************/ + //sanity check + if (nodeBelonging == -1) { + m->mothurOut("[WARNING]: cannot find a nodes in the tree from grouping "); + for (int g = 0; g < namesOfGroupCombos[h].size()-1; g++) { m->mothurOut(namesOfGroupCombos[h][g] + "-"); } + m->mothurOut(namesOfGroupCombos[h][namesOfGroupCombos[h].size()-1]); + m->mothurOut(", skipping."); m->mothurOutEndLine(); results[count] = UW; + }else{ + + //if including the root this clears rootForGrouping[namesOfGroupCombos[h]] + getRoot(copyTree, nodeBelonging, namesOfGroupCombos[h]); + + for(int i=0;igetNumNodes();i++){ + + if (m->control_pressed) { return data; } + + //pcountSize = 0, they are from a branch that is entirely from a group the user doesn't want + //pcountSize = 2, not unique to one group + //pcountSize = 1, unique to one group + + int pcountSize = 0; + for (int j = 0; j < namesOfGroupCombos[h].size(); j++) { + map::iterator itGroup = copyTree->tree[i].pcount.find(namesOfGroupCombos[h][j]); + if (itGroup != copyTree->tree[i].pcount.end()) { pcountSize++; if (pcountSize > 1) { break; } } + } + + //unique calc + if (pcountSize == 0) { } + else if ((copyTree->tree[i].getBranchLength() != -1) && (pcountSize == 1) && (rootForGrouping[namesOfGroupCombos[h]].count(i) == 0)) { //you have a unique branch length and you are not the root + UniqueBL += abs(copyTree->tree[i].getBranchLength()); + } + + //total calc + if (pcountSize == 0) { } + else if ((copyTree->tree[i].getBranchLength() != -1) && (pcountSize != 0) && (rootForGrouping[namesOfGroupCombos[h]].count(i) == 0)) { //you have a branch length and you are not the root + totalBL += abs(copyTree->tree[i].getBranchLength()); + } + + } + //cout << UniqueBL << '\t' << totalBL << endl; + UW = (UniqueBL / totalBL); + + if (isnan(UW) || isinf(UW)) { UW = 0; } + + results[count] = UW; + } + count++; + } - UW = (UniqueBL / totalBL); - - if (isnan(UW) || isinf(UW)) { UW = 0; } - - data[0] = UW; + delete copyTree; - return data; - + return results; } catch(exception& e) { - cout << "Standard Error: " << e.what() << " has occurred in the Unweighted class Function getValues. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + m->errorOut(e, "Unweighted", "driver"); exit(1); } - catch(...) { - cout << "An unknown error has occurred in the Unweighted class function getValues. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; +} +/**************************************************************************************************/ +int Unweighted::getRoot(Tree* t, int v, vector grouping) { + try { + //you are a leaf so get your parent + int index = t->tree[v].getParent(); + + if (includeRoot) { + rootForGrouping[grouping].clear(); + }else { + + //my parent is a potential root + rootForGrouping[grouping].insert(index); + + //while you aren't at root + while(t->tree[index].getParent() != -1){ + //cout << index << endl; + if (m->control_pressed) { return 0; } + + //am I the root for this grouping? if so I want to stop "early" + //does my sibling have descendants from the users groups? + //if so I am not the root + int parent = t->tree[index].getParent(); + int lc = t->tree[parent].getLChild(); + int rc = t->tree[parent].getRChild(); + + int sib = lc; + if (lc == index) { sib = rc; } + + map::iterator itGroup; + int pcountSize = 0; + for (int j = 0; j < grouping.size(); j++) { + map::iterator itGroup = t->tree[sib].pcount.find(grouping[j]); + if (itGroup != t->tree[sib].pcount.end()) { pcountSize++; if (pcountSize > 1) { break; } } + } + + //if yes, I am not the root + if (pcountSize != 0) { + rootForGrouping[grouping].clear(); + rootForGrouping[grouping].insert(parent); + } + + index = parent; + } + + //get all nodes above the root to add so we don't add their u values above + index = *(rootForGrouping[grouping].begin()); + while(t->tree[index].getParent() != -1){ + int parent = t->tree[index].getParent(); + rootForGrouping[grouping].insert(parent); + //cout << parent << " in root" << endl; + index = parent; + } + } + + return 0; + } + catch(exception& e) { + m->errorOut(e, "Unweighted", "getRoot"); exit(1); } - } +/**************************************************************************************************/ +