X-Git-Url: https://git.donarmstrong.com/?p=mothur.git;a=blobdiff_plain;f=phylodiversitycommand.cpp;h=339649c7dcc814ac409e5b8ec1dfd432b544e43d;hp=b0c11f68ff26dad59634375c80c44ecf0dd0382b;hb=050a3ff02473a3d4c0980964e1a9ebe52e55d6b8;hpb=6c2b1e530a5c0bb87040e58a3e410097acdfcc3d diff --git a/phylodiversitycommand.cpp b/phylodiversitycommand.cpp index b0c11f6..339649c 100644 --- a/phylodiversitycommand.cpp +++ b/phylodiversitycommand.cpp @@ -14,20 +14,20 @@ vector PhyloDiversityCommand::setParameters(){ try { - CommandParameter ptree("tree", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(ptree); - CommandParameter pname("name", "InputTypes", "", "", "NameCount", "none", "none",false,false); parameters.push_back(pname); - CommandParameter pcount("count", "InputTypes", "", "", "NameCount-CountGroup", "none", "none",false,false); parameters.push_back(pcount); - CommandParameter pgroup("group", "InputTypes", "", "", "CountGroup", "none", "none",false,false); parameters.push_back(pgroup); - CommandParameter pgroups("groups", "String", "", "", "", "", "",false,false); parameters.push_back(pgroups); - CommandParameter piters("iters", "Number", "", "1000", "", "", "",false,false); parameters.push_back(piters); - CommandParameter pfreq("freq", "Number", "", "100", "", "", "",false,false); parameters.push_back(pfreq); - CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors); - CommandParameter prarefy("rarefy", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(prarefy); - CommandParameter psummary("summary", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(psummary); - CommandParameter pcollect("collect", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pcollect); - CommandParameter pscale("scale", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pscale); - CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir); - CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir); + CommandParameter ptree("tree", "InputTypes", "", "", "none", "none", "none","phylodiv",false,true,true); parameters.push_back(ptree); + CommandParameter pname("name", "InputTypes", "", "", "NameCount", "none", "none","",false,false,true); parameters.push_back(pname); + CommandParameter pcount("count", "InputTypes", "", "", "NameCount-CountGroup", "none", "none","",false,false,true); parameters.push_back(pcount); + CommandParameter pgroup("group", "InputTypes", "", "", "CountGroup", "none", "none","",false,false,true); parameters.push_back(pgroup); + CommandParameter pgroups("groups", "String", "", "", "", "", "","",false,false); parameters.push_back(pgroups); + CommandParameter piters("iters", "Number", "", "1000", "", "", "","",false,false); parameters.push_back(piters); + CommandParameter pfreq("freq", "Number", "", "100", "", "", "","",false,false); parameters.push_back(pfreq); + CommandParameter pprocessors("processors", "Number", "", "1", "", "", "","",false,false,true); parameters.push_back(pprocessors); + CommandParameter prarefy("rarefy", "Boolean", "", "F", "", "", "","rarefy",false,false); parameters.push_back(prarefy); + CommandParameter psummary("summary", "Boolean", "", "T", "", "", "","summary",false,false); parameters.push_back(psummary); + CommandParameter pcollect("collect", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(pcollect); + CommandParameter pscale("scale", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(pscale); + CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir); + CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir); vector myArray; for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); } @@ -63,26 +63,21 @@ string PhyloDiversityCommand::getHelpString(){ } } //********************************************************************************************************************** -string PhyloDiversityCommand::getOutputFileNameTag(string type, string inputName=""){ - try { - string outputFileName = ""; - map >::iterator it; +string PhyloDiversityCommand::getOutputPattern(string type) { + try { + string pattern = ""; - //is this a type this command creates - it = outputTypes.find(type); - if (it == outputTypes.end()) { m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n"); } - else { - if (type == "phylodiv") { outputFileName = "phylodiv"; } - else if (type == "rarefy") { outputFileName = "phylodiv.rarefaction"; } - else if (type == "summary") { outputFileName = "phylodiv.summary"; } - else { m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n"); m->control_pressed = true; } - } - return outputFileName; - } - catch(exception& e) { - m->errorOut(e, "PhyloDiversityCommand", "getOutputFileNameTag"); - exit(1); - } + if (type == "phylodiv") { pattern = "[filename],[tag],phylodiv"; } + else if (type == "rarefy") { pattern = "[filename],[tag],phylodiv.rarefaction"; } + else if (type == "summary") { pattern = "[filename],[tag],phylodiv.summary"; } + else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true; } + + return pattern; + } + catch(exception& e) { + m->errorOut(e, "PhyloDiversityCommand", "getOutputPattern"); + exit(1); + } } //********************************************************************************************************************** @@ -284,9 +279,12 @@ int PhyloDiversityCommand::execute(){ if (m->control_pressed) { delete ct; for (int j = 0; j < trees.size(); j++) { delete trees[j]; } for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } return 0; } ofstream outSum, outRare, outCollect; - string outSumFile = outputDir + m->getRootName(m->getSimpleName(treefile)) + toString(i+1) + "." + getOutputFileNameTag("summary"); - string outRareFile = outputDir + m->getRootName(m->getSimpleName(treefile)) + toString(i+1) + "." + getOutputFileNameTag("rarefy"); - string outCollectFile = outputDir + m->getRootName(m->getSimpleName(treefile)) + toString(i+1) + "." + getOutputFileNameTag("phylodiv"); + map variables; + variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(treefile)); + variables["[tag]"] = toString(i+1); + string outSumFile = getOutputFileName("summary",variables); + string outRareFile = getOutputFileName("rarefy",variables); + string outCollectFile = getOutputFileName("phylodiv",variables); if (summary) { m->openOutputFile(outSumFile, outSum); outputNames.push_back(outSumFile); outputTypes["summary"].push_back(outSumFile); } if (rarefy) { m->openOutputFile(outRareFile, outRare); outputNames.push_back(outRareFile); outputTypes["rarefy"].push_back(outRareFile); } @@ -301,6 +299,31 @@ int PhyloDiversityCommand::execute(){ randomLeaf.push_back(j); } } + + /* float sum = 0; + vector sums; sums.resize(m->getGroups().size(), 0); + vector sumsAboveRoot; sumsAboveRoot.resize(m->getGroups().size(), 0); + for (int j = 0; j < trees[i]->getNumNodes(); j++) { + if (trees[i]->tree[j].getBranchLength() < 0) { cout << j << '\t' << trees[i]->tree[j].getName() << '\t' << trees[i]->tree[j].getBranchLength() << endl; } + + sum += abs(trees[i]->tree[j].getBranchLength()); + for (int k = 0; k < m->getGroups().size(); k++) { + map::iterator itGroup = trees[i]->tree[j].pcount.find(m->getGroups()[k]); + if (itGroup != trees[i]->tree[j].pcount.end()) { //this branch belongs to a group we care about + if (j < rootForGroup[m->getGroups()[k]]) { + sums[k] += abs(trees[i]->tree[j].getBranchLength()); + }else { + sumsAboveRoot[k] += abs(trees[i]->tree[j].getBranchLength()); + } + } + } + } + cout << sum << endl; //exit(1); + + for (int k = 0; k < m->getGroups().size(); k++) { + cout << m->getGroups()[k] << "root node = " << rootForGroup[m->getGroups()[k]] << "sum below root = " << sums[k] << "sum above root = " << sumsAboveRoot[k] << endl; + } + exit(1); */ numLeafNodes = randomLeaf.size(); //reset the number of leaf nodes you are using @@ -339,34 +362,22 @@ int PhyloDiversityCommand::execute(){ if (numSampledList.count(diversity[mGroups[j]].size()-1) == 0) { numSampledList.insert(diversity[mGroups[j]].size()-1); } } - #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix) - if(processors == 1){ - driver(trees[i], diversity, sumDiversity, iters, increment, randomLeaf, numSampledList, outCollect, outSum, true); - }else{ - if (rarefy) { - vector procIters; - - int numItersPerProcessor = iters / processors; - - //divide iters between processes - for (int h = 0; h < processors; h++) { - if(h == processors - 1){ - numItersPerProcessor = iters - h * numItersPerProcessor; - } - procIters.push_back(numItersPerProcessor); - } - - createProcesses(procIters, trees[i], diversity, sumDiversity, iters, increment, randomLeaf, numSampledList, outCollect, outSum); - - }else{ //no need to paralellize if you dont want to rarefy - driver(trees[i], diversity, sumDiversity, iters, increment, randomLeaf, numSampledList, outCollect, outSum, true); - } - } - - #else - driver(trees[i], diversity, sumDiversity, iters, increment, randomLeaf, numSampledList, outCollect, outSum, true); - #endif - + if (rarefy) { + vector procIters; + int numItersPerProcessor = iters / processors; + + //divide iters between processes + for (int h = 0; h < processors; h++) { + if(h == processors - 1){ numItersPerProcessor = iters - h * numItersPerProcessor; } + procIters.push_back(numItersPerProcessor); + } + + createProcesses(procIters, trees[i], diversity, sumDiversity, iters, increment, randomLeaf, numSampledList, outCollect, outSum); + + }else{ //no need to paralellize if you dont want to rarefy + driver(trees[i], diversity, sumDiversity, iters, increment, randomLeaf, numSampledList, outCollect, outSum, true); + } + if (rarefy) { printData(numSampledList, sumDiversity, outRare, iters); } } @@ -392,12 +403,13 @@ int PhyloDiversityCommand::execute(){ //********************************************************************************************************************** int PhyloDiversityCommand::createProcesses(vector& procIters, Tree* t, map< string, vector >& div, map >& sumDiv, int numIters, int increment, vector& randomLeaf, set& numSampledList, ofstream& outCollect, ofstream& outSum){ try { - #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix) - int process = 1; + int process = 1; vector processIDS; map< string, vector >::iterator itSum; - + + #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix) + //loop through and create all the processes you want while (process != processors) { int pid = fork(); @@ -466,6 +478,56 @@ int PhyloDiversityCommand::createProcesses(vector& procIters, Tree* t, map< in.close(); m->mothurRemove(inTemp); } +#else + + //fill in functions + vector pDataArray; + DWORD dwThreadIdArray[processors-1]; + HANDLE hThreadArray[processors-1]; + vector cts; + vector trees; + map rootForGroup = getRootForGroups(t); + + //Create processor worker threads. + for( int i=1; icopy(ct); + Tree* copyTree = new Tree(copyCount); + copyTree->getCopy(t); + + cts.push_back(copyCount); + trees.push_back(copyTree); + + map > copydiv = div; + map > copysumDiv = sumDiv; + vector copyrandomLeaf = randomLeaf; + set copynumSampledList = numSampledList; + map copyRootForGrouping = rootForGroup; + + phylodivData* temp = new phylodivData(m, procIters[i], copydiv, copysumDiv, copyTree, copyCount, increment, copyrandomLeaf, copynumSampledList, copyRootForGrouping); + pDataArray.push_back(temp); + processIDS.push_back(i); + + hThreadArray[i-1] = CreateThread(NULL, 0, MyPhyloDivThreadFunction, pDataArray[i-1], 0, &dwThreadIdArray[i-1]); + } + + driver(t, div, sumDiv, procIters[0], increment, randomLeaf, numSampledList, outCollect, outSum, true); + + //Wait until all threads have terminated. + WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE); + + //Close all thread handles and free memory allocations. + for(int i=0; i < pDataArray.size(); i++){ + for (itSum = pDataArray[i]->sumDiv.begin(); itSum != pDataArray[i]->sumDiv.end(); itSum++) { + for (int k = 0; k < (itSum->second).size(); k++) { + sumDiv[itSum->first][k] += (itSum->second)[k]; + } + } + delete cts[i]; + delete trees[i]; + CloseHandle(hThreadArray[i]); + delete pDataArray[i]; + } #endif @@ -571,6 +633,7 @@ void PhyloDiversityCommand::printSumData(map< string, vector >& div, ofst else { score = div[mGroups[j]][numSampled] / (float)numIters; } out << setprecision(4) << score << endl; + //cout << mGroups[j] << '\t' << numSampled << '\t'<< setprecision(4) << score << endl; } out.close(); @@ -635,7 +698,7 @@ vector PhyloDiversityCommand::calcBranchLength(Tree* t, int leaf, vector< //you are a leaf if(t->tree[index].getBranchLength() != -1){ for (int k = 0; k < groups.size(); k++) { - sums[k] += abs(t->tree[index].getBranchLength()); + sums[k] += abs(t->tree[index].getBranchLength()); } } @@ -686,10 +749,10 @@ map PhyloDiversityCommand::getRootForGroups(Tree* t){ for (int j = 0; j < groups.size(); j++) { - if (done[groups[j]] == false) { //we haven't found the root for this group yet - - done[groups[j]] = true; - roots[groups[j]] = i; //set root to self to start + if (done[groups[j]] == false) { //we haven't found the root for this group yet, initialize it + done[groups[j]] = true; + roots[groups[j]] = i; //set root to self to start + } //while you aren't at root while(t->tree[index].getParent() != -1){ @@ -709,15 +772,17 @@ map PhyloDiversityCommand::getRootForGroups(Tree* t){ if (itGroup != t->tree[rc].pcount.end()) { RpcountSize++; } if ((LpcountSize != 0) && (RpcountSize != 0)) { //possible root - roots[groups[j]] = index; + if (index > roots[groups[j]]) { roots[groups[j]] = index; } }else { ;} index = t->tree[index].getParent(); } - } + //} } } + + return roots; }