From 0c5f99d1282405e20458e2822aa1a54774d2fa83 Mon Sep 17 00:00:00 2001 From: Sarah Westcott Date: Tue, 21 Jan 2014 15:26:12 -0500 Subject: [PATCH] added rjsd calculator. improved work balance load between processors for paralellized commands: metastats, unifrac.weighted, unifrac.unweighted, parsimony, chimera.uchime, chimera.perseus, chimera.slayer, shhh.seqs and pre.cluster. --- Mothur.xcodeproj/project.pbxproj | 6 ++++ chimeraperseuscommand.cpp | 17 +++++---- chimeraslayercommand.cpp | 4 +-- chimerauchimecommand.cpp | 18 +++++----- collectsharedcommand.cpp | 10 +++++- getmetacommunitycommand.cpp | 8 +++-- getmetacommunitycommand.h | 1 + matrixoutputcommand.cpp | 5 +-- matrixoutputcommand.h | 3 ++ metastatscommand.cpp | 17 +++++---- parsimony.cpp | 15 ++++---- preclustercommand.cpp | 16 +++++---- sharedrjsd.cpp | 59 ++++++++++++++++++++++++++++++++ sharedrjsd.h | 31 +++++++++++++++++ shhhseqscommand.cpp | 17 +++++---- summarysharedcommand.cpp | 4 ++- summarysharedcommand.h | 3 ++ unifracweightedcommand.cpp | 16 +++++---- unweighted.cpp | 17 ++++----- validcalculator.cpp | 15 +++++--- weighted.cpp | 15 ++++---- 21 files changed, 216 insertions(+), 81 deletions(-) create mode 100644 sharedrjsd.cpp create mode 100644 sharedrjsd.h diff --git a/Mothur.xcodeproj/project.pbxproj b/Mothur.xcodeproj/project.pbxproj index 26456a3..f0a56a3 100644 --- a/Mothur.xcodeproj/project.pbxproj +++ b/Mothur.xcodeproj/project.pbxproj @@ -9,6 +9,7 @@ /* Begin PBXBuildFile section */ 219C1DE01552C4BD004209F9 /* newcommandtemplate.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 219C1DDF1552C4BD004209F9 /* newcommandtemplate.cpp */; }; 219C1DE41559BCCF004209F9 /* getcoremicrobiomecommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 219C1DE31559BCCD004209F9 /* getcoremicrobiomecommand.cpp */; }; + 483C952E188F0CAD0035E7B7 /* sharedrjsd.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 483C952D188F0CAD0035E7B7 /* sharedrjsd.cpp */; }; 7E6BE10A12F710D8007ADDBE /* refchimeratest.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 7E6BE10912F710D8007ADDBE /* refchimeratest.cpp */; }; 834D9D581656D7C400E7FAB9 /* regularizedrandomforest.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 834D9D561656D7C400E7FAB9 /* regularizedrandomforest.cpp */; }; 834D9D5C1656DEC800E7FAB9 /* regularizeddecisiontree.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 834D9D5A1656DEC700E7FAB9 /* regularizeddecisiontree.cpp */; }; @@ -406,6 +407,8 @@ 219C1DE11552C508004209F9 /* newcommandtemplate.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = newcommandtemplate.h; sourceTree = ""; }; 219C1DE31559BCCD004209F9 /* getcoremicrobiomecommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = getcoremicrobiomecommand.cpp; sourceTree = ""; }; 219C1DE51559BCF2004209F9 /* getcoremicrobiomecommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = getcoremicrobiomecommand.h; sourceTree = ""; }; + 483C952C188F0C960035E7B7 /* sharedrjsd.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = sharedrjsd.h; sourceTree = ""; }; + 483C952D188F0CAD0035E7B7 /* sharedrjsd.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = sharedrjsd.cpp; sourceTree = ""; }; 7E6BE10812F710D8007ADDBE /* refchimeratest.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = refchimeratest.h; sourceTree = ""; }; 7E6BE10912F710D8007ADDBE /* refchimeratest.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = refchimeratest.cpp; sourceTree = ""; }; 7E78911B135F3E8600E725D2 /* eachgapdistignorens.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = eachgapdistignorens.h; sourceTree = ""; }; @@ -1732,6 +1735,8 @@ A7E9B7FB12D37EC400DA6239 /* sharedjest.h */, A7222D711856276C0055A993 /* sharedjsd.h */, A7222D721856277C0055A993 /* sharedjsd.cpp */, + 483C952C188F0C960035E7B7 /* sharedrjsd.h */, + 483C952D188F0CAD0035E7B7 /* sharedrjsd.cpp */, A7E9B7FC12D37EC400DA6239 /* sharedkstest.cpp */, A7E9B7FD12D37EC400DA6239 /* sharedkstest.h */, A7E9B7FE12D37EC400DA6239 /* sharedkulczynski.cpp */, @@ -2258,6 +2263,7 @@ A7E9B94112D37EC400DA6239 /* setlogfilecommand.cpp in Sources */, A7E9B94212D37EC400DA6239 /* sffinfocommand.cpp in Sources */, A7E9B94312D37EC400DA6239 /* shannon.cpp in Sources */, + 483C952E188F0CAD0035E7B7 /* sharedrjsd.cpp in Sources */, A7E9B94412D37EC400DA6239 /* shannoneven.cpp in Sources */, A7E9B94512D37EC400DA6239 /* sharedace.cpp in Sources */, A7E9B94612D37EC400DA6239 /* sharedanderbergs.cpp in Sources */, diff --git a/chimeraperseuscommand.cpp b/chimeraperseuscommand.cpp index 0547802..b8d70c1 100644 --- a/chimeraperseuscommand.cpp +++ b/chimeraperseuscommand.cpp @@ -1077,13 +1077,16 @@ int ChimeraPerseusCommand::createProcessesGroups(string outputFName, string accn //divide the groups between the processors vector lines; - int numGroupsPerProcessor = groups.size() / processors; - for (int i = 0; i < processors; i++) { - int startIndex = i * numGroupsPerProcessor; - int endIndex = (i+1) * numGroupsPerProcessor; - if(i == (processors - 1)){ endIndex = groups.size(); } - lines.push_back(linePair(startIndex, endIndex)); - } + int remainingPairs = groups.size(); + int startIndex = 0; + for (int remainingProcessors = processors; remainingProcessors > 0; remainingProcessors--) { + int numPairs = remainingPairs; //case for last processor + if (remainingProcessors != 1) { numPairs = ceil(remainingPairs / remainingProcessors); } + lines.push_back(linePair(startIndex, (startIndex+numPairs))); //startIndex, endIndex + startIndex = startIndex + numPairs; + remainingPairs = remainingPairs - numPairs; + } + #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix) diff --git a/chimeraslayercommand.cpp b/chimeraslayercommand.cpp index 41661da..e7dc92e 100644 --- a/chimeraslayercommand.cpp +++ b/chimeraslayercommand.cpp @@ -868,8 +868,8 @@ int ChimeraSlayerCommand::MPIExecuteGroups(string outputFileName, string accnosF map >::iterator itFile; vector filenames; for(itFile = fileToPriority.begin(); itFile != fileToPriority.end(); itFile++) { filenames.push_back(itFile->first); } - - int numGroupsPerProcessor = filenames.size() / processors; + + int numGroupsPerProcessor = ceil(filenames.size() / (double) processors); int startIndex = pid * numGroupsPerProcessor; int endIndex = (pid+1) * numGroupsPerProcessor; if(pid == (processors - 1)){ endIndex = filenames.size(); } diff --git a/chimerauchimecommand.cpp b/chimerauchimecommand.cpp index 3a9f42b..7a48cf7 100644 --- a/chimerauchimecommand.cpp +++ b/chimerauchimecommand.cpp @@ -1803,14 +1803,16 @@ int ChimeraUchimeCommand::createProcessesGroups(string outputFName, string filen //divide the groups between the processors vector lines; - int numGroupsPerProcessor = groups.size() / processors; - for (int i = 0; i < processors; i++) { - int startIndex = i * numGroupsPerProcessor; - int endIndex = (i+1) * numGroupsPerProcessor; - if(i == (processors - 1)){ endIndex = groups.size(); } - lines.push_back(linePair(startIndex, endIndex)); - } - + int remainingPairs = groups.size(); + int startIndex = 0; + for (int remainingProcessors = processors; remainingProcessors > 0; remainingProcessors--) { + int numPairs = remainingPairs; //case for last processor + if (remainingProcessors != 1) { numPairs = ceil(remainingPairs / remainingProcessors); } + lines.push_back(linePair(startIndex, (startIndex+numPairs))); //startIndex, endIndex + startIndex = startIndex + numPairs; + remainingPairs = remainingPairs - numPairs; + } + #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix) //loop through and create all the processes you want diff --git a/collectsharedcommand.cpp b/collectsharedcommand.cpp index 56e60d8..966cc25 100644 --- a/collectsharedcommand.cpp +++ b/collectsharedcommand.cpp @@ -50,6 +50,7 @@ #include "memeuclidean.h" #include "mempearson.h" #include "sharedjsd.h" +#include "sharedrjsd.h" //********************************************************************************************************************** @@ -58,7 +59,7 @@ vector CollectSharedCommand::setParameters(){ CommandParameter pshared("shared", "InputTypes", "", "", "none", "none", "none","",false,true,true); parameters.push_back(pshared); CommandParameter plabel("label", "String", "", "", "", "", "","",false,false); parameters.push_back(plabel); CommandParameter pfreq("freq", "Number", "", "100", "", "", "","",false,false); parameters.push_back(pfreq); - CommandParameter pcalc("calc", "Multiple", "sharedchao-sharedsobs-sharedace-jabund-sorabund-jclass-sorclass-jest-sorest-thetayc-thetan-kstest-whittaker-sharednseqs-ochiai-anderberg-kulczynski-kulczynskicody-lennon-morisitahorn-braycurtis-odum-canberra-structeuclidean-structchord-hellinger-manhattan-structpearson-soergel-spearman-structkulczynski-speciesprofile-structchi2-hamming-gower-memchi2-memchord-memeuclidean-mempearson-jsd", "sharedsobs-sharedchao-sharedace-jabund-sorabund-jclass-sorclass-jest-sorest-thetayc-thetan", "", "", "","",true,false,true); parameters.push_back(pcalc); + CommandParameter pcalc("calc", "Multiple", "sharedchao-sharedsobs-sharedace-jabund-sorabund-jclass-sorclass-jest-sorest-thetayc-thetan-kstest-whittaker-sharednseqs-ochiai-anderberg-kulczynski-kulczynskicody-lennon-morisitahorn-braycurtis-odum-canberra-structeuclidean-structchord-hellinger-manhattan-structpearson-soergel-spearman-structkulczynski-speciesprofile-structchi2-hamming-gower-memchi2-memchord-memeuclidean-mempearson-jsd-rjsd", "sharedsobs-sharedchao-sharedace-jabund-sorabund-jclass-sorclass-jest-sorest-thetayc-thetan", "", "", "","",true,false,true); parameters.push_back(pcalc); CommandParameter pall("all", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(pall); CommandParameter pgroups("groups", "String", "", "", "", "", "","",false,false); parameters.push_back(pgroups); CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir); @@ -143,6 +144,7 @@ string CollectSharedCommand::getOutputPattern(string type) { else if (type == "memeuclidean") { pattern = "[filename],memeuclidean"; } else if (type == "mempearson") { pattern = "[filename],mempearson"; } else if (type == "jsd") { pattern = "[filename],jsd"; } + else if (type == "rjsd") { pattern = "[filename],rjsd"; } else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true; } return pattern; @@ -199,6 +201,7 @@ CollectSharedCommand::CollectSharedCommand(){ outputTypes["memeuclidean"] = tempOutNames; outputTypes["mempearson"] = tempOutNames; outputTypes["jsd"] = tempOutNames; + outputTypes["rjsd"] = tempOutNames; } catch(exception& e) { @@ -272,6 +275,7 @@ CollectSharedCommand::CollectSharedCommand(string option) { outputTypes["memeuclidean"] = tempOutNames; outputTypes["mempearson"] = tempOutNames; outputTypes["jsd"] = tempOutNames; + outputTypes["rjsd"] = tempOutNames; //if the user changes the input directory command factory will send this info to us in the output parameter @@ -467,7 +471,11 @@ CollectSharedCommand::CollectSharedCommand(string option) { }else if (Estimators[i] == "jsd") { cDisplays.push_back(new CollectDisplay(new JSD(), new SharedOneColumnFile(getOutputFileName("jsd", variables)))); outputNames.push_back(getOutputFileName("jsd", variables)); outputTypes["jsd"].push_back(getOutputFileName("jsd", variables)); + }else if (Estimators[i] == "rjsd") { + cDisplays.push_back(new CollectDisplay(new RJSD(), new SharedOneColumnFile(getOutputFileName("rjsd", variables)))); + outputNames.push_back(getOutputFileName("rjsd", variables)); outputTypes["rjsd"].push_back(getOutputFileName("rjsd", variables)); } + } } diff --git a/getmetacommunitycommand.cpp b/getmetacommunitycommand.cpp index b316074..de59edf 100644 --- a/getmetacommunitycommand.cpp +++ b/getmetacommunitycommand.cpp @@ -18,7 +18,7 @@ vector GetMetaCommunityCommand::setParameters(){ CommandParameter pshared("shared", "InputTypes", "", "", "none", "none", "none","outputType",false,true); parameters.push_back(pshared); CommandParameter pgroups("groups", "String", "", "", "", "", "","",false,false); parameters.push_back(pgroups); CommandParameter plabel("label", "String", "", "", "", "", "","",false,false); parameters.push_back(plabel); - CommandParameter pcalc("calc", "Multiple", "sharedsobs-sharedchao-sharedace-jabund-sorabund-jclass-sorclass-jest-sorest-thetayc-thetan-kstest-sharednseqs-ochiai-anderberg-kulczynski-kulczynskicody-lennon-morisitahorn-braycurtis-whittaker-odum-canberra-structeuclidean-structchord-hellinger-manhattan-structpearson-soergel-spearman-structkulczynski-speciesprofile-hamming-structchi2-gower-memchi2-memchord-memeuclidean-mempearson-jsd", "jsd", "", "", "","",false,false,true); parameters.push_back(pcalc); + CommandParameter pcalc("calc", "Multiple", "sharedsobs-sharedchao-sharedace-jabund-sorabund-jclass-sorclass-jest-sorest-thetayc-thetan-kstest-sharednseqs-ochiai-anderberg-kulczynski-kulczynskicody-lennon-morisitahorn-braycurtis-whittaker-odum-canberra-structeuclidean-structchord-hellinger-manhattan-structpearson-soergel-spearman-structkulczynski-speciesprofile-hamming-structchi2-gower-memchi2-memchord-memeuclidean-mempearson-jsd-rjsd", "rjsd", "", "", "","",false,false,true); parameters.push_back(pcalc); CommandParameter psubsample("subsample", "String", "", "", "", "", "","",false,false); parameters.push_back(psubsample); CommandParameter piters("iters", "Number", "", "1000", "", "", "","",false,false); parameters.push_back(piters); CommandParameter pminpartitions("minpartitions", "Number", "", "5", "", "", "","",false,false,true); parameters.push_back(pminpartitions); @@ -190,9 +190,9 @@ GetMetaCommunityCommand::GetMetaCommunityCommand(string option) { else { m->mothurOut("[ERROR]: " + method + " is not a valid method. Valid algorithms are dmm, kmeans and pam."); m->mothurOutEndLine(); abort = true; } calc = validParameter.validFile(parameters, "calc", false); - if (calc == "not found") { calc = "jsd"; } + if (calc == "not found") { calc = "rjsd"; } else { - if (calc == "default") { calc = "jsd"; } + if (calc == "default") { calc = "rjsd"; } } m->splitAtDash(calc, Estimators); if (m->inUsersGroups("citation", Estimators)) { @@ -961,6 +961,8 @@ vector > GetMetaCommunityCommand::generateDistanceMatrix(vectormothurOut("[ERROR]: " + Estimators[i] + " is not a valid calculator, please correct.\n"); m->control_pressed = true; return results; } diff --git a/getmetacommunitycommand.h b/getmetacommunitycommand.h index 9c17e4d..d84a65d 100644 --- a/getmetacommunitycommand.h +++ b/getmetacommunitycommand.h @@ -55,6 +55,7 @@ #include "memeuclidean.h" #include "mempearson.h" #include "sharedjsd.h" +#include "sharedrjsd.h" /**************************************************************************************************/ diff --git a/matrixoutputcommand.cpp b/matrixoutputcommand.cpp index 8a7656c..2e6943f 100644 --- a/matrixoutputcommand.cpp +++ b/matrixoutputcommand.cpp @@ -17,7 +17,7 @@ vector MatrixOutputCommand::setParameters(){ CommandParameter plabel("label", "String", "", "", "", "", "","",false,false); parameters.push_back(plabel); CommandParameter psubsample("subsample", "String", "", "", "", "", "","",false,false); parameters.push_back(psubsample); CommandParameter pgroups("groups", "String", "", "", "", "", "","",false,false); parameters.push_back(pgroups); - CommandParameter pcalc("calc", "Multiple", "sharedsobs-sharedchao-sharedace-jabund-sorabund-jclass-sorclass-jest-sorest-thetayc-thetan-kstest-sharednseqs-ochiai-anderberg-kulczynski-kulczynskicody-lennon-morisitahorn-braycurtis-whittaker-odum-canberra-structeuclidean-structchord-hellinger-manhattan-structpearson-soergel-spearman-structkulczynski-speciesprofile-hamming-structchi2-gower-memchi2-memchord-memeuclidean-mempearson-jsd", "jclass-thetayc", "", "", "","",true,false,true); parameters.push_back(pcalc); + CommandParameter pcalc("calc", "Multiple", "sharedsobs-sharedchao-sharedace-jabund-sorabund-jclass-sorclass-jest-sorest-thetayc-thetan-kstest-sharednseqs-ochiai-anderberg-kulczynski-kulczynskicody-lennon-morisitahorn-braycurtis-whittaker-odum-canberra-structeuclidean-structchord-hellinger-manhattan-structpearson-soergel-spearman-structkulczynski-speciesprofile-hamming-structchi2-gower-memchi2-memchord-memeuclidean-mempearson-jsd-rjsd", "jclass-thetayc", "", "", "","",true,false,true); parameters.push_back(pcalc); CommandParameter poutput("output", "Multiple", "lt-square-column", "lt", "", "", "","",false,false); parameters.push_back(poutput); CommandParameter pmode("mode", "Multiple", "average-median", "average", "", "", "","",false,false); parameters.push_back(pmode); CommandParameter pprocessors("processors", "Number", "", "1", "", "", "","",false,false,true); parameters.push_back(pprocessors); @@ -283,7 +283,8 @@ MatrixOutputCommand::MatrixOutputCommand(string option) { matrixCalculators.push_back(new MemPearson()); }else if (Estimators[i] == "jsd") { matrixCalculators.push_back(new JSD()); - + }else if (Estimators[i] == "rjsd") { + matrixCalculators.push_back(new RJSD()); } } } diff --git a/matrixoutputcommand.h b/matrixoutputcommand.h index 5ddfb2c..a19ffc8 100644 --- a/matrixoutputcommand.h +++ b/matrixoutputcommand.h @@ -55,6 +55,7 @@ #include "memeuclidean.h" #include "mempearson.h" #include "sharedjsd.h" +#include "sharedrjsd.h" // aka. dist.shared() @@ -227,6 +228,8 @@ static DWORD WINAPI MyDistSharedThreadFunction(LPVOID lpParam){ matrixCalculators.push_back(new MemPearson()); }else if (pDataArray->Estimators[i] == "jsd") { matrixCalculators.push_back(new JSD()); + }else if (pDataArray->Estimators[i] == "rjsd") { + matrixCalculators.push_back(new RJSD()); } } diff --git a/metastatscommand.cpp b/metastatscommand.cpp index de9e373..de03dab 100644 --- a/metastatscommand.cpp +++ b/metastatscommand.cpp @@ -256,15 +256,14 @@ int MetaStatsCommand::execute(){ else if (numGroups < 2) { m->mothurOut("Not enough sets, I need at least 2 valid sets. Unable to complete command."); m->mothurOutEndLine(); m->control_pressed = true; } if(processors != 1){ - int numPairs = namesOfGroupCombos.size(); - int numPairsPerProcessor = numPairs / processors; - - for (int i = 0; i < processors; i++) { - int startPos = i * numPairsPerProcessor; - if(i == processors - 1){ - numPairsPerProcessor = numPairs - i * numPairsPerProcessor; - } - lines.push_back(linePair(startPos, numPairsPerProcessor)); + int remainingPairs = namesOfGroupCombos.size(); + int startIndex = 0; + for (int remainingProcessors = processors; remainingProcessors > 0; remainingProcessors--) { + int numPairs = remainingPairs; //case for last processor + if (remainingProcessors != 1) { numPairs = ceil(remainingPairs / remainingProcessors); } + lines.push_back(linePair(startIndex, numPairs)); //startIndex, numPairs + startIndex = startIndex + numPairs; + remainingPairs = remainingPairs - numPairs; } } diff --git a/parsimony.cpp b/parsimony.cpp index b12ca1f..1b08e8a 100644 --- a/parsimony.cpp +++ b/parsimony.cpp @@ -56,13 +56,14 @@ EstOutput Parsimony::getValues(Tree* t, int p, string o) { } lines.clear(); - int numPairs = namesOfGroupCombos.size(); - int numPairsPerProcessor = numPairs / processors; - - for (int i = 0; i < processors; i++) { - int startPos = i * numPairsPerProcessor; - if(i == processors - 1){ numPairsPerProcessor = numPairs - i * numPairsPerProcessor; } - lines.push_back(linePair(startPos, numPairsPerProcessor)); + int remainingPairs = namesOfGroupCombos.size(); + int startIndex = 0; + for (int remainingProcessors = processors; remainingProcessors > 0; remainingProcessors--) { + int numPairs = remainingPairs; //case for last processor + if (remainingProcessors != 1) { numPairs = ceil(remainingPairs / remainingProcessors); } + lines.push_back(linePair(startIndex, numPairs)); //startIndex, numPairs + startIndex = startIndex + numPairs; + remainingPairs = remainingPairs - numPairs; } data = createProcesses(t, namesOfGroupCombos, ct); diff --git a/preclustercommand.cpp b/preclustercommand.cpp index fe722a6..05c8b7d 100644 --- a/preclustercommand.cpp +++ b/preclustercommand.cpp @@ -369,13 +369,15 @@ int PreClusterCommand::createProcessesGroups(string newFName, string newNName, s //divide the groups between the processors vector lines; - int numGroupsPerProcessor = groups.size() / processors; - for (int i = 0; i < processors; i++) { - int startIndex = i * numGroupsPerProcessor; - int endIndex = (i+1) * numGroupsPerProcessor; - if(i == (processors - 1)){ endIndex = groups.size(); } - lines.push_back(linePair(startIndex, endIndex)); - } + int remainingPairs = groups.size(); + int startIndex = 0; + for (int remainingProcessors = processors; remainingProcessors > 0; remainingProcessors--) { + int numPairs = remainingPairs; //case for last processor + if (remainingProcessors != 1) { numPairs = ceil(remainingPairs / remainingProcessors); } + lines.push_back(linePair(startIndex, (startIndex+numPairs))); //startIndex, endIndex + startIndex = startIndex + numPairs; + remainingPairs = remainingPairs - numPairs; + } #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix) diff --git a/sharedrjsd.cpp b/sharedrjsd.cpp new file mode 100644 index 0000000..4699054 --- /dev/null +++ b/sharedrjsd.cpp @@ -0,0 +1,59 @@ +// +// sharedrjsd.cpp +// Mothur +// +// Created by Sarah Westcott on 1/21/14. +// Copyright (c) 2014 Schloss Lab. All rights reserved. +// + +#include "sharedrjsd.h" + +/***********************************************************************/ +//KLD <- function(x,y) sum(x *log(x/y)) +//JSD<- function(x,y) sqrt(0.5 * KLD(x, (x+y)/2) + 0.5 * KLD(y, (x+y)/2)) +EstOutput RJSD::getValues(vector shared) { + try { + + data.resize(1,0); + + double KLD1 = 0.0; + double KLD2 = 0.0; + + vector countA = shared[0]->getAbundances(); + vector countB = shared[1]->getAbundances(); + double totalA = 0; + double totalB = 0; + + for (int i = 0; i < shared[0]->getNumBins(); i++) { + totalA += countA[i]; + totalB += countB[i]; + } + + for (int i = 0; i < shared[0]->getNumBins(); i++) { + double tempA = countA[i] / totalA; + double tempB = countB[i] / totalB; + + tempA = countA[i] / totalA; + tempB = countB[i] / totalB; + + if (tempA == 0) { tempA = 0.000001; } + if (tempB == 0) { tempB = 0.000001; } + + double denom = (tempA+tempB)/(double)2.0; + + if (tempA != 0) { KLD1 += tempA * log(tempA/denom); } //KLD(x,m) + if (tempB != 0) { KLD2 += tempB * log(tempB/denom); } //KLD(y,m) + + } + + data[0] = sqrt((0.5*KLD1) + (0.5*KLD2)); + + if (isnan(data[0]) || isinf(data[0])) { data[0] = 0; } + + return data; + } + catch(exception& e) { + m->errorOut(e, "RJSD", "getValues"); + exit(1); + } +} diff --git a/sharedrjsd.h b/sharedrjsd.h new file mode 100644 index 0000000..9c98696 --- /dev/null +++ b/sharedrjsd.h @@ -0,0 +1,31 @@ +// +// sharedrjsd.h +// Mothur +// +// Created by Sarah Westcott on 1/21/14. +// Copyright (c) 2014 Schloss Lab. All rights reserved. +// + +#ifndef Mothur_sharedrjsd_h +#define Mothur_sharedrjsd_h + +#include "calculator.h" + +/***********************************************************************/ +//Jensen-Shannon divergence (JSD) +class RJSD : public Calculator { + +public: + RJSD() : Calculator("rjsd", 1, false) {}; + EstOutput getValues(SAbundVector*) {return data;}; + EstOutput getValues(vector); + string getCitation() { return "http://www.mothur.org/wiki/RJSD"; } +private: + +}; + +/***********************************************************************/ + + + +#endif diff --git a/shhhseqscommand.cpp b/shhhseqscommand.cpp index 82d9561..0cc6eb4 100644 --- a/shhhseqscommand.cpp +++ b/shhhseqscommand.cpp @@ -377,13 +377,16 @@ vector ShhhSeqsCommand::createProcessesGroups(SequenceParser& parser, st //divide the groups between the processors vector lines; - int numGroupsPerProcessor = groups.size() / processors; - for (int i = 0; i < processors; i++) { - int startIndex = i * numGroupsPerProcessor; - int endIndex = (i+1) * numGroupsPerProcessor; - if(i == (processors - 1)){ endIndex = groups.size(); } - lines.push_back(linePair(startIndex, endIndex)); - } + int remainingPairs = groups.size(); + int startIndex = 0; + for (int remainingProcessors = processors; remainingProcessors > 0; remainingProcessors--) { + int numPairs = remainingPairs; //case for last processor + if (remainingProcessors != 1) { numPairs = ceil(remainingPairs / remainingProcessors); } + lines.push_back(linePair(startIndex, (startIndex+numPairs))); //startIndex, endIndex + startIndex = startIndex + numPairs; + remainingPairs = remainingPairs - numPairs; + } + #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix) diff --git a/summarysharedcommand.cpp b/summarysharedcommand.cpp index ef5e365..59c12a0 100644 --- a/summarysharedcommand.cpp +++ b/summarysharedcommand.cpp @@ -17,7 +17,7 @@ vector SummarySharedCommand::setParameters(){ CommandParameter plabel("label", "String", "", "", "", "", "","",false,false); parameters.push_back(plabel); CommandParameter psubsample("subsample", "String", "", "", "", "", "","phylip",false,false); parameters.push_back(psubsample); CommandParameter pdistance("distance", "Boolean", "", "F", "", "", "","phylip",false,false); parameters.push_back(pdistance); - CommandParameter pcalc("calc", "Multiple", "sharedchao-sharedsobs-sharedace-jabund-sorabund-jclass-sorclass-jest-sorest-thetayc-thetan-kstest-whittaker-sharednseqs-ochiai-anderberg-kulczynski-kulczynskicody-lennon-morisitahorn-braycurtis-odum-canberra-structeuclidean-structchord-hellinger-manhattan-structpearson-soergel-spearman-structkulczynski-speciesprofile-structchi2-hamming-gower-memchi2-memchord-memeuclidean-mempearson-jsd", "sharedsobs-sharedchao-sharedace-jabund-sorabund-jclass-sorclass-jest-sorest-thetayc-thetan", "", "", "","",true,false,true); parameters.push_back(pcalc); + CommandParameter pcalc("calc", "Multiple", "sharedchao-sharedsobs-sharedace-jabund-sorabund-jclass-sorclass-jest-sorest-thetayc-thetan-kstest-whittaker-sharednseqs-ochiai-anderberg-kulczynski-kulczynskicody-lennon-morisitahorn-braycurtis-odum-canberra-structeuclidean-structchord-hellinger-manhattan-structpearson-soergel-spearman-structkulczynski-speciesprofile-structchi2-hamming-gower-memchi2-memchord-memeuclidean-mempearson-jsd-rjsd", "sharedsobs-sharedchao-sharedace-jabund-sorabund-jclass-sorclass-jest-sorest-thetayc-thetan", "", "", "","",true,false,true); parameters.push_back(pcalc); CommandParameter poutput("output", "Multiple", "lt-square", "lt", "", "", "","",false,false); parameters.push_back(poutput); CommandParameter pall("all", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(pall); CommandParameter piters("iters", "Number", "", "1000", "", "", "","",false,false); parameters.push_back(piters); @@ -296,6 +296,8 @@ SummarySharedCommand::SummarySharedCommand(string option) { sumCalculators.push_back(new MemPearson()); }else if (Estimators[i] == "jsd") { sumCalculators.push_back(new JSD()); + }else if (Estimators[i] == "rjsd") { + sumCalculators.push_back(new RJSD()); } } } diff --git a/summarysharedcommand.h b/summarysharedcommand.h index 1b3f164..7a2a190 100644 --- a/summarysharedcommand.h +++ b/summarysharedcommand.h @@ -57,6 +57,7 @@ #include "memeuclidean.h" #include "mempearson.h" #include "sharedjsd.h" +#include "sharedrjsd.h" class SummarySharedCommand : public Command { @@ -219,6 +220,8 @@ static DWORD WINAPI MySummarySharedThreadFunction(LPVOID lpParam){ sumCalculators.push_back(new MemPearson()); }else if (pDataArray->Estimators[i] == "jsd") { sumCalculators.push_back(new JSD()); + }else if (pDataArray->Estimators[i] == "rjsd") { + sumCalculators.push_back(new RJSD()); } } } diff --git a/unifracweightedcommand.cpp b/unifracweightedcommand.cpp index 4883c48..1c146ac 100644 --- a/unifracweightedcommand.cpp +++ b/unifracweightedcommand.cpp @@ -699,14 +699,16 @@ int UnifracWeightedCommand::runRandomCalcs(Tree* thisTree, vector usersS lines.clear(); //breakdown work between processors - int numPairs = namesOfGroupCombos.size(); - int numPairsPerProcessor = numPairs / processors; - - for (int i = 0; i < processors; i++) { - int startPos = i * numPairsPerProcessor; - if(i == processors - 1){ numPairsPerProcessor = numPairs - i * numPairsPerProcessor; } - lines.push_back(linePair(startPos, numPairsPerProcessor)); + int remainingPairs = namesOfGroupCombos.size(); + int startIndex = 0; + for (int remainingProcessors = processors; remainingProcessors > 0; remainingProcessors--) { + int numPairs = remainingPairs; //case for last processor + if (remainingProcessors != 1) { numPairs = ceil(remainingPairs / remainingProcessors); } + lines.push_back(linePair(startIndex, numPairs)); //startIndex, numPairs + startIndex = startIndex + numPairs; + remainingPairs = remainingPairs - numPairs; } + //get scores for random trees diff --git a/unweighted.cpp b/unweighted.cpp index a845f9b..8fbd9d2 100644 --- a/unweighted.cpp +++ b/unweighted.cpp @@ -51,13 +51,14 @@ EstOutput Unweighted::getValues(Tree* t, int p, string o) { } lines.clear(); - int numPairs = namesOfGroupCombos.size(); - int numPairsPerProcessor = numPairs / processors; - - for (int i = 0; i < processors; i++) { - int startPos = i * numPairsPerProcessor; - if(i == processors - 1){ numPairsPerProcessor = numPairs - i * numPairsPerProcessor; } - lines.push_back(linePair(startPos, numPairsPerProcessor)); + int remainingPairs = namesOfGroupCombos.size(); + int startIndex = 0; + for (int remainingProcessors = processors; remainingProcessors > 0; remainingProcessors--) { + int numPairs = remainingPairs; //case for last processor + if (remainingProcessors != 1) { numPairs = ceil(remainingPairs / remainingProcessors); } + lines.push_back(linePair(startIndex, numPairs)); //startIndex, numPairs + startIndex = startIndex + numPairs; + remainingPairs = remainingPairs - numPairs; } data = createProcesses(t, namesOfGroupCombos, ct); @@ -316,7 +317,7 @@ EstOutput Unweighted::getValues(Tree* t, string groupA, string groupB, int p, st lines.clear(); int numPairs = namesOfGroupCombos.size(); - int numPairsPerProcessor = numPairs / processors; + int numPairsPerProcessor = ceil(numPairs / processors); for (int i = 0; i < processors; i++) { int startPos = i * numPairsPerProcessor; diff --git a/validcalculator.cpp b/validcalculator.cpp index 4718e2f..98f0d9a 100644 --- a/validcalculator.cpp +++ b/validcalculator.cpp @@ -77,6 +77,7 @@ #include "sharedsobs.h" #include "sharednseqs.h" #include "sharedjsd.h" +#include "sharedrjsd.h" /********************************************************************/ @@ -208,6 +209,7 @@ void ValidCalculators::printCitations(vector Estimators) { }else if (Estimators[i] == "sharedobserved") { Calculator* temp = new SharedSobs(); m->mothurOut(temp->getName() + ": "); temp->citation(); delete temp; }else if (Estimators[i] == "kulczynski") { Calculator* temp = new Kulczynski(); m->mothurOut(temp->getName() + ": "); temp->citation(); delete temp; }else if (Estimators[i] == "jsd") { Calculator* temp = new JSD(); m->mothurOut(temp->getName() + ": "); temp->citation(); delete temp; + }else if (Estimators[i] == "rjsd") { Calculator* temp = new RJSD(); m->mothurOut(temp->getName() + ": "); temp->citation(); delete temp; }else { m->mothurOut("[ERROR]: Missing else if for " + Estimators[i] + " in printCitations."); m->mothurOutEndLine(); } }else { m->mothurOut(Estimators[i] + " is not a valid calculator, no citation will be given."); m->mothurOutEndLine(); } } @@ -458,7 +460,8 @@ void ValidCalculators::initialShared() { shared["memchord"] = "memchord"; shared["memeuclidean"] = "memeuclidean"; shared["mempearson"] = "mempearson"; - shared["jsd"] = "jsd"; + shared["jsd"] = "jsd"; + shared["rjsd"] = "rjsd"; shared["default"] = "default"; } catch(exception& e) { @@ -572,7 +575,8 @@ void ValidCalculators::initialSharedSummary() { sharedsummary["memchord"] = "memchord"; sharedsummary["memeuclidean"] = "memeuclidean"; sharedsummary["mempearson"] = "mempearson"; - sharedsummary["jsd"] = "jsd"; + sharedsummary["jsd"] = "jsd"; + sharedsummary["rjsd"] = "rjsd"; sharedsummary["default"] = "default"; } catch(exception& e) { @@ -730,14 +734,15 @@ void ValidCalculators::initialMatrix() { matrix["structchi2"] = "structchi2"; matrix["soergel"] = "soergel"; matrix["spearman"] = "spearman"; - matrix["speciesprofile"] = "speciesprofile"; + matrix["speciesprofile"] = "speciesprofile"; matrix["hamming"] = "hamming"; matrix["gower"] = "gower"; matrix["memchi2"] = "memchi2"; matrix["memchord"] = "memchord"; matrix["memeuclidean"] = "memeuclidean"; - matrix["mempearson"] = "mempearson"; - matrix["jsd"] = "jsd"; + matrix["mempearson"] = "mempearson"; + matrix["rjsd"] = "rjsd"; + matrix["jsd"] = "jsd"; } catch(exception& e) { diff --git a/weighted.cpp b/weighted.cpp index b0d06fb..49bf6bf 100644 --- a/weighted.cpp +++ b/weighted.cpp @@ -36,13 +36,14 @@ EstOutput Weighted::getValues(Tree* t, int p, string o) { } } - int numPairs = namesOfGroupCombos.size(); - int numPairsPerProcessor = numPairs / processors; - - for (int i = 0; i < processors; i++) { - int startPos = i * numPairsPerProcessor; - if(i == processors - 1){ numPairsPerProcessor = numPairs - i * numPairsPerProcessor; } - lines.push_back(linePair(startPos, numPairsPerProcessor)); + int remainingPairs = namesOfGroupCombos.size(); + int startIndex = 0; + for (int remainingProcessors = processors; remainingProcessors > 0; remainingProcessors--) { + int numPairs = remainingPairs; //case for last processor + if (remainingProcessors != 1) { numPairs = ceil(remainingPairs / remainingProcessors); } + lines.push_back(linePair(startIndex, numPairs)); //startIndex, numPairs + startIndex = startIndex + numPairs; + remainingPairs = remainingPairs - numPairs; } data = createProcesses(t, namesOfGroupCombos, ct); -- 2.39.2