From 4a760c2d164aa955dee7d3d38da323822763d906 Mon Sep 17 00:00:00 2001 From: SarahsWork Date: Fri, 12 Apr 2013 13:52:41 -0400 Subject: [PATCH] added get.metacommunity command. --- Mothur.xcodeproj/project.pbxproj | 12 + commandfactory.cpp | 5 + getmetacommunitycommand.cpp | 550 +++++++++++++ getmetacommunitycommand.h | 64 ++ linearalgebra.cpp | 200 ++++- linearalgebra.h | 7 + qFinderDMM.cpp | 1326 ++++++++++++++++++++++++++++++ qFinderDMM.h | 69 ++ sharedrabundvector.cpp | 12 + sharedrabundvector.h | 1 + 10 files changed, 2226 insertions(+), 20 deletions(-) create mode 100644 getmetacommunitycommand.cpp create mode 100644 getmetacommunitycommand.h create mode 100644 qFinderDMM.cpp create mode 100644 qFinderDMM.h diff --git a/Mothur.xcodeproj/project.pbxproj b/Mothur.xcodeproj/project.pbxproj index ce07a03..284bb59 100644 --- a/Mothur.xcodeproj/project.pbxproj +++ b/Mothur.xcodeproj/project.pbxproj @@ -44,6 +44,8 @@ A74D36B8137DAFAA00332B0C /* chimerauchimecommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A74D36B7137DAFAA00332B0C /* chimerauchimecommand.cpp */; }; A74D59A4159A1E2000043046 /* counttable.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A74D59A3159A1E2000043046 /* counttable.cpp */; }; A754149714840CF7005850D1 /* summaryqualcommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A754149614840CF7005850D1 /* summaryqualcommand.cpp */; }; + A7548FAD17142EBC00B1F05A /* getmetacommunitycommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7548FAC17142EBC00B1F05A /* getmetacommunitycommand.cpp */; }; + A7548FB0171440ED00B1F05A /* qFinderDMM.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7548FAE171440EC00B1F05A /* qFinderDMM.cpp */; }; A75790591301749D00A30DAB /* homovacommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A75790581301749D00A30DAB /* homovacommand.cpp */; }; A76CDD821510F143004C8458 /* prcseqscommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A76CDD811510F143004C8458 /* prcseqscommand.cpp */; }; A7730EFF13967241007433A3 /* countseqscommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7730EFE13967241007433A3 /* countseqscommand.cpp */; }; @@ -462,6 +464,10 @@ A74D59A6159A1E3600043046 /* counttable.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = counttable.h; sourceTree = ""; }; A754149514840CF7005850D1 /* summaryqualcommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = summaryqualcommand.h; sourceTree = ""; }; A754149614840CF7005850D1 /* summaryqualcommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = summaryqualcommand.cpp; sourceTree = ""; }; + A7548FAB17142EA500B1F05A /* getmetacommunitycommand.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = getmetacommunitycommand.h; sourceTree = ""; }; + A7548FAC17142EBC00B1F05A /* getmetacommunitycommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = getmetacommunitycommand.cpp; sourceTree = ""; }; + A7548FAE171440EC00B1F05A /* qFinderDMM.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = qFinderDMM.cpp; sourceTree = ""; }; + A7548FAF171440ED00B1F05A /* qFinderDMM.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = qFinderDMM.h; sourceTree = ""; }; A75790571301749D00A30DAB /* homovacommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = homovacommand.h; sourceTree = ""; }; A75790581301749D00A30DAB /* homovacommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = homovacommand.cpp; sourceTree = ""; }; A76CDD7F1510F09A004C8458 /* pcrseqscommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = pcrseqscommand.h; sourceTree = ""; }; @@ -1192,6 +1198,8 @@ A7E9B77C12D37EC400DA6239 /* overlap.hpp */, A7E9B79B12D37EC400DA6239 /* progress.cpp */, A7E9B79C12D37EC400DA6239 /* progress.hpp */, + A7548FAF171440ED00B1F05A /* qFinderDMM.h */, + A7548FAE171440EC00B1F05A /* qFinderDMM.cpp */, A7E9B7A512D37EC400DA6239 /* rarecalc.cpp */, A7386C191619C9FB00651424 /* randomforest */, A7E9B7A612D37EC400DA6239 /* rarecalc.h */, @@ -1375,6 +1383,8 @@ A7E9B6F412D37EC400DA6239 /* getgroupscommand.cpp */, A7E9B6F712D37EC400DA6239 /* getlabelcommand.h */, A7E9B6F612D37EC400DA6239 /* getlabelcommand.cpp */, + A7548FAB17142EA500B1F05A /* getmetacommunitycommand.h */, + A7548FAC17142EBC00B1F05A /* getmetacommunitycommand.cpp */, A70056E8156A93E300924A2D /* getotulabelscommand.h */, A70056E5156A93D000924A2D /* getotulabelscommand.cpp */, A7E9B6F912D37EC400DA6239 /* getlineagecommand.h */, @@ -2334,6 +2344,8 @@ A7128B1D16B7002A00723BE4 /* getdistscommand.cpp in Sources */, A7B0231516B8244C006BA09E /* removedistscommand.cpp in Sources */, A799314B16CBD0CD0017E888 /* mergetaxsummarycommand.cpp in Sources */, + A7548FAD17142EBC00B1F05A /* getmetacommunitycommand.cpp in Sources */, + A7548FB0171440ED00B1F05A /* qFinderDMM.cpp in Sources */, ); runOnlyForDeploymentPostprocessing = 0; }; diff --git a/commandfactory.cpp b/commandfactory.cpp index 193c53c..6c6bc17 100644 --- a/commandfactory.cpp +++ b/commandfactory.cpp @@ -141,6 +141,7 @@ #include "getdistscommand.h" #include "removedistscommand.h" #include "mergetaxsummarycommand.h" +#include "getmetacommunitycommand.h" /*******************************************************/ @@ -305,6 +306,7 @@ CommandFactory::CommandFactory(){ commands["get.dists"] = "get.dists"; commands["remove.dists"] = "remove.dists"; commands["merge.taxsummary"] = "merge.taxsummary"; + commands["get.metacommunity"] = "get.metacommunity"; } @@ -525,6 +527,7 @@ Command* CommandFactory::getCommand(string commandName, string optionString){ else if(commandName == "get.dists") { command = new GetDistsCommand(optionString); } else if(commandName == "remove.dists") { command = new RemoveDistsCommand(optionString); } else if(commandName == "merge.taxsummary") { command = new MergeTaxSummaryCommand(optionString); } + else if(commandName == "get.metacommunity") { command = new GetMetaCommunityCommand(optionString); } else { command = new NoCommand(optionString); } return command; @@ -686,6 +689,7 @@ Command* CommandFactory::getCommand(string commandName, string optionString, str else if(commandName == "get.dists") { pipecommand = new GetDistsCommand(optionString); } else if(commandName == "remove.dists") { pipecommand = new RemoveDistsCommand(optionString); } else if(commandName == "merge.taxsummary") { pipecommand = new MergeTaxSummaryCommand(optionString); } + else if(commandName == "get.metacommunity") { pipecommand = new GetMetaCommunityCommand(optionString); } else { pipecommand = new NoCommand(optionString); } return pipecommand; @@ -833,6 +837,7 @@ Command* CommandFactory::getCommand(string commandName){ else if(commandName == "get.dists") { shellcommand = new GetDistsCommand(); } else if(commandName == "remove.dists") { shellcommand = new RemoveDistsCommand(); } else if(commandName == "merge.taxsummary") { shellcommand = new MergeTaxSummaryCommand(); } + else if(commandName == "get.metacommunity") { shellcommand = new GetMetaCommunityCommand(); } else { shellcommand = new NoCommand(); } return shellcommand; diff --git a/getmetacommunitycommand.cpp b/getmetacommunitycommand.cpp new file mode 100644 index 0000000..59efe60 --- /dev/null +++ b/getmetacommunitycommand.cpp @@ -0,0 +1,550 @@ +// +// getmetacommunitycommand.cpp +// Mothur +// +// Created by SarahsWork on 4/9/13. +// Copyright (c) 2013 Schloss Lab. All rights reserved. +// + +#include "getmetacommunitycommand.h" +#include "qFinderDMM.h" + +//********************************************************************************************************************** +vector GetMetaCommunityCommand::setParameters(){ + try { + CommandParameter pshared("shared", "InputTypes", "", "", "none", "none", "none","outputType",false,true); parameters.push_back(pshared); + CommandParameter pgroups("groups", "String", "", "", "", "", "","",false,false); parameters.push_back(pgroups); + CommandParameter plabel("label", "String", "", "", "", "", "","",false,false); parameters.push_back(plabel); + CommandParameter pminpartitions("minpartitions", "Number", "", "5", "", "", "","",false,false,true); parameters.push_back(pminpartitions); + CommandParameter pmaxpartitions("maxpartitions", "Number", "", "10", "", "", "","",false,false,true); parameters.push_back(pmaxpartitions); + CommandParameter poptimizegap("optimizegap", "Number", "", "3", "", "", "","",false,false,true); parameters.push_back(poptimizegap); + CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir); + CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir); + + vector myArray; + for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); } + return myArray; + } + catch(exception& e) { + m->errorOut(e, "NewCommand", "setParameters"); + exit(1); + } +} +//********************************************************************************************************************** +string GetMetaCommunityCommand::getHelpString(){ + try { + string helpString = ""; + helpString += "The get.metacommunity command parameters are shared, label, groups, minpartitions, maxpartitions and optimizegap. The shared file is required. \n"; + helpString += "The label parameter is used to analyze specific labels in your input. labels are separated by dashes.\n"; + helpString += "The groups parameter allows you to specify which of the groups in your shared file you would like analyzed. Group names are separated by dashes.\n"; + helpString += "The minpartitions parameter is used to .... Default=5.\n"; + helpString += "The maxpartitions parameter is used to .... Default=10.\n"; + helpString += "The optimizegap parameter is used to .... Default=3.\n"; + helpString += "The get.metacommunity command should be in the following format: get.metacommunity(shared=yourSharedFile).\n"; + return helpString; + } + catch(exception& e) { + m->errorOut(e, "GetMetaCommunityCommand", "getHelpString"); + exit(1); + } +} +//********************************************************************************************************************** +string GetMetaCommunityCommand::getOutputPattern(string type) { + try { + string pattern = ""; + + if (type == "fit") { pattern = "[filename],[distance],mix.fit"; } + else if (type == "relabund") { pattern = "[filename],[distance],[tag],mix.relabund"; } + else if (type == "design") { pattern = "[filename],mix.design"; } + else if (type == "matrix") { pattern = "[filename],[distance],[tag],mix.posterior"; } + else if (type == "parameters") { pattern = "[filename],[distance],mix.parameters"; } + else if (type == "summary") { pattern = "[filename],[distance],mix.summary"; } + else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true; } + + return pattern; + } + catch(exception& e) { + m->errorOut(e, "GetMetaCommunityCommand", "getOutputPattern"); + exit(1); + } +} +//********************************************************************************************************************** +GetMetaCommunityCommand::GetMetaCommunityCommand(){ + try { + abort = true; calledHelp = true; + setParameters(); + vector tempOutNames; + outputTypes["fit"] = tempOutNames; + outputTypes["relabund"] = tempOutNames; + outputTypes["matrix"] = tempOutNames; + outputTypes["design"] = tempOutNames; + outputTypes["parameters"] = tempOutNames; + outputTypes["summary"] = tempOutNames; + } + catch(exception& e) { + m->errorOut(e, "GetMetaCommunityCommand", "GetMetaCommunityCommand"); + exit(1); + } +} +//********************************************************************************************************************** +GetMetaCommunityCommand::GetMetaCommunityCommand(string option) { + try { + abort = false; calledHelp = false; + allLines=true; + + //allow user to run help + if(option == "help") { help(); abort = true; calledHelp = true; } + else if(option == "citation") { citation(); abort = true; calledHelp = true;} + + else { + //valid paramters for this command + vector myArray = setParameters(); + + OptionParser parser(option); + map parameters = parser.getParameters(); + + ValidParameters validParameter; + map::iterator it; + //check to make sure all parameters are valid for command + for (it = parameters.begin(); it != parameters.end(); it++) { + if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; } + } + + vector tempOutNames; + outputTypes["fit"] = tempOutNames; + outputTypes["relabund"] = tempOutNames; + outputTypes["matrix"] = tempOutNames; + outputTypes["design"] = tempOutNames; + outputTypes["parameters"] = tempOutNames; + outputTypes["summary"] = tempOutNames; + + //if the user changes the input directory command factory will send this info to us in the output parameter + string inputDir = validParameter.validFile(parameters, "inputdir", false); + if (inputDir == "not found"){ inputDir = ""; } + else { + string path; + it = parameters.find("shared"); + if(it != parameters.end()){ + path = m->hasPath(it->second); + if (path == "") { parameters["shared"] = inputDir + it->second; } + } + } + + //get shared file, it is required + sharedfile = validParameter.validFile(parameters, "shared", true); + if (sharedfile == "not open") { sharedfile = ""; abort = true; } + else if (sharedfile == "not found") { + //if there is a current shared file, use it + sharedfile = m->getSharedFile(); + if (sharedfile != "") { m->mothurOut("Using " + sharedfile + " as input file for the shared parameter."); m->mothurOutEndLine(); } + else { m->mothurOut("You have no current sharedfile and the shared parameter is required."); m->mothurOutEndLine(); abort = true; } + }else { m->setSharedFile(sharedfile); } + + //if the user changes the output directory command factory will send this info to us in the output parameter + outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ + outputDir = m->hasPath(sharedfile); //if user entered a file with a path then preserve it + } + + string temp = validParameter.validFile(parameters, "minpartitions", false); if (temp == "not found"){ temp = "5"; } + m->mothurConvert(temp, minpartitions); + + temp = validParameter.validFile(parameters, "maxpartitions", false); if (temp == "not found"){ temp = "10"; } + m->mothurConvert(temp, maxpartitions); + + temp = validParameter.validFile(parameters, "optimizegap", false); if (temp == "not found"){ temp = "3"; } + m->mothurConvert(temp, optimizegap); + + string groups = validParameter.validFile(parameters, "groups", false); + if (groups == "not found") { groups = ""; } + else { m->splitAtDash(groups, Groups); } + m->setGroups(Groups); + + string label = validParameter.validFile(parameters, "label", false); + if (label == "not found") { label = ""; } + else { + if(label != "all") { m->splitAtDash(label, labels); allLines = 0; } + else { allLines = 1; } + } + } + + } + catch(exception& e) { + m->errorOut(e, "GetMetaCommunityCommand", "GetMetaCommunityCommand"); + exit(1); + } +} +//********************************************************************************************************************** + +int GetMetaCommunityCommand::execute(){ + try { + + if (abort == true) { if (calledHelp) { return 0; } return 2; } + + InputData input(sharedfile, "sharedfile"); + vector lookup = input.getSharedRAbundVectors(); + string lastLabel = lookup[0]->getLabel(); + + //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label. + set processedLabels; + set userLabels = labels; + + //as long as you are not at the end of the file or done wih the lines you want + while((lookup[0] != NULL) && ((allLines == 1) || (userLabels.size() != 0))) { + + if (m->control_pressed) { for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; } return 0; } + + if(allLines == 1 || labels.count(lookup[0]->getLabel()) == 1){ + + m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine(); + + process(lookup); + + processedLabels.insert(lookup[0]->getLabel()); + userLabels.erase(lookup[0]->getLabel()); + } + + if ((m->anyLabelsToProcess(lookup[0]->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) { + string saveLabel = lookup[0]->getLabel(); + + for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; } + lookup = input.getSharedRAbundVectors(lastLabel); + m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine(); + + process(lookup); + + processedLabels.insert(lookup[0]->getLabel()); + userLabels.erase(lookup[0]->getLabel()); + + //restore real lastlabel to save below + lookup[0]->setLabel(saveLabel); + } + + lastLabel = lookup[0]->getLabel(); + //prevent memory leak + for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; lookup[i] = NULL; } + + if (m->control_pressed) { return 0; } + + //get next line to process + lookup = input.getSharedRAbundVectors(); + } + + if (m->control_pressed) { return 0; } + + //output error messages about any remaining user labels + set::iterator it; + bool needToRun = false; + for (it = userLabels.begin(); it != userLabels.end(); it++) { + m->mothurOut("Your file does not include the label " + *it); + if (processedLabels.count(lastLabel) != 1) { + m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine(); + needToRun = true; + }else { + m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine(); + } + } + + //run last label if you need to + if (needToRun == true) { + for (int i = 0; i < lookup.size(); i++) { if (lookup[i] != NULL) { delete lookup[i]; } } + lookup = input.getSharedRAbundVectors(lastLabel); + + m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine(); + + process(lookup); + + for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; } + } + + //output files created by command + m->mothurOutEndLine(); + m->mothurOut("Output File Names: "); m->mothurOutEndLine(); + for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); } + m->mothurOutEndLine(); + return 0; + + } + catch(exception& e) { + m->errorOut(e, "GetMetaCommunityCommand", "execute"); + exit(1); + } +} +//********************************************************************************************************************** +int GetMetaCommunityCommand::process(vector& thislookup){ + try { + + double minLaplace = 1e10; + int minPartition = 0; + + map variables; + variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(sharedfile)); + variables["[distance]"] = thislookup[0]->getLabel(); + string outputFileName = getOutputFileName("fit", variables); + outputNames.push_back(outputFileName); outputTypes["fit"].push_back(outputFileName); + + ofstream fitData; + m->openOutputFile(outputFileName, fitData); + fitData.setf(ios::fixed, ios::floatfield); + fitData.setf(ios::showpoint); + cout.setf(ios::fixed, ios::floatfield); + cout.setf(ios::showpoint); + + vector< vector > sharedMatrix; + for (int i = 0; i < thislookup.size(); i++) { sharedMatrix.push_back(thislookup[i]->getAbundances()); } + + m->mothurOut("K\tNLE\t\tlogDet\tBIC\t\tAIC\t\tLaplace\n"); + fitData << "K\tNLE\tlogDet\tBIC\tAIC\tLaplace" << endl; + + for(int numPartitions=1;numPartitions<=maxpartitions;numPartitions++){ + + if (m->control_pressed) { break; } + + qFinderDMM findQ(sharedMatrix, numPartitions); + + double laplace = findQ.getLaplace(); + m->mothurOut(toString(numPartitions) + '\t'); + cout << setprecision (2) << findQ.getNLL() << '\t' << findQ.getLogDet() << '\t'; + m->mothurOutJustToLog(toString(findQ.getNLL()) + '\t' + toString(findQ.getLogDet()) + '\t'); + cout << findQ.getBIC() << '\t' << findQ.getAIC() << '\t' << laplace; + m->mothurOutJustToLog(toString(findQ.getBIC()) + '\t' + toString(findQ.getAIC()) + '\t' + toString(laplace)); + + fitData << numPartitions << '\t'; + fitData << setprecision (2) << findQ.getNLL() << '\t' << findQ.getLogDet() << '\t'; + fitData << findQ.getBIC() << '\t' << findQ.getAIC() << '\t' << laplace << endl; + + if(laplace < minLaplace){ + minPartition = numPartitions; + minLaplace = laplace; + m->mothurOut("***"); + } + m->mothurOutEndLine(); + + variables["[tag]"] = toString(numPartitions); + string relabund = getOutputFileName("relabund", variables); + outputNames.push_back(relabund); outputTypes["relabund"].push_back(relabund); + string matrix = getOutputFileName("matrix", variables); + outputNames.push_back(matrix); outputTypes["matrix"].push_back(matrix); + + findQ.printZMatrix(matrix, m->getGroups()); + findQ.printRelAbund(relabund, m->currentBinLabels); + + if(optimizegap != -1 && (numPartitions - minPartition) >= optimizegap && numPartitions >= minpartitions){ break; } + } + fitData.close(); + + //minPartition = 4; + + if (m->control_pressed) { return 0; } + + generateSummaryFile(minpartitions, outputTypes["relabund"][0], outputTypes["relabund"][outputTypes["relabund"].size()-1], outputTypes["matrix"][outputTypes["matrix"].size()-1], thislookup[0]->getLabel()); + + + return 0; + } + catch(exception& e) { + m->errorOut(e, "GetMetaCommunityCommand", "process"); + exit(1); + } +} +/**************************************************************************************************/ + +vector GetMetaCommunityCommand::generateDesignFile(int numPartitions, string input){ + try { + vector piValues(numPartitions, 0); + + ifstream postFile; + m->openInputFile(input, postFile);//((fileRoot + toString(numPartitions) + "mix.posterior").c_str()); //matrix file + map variables; + variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(input)); + string outputFileName = getOutputFileName("design", variables); + ofstream designFile; + m->openOutputFile(outputFileName, designFile); + outputNames.push_back(outputFileName); outputTypes["design"].push_back(outputFileName); + + + vector titles(numPartitions); + + for(int i=0;i> titles[i]; } + + double posterior; + string sampleName; + int numSamples = 0; + + while(postFile){ + + if (m->control_pressed) { break; } + + double maxPosterior = 0.0000; + int maxPartition = -1; + + postFile >> sampleName; + + for(int i=0;i> posterior; + if(posterior > maxPosterior){ + maxPosterior = posterior; + maxPartition = i; + } + piValues[i] += posterior; + + } + + designFile << sampleName << '\t' << titles[maxPartition] << endl; + + numSamples++; + m->gobble(postFile); + } + for(int i=0;ierrorOut(e, "GetMetaCommunityCommand", "generateDesignFile"); + exit(1); + } +} + +/**************************************************************************************************/ + +inline bool summaryFunction(summaryData i, summaryData j){ return i.difference > j.difference; } + +/**************************************************************************************************/ +int GetMetaCommunityCommand::generateSummaryFile(int numPartitions, string reference, string partFile, string designInput, string label){ + try { + vector summary; + + vector pMean(numPartitions, 0); + vector pLCI(numPartitions, 0); + vector pUCI(numPartitions, 0); + + string name, header; + double mean, lci, uci; + + + vector piValues = generateDesignFile(numPartitions, designInput); + + ifstream referenceFile; + m->openInputFile(reference, referenceFile); //((fileRoot + label + ".1mix.relabund").c_str()); + ifstream partitionFile; + m->openInputFile(partFile, partitionFile); //((fileRoot + toString(numPartitions) + "mix.relabund").c_str()); + + header = m->getline(referenceFile); + header = m->getline(partitionFile); + stringstream head(header); + string dummy, label; + head >> dummy; + vector thetaValues(numPartitions, ""); + for(int i=0;i> label >> dummy >> dummy; + thetaValues[i] = label.substr(label.find_last_of('_')+1); + } + + + vector partitionDiff(numPartitions, 0.0000); + + while(referenceFile){ + + if (m->control_pressed) { break; } + + referenceFile >> name >> mean >> lci >> uci; + + summaryData tempData; + tempData.name = name; + tempData.refMean = mean; + + double difference = 0.0000; + + partitionFile >> name; + for(int j=0;j> pMean[j] >> pLCI[j] >> pUCI[j]; + difference += abs(mean - pMean[j]); + partitionDiff[j] += abs(mean - pMean[j]);; + } + + tempData.partMean = pMean; + tempData.partLCI = pLCI; + tempData.partUCI = pUCI; + tempData.difference = difference; + summary.push_back(tempData); + + m->gobble(referenceFile); + m->gobble(partitionFile); + } + referenceFile.close(); + partitionFile.close(); + + if (m->control_pressed) { return 0; } + + int numOTUs = (int)summary.size(); + + sort(summary.begin(), summary.end(), summaryFunction); + + map variables; + variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(sharedfile)); + variables["[distance]"] = label; + string outputFileName = getOutputFileName("parameters", variables); + outputNames.push_back(outputFileName); outputTypes["parameters"].push_back(outputFileName); + + ofstream parameterFile; + m->openOutputFile(outputFileName, parameterFile); //((fileRoot + "mix.parameters").c_str()); + parameterFile.setf(ios::fixed, ios::floatfield); + parameterFile.setf(ios::showpoint); + + double totalDifference = 0.0000; + parameterFile << "Part\tDif2Ref_i\ttheta_i\tpi_i\n"; + for(int i=0;icontrol_pressed) { break; } + parameterFile << i+1 << '\t' << setprecision(2) << partitionDiff[i] << '\t' << thetaValues[i] << '\t' << piValues[i] << endl; + totalDifference += partitionDiff[i]; + } + parameterFile.close(); + + if (m->control_pressed) { return 0; } + + string summaryFileName = getOutputFileName("summary", variables); + outputNames.push_back(outputFileName); outputTypes["summary"].push_back(outputFileName); + + ofstream summaryFile; + m->openOutputFile(summaryFileName, summaryFile); //((fileRoot + "mix.summary").c_str()); + summaryFile.setf(ios::fixed, ios::floatfield); + summaryFile.setf(ios::showpoint); + + + summaryFile << "OTU\tP0.mean"; + for(int i=0;icontrol_pressed) { break; } + summaryFile << summary[i].name << setprecision(2) << '\t' << summary[i].refMean; + for(int j=0;jerrorOut(e, "GetMetaCommunityCommand", "generateSummaryFile"); + exit(1); + } + +} +//********************************************************************************************************************** + diff --git a/getmetacommunitycommand.h b/getmetacommunitycommand.h new file mode 100644 index 0000000..62070d6 --- /dev/null +++ b/getmetacommunitycommand.h @@ -0,0 +1,64 @@ +// +// getmetacommunitycommand.h +// Mothur +// +// Created by SarahsWork on 4/9/13. +// Copyright (c) 2013 Schloss Lab. All rights reserved. +// + +#ifndef Mothur_getmetacommunitycommand_h +#define Mothur_getmetacommunitycommand_h + +#include "command.hpp" +#include "inputdata.h" + +/**************************************************************************************************/ + +class GetMetaCommunityCommand : public Command { +public: + GetMetaCommunityCommand(string); + GetMetaCommunityCommand(); + ~GetMetaCommunityCommand(){} + + vector setParameters(); + string getCommandName() { return "get.metacommunity"; } + string getCommandCategory() { return "OTU-Based Approaches"; } + + string getOutputPattern(string); + + string getHelpString(); + string getCitation() { return "http://www.mothur.org/wiki/Get.metacommunity"; } + string getDescription() { return "brief description"; } + + int execute(); + void help() { m->mothurOut(getHelpString()); } + +private: + bool abort, allLines; + string outputDir; + vector outputNames; + string sharedfile; + int minpartitions, maxpartitions, optimizegap; + vector Groups; + set labels; + + int process(vector&); + vector generateDesignFile(int, string); + int generateSummaryFile(int, string, string, string, string); + +}; + +/**************************************************************************************************/ +struct summaryData { + + string name; + double refMean, difference; + vector partMean, partLCI, partUCI; + +}; +/**************************************************************************************************/ + + + + +#endif diff --git a/linearalgebra.cpp b/linearalgebra.cpp index effb1ad..58f1acc 100644 --- a/linearalgebra.cpp +++ b/linearalgebra.cpp @@ -1479,29 +1479,189 @@ double LinearAlgebra::calcPearsonSig(double n, double r){ /*********************************************************************************************************************************/ vector > LinearAlgebra::getObservedEuclideanDistance(vector >& relAbundData){ + try { - int numSamples = relAbundData.size(); - int numOTUs = relAbundData[0].size(); - - vector > dMatrix(numSamples); - for(int i=0;i > dMatrix(numSamples); + for(int i=0;icontrol_pressed) { return dMatrix; } + + double d = 0; + for(int k=0;kerrorOut(e, "LinearAlgebra", "getObservedEuclideanDistance"); + exit(1); + } +} + +/*********************************************************************************************************************************/ +vector LinearAlgebra::solveEquations(vector > A, vector b){ + try { + int length = (int)b.size(); + vector x(length, 0); + vector index(length); + for(int i=0;icontrol_pressed) { return b; } + lubksb(A, index, b); + + return b; + } + catch(exception& e) { + m->errorOut(e, "LinearAlgebra", "solveEquations"); + exit(1); + } +} + +/*********************************************************************************************************************************/ + +void LinearAlgebra::ludcmp(vector >& A, vector& index, double& d){ + try { + double tiny = 1e-20; + + int n = (int)A.size(); + vector vv(n, 0.0); + double temp; + int imax; + + d = 1.0; + + for(int i=0;i big ) big=temp; } + if(big==0.0){ m->mothurOut("Singular matrix in routine ludcmp\n"); } + vv[i] = 1.0/big; + } + + for(int j=0;jcontrol_pressed) { break; } + for(int i=0;i= big){ + big = dum; + imax = i; + } + } + if(j != imax){ + for(int k=0;kerrorOut(e, "LinearAlgebra", "ludcmp"); + exit(1); + } +} + +/*********************************************************************************************************************************/ + +void LinearAlgebra::lubksb(vector >& A, vector& index, vector& b){ + try { + double total; + int n = (int)A.size(); + int ii = 0; + + for(int i=0;icontrol_pressed) { break; } + int ip = index[i]; + total = b[ip]; + b[ip] = b[i]; + + if (ii != 0) { + for(int j=ii-1;j=0;i--){ + total = b[i]; + for(int j=i+1;jerrorOut(e, "LinearAlgebra", "lubksb"); + exit(1); + } +} + +/*********************************************************************************************************************************/ + +vector > LinearAlgebra::getInverse(vector > matrix){ + try { + int n = (int)matrix.size(); + + vector > inverse(n); + for(int i=0;i column(n, 0.0000); + vector index(n, 0); + double dummy; + + ludcmp(matrix, index, dummy); + + for(int j=0;jcontrol_pressed) { break; } + + column.assign(n, 0); + + column[j] = 1.0000; + + lubksb(matrix, index, column); + + for(int i=0;ierrorOut(e, "LinearAlgebra", "getInverse"); + exit(1); } - return dMatrix; - } /*********************************************************************************************************************************/ diff --git a/linearalgebra.h b/linearalgebra.h index ecb635f..ef9e03c 100644 --- a/linearalgebra.h +++ b/linearalgebra.h @@ -38,6 +38,9 @@ public: double calcPearsonSig(double, double); //length, coeff. double calcKendallSig(double, double); //length, coeff. + vector solveEquations(vector >, vector); + vector > getInverse(vector >); + private: MothurOut* m; @@ -59,6 +62,10 @@ private: double ran4(int&); //for testing void psdes(unsigned long &, unsigned long &); //for testing + void ludcmp(vector >&, vector&, double&); + void lubksb(vector >&, vector&, vector&); + + }; #endif diff --git a/qFinderDMM.cpp b/qFinderDMM.cpp new file mode 100644 index 0000000..c89644b --- /dev/null +++ b/qFinderDMM.cpp @@ -0,0 +1,1326 @@ +// +// qFinderDMM.cpp +// pds_dmm +// +// Created by Patrick Schloss on 11/8/12. +// Copyright (c) 2012 University of Michigan. All rights reserved. +// + +#include "qFinderDMM.h" +#include "linearalgebra.h" + +#define EPSILON numeric_limits::epsilon() + +/**************************************************************************************************/ + +qFinderDMM::qFinderDMM(vector > cm, int p): countMatrix(cm), numPartitions(p){ + try { + m = MothurOut::getInstance(); + numSamples = (int)countMatrix.size(); + numOTUs = (int)countMatrix[0].size(); + + + kMeans(); + // cout << "done kMeans" << endl; + + optimizeLambda(); + + + // cout << "done optimizeLambda" << endl; + + double change = 1.0000; + currNLL = 0.0000; + + int iter = 0; + + while(change > 1.0e-6 && iter < 100){ + + // cout << "Calc_Z: "; + calculatePiK(); + + optimizeLambda(); + + // printf("Iter:%d\t",iter); + + for(int i=0;i 0){ + logDeterminant += (2.0 * log(numSamples) - log(weights[currentPartition])); + } + vector > hessian = getHessian(); + vector > invHessian = l.getInverse(hessian); + + for(int i=0;ierrorOut(e, "qFinderDMM", "qFinderDMM"); + exit(1); + } +} + +/**************************************************************************************************/ + +void qFinderDMM::printZMatrix(string fileName, vector sampleName){ + try { + ofstream printMatrix; + m->openOutputFile(fileName, printMatrix); //(fileName.c_str()); + printMatrix.setf(ios::fixed, ios::floatfield); + printMatrix.setf(ios::showpoint); + + for(int i=0;ierrorOut(e, "qFinderDMM", "printZMatrix"); + exit(1); + } +} + +/**************************************************************************************************/ + +void qFinderDMM::printRelAbund(string fileName, vector otuNames){ + try { + ofstream printRA; + m->openOutputFile(fileName, printRA); //(fileName.c_str()); + printRA.setf(ios::fixed, ios::floatfield); + printRA.setf(ios::showpoint); + + vector totals(numPartitions, 0.0000); + for(int i=0;icontrol_pressed) { break; } + + printRA << otuNames[i]; + for(int j=0;j= 0.0000){ + double std = sqrt(error[j][i]); + printRA << '\t' << 100 * exp(lambdaMatrix[j][i]) / totals[j]; + printRA << '\t' << 100 * exp(lambdaMatrix[j][i] - 2.0 * std) / totals[j]; + printRA << '\t' << 100 * exp(lambdaMatrix[j][i] + 2.0 * std) / totals[j]; + } + else{ + printRA << '\t' << 100 * exp(lambdaMatrix[j][i]) / totals[j]; + printRA << '\t' << "NA"; + printRA << '\t' << "NA"; + } + } + printRA << endl; + } + + printRA.close(); + } + catch(exception& e) { + m->errorOut(e, "qFinderDMM", "printRelAbund"); + exit(1); + } +} + +/**************************************************************************************************/ + +// these functions for bfgs2 solver were lifted from the gnu_gsl source code... + +/* Find a minimum in x=[0,1] of the interpolating quadratic through + * (0,f0) (1,f1) with derivative fp0 at x=0. The interpolating + * polynomial is q(x) = f0 + fp0 * z + (f1-f0-fp0) * z^2 + */ + +static double +interp_quad (double f0, double fp0, double f1, double zl, double zh) +{ + double fl = f0 + zl*(fp0 + zl*(f1 - f0 -fp0)); + double fh = f0 + zh*(fp0 + zh*(f1 - f0 -fp0)); + double c = 2 * (f1 - f0 - fp0); /* curvature */ + + double zmin = zl, fmin = fl; + + if (fh < fmin) { zmin = zh; fmin = fh; } + + if (c > 0) /* positive curvature required for a minimum */ + { + double z = -fp0 / c; /* location of minimum */ + if (z > zl && z < zh) { + double f = f0 + z*(fp0 + z*(f1 - f0 -fp0)); + if (f < fmin) { zmin = z; fmin = f; }; + } + } + + return zmin; +} + +/**************************************************************************************************/ + +/* Find a minimum in x=[0,1] of the interpolating cubic through + * (0,f0) (1,f1) with derivatives fp0 at x=0 and fp1 at x=1. + * + * The interpolating polynomial is: + * + * c(x) = f0 + fp0 * z + eta * z^2 + xi * z^3 + * + * where eta=3*(f1-f0)-2*fp0-fp1, xi=fp0+fp1-2*(f1-f0). + */ + +double cubic (double c0, double c1, double c2, double c3, double z){ + return c0 + z * (c1 + z * (c2 + z * c3)); +} + +/**************************************************************************************************/ + +void check_extremum (double c0, double c1, double c2, double c3, double z, + double *zmin, double *fmin){ + /* could make an early return by testing curvature >0 for minimum */ + + double y = cubic (c0, c1, c2, c3, z); + + if (y < *fmin) + { + *zmin = z; /* accepted new point*/ + *fmin = y; + } +} + +/**************************************************************************************************/ + +int gsl_poly_solve_quadratic (double a, double b, double c, + double *x0, double *x1) +{ + + double disc = b * b - 4 * a * c; + + if (a == 0) /* Handle linear case */ + { + if (b == 0) + { + return 0; + } + else + { + *x0 = -c / b; + return 1; + }; + } + + if (disc > 0) + { + if (b == 0) + { + double r = fabs (0.5 * sqrt (disc) / a); + *x0 = -r; + *x1 = r; + } + else + { + double sgnb = (b > 0 ? 1 : -1); + double temp = -0.5 * (b + sgnb * sqrt (disc)); + double r1 = temp / a ; + double r2 = c / temp ; + + if (r1 < r2) + { + *x0 = r1 ; + *x1 = r2 ; + } + else + { + *x0 = r2 ; + *x1 = r1 ; + } + } + return 2; + } + else if (disc == 0) + { + *x0 = -0.5 * b / a ; + *x1 = -0.5 * b / a ; + return 2 ; + } + else + { + return 0; + } + +} + +/**************************************************************************************************/ + +double interp_cubic (double f0, double fp0, double f1, double fp1, double zl, double zh){ + double eta = 3 * (f1 - f0) - 2 * fp0 - fp1; + double xi = fp0 + fp1 - 2 * (f1 - f0); + double c0 = f0, c1 = fp0, c2 = eta, c3 = xi; + double zmin, fmin; + double z0, z1; + + zmin = zl; fmin = cubic(c0, c1, c2, c3, zl); + check_extremum (c0, c1, c2, c3, zh, &zmin, &fmin); + + { + int n = gsl_poly_solve_quadratic (3 * c3, 2 * c2, c1, &z0, &z1); + + if (n == 2) /* found 2 roots */ + { + if (z0 > zl && z0 < zh) + check_extremum (c0, c1, c2, c3, z0, &zmin, &fmin); + if (z1 > zl && z1 < zh) + check_extremum (c0, c1, c2, c3, z1, &zmin, &fmin); + } + else if (n == 1) /* found 1 root */ + { + if (z0 > zl && z0 < zh) + check_extremum (c0, c1, c2, c3, z0, &zmin, &fmin); + } + } + + return zmin; +} + +/**************************************************************************************************/ + +double interpolate (double a, double fa, double fpa, + double b, double fb, double fpb, double xmin, double xmax){ + /* Map [a,b] to [0,1] */ + double z, alpha, zmin, zmax; + + zmin = (xmin - a) / (b - a); + zmax = (xmax - a) / (b - a); + + if (zmin > zmax) + { + double tmp = zmin; + zmin = zmax; + zmax = tmp; + }; + + if(!isnan(fpb) ){ + z = interp_cubic (fa, fpa * (b - a), fb, fpb * (b - a), zmin, zmax); + } + else{ + z = interp_quad(fa, fpa * (b - a), fb, zmin, zmax); + } + + + alpha = a + z * (b - a); + + return alpha; +} + +/**************************************************************************************************/ + +int qFinderDMM::lineMinimizeFletcher(vector& x, vector& p, double f0, double df0, double alpha1, double& alphaNew, double& fAlpha, vector& xalpha, vector& gradient ){ + try { + + double rho = 0.01; + double sigma = 0.10; + double tau1 = 9.00; + double tau2 = 0.05; + double tau3 = 0.50; + + double alpha = alpha1; + double alpha_prev = 0.0000; + + xalpha.resize(numOTUs, 0.0000); + + double falpha_prev = f0; + double dfalpha_prev = df0; + + double a = 0.0000; double b = alpha; + double fa = f0; double fb = 0.0000; + double dfa = df0; double dfb = 0.0/0.0; + + int iter = 0; + int maxIters = 100; + while(iter++ < maxIters){ + if (m->control_pressed) { break; } + + for(int i=0;i f0 + alpha * rho * df0 || fAlpha >= falpha_prev){ + a = alpha_prev; b = alpha; + fa = falpha_prev; fb = fAlpha; + dfa = dfalpha_prev; dfb = 0.0/0.0; + break; + } + + negativeLogDerivEvidenceLambdaPi(xalpha, gradient); + double dfalpha = 0.0000; + for(int i=0;i= 0){ + a = alpha; b = alpha_prev; + fa = fAlpha; fb = falpha_prev; + dfa = dfalpha; dfb = dfalpha_prev; + break; + } + + double delta = alpha - alpha_prev; + + double lower = alpha + delta; + double upper = alpha + tau1 * delta; + + double alphaNext = interpolate(alpha_prev, falpha_prev, dfalpha_prev, alpha, fAlpha, dfalpha, lower, upper); + + alpha_prev = alpha; + falpha_prev = fAlpha; + dfalpha_prev = dfalpha; + alpha = alphaNext; + } + + iter = 0; + while(iter++ < maxIters){ + if (m->control_pressed) { break; } + + double delta = b - a; + + double lower = a + tau2 * delta; + double upper = b - tau3 * delta; + + alpha = interpolate(a, fa, dfa, b, fb, dfb, lower, upper); + + for(int i=0;i f0 + rho * alpha * df0 || fAlpha >= fa){ + b = alpha; + fb = fAlpha; + dfb = 0.0/0.0; + } + else{ + double dfalpha = 0.0000; + + negativeLogDerivEvidenceLambdaPi(xalpha, gradient); + dfalpha = 0.0000; + for(int i=0;i= 0 && dfalpha >= 0) || ((b-a) <= 0.000 && dfalpha <= 0))){ + b = a; fb = fa; dfb = dfa; + a = alpha; fa = fAlpha; dfa = dfalpha; + } + else{ + a = alpha; + fa = fAlpha; + dfa = dfalpha; + } + } + + + } + + return 1; + } + catch(exception& e) { + m->errorOut(e, "qFinderDMM", "lineMinimizeFletcher"); + exit(1); + } +} + +/**************************************************************************************************/ + +int qFinderDMM::bfgs2_Solver(vector& x){ + try{ +// cout << "bfgs2_Solver" << endl; + int bfgsIter = 0; + double step = 1.0e-6; + double delta_f = 0.0000;//f-f0; + + vector gradient; + double f = negativeLogEvidenceLambdaPi(x); + +// cout << "after negLE" << endl; + + negativeLogDerivEvidenceLambdaPi(x, gradient); + +// cout << "after negLDE" << endl; + + vector x0 = x; + vector g0 = gradient; + + double g0norm = 0; + for(int i=0;i p = gradient; + double pNorm = 0; + for(int i=0;i 0.001 && bfgsIter++ < maxIter){ + if (m->control_pressed) { return 0; } + + double f0 = f; + vector dx(numOTUs, 0.0000); + + double alphaOld, alphaNew; + + if(pNorm == 0 || g0norm == 0 || df0 == 0){ + dx.assign(numOTUs, 0.0000); + break; + } + if(delta_f < 0){ + double delta = max(-delta_f, 10 * EPSILON * abs(f0)); + alphaOld = min(1.0, 2.0 * delta / (-df0)); + } + else{ + alphaOld = step; + } + + int success = lineMinimizeFletcher(x0, p, f0, df0, alphaOld, alphaNew, f, x, gradient); + + if(!success){ + x = x0; + break; + } + + delta_f = f - f0; + + vector dx0(numOTUs); + vector dg0(numOTUs); + + for(int i=0;i= 0.0) ? -1.0 : +1.0; + + for(int i=0;ierrorOut(e, "qFinderDMM", "bfgs2_Solver"); + exit(1); + } +} + +/**************************************************************************************************/ + +//can we get these psi/psi1 calculations into their own math class? +//psi calcualtions swiped from gsl library... + +static const double psi_cs[23] = { + -.038057080835217922, + .491415393029387130, + -.056815747821244730, + .008357821225914313, + -.001333232857994342, + .000220313287069308, + -.000037040238178456, + .000006283793654854, + -.000001071263908506, + .000000183128394654, + -.000000031353509361, + .000000005372808776, + -.000000000921168141, + .000000000157981265, + -.000000000027098646, + .000000000004648722, + -.000000000000797527, + .000000000000136827, + -.000000000000023475, + .000000000000004027, + -.000000000000000691, + .000000000000000118, + -.000000000000000020 +}; + +static double apsi_cs[16] = { + -.0204749044678185, + -.0101801271534859, + .0000559718725387, + -.0000012917176570, + .0000000572858606, + -.0000000038213539, + .0000000003397434, + -.0000000000374838, + .0000000000048990, + -.0000000000007344, + .0000000000001233, + -.0000000000000228, + .0000000000000045, + -.0000000000000009, + .0000000000000002, + -.0000000000000000 +}; + +/**************************************************************************************************/ + +double qFinderDMM::cheb_eval(const double seriesData[], int order, double xx){ + try { + double d = 0.0000; + double dd = 0.0000; + + double x2 = xx * 2.0000; + + for(int j=order;j>=1;j--){ + if (m->control_pressed) { return 0; } + double temp = d; + d = x2 * d - dd + seriesData[j]; + dd = temp; + } + + d = xx * d - dd + 0.5 * seriesData[0]; + return d; + } + catch(exception& e){ + m->errorOut(e, "qFinderDMM", "cheb_eval"); + exit(1); + } +} + +/**************************************************************************************************/ + +double qFinderDMM::psi(double xx){ + try { + double psiX = 0.0000; + + if(xx < 1.0000){ + + double t1 = 1.0 / xx; + psiX = cheb_eval(psi_cs, 22, 2.0*xx-1.0); + psiX = -t1 + psiX; + + } + else if(xx < 2.0000){ + + const double v = xx - 1.0; + psiX = cheb_eval(psi_cs, 22, 2.0*v-1.0); + + } + else{ + const double t = 8.0/(xx*xx)-1.0; + psiX = cheb_eval(apsi_cs, 15, t); + psiX += log(xx) - 0.5/xx; + } + + return psiX; + } + catch(exception& e){ + m->errorOut(e, "qFinderDMM", "psi"); + exit(1); + } +} + +/**************************************************************************************************/ + +/* coefficients for Maclaurin summation in hzeta() + * B_{2j}/(2j)! + */ +static double hzeta_c[15] = { + 1.00000000000000000000000000000, + 0.083333333333333333333333333333, + -0.00138888888888888888888888888889, + 0.000033068783068783068783068783069, + -8.2671957671957671957671957672e-07, + 2.0876756987868098979210090321e-08, + -5.2841901386874931848476822022e-10, + 1.3382536530684678832826980975e-11, + -3.3896802963225828668301953912e-13, + 8.5860620562778445641359054504e-15, + -2.1748686985580618730415164239e-16, + 5.5090028283602295152026526089e-18, + -1.3954464685812523340707686264e-19, + 3.5347070396294674716932299778e-21, + -8.9535174270375468504026113181e-23 +}; + +/**************************************************************************************************/ + +double qFinderDMM::psi1(double xx){ + try { + + /* Euler-Maclaurin summation formula + * [Moshier, p. 400, with several typo corrections] + */ + + double s = 2.0000; + const int jmax = 12; + const int kmax = 10; + int j, k; + const double pmax = pow(kmax + xx, -s); + double scp = s; + double pcp = pmax / (kmax + xx); + double value = pmax*((kmax+xx)/(s-1.0) + 0.5); + + for(k=0; kcontrol_pressed) { return 0; } + value += pow(k + xx, -s); + } + + for(j=0; j<=jmax; j++) { + if (m->control_pressed) { return 0; } + double delta = hzeta_c[j+1] * scp * pcp; + value += delta; + + if(fabs(delta/value) < 0.5*EPSILON) break; + + scp *= (s+2*j+1)*(s+2*j+2); + pcp /= (kmax + xx)*(kmax + xx); + } + + return value; + } + catch(exception& e){ + m->errorOut(e, "qFinderDMM", "psi1"); + exit(1); + } +} + +/**************************************************************************************************/ + +double qFinderDMM::negativeLogEvidenceLambdaPi(vector& x){ + try{ + vector sumAlphaX(numSamples, 0.0000); + + double logEAlpha = 0.0000; + double sumLambda = 0.0000; + double sumAlpha = 0.0000; + double logE = 0.0000; + double nu = 0.10000; + double eta = 0.10000; + + double weight = 0.00000; + for(int i=0;icontrol_pressed) { return 0; } + double lambda = x[i]; + double alpha = exp(x[i]); + logEAlpha += lgamma(alpha); + sumLambda += lambda; + sumAlpha += alpha; + + for(int j=0;jerrorOut(e, "qFinderDMM", "negativeLogEvidenceLambdaPi"); + exit(1); + } +} + +/**************************************************************************************************/ + +void qFinderDMM::negativeLogDerivEvidenceLambdaPi(vector& x, vector& df){ + try{ +// cout << "\tstart negativeLogDerivEvidenceLambdaPi" << endl; + + vector storeVector(numSamples, 0.0000); + vector derivative(numOTUs, 0.0000); + vector alpha(numOTUs, 0.0000); + + double store = 0.0000; + double nu = 0.1000; + double eta = 0.1000; + + double weight = 0.0000; + for(int i=0;icontrol_pressed) { return; } +// cout << "start i loop" << endl; +// +// cout << i << '\t' << alpha[i] << '\t' << x[i] << '\t' << exp(x[i]) << '\t' << store << endl; + + alpha[i] = exp(x[i]); + store += alpha[i]; + +// cout << "before derivative" << endl; + + derivative[i] = weight * psi(alpha[i]); + +// cout << "after derivative" << endl; + +// cout << i << '\t' << alpha[i] << '\t' << psi(alpha[i]) << '\t' << derivative[i] << endl; + + for(int j=0;jerrorOut(e, "qFinderDMM", "negativeLogDerivEvidenceLambdaPi"); + exit(1); + } +} + +/**************************************************************************************************/ + +double qFinderDMM::getNegativeLogEvidence(vector& lambda, int group){ + try { + double sumAlpha = 0.0000; + double sumAlphaX = 0.0000; + double sumLnGamAlpha = 0.0000; + double logEvidence = 0.0000; + + for(int i=0;icontrol_pressed) { return 0; } + double alpha = exp(lambda[i]); + double X = countMatrix[group][i]; + double alphaX = alpha + X; + + sumLnGamAlpha += lgamma(alpha); + sumAlpha += alpha; + sumAlphaX += alphaX; + + logEvidence -= lgamma(alphaX); + } + + sumLnGamAlpha -= lgamma(sumAlpha); + logEvidence += lgamma(sumAlphaX); + + return logEvidence + sumLnGamAlpha; + } + catch(exception& e){ + m->errorOut(e, "qFinderDMM", "getNegativeLogEvidence"); + exit(1); + } +} + +/**************************************************************************************************/ + +void qFinderDMM::kMeans(){ + try { + + vector > relativeAbundance(numSamples); + vector > alphaMatrix; + + alphaMatrix.resize(numPartitions); + lambdaMatrix.resize(numPartitions); + for(int i=0;icontrol_pressed) { return; } + int groupTotal = 0; + + relativeAbundance[i].assign(numOTUs, 0.0); + + for(int j=0;j 1e-6 && iteration < maxIters){ + + if (m->control_pressed) { return; } + //calcualte average relative abundance + maxChange = 0.0000; + for(int i=0;i averageRelativeAbundance(numOTUs, 0); + for(int j=0;j maxChange){ maxChange = normChange; } + } + + + //calcualte distance between each sample in partition adn the average relative abundance + for(int i=0;icontrol_pressed) { return; } + + double normalizationFactor = 0; + vector totalDistToPartition(numPartitions, 0); + + for(int j=0;jcontrol_pressed) { return; } + for(int j=0;j 0){ + lambdaMatrix[j][i] = log(alphaMatrix[j][i]); + } + else{ + lambdaMatrix[j][i] = -10.0; + } + } + } + } + catch(exception& e){ + m->errorOut(e, "qFinderDMM", "kMeans"); + exit(1); + } +} + +/**************************************************************************************************/ + +void qFinderDMM::optimizeLambda(){ + try { + for(currentPartition=0;currentPartitioncontrol_pressed) { return; } + bfgs2_Solver(lambdaMatrix[currentPartition]); + } + } + catch(exception& e){ + m->errorOut(e, "qFinderDMM", "optimizeLambda"); + exit(1); + } +} + +/**************************************************************************************************/ + +void qFinderDMM::calculatePiK(){ + try { + vector store(numPartitions); + + for(int i=0;icontrol_pressed) { return; } + double sum = 0.0000; + double minNegLogEvidence =numeric_limits::max(); + + for(int j=0;jcontrol_pressed) { return; } + zMatrix[j][i] = weights[j] * exp(-(store[j] - minNegLogEvidence)); + sum += zMatrix[j][i]; + } + + for(int j=0;jerrorOut(e, "qFinderDMM", "calculatePiK"); + exit(1); + } + +} + +/**************************************************************************************************/ + +double qFinderDMM::getNegativeLogLikelihood(){ + try { + double eta = 0.10000; + double nu = 0.10000; + + vector pi(numPartitions, 0.0000); + vector logBAlpha(numPartitions, 0.0000); + + double doubleSum = 0.0000; + + for(int i=0;icontrol_pressed) { return 0; } + double sumAlphaK = 0.0000; + + pi[i] = weights[i] / (double)numSamples; + + for(int j=0;jcontrol_pressed) { return 0; } + + double probability = 0.0000; + double factor = 0.0000; + double sum = 0.0000; + vector logStore(numPartitions, 0.0000); + double offset = -numeric_limits::max(); + + for(int j=0;j offset){ + offset = logStore[k]; + } + + } + + for(int k=0;kcontrol_pressed) { return 0; } + alphaSum += exp(lambdaMatrix[i][j]); + lambdaSum += lambdaMatrix[i][j]; + } + } + alphaSum *= -nu; + lambdaSum *= eta; + + return (-doubleSum - L5 - L6 - alphaSum - lambdaSum); + } + catch(exception& e){ + m->errorOut(e, "qFinderDMM", "getNegativeLogLikelihood"); + exit(1); + } + + +} + +/**************************************************************************************************/ + +vector > qFinderDMM::getHessian(){ + try { + vector alpha(numOTUs, 0.0000); + double alphaSum = 0.0000; + + vector pi = zMatrix[currentPartition]; + vector psi_ajk(numOTUs, 0.0000); + vector psi_cjk(numOTUs, 0.0000); + vector psi1_ajk(numOTUs, 0.0000); + vector psi1_cjk(numOTUs, 0.0000); + + for(int j=0;jcontrol_pressed) { break; } + + alpha[j] = exp(lambdaMatrix[currentPartition][j]); + alphaSum += alpha[j]; + + for(int i=0;icontrol_pressed) { break; } + weight += pi[i]; + double sum = 0.0000; + for(int j=0;j > hessian(numOTUs); + for(int i=0;icontrol_pressed) { break; } + double term1 = -alpha[i] * (- psi_ajk[i] + psi_Ak + psi_cjk[i] - psi_Ck); + double term2 = -alpha[i] * alpha[i] * (-psi1_ajk[i] + psi1_Ak + psi1_cjk[i] - psi1_Ck); + double term3 = 0.1 * alpha[i]; + + hessian[i][i] = term1 + term2 + term3; + + for(int j=0;jerrorOut(e, "qFinderDMM", "getHessian"); + exit(1); + } +} + +/**************************************************************************************************/ diff --git a/qFinderDMM.h b/qFinderDMM.h new file mode 100644 index 0000000..d94e6cc --- /dev/null +++ b/qFinderDMM.h @@ -0,0 +1,69 @@ +// +// qFinderDMM.h +// pds_dmm +// +// Created by Patrick Schloss on 11/8/12. +// Copyright (c) 2012 University of Michigan. All rights reserved. +// + +#ifndef pds_dmm_qFinderDMM_h +#define pds_dmm_qFinderDMM_h + +/**************************************************************************************************/ + +#include "mothurout.h" + +/**************************************************************************************************/ + +class qFinderDMM { + +public: + qFinderDMM(vector >, int); + double getNLL() { return currNLL; } + double getAIC() { return aic; } + double getBIC() { return bic; } + double getLogDet() { return logDeterminant; } + double getLaplace() { return laplace; } + void printZMatrix(string, vector); + void printRelAbund(string, vector); + +private: + MothurOut* m; + void kMeans(); + void optimizeLambda(); + void calculatePiK(); + + double negativeLogEvidenceLambdaPi(vector&); + void negativeLogDerivEvidenceLambdaPi(vector&, vector&); + double getNegativeLogEvidence(vector&, int); + double getNegativeLogLikelihood(); + vector > getHessian(); + + int lineMinimizeFletcher(vector&, vector&, double, double, double, double&, double&, vector&, vector&); + int bfgs2_Solver(vector&);//, double, double); + double cheb_eval(const double[], int, double); + double psi(double); + double psi1(double); + + vector > countMatrix; + vector > zMatrix; + vector > lambdaMatrix; + vector weights; + vector > error; + + int numPartitions; + int numSamples; + int numOTUs; + int currentPartition; + + double currNLL; + double aic; + double bic; + double logDeterminant; + double laplace; + +}; + +/**************************************************************************************************/ + +#endif diff --git a/sharedrabundvector.cpp b/sharedrabundvector.cpp index 3901650..7bc333b 100644 --- a/sharedrabundvector.cpp +++ b/sharedrabundvector.cpp @@ -211,6 +211,18 @@ int SharedRAbundVector::getAbundance(int index){ return data[index].abundance; } +/***********************************************************************/ +//returns vector of abundances +vector SharedRAbundVector::getAbundances(){ + vector abunds; + for (int i = 0; i < data.size(); i++) { + abunds.push_back(data[i].abundance); + } + + return abunds; +} + + /***********************************************************************/ int SharedRAbundVector::numNZ(){ diff --git a/sharedrabundvector.h b/sharedrabundvector.h index 419d15a..6647185 100644 --- a/sharedrabundvector.h +++ b/sharedrabundvector.h @@ -50,6 +50,7 @@ public: individual get(int); vector getData(); int getAbundance(int); + vector getAbundances(); int numNZ(); void sortD(); //Sorts the data in descending order. void push_front(int, int, string); //abundance, otu, groupname -- 2.39.2