A74D36B8137DAFAA00332B0C /* chimerauchimecommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A74D36B7137DAFAA00332B0C /* chimerauchimecommand.cpp */; };
A74D59A4159A1E2000043046 /* counttable.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A74D59A3159A1E2000043046 /* counttable.cpp */; };
A754149714840CF7005850D1 /* summaryqualcommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A754149614840CF7005850D1 /* summaryqualcommand.cpp */; };
+ A7548FAD17142EBC00B1F05A /* getmetacommunitycommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7548FAC17142EBC00B1F05A /* getmetacommunitycommand.cpp */; };
+ A7548FB0171440ED00B1F05A /* qFinderDMM.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7548FAE171440EC00B1F05A /* qFinderDMM.cpp */; };
A75790591301749D00A30DAB /* homovacommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A75790581301749D00A30DAB /* homovacommand.cpp */; };
A76CDD821510F143004C8458 /* prcseqscommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A76CDD811510F143004C8458 /* prcseqscommand.cpp */; };
A7730EFF13967241007433A3 /* countseqscommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7730EFE13967241007433A3 /* countseqscommand.cpp */; };
A74D59A6159A1E3600043046 /* counttable.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = counttable.h; sourceTree = "<group>"; };
A754149514840CF7005850D1 /* summaryqualcommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = summaryqualcommand.h; sourceTree = "<group>"; };
A754149614840CF7005850D1 /* summaryqualcommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = summaryqualcommand.cpp; sourceTree = "<group>"; };
+ A7548FAB17142EA500B1F05A /* getmetacommunitycommand.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = getmetacommunitycommand.h; sourceTree = "<group>"; };
+ A7548FAC17142EBC00B1F05A /* getmetacommunitycommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = getmetacommunitycommand.cpp; sourceTree = "<group>"; };
+ A7548FAE171440EC00B1F05A /* qFinderDMM.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = qFinderDMM.cpp; sourceTree = "<group>"; };
+ A7548FAF171440ED00B1F05A /* qFinderDMM.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = qFinderDMM.h; sourceTree = "<group>"; };
A75790571301749D00A30DAB /* homovacommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = homovacommand.h; sourceTree = "<group>"; };
A75790581301749D00A30DAB /* homovacommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = homovacommand.cpp; sourceTree = "<group>"; };
A76CDD7F1510F09A004C8458 /* pcrseqscommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = pcrseqscommand.h; sourceTree = "<group>"; };
A7E9B77C12D37EC400DA6239 /* overlap.hpp */,
A7E9B79B12D37EC400DA6239 /* progress.cpp */,
A7E9B79C12D37EC400DA6239 /* progress.hpp */,
+ A7548FAF171440ED00B1F05A /* qFinderDMM.h */,
+ A7548FAE171440EC00B1F05A /* qFinderDMM.cpp */,
A7E9B7A512D37EC400DA6239 /* rarecalc.cpp */,
A7386C191619C9FB00651424 /* randomforest */,
A7E9B7A612D37EC400DA6239 /* rarecalc.h */,
A7E9B6F412D37EC400DA6239 /* getgroupscommand.cpp */,
A7E9B6F712D37EC400DA6239 /* getlabelcommand.h */,
A7E9B6F612D37EC400DA6239 /* getlabelcommand.cpp */,
+ A7548FAB17142EA500B1F05A /* getmetacommunitycommand.h */,
+ A7548FAC17142EBC00B1F05A /* getmetacommunitycommand.cpp */,
A70056E8156A93E300924A2D /* getotulabelscommand.h */,
A70056E5156A93D000924A2D /* getotulabelscommand.cpp */,
A7E9B6F912D37EC400DA6239 /* getlineagecommand.h */,
A7128B1D16B7002A00723BE4 /* getdistscommand.cpp in Sources */,
A7B0231516B8244C006BA09E /* removedistscommand.cpp in Sources */,
A799314B16CBD0CD0017E888 /* mergetaxsummarycommand.cpp in Sources */,
+ A7548FAD17142EBC00B1F05A /* getmetacommunitycommand.cpp in Sources */,
+ A7548FB0171440ED00B1F05A /* qFinderDMM.cpp in Sources */,
);
runOnlyForDeploymentPostprocessing = 0;
};
#include "getdistscommand.h"
#include "removedistscommand.h"
#include "mergetaxsummarycommand.h"
+#include "getmetacommunitycommand.h"
/*******************************************************/
commands["get.dists"] = "get.dists";
commands["remove.dists"] = "remove.dists";
commands["merge.taxsummary"] = "merge.taxsummary";
+ commands["get.metacommunity"] = "get.metacommunity";
}
else if(commandName == "get.dists") { command = new GetDistsCommand(optionString); }
else if(commandName == "remove.dists") { command = new RemoveDistsCommand(optionString); }
else if(commandName == "merge.taxsummary") { command = new MergeTaxSummaryCommand(optionString); }
+ else if(commandName == "get.metacommunity") { command = new GetMetaCommunityCommand(optionString); }
else { command = new NoCommand(optionString); }
return command;
else if(commandName == "get.dists") { pipecommand = new GetDistsCommand(optionString); }
else if(commandName == "remove.dists") { pipecommand = new RemoveDistsCommand(optionString); }
else if(commandName == "merge.taxsummary") { pipecommand = new MergeTaxSummaryCommand(optionString); }
+ else if(commandName == "get.metacommunity") { pipecommand = new GetMetaCommunityCommand(optionString); }
else { pipecommand = new NoCommand(optionString); }
return pipecommand;
else if(commandName == "get.dists") { shellcommand = new GetDistsCommand(); }
else if(commandName == "remove.dists") { shellcommand = new RemoveDistsCommand(); }
else if(commandName == "merge.taxsummary") { shellcommand = new MergeTaxSummaryCommand(); }
+ else if(commandName == "get.metacommunity") { shellcommand = new GetMetaCommunityCommand(); }
else { shellcommand = new NoCommand(); }
return shellcommand;
--- /dev/null
+//
+// getmetacommunitycommand.cpp
+// Mothur
+//
+// Created by SarahsWork on 4/9/13.
+// Copyright (c) 2013 Schloss Lab. All rights reserved.
+//
+
+#include "getmetacommunitycommand.h"
+#include "qFinderDMM.h"
+
+//**********************************************************************************************************************
+vector<string> GetMetaCommunityCommand::setParameters(){
+ try {
+ CommandParameter pshared("shared", "InputTypes", "", "", "none", "none", "none","outputType",false,true); parameters.push_back(pshared);
+ CommandParameter pgroups("groups", "String", "", "", "", "", "","",false,false); parameters.push_back(pgroups);
+ CommandParameter plabel("label", "String", "", "", "", "", "","",false,false); parameters.push_back(plabel);
+ CommandParameter pminpartitions("minpartitions", "Number", "", "5", "", "", "","",false,false,true); parameters.push_back(pminpartitions);
+ CommandParameter pmaxpartitions("maxpartitions", "Number", "", "10", "", "", "","",false,false,true); parameters.push_back(pmaxpartitions);
+ CommandParameter poptimizegap("optimizegap", "Number", "", "3", "", "", "","",false,false,true); parameters.push_back(poptimizegap);
+ CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
+ CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
+
+ vector<string> myArray;
+ for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); }
+ return myArray;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "NewCommand", "setParameters");
+ exit(1);
+ }
+}
+//**********************************************************************************************************************
+string GetMetaCommunityCommand::getHelpString(){
+ try {
+ string helpString = "";
+ helpString += "The get.metacommunity command parameters are shared, label, groups, minpartitions, maxpartitions and optimizegap. The shared file is required. \n";
+ helpString += "The label parameter is used to analyze specific labels in your input. labels are separated by dashes.\n";
+ helpString += "The groups parameter allows you to specify which of the groups in your shared file you would like analyzed. Group names are separated by dashes.\n";
+ helpString += "The minpartitions parameter is used to .... Default=5.\n";
+ helpString += "The maxpartitions parameter is used to .... Default=10.\n";
+ helpString += "The optimizegap parameter is used to .... Default=3.\n";
+ helpString += "The get.metacommunity command should be in the following format: get.metacommunity(shared=yourSharedFile).\n";
+ return helpString;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "GetMetaCommunityCommand", "getHelpString");
+ exit(1);
+ }
+}
+//**********************************************************************************************************************
+string GetMetaCommunityCommand::getOutputPattern(string type) {
+ try {
+ string pattern = "";
+
+ if (type == "fit") { pattern = "[filename],[distance],mix.fit"; }
+ else if (type == "relabund") { pattern = "[filename],[distance],[tag],mix.relabund"; }
+ else if (type == "design") { pattern = "[filename],mix.design"; }
+ else if (type == "matrix") { pattern = "[filename],[distance],[tag],mix.posterior"; }
+ else if (type == "parameters") { pattern = "[filename],[distance],mix.parameters"; }
+ else if (type == "summary") { pattern = "[filename],[distance],mix.summary"; }
+ else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true; }
+
+ return pattern;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "GetMetaCommunityCommand", "getOutputPattern");
+ exit(1);
+ }
+}
+//**********************************************************************************************************************
+GetMetaCommunityCommand::GetMetaCommunityCommand(){
+ try {
+ abort = true; calledHelp = true;
+ setParameters();
+ vector<string> tempOutNames;
+ outputTypes["fit"] = tempOutNames;
+ outputTypes["relabund"] = tempOutNames;
+ outputTypes["matrix"] = tempOutNames;
+ outputTypes["design"] = tempOutNames;
+ outputTypes["parameters"] = tempOutNames;
+ outputTypes["summary"] = tempOutNames;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "GetMetaCommunityCommand", "GetMetaCommunityCommand");
+ exit(1);
+ }
+}
+//**********************************************************************************************************************
+GetMetaCommunityCommand::GetMetaCommunityCommand(string option) {
+ try {
+ abort = false; calledHelp = false;
+ allLines=true;
+
+ //allow user to run help
+ if(option == "help") { help(); abort = true; calledHelp = true; }
+ else if(option == "citation") { citation(); abort = true; calledHelp = true;}
+
+ else {
+ //valid paramters for this command
+ vector<string> myArray = setParameters();
+
+ OptionParser parser(option);
+ map<string,string> parameters = parser.getParameters();
+
+ ValidParameters validParameter;
+ map<string,string>::iterator it;
+ //check to make sure all parameters are valid for command
+ for (it = parameters.begin(); it != parameters.end(); it++) {
+ if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
+ }
+
+ vector<string> tempOutNames;
+ outputTypes["fit"] = tempOutNames;
+ outputTypes["relabund"] = tempOutNames;
+ outputTypes["matrix"] = tempOutNames;
+ outputTypes["design"] = tempOutNames;
+ outputTypes["parameters"] = tempOutNames;
+ outputTypes["summary"] = tempOutNames;
+
+ //if the user changes the input directory command factory will send this info to us in the output parameter
+ string inputDir = validParameter.validFile(parameters, "inputdir", false);
+ if (inputDir == "not found"){ inputDir = ""; }
+ else {
+ string path;
+ it = parameters.find("shared");
+ if(it != parameters.end()){
+ path = m->hasPath(it->second);
+ if (path == "") { parameters["shared"] = inputDir + it->second; }
+ }
+ }
+
+ //get shared file, it is required
+ sharedfile = validParameter.validFile(parameters, "shared", true);
+ if (sharedfile == "not open") { sharedfile = ""; abort = true; }
+ else if (sharedfile == "not found") {
+ //if there is a current shared file, use it
+ sharedfile = m->getSharedFile();
+ if (sharedfile != "") { m->mothurOut("Using " + sharedfile + " as input file for the shared parameter."); m->mothurOutEndLine(); }
+ else { m->mothurOut("You have no current sharedfile and the shared parameter is required."); m->mothurOutEndLine(); abort = true; }
+ }else { m->setSharedFile(sharedfile); }
+
+ //if the user changes the output directory command factory will send this info to us in the output parameter
+ outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){
+ outputDir = m->hasPath(sharedfile); //if user entered a file with a path then preserve it
+ }
+
+ string temp = validParameter.validFile(parameters, "minpartitions", false); if (temp == "not found"){ temp = "5"; }
+ m->mothurConvert(temp, minpartitions);
+
+ temp = validParameter.validFile(parameters, "maxpartitions", false); if (temp == "not found"){ temp = "10"; }
+ m->mothurConvert(temp, maxpartitions);
+
+ temp = validParameter.validFile(parameters, "optimizegap", false); if (temp == "not found"){ temp = "3"; }
+ m->mothurConvert(temp, optimizegap);
+
+ string groups = validParameter.validFile(parameters, "groups", false);
+ if (groups == "not found") { groups = ""; }
+ else { m->splitAtDash(groups, Groups); }
+ m->setGroups(Groups);
+
+ string label = validParameter.validFile(parameters, "label", false);
+ if (label == "not found") { label = ""; }
+ else {
+ if(label != "all") { m->splitAtDash(label, labels); allLines = 0; }
+ else { allLines = 1; }
+ }
+ }
+
+ }
+ catch(exception& e) {
+ m->errorOut(e, "GetMetaCommunityCommand", "GetMetaCommunityCommand");
+ exit(1);
+ }
+}
+//**********************************************************************************************************************
+
+int GetMetaCommunityCommand::execute(){
+ try {
+
+ if (abort == true) { if (calledHelp) { return 0; } return 2; }
+
+ InputData input(sharedfile, "sharedfile");
+ vector<SharedRAbundVector*> lookup = input.getSharedRAbundVectors();
+ string lastLabel = lookup[0]->getLabel();
+
+ //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
+ set<string> processedLabels;
+ set<string> userLabels = labels;
+
+ //as long as you are not at the end of the file or done wih the lines you want
+ while((lookup[0] != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
+
+ if (m->control_pressed) { for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; } return 0; }
+
+ if(allLines == 1 || labels.count(lookup[0]->getLabel()) == 1){
+
+ m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
+
+ process(lookup);
+
+ processedLabels.insert(lookup[0]->getLabel());
+ userLabels.erase(lookup[0]->getLabel());
+ }
+
+ if ((m->anyLabelsToProcess(lookup[0]->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
+ string saveLabel = lookup[0]->getLabel();
+
+ for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
+ lookup = input.getSharedRAbundVectors(lastLabel);
+ m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
+
+ process(lookup);
+
+ processedLabels.insert(lookup[0]->getLabel());
+ userLabels.erase(lookup[0]->getLabel());
+
+ //restore real lastlabel to save below
+ lookup[0]->setLabel(saveLabel);
+ }
+
+ lastLabel = lookup[0]->getLabel();
+ //prevent memory leak
+ for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; lookup[i] = NULL; }
+
+ if (m->control_pressed) { return 0; }
+
+ //get next line to process
+ lookup = input.getSharedRAbundVectors();
+ }
+
+ if (m->control_pressed) { return 0; }
+
+ //output error messages about any remaining user labels
+ set<string>::iterator it;
+ bool needToRun = false;
+ for (it = userLabels.begin(); it != userLabels.end(); it++) {
+ m->mothurOut("Your file does not include the label " + *it);
+ if (processedLabels.count(lastLabel) != 1) {
+ m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
+ needToRun = true;
+ }else {
+ m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
+ }
+ }
+
+ //run last label if you need to
+ if (needToRun == true) {
+ for (int i = 0; i < lookup.size(); i++) { if (lookup[i] != NULL) { delete lookup[i]; } }
+ lookup = input.getSharedRAbundVectors(lastLabel);
+
+ m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
+
+ process(lookup);
+
+ for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
+ }
+
+ //output files created by command
+ m->mothurOutEndLine();
+ m->mothurOut("Output File Names: "); m->mothurOutEndLine();
+ for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); }
+ m->mothurOutEndLine();
+ return 0;
+
+ }
+ catch(exception& e) {
+ m->errorOut(e, "GetMetaCommunityCommand", "execute");
+ exit(1);
+ }
+}
+//**********************************************************************************************************************
+int GetMetaCommunityCommand::process(vector<SharedRAbundVector*>& thislookup){
+ try {
+
+ double minLaplace = 1e10;
+ int minPartition = 0;
+
+ map<string, string> variables;
+ variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(sharedfile));
+ variables["[distance]"] = thislookup[0]->getLabel();
+ string outputFileName = getOutputFileName("fit", variables);
+ outputNames.push_back(outputFileName); outputTypes["fit"].push_back(outputFileName);
+
+ ofstream fitData;
+ m->openOutputFile(outputFileName, fitData);
+ fitData.setf(ios::fixed, ios::floatfield);
+ fitData.setf(ios::showpoint);
+ cout.setf(ios::fixed, ios::floatfield);
+ cout.setf(ios::showpoint);
+
+ vector< vector<int> > sharedMatrix;
+ for (int i = 0; i < thislookup.size(); i++) { sharedMatrix.push_back(thislookup[i]->getAbundances()); }
+
+ m->mothurOut("K\tNLE\t\tlogDet\tBIC\t\tAIC\t\tLaplace\n");
+ fitData << "K\tNLE\tlogDet\tBIC\tAIC\tLaplace" << endl;
+
+ for(int numPartitions=1;numPartitions<=maxpartitions;numPartitions++){
+
+ if (m->control_pressed) { break; }
+
+ qFinderDMM findQ(sharedMatrix, numPartitions);
+
+ double laplace = findQ.getLaplace();
+ m->mothurOut(toString(numPartitions) + '\t');
+ cout << setprecision (2) << findQ.getNLL() << '\t' << findQ.getLogDet() << '\t';
+ m->mothurOutJustToLog(toString(findQ.getNLL()) + '\t' + toString(findQ.getLogDet()) + '\t');
+ cout << findQ.getBIC() << '\t' << findQ.getAIC() << '\t' << laplace;
+ m->mothurOutJustToLog(toString(findQ.getBIC()) + '\t' + toString(findQ.getAIC()) + '\t' + toString(laplace));
+
+ fitData << numPartitions << '\t';
+ fitData << setprecision (2) << findQ.getNLL() << '\t' << findQ.getLogDet() << '\t';
+ fitData << findQ.getBIC() << '\t' << findQ.getAIC() << '\t' << laplace << endl;
+
+ if(laplace < minLaplace){
+ minPartition = numPartitions;
+ minLaplace = laplace;
+ m->mothurOut("***");
+ }
+ m->mothurOutEndLine();
+
+ variables["[tag]"] = toString(numPartitions);
+ string relabund = getOutputFileName("relabund", variables);
+ outputNames.push_back(relabund); outputTypes["relabund"].push_back(relabund);
+ string matrix = getOutputFileName("matrix", variables);
+ outputNames.push_back(matrix); outputTypes["matrix"].push_back(matrix);
+
+ findQ.printZMatrix(matrix, m->getGroups());
+ findQ.printRelAbund(relabund, m->currentBinLabels);
+
+ if(optimizegap != -1 && (numPartitions - minPartition) >= optimizegap && numPartitions >= minpartitions){ break; }
+ }
+ fitData.close();
+
+ //minPartition = 4;
+
+ if (m->control_pressed) { return 0; }
+
+ generateSummaryFile(minpartitions, outputTypes["relabund"][0], outputTypes["relabund"][outputTypes["relabund"].size()-1], outputTypes["matrix"][outputTypes["matrix"].size()-1], thislookup[0]->getLabel());
+
+
+ return 0;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "GetMetaCommunityCommand", "process");
+ exit(1);
+ }
+}
+/**************************************************************************************************/
+
+vector<double> GetMetaCommunityCommand::generateDesignFile(int numPartitions, string input){
+ try {
+ vector<double> piValues(numPartitions, 0);
+
+ ifstream postFile;
+ m->openInputFile(input, postFile);//((fileRoot + toString(numPartitions) + "mix.posterior").c_str()); //matrix file
+ map<string, string> variables;
+ variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(input));
+ string outputFileName = getOutputFileName("design", variables);
+ ofstream designFile;
+ m->openOutputFile(outputFileName, designFile);
+ outputNames.push_back(outputFileName); outputTypes["design"].push_back(outputFileName);
+
+
+ vector<string> titles(numPartitions);
+
+ for(int i=0;i<numPartitions;i++){ postFile >> titles[i]; }
+
+ double posterior;
+ string sampleName;
+ int numSamples = 0;
+
+ while(postFile){
+
+ if (m->control_pressed) { break; }
+
+ double maxPosterior = 0.0000;
+ int maxPartition = -1;
+
+ postFile >> sampleName;
+
+ for(int i=0;i<numPartitions;i++){
+
+ postFile >> posterior;
+ if(posterior > maxPosterior){
+ maxPosterior = posterior;
+ maxPartition = i;
+ }
+ piValues[i] += posterior;
+
+ }
+
+ designFile << sampleName << '\t' << titles[maxPartition] << endl;
+
+ numSamples++;
+ m->gobble(postFile);
+ }
+ for(int i=0;i<numPartitions;i++){
+ piValues[i] /= (double)numSamples;
+ }
+
+
+ postFile.close();
+ designFile.close();
+
+ return piValues;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "GetMetaCommunityCommand", "generateDesignFile");
+ exit(1);
+ }
+}
+
+/**************************************************************************************************/
+
+inline bool summaryFunction(summaryData i, summaryData j){ return i.difference > j.difference; }
+
+/**************************************************************************************************/
+int GetMetaCommunityCommand::generateSummaryFile(int numPartitions, string reference, string partFile, string designInput, string label){
+ try {
+ vector<summaryData> summary;
+
+ vector<double> pMean(numPartitions, 0);
+ vector<double> pLCI(numPartitions, 0);
+ vector<double> pUCI(numPartitions, 0);
+
+ string name, header;
+ double mean, lci, uci;
+
+
+ vector<double> piValues = generateDesignFile(numPartitions, designInput);
+
+ ifstream referenceFile;
+ m->openInputFile(reference, referenceFile); //((fileRoot + label + ".1mix.relabund").c_str());
+ ifstream partitionFile;
+ m->openInputFile(partFile, partitionFile); //((fileRoot + toString(numPartitions) + "mix.relabund").c_str());
+
+ header = m->getline(referenceFile);
+ header = m->getline(partitionFile);
+ stringstream head(header);
+ string dummy, label;
+ head >> dummy;
+ vector<string> thetaValues(numPartitions, "");
+ for(int i=0;i<numPartitions;i++){
+ head >> label >> dummy >> dummy;
+ thetaValues[i] = label.substr(label.find_last_of('_')+1);
+ }
+
+
+ vector<double> partitionDiff(numPartitions, 0.0000);
+
+ while(referenceFile){
+
+ if (m->control_pressed) { break; }
+
+ referenceFile >> name >> mean >> lci >> uci;
+
+ summaryData tempData;
+ tempData.name = name;
+ tempData.refMean = mean;
+
+ double difference = 0.0000;
+
+ partitionFile >> name;
+ for(int j=0;j<numPartitions;j++){
+ partitionFile >> pMean[j] >> pLCI[j] >> pUCI[j];
+ difference += abs(mean - pMean[j]);
+ partitionDiff[j] += abs(mean - pMean[j]);;
+ }
+
+ tempData.partMean = pMean;
+ tempData.partLCI = pLCI;
+ tempData.partUCI = pUCI;
+ tempData.difference = difference;
+ summary.push_back(tempData);
+
+ m->gobble(referenceFile);
+ m->gobble(partitionFile);
+ }
+ referenceFile.close();
+ partitionFile.close();
+
+ if (m->control_pressed) { return 0; }
+
+ int numOTUs = (int)summary.size();
+
+ sort(summary.begin(), summary.end(), summaryFunction);
+
+ map<string, string> variables;
+ variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(sharedfile));
+ variables["[distance]"] = label;
+ string outputFileName = getOutputFileName("parameters", variables);
+ outputNames.push_back(outputFileName); outputTypes["parameters"].push_back(outputFileName);
+
+ ofstream parameterFile;
+ m->openOutputFile(outputFileName, parameterFile); //((fileRoot + "mix.parameters").c_str());
+ parameterFile.setf(ios::fixed, ios::floatfield);
+ parameterFile.setf(ios::showpoint);
+
+ double totalDifference = 0.0000;
+ parameterFile << "Part\tDif2Ref_i\ttheta_i\tpi_i\n";
+ for(int i=0;i<numPartitions;i++){
+ if (m->control_pressed) { break; }
+ parameterFile << i+1 << '\t' << setprecision(2) << partitionDiff[i] << '\t' << thetaValues[i] << '\t' << piValues[i] << endl;
+ totalDifference += partitionDiff[i];
+ }
+ parameterFile.close();
+
+ if (m->control_pressed) { return 0; }
+
+ string summaryFileName = getOutputFileName("summary", variables);
+ outputNames.push_back(outputFileName); outputTypes["summary"].push_back(outputFileName);
+
+ ofstream summaryFile;
+ m->openOutputFile(summaryFileName, summaryFile); //((fileRoot + "mix.summary").c_str());
+ summaryFile.setf(ios::fixed, ios::floatfield);
+ summaryFile.setf(ios::showpoint);
+
+
+ summaryFile << "OTU\tP0.mean";
+ for(int i=0;i<numPartitions;i++){
+ summaryFile << "\tP" << i+1 << ".mean\tP" << i+1 << ".lci\tP" << i+1 << ".uci";
+ }
+ summaryFile << "\tDifference\tCumFraction" << endl;
+
+ double cumDiff = 0.0000;
+
+ for(int i=0;i<numOTUs;i++){
+ if (m->control_pressed) { break; }
+ summaryFile << summary[i].name << setprecision(2) << '\t' << summary[i].refMean;
+ for(int j=0;j<numPartitions;j++){
+ summaryFile << '\t' << summary[i].partMean[j] << '\t' << summary[i].partLCI[j] << '\t' << summary[i].partUCI[j];
+ }
+
+ cumDiff += summary[i].difference/totalDifference;
+ summaryFile << '\t' << summary[i].difference << '\t' << cumDiff << endl;
+ }
+ summaryFile.close();
+
+ return 0;
+
+ }
+ catch(exception& e) {
+ m->errorOut(e, "GetMetaCommunityCommand", "generateSummaryFile");
+ exit(1);
+ }
+
+}
+//**********************************************************************************************************************
+
--- /dev/null
+//
+// getmetacommunitycommand.h
+// Mothur
+//
+// Created by SarahsWork on 4/9/13.
+// Copyright (c) 2013 Schloss Lab. All rights reserved.
+//
+
+#ifndef Mothur_getmetacommunitycommand_h
+#define Mothur_getmetacommunitycommand_h
+
+#include "command.hpp"
+#include "inputdata.h"
+
+/**************************************************************************************************/
+
+class GetMetaCommunityCommand : public Command {
+public:
+ GetMetaCommunityCommand(string);
+ GetMetaCommunityCommand();
+ ~GetMetaCommunityCommand(){}
+
+ vector<string> setParameters();
+ string getCommandName() { return "get.metacommunity"; }
+ string getCommandCategory() { return "OTU-Based Approaches"; }
+
+ string getOutputPattern(string);
+
+ string getHelpString();
+ string getCitation() { return "http://www.mothur.org/wiki/Get.metacommunity"; }
+ string getDescription() { return "brief description"; }
+
+ int execute();
+ void help() { m->mothurOut(getHelpString()); }
+
+private:
+ bool abort, allLines;
+ string outputDir;
+ vector<string> outputNames;
+ string sharedfile;
+ int minpartitions, maxpartitions, optimizegap;
+ vector<string> Groups;
+ set<string> labels;
+
+ int process(vector<SharedRAbundVector*>&);
+ vector<double> generateDesignFile(int, string);
+ int generateSummaryFile(int, string, string, string, string);
+
+};
+
+/**************************************************************************************************/
+struct summaryData {
+
+ string name;
+ double refMean, difference;
+ vector<double> partMean, partLCI, partUCI;
+
+};
+/**************************************************************************************************/
+
+
+
+
+#endif
/*********************************************************************************************************************************/
vector<vector<double> > LinearAlgebra::getObservedEuclideanDistance(vector<vector<double> >& relAbundData){
+ try {
- int numSamples = relAbundData.size();
- int numOTUs = relAbundData[0].size();
-
- vector<vector<double> > dMatrix(numSamples);
- for(int i=0;i<numSamples;i++){
- dMatrix[i].resize(numSamples);
+ int numSamples = relAbundData.size();
+ int numOTUs = relAbundData[0].size();
+
+ vector<vector<double> > dMatrix(numSamples);
+ for(int i=0;i<numSamples;i++){
+ dMatrix[i].resize(numSamples);
+ }
+
+ for(int i=0;i<numSamples;i++){
+ for(int j=0;j<numSamples;j++){
+
+ if (m->control_pressed) { return dMatrix; }
+
+ double d = 0;
+ for(int k=0;k<numOTUs;k++){
+ d += pow((relAbundData[i][k] - relAbundData[j][k]), 2.0000);
+ }
+ dMatrix[i][j] = pow(d, 0.50000);
+ dMatrix[j][i] = dMatrix[i][j];
+
+ }
+ }
+ return dMatrix;
}
-
- for(int i=0;i<numSamples;i++){
- for(int j=0;j<numSamples;j++){
-
- double d = 0;
- for(int k=0;k<numOTUs;k++){
- d += pow((relAbundData[i][k] - relAbundData[j][k]), 2.0000);
- }
- dMatrix[i][j] = pow(d, 0.50000);
- dMatrix[j][i] = dMatrix[i][j];
-
- }
+ catch(exception& e) {
+ m->errorOut(e, "LinearAlgebra", "getObservedEuclideanDistance");
+ exit(1);
+ }
+}
+
+/*********************************************************************************************************************************/
+vector<double> LinearAlgebra::solveEquations(vector<vector<double> > A, vector<double> b){
+ try {
+ int length = (int)b.size();
+ vector<double> x(length, 0);
+ vector<int> index(length);
+ for(int i=0;i<length;i++){ index[i] = i; }
+ double d;
+
+ ludcmp(A, index, d); if (m->control_pressed) { return b; }
+ lubksb(A, index, b);
+
+ return b;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "LinearAlgebra", "solveEquations");
+ exit(1);
+ }
+}
+
+/*********************************************************************************************************************************/
+
+void LinearAlgebra::ludcmp(vector<vector<double> >& A, vector<int>& index, double& d){
+ try {
+ double tiny = 1e-20;
+
+ int n = (int)A.size();
+ vector<double> vv(n, 0.0);
+ double temp;
+ int imax;
+
+ d = 1.0;
+
+ for(int i=0;i<n;i++){
+ double big = 0.0;
+ for(int j=0;j<n;j++){ if((temp=fabs(A[i][j])) > big ) big=temp; }
+ if(big==0.0){ m->mothurOut("Singular matrix in routine ludcmp\n"); }
+ vv[i] = 1.0/big;
+ }
+
+ for(int j=0;j<n;j++){
+ if (m->control_pressed) { break; }
+ for(int i=0;i<j;i++){
+ double sum = A[i][j];
+ for(int k=0;k<i;k++){ sum -= A[i][k] * A[k][j]; }
+ A[i][j] = sum;
+ }
+
+ double big = 0.0;
+ for(int i=j;i<n;i++){
+ double sum = A[i][j];
+ for(int k=0;k<j;k++){ sum -= A[i][k] * A[k][j]; }
+ A[i][j] = sum;
+ double dum;
+ if((dum = vv[i] * fabs(sum)) >= big){
+ big = dum;
+ imax = i;
+ }
+ }
+ if(j != imax){
+ for(int k=0;k<n;k++){
+ double dum = A[imax][k];
+ A[imax][k] = A[j][k];
+ A[j][k] = dum;
+ }
+ d = -d;
+ vv[imax] = vv[j];
+ }
+ index[j] = imax;
+
+ if(A[j][j] == 0.0){ A[j][j] = tiny; }
+
+ if(j != n-1){
+ double dum = 1.0/A[j][j];
+ for(int i=j+1;i<n;i++){ A[i][j] *= dum; }
+ }
+ }
+ }
+ catch(exception& e) {
+ m->errorOut(e, "LinearAlgebra", "ludcmp");
+ exit(1);
+ }
+}
+
+/*********************************************************************************************************************************/
+
+void LinearAlgebra::lubksb(vector<vector<double> >& A, vector<int>& index, vector<double>& b){
+ try {
+ double total;
+ int n = (int)A.size();
+ int ii = 0;
+
+ for(int i=0;i<n;i++){
+ if (m->control_pressed) { break; }
+ int ip = index[i];
+ total = b[ip];
+ b[ip] = b[i];
+
+ if (ii != 0) {
+ for(int j=ii-1;j<i;j++){
+ total -= A[i][j] * b[j];
+ }
+ }
+ else if(total != 0){ ii = i+1; }
+ b[i] = total;
+ }
+ for(int i=n-1;i>=0;i--){
+ total = b[i];
+ for(int j=i+1;j<n;j++){ total -= A[i][j] * b[j]; }
+ b[i] = total / A[i][i];
+ }
+ }
+ catch(exception& e) {
+ m->errorOut(e, "LinearAlgebra", "lubksb");
+ exit(1);
+ }
+}
+
+/*********************************************************************************************************************************/
+
+vector<vector<double> > LinearAlgebra::getInverse(vector<vector<double> > matrix){
+ try {
+ int n = (int)matrix.size();
+
+ vector<vector<double> > inverse(n);
+ for(int i=0;i<n;i++){ inverse[i].assign(n, 0.0000); }
+
+ vector<double> column(n, 0.0000);
+ vector<int> index(n, 0);
+ double dummy;
+
+ ludcmp(matrix, index, dummy);
+
+ for(int j=0;j<n;j++){
+ if (m->control_pressed) { break; }
+
+ column.assign(n, 0);
+
+ column[j] = 1.0000;
+
+ lubksb(matrix, index, column);
+
+ for(int i=0;i<n;i++){ inverse[i][j] = column[i]; }
+ }
+
+ return inverse;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "LinearAlgebra", "getInverse");
+ exit(1);
}
- return dMatrix;
-
}
/*********************************************************************************************************************************/
double calcPearsonSig(double, double); //length, coeff.
double calcKendallSig(double, double); //length, coeff.
+ vector<double> solveEquations(vector<vector<double> >, vector<double>);
+ vector<vector<double> > getInverse(vector<vector<double> >);
+
private:
MothurOut* m;
double ran4(int&); //for testing
void psdes(unsigned long &, unsigned long &); //for testing
+ void ludcmp(vector<vector<double> >&, vector<int>&, double&);
+ void lubksb(vector<vector<double> >&, vector<int>&, vector<double>&);
+
+
};
#endif
--- /dev/null
+//
+// qFinderDMM.cpp
+// pds_dmm
+//
+// Created by Patrick Schloss on 11/8/12.
+// Copyright (c) 2012 University of Michigan. All rights reserved.
+//
+
+#include "qFinderDMM.h"
+#include "linearalgebra.h"
+
+#define EPSILON numeric_limits<double>::epsilon()
+
+/**************************************************************************************************/
+
+qFinderDMM::qFinderDMM(vector<vector<int> > cm, int p): countMatrix(cm), numPartitions(p){
+ try {
+ m = MothurOut::getInstance();
+ numSamples = (int)countMatrix.size();
+ numOTUs = (int)countMatrix[0].size();
+
+
+ kMeans();
+ // cout << "done kMeans" << endl;
+
+ optimizeLambda();
+
+
+ // cout << "done optimizeLambda" << endl;
+
+ double change = 1.0000;
+ currNLL = 0.0000;
+
+ int iter = 0;
+
+ while(change > 1.0e-6 && iter < 100){
+
+ // cout << "Calc_Z: ";
+ calculatePiK();
+
+ optimizeLambda();
+
+ // printf("Iter:%d\t",iter);
+
+ for(int i=0;i<numPartitions;i++){
+ weights[i] = 0.0000;
+ for(int j=0;j<numSamples;j++){
+ weights[i] += zMatrix[i][j];
+ }
+ // printf("w_%d=%.3f\t",i,weights[i]);
+
+ }
+
+ double nLL = getNegativeLogLikelihood();
+
+ change = abs(nLL - currNLL);
+
+ currNLL = nLL;
+
+ // printf("NLL=%.5f\tDelta=%.4e\n",currNLL, change);
+
+ iter++;
+ }
+
+ error.resize(numPartitions);
+
+ logDeterminant = 0.0000;
+
+ LinearAlgebra l;
+
+ for(currentPartition=0;currentPartition<numPartitions;currentPartition++){
+
+ error[currentPartition].assign(numOTUs, 0.0000);
+
+ if(currentPartition > 0){
+ logDeterminant += (2.0 * log(numSamples) - log(weights[currentPartition]));
+ }
+ vector<vector<double> > hessian = getHessian();
+ vector<vector<double> > invHessian = l.getInverse(hessian);
+
+ for(int i=0;i<numOTUs;i++){
+ logDeterminant += log(abs(hessian[i][i]));
+ error[currentPartition][i] = invHessian[i][i];
+ }
+
+ }
+
+ int numParameters = numPartitions * numOTUs + numPartitions - 1;
+ laplace = currNLL + 0.5 * logDeterminant - 0.5 * numParameters * log(2.0 * 3.14159);
+ bic = currNLL + 0.5 * log(numSamples) * numParameters;
+ aic = currNLL + numParameters;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "qFinderDMM", "qFinderDMM");
+ exit(1);
+ }
+}
+
+/**************************************************************************************************/
+
+void qFinderDMM::printZMatrix(string fileName, vector<string> sampleName){
+ try {
+ ofstream printMatrix;
+ m->openOutputFile(fileName, printMatrix); //(fileName.c_str());
+ printMatrix.setf(ios::fixed, ios::floatfield);
+ printMatrix.setf(ios::showpoint);
+
+ for(int i=0;i<numPartitions;i++){ printMatrix << "\tPartition_" << i+1; } printMatrix << endl;
+
+ for(int i=0;i<numSamples;i++){
+ printMatrix << sampleName[i];
+ for(int j=0;j<numPartitions;j++){
+ printMatrix << setprecision(4) << '\t' << zMatrix[j][i];
+ }
+ printMatrix << endl;
+ }
+ printMatrix.close();
+ }
+ catch(exception& e) {
+ m->errorOut(e, "qFinderDMM", "printZMatrix");
+ exit(1);
+ }
+}
+
+/**************************************************************************************************/
+
+void qFinderDMM::printRelAbund(string fileName, vector<string> otuNames){
+ try {
+ ofstream printRA;
+ m->openOutputFile(fileName, printRA); //(fileName.c_str());
+ printRA.setf(ios::fixed, ios::floatfield);
+ printRA.setf(ios::showpoint);
+
+ vector<double> totals(numPartitions, 0.0000);
+ for(int i=0;i<numPartitions;i++){
+ for(int j=0;j<numOTUs;j++){
+ totals[i] += exp(lambdaMatrix[i][j]);
+ }
+ }
+
+ printRA << "Taxon";
+ for(int i=0;i<numPartitions;i++){
+ printRA << "\tPartition_" << i+1 << '_' << setprecision(4) << totals[i];
+ printRA << "\tPartition_" << i+1 <<"_LCI" << "\tPartition_" << i+1 << "_UCI";
+ }
+ printRA << endl;
+
+ for(int i=0;i<numOTUs;i++){
+
+ if (m->control_pressed) { break; }
+
+ printRA << otuNames[i];
+ for(int j=0;j<numPartitions;j++){
+
+ if(error[j][i] >= 0.0000){
+ double std = sqrt(error[j][i]);
+ printRA << '\t' << 100 * exp(lambdaMatrix[j][i]) / totals[j];
+ printRA << '\t' << 100 * exp(lambdaMatrix[j][i] - 2.0 * std) / totals[j];
+ printRA << '\t' << 100 * exp(lambdaMatrix[j][i] + 2.0 * std) / totals[j];
+ }
+ else{
+ printRA << '\t' << 100 * exp(lambdaMatrix[j][i]) / totals[j];
+ printRA << '\t' << "NA";
+ printRA << '\t' << "NA";
+ }
+ }
+ printRA << endl;
+ }
+
+ printRA.close();
+ }
+ catch(exception& e) {
+ m->errorOut(e, "qFinderDMM", "printRelAbund");
+ exit(1);
+ }
+}
+
+/**************************************************************************************************/
+
+// these functions for bfgs2 solver were lifted from the gnu_gsl source code...
+
+/* Find a minimum in x=[0,1] of the interpolating quadratic through
+ * (0,f0) (1,f1) with derivative fp0 at x=0. The interpolating
+ * polynomial is q(x) = f0 + fp0 * z + (f1-f0-fp0) * z^2
+ */
+
+static double
+interp_quad (double f0, double fp0, double f1, double zl, double zh)
+{
+ double fl = f0 + zl*(fp0 + zl*(f1 - f0 -fp0));
+ double fh = f0 + zh*(fp0 + zh*(f1 - f0 -fp0));
+ double c = 2 * (f1 - f0 - fp0); /* curvature */
+
+ double zmin = zl, fmin = fl;
+
+ if (fh < fmin) { zmin = zh; fmin = fh; }
+
+ if (c > 0) /* positive curvature required for a minimum */
+ {
+ double z = -fp0 / c; /* location of minimum */
+ if (z > zl && z < zh) {
+ double f = f0 + z*(fp0 + z*(f1 - f0 -fp0));
+ if (f < fmin) { zmin = z; fmin = f; };
+ }
+ }
+
+ return zmin;
+}
+
+/**************************************************************************************************/
+
+/* Find a minimum in x=[0,1] of the interpolating cubic through
+ * (0,f0) (1,f1) with derivatives fp0 at x=0 and fp1 at x=1.
+ *
+ * The interpolating polynomial is:
+ *
+ * c(x) = f0 + fp0 * z + eta * z^2 + xi * z^3
+ *
+ * where eta=3*(f1-f0)-2*fp0-fp1, xi=fp0+fp1-2*(f1-f0).
+ */
+
+double cubic (double c0, double c1, double c2, double c3, double z){
+ return c0 + z * (c1 + z * (c2 + z * c3));
+}
+
+/**************************************************************************************************/
+
+void check_extremum (double c0, double c1, double c2, double c3, double z,
+ double *zmin, double *fmin){
+ /* could make an early return by testing curvature >0 for minimum */
+
+ double y = cubic (c0, c1, c2, c3, z);
+
+ if (y < *fmin)
+ {
+ *zmin = z; /* accepted new point*/
+ *fmin = y;
+ }
+}
+
+/**************************************************************************************************/
+
+int gsl_poly_solve_quadratic (double a, double b, double c,
+ double *x0, double *x1)
+{
+
+ double disc = b * b - 4 * a * c;
+
+ if (a == 0) /* Handle linear case */
+ {
+ if (b == 0)
+ {
+ return 0;
+ }
+ else
+ {
+ *x0 = -c / b;
+ return 1;
+ };
+ }
+
+ if (disc > 0)
+ {
+ if (b == 0)
+ {
+ double r = fabs (0.5 * sqrt (disc) / a);
+ *x0 = -r;
+ *x1 = r;
+ }
+ else
+ {
+ double sgnb = (b > 0 ? 1 : -1);
+ double temp = -0.5 * (b + sgnb * sqrt (disc));
+ double r1 = temp / a ;
+ double r2 = c / temp ;
+
+ if (r1 < r2)
+ {
+ *x0 = r1 ;
+ *x1 = r2 ;
+ }
+ else
+ {
+ *x0 = r2 ;
+ *x1 = r1 ;
+ }
+ }
+ return 2;
+ }
+ else if (disc == 0)
+ {
+ *x0 = -0.5 * b / a ;
+ *x1 = -0.5 * b / a ;
+ return 2 ;
+ }
+ else
+ {
+ return 0;
+ }
+
+}
+
+/**************************************************************************************************/
+
+double interp_cubic (double f0, double fp0, double f1, double fp1, double zl, double zh){
+ double eta = 3 * (f1 - f0) - 2 * fp0 - fp1;
+ double xi = fp0 + fp1 - 2 * (f1 - f0);
+ double c0 = f0, c1 = fp0, c2 = eta, c3 = xi;
+ double zmin, fmin;
+ double z0, z1;
+
+ zmin = zl; fmin = cubic(c0, c1, c2, c3, zl);
+ check_extremum (c0, c1, c2, c3, zh, &zmin, &fmin);
+
+ {
+ int n = gsl_poly_solve_quadratic (3 * c3, 2 * c2, c1, &z0, &z1);
+
+ if (n == 2) /* found 2 roots */
+ {
+ if (z0 > zl && z0 < zh)
+ check_extremum (c0, c1, c2, c3, z0, &zmin, &fmin);
+ if (z1 > zl && z1 < zh)
+ check_extremum (c0, c1, c2, c3, z1, &zmin, &fmin);
+ }
+ else if (n == 1) /* found 1 root */
+ {
+ if (z0 > zl && z0 < zh)
+ check_extremum (c0, c1, c2, c3, z0, &zmin, &fmin);
+ }
+ }
+
+ return zmin;
+}
+
+/**************************************************************************************************/
+
+double interpolate (double a, double fa, double fpa,
+ double b, double fb, double fpb, double xmin, double xmax){
+ /* Map [a,b] to [0,1] */
+ double z, alpha, zmin, zmax;
+
+ zmin = (xmin - a) / (b - a);
+ zmax = (xmax - a) / (b - a);
+
+ if (zmin > zmax)
+ {
+ double tmp = zmin;
+ zmin = zmax;
+ zmax = tmp;
+ };
+
+ if(!isnan(fpb) ){
+ z = interp_cubic (fa, fpa * (b - a), fb, fpb * (b - a), zmin, zmax);
+ }
+ else{
+ z = interp_quad(fa, fpa * (b - a), fb, zmin, zmax);
+ }
+
+
+ alpha = a + z * (b - a);
+
+ return alpha;
+}
+
+/**************************************************************************************************/
+
+int qFinderDMM::lineMinimizeFletcher(vector<double>& x, vector<double>& p, double f0, double df0, double alpha1, double& alphaNew, double& fAlpha, vector<double>& xalpha, vector<double>& gradient ){
+ try {
+
+ double rho = 0.01;
+ double sigma = 0.10;
+ double tau1 = 9.00;
+ double tau2 = 0.05;
+ double tau3 = 0.50;
+
+ double alpha = alpha1;
+ double alpha_prev = 0.0000;
+
+ xalpha.resize(numOTUs, 0.0000);
+
+ double falpha_prev = f0;
+ double dfalpha_prev = df0;
+
+ double a = 0.0000; double b = alpha;
+ double fa = f0; double fb = 0.0000;
+ double dfa = df0; double dfb = 0.0/0.0;
+
+ int iter = 0;
+ int maxIters = 100;
+ while(iter++ < maxIters){
+ if (m->control_pressed) { break; }
+
+ for(int i=0;i<numOTUs;i++){
+ xalpha[i] = x[i] + alpha * p[i];
+ }
+
+ fAlpha = negativeLogEvidenceLambdaPi(xalpha);
+
+ if(fAlpha > f0 + alpha * rho * df0 || fAlpha >= falpha_prev){
+ a = alpha_prev; b = alpha;
+ fa = falpha_prev; fb = fAlpha;
+ dfa = dfalpha_prev; dfb = 0.0/0.0;
+ break;
+ }
+
+ negativeLogDerivEvidenceLambdaPi(xalpha, gradient);
+ double dfalpha = 0.0000;
+ for(int i=0;i<numOTUs;i++){ dfalpha += gradient[i] * p[i]; }
+
+ if(abs(dfalpha) <= -sigma * df0){
+ alphaNew = alpha;
+ return 1;
+ }
+
+ if(dfalpha >= 0){
+ a = alpha; b = alpha_prev;
+ fa = fAlpha; fb = falpha_prev;
+ dfa = dfalpha; dfb = dfalpha_prev;
+ break;
+ }
+
+ double delta = alpha - alpha_prev;
+
+ double lower = alpha + delta;
+ double upper = alpha + tau1 * delta;
+
+ double alphaNext = interpolate(alpha_prev, falpha_prev, dfalpha_prev, alpha, fAlpha, dfalpha, lower, upper);
+
+ alpha_prev = alpha;
+ falpha_prev = fAlpha;
+ dfalpha_prev = dfalpha;
+ alpha = alphaNext;
+ }
+
+ iter = 0;
+ while(iter++ < maxIters){
+ if (m->control_pressed) { break; }
+
+ double delta = b - a;
+
+ double lower = a + tau2 * delta;
+ double upper = b - tau3 * delta;
+
+ alpha = interpolate(a, fa, dfa, b, fb, dfb, lower, upper);
+
+ for(int i=0;i<numOTUs;i++){
+ xalpha[i] = x[i] + alpha * p[i];
+ }
+
+ fAlpha = negativeLogEvidenceLambdaPi(xalpha);
+
+ if((a - alpha) * dfa <= EPSILON){
+ return 0;
+ }
+
+ if(fAlpha > f0 + rho * alpha * df0 || fAlpha >= fa){
+ b = alpha;
+ fb = fAlpha;
+ dfb = 0.0/0.0;
+ }
+ else{
+ double dfalpha = 0.0000;
+
+ negativeLogDerivEvidenceLambdaPi(xalpha, gradient);
+ dfalpha = 0.0000;
+ for(int i=0;i<numOTUs;i++){ dfalpha += gradient[i] * p[i]; }
+
+ if(abs(dfalpha) <= -sigma * df0){
+ alphaNew = alpha;
+ return 1;
+ }
+
+ if(((b-a >= 0 && dfalpha >= 0) || ((b-a) <= 0.000 && dfalpha <= 0))){
+ b = a; fb = fa; dfb = dfa;
+ a = alpha; fa = fAlpha; dfa = dfalpha;
+ }
+ else{
+ a = alpha;
+ fa = fAlpha;
+ dfa = dfalpha;
+ }
+ }
+
+
+ }
+
+ return 1;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "qFinderDMM", "lineMinimizeFletcher");
+ exit(1);
+ }
+}
+
+/**************************************************************************************************/
+
+int qFinderDMM::bfgs2_Solver(vector<double>& x){
+ try{
+// cout << "bfgs2_Solver" << endl;
+ int bfgsIter = 0;
+ double step = 1.0e-6;
+ double delta_f = 0.0000;//f-f0;
+
+ vector<double> gradient;
+ double f = negativeLogEvidenceLambdaPi(x);
+
+// cout << "after negLE" << endl;
+
+ negativeLogDerivEvidenceLambdaPi(x, gradient);
+
+// cout << "after negLDE" << endl;
+
+ vector<double> x0 = x;
+ vector<double> g0 = gradient;
+
+ double g0norm = 0;
+ for(int i=0;i<numOTUs;i++){
+ g0norm += g0[i] * g0[i];
+ }
+ g0norm = sqrt(g0norm);
+
+ vector<double> p = gradient;
+ double pNorm = 0;
+ for(int i=0;i<numOTUs;i++){
+ p[i] *= -1 / g0norm;
+ pNorm += p[i] * p[i];
+ }
+ pNorm = sqrt(pNorm);
+ double df0 = -g0norm;
+
+ int maxIter = 5000;
+
+// cout << "before while" << endl;
+
+ while(g0norm > 0.001 && bfgsIter++ < maxIter){
+ if (m->control_pressed) { return 0; }
+
+ double f0 = f;
+ vector<double> dx(numOTUs, 0.0000);
+
+ double alphaOld, alphaNew;
+
+ if(pNorm == 0 || g0norm == 0 || df0 == 0){
+ dx.assign(numOTUs, 0.0000);
+ break;
+ }
+ if(delta_f < 0){
+ double delta = max(-delta_f, 10 * EPSILON * abs(f0));
+ alphaOld = min(1.0, 2.0 * delta / (-df0));
+ }
+ else{
+ alphaOld = step;
+ }
+
+ int success = lineMinimizeFletcher(x0, p, f0, df0, alphaOld, alphaNew, f, x, gradient);
+
+ if(!success){
+ x = x0;
+ break;
+ }
+
+ delta_f = f - f0;
+
+ vector<double> dx0(numOTUs);
+ vector<double> dg0(numOTUs);
+
+ for(int i=0;i<numOTUs;i++){
+ dx0[i] = x[i] - x0[i];
+ dg0[i] = gradient[i] - g0[i];
+ }
+
+ double dxg = 0;
+ double dgg = 0;
+ double dxdg = 0;
+ double dgnorm = 0;
+
+ for(int i=0;i<numOTUs;i++){
+ dxg += dx0[i] * gradient[i];
+ dgg += dg0[i] * gradient[i];
+ dxdg += dx0[i] * dg0[i];
+ dgnorm += dg0[i] * dg0[i];
+ }
+ dgnorm = sqrt(dgnorm);
+
+ double A, B;
+
+ if(dxdg != 0){
+ B = dxg / dxdg;
+ A = -(1.0 + dgnorm*dgnorm /dxdg) * B + dgg / dxdg;
+ }
+ else{
+ B = 0;
+ A = 0;
+ }
+
+ for(int i=0;i<numOTUs;i++){ p[i] = gradient[i] - A * dx0[i] - B * dg0[i]; }
+
+ x0 = x;
+ g0 = gradient;
+
+
+ double pg = 0;
+ pNorm = 0.0000;
+ g0norm = 0.0000;
+
+ for(int i=0;i<numOTUs;i++){
+ pg += p[i] * gradient[i];
+ pNorm += p[i] * p[i];
+ g0norm += g0[i] * g0[i];
+ }
+ pNorm = sqrt(pNorm);
+ g0norm = sqrt(g0norm);
+
+ double dir = (pg >= 0.0) ? -1.0 : +1.0;
+
+ for(int i=0;i<numOTUs;i++){ p[i] *= dir / pNorm; }
+
+ pNorm = 0.0000;
+ df0 = 0.0000;
+ for(int i=0;i<numOTUs;i++){
+ pNorm += p[i] * p[i];
+ df0 += p[i] * g0[i];
+ }
+
+ pNorm = sqrt(pNorm);
+
+ }
+// cout << "bfgsIter:\t" << bfgsIter << endl;
+
+ return bfgsIter;
+ }
+ catch(exception& e){
+ m->errorOut(e, "qFinderDMM", "bfgs2_Solver");
+ exit(1);
+ }
+}
+
+/**************************************************************************************************/
+
+//can we get these psi/psi1 calculations into their own math class?
+//psi calcualtions swiped from gsl library...
+
+static const double psi_cs[23] = {
+ -.038057080835217922,
+ .491415393029387130,
+ -.056815747821244730,
+ .008357821225914313,
+ -.001333232857994342,
+ .000220313287069308,
+ -.000037040238178456,
+ .000006283793654854,
+ -.000001071263908506,
+ .000000183128394654,
+ -.000000031353509361,
+ .000000005372808776,
+ -.000000000921168141,
+ .000000000157981265,
+ -.000000000027098646,
+ .000000000004648722,
+ -.000000000000797527,
+ .000000000000136827,
+ -.000000000000023475,
+ .000000000000004027,
+ -.000000000000000691,
+ .000000000000000118,
+ -.000000000000000020
+};
+
+static double apsi_cs[16] = {
+ -.0204749044678185,
+ -.0101801271534859,
+ .0000559718725387,
+ -.0000012917176570,
+ .0000000572858606,
+ -.0000000038213539,
+ .0000000003397434,
+ -.0000000000374838,
+ .0000000000048990,
+ -.0000000000007344,
+ .0000000000001233,
+ -.0000000000000228,
+ .0000000000000045,
+ -.0000000000000009,
+ .0000000000000002,
+ -.0000000000000000
+};
+
+/**************************************************************************************************/
+
+double qFinderDMM::cheb_eval(const double seriesData[], int order, double xx){
+ try {
+ double d = 0.0000;
+ double dd = 0.0000;
+
+ double x2 = xx * 2.0000;
+
+ for(int j=order;j>=1;j--){
+ if (m->control_pressed) { return 0; }
+ double temp = d;
+ d = x2 * d - dd + seriesData[j];
+ dd = temp;
+ }
+
+ d = xx * d - dd + 0.5 * seriesData[0];
+ return d;
+ }
+ catch(exception& e){
+ m->errorOut(e, "qFinderDMM", "cheb_eval");
+ exit(1);
+ }
+}
+
+/**************************************************************************************************/
+
+double qFinderDMM::psi(double xx){
+ try {
+ double psiX = 0.0000;
+
+ if(xx < 1.0000){
+
+ double t1 = 1.0 / xx;
+ psiX = cheb_eval(psi_cs, 22, 2.0*xx-1.0);
+ psiX = -t1 + psiX;
+
+ }
+ else if(xx < 2.0000){
+
+ const double v = xx - 1.0;
+ psiX = cheb_eval(psi_cs, 22, 2.0*v-1.0);
+
+ }
+ else{
+ const double t = 8.0/(xx*xx)-1.0;
+ psiX = cheb_eval(apsi_cs, 15, t);
+ psiX += log(xx) - 0.5/xx;
+ }
+
+ return psiX;
+ }
+ catch(exception& e){
+ m->errorOut(e, "qFinderDMM", "psi");
+ exit(1);
+ }
+}
+
+/**************************************************************************************************/
+
+/* coefficients for Maclaurin summation in hzeta()
+ * B_{2j}/(2j)!
+ */
+static double hzeta_c[15] = {
+ 1.00000000000000000000000000000,
+ 0.083333333333333333333333333333,
+ -0.00138888888888888888888888888889,
+ 0.000033068783068783068783068783069,
+ -8.2671957671957671957671957672e-07,
+ 2.0876756987868098979210090321e-08,
+ -5.2841901386874931848476822022e-10,
+ 1.3382536530684678832826980975e-11,
+ -3.3896802963225828668301953912e-13,
+ 8.5860620562778445641359054504e-15,
+ -2.1748686985580618730415164239e-16,
+ 5.5090028283602295152026526089e-18,
+ -1.3954464685812523340707686264e-19,
+ 3.5347070396294674716932299778e-21,
+ -8.9535174270375468504026113181e-23
+};
+
+/**************************************************************************************************/
+
+double qFinderDMM::psi1(double xx){
+ try {
+
+ /* Euler-Maclaurin summation formula
+ * [Moshier, p. 400, with several typo corrections]
+ */
+
+ double s = 2.0000;
+ const int jmax = 12;
+ const int kmax = 10;
+ int j, k;
+ const double pmax = pow(kmax + xx, -s);
+ double scp = s;
+ double pcp = pmax / (kmax + xx);
+ double value = pmax*((kmax+xx)/(s-1.0) + 0.5);
+
+ for(k=0; k<kmax; k++) {
+ if (m->control_pressed) { return 0; }
+ value += pow(k + xx, -s);
+ }
+
+ for(j=0; j<=jmax; j++) {
+ if (m->control_pressed) { return 0; }
+ double delta = hzeta_c[j+1] * scp * pcp;
+ value += delta;
+
+ if(fabs(delta/value) < 0.5*EPSILON) break;
+
+ scp *= (s+2*j+1)*(s+2*j+2);
+ pcp /= (kmax + xx)*(kmax + xx);
+ }
+
+ return value;
+ }
+ catch(exception& e){
+ m->errorOut(e, "qFinderDMM", "psi1");
+ exit(1);
+ }
+}
+
+/**************************************************************************************************/
+
+double qFinderDMM::negativeLogEvidenceLambdaPi(vector<double>& x){
+ try{
+ vector<double> sumAlphaX(numSamples, 0.0000);
+
+ double logEAlpha = 0.0000;
+ double sumLambda = 0.0000;
+ double sumAlpha = 0.0000;
+ double logE = 0.0000;
+ double nu = 0.10000;
+ double eta = 0.10000;
+
+ double weight = 0.00000;
+ for(int i=0;i<numSamples;i++){
+ weight += zMatrix[currentPartition][i];
+ }
+
+ for(int i=0;i<numOTUs;i++){
+ if (m->control_pressed) { return 0; }
+ double lambda = x[i];
+ double alpha = exp(x[i]);
+ logEAlpha += lgamma(alpha);
+ sumLambda += lambda;
+ sumAlpha += alpha;
+
+ for(int j=0;j<numSamples;j++){
+ double X = countMatrix[j][i];
+ double alphaX = alpha + X;
+ sumAlphaX[j] += alphaX;
+
+ logE -= zMatrix[currentPartition][j] * lgamma(alphaX);
+ }
+ }
+
+ logEAlpha -= lgamma(sumAlpha);
+
+ for(int i=0;i<numSamples;i++){
+ logE += zMatrix[currentPartition][i] * lgamma(sumAlphaX[i]);
+ }
+
+ return logE + weight * logEAlpha + nu * sumAlpha - eta * sumLambda;
+ }
+ catch(exception& e){
+ m->errorOut(e, "qFinderDMM", "negativeLogEvidenceLambdaPi");
+ exit(1);
+ }
+}
+
+/**************************************************************************************************/
+
+void qFinderDMM::negativeLogDerivEvidenceLambdaPi(vector<double>& x, vector<double>& df){
+ try{
+// cout << "\tstart negativeLogDerivEvidenceLambdaPi" << endl;
+
+ vector<double> storeVector(numSamples, 0.0000);
+ vector<double> derivative(numOTUs, 0.0000);
+ vector<double> alpha(numOTUs, 0.0000);
+
+ double store = 0.0000;
+ double nu = 0.1000;
+ double eta = 0.1000;
+
+ double weight = 0.0000;
+ for(int i=0;i<numSamples;i++){
+ weight += zMatrix[currentPartition][i];
+ }
+
+
+ for(int i=0;i<numOTUs;i++){
+ if (m->control_pressed) { return; }
+// cout << "start i loop" << endl;
+//
+// cout << i << '\t' << alpha[i] << '\t' << x[i] << '\t' << exp(x[i]) << '\t' << store << endl;
+
+ alpha[i] = exp(x[i]);
+ store += alpha[i];
+
+// cout << "before derivative" << endl;
+
+ derivative[i] = weight * psi(alpha[i]);
+
+// cout << "after derivative" << endl;
+
+// cout << i << '\t' << alpha[i] << '\t' << psi(alpha[i]) << '\t' << derivative[i] << endl;
+
+ for(int j=0;j<numSamples;j++){
+ double X = countMatrix[j][i];
+ double alphaX = X + alpha[i];
+
+ derivative[i] -= zMatrix[currentPartition][j] * psi(alphaX);
+ storeVector[j] += alphaX;
+ }
+// cout << "end i loop" << endl;
+ }
+
+ double sumStore = 0.0000;
+ for(int i=0;i<numSamples;i++){
+ sumStore += zMatrix[currentPartition][i] * psi(storeVector[i]);
+ }
+
+ store = weight * psi(store);
+
+ df.resize(numOTUs, 0.0000);
+
+ for(int i=0;i<numOTUs;i++){
+ df[i] = alpha[i] * (nu + derivative[i] - store + sumStore) - eta;
+// cout << i << '\t' << df[i] << endl;
+ }
+// cout << df.size() << endl;
+// cout << "\tend negativeLogDerivEvidenceLambdaPi" << endl;
+ }
+ catch(exception& e){
+ m->errorOut(e, "qFinderDMM", "negativeLogDerivEvidenceLambdaPi");
+ exit(1);
+ }
+}
+
+/**************************************************************************************************/
+
+double qFinderDMM::getNegativeLogEvidence(vector<double>& lambda, int group){
+ try {
+ double sumAlpha = 0.0000;
+ double sumAlphaX = 0.0000;
+ double sumLnGamAlpha = 0.0000;
+ double logEvidence = 0.0000;
+
+ for(int i=0;i<numOTUs;i++){
+ if (m->control_pressed) { return 0; }
+ double alpha = exp(lambda[i]);
+ double X = countMatrix[group][i];
+ double alphaX = alpha + X;
+
+ sumLnGamAlpha += lgamma(alpha);
+ sumAlpha += alpha;
+ sumAlphaX += alphaX;
+
+ logEvidence -= lgamma(alphaX);
+ }
+
+ sumLnGamAlpha -= lgamma(sumAlpha);
+ logEvidence += lgamma(sumAlphaX);
+
+ return logEvidence + sumLnGamAlpha;
+ }
+ catch(exception& e){
+ m->errorOut(e, "qFinderDMM", "getNegativeLogEvidence");
+ exit(1);
+ }
+}
+
+/**************************************************************************************************/
+
+void qFinderDMM::kMeans(){
+ try {
+
+ vector<vector<double> > relativeAbundance(numSamples);
+ vector<vector<double> > alphaMatrix;
+
+ alphaMatrix.resize(numPartitions);
+ lambdaMatrix.resize(numPartitions);
+ for(int i=0;i<numPartitions;i++){
+ alphaMatrix[i].assign(numOTUs, 0);
+ lambdaMatrix[i].assign(numOTUs, 0);
+ }
+
+ //get relative abundance
+ for(int i=0;i<numSamples;i++){
+ if (m->control_pressed) { return; }
+ int groupTotal = 0;
+
+ relativeAbundance[i].assign(numOTUs, 0.0);
+
+ for(int j=0;j<numOTUs;j++){
+ groupTotal += countMatrix[i][j];
+ }
+ for(int j=0;j<numOTUs;j++){
+ relativeAbundance[i][j] = countMatrix[i][j] / (double)groupTotal;
+ }
+ }
+
+ //randomly assign samples into partitions
+ zMatrix.resize(numPartitions);
+ for(int i=0;i<numPartitions;i++){
+ zMatrix[i].assign(numSamples, 0);
+ }
+
+ for(int i=0;i<numSamples;i++){
+ zMatrix[rand()%numPartitions][i] = 1;
+ }
+
+ double maxChange = 1;
+ int maxIters = 1000;
+ int iteration = 0;
+
+ weights.assign(numPartitions, 0);
+
+ while(maxChange > 1e-6 && iteration < maxIters){
+
+ if (m->control_pressed) { return; }
+ //calcualte average relative abundance
+ maxChange = 0.0000;
+ for(int i=0;i<numPartitions;i++){
+
+ double normChange = 0.0;
+
+ weights[i] = 0;
+
+ for(int j=0;j<numSamples;j++){
+ weights[i] += (double)zMatrix[i][j];
+ }
+
+ vector<double> averageRelativeAbundance(numOTUs, 0);
+ for(int j=0;j<numOTUs;j++){
+ for(int k=0;k<numSamples;k++){
+ averageRelativeAbundance[j] += zMatrix[i][k] * relativeAbundance[k][j];
+ }
+ }
+
+ for(int j=0;j<numOTUs;j++){
+ averageRelativeAbundance[j] /= weights[i];
+ double difference = averageRelativeAbundance[j] - alphaMatrix[i][j];
+ normChange += difference * difference;
+ alphaMatrix[i][j] = averageRelativeAbundance[j];
+ }
+
+ normChange = sqrt(normChange);
+
+ if(normChange > maxChange){ maxChange = normChange; }
+ }
+
+
+ //calcualte distance between each sample in partition adn the average relative abundance
+ for(int i=0;i<numSamples;i++){
+ if (m->control_pressed) { return; }
+
+ double normalizationFactor = 0;
+ vector<double> totalDistToPartition(numPartitions, 0);
+
+ for(int j=0;j<numPartitions;j++){
+ for(int k=0;k<numOTUs;k++){
+ double difference = alphaMatrix[j][k] - relativeAbundance[i][k];
+ totalDistToPartition[j] += difference * difference;
+ }
+ totalDistToPartition[j] = sqrt(totalDistToPartition[j]);
+ normalizationFactor += exp(-50.0 * totalDistToPartition[j]);
+ }
+
+
+ for(int j=0;j<numPartitions;j++){
+ zMatrix[j][i] = exp(-50.0 * totalDistToPartition[j]) / normalizationFactor;
+ }
+
+ }
+
+ iteration++;
+ // cout << "K means: " << iteration << '\t' << maxChange << endl;
+
+ }
+
+ // cout << "Iter:-1";
+ for(int i=0;i<numPartitions;i++){
+ weights[i] = 0.0000;
+
+ for(int j=0;j<numSamples;j++){
+ weights[i] += zMatrix[i][j];
+ }
+ // printf("\tw_%d=%.3f", i, weights[i]);
+ }
+ // cout << endl;
+
+
+ for(int i=0;i<numOTUs;i++){
+ if (m->control_pressed) { return; }
+ for(int j=0;j<numPartitions;j++){
+ if(alphaMatrix[j][i] > 0){
+ lambdaMatrix[j][i] = log(alphaMatrix[j][i]);
+ }
+ else{
+ lambdaMatrix[j][i] = -10.0;
+ }
+ }
+ }
+ }
+ catch(exception& e){
+ m->errorOut(e, "qFinderDMM", "kMeans");
+ exit(1);
+ }
+}
+
+/**************************************************************************************************/
+
+void qFinderDMM::optimizeLambda(){
+ try {
+ for(currentPartition=0;currentPartition<numPartitions;currentPartition++){
+ if (m->control_pressed) { return; }
+ bfgs2_Solver(lambdaMatrix[currentPartition]);
+ }
+ }
+ catch(exception& e){
+ m->errorOut(e, "qFinderDMM", "optimizeLambda");
+ exit(1);
+ }
+}
+
+/**************************************************************************************************/
+
+void qFinderDMM::calculatePiK(){
+ try {
+ vector<double> store(numPartitions);
+
+ for(int i=0;i<numSamples;i++){
+ if (m->control_pressed) { return; }
+ double sum = 0.0000;
+ double minNegLogEvidence =numeric_limits<double>::max();
+
+ for(int j=0;j<numPartitions;j++){
+ double negLogEvidenceJ = getNegativeLogEvidence(lambdaMatrix[j], i);
+
+ if(negLogEvidenceJ < minNegLogEvidence){
+ minNegLogEvidence = negLogEvidenceJ;
+ }
+ store[j] = negLogEvidenceJ;
+ }
+
+ for(int j=0;j<numPartitions;j++){
+ if (m->control_pressed) { return; }
+ zMatrix[j][i] = weights[j] * exp(-(store[j] - minNegLogEvidence));
+ sum += zMatrix[j][i];
+ }
+
+ for(int j=0;j<numPartitions;j++){
+ zMatrix[j][i] /= sum;
+ }
+
+ }
+ }
+ catch(exception& e){
+ m->errorOut(e, "qFinderDMM", "calculatePiK");
+ exit(1);
+ }
+
+}
+
+/**************************************************************************************************/
+
+double qFinderDMM::getNegativeLogLikelihood(){
+ try {
+ double eta = 0.10000;
+ double nu = 0.10000;
+
+ vector<double> pi(numPartitions, 0.0000);
+ vector<double> logBAlpha(numPartitions, 0.0000);
+
+ double doubleSum = 0.0000;
+
+ for(int i=0;i<numPartitions;i++){
+ if (m->control_pressed) { return 0; }
+ double sumAlphaK = 0.0000;
+
+ pi[i] = weights[i] / (double)numSamples;
+
+ for(int j=0;j<numOTUs;j++){
+ double alpha = exp(lambdaMatrix[i][j]);
+ sumAlphaK += alpha;
+
+ logBAlpha[i] += lgamma(alpha);
+ }
+ logBAlpha[i] -= lgamma(sumAlphaK);
+ }
+
+ for(int i=0;i<numSamples;i++){
+ if (m->control_pressed) { return 0; }
+
+ double probability = 0.0000;
+ double factor = 0.0000;
+ double sum = 0.0000;
+ vector<double> logStore(numPartitions, 0.0000);
+ double offset = -numeric_limits<double>::max();
+
+ for(int j=0;j<numOTUs;j++){
+ sum += countMatrix[i][j];
+ factor += lgamma(countMatrix[i][j] + 1.0000);
+ }
+ factor -= lgamma(sum + 1.0);
+
+ for(int k=0;k<numPartitions;k++){
+
+ double sumAlphaKX = 0.0000;
+ double logBAlphaX = 0.0000;
+
+ for(int j=0;j<numOTUs;j++){
+ double alphaX = exp(lambdaMatrix[k][j]) + (double)countMatrix[i][j];
+
+ sumAlphaKX += alphaX;
+ logBAlphaX += lgamma(alphaX);
+ }
+
+ logBAlphaX -= lgamma(sumAlphaKX);
+
+ logStore[k] = logBAlphaX - logBAlpha[k] - factor;
+ if(logStore[k] > offset){
+ offset = logStore[k];
+ }
+
+ }
+
+ for(int k=0;k<numPartitions;k++){
+ probability += pi[k] * exp(-offset + logStore[k]);
+ }
+ doubleSum += log(probability) + offset;
+
+ }
+
+ double L5 = - numOTUs * numPartitions * lgamma(eta);
+ double L6 = eta * numPartitions * numOTUs * log(nu);
+
+ double alphaSum, lambdaSum;
+ alphaSum = lambdaSum = 0.0000;
+
+ for(int i=0;i<numPartitions;i++){
+ for(int j=0;j<numOTUs;j++){
+ if (m->control_pressed) { return 0; }
+ alphaSum += exp(lambdaMatrix[i][j]);
+ lambdaSum += lambdaMatrix[i][j];
+ }
+ }
+ alphaSum *= -nu;
+ lambdaSum *= eta;
+
+ return (-doubleSum - L5 - L6 - alphaSum - lambdaSum);
+ }
+ catch(exception& e){
+ m->errorOut(e, "qFinderDMM", "getNegativeLogLikelihood");
+ exit(1);
+ }
+
+
+}
+
+/**************************************************************************************************/
+
+vector<vector<double> > qFinderDMM::getHessian(){
+ try {
+ vector<double> alpha(numOTUs, 0.0000);
+ double alphaSum = 0.0000;
+
+ vector<double> pi = zMatrix[currentPartition];
+ vector<double> psi_ajk(numOTUs, 0.0000);
+ vector<double> psi_cjk(numOTUs, 0.0000);
+ vector<double> psi1_ajk(numOTUs, 0.0000);
+ vector<double> psi1_cjk(numOTUs, 0.0000);
+
+ for(int j=0;j<numOTUs;j++){
+
+ if (m->control_pressed) { break; }
+
+ alpha[j] = exp(lambdaMatrix[currentPartition][j]);
+ alphaSum += alpha[j];
+
+ for(int i=0;i<numSamples;i++){
+ double X = (double) countMatrix[i][j];
+
+ psi_ajk[j] += pi[i] * psi(alpha[j]);
+ psi1_ajk[j] += pi[i] * psi1(alpha[j]);
+
+ psi_cjk[j] += pi[i] * psi(alpha[j] + X);
+ psi1_cjk[j] += pi[i] * psi1(alpha[j] + X);
+ }
+ }
+
+
+ double psi_Ck = 0.0000;
+ double psi1_Ck = 0.0000;
+
+ double weight = 0.0000;
+
+ for(int i=0;i<numSamples;i++){
+ if (m->control_pressed) { break; }
+ weight += pi[i];
+ double sum = 0.0000;
+ for(int j=0;j<numOTUs;j++){ sum += alpha[j] + countMatrix[i][j]; }
+
+ psi_Ck += pi[i] * psi(sum);
+ psi1_Ck += pi[i] * psi1(sum);
+ }
+
+ double psi_Ak = weight * psi(alphaSum);
+ double psi1_Ak = weight * psi1(alphaSum);
+
+ vector<vector<double> > hessian(numOTUs);
+ for(int i=0;i<numOTUs;i++){ hessian[i].assign(numOTUs, 0.0000); }
+
+ for(int i=0;i<numOTUs;i++){
+ if (m->control_pressed) { break; }
+ double term1 = -alpha[i] * (- psi_ajk[i] + psi_Ak + psi_cjk[i] - psi_Ck);
+ double term2 = -alpha[i] * alpha[i] * (-psi1_ajk[i] + psi1_Ak + psi1_cjk[i] - psi1_Ck);
+ double term3 = 0.1 * alpha[i];
+
+ hessian[i][i] = term1 + term2 + term3;
+
+ for(int j=0;j<i;j++){
+ hessian[i][j] = - alpha[i] * alpha[j] * (psi1_Ak - psi1_Ck);
+ hessian[j][i] = hessian[i][j];
+ }
+ }
+
+ return hessian;
+ }
+ catch(exception& e){
+ m->errorOut(e, "qFinderDMM", "getHessian");
+ exit(1);
+ }
+}
+
+/**************************************************************************************************/
--- /dev/null
+//
+// qFinderDMM.h
+// pds_dmm
+//
+// Created by Patrick Schloss on 11/8/12.
+// Copyright (c) 2012 University of Michigan. All rights reserved.
+//
+
+#ifndef pds_dmm_qFinderDMM_h
+#define pds_dmm_qFinderDMM_h
+
+/**************************************************************************************************/
+
+#include "mothurout.h"
+
+/**************************************************************************************************/
+
+class qFinderDMM {
+
+public:
+ qFinderDMM(vector<vector<int> >, int);
+ double getNLL() { return currNLL; }
+ double getAIC() { return aic; }
+ double getBIC() { return bic; }
+ double getLogDet() { return logDeterminant; }
+ double getLaplace() { return laplace; }
+ void printZMatrix(string, vector<string>);
+ void printRelAbund(string, vector<string>);
+
+private:
+ MothurOut* m;
+ void kMeans();
+ void optimizeLambda();
+ void calculatePiK();
+
+ double negativeLogEvidenceLambdaPi(vector<double>&);
+ void negativeLogDerivEvidenceLambdaPi(vector<double>&, vector<double>&);
+ double getNegativeLogEvidence(vector<double>&, int);
+ double getNegativeLogLikelihood();
+ vector<vector<double> > getHessian();
+
+ int lineMinimizeFletcher(vector<double>&, vector<double>&, double, double, double, double&, double&, vector<double>&, vector<double>&);
+ int bfgs2_Solver(vector<double>&);//, double, double);
+ double cheb_eval(const double[], int, double);
+ double psi(double);
+ double psi1(double);
+
+ vector<vector<int> > countMatrix;
+ vector<vector<double> > zMatrix;
+ vector<vector<double> > lambdaMatrix;
+ vector<double> weights;
+ vector<vector<double> > error;
+
+ int numPartitions;
+ int numSamples;
+ int numOTUs;
+ int currentPartition;
+
+ double currNLL;
+ double aic;
+ double bic;
+ double logDeterminant;
+ double laplace;
+
+};
+
+/**************************************************************************************************/
+
+#endif
return data[index].abundance;
}
+/***********************************************************************/
+//returns vector of abundances
+vector<int> SharedRAbundVector::getAbundances(){
+ vector<int> abunds;
+ for (int i = 0; i < data.size(); i++) {
+ abunds.push_back(data[i].abundance);
+ }
+
+ return abunds;
+}
+
+
/***********************************************************************/
int SharedRAbundVector::numNZ(){
individual get(int);
vector <individual> getData();
int getAbundance(int);
+ vector<int> getAbundances();
int numNZ();
void sortD(); //Sorts the data in descending order.
void push_front(int, int, string); //abundance, otu, groupname