]> git.donarmstrong.com Git - mothur.git/commitdiff
added get.metacommunity command.
authorSarahsWork <sarahswork@imac.westcotts.net>
Fri, 12 Apr 2013 17:52:41 +0000 (13:52 -0400)
committerSarahsWork <sarahswork@imac.westcotts.net>
Fri, 12 Apr 2013 17:52:41 +0000 (13:52 -0400)
Mothur.xcodeproj/project.pbxproj
commandfactory.cpp
getmetacommunitycommand.cpp [new file with mode: 0644]
getmetacommunitycommand.h [new file with mode: 0644]
linearalgebra.cpp
linearalgebra.h
qFinderDMM.cpp [new file with mode: 0644]
qFinderDMM.h [new file with mode: 0644]
sharedrabundvector.cpp
sharedrabundvector.h

index ce07a034d64154d2d77fcf7cab64f922bec56231..284bb592f407196ae05100feb4f0c45160e68027 100644 (file)
@@ -44,6 +44,8 @@
                A74D36B8137DAFAA00332B0C /* chimerauchimecommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A74D36B7137DAFAA00332B0C /* chimerauchimecommand.cpp */; };
                A74D59A4159A1E2000043046 /* counttable.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A74D59A3159A1E2000043046 /* counttable.cpp */; };
                A754149714840CF7005850D1 /* summaryqualcommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A754149614840CF7005850D1 /* summaryqualcommand.cpp */; };
+               A7548FAD17142EBC00B1F05A /* getmetacommunitycommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7548FAC17142EBC00B1F05A /* getmetacommunitycommand.cpp */; };
+               A7548FB0171440ED00B1F05A /* qFinderDMM.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7548FAE171440EC00B1F05A /* qFinderDMM.cpp */; };
                A75790591301749D00A30DAB /* homovacommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A75790581301749D00A30DAB /* homovacommand.cpp */; };
                A76CDD821510F143004C8458 /* prcseqscommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A76CDD811510F143004C8458 /* prcseqscommand.cpp */; };
                A7730EFF13967241007433A3 /* countseqscommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7730EFE13967241007433A3 /* countseqscommand.cpp */; };
                A74D59A6159A1E3600043046 /* counttable.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = counttable.h; sourceTree = "<group>"; };
                A754149514840CF7005850D1 /* summaryqualcommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = summaryqualcommand.h; sourceTree = "<group>"; };
                A754149614840CF7005850D1 /* summaryqualcommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = summaryqualcommand.cpp; sourceTree = "<group>"; };
+               A7548FAB17142EA500B1F05A /* getmetacommunitycommand.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = getmetacommunitycommand.h; sourceTree = "<group>"; };
+               A7548FAC17142EBC00B1F05A /* getmetacommunitycommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = getmetacommunitycommand.cpp; sourceTree = "<group>"; };
+               A7548FAE171440EC00B1F05A /* qFinderDMM.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = qFinderDMM.cpp; sourceTree = "<group>"; };
+               A7548FAF171440ED00B1F05A /* qFinderDMM.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = qFinderDMM.h; sourceTree = "<group>"; };
                A75790571301749D00A30DAB /* homovacommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = homovacommand.h; sourceTree = "<group>"; };
                A75790581301749D00A30DAB /* homovacommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = homovacommand.cpp; sourceTree = "<group>"; };
                A76CDD7F1510F09A004C8458 /* pcrseqscommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = pcrseqscommand.h; sourceTree = "<group>"; };
                                A7E9B77C12D37EC400DA6239 /* overlap.hpp */,
                                A7E9B79B12D37EC400DA6239 /* progress.cpp */,
                                A7E9B79C12D37EC400DA6239 /* progress.hpp */,
+                               A7548FAF171440ED00B1F05A /* qFinderDMM.h */,
+                               A7548FAE171440EC00B1F05A /* qFinderDMM.cpp */,
                                A7E9B7A512D37EC400DA6239 /* rarecalc.cpp */,
                                A7386C191619C9FB00651424 /* randomforest */,
                                A7E9B7A612D37EC400DA6239 /* rarecalc.h */,
                                A7E9B6F412D37EC400DA6239 /* getgroupscommand.cpp */,
                                A7E9B6F712D37EC400DA6239 /* getlabelcommand.h */,
                                A7E9B6F612D37EC400DA6239 /* getlabelcommand.cpp */,
+                               A7548FAB17142EA500B1F05A /* getmetacommunitycommand.h */,
+                               A7548FAC17142EBC00B1F05A /* getmetacommunitycommand.cpp */,
                                A70056E8156A93E300924A2D /* getotulabelscommand.h */,
                                A70056E5156A93D000924A2D /* getotulabelscommand.cpp */,
                                A7E9B6F912D37EC400DA6239 /* getlineagecommand.h */,
                                A7128B1D16B7002A00723BE4 /* getdistscommand.cpp in Sources */,
                                A7B0231516B8244C006BA09E /* removedistscommand.cpp in Sources */,
                                A799314B16CBD0CD0017E888 /* mergetaxsummarycommand.cpp in Sources */,
+                               A7548FAD17142EBC00B1F05A /* getmetacommunitycommand.cpp in Sources */,
+                               A7548FB0171440ED00B1F05A /* qFinderDMM.cpp in Sources */,
                        );
                        runOnlyForDeploymentPostprocessing = 0;
                };
index 193c53c4d610111cf369084f1a153400bfbe38e5..6c6bc17bb3dfd47f31b68facd01d6e0724cf3d8d 100644 (file)
 #include "getdistscommand.h"
 #include "removedistscommand.h"
 #include "mergetaxsummarycommand.h"
+#include "getmetacommunitycommand.h"
 
 /*******************************************************/
 
@@ -305,6 +306,7 @@ CommandFactory::CommandFactory(){
     commands["get.dists"]           = "get.dists";
     commands["remove.dists"]        = "remove.dists";
     commands["merge.taxsummary"]    = "merge.taxsummary";
+    commands["get.metacommunity"]   = "get.metacommunity";
     
 
 }
@@ -525,6 +527,7 @@ Command* CommandFactory::getCommand(string commandName, string optionString){
         else if(commandName == "get.dists")             {      command = new GetDistsCommand(optionString);                }
         else if(commandName == "remove.dists")          {      command = new RemoveDistsCommand(optionString);             }
         else if(commandName == "merge.taxsummary")      {      command = new MergeTaxSummaryCommand(optionString);         }
+        else if(commandName == "get.metacommunity")     {      command = new GetMetaCommunityCommand(optionString);        }
                else                                                                                    {       command = new NoCommand(optionString);                                          }
 
                return command;
@@ -686,6 +689,7 @@ Command* CommandFactory::getCommand(string commandName, string optionString, str
         else if(commandName == "get.dists")             {      pipecommand = new GetDistsCommand(optionString);                }
         else if(commandName == "remove.dists")          {      pipecommand = new RemoveDistsCommand(optionString);             }
         else if(commandName == "merge.taxsummary")      {      pipecommand = new MergeTaxSummaryCommand(optionString);         }
+        else if(commandName == "get.metacommunity")     {      pipecommand = new GetMetaCommunityCommand(optionString);        }
                else                                                                                    {       pipecommand = new NoCommand(optionString);                                              }
 
                return pipecommand;
@@ -833,6 +837,7 @@ Command* CommandFactory::getCommand(string commandName){
         else if(commandName == "get.dists")             {      shellcommand = new GetDistsCommand();               }
         else if(commandName == "remove.dists")          {      shellcommand = new RemoveDistsCommand();            }
         else if(commandName == "merge.taxsummary")      {      shellcommand = new MergeTaxSummaryCommand();        }
+        else if(commandName == "get.metacommunity")     {      shellcommand = new GetMetaCommunityCommand();       }
                else                                                                                    {       shellcommand = new NoCommand();                                         }
 
                return shellcommand;
diff --git a/getmetacommunitycommand.cpp b/getmetacommunitycommand.cpp
new file mode 100644 (file)
index 0000000..59efe60
--- /dev/null
@@ -0,0 +1,550 @@
+//
+//  getmetacommunitycommand.cpp
+//  Mothur
+//
+//  Created by SarahsWork on 4/9/13.
+//  Copyright (c) 2013 Schloss Lab. All rights reserved.
+//
+
+#include "getmetacommunitycommand.h"
+#include "qFinderDMM.h"
+
+//**********************************************************************************************************************
+vector<string> GetMetaCommunityCommand::setParameters(){
+       try {
+        CommandParameter pshared("shared", "InputTypes", "", "", "none", "none", "none","outputType",false,true); parameters.push_back(pshared);
+        CommandParameter pgroups("groups", "String", "", "", "", "", "","",false,false); parameters.push_back(pgroups);
+               CommandParameter plabel("label", "String", "", "", "", "", "","",false,false); parameters.push_back(plabel);
+        CommandParameter pminpartitions("minpartitions", "Number", "", "5", "", "", "","",false,false,true); parameters.push_back(pminpartitions);
+        CommandParameter pmaxpartitions("maxpartitions", "Number", "", "10", "", "", "","",false,false,true); parameters.push_back(pmaxpartitions);
+        CommandParameter poptimizegap("optimizegap", "Number", "", "3", "", "", "","",false,false,true); parameters.push_back(poptimizegap);
+               CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
+               CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
+               
+               vector<string> myArray;
+               for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
+               return myArray;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "NewCommand", "setParameters");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+string GetMetaCommunityCommand::getHelpString(){
+       try {
+               string helpString = "";
+               helpString += "The get.metacommunity command parameters are shared, label, groups, minpartitions, maxpartitions and optimizegap. The shared file is required. \n";
+        helpString += "The label parameter is used to analyze specific labels in your input. labels are separated by dashes.\n";
+               helpString += "The groups parameter allows you to specify which of the groups in your shared file you would like analyzed.  Group names are separated by dashes.\n";
+               helpString += "The minpartitions parameter is used to .... Default=5.\n";
+        helpString += "The maxpartitions parameter is used to .... Default=10.\n";
+        helpString += "The optimizegap parameter is used to .... Default=3.\n";
+               helpString += "The get.metacommunity command should be in the following format: get.metacommunity(shared=yourSharedFile).\n";
+               return helpString;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "GetMetaCommunityCommand", "getHelpString");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+string GetMetaCommunityCommand::getOutputPattern(string type) {
+    try {
+        string pattern = "";
+        
+        if (type == "fit")              {  pattern = "[filename],[distance],mix.fit"; }
+        else if (type == "relabund")    {  pattern = "[filename],[distance],[tag],mix.relabund"; }
+        else if (type == "design")      {  pattern = "[filename],mix.design"; }
+        else if (type == "matrix")      {  pattern = "[filename],[distance],[tag],mix.posterior"; }
+        else if (type == "parameters")  {  pattern = "[filename],[distance],mix.parameters"; }
+        else if (type == "summary")  {  pattern = "[filename],[distance],mix.summary"; }
+        else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true;  }
+        
+        return pattern;
+    }
+    catch(exception& e) {
+        m->errorOut(e, "GetMetaCommunityCommand", "getOutputPattern");
+        exit(1);
+    }
+}
+//**********************************************************************************************************************
+GetMetaCommunityCommand::GetMetaCommunityCommand(){
+       try {
+               abort = true; calledHelp = true;
+               setParameters();
+        vector<string> tempOutNames;
+               outputTypes["fit"] = tempOutNames;
+        outputTypes["relabund"] = tempOutNames;
+        outputTypes["matrix"] = tempOutNames;
+        outputTypes["design"] = tempOutNames;
+        outputTypes["parameters"] = tempOutNames;
+        outputTypes["summary"] = tempOutNames;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "GetMetaCommunityCommand", "GetMetaCommunityCommand");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+GetMetaCommunityCommand::GetMetaCommunityCommand(string option)  {
+       try {
+               abort = false; calledHelp = false;
+        allLines=true;
+               
+               //allow user to run help
+               if(option == "help") { help(); abort = true; calledHelp = true; }
+               else if(option == "citation") { citation(); abort = true; calledHelp = true;}
+               
+               else {
+                       //valid paramters for this command
+                       vector<string> myArray = setParameters();
+                       
+                       OptionParser parser(option);
+                       map<string,string> parameters = parser.getParameters();
+                       
+                       ValidParameters validParameter;
+                       map<string,string>::iterator it;
+                       //check to make sure all parameters are valid for command
+                       for (it = parameters.begin(); it != parameters.end(); it++) {
+                               if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
+                       }
+                       
+            vector<string> tempOutNames;
+            outputTypes["fit"] = tempOutNames;
+            outputTypes["relabund"] = tempOutNames;
+            outputTypes["matrix"] = tempOutNames;
+            outputTypes["design"] = tempOutNames;
+            outputTypes["parameters"] = tempOutNames;
+                       outputTypes["summary"] = tempOutNames;
+            
+                       //if the user changes the input directory command factory will send this info to us in the output parameter
+                       string inputDir = validParameter.validFile(parameters, "inputdir", false);
+                       if (inputDir == "not found"){   inputDir = "";          }
+                       else {
+                string path;
+                it = parameters.find("shared");
+                               if(it != parameters.end()){
+                                       path = m->hasPath(it->second);
+                                       if (path == "") {       parameters["shared"] = inputDir + it->second;           }
+                               }
+            }
+                       
+            //get shared file, it is required
+                       sharedfile = validParameter.validFile(parameters, "shared", true);
+                       if (sharedfile == "not open") { sharedfile = ""; abort = true; }
+                       else if (sharedfile == "not found") {
+                               //if there is a current shared file, use it
+                               sharedfile = m->getSharedFile();
+                               if (sharedfile != "") { m->mothurOut("Using " + sharedfile + " as input file for the shared parameter."); m->mothurOutEndLine(); }
+                               else {  m->mothurOut("You have no current sharedfile and the shared parameter is required."); m->mothurOutEndLine(); abort = true; }
+                       }else { m->setSharedFile(sharedfile); }
+            
+            //if the user changes the output directory command factory will send this info to us in the output parameter
+                       outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){
+                               outputDir = m->hasPath(sharedfile); //if user entered a file with a path then preserve it
+                       }
+            
+            string temp = validParameter.validFile(parameters, "minpartitions", false);        if (temp == "not found"){       temp = "5";      }
+                       m->mothurConvert(temp, minpartitions);
+            
+            temp = validParameter.validFile(parameters, "maxpartitions", false);        if (temp == "not found"){      temp = "10";     }
+                       m->mothurConvert(temp, maxpartitions);
+            
+            temp = validParameter.validFile(parameters, "optimizegap", false);          if (temp == "not found"){      temp = "3";      }
+                       m->mothurConvert(temp, optimizegap);
+            
+            string groups = validParameter.validFile(parameters, "groups", false);
+                       if (groups == "not found") { groups = ""; }
+                       else { m->splitAtDash(groups, Groups); }
+                       m->setGroups(Groups);
+            
+            string label = validParameter.validFile(parameters, "label", false);
+                       if (label == "not found") { label = ""; }
+                       else {
+                               if(label != "all") {  m->splitAtDash(label, labels);  allLines = 0;  }
+                               else { allLines = 1;  }
+                       }
+               }
+               
+       }
+       catch(exception& e) {
+               m->errorOut(e, "GetMetaCommunityCommand", "GetMetaCommunityCommand");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+
+int GetMetaCommunityCommand::execute(){
+       try {
+               
+               if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
+        
+        InputData input(sharedfile, "sharedfile");
+        vector<SharedRAbundVector*> lookup = input.getSharedRAbundVectors();
+        string lastLabel = lookup[0]->getLabel();
+        
+        //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
+        set<string> processedLabels;
+        set<string> userLabels = labels;
+        
+        //as long as you are not at the end of the file or done wih the lines you want
+        while((lookup[0] != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
+            
+            if (m->control_pressed) { for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  }  return 0; }
+            
+            if(allLines == 1 || labels.count(lookup[0]->getLabel()) == 1){
+                
+                m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
+                
+                process(lookup);
+                
+                processedLabels.insert(lookup[0]->getLabel());
+                userLabels.erase(lookup[0]->getLabel());
+            }
+            
+            if ((m->anyLabelsToProcess(lookup[0]->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
+                string saveLabel = lookup[0]->getLabel();
+                
+                for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  }
+                lookup = input.getSharedRAbundVectors(lastLabel);
+                m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
+                
+                process(lookup);
+                
+                processedLabels.insert(lookup[0]->getLabel());
+                userLabels.erase(lookup[0]->getLabel());
+                
+                //restore real lastlabel to save below
+                lookup[0]->setLabel(saveLabel);
+            }
+            
+            lastLabel = lookup[0]->getLabel();
+            //prevent memory leak
+            for (int i = 0; i < lookup.size(); i++) {  delete lookup[i]; lookup[i] = NULL; }
+            
+            if (m->control_pressed) { return 0; }
+            
+            //get next line to process
+            lookup = input.getSharedRAbundVectors();
+        }
+        
+        if (m->control_pressed) {  return 0; }
+        
+        //output error messages about any remaining user labels
+        set<string>::iterator it;
+        bool needToRun = false;
+        for (it = userLabels.begin(); it != userLabels.end(); it++) {
+            m->mothurOut("Your file does not include the label " + *it);
+            if (processedLabels.count(lastLabel) != 1) {
+                m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
+                needToRun = true;
+            }else {
+                m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
+            }
+        }
+        
+        //run last label if you need to
+        if (needToRun == true)  {
+            for (int i = 0; i < lookup.size(); i++) { if (lookup[i] != NULL) { delete lookup[i]; } }
+            lookup = input.getSharedRAbundVectors(lastLabel);
+            
+            m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
+            
+            process(lookup);
+            
+            for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  }
+        }
+               
+        //output files created by command
+               m->mothurOutEndLine();
+               m->mothurOut("Output File Names: "); m->mothurOutEndLine();
+               for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
+               m->mothurOutEndLine();
+        return 0;
+               
+    }
+       catch(exception& e) {
+               m->errorOut(e, "GetMetaCommunityCommand", "execute");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+int GetMetaCommunityCommand::process(vector<SharedRAbundVector*>& thislookup){
+       try {
+        
+        double minLaplace = 1e10;
+        int minPartition = 0;
+        
+        map<string, string> variables;
+        variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(sharedfile));
+        variables["[distance]"] = thislookup[0]->getLabel();
+               string outputFileName = getOutputFileName("fit", variables);
+        outputNames.push_back(outputFileName); outputTypes["fit"].push_back(outputFileName);
+        
+               ofstream fitData;
+               m->openOutputFile(outputFileName, fitData);
+        fitData.setf(ios::fixed, ios::floatfield);
+        fitData.setf(ios::showpoint);
+        cout.setf(ios::fixed, ios::floatfield);
+        cout.setf(ios::showpoint);
+
+        vector< vector<int> > sharedMatrix;
+        for (int i = 0; i < thislookup.size(); i++) { sharedMatrix.push_back(thislookup[i]->getAbundances()); }
+        
+        m->mothurOut("K\tNLE\t\tlogDet\tBIC\t\tAIC\t\tLaplace\n");
+        fitData << "K\tNLE\tlogDet\tBIC\tAIC\tLaplace" << endl;
+        
+        for(int numPartitions=1;numPartitions<=maxpartitions;numPartitions++){
+            
+            if (m->control_pressed) { break; }
+            
+            qFinderDMM findQ(sharedMatrix, numPartitions);
+            
+            double laplace = findQ.getLaplace();
+            m->mothurOut(toString(numPartitions) + '\t');
+            cout << setprecision (2) << findQ.getNLL() << '\t' << findQ.getLogDet() << '\t';
+            m->mothurOutJustToLog(toString(findQ.getNLL()) + '\t' + toString(findQ.getLogDet()) + '\t');
+            cout << findQ.getBIC() << '\t' << findQ.getAIC() << '\t' << laplace;
+            m->mothurOutJustToLog(toString(findQ.getBIC()) + '\t' + toString(findQ.getAIC()) + '\t' + toString(laplace));
+            
+            fitData << numPartitions << '\t';
+            fitData << setprecision (2) << findQ.getNLL() << '\t' << findQ.getLogDet() << '\t';
+            fitData << findQ.getBIC() << '\t' << findQ.getAIC() << '\t' << laplace << endl;
+            
+            if(laplace < minLaplace){
+                minPartition = numPartitions;
+                minLaplace = laplace;
+                m->mothurOut("***");
+            }
+            m->mothurOutEndLine();
+            
+            variables["[tag]"] = toString(numPartitions);
+            string relabund = getOutputFileName("relabund", variables);
+            outputNames.push_back(relabund); outputTypes["relabund"].push_back(relabund);
+            string matrix = getOutputFileName("matrix", variables);
+            outputNames.push_back(matrix); outputTypes["matrix"].push_back(matrix);
+            
+            findQ.printZMatrix(matrix, m->getGroups());
+            findQ.printRelAbund(relabund, m->currentBinLabels);
+            
+            if(optimizegap != -1 && (numPartitions - minPartition) >= optimizegap && numPartitions >= minpartitions){ break;  }
+        }
+        fitData.close();
+        
+        //minPartition = 4;
+        
+        if (m->control_pressed) { return 0; }
+        
+        generateSummaryFile(minpartitions, outputTypes["relabund"][0], outputTypes["relabund"][outputTypes["relabund"].size()-1], outputTypes["matrix"][outputTypes["matrix"].size()-1], thislookup[0]->getLabel());
+
+        
+        return 0;
+    }
+       catch(exception& e) {
+               m->errorOut(e, "GetMetaCommunityCommand", "process");
+               exit(1);
+       }
+}
+/**************************************************************************************************/
+
+vector<double> GetMetaCommunityCommand::generateDesignFile(int numPartitions, string input){
+    try {
+        vector<double> piValues(numPartitions, 0);
+        
+        ifstream postFile;
+        m->openInputFile(input, postFile);//((fileRoot + toString(numPartitions) + "mix.posterior").c_str()); //matrix file
+        map<string, string> variables;
+        variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(input));
+               string outputFileName = getOutputFileName("design", variables);
+        ofstream designFile;
+        m->openOutputFile(outputFileName, designFile);
+        outputNames.push_back(outputFileName); outputTypes["design"].push_back(outputFileName);
+        
+        
+        vector<string> titles(numPartitions);
+        
+        for(int i=0;i<numPartitions;i++){   postFile >> titles[i];  }
+        
+        double posterior;
+        string sampleName;
+        int numSamples = 0;
+        
+        while(postFile){
+            
+            if (m->control_pressed) { break; }
+            
+            double maxPosterior = 0.0000;
+            int maxPartition = -1;
+            
+            postFile >> sampleName;
+            
+            for(int i=0;i<numPartitions;i++){
+                
+                postFile >> posterior;
+                if(posterior > maxPosterior){
+                    maxPosterior = posterior;
+                    maxPartition = i;
+                }
+                piValues[i] += posterior;
+                
+            }
+            
+            designFile << sampleName << '\t' << titles[maxPartition] << endl;
+            
+            numSamples++;
+            m->gobble(postFile);
+        }
+        for(int i=0;i<numPartitions;i++){
+            piValues[i] /= (double)numSamples;
+        }
+        
+        
+        postFile.close();
+        designFile.close();
+        
+        return piValues;
+    }
+       catch(exception& e) {
+               m->errorOut(e, "GetMetaCommunityCommand", "generateDesignFile");
+               exit(1);
+       }
+}
+
+/**************************************************************************************************/
+
+inline bool summaryFunction(summaryData i, summaryData j){ return i.difference > j.difference;   }
+
+/**************************************************************************************************/
+int GetMetaCommunityCommand::generateSummaryFile(int numPartitions, string reference, string partFile, string designInput, string label){
+    try {
+        vector<summaryData> summary;
+        
+        vector<double> pMean(numPartitions, 0);
+        vector<double> pLCI(numPartitions, 0);
+        vector<double> pUCI(numPartitions, 0);
+        
+        string name, header;
+        double mean, lci, uci;
+        
+        
+        vector<double> piValues = generateDesignFile(numPartitions, designInput);
+        
+        ifstream referenceFile;
+        m->openInputFile(reference, referenceFile); //((fileRoot + label + ".1mix.relabund").c_str());
+        ifstream partitionFile;
+        m->openInputFile(partFile, partitionFile); //((fileRoot + toString(numPartitions) + "mix.relabund").c_str());
+        
+        header = m->getline(referenceFile);
+        header = m->getline(partitionFile);
+        stringstream head(header);
+        string dummy, label;
+        head >> dummy;
+        vector<string> thetaValues(numPartitions, "");
+        for(int i=0;i<numPartitions;i++){
+            head >> label >> dummy >> dummy;
+            thetaValues[i] = label.substr(label.find_last_of('_')+1);
+        }
+        
+        
+        vector<double> partitionDiff(numPartitions, 0.0000);
+        
+        while(referenceFile){
+            
+            if (m->control_pressed) { break; }
+            
+            referenceFile >> name >> mean >> lci >> uci;
+            
+            summaryData tempData;
+            tempData.name = name;
+            tempData.refMean = mean;
+            
+            double difference = 0.0000;
+            
+            partitionFile >> name;
+            for(int j=0;j<numPartitions;j++){
+                partitionFile >> pMean[j] >> pLCI[j] >> pUCI[j];
+                difference += abs(mean - pMean[j]);
+                partitionDiff[j] += abs(mean - pMean[j]);;
+            }
+            
+            tempData.partMean = pMean;
+            tempData.partLCI = pLCI;
+            tempData.partUCI = pUCI;
+            tempData.difference = difference;
+            summary.push_back(tempData);
+            
+            m->gobble(referenceFile);
+            m->gobble(partitionFile);
+        }
+        referenceFile.close();
+        partitionFile.close();
+        
+        if (m->control_pressed) { return 0; }
+        
+        int numOTUs = (int)summary.size();
+        
+        sort(summary.begin(), summary.end(), summaryFunction);
+        
+        map<string, string> variables;
+        variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(sharedfile));
+        variables["[distance]"] = label;
+               string outputFileName = getOutputFileName("parameters", variables);
+        outputNames.push_back(outputFileName); outputTypes["parameters"].push_back(outputFileName);
+        
+        ofstream parameterFile;
+        m->openOutputFile(outputFileName, parameterFile); //((fileRoot + "mix.parameters").c_str());
+        parameterFile.setf(ios::fixed, ios::floatfield);
+        parameterFile.setf(ios::showpoint);
+        
+        double totalDifference =  0.0000;
+        parameterFile << "Part\tDif2Ref_i\ttheta_i\tpi_i\n";
+        for(int i=0;i<numPartitions;i++){
+            if (m->control_pressed) { break; }
+            parameterFile << i+1 << '\t' << setprecision(2) << partitionDiff[i] << '\t' << thetaValues[i] << '\t' << piValues[i] << endl;
+            totalDifference += partitionDiff[i];
+        }
+        parameterFile.close();
+        
+        if (m->control_pressed) { return 0; }
+        
+        string summaryFileName = getOutputFileName("summary", variables);
+        outputNames.push_back(outputFileName); outputTypes["summary"].push_back(outputFileName);
+        
+        ofstream summaryFile;
+        m->openOutputFile(summaryFileName, summaryFile); //((fileRoot + "mix.summary").c_str());
+        summaryFile.setf(ios::fixed, ios::floatfield);
+        summaryFile.setf(ios::showpoint);
+        
+        
+        summaryFile << "OTU\tP0.mean";
+        for(int i=0;i<numPartitions;i++){
+            summaryFile << "\tP" << i+1 << ".mean\tP" << i+1 << ".lci\tP" << i+1 << ".uci";
+        }
+        summaryFile << "\tDifference\tCumFraction" << endl;
+        
+        double cumDiff = 0.0000;
+        
+        for(int i=0;i<numOTUs;i++){
+            if (m->control_pressed) { break; }
+            summaryFile << summary[i].name << setprecision(2) << '\t' << summary[i].refMean;
+            for(int j=0;j<numPartitions;j++){
+                summaryFile  << '\t' << summary[i].partMean[j] << '\t' << summary[i].partLCI[j] << '\t' << summary[i].partUCI[j];
+            }
+            
+            cumDiff += summary[i].difference/totalDifference;
+            summaryFile << '\t' << summary[i].difference << '\t' << cumDiff << endl;
+        }
+        summaryFile.close();
+        
+        return 0;
+        
+    }
+       catch(exception& e) {
+               m->errorOut(e, "GetMetaCommunityCommand", "generateSummaryFile");
+               exit(1);
+       }
+    
+}
+//**********************************************************************************************************************
+
diff --git a/getmetacommunitycommand.h b/getmetacommunitycommand.h
new file mode 100644 (file)
index 0000000..62070d6
--- /dev/null
@@ -0,0 +1,64 @@
+//
+//  getmetacommunitycommand.h
+//  Mothur
+//
+//  Created by SarahsWork on 4/9/13.
+//  Copyright (c) 2013 Schloss Lab. All rights reserved.
+//
+
+#ifndef Mothur_getmetacommunitycommand_h
+#define Mothur_getmetacommunitycommand_h
+
+#include "command.hpp"
+#include "inputdata.h"
+
+/**************************************************************************************************/
+
+class GetMetaCommunityCommand : public Command {
+public:
+    GetMetaCommunityCommand(string);
+    GetMetaCommunityCommand();
+    ~GetMetaCommunityCommand(){}
+    
+    vector<string> setParameters();
+    string getCommandName()                    { return "get.metacommunity";           }
+    string getCommandCategory()                { return "OTU-Based Approaches";         }
+    
+    string getOutputPattern(string);
+    
+       string getHelpString();
+    string getCitation() { return "http://www.mothur.org/wiki/Get.metacommunity"; }
+    string getDescription()            { return "brief description"; }
+    
+    int execute();
+    void help() { m->mothurOut(getHelpString()); }
+    
+private:
+    bool abort, allLines;
+    string outputDir;
+    vector<string> outputNames;
+    string sharedfile;
+    int minpartitions, maxpartitions, optimizegap;
+    vector<string> Groups;
+    set<string> labels;
+    
+    int process(vector<SharedRAbundVector*>&);
+    vector<double> generateDesignFile(int, string);
+    int generateSummaryFile(int, string, string, string, string);
+
+};
+
+/**************************************************************************************************/
+struct summaryData {
+    
+    string name;
+    double refMean, difference;
+    vector<double> partMean, partLCI, partUCI;
+    
+};
+/**************************************************************************************************/
+
+
+
+
+#endif
index effb1ad44e04abd9c1bdd8523ca3b05f4253de94..58f1acccfc7ed4e3aefc902ad199dd7a17a0516d 100644 (file)
@@ -1479,29 +1479,189 @@ double LinearAlgebra::calcPearsonSig(double n, double r){
 /*********************************************************************************************************************************/
 
 vector<vector<double> > LinearAlgebra::getObservedEuclideanDistance(vector<vector<double> >& relAbundData){
+    try {
 
-       int numSamples = relAbundData.size();
-       int numOTUs = relAbundData[0].size();
-       
-       vector<vector<double> > dMatrix(numSamples);
-       for(int i=0;i<numSamples;i++){
-               dMatrix[i].resize(numSamples);
+        int numSamples = relAbundData.size();
+        int numOTUs = relAbundData[0].size();
+        
+        vector<vector<double> > dMatrix(numSamples);
+        for(int i=0;i<numSamples;i++){
+            dMatrix[i].resize(numSamples);
+        }
+        
+        for(int i=0;i<numSamples;i++){
+            for(int j=0;j<numSamples;j++){
+                
+                if (m->control_pressed) { return dMatrix; }
+                
+                double d = 0;
+                for(int k=0;k<numOTUs;k++){
+                    d += pow((relAbundData[i][k] - relAbundData[j][k]), 2.0000);
+                }
+                dMatrix[i][j] = pow(d, 0.50000);
+                dMatrix[j][i] = dMatrix[i][j];
+                
+            }
+        }
+        return dMatrix;
        }
-       
-       for(int i=0;i<numSamples;i++){
-               for(int j=0;j<numSamples;j++){
-                       
-                       double d = 0;
-                       for(int k=0;k<numOTUs;k++){
-                               d += pow((relAbundData[i][k] - relAbundData[j][k]), 2.0000);
-                       }
-                       dMatrix[i][j] = pow(d, 0.50000);
-                       dMatrix[j][i] = dMatrix[i][j];
-                       
-               }
+       catch(exception& e) {
+               m->errorOut(e, "LinearAlgebra", "getObservedEuclideanDistance");
+               exit(1);
+       }
+}
+
+/*********************************************************************************************************************************/
+vector<double> LinearAlgebra::solveEquations(vector<vector<double> > A, vector<double> b){
+    try {
+        int length = (int)b.size();
+        vector<double> x(length, 0);
+        vector<int> index(length);
+        for(int i=0;i<length;i++){  index[i] = i;   }
+        double d;
+        
+        ludcmp(A, index, d);  if (m->control_pressed) { return b; }
+        lubksb(A, index, b);
+        
+        return b;
+    }
+       catch(exception& e) {
+               m->errorOut(e, "LinearAlgebra", "solveEquations");
+               exit(1);
+       }
+}
+
+/*********************************************************************************************************************************/
+
+void LinearAlgebra::ludcmp(vector<vector<double> >& A, vector<int>& index, double& d){
+    try {
+        double tiny = 1e-20;
+        
+        int n = (int)A.size();
+        vector<double> vv(n, 0.0);
+        double temp;
+        int imax;
+        
+        d = 1.0;
+        
+        for(int i=0;i<n;i++){
+            double big = 0.0;
+            for(int j=0;j<n;j++){   if((temp=fabs(A[i][j])) > big ) big=temp;  }
+            if(big==0.0){   m->mothurOut("Singular matrix in routine ludcmp\n");    }
+            vv[i] = 1.0/big;
+        }
+        
+        for(int j=0;j<n;j++){
+            if (m->control_pressed) { break; }
+            for(int i=0;i<j;i++){
+                double sum = A[i][j];
+                for(int k=0;k<i;k++){   sum -= A[i][k] * A[k][j];   }
+                A[i][j] = sum;
+            }
+            
+            double big = 0.0;
+            for(int i=j;i<n;i++){
+                double sum = A[i][j];
+                for(int k=0;k<j;k++){   sum -= A[i][k] * A[k][j];   }
+                A[i][j] = sum;
+                double dum;
+                if((dum = vv[i] * fabs(sum)) >= big){
+                    big = dum;
+                    imax = i;
+                }
+            }
+            if(j != imax){
+                for(int k=0;k<n;k++){
+                    double dum = A[imax][k];
+                    A[imax][k] = A[j][k];
+                    A[j][k] = dum;
+                }
+                d = -d;
+                vv[imax] = vv[j];
+            }
+            index[j] = imax;
+            
+            if(A[j][j] == 0.0){ A[j][j] = tiny; }
+            
+            if(j != n-1){
+                double dum = 1.0/A[j][j];
+                for(int i=j+1;i<n;i++){ A[i][j] *= dum; }
+            }
+        }
+    }
+       catch(exception& e) {
+               m->errorOut(e, "LinearAlgebra", "ludcmp");
+               exit(1);
+       }
+}
+
+/*********************************************************************************************************************************/
+
+void LinearAlgebra::lubksb(vector<vector<double> >& A, vector<int>& index, vector<double>& b){
+    try {
+        double total;
+        int n = (int)A.size();
+        int ii = 0;
+        
+        for(int i=0;i<n;i++){
+            if (m->control_pressed) { break; }
+            int ip = index[i];
+            total = b[ip];
+            b[ip] = b[i];
+            
+            if (ii != 0) {
+                for(int j=ii-1;j<i;j++){
+                    total -= A[i][j] * b[j];
+                }
+            }
+            else if(total != 0){  ii = i+1;   }
+            b[i] = total;
+        }
+        for(int i=n-1;i>=0;i--){
+            total = b[i];
+            for(int j=i+1;j<n;j++){ total -= A[i][j] * b[j];  }
+            b[i] = total / A[i][i];
+        }
+    }
+       catch(exception& e) {
+               m->errorOut(e, "LinearAlgebra", "lubksb");
+               exit(1);
+       }
+}
+
+/*********************************************************************************************************************************/
+
+vector<vector<double> > LinearAlgebra::getInverse(vector<vector<double> > matrix){
+    try {
+        int n = (int)matrix.size();
+        
+        vector<vector<double> > inverse(n);
+        for(int i=0;i<n;i++){   inverse[i].assign(n, 0.0000);   }
+        
+        vector<double> column(n, 0.0000);
+        vector<int> index(n, 0);
+        double dummy;
+        
+        ludcmp(matrix, index, dummy);
+        
+        for(int j=0;j<n;j++){
+            if (m->control_pressed) { break; }
+            
+            column.assign(n, 0);
+            
+            column[j] = 1.0000;
+            
+            lubksb(matrix, index, column);
+            
+            for(int i=0;i<n;i++){   inverse[i][j] = column[i];  }
+        }
+        
+        return inverse;
+    }
+       catch(exception& e) {
+               m->errorOut(e, "LinearAlgebra", "getInverse");
+               exit(1);
        }
-       return dMatrix;
-       
 }
 
 /*********************************************************************************************************************************/
index ecb635f70672c8a8928090935fa8af724a5a819b..ef9e03cde900b4e01bdd61a99083801c0d25b913 100644 (file)
@@ -38,6 +38,9 @@ public:
     double calcPearsonSig(double, double); //length, coeff.
     double calcKendallSig(double, double); //length, coeff.
     
+    vector<double> solveEquations(vector<vector<double> >, vector<double>);
+    vector<vector<double> > getInverse(vector<vector<double> >);
+
     
 private:
        MothurOut* m;
@@ -59,6 +62,10 @@ private:
     double ran4(int&); //for testing
     void psdes(unsigned long &, unsigned long &); //for testing
     
+    void ludcmp(vector<vector<double> >&, vector<int>&, double&);
+    void lubksb(vector<vector<double> >&, vector<int>&, vector<double>&);
+
+    
 };
 
 #endif
diff --git a/qFinderDMM.cpp b/qFinderDMM.cpp
new file mode 100644 (file)
index 0000000..c89644b
--- /dev/null
@@ -0,0 +1,1326 @@
+//
+//  qFinderDMM.cpp
+//  pds_dmm
+//
+//  Created by Patrick Schloss on 11/8/12.
+//  Copyright (c) 2012 University of Michigan. All rights reserved.
+//
+
+#include "qFinderDMM.h"
+#include "linearalgebra.h"
+
+#define EPSILON numeric_limits<double>::epsilon()
+
+/**************************************************************************************************/
+
+qFinderDMM::qFinderDMM(vector<vector<int> > cm, int p): countMatrix(cm), numPartitions(p){
+    try {
+        m = MothurOut::getInstance();
+        numSamples = (int)countMatrix.size();
+        numOTUs = (int)countMatrix[0].size();
+        
+        
+        kMeans();
+        //    cout << "done kMeans" << endl;
+        
+        optimizeLambda();
+        
+        
+        //    cout << "done optimizeLambda" << endl;
+        
+        double change = 1.0000;
+        currNLL = 0.0000;
+        
+        int iter = 0;
+        
+        while(change > 1.0e-6 && iter < 100){
+            
+            //        cout << "Calc_Z: ";
+            calculatePiK();
+            
+            optimizeLambda();
+            
+            //        printf("Iter:%d\t",iter);
+            
+            for(int i=0;i<numPartitions;i++){
+                weights[i] = 0.0000;
+                for(int j=0;j<numSamples;j++){
+                    weights[i] += zMatrix[i][j];
+                }
+                //            printf("w_%d=%.3f\t",i,weights[i]);
+                
+            }
+            
+            double nLL = getNegativeLogLikelihood();
+            
+            change = abs(nLL - currNLL);
+            
+            currNLL = nLL;
+            
+            //        printf("NLL=%.5f\tDelta=%.4e\n",currNLL, change);
+            
+            iter++;
+        }
+        
+        error.resize(numPartitions);
+        
+        logDeterminant = 0.0000;
+        
+        LinearAlgebra l;
+        
+        for(currentPartition=0;currentPartition<numPartitions;currentPartition++){
+            
+            error[currentPartition].assign(numOTUs, 0.0000);
+            
+            if(currentPartition > 0){
+                logDeterminant += (2.0 * log(numSamples) - log(weights[currentPartition]));
+            }
+            vector<vector<double> > hessian = getHessian();
+            vector<vector<double> > invHessian = l.getInverse(hessian);
+            
+            for(int i=0;i<numOTUs;i++){
+                logDeterminant += log(abs(hessian[i][i]));
+                error[currentPartition][i] = invHessian[i][i];
+            }
+            
+        }
+        
+        int numParameters = numPartitions * numOTUs + numPartitions - 1;
+        laplace = currNLL + 0.5 * logDeterminant - 0.5 * numParameters * log(2.0 * 3.14159);
+        bic = currNLL + 0.5 * log(numSamples) * numParameters;
+        aic = currNLL + numParameters;
+    }
+       catch(exception& e) {
+               m->errorOut(e, "qFinderDMM", "qFinderDMM");
+               exit(1);
+       }
+}
+
+/**************************************************************************************************/
+
+void qFinderDMM::printZMatrix(string fileName, vector<string> sampleName){
+    try {
+        ofstream printMatrix;
+        m->openOutputFile(fileName, printMatrix); //(fileName.c_str());
+        printMatrix.setf(ios::fixed, ios::floatfield);
+        printMatrix.setf(ios::showpoint);
+        
+        for(int i=0;i<numPartitions;i++){   printMatrix << "\tPartition_" << i+1;   }   printMatrix << endl;
+        
+        for(int i=0;i<numSamples;i++){
+            printMatrix << sampleName[i];
+            for(int j=0;j<numPartitions;j++){
+                printMatrix << setprecision(4) << '\t' << zMatrix[j][i];
+            }
+            printMatrix << endl;
+        }
+        printMatrix.close();
+    }
+       catch(exception& e) {
+               m->errorOut(e, "qFinderDMM", "printZMatrix");
+               exit(1);
+       }
+}
+
+/**************************************************************************************************/
+
+void qFinderDMM::printRelAbund(string fileName, vector<string> otuNames){
+    try {
+        ofstream printRA;
+        m->openOutputFile(fileName, printRA); //(fileName.c_str());
+        printRA.setf(ios::fixed, ios::floatfield);
+        printRA.setf(ios::showpoint);
+        
+        vector<double> totals(numPartitions, 0.0000);
+        for(int i=0;i<numPartitions;i++){
+            for(int j=0;j<numOTUs;j++){
+                totals[i] += exp(lambdaMatrix[i][j]);
+            }
+        }
+        
+        printRA << "Taxon";
+        for(int i=0;i<numPartitions;i++){
+            printRA << "\tPartition_" << i+1 << '_' << setprecision(4) << totals[i];
+            printRA << "\tPartition_" << i+1 <<"_LCI" << "\tPartition_" << i+1 << "_UCI";
+        }
+        printRA << endl;
+        
+        for(int i=0;i<numOTUs;i++){
+            
+            if (m->control_pressed) { break; }
+            
+            printRA << otuNames[i];
+            for(int j=0;j<numPartitions;j++){
+                
+                if(error[j][i] >= 0.0000){
+                    double std = sqrt(error[j][i]);
+                    printRA << '\t' << 100 * exp(lambdaMatrix[j][i]) / totals[j];
+                    printRA << '\t' << 100 * exp(lambdaMatrix[j][i] - 2.0 * std) / totals[j];
+                    printRA << '\t' << 100 * exp(lambdaMatrix[j][i] + 2.0 * std) / totals[j];
+                }
+                else{
+                    printRA << '\t' << 100 * exp(lambdaMatrix[j][i]) / totals[j];
+                    printRA << '\t' << "NA";
+                    printRA << '\t' << "NA";
+                }
+            }
+            printRA << endl;
+        }
+        
+        printRA.close();
+    }
+       catch(exception& e) {
+               m->errorOut(e, "qFinderDMM", "printRelAbund");
+               exit(1);
+       }
+}
+
+/**************************************************************************************************/
+
+// these functions for bfgs2 solver were lifted from the gnu_gsl source code...
+
+/* Find a minimum in x=[0,1] of the interpolating quadratic through
+ * (0,f0) (1,f1) with derivative fp0 at x=0.  The interpolating
+ * polynomial is q(x) = f0 + fp0 * z + (f1-f0-fp0) * z^2
+ */
+
+static double
+interp_quad (double f0, double fp0, double f1, double zl, double zh)
+{
+    double fl = f0 + zl*(fp0 + zl*(f1 - f0 -fp0));
+    double fh = f0 + zh*(fp0 + zh*(f1 - f0 -fp0));
+    double c = 2 * (f1 - f0 - fp0);       /* curvature */
+    
+    double zmin = zl, fmin = fl;
+    
+    if (fh < fmin) { zmin = zh; fmin = fh; } 
+    
+    if (c > 0)  /* positive curvature required for a minimum */
+    {
+        double z = -fp0 / c;      /* location of minimum */
+        if (z > zl && z < zh) {
+            double f = f0 + z*(fp0 + z*(f1 - f0 -fp0));
+            if (f < fmin) { zmin = z; fmin = f; };
+        }
+    }
+    
+    return zmin;
+}
+
+/**************************************************************************************************/
+
+/* Find a minimum in x=[0,1] of the interpolating cubic through
+ * (0,f0) (1,f1) with derivatives fp0 at x=0 and fp1 at x=1.
+ *
+ * The interpolating polynomial is:
+ *
+ * c(x) = f0 + fp0 * z + eta * z^2 + xi * z^3
+ *
+ * where eta=3*(f1-f0)-2*fp0-fp1, xi=fp0+fp1-2*(f1-f0). 
+ */
+
+double cubic (double c0, double c1, double c2, double c3, double z){
+    return c0 + z * (c1 + z * (c2 + z * c3));
+}
+
+/**************************************************************************************************/
+
+void check_extremum (double c0, double c1, double c2, double c3, double z,
+                     double *zmin, double *fmin){
+    /* could make an early return by testing curvature >0 for minimum */
+    
+    double y = cubic (c0, c1, c2, c3, z);
+    
+    if (y < *fmin)  
+    {
+        *zmin = z;  /* accepted new point*/
+        *fmin = y;
+    }
+}
+
+/**************************************************************************************************/
+
+int gsl_poly_solve_quadratic (double a, double b, double c, 
+                              double *x0, double *x1)
+{
+    
+    double disc = b * b - 4 * a * c;
+    
+    if (a == 0) /* Handle linear case */
+    {
+        if (b == 0)
+        {
+            return 0;
+        }
+        else
+        {
+            *x0 = -c / b;
+            return 1;
+        };
+    }
+    
+    if (disc > 0)
+    {
+        if (b == 0)
+        {
+            double r = fabs (0.5 * sqrt (disc) / a);
+            *x0 = -r;
+            *x1 =  r;
+        }
+        else
+        {
+            double sgnb = (b > 0 ? 1 : -1);
+            double temp = -0.5 * (b + sgnb * sqrt (disc));
+            double r1 = temp / a ;
+            double r2 = c / temp ;
+            
+            if (r1 < r2) 
+            {
+                *x0 = r1 ;
+                *x1 = r2 ;
+            } 
+            else 
+            {
+                *x0 = r2 ;
+                *x1 = r1 ;
+            }
+        }
+        return 2;
+    }
+    else if (disc == 0) 
+    {
+        *x0 = -0.5 * b / a ;
+        *x1 = -0.5 * b / a ;
+        return 2 ;
+    }
+    else
+    {
+        return 0;
+    }
+   
+}
+
+/**************************************************************************************************/
+
+double interp_cubic (double f0, double fp0, double f1, double fp1, double zl, double zh){
+    double eta = 3 * (f1 - f0) - 2 * fp0 - fp1;
+    double xi = fp0 + fp1 - 2 * (f1 - f0);
+    double c0 = f0, c1 = fp0, c2 = eta, c3 = xi;
+    double zmin, fmin;
+    double z0, z1;
+    
+    zmin = zl; fmin = cubic(c0, c1, c2, c3, zl);
+    check_extremum (c0, c1, c2, c3, zh, &zmin, &fmin);
+    
+    {
+        int n = gsl_poly_solve_quadratic (3 * c3, 2 * c2, c1, &z0, &z1);
+        
+        if (n == 2)  /* found 2 roots */
+        {
+            if (z0 > zl && z0 < zh) 
+                check_extremum (c0, c1, c2, c3, z0, &zmin, &fmin);
+            if (z1 > zl && z1 < zh) 
+                check_extremum (c0, c1, c2, c3, z1, &zmin, &fmin);
+        }
+        else if (n == 1)  /* found 1 root */
+        {
+            if (z0 > zl && z0 < zh) 
+                check_extremum (c0, c1, c2, c3, z0, &zmin, &fmin);
+        }
+    }
+    
+    return zmin;
+}
+
+/**************************************************************************************************/
+
+double interpolate (double a, double fa, double fpa,
+                    double b, double fb, double fpb, double xmin, double xmax){
+    /* Map [a,b] to [0,1] */
+    double z, alpha, zmin, zmax;
+    
+    zmin = (xmin - a) / (b - a);
+    zmax = (xmax - a) / (b - a);
+    
+    if (zmin > zmax)
+    {
+        double tmp = zmin;
+        zmin = zmax;
+        zmax = tmp;
+    };
+    
+    if(!isnan(fpb) ){
+        z = interp_cubic (fa, fpa * (b - a), fb, fpb * (b - a), zmin, zmax);
+    }
+    else{
+        z = interp_quad(fa, fpa * (b - a), fb, zmin, zmax);
+    }
+
+    
+    alpha = a + z * (b - a);
+    
+    return alpha;
+}
+
+/**************************************************************************************************/
+
+int qFinderDMM::lineMinimizeFletcher(vector<double>& x, vector<double>& p, double f0, double df0, double alpha1, double& alphaNew, double& fAlpha, vector<double>& xalpha, vector<double>& gradient ){
+    try {
+        
+        double rho = 0.01;
+        double sigma = 0.10;
+        double tau1 = 9.00;
+        double tau2 = 0.05;
+        double tau3 = 0.50;
+        
+        double alpha = alpha1;
+        double alpha_prev = 0.0000;
+        
+        xalpha.resize(numOTUs, 0.0000);
+        
+        double falpha_prev = f0;
+        double dfalpha_prev = df0;
+        
+        double a = 0.0000;          double b = alpha;
+        double fa = f0;             double fb = 0.0000;
+        double dfa = df0;           double dfb = 0.0/0.0;
+        
+        int iter = 0;
+        int maxIters = 100;
+        while(iter++ < maxIters){
+            if (m->control_pressed) { break; }
+            
+            for(int i=0;i<numOTUs;i++){
+                xalpha[i] = x[i] + alpha * p[i];
+            }
+            
+            fAlpha = negativeLogEvidenceLambdaPi(xalpha);
+            
+            if(fAlpha > f0 + alpha * rho * df0 || fAlpha >= falpha_prev){
+                a = alpha_prev;         b = alpha;
+                fa = falpha_prev;       fb = fAlpha;
+                dfa = dfalpha_prev;     dfb = 0.0/0.0;
+                break;
+            }
+            
+            negativeLogDerivEvidenceLambdaPi(xalpha, gradient);
+            double dfalpha = 0.0000;
+            for(int i=0;i<numOTUs;i++){ dfalpha += gradient[i] * p[i]; }
+            
+            if(abs(dfalpha) <= -sigma * df0){
+                alphaNew = alpha;
+                return 1;
+            }
+            
+            if(dfalpha >= 0){
+                a = alpha;                  b = alpha_prev;
+                fa = fAlpha;                fb = falpha_prev;
+                dfa = dfalpha;              dfb = dfalpha_prev;
+                break;
+            }
+            
+            double delta = alpha - alpha_prev;
+            
+            double lower = alpha + delta;
+            double upper = alpha + tau1 * delta;
+            
+            double alphaNext = interpolate(alpha_prev, falpha_prev, dfalpha_prev, alpha, fAlpha, dfalpha, lower, upper);
+            
+            alpha_prev = alpha;
+            falpha_prev = fAlpha;
+            dfalpha_prev = dfalpha;
+            alpha = alphaNext;
+        }
+        
+        iter = 0;
+        while(iter++ < maxIters){
+            if (m->control_pressed) { break; }
+            
+            double delta = b - a;
+            
+            double lower = a + tau2 * delta;
+            double upper = b - tau3 * delta;
+            
+            alpha = interpolate(a, fa, dfa, b, fb, dfb, lower, upper);
+            
+            for(int i=0;i<numOTUs;i++){
+                xalpha[i] = x[i] + alpha * p[i];
+            }
+            
+            fAlpha = negativeLogEvidenceLambdaPi(xalpha);
+            
+            if((a - alpha) * dfa <= EPSILON){
+                return 0;
+            }
+            
+            if(fAlpha > f0 + rho * alpha * df0 || fAlpha >= fa){
+                b = alpha;
+                fb = fAlpha;
+                dfb = 0.0/0.0;
+            }
+            else{
+                double dfalpha = 0.0000;
+                
+                negativeLogDerivEvidenceLambdaPi(xalpha, gradient);
+                dfalpha = 0.0000;
+                for(int i=0;i<numOTUs;i++){ dfalpha += gradient[i] * p[i]; }
+                
+                if(abs(dfalpha) <= -sigma * df0){
+                    alphaNew = alpha;
+                    return 1;
+                }
+                
+                if(((b-a >= 0 && dfalpha >= 0) || ((b-a) <= 0.000 && dfalpha <= 0))){
+                    b = a;      fb = fa;        dfb = dfa;
+                    a = alpha;  fa = fAlpha;    dfa = dfalpha;
+                }
+                else{
+                    a = alpha;
+                    fa = fAlpha;
+                    dfa = dfalpha;
+                }
+            }
+            
+            
+        }
+        
+        return 1;
+    }
+       catch(exception& e) {
+               m->errorOut(e, "qFinderDMM", "lineMinimizeFletcher");
+               exit(1);
+       }
+}
+
+/**************************************************************************************************/
+
+int qFinderDMM::bfgs2_Solver(vector<double>& x){
+    try{
+//        cout << "bfgs2_Solver" << endl;
+        int bfgsIter = 0;
+        double step = 1.0e-6;
+        double delta_f = 0.0000;//f-f0;
+
+        vector<double> gradient;
+        double f = negativeLogEvidenceLambdaPi(x);
+        
+//        cout << "after negLE" << endl;
+        
+        negativeLogDerivEvidenceLambdaPi(x, gradient);
+
+//        cout << "after negLDE" << endl;
+
+        vector<double> x0 = x;
+        vector<double> g0 = gradient;
+
+        double g0norm = 0;
+        for(int i=0;i<numOTUs;i++){
+            g0norm += g0[i] * g0[i];
+        }
+        g0norm = sqrt(g0norm);
+
+        vector<double> p = gradient;
+        double pNorm = 0;
+        for(int i=0;i<numOTUs;i++){
+            p[i] *= -1 / g0norm;
+            pNorm += p[i] * p[i];
+        }
+        pNorm = sqrt(pNorm);
+        double df0 = -g0norm;
+
+        int maxIter = 5000;
+        
+//        cout << "before while" << endl;
+        
+        while(g0norm > 0.001 && bfgsIter++ < maxIter){
+            if (m->control_pressed) {  return 0; }
+
+            double f0 = f;
+            vector<double> dx(numOTUs, 0.0000);
+            
+            double alphaOld, alphaNew;
+
+            if(pNorm == 0 || g0norm == 0 || df0 == 0){
+                dx.assign(numOTUs, 0.0000);
+                break;
+            }
+            if(delta_f < 0){
+                double delta = max(-delta_f, 10 * EPSILON * abs(f0));
+                alphaOld = min(1.0, 2.0 * delta / (-df0));
+            }
+            else{
+                alphaOld = step;
+            }
+            
+            int success = lineMinimizeFletcher(x0, p, f0, df0, alphaOld, alphaNew, f, x, gradient);
+            
+            if(!success){
+                x = x0;
+                break;   
+            }
+            
+            delta_f = f - f0;
+            
+            vector<double> dx0(numOTUs);
+            vector<double> dg0(numOTUs);
+            
+            for(int i=0;i<numOTUs;i++){
+                dx0[i] = x[i] - x0[i];
+                dg0[i] = gradient[i] - g0[i];
+            }
+            
+            double dxg = 0;
+            double dgg = 0;
+            double dxdg = 0;
+            double dgnorm = 0;
+            
+            for(int i=0;i<numOTUs;i++){
+                dxg += dx0[i] * gradient[i];
+                dgg += dg0[i] * gradient[i];
+                dxdg += dx0[i] * dg0[i];
+                dgnorm += dg0[i] * dg0[i];
+            }
+            dgnorm = sqrt(dgnorm);
+            
+            double A, B;
+            
+            if(dxdg != 0){
+                B = dxg / dxdg;
+                A = -(1.0 + dgnorm*dgnorm /dxdg) * B + dgg / dxdg;            
+            }
+            else{
+                B = 0;
+                A = 0;
+            }
+            
+            for(int i=0;i<numOTUs;i++){     p[i] = gradient[i] - A * dx0[i] - B * dg0[i];   }
+            
+            x0 = x;
+            g0 = gradient;
+            
+
+            double pg = 0;
+            pNorm = 0.0000;
+            g0norm = 0.0000;
+            
+            for(int i=0;i<numOTUs;i++){
+                pg += p[i] * gradient[i];
+                pNorm += p[i] * p[i];
+                g0norm += g0[i] * g0[i];
+            }
+            pNorm = sqrt(pNorm);
+            g0norm = sqrt(g0norm);
+            
+            double dir = (pg >= 0.0) ? -1.0 : +1.0;
+
+            for(int i=0;i<numOTUs;i++){ p[i] *= dir / pNorm;    }
+            
+            pNorm = 0.0000;
+            df0 = 0.0000;
+            for(int i=0;i<numOTUs;i++){
+                pNorm += p[i] * p[i];       
+                df0 += p[i] * g0[i];
+            }
+            
+            pNorm = sqrt(pNorm);
+
+        }
+//        cout << "bfgsIter:\t" << bfgsIter << endl;
+
+        return bfgsIter;
+    }
+    catch(exception& e){
+        m->errorOut(e, "qFinderDMM", "bfgs2_Solver");
+        exit(1);
+    }
+}
+
+/**************************************************************************************************/
+
+//can we get these psi/psi1 calculations into their own math class?
+//psi calcualtions swiped from gsl library...
+
+static const double psi_cs[23] = {
+    -.038057080835217922,
+    .491415393029387130, 
+    -.056815747821244730,
+    .008357821225914313,
+    -.001333232857994342,
+    .000220313287069308,
+    -.000037040238178456,
+    .000006283793654854,
+    -.000001071263908506,
+    .000000183128394654,
+    -.000000031353509361,
+    .000000005372808776,
+    -.000000000921168141,
+    .000000000157981265,
+    -.000000000027098646,
+    .000000000004648722,
+    -.000000000000797527,
+    .000000000000136827,
+    -.000000000000023475,
+    .000000000000004027,
+    -.000000000000000691,
+    .000000000000000118,
+    -.000000000000000020
+};
+
+static double apsi_cs[16] = {    
+    -.0204749044678185,
+    -.0101801271534859,
+    .0000559718725387,
+    -.0000012917176570,
+    .0000000572858606,
+    -.0000000038213539,
+    .0000000003397434,
+    -.0000000000374838,
+    .0000000000048990,
+    -.0000000000007344,
+    .0000000000001233,
+    -.0000000000000228,
+    .0000000000000045,
+    -.0000000000000009,
+    .0000000000000002,
+    -.0000000000000000 
+};    
+
+/**************************************************************************************************/
+
+double qFinderDMM::cheb_eval(const double seriesData[], int order, double xx){
+    try {
+        double d = 0.0000;
+        double dd = 0.0000;
+        
+        double x2 = xx * 2.0000;
+        
+        for(int j=order;j>=1;j--){
+            if (m->control_pressed) {  return 0; }
+            double temp = d;
+            d = x2 * d - dd + seriesData[j];
+            dd = temp;
+        }
+        
+        d = xx * d - dd + 0.5 * seriesData[0];
+        return d;
+    }
+    catch(exception& e){
+        m->errorOut(e, "qFinderDMM", "cheb_eval");
+        exit(1);
+    }
+}
+
+/**************************************************************************************************/
+
+double qFinderDMM::psi(double xx){
+    try {
+        double psiX = 0.0000;
+        
+        if(xx < 1.0000){
+            
+            double t1 = 1.0 / xx;
+            psiX = cheb_eval(psi_cs, 22, 2.0*xx-1.0);
+            psiX = -t1 + psiX;
+            
+        }
+        else if(xx < 2.0000){
+            
+            const double v = xx - 1.0;
+            psiX = cheb_eval(psi_cs, 22, 2.0*v-1.0);
+            
+        }
+        else{
+            const double t = 8.0/(xx*xx)-1.0;
+            psiX = cheb_eval(apsi_cs, 15, t);
+            psiX += log(xx) - 0.5/xx;
+        }
+        
+        return psiX;
+    }
+    catch(exception& e){
+        m->errorOut(e, "qFinderDMM", "psi");
+        exit(1);
+    }
+}
+
+/**************************************************************************************************/
+
+/* coefficients for Maclaurin summation in hzeta()
+ * B_{2j}/(2j)!
+ */
+static double hzeta_c[15] = {
+    1.00000000000000000000000000000,
+    0.083333333333333333333333333333,
+    -0.00138888888888888888888888888889,
+    0.000033068783068783068783068783069,
+    -8.2671957671957671957671957672e-07,
+    2.0876756987868098979210090321e-08,
+    -5.2841901386874931848476822022e-10,
+    1.3382536530684678832826980975e-11,
+    -3.3896802963225828668301953912e-13,
+    8.5860620562778445641359054504e-15,
+    -2.1748686985580618730415164239e-16,
+    5.5090028283602295152026526089e-18,
+    -1.3954464685812523340707686264e-19,
+    3.5347070396294674716932299778e-21,
+    -8.9535174270375468504026113181e-23
+};
+
+/**************************************************************************************************/
+
+double qFinderDMM::psi1(double xx){
+    try {
+        
+        /* Euler-Maclaurin summation formula
+         * [Moshier, p. 400, with several typo corrections]
+         */
+        
+        double s = 2.0000;
+        const int jmax = 12;
+        const int kmax = 10;
+        int j, k;
+        const double pmax  = pow(kmax + xx, -s);
+        double scp = s;
+        double pcp = pmax / (kmax + xx);
+        double value = pmax*((kmax+xx)/(s-1.0) + 0.5);
+        
+        for(k=0; k<kmax; k++) {
+            if (m->control_pressed) {  return 0; }
+            value += pow(k + xx, -s);
+        }
+        
+        for(j=0; j<=jmax; j++) {
+            if (m->control_pressed) {  return 0; }
+            double delta = hzeta_c[j+1] * scp * pcp;
+            value += delta;
+            
+            if(fabs(delta/value) < 0.5*EPSILON) break;
+            
+            scp *= (s+2*j+1)*(s+2*j+2);
+            pcp /= (kmax + xx)*(kmax + xx);
+        }
+        
+        return value;
+    }
+    catch(exception& e){
+        m->errorOut(e, "qFinderDMM", "psi1");
+        exit(1);
+    }
+}
+
+/**************************************************************************************************/
+
+double qFinderDMM::negativeLogEvidenceLambdaPi(vector<double>& x){
+    try{
+        vector<double> sumAlphaX(numSamples, 0.0000);
+        
+        double logEAlpha = 0.0000;
+        double sumLambda = 0.0000;
+        double sumAlpha = 0.0000;
+        double logE = 0.0000;
+        double nu = 0.10000;
+        double eta = 0.10000;
+        
+        double weight = 0.00000;
+        for(int i=0;i<numSamples;i++){
+            weight += zMatrix[currentPartition][i];
+        }
+        
+        for(int i=0;i<numOTUs;i++){
+            if (m->control_pressed) {  return 0; }
+            double lambda = x[i];
+            double alpha = exp(x[i]);
+            logEAlpha += lgamma(alpha);
+            sumLambda += lambda;
+            sumAlpha += alpha;
+            
+            for(int j=0;j<numSamples;j++){
+                double X = countMatrix[j][i];
+                double alphaX = alpha + X;
+                sumAlphaX[j] += alphaX;
+                
+                logE -= zMatrix[currentPartition][j] * lgamma(alphaX);
+            }
+        }
+        
+        logEAlpha -= lgamma(sumAlpha);
+
+        for(int i=0;i<numSamples;i++){
+            logE += zMatrix[currentPartition][i] * lgamma(sumAlphaX[i]);
+        }
+
+        return logE + weight * logEAlpha + nu * sumAlpha - eta * sumLambda;
+    }
+    catch(exception& e){
+        m->errorOut(e, "qFinderDMM", "negativeLogEvidenceLambdaPi");
+        exit(1);
+    }
+}
+
+/**************************************************************************************************/
+
+void qFinderDMM::negativeLogDerivEvidenceLambdaPi(vector<double>& x, vector<double>& df){
+    try{
+//        cout << "\tstart negativeLogDerivEvidenceLambdaPi" << endl;
+        
+        vector<double> storeVector(numSamples, 0.0000);
+        vector<double> derivative(numOTUs, 0.0000);
+        vector<double> alpha(numOTUs, 0.0000);
+        
+        double store = 0.0000;
+        double nu = 0.1000;
+        double eta = 0.1000;
+        
+        double weight = 0.0000;
+        for(int i=0;i<numSamples;i++){
+            weight += zMatrix[currentPartition][i];
+        }
+
+        
+        for(int i=0;i<numOTUs;i++){
+            if (m->control_pressed) {  return; }
+//            cout << "start i loop" << endl;
+//            
+//            cout << i << '\t' << alpha[i] << '\t' << x[i] << '\t' << exp(x[i]) << '\t' << store << endl;
+            
+            alpha[i] = exp(x[i]);
+            store += alpha[i];
+            
+//            cout << "before derivative" << endl;
+            
+            derivative[i] = weight * psi(alpha[i]);
+
+//            cout << "after derivative" << endl;
+
+//            cout << i << '\t' << alpha[i] << '\t' << psi(alpha[i]) << '\t' << derivative[i] << endl;
+
+            for(int j=0;j<numSamples;j++){
+                double X = countMatrix[j][i];
+                double alphaX = X + alpha[i];
+                
+                derivative[i] -= zMatrix[currentPartition][j] * psi(alphaX);
+                storeVector[j] += alphaX;
+            }
+//            cout << "end i loop" << endl;
+        }
+
+        double sumStore = 0.0000;
+        for(int i=0;i<numSamples;i++){
+            sumStore += zMatrix[currentPartition][i] * psi(storeVector[i]);
+        }
+        
+        store = weight * psi(store);
+        
+        df.resize(numOTUs, 0.0000);
+        
+        for(int i=0;i<numOTUs;i++){
+            df[i] = alpha[i] * (nu + derivative[i] - store + sumStore) - eta;
+//            cout << i << '\t' << df[i] << endl;
+        }
+//        cout << df.size() << endl;
+//        cout << "\tend negativeLogDerivEvidenceLambdaPi" << endl;
+    }
+    catch(exception& e){
+         m->errorOut(e, "qFinderDMM", "negativeLogDerivEvidenceLambdaPi");
+        exit(1);
+    }
+}
+
+/**************************************************************************************************/
+
+double qFinderDMM::getNegativeLogEvidence(vector<double>& lambda, int group){
+    try {
+        double sumAlpha = 0.0000;
+        double sumAlphaX = 0.0000;
+        double sumLnGamAlpha = 0.0000;
+        double logEvidence = 0.0000;
+        
+        for(int i=0;i<numOTUs;i++){
+            if (m->control_pressed) {  return 0; }
+            double alpha = exp(lambda[i]);
+            double X = countMatrix[group][i];
+            double alphaX = alpha + X;
+            
+            sumLnGamAlpha += lgamma(alpha);
+            sumAlpha += alpha;
+            sumAlphaX += alphaX;
+            
+            logEvidence -= lgamma(alphaX);
+        }
+        
+        sumLnGamAlpha -= lgamma(sumAlpha);
+        logEvidence += lgamma(sumAlphaX);
+        
+        return logEvidence + sumLnGamAlpha;
+    }
+    catch(exception& e){
+        m->errorOut(e, "qFinderDMM", "getNegativeLogEvidence");
+        exit(1);
+    }
+}
+
+/**************************************************************************************************/
+
+void qFinderDMM::kMeans(){
+    try {
+        
+        vector<vector<double> > relativeAbundance(numSamples);
+        vector<vector<double> > alphaMatrix;
+        
+        alphaMatrix.resize(numPartitions);
+        lambdaMatrix.resize(numPartitions);
+        for(int i=0;i<numPartitions;i++){
+            alphaMatrix[i].assign(numOTUs, 0);
+            lambdaMatrix[i].assign(numOTUs, 0);
+        }
+        
+        //get relative abundance
+        for(int i=0;i<numSamples;i++){
+            if (m->control_pressed) {  return; }
+            int groupTotal = 0;
+            
+            relativeAbundance[i].assign(numOTUs, 0.0);
+            
+            for(int j=0;j<numOTUs;j++){
+                groupTotal += countMatrix[i][j];
+            }
+            for(int j=0;j<numOTUs;j++){
+                relativeAbundance[i][j] = countMatrix[i][j] / (double)groupTotal;
+            }
+        }
+        
+        //randomly assign samples into partitions
+        zMatrix.resize(numPartitions);
+        for(int i=0;i<numPartitions;i++){
+            zMatrix[i].assign(numSamples, 0);
+        }
+        
+        for(int i=0;i<numSamples;i++){
+            zMatrix[rand()%numPartitions][i] = 1;
+        }
+        
+        double maxChange = 1;
+        int maxIters = 1000;
+        int iteration = 0;
+        
+        weights.assign(numPartitions, 0);
+        
+        while(maxChange > 1e-6 && iteration < maxIters){
+            
+            if (m->control_pressed) {  return; }
+            //calcualte average relative abundance
+            maxChange = 0.0000;
+            for(int i=0;i<numPartitions;i++){
+                
+                double normChange = 0.0;
+                
+                weights[i] = 0;
+                
+                for(int j=0;j<numSamples;j++){
+                    weights[i] += (double)zMatrix[i][j];
+                }
+                
+                vector<double> averageRelativeAbundance(numOTUs, 0);
+                for(int j=0;j<numOTUs;j++){
+                    for(int k=0;k<numSamples;k++){
+                        averageRelativeAbundance[j] += zMatrix[i][k] * relativeAbundance[k][j];
+                    }
+                }
+                
+                for(int j=0;j<numOTUs;j++){
+                    averageRelativeAbundance[j] /= weights[i];
+                    double difference = averageRelativeAbundance[j] - alphaMatrix[i][j];
+                    normChange += difference * difference;
+                    alphaMatrix[i][j] = averageRelativeAbundance[j];
+                }
+                
+                normChange = sqrt(normChange);
+                
+                if(normChange > maxChange){ maxChange = normChange; }
+            }
+            
+            
+            //calcualte distance between each sample in partition adn the average relative abundance
+            for(int i=0;i<numSamples;i++){
+                if (m->control_pressed) {  return; }
+                
+                double normalizationFactor = 0;
+                vector<double> totalDistToPartition(numPartitions, 0);
+                
+                for(int j=0;j<numPartitions;j++){
+                    for(int k=0;k<numOTUs;k++){
+                        double difference = alphaMatrix[j][k] - relativeAbundance[i][k];
+                        totalDistToPartition[j] += difference * difference;
+                    }
+                    totalDistToPartition[j] = sqrt(totalDistToPartition[j]);
+                    normalizationFactor += exp(-50.0 * totalDistToPartition[j]);
+                }
+                
+                
+                for(int j=0;j<numPartitions;j++){
+                    zMatrix[j][i] = exp(-50.0 * totalDistToPartition[j]) / normalizationFactor;
+                }
+                
+            }
+            
+            iteration++;
+            //        cout << "K means: " << iteration << '\t' << maxChange << endl;
+            
+        }
+        
+        //    cout << "Iter:-1";
+        for(int i=0;i<numPartitions;i++){
+            weights[i] = 0.0000;
+            
+            for(int j=0;j<numSamples;j++){
+                weights[i] += zMatrix[i][j];
+            }
+            //        printf("\tw_%d=%.3f", i, weights[i]);
+        }
+        //    cout << endl;
+        
+        
+        for(int i=0;i<numOTUs;i++){
+            if (m->control_pressed) {  return; }
+            for(int j=0;j<numPartitions;j++){
+                if(alphaMatrix[j][i] > 0){
+                    lambdaMatrix[j][i] = log(alphaMatrix[j][i]);
+                }
+                else{
+                    lambdaMatrix[j][i] = -10.0;
+                }
+            }
+        }
+    }
+    catch(exception& e){
+        m->errorOut(e, "qFinderDMM", "kMeans");
+        exit(1);
+    }
+}
+
+/**************************************************************************************************/
+
+void qFinderDMM::optimizeLambda(){    
+    try {
+        for(currentPartition=0;currentPartition<numPartitions;currentPartition++){
+            if (m->control_pressed) {  return; }
+            bfgs2_Solver(lambdaMatrix[currentPartition]);
+        }
+    }
+    catch(exception& e){
+        m->errorOut(e, "qFinderDMM", "optimizeLambda");
+        exit(1);
+    }
+}
+
+/**************************************************************************************************/
+
+void qFinderDMM::calculatePiK(){
+    try {
+        vector<double> store(numPartitions);
+        
+        for(int i=0;i<numSamples;i++){
+            if (m->control_pressed) {  return; }
+            double sum = 0.0000;
+            double minNegLogEvidence =numeric_limits<double>::max();
+            
+            for(int j=0;j<numPartitions;j++){
+                double negLogEvidenceJ = getNegativeLogEvidence(lambdaMatrix[j], i);
+                
+                if(negLogEvidenceJ < minNegLogEvidence){
+                    minNegLogEvidence = negLogEvidenceJ;
+                }
+                store[j] = negLogEvidenceJ;
+            }
+            
+            for(int j=0;j<numPartitions;j++){
+                if (m->control_pressed) {  return; }
+                zMatrix[j][i] = weights[j] * exp(-(store[j] - minNegLogEvidence));
+                sum += zMatrix[j][i];
+            }
+            
+            for(int j=0;j<numPartitions;j++){
+                zMatrix[j][i] /= sum;
+            }
+            
+        }
+    }
+    catch(exception& e){
+        m->errorOut(e, "qFinderDMM", "calculatePiK");
+        exit(1);
+    }
+    
+}
+
+/**************************************************************************************************/
+
+double qFinderDMM::getNegativeLogLikelihood(){
+    try {
+        double eta = 0.10000;
+        double nu = 0.10000;
+        
+        vector<double> pi(numPartitions, 0.0000);
+        vector<double> logBAlpha(numPartitions, 0.0000);
+        
+        double doubleSum = 0.0000;
+        
+        for(int i=0;i<numPartitions;i++){
+            if (m->control_pressed) {  return 0; }
+            double sumAlphaK = 0.0000;
+            
+            pi[i] = weights[i] / (double)numSamples;
+            
+            for(int j=0;j<numOTUs;j++){
+                double alpha = exp(lambdaMatrix[i][j]);
+                sumAlphaK += alpha;
+                
+                logBAlpha[i] += lgamma(alpha);
+            }
+            logBAlpha[i] -= lgamma(sumAlphaK);
+        }
+        
+        for(int i=0;i<numSamples;i++){
+            if (m->control_pressed) {  return 0; }
+            
+            double probability = 0.0000;
+            double factor = 0.0000;
+            double sum = 0.0000;
+            vector<double> logStore(numPartitions, 0.0000);
+            double offset = -numeric_limits<double>::max();
+            
+            for(int j=0;j<numOTUs;j++){
+                sum += countMatrix[i][j];
+                factor += lgamma(countMatrix[i][j] + 1.0000);
+            }
+            factor -= lgamma(sum + 1.0);
+            
+            for(int k=0;k<numPartitions;k++){
+                
+                double sumAlphaKX = 0.0000;
+                double logBAlphaX = 0.0000;
+                
+                for(int j=0;j<numOTUs;j++){
+                    double alphaX = exp(lambdaMatrix[k][j]) + (double)countMatrix[i][j];
+                    
+                    sumAlphaKX += alphaX;
+                    logBAlphaX += lgamma(alphaX);
+                }
+                
+                logBAlphaX -= lgamma(sumAlphaKX);
+                
+                logStore[k] = logBAlphaX - logBAlpha[k] - factor;
+                if(logStore[k] > offset){
+                    offset = logStore[k];
+                }
+                
+            }
+            
+            for(int k=0;k<numPartitions;k++){
+                probability += pi[k] * exp(-offset + logStore[k]);
+            }
+            doubleSum += log(probability) + offset;
+            
+        }
+        
+        double L5 = - numOTUs * numPartitions * lgamma(eta);
+        double L6 = eta * numPartitions * numOTUs * log(nu);
+        
+        double alphaSum, lambdaSum;
+        alphaSum = lambdaSum = 0.0000;
+        
+        for(int i=0;i<numPartitions;i++){
+            for(int j=0;j<numOTUs;j++){
+                if (m->control_pressed) {  return 0; }
+                alphaSum += exp(lambdaMatrix[i][j]);
+                lambdaSum += lambdaMatrix[i][j];
+            }
+        }
+        alphaSum *= -nu;
+        lambdaSum *= eta;
+        
+        return (-doubleSum - L5 - L6 - alphaSum - lambdaSum);
+    }
+    catch(exception& e){
+        m->errorOut(e, "qFinderDMM", "getNegativeLogLikelihood");
+        exit(1);
+    }
+
+
+}
+
+/**************************************************************************************************/
+
+vector<vector<double> > qFinderDMM::getHessian(){
+    try {
+        vector<double> alpha(numOTUs, 0.0000);
+        double alphaSum = 0.0000;
+        
+        vector<double> pi = zMatrix[currentPartition];
+        vector<double> psi_ajk(numOTUs, 0.0000);
+        vector<double> psi_cjk(numOTUs, 0.0000);
+        vector<double> psi1_ajk(numOTUs, 0.0000);
+        vector<double> psi1_cjk(numOTUs, 0.0000);
+        
+        for(int j=0;j<numOTUs;j++){
+            
+            if (m->control_pressed) {  break; }
+            
+            alpha[j] = exp(lambdaMatrix[currentPartition][j]);
+            alphaSum += alpha[j];
+            
+            for(int i=0;i<numSamples;i++){
+                double X = (double) countMatrix[i][j];
+                
+                psi_ajk[j] += pi[i] * psi(alpha[j]);
+                psi1_ajk[j] += pi[i] * psi1(alpha[j]);
+                
+                psi_cjk[j] += pi[i] * psi(alpha[j] + X);
+                psi1_cjk[j] += pi[i] * psi1(alpha[j] + X);
+            }
+        }
+        
+        
+        double psi_Ck = 0.0000;
+        double psi1_Ck = 0.0000;
+        
+        double weight = 0.0000;
+        
+        for(int i=0;i<numSamples;i++){
+            if (m->control_pressed) {  break; }
+            weight += pi[i];
+            double sum = 0.0000;
+            for(int j=0;j<numOTUs;j++){     sum += alpha[j] + countMatrix[i][j];    }
+            
+            psi_Ck += pi[i] * psi(sum);
+            psi1_Ck += pi[i] * psi1(sum);
+        }
+        
+        double psi_Ak = weight * psi(alphaSum);
+        double psi1_Ak = weight * psi1(alphaSum);
+        
+        vector<vector<double> > hessian(numOTUs);
+        for(int i=0;i<numOTUs;i++){ hessian[i].assign(numOTUs, 0.0000); }
+        
+        for(int i=0;i<numOTUs;i++){
+            if (m->control_pressed) {  break; }
+            double term1 = -alpha[i] * (- psi_ajk[i] + psi_Ak + psi_cjk[i] - psi_Ck);
+            double term2 = -alpha[i] * alpha[i] * (-psi1_ajk[i] + psi1_Ak + psi1_cjk[i] - psi1_Ck);
+            double term3 = 0.1 * alpha[i];
+            
+            hessian[i][i] = term1 + term2 + term3;
+            
+            for(int j=0;j<i;j++){   
+                hessian[i][j] = - alpha[i] * alpha[j] * (psi1_Ak - psi1_Ck);
+                hessian[j][i] = hessian[i][j];
+            }
+        }
+        
+        return hessian;
+    }
+    catch(exception& e){
+        m->errorOut(e, "qFinderDMM", "getHessian");
+        exit(1);
+    }
+}
+
+/**************************************************************************************************/
diff --git a/qFinderDMM.h b/qFinderDMM.h
new file mode 100644 (file)
index 0000000..d94e6cc
--- /dev/null
@@ -0,0 +1,69 @@
+//
+//  qFinderDMM.h
+//  pds_dmm
+//
+//  Created by Patrick Schloss on 11/8/12.
+//  Copyright (c) 2012 University of Michigan. All rights reserved.
+//
+
+#ifndef pds_dmm_qFinderDMM_h
+#define pds_dmm_qFinderDMM_h
+
+/**************************************************************************************************/
+
+#include "mothurout.h"
+
+/**************************************************************************************************/
+
+class qFinderDMM {
+  
+public:
+    qFinderDMM(vector<vector<int> >, int);
+    double getNLL()     {    return currNLL;        }  
+    double getAIC()     {    return aic;            }
+    double getBIC()     {    return bic;            }
+    double getLogDet()  {    return logDeterminant; }
+    double getLaplace() {    return laplace;        }
+    void printZMatrix(string, vector<string>);
+    void printRelAbund(string, vector<string>);
+
+private:
+    MothurOut* m;
+    void kMeans();
+    void optimizeLambda();
+    void calculatePiK();
+
+    double negativeLogEvidenceLambdaPi(vector<double>&);
+    void negativeLogDerivEvidenceLambdaPi(vector<double>&, vector<double>&);
+    double getNegativeLogEvidence(vector<double>&, int);
+    double getNegativeLogLikelihood();
+    vector<vector<double> > getHessian();
+    
+    int lineMinimizeFletcher(vector<double>&, vector<double>&, double, double, double, double&, double&, vector<double>&, vector<double>&);
+    int bfgs2_Solver(vector<double>&);//, double, double);
+    double cheb_eval(const double[], int, double);
+    double psi(double);
+    double psi1(double);
+
+    vector<vector<int> > countMatrix;
+    vector<vector<double> > zMatrix;
+    vector<vector<double> > lambdaMatrix;
+    vector<double> weights;
+    vector<vector<double> > error;
+    
+    int numPartitions;
+    int numSamples;
+    int numOTUs;
+    int currentPartition;
+    
+    double currNLL;
+    double aic;
+    double bic;
+    double logDeterminant;
+    double laplace;
+    
+};
+
+/**************************************************************************************************/
+
+#endif
index 3901650cef965dc7a3c0126ecb54a8fb425e434d..7bc333bc02e737f7caab1f4b4d34ec0c131dc2dd 100644 (file)
@@ -211,6 +211,18 @@ int SharedRAbundVector::getAbundance(int index){
        return data[index].abundance;
        
 }
+/***********************************************************************/
+//returns vector of abundances 
+vector<int> SharedRAbundVector::getAbundances(){
+    vector<int> abunds;
+    for (int i = 0; i < data.size(); i++) {
+        abunds.push_back(data[i].abundance);
+    }
+    
+       return abunds;
+}
+
+
 /***********************************************************************/
 
 int SharedRAbundVector::numNZ(){
index 419d15a15af7b73872c2678675aff1a4f564c759..664718518e276c7c84984f5c1fe76c9b3bf46523 100644 (file)
@@ -50,6 +50,7 @@ public:
        individual get(int);
        vector <individual> getData();
        int getAbundance(int);
+    vector<int> getAbundances();
        int numNZ();
        void sortD();  //Sorts the data in descending order.
        void push_front(int, int, string); //abundance, otu, groupname