]> git.donarmstrong.com Git - mothur.git/blobdiff - getmetacommunitycommand.cpp
added get.metacommunity command.
[mothur.git] / getmetacommunitycommand.cpp
diff --git a/getmetacommunitycommand.cpp b/getmetacommunitycommand.cpp
new file mode 100644 (file)
index 0000000..59efe60
--- /dev/null
@@ -0,0 +1,550 @@
+//
+//  getmetacommunitycommand.cpp
+//  Mothur
+//
+//  Created by SarahsWork on 4/9/13.
+//  Copyright (c) 2013 Schloss Lab. All rights reserved.
+//
+
+#include "getmetacommunitycommand.h"
+#include "qFinderDMM.h"
+
+//**********************************************************************************************************************
+vector<string> GetMetaCommunityCommand::setParameters(){
+       try {
+        CommandParameter pshared("shared", "InputTypes", "", "", "none", "none", "none","outputType",false,true); parameters.push_back(pshared);
+        CommandParameter pgroups("groups", "String", "", "", "", "", "","",false,false); parameters.push_back(pgroups);
+               CommandParameter plabel("label", "String", "", "", "", "", "","",false,false); parameters.push_back(plabel);
+        CommandParameter pminpartitions("minpartitions", "Number", "", "5", "", "", "","",false,false,true); parameters.push_back(pminpartitions);
+        CommandParameter pmaxpartitions("maxpartitions", "Number", "", "10", "", "", "","",false,false,true); parameters.push_back(pmaxpartitions);
+        CommandParameter poptimizegap("optimizegap", "Number", "", "3", "", "", "","",false,false,true); parameters.push_back(poptimizegap);
+               CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
+               CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
+               
+               vector<string> myArray;
+               for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
+               return myArray;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "NewCommand", "setParameters");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+string GetMetaCommunityCommand::getHelpString(){
+       try {
+               string helpString = "";
+               helpString += "The get.metacommunity command parameters are shared, label, groups, minpartitions, maxpartitions and optimizegap. The shared file is required. \n";
+        helpString += "The label parameter is used to analyze specific labels in your input. labels are separated by dashes.\n";
+               helpString += "The groups parameter allows you to specify which of the groups in your shared file you would like analyzed.  Group names are separated by dashes.\n";
+               helpString += "The minpartitions parameter is used to .... Default=5.\n";
+        helpString += "The maxpartitions parameter is used to .... Default=10.\n";
+        helpString += "The optimizegap parameter is used to .... Default=3.\n";
+               helpString += "The get.metacommunity command should be in the following format: get.metacommunity(shared=yourSharedFile).\n";
+               return helpString;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "GetMetaCommunityCommand", "getHelpString");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+string GetMetaCommunityCommand::getOutputPattern(string type) {
+    try {
+        string pattern = "";
+        
+        if (type == "fit")              {  pattern = "[filename],[distance],mix.fit"; }
+        else if (type == "relabund")    {  pattern = "[filename],[distance],[tag],mix.relabund"; }
+        else if (type == "design")      {  pattern = "[filename],mix.design"; }
+        else if (type == "matrix")      {  pattern = "[filename],[distance],[tag],mix.posterior"; }
+        else if (type == "parameters")  {  pattern = "[filename],[distance],mix.parameters"; }
+        else if (type == "summary")  {  pattern = "[filename],[distance],mix.summary"; }
+        else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true;  }
+        
+        return pattern;
+    }
+    catch(exception& e) {
+        m->errorOut(e, "GetMetaCommunityCommand", "getOutputPattern");
+        exit(1);
+    }
+}
+//**********************************************************************************************************************
+GetMetaCommunityCommand::GetMetaCommunityCommand(){
+       try {
+               abort = true; calledHelp = true;
+               setParameters();
+        vector<string> tempOutNames;
+               outputTypes["fit"] = tempOutNames;
+        outputTypes["relabund"] = tempOutNames;
+        outputTypes["matrix"] = tempOutNames;
+        outputTypes["design"] = tempOutNames;
+        outputTypes["parameters"] = tempOutNames;
+        outputTypes["summary"] = tempOutNames;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "GetMetaCommunityCommand", "GetMetaCommunityCommand");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+GetMetaCommunityCommand::GetMetaCommunityCommand(string option)  {
+       try {
+               abort = false; calledHelp = false;
+        allLines=true;
+               
+               //allow user to run help
+               if(option == "help") { help(); abort = true; calledHelp = true; }
+               else if(option == "citation") { citation(); abort = true; calledHelp = true;}
+               
+               else {
+                       //valid paramters for this command
+                       vector<string> myArray = setParameters();
+                       
+                       OptionParser parser(option);
+                       map<string,string> parameters = parser.getParameters();
+                       
+                       ValidParameters validParameter;
+                       map<string,string>::iterator it;
+                       //check to make sure all parameters are valid for command
+                       for (it = parameters.begin(); it != parameters.end(); it++) {
+                               if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
+                       }
+                       
+            vector<string> tempOutNames;
+            outputTypes["fit"] = tempOutNames;
+            outputTypes["relabund"] = tempOutNames;
+            outputTypes["matrix"] = tempOutNames;
+            outputTypes["design"] = tempOutNames;
+            outputTypes["parameters"] = tempOutNames;
+                       outputTypes["summary"] = tempOutNames;
+            
+                       //if the user changes the input directory command factory will send this info to us in the output parameter
+                       string inputDir = validParameter.validFile(parameters, "inputdir", false);
+                       if (inputDir == "not found"){   inputDir = "";          }
+                       else {
+                string path;
+                it = parameters.find("shared");
+                               if(it != parameters.end()){
+                                       path = m->hasPath(it->second);
+                                       if (path == "") {       parameters["shared"] = inputDir + it->second;           }
+                               }
+            }
+                       
+            //get shared file, it is required
+                       sharedfile = validParameter.validFile(parameters, "shared", true);
+                       if (sharedfile == "not open") { sharedfile = ""; abort = true; }
+                       else if (sharedfile == "not found") {
+                               //if there is a current shared file, use it
+                               sharedfile = m->getSharedFile();
+                               if (sharedfile != "") { m->mothurOut("Using " + sharedfile + " as input file for the shared parameter."); m->mothurOutEndLine(); }
+                               else {  m->mothurOut("You have no current sharedfile and the shared parameter is required."); m->mothurOutEndLine(); abort = true; }
+                       }else { m->setSharedFile(sharedfile); }
+            
+            //if the user changes the output directory command factory will send this info to us in the output parameter
+                       outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){
+                               outputDir = m->hasPath(sharedfile); //if user entered a file with a path then preserve it
+                       }
+            
+            string temp = validParameter.validFile(parameters, "minpartitions", false);        if (temp == "not found"){       temp = "5";      }
+                       m->mothurConvert(temp, minpartitions);
+            
+            temp = validParameter.validFile(parameters, "maxpartitions", false);        if (temp == "not found"){      temp = "10";     }
+                       m->mothurConvert(temp, maxpartitions);
+            
+            temp = validParameter.validFile(parameters, "optimizegap", false);          if (temp == "not found"){      temp = "3";      }
+                       m->mothurConvert(temp, optimizegap);
+            
+            string groups = validParameter.validFile(parameters, "groups", false);
+                       if (groups == "not found") { groups = ""; }
+                       else { m->splitAtDash(groups, Groups); }
+                       m->setGroups(Groups);
+            
+            string label = validParameter.validFile(parameters, "label", false);
+                       if (label == "not found") { label = ""; }
+                       else {
+                               if(label != "all") {  m->splitAtDash(label, labels);  allLines = 0;  }
+                               else { allLines = 1;  }
+                       }
+               }
+               
+       }
+       catch(exception& e) {
+               m->errorOut(e, "GetMetaCommunityCommand", "GetMetaCommunityCommand");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+
+int GetMetaCommunityCommand::execute(){
+       try {
+               
+               if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
+        
+        InputData input(sharedfile, "sharedfile");
+        vector<SharedRAbundVector*> lookup = input.getSharedRAbundVectors();
+        string lastLabel = lookup[0]->getLabel();
+        
+        //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
+        set<string> processedLabels;
+        set<string> userLabels = labels;
+        
+        //as long as you are not at the end of the file or done wih the lines you want
+        while((lookup[0] != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
+            
+            if (m->control_pressed) { for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  }  return 0; }
+            
+            if(allLines == 1 || labels.count(lookup[0]->getLabel()) == 1){
+                
+                m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
+                
+                process(lookup);
+                
+                processedLabels.insert(lookup[0]->getLabel());
+                userLabels.erase(lookup[0]->getLabel());
+            }
+            
+            if ((m->anyLabelsToProcess(lookup[0]->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
+                string saveLabel = lookup[0]->getLabel();
+                
+                for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  }
+                lookup = input.getSharedRAbundVectors(lastLabel);
+                m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
+                
+                process(lookup);
+                
+                processedLabels.insert(lookup[0]->getLabel());
+                userLabels.erase(lookup[0]->getLabel());
+                
+                //restore real lastlabel to save below
+                lookup[0]->setLabel(saveLabel);
+            }
+            
+            lastLabel = lookup[0]->getLabel();
+            //prevent memory leak
+            for (int i = 0; i < lookup.size(); i++) {  delete lookup[i]; lookup[i] = NULL; }
+            
+            if (m->control_pressed) { return 0; }
+            
+            //get next line to process
+            lookup = input.getSharedRAbundVectors();
+        }
+        
+        if (m->control_pressed) {  return 0; }
+        
+        //output error messages about any remaining user labels
+        set<string>::iterator it;
+        bool needToRun = false;
+        for (it = userLabels.begin(); it != userLabels.end(); it++) {
+            m->mothurOut("Your file does not include the label " + *it);
+            if (processedLabels.count(lastLabel) != 1) {
+                m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
+                needToRun = true;
+            }else {
+                m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
+            }
+        }
+        
+        //run last label if you need to
+        if (needToRun == true)  {
+            for (int i = 0; i < lookup.size(); i++) { if (lookup[i] != NULL) { delete lookup[i]; } }
+            lookup = input.getSharedRAbundVectors(lastLabel);
+            
+            m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
+            
+            process(lookup);
+            
+            for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  }
+        }
+               
+        //output files created by command
+               m->mothurOutEndLine();
+               m->mothurOut("Output File Names: "); m->mothurOutEndLine();
+               for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
+               m->mothurOutEndLine();
+        return 0;
+               
+    }
+       catch(exception& e) {
+               m->errorOut(e, "GetMetaCommunityCommand", "execute");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+int GetMetaCommunityCommand::process(vector<SharedRAbundVector*>& thislookup){
+       try {
+        
+        double minLaplace = 1e10;
+        int minPartition = 0;
+        
+        map<string, string> variables;
+        variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(sharedfile));
+        variables["[distance]"] = thislookup[0]->getLabel();
+               string outputFileName = getOutputFileName("fit", variables);
+        outputNames.push_back(outputFileName); outputTypes["fit"].push_back(outputFileName);
+        
+               ofstream fitData;
+               m->openOutputFile(outputFileName, fitData);
+        fitData.setf(ios::fixed, ios::floatfield);
+        fitData.setf(ios::showpoint);
+        cout.setf(ios::fixed, ios::floatfield);
+        cout.setf(ios::showpoint);
+
+        vector< vector<int> > sharedMatrix;
+        for (int i = 0; i < thislookup.size(); i++) { sharedMatrix.push_back(thislookup[i]->getAbundances()); }
+        
+        m->mothurOut("K\tNLE\t\tlogDet\tBIC\t\tAIC\t\tLaplace\n");
+        fitData << "K\tNLE\tlogDet\tBIC\tAIC\tLaplace" << endl;
+        
+        for(int numPartitions=1;numPartitions<=maxpartitions;numPartitions++){
+            
+            if (m->control_pressed) { break; }
+            
+            qFinderDMM findQ(sharedMatrix, numPartitions);
+            
+            double laplace = findQ.getLaplace();
+            m->mothurOut(toString(numPartitions) + '\t');
+            cout << setprecision (2) << findQ.getNLL() << '\t' << findQ.getLogDet() << '\t';
+            m->mothurOutJustToLog(toString(findQ.getNLL()) + '\t' + toString(findQ.getLogDet()) + '\t');
+            cout << findQ.getBIC() << '\t' << findQ.getAIC() << '\t' << laplace;
+            m->mothurOutJustToLog(toString(findQ.getBIC()) + '\t' + toString(findQ.getAIC()) + '\t' + toString(laplace));
+            
+            fitData << numPartitions << '\t';
+            fitData << setprecision (2) << findQ.getNLL() << '\t' << findQ.getLogDet() << '\t';
+            fitData << findQ.getBIC() << '\t' << findQ.getAIC() << '\t' << laplace << endl;
+            
+            if(laplace < minLaplace){
+                minPartition = numPartitions;
+                minLaplace = laplace;
+                m->mothurOut("***");
+            }
+            m->mothurOutEndLine();
+            
+            variables["[tag]"] = toString(numPartitions);
+            string relabund = getOutputFileName("relabund", variables);
+            outputNames.push_back(relabund); outputTypes["relabund"].push_back(relabund);
+            string matrix = getOutputFileName("matrix", variables);
+            outputNames.push_back(matrix); outputTypes["matrix"].push_back(matrix);
+            
+            findQ.printZMatrix(matrix, m->getGroups());
+            findQ.printRelAbund(relabund, m->currentBinLabels);
+            
+            if(optimizegap != -1 && (numPartitions - minPartition) >= optimizegap && numPartitions >= minpartitions){ break;  }
+        }
+        fitData.close();
+        
+        //minPartition = 4;
+        
+        if (m->control_pressed) { return 0; }
+        
+        generateSummaryFile(minpartitions, outputTypes["relabund"][0], outputTypes["relabund"][outputTypes["relabund"].size()-1], outputTypes["matrix"][outputTypes["matrix"].size()-1], thislookup[0]->getLabel());
+
+        
+        return 0;
+    }
+       catch(exception& e) {
+               m->errorOut(e, "GetMetaCommunityCommand", "process");
+               exit(1);
+       }
+}
+/**************************************************************************************************/
+
+vector<double> GetMetaCommunityCommand::generateDesignFile(int numPartitions, string input){
+    try {
+        vector<double> piValues(numPartitions, 0);
+        
+        ifstream postFile;
+        m->openInputFile(input, postFile);//((fileRoot + toString(numPartitions) + "mix.posterior").c_str()); //matrix file
+        map<string, string> variables;
+        variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(input));
+               string outputFileName = getOutputFileName("design", variables);
+        ofstream designFile;
+        m->openOutputFile(outputFileName, designFile);
+        outputNames.push_back(outputFileName); outputTypes["design"].push_back(outputFileName);
+        
+        
+        vector<string> titles(numPartitions);
+        
+        for(int i=0;i<numPartitions;i++){   postFile >> titles[i];  }
+        
+        double posterior;
+        string sampleName;
+        int numSamples = 0;
+        
+        while(postFile){
+            
+            if (m->control_pressed) { break; }
+            
+            double maxPosterior = 0.0000;
+            int maxPartition = -1;
+            
+            postFile >> sampleName;
+            
+            for(int i=0;i<numPartitions;i++){
+                
+                postFile >> posterior;
+                if(posterior > maxPosterior){
+                    maxPosterior = posterior;
+                    maxPartition = i;
+                }
+                piValues[i] += posterior;
+                
+            }
+            
+            designFile << sampleName << '\t' << titles[maxPartition] << endl;
+            
+            numSamples++;
+            m->gobble(postFile);
+        }
+        for(int i=0;i<numPartitions;i++){
+            piValues[i] /= (double)numSamples;
+        }
+        
+        
+        postFile.close();
+        designFile.close();
+        
+        return piValues;
+    }
+       catch(exception& e) {
+               m->errorOut(e, "GetMetaCommunityCommand", "generateDesignFile");
+               exit(1);
+       }
+}
+
+/**************************************************************************************************/
+
+inline bool summaryFunction(summaryData i, summaryData j){ return i.difference > j.difference;   }
+
+/**************************************************************************************************/
+int GetMetaCommunityCommand::generateSummaryFile(int numPartitions, string reference, string partFile, string designInput, string label){
+    try {
+        vector<summaryData> summary;
+        
+        vector<double> pMean(numPartitions, 0);
+        vector<double> pLCI(numPartitions, 0);
+        vector<double> pUCI(numPartitions, 0);
+        
+        string name, header;
+        double mean, lci, uci;
+        
+        
+        vector<double> piValues = generateDesignFile(numPartitions, designInput);
+        
+        ifstream referenceFile;
+        m->openInputFile(reference, referenceFile); //((fileRoot + label + ".1mix.relabund").c_str());
+        ifstream partitionFile;
+        m->openInputFile(partFile, partitionFile); //((fileRoot + toString(numPartitions) + "mix.relabund").c_str());
+        
+        header = m->getline(referenceFile);
+        header = m->getline(partitionFile);
+        stringstream head(header);
+        string dummy, label;
+        head >> dummy;
+        vector<string> thetaValues(numPartitions, "");
+        for(int i=0;i<numPartitions;i++){
+            head >> label >> dummy >> dummy;
+            thetaValues[i] = label.substr(label.find_last_of('_')+1);
+        }
+        
+        
+        vector<double> partitionDiff(numPartitions, 0.0000);
+        
+        while(referenceFile){
+            
+            if (m->control_pressed) { break; }
+            
+            referenceFile >> name >> mean >> lci >> uci;
+            
+            summaryData tempData;
+            tempData.name = name;
+            tempData.refMean = mean;
+            
+            double difference = 0.0000;
+            
+            partitionFile >> name;
+            for(int j=0;j<numPartitions;j++){
+                partitionFile >> pMean[j] >> pLCI[j] >> pUCI[j];
+                difference += abs(mean - pMean[j]);
+                partitionDiff[j] += abs(mean - pMean[j]);;
+            }
+            
+            tempData.partMean = pMean;
+            tempData.partLCI = pLCI;
+            tempData.partUCI = pUCI;
+            tempData.difference = difference;
+            summary.push_back(tempData);
+            
+            m->gobble(referenceFile);
+            m->gobble(partitionFile);
+        }
+        referenceFile.close();
+        partitionFile.close();
+        
+        if (m->control_pressed) { return 0; }
+        
+        int numOTUs = (int)summary.size();
+        
+        sort(summary.begin(), summary.end(), summaryFunction);
+        
+        map<string, string> variables;
+        variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(sharedfile));
+        variables["[distance]"] = label;
+               string outputFileName = getOutputFileName("parameters", variables);
+        outputNames.push_back(outputFileName); outputTypes["parameters"].push_back(outputFileName);
+        
+        ofstream parameterFile;
+        m->openOutputFile(outputFileName, parameterFile); //((fileRoot + "mix.parameters").c_str());
+        parameterFile.setf(ios::fixed, ios::floatfield);
+        parameterFile.setf(ios::showpoint);
+        
+        double totalDifference =  0.0000;
+        parameterFile << "Part\tDif2Ref_i\ttheta_i\tpi_i\n";
+        for(int i=0;i<numPartitions;i++){
+            if (m->control_pressed) { break; }
+            parameterFile << i+1 << '\t' << setprecision(2) << partitionDiff[i] << '\t' << thetaValues[i] << '\t' << piValues[i] << endl;
+            totalDifference += partitionDiff[i];
+        }
+        parameterFile.close();
+        
+        if (m->control_pressed) { return 0; }
+        
+        string summaryFileName = getOutputFileName("summary", variables);
+        outputNames.push_back(outputFileName); outputTypes["summary"].push_back(outputFileName);
+        
+        ofstream summaryFile;
+        m->openOutputFile(summaryFileName, summaryFile); //((fileRoot + "mix.summary").c_str());
+        summaryFile.setf(ios::fixed, ios::floatfield);
+        summaryFile.setf(ios::showpoint);
+        
+        
+        summaryFile << "OTU\tP0.mean";
+        for(int i=0;i<numPartitions;i++){
+            summaryFile << "\tP" << i+1 << ".mean\tP" << i+1 << ".lci\tP" << i+1 << ".uci";
+        }
+        summaryFile << "\tDifference\tCumFraction" << endl;
+        
+        double cumDiff = 0.0000;
+        
+        for(int i=0;i<numOTUs;i++){
+            if (m->control_pressed) { break; }
+            summaryFile << summary[i].name << setprecision(2) << '\t' << summary[i].refMean;
+            for(int j=0;j<numPartitions;j++){
+                summaryFile  << '\t' << summary[i].partMean[j] << '\t' << summary[i].partLCI[j] << '\t' << summary[i].partUCI[j];
+            }
+            
+            cumDiff += summary[i].difference/totalDifference;
+            summaryFile << '\t' << summary[i].difference << '\t' << cumDiff << endl;
+        }
+        summaryFile.close();
+        
+        return 0;
+        
+    }
+       catch(exception& e) {
+               m->errorOut(e, "GetMetaCommunityCommand", "generateSummaryFile");
+               exit(1);
+       }
+    
+}
+//**********************************************************************************************************************
+