X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=getmetacommunitycommand.cpp;fp=getmetacommunitycommand.cpp;h=59efe608e624ebc4ab9e71ce77aba23554bd40cb;hb=4a760c2d164aa955dee7d3d38da323822763d906;hp=0000000000000000000000000000000000000000;hpb=feb0fbef36b8a681efc04e9b5e3efb1647b99021;p=mothur.git diff --git a/getmetacommunitycommand.cpp b/getmetacommunitycommand.cpp new file mode 100644 index 0000000..59efe60 --- /dev/null +++ b/getmetacommunitycommand.cpp @@ -0,0 +1,550 @@ +// +// getmetacommunitycommand.cpp +// Mothur +// +// Created by SarahsWork on 4/9/13. +// Copyright (c) 2013 Schloss Lab. All rights reserved. +// + +#include "getmetacommunitycommand.h" +#include "qFinderDMM.h" + +//********************************************************************************************************************** +vector GetMetaCommunityCommand::setParameters(){ + try { + CommandParameter pshared("shared", "InputTypes", "", "", "none", "none", "none","outputType",false,true); parameters.push_back(pshared); + CommandParameter pgroups("groups", "String", "", "", "", "", "","",false,false); parameters.push_back(pgroups); + CommandParameter plabel("label", "String", "", "", "", "", "","",false,false); parameters.push_back(plabel); + CommandParameter pminpartitions("minpartitions", "Number", "", "5", "", "", "","",false,false,true); parameters.push_back(pminpartitions); + CommandParameter pmaxpartitions("maxpartitions", "Number", "", "10", "", "", "","",false,false,true); parameters.push_back(pmaxpartitions); + CommandParameter poptimizegap("optimizegap", "Number", "", "3", "", "", "","",false,false,true); parameters.push_back(poptimizegap); + CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir); + CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir); + + vector myArray; + for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); } + return myArray; + } + catch(exception& e) { + m->errorOut(e, "NewCommand", "setParameters"); + exit(1); + } +} +//********************************************************************************************************************** +string GetMetaCommunityCommand::getHelpString(){ + try { + string helpString = ""; + helpString += "The get.metacommunity command parameters are shared, label, groups, minpartitions, maxpartitions and optimizegap. The shared file is required. \n"; + helpString += "The label parameter is used to analyze specific labels in your input. labels are separated by dashes.\n"; + helpString += "The groups parameter allows you to specify which of the groups in your shared file you would like analyzed. Group names are separated by dashes.\n"; + helpString += "The minpartitions parameter is used to .... Default=5.\n"; + helpString += "The maxpartitions parameter is used to .... Default=10.\n"; + helpString += "The optimizegap parameter is used to .... Default=3.\n"; + helpString += "The get.metacommunity command should be in the following format: get.metacommunity(shared=yourSharedFile).\n"; + return helpString; + } + catch(exception& e) { + m->errorOut(e, "GetMetaCommunityCommand", "getHelpString"); + exit(1); + } +} +//********************************************************************************************************************** +string GetMetaCommunityCommand::getOutputPattern(string type) { + try { + string pattern = ""; + + if (type == "fit") { pattern = "[filename],[distance],mix.fit"; } + else if (type == "relabund") { pattern = "[filename],[distance],[tag],mix.relabund"; } + else if (type == "design") { pattern = "[filename],mix.design"; } + else if (type == "matrix") { pattern = "[filename],[distance],[tag],mix.posterior"; } + else if (type == "parameters") { pattern = "[filename],[distance],mix.parameters"; } + else if (type == "summary") { pattern = "[filename],[distance],mix.summary"; } + else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true; } + + return pattern; + } + catch(exception& e) { + m->errorOut(e, "GetMetaCommunityCommand", "getOutputPattern"); + exit(1); + } +} +//********************************************************************************************************************** +GetMetaCommunityCommand::GetMetaCommunityCommand(){ + try { + abort = true; calledHelp = true; + setParameters(); + vector tempOutNames; + outputTypes["fit"] = tempOutNames; + outputTypes["relabund"] = tempOutNames; + outputTypes["matrix"] = tempOutNames; + outputTypes["design"] = tempOutNames; + outputTypes["parameters"] = tempOutNames; + outputTypes["summary"] = tempOutNames; + } + catch(exception& e) { + m->errorOut(e, "GetMetaCommunityCommand", "GetMetaCommunityCommand"); + exit(1); + } +} +//********************************************************************************************************************** +GetMetaCommunityCommand::GetMetaCommunityCommand(string option) { + try { + abort = false; calledHelp = false; + allLines=true; + + //allow user to run help + if(option == "help") { help(); abort = true; calledHelp = true; } + else if(option == "citation") { citation(); abort = true; calledHelp = true;} + + else { + //valid paramters for this command + vector myArray = setParameters(); + + OptionParser parser(option); + map parameters = parser.getParameters(); + + ValidParameters validParameter; + map::iterator it; + //check to make sure all parameters are valid for command + for (it = parameters.begin(); it != parameters.end(); it++) { + if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; } + } + + vector tempOutNames; + outputTypes["fit"] = tempOutNames; + outputTypes["relabund"] = tempOutNames; + outputTypes["matrix"] = tempOutNames; + outputTypes["design"] = tempOutNames; + outputTypes["parameters"] = tempOutNames; + outputTypes["summary"] = tempOutNames; + + //if the user changes the input directory command factory will send this info to us in the output parameter + string inputDir = validParameter.validFile(parameters, "inputdir", false); + if (inputDir == "not found"){ inputDir = ""; } + else { + string path; + it = parameters.find("shared"); + if(it != parameters.end()){ + path = m->hasPath(it->second); + if (path == "") { parameters["shared"] = inputDir + it->second; } + } + } + + //get shared file, it is required + sharedfile = validParameter.validFile(parameters, "shared", true); + if (sharedfile == "not open") { sharedfile = ""; abort = true; } + else if (sharedfile == "not found") { + //if there is a current shared file, use it + sharedfile = m->getSharedFile(); + if (sharedfile != "") { m->mothurOut("Using " + sharedfile + " as input file for the shared parameter."); m->mothurOutEndLine(); } + else { m->mothurOut("You have no current sharedfile and the shared parameter is required."); m->mothurOutEndLine(); abort = true; } + }else { m->setSharedFile(sharedfile); } + + //if the user changes the output directory command factory will send this info to us in the output parameter + outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ + outputDir = m->hasPath(sharedfile); //if user entered a file with a path then preserve it + } + + string temp = validParameter.validFile(parameters, "minpartitions", false); if (temp == "not found"){ temp = "5"; } + m->mothurConvert(temp, minpartitions); + + temp = validParameter.validFile(parameters, "maxpartitions", false); if (temp == "not found"){ temp = "10"; } + m->mothurConvert(temp, maxpartitions); + + temp = validParameter.validFile(parameters, "optimizegap", false); if (temp == "not found"){ temp = "3"; } + m->mothurConvert(temp, optimizegap); + + string groups = validParameter.validFile(parameters, "groups", false); + if (groups == "not found") { groups = ""; } + else { m->splitAtDash(groups, Groups); } + m->setGroups(Groups); + + string label = validParameter.validFile(parameters, "label", false); + if (label == "not found") { label = ""; } + else { + if(label != "all") { m->splitAtDash(label, labels); allLines = 0; } + else { allLines = 1; } + } + } + + } + catch(exception& e) { + m->errorOut(e, "GetMetaCommunityCommand", "GetMetaCommunityCommand"); + exit(1); + } +} +//********************************************************************************************************************** + +int GetMetaCommunityCommand::execute(){ + try { + + if (abort == true) { if (calledHelp) { return 0; } return 2; } + + InputData input(sharedfile, "sharedfile"); + vector lookup = input.getSharedRAbundVectors(); + string lastLabel = lookup[0]->getLabel(); + + //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label. + set processedLabels; + set userLabels = labels; + + //as long as you are not at the end of the file or done wih the lines you want + while((lookup[0] != NULL) && ((allLines == 1) || (userLabels.size() != 0))) { + + if (m->control_pressed) { for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; } return 0; } + + if(allLines == 1 || labels.count(lookup[0]->getLabel()) == 1){ + + m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine(); + + process(lookup); + + processedLabels.insert(lookup[0]->getLabel()); + userLabels.erase(lookup[0]->getLabel()); + } + + if ((m->anyLabelsToProcess(lookup[0]->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) { + string saveLabel = lookup[0]->getLabel(); + + for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; } + lookup = input.getSharedRAbundVectors(lastLabel); + m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine(); + + process(lookup); + + processedLabels.insert(lookup[0]->getLabel()); + userLabels.erase(lookup[0]->getLabel()); + + //restore real lastlabel to save below + lookup[0]->setLabel(saveLabel); + } + + lastLabel = lookup[0]->getLabel(); + //prevent memory leak + for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; lookup[i] = NULL; } + + if (m->control_pressed) { return 0; } + + //get next line to process + lookup = input.getSharedRAbundVectors(); + } + + if (m->control_pressed) { return 0; } + + //output error messages about any remaining user labels + set::iterator it; + bool needToRun = false; + for (it = userLabels.begin(); it != userLabels.end(); it++) { + m->mothurOut("Your file does not include the label " + *it); + if (processedLabels.count(lastLabel) != 1) { + m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine(); + needToRun = true; + }else { + m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine(); + } + } + + //run last label if you need to + if (needToRun == true) { + for (int i = 0; i < lookup.size(); i++) { if (lookup[i] != NULL) { delete lookup[i]; } } + lookup = input.getSharedRAbundVectors(lastLabel); + + m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine(); + + process(lookup); + + for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; } + } + + //output files created by command + m->mothurOutEndLine(); + m->mothurOut("Output File Names: "); m->mothurOutEndLine(); + for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); } + m->mothurOutEndLine(); + return 0; + + } + catch(exception& e) { + m->errorOut(e, "GetMetaCommunityCommand", "execute"); + exit(1); + } +} +//********************************************************************************************************************** +int GetMetaCommunityCommand::process(vector& thislookup){ + try { + + double minLaplace = 1e10; + int minPartition = 0; + + map variables; + variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(sharedfile)); + variables["[distance]"] = thislookup[0]->getLabel(); + string outputFileName = getOutputFileName("fit", variables); + outputNames.push_back(outputFileName); outputTypes["fit"].push_back(outputFileName); + + ofstream fitData; + m->openOutputFile(outputFileName, fitData); + fitData.setf(ios::fixed, ios::floatfield); + fitData.setf(ios::showpoint); + cout.setf(ios::fixed, ios::floatfield); + cout.setf(ios::showpoint); + + vector< vector > sharedMatrix; + for (int i = 0; i < thislookup.size(); i++) { sharedMatrix.push_back(thislookup[i]->getAbundances()); } + + m->mothurOut("K\tNLE\t\tlogDet\tBIC\t\tAIC\t\tLaplace\n"); + fitData << "K\tNLE\tlogDet\tBIC\tAIC\tLaplace" << endl; + + for(int numPartitions=1;numPartitions<=maxpartitions;numPartitions++){ + + if (m->control_pressed) { break; } + + qFinderDMM findQ(sharedMatrix, numPartitions); + + double laplace = findQ.getLaplace(); + m->mothurOut(toString(numPartitions) + '\t'); + cout << setprecision (2) << findQ.getNLL() << '\t' << findQ.getLogDet() << '\t'; + m->mothurOutJustToLog(toString(findQ.getNLL()) + '\t' + toString(findQ.getLogDet()) + '\t'); + cout << findQ.getBIC() << '\t' << findQ.getAIC() << '\t' << laplace; + m->mothurOutJustToLog(toString(findQ.getBIC()) + '\t' + toString(findQ.getAIC()) + '\t' + toString(laplace)); + + fitData << numPartitions << '\t'; + fitData << setprecision (2) << findQ.getNLL() << '\t' << findQ.getLogDet() << '\t'; + fitData << findQ.getBIC() << '\t' << findQ.getAIC() << '\t' << laplace << endl; + + if(laplace < minLaplace){ + minPartition = numPartitions; + minLaplace = laplace; + m->mothurOut("***"); + } + m->mothurOutEndLine(); + + variables["[tag]"] = toString(numPartitions); + string relabund = getOutputFileName("relabund", variables); + outputNames.push_back(relabund); outputTypes["relabund"].push_back(relabund); + string matrix = getOutputFileName("matrix", variables); + outputNames.push_back(matrix); outputTypes["matrix"].push_back(matrix); + + findQ.printZMatrix(matrix, m->getGroups()); + findQ.printRelAbund(relabund, m->currentBinLabels); + + if(optimizegap != -1 && (numPartitions - minPartition) >= optimizegap && numPartitions >= minpartitions){ break; } + } + fitData.close(); + + //minPartition = 4; + + if (m->control_pressed) { return 0; } + + generateSummaryFile(minpartitions, outputTypes["relabund"][0], outputTypes["relabund"][outputTypes["relabund"].size()-1], outputTypes["matrix"][outputTypes["matrix"].size()-1], thislookup[0]->getLabel()); + + + return 0; + } + catch(exception& e) { + m->errorOut(e, "GetMetaCommunityCommand", "process"); + exit(1); + } +} +/**************************************************************************************************/ + +vector GetMetaCommunityCommand::generateDesignFile(int numPartitions, string input){ + try { + vector piValues(numPartitions, 0); + + ifstream postFile; + m->openInputFile(input, postFile);//((fileRoot + toString(numPartitions) + "mix.posterior").c_str()); //matrix file + map variables; + variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(input)); + string outputFileName = getOutputFileName("design", variables); + ofstream designFile; + m->openOutputFile(outputFileName, designFile); + outputNames.push_back(outputFileName); outputTypes["design"].push_back(outputFileName); + + + vector titles(numPartitions); + + for(int i=0;i> titles[i]; } + + double posterior; + string sampleName; + int numSamples = 0; + + while(postFile){ + + if (m->control_pressed) { break; } + + double maxPosterior = 0.0000; + int maxPartition = -1; + + postFile >> sampleName; + + for(int i=0;i> posterior; + if(posterior > maxPosterior){ + maxPosterior = posterior; + maxPartition = i; + } + piValues[i] += posterior; + + } + + designFile << sampleName << '\t' << titles[maxPartition] << endl; + + numSamples++; + m->gobble(postFile); + } + for(int i=0;ierrorOut(e, "GetMetaCommunityCommand", "generateDesignFile"); + exit(1); + } +} + +/**************************************************************************************************/ + +inline bool summaryFunction(summaryData i, summaryData j){ return i.difference > j.difference; } + +/**************************************************************************************************/ +int GetMetaCommunityCommand::generateSummaryFile(int numPartitions, string reference, string partFile, string designInput, string label){ + try { + vector summary; + + vector pMean(numPartitions, 0); + vector pLCI(numPartitions, 0); + vector pUCI(numPartitions, 0); + + string name, header; + double mean, lci, uci; + + + vector piValues = generateDesignFile(numPartitions, designInput); + + ifstream referenceFile; + m->openInputFile(reference, referenceFile); //((fileRoot + label + ".1mix.relabund").c_str()); + ifstream partitionFile; + m->openInputFile(partFile, partitionFile); //((fileRoot + toString(numPartitions) + "mix.relabund").c_str()); + + header = m->getline(referenceFile); + header = m->getline(partitionFile); + stringstream head(header); + string dummy, label; + head >> dummy; + vector thetaValues(numPartitions, ""); + for(int i=0;i> label >> dummy >> dummy; + thetaValues[i] = label.substr(label.find_last_of('_')+1); + } + + + vector partitionDiff(numPartitions, 0.0000); + + while(referenceFile){ + + if (m->control_pressed) { break; } + + referenceFile >> name >> mean >> lci >> uci; + + summaryData tempData; + tempData.name = name; + tempData.refMean = mean; + + double difference = 0.0000; + + partitionFile >> name; + for(int j=0;j> pMean[j] >> pLCI[j] >> pUCI[j]; + difference += abs(mean - pMean[j]); + partitionDiff[j] += abs(mean - pMean[j]);; + } + + tempData.partMean = pMean; + tempData.partLCI = pLCI; + tempData.partUCI = pUCI; + tempData.difference = difference; + summary.push_back(tempData); + + m->gobble(referenceFile); + m->gobble(partitionFile); + } + referenceFile.close(); + partitionFile.close(); + + if (m->control_pressed) { return 0; } + + int numOTUs = (int)summary.size(); + + sort(summary.begin(), summary.end(), summaryFunction); + + map variables; + variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(sharedfile)); + variables["[distance]"] = label; + string outputFileName = getOutputFileName("parameters", variables); + outputNames.push_back(outputFileName); outputTypes["parameters"].push_back(outputFileName); + + ofstream parameterFile; + m->openOutputFile(outputFileName, parameterFile); //((fileRoot + "mix.parameters").c_str()); + parameterFile.setf(ios::fixed, ios::floatfield); + parameterFile.setf(ios::showpoint); + + double totalDifference = 0.0000; + parameterFile << "Part\tDif2Ref_i\ttheta_i\tpi_i\n"; + for(int i=0;icontrol_pressed) { break; } + parameterFile << i+1 << '\t' << setprecision(2) << partitionDiff[i] << '\t' << thetaValues[i] << '\t' << piValues[i] << endl; + totalDifference += partitionDiff[i]; + } + parameterFile.close(); + + if (m->control_pressed) { return 0; } + + string summaryFileName = getOutputFileName("summary", variables); + outputNames.push_back(outputFileName); outputTypes["summary"].push_back(outputFileName); + + ofstream summaryFile; + m->openOutputFile(summaryFileName, summaryFile); //((fileRoot + "mix.summary").c_str()); + summaryFile.setf(ios::fixed, ios::floatfield); + summaryFile.setf(ios::showpoint); + + + summaryFile << "OTU\tP0.mean"; + for(int i=0;icontrol_pressed) { break; } + summaryFile << summary[i].name << setprecision(2) << '\t' << summary[i].refMean; + for(int j=0;jerrorOut(e, "GetMetaCommunityCommand", "generateSummaryFile"); + exit(1); + } + +} +//********************************************************************************************************************** +