X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=unifracunweightedcommand.cpp;h=2e90626585b2c86e31f34770b7f7868eb2926ba7;hb=df905918ace8537273616b4120d404ebd29dc9cc;hp=68c943cb4e4c57efcc23eafe45f0da47fa6d4495;hpb=257808d42702d889a85d0132abc6b8776fc57418;p=mothur.git diff --git a/unifracunweightedcommand.cpp b/unifracunweightedcommand.cpp index 68c943c..2e90626 100644 --- a/unifracunweightedcommand.cpp +++ b/unifracunweightedcommand.cpp @@ -9,19 +9,71 @@ #include "unifracunweightedcommand.h" +//********************************************************************************************************************** +vector UnifracUnweightedCommand::getValidParameters(){ + try { + string Array[] = {"groups","iters","distance","random","root", "processors","outputdir","inputdir"}; + vector myArray (Array, Array+(sizeof(Array)/sizeof(string))); + return myArray; + } + catch(exception& e) { + m->errorOut(e, "UnifracUnweightedCommand", "getValidParameters"); + exit(1); + } +} +//********************************************************************************************************************** +UnifracUnweightedCommand::UnifracUnweightedCommand(){ + try { + globaldata = GlobalData::getInstance(); + abort = true; calledHelp = true; + vector tempOutNames; + outputTypes["unweighted"] = tempOutNames; + outputTypes["uwsummary"] = tempOutNames; + outputTypes["phylip"] = tempOutNames; + outputTypes["column"] = tempOutNames; + } + catch(exception& e) { + m->errorOut(e, "UnifracUnweightedCommand", "UnifracUnweightedCommand"); + exit(1); + } +} +//********************************************************************************************************************** +vector UnifracUnweightedCommand::getRequiredParameters(){ + try { + vector myArray; + return myArray; + } + catch(exception& e) { + m->errorOut(e, "UnifracUnweightedCommand", "getRequiredParameters"); + exit(1); + } +} +//********************************************************************************************************************** +vector UnifracUnweightedCommand::getRequiredFiles(){ + try { + string Array[] = {"tree","group"}; + vector myArray (Array, Array+(sizeof(Array)/sizeof(string))); + + return myArray; + } + catch(exception& e) { + m->errorOut(e, "UnifracUnweightedCommand", "getRequiredFiles"); + exit(1); + } +} /***********************************************************/ UnifracUnweightedCommand::UnifracUnweightedCommand(string option) { try { globaldata = GlobalData::getInstance(); - abort = false; + abort = false; calledHelp = false; Groups.clear(); //allow user to run help - if(option == "help") { help(); abort = true; } + if(option == "help") { help(); abort = true; calledHelp = true; } else { //valid paramters for this command - string Array[] = {"groups","iters","distance","random", "outputdir","inputdir"}; + string Array[] = {"groups","iters","distance","random","root", "processors","outputdir","inputdir"}; vector myArray (Array, Array+(sizeof(Array)/sizeof(string))); OptionParser parser(option); @@ -34,6 +86,13 @@ UnifracUnweightedCommand::UnifracUnweightedCommand(string option) { if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; } } + //initialize outputTypes + vector tempOutNames; + outputTypes["unweighted"] = tempOutNames; + outputTypes["uwsummary"] = tempOutNames; + outputTypes["phylip"] = tempOutNames; + outputTypes["column"] = tempOutNames; + if (globaldata->gTree.size() == 0) {//no trees were read m->mothurOut("You must execute the read.tree command, before you may execute the unifrac.unweighted command."); m->mothurOutEndLine(); abort = true; } @@ -55,12 +114,22 @@ UnifracUnweightedCommand::UnifracUnweightedCommand(string option) { itersString = validParameter.validFile(parameters, "iters", false); if (itersString == "not found") { itersString = "1000"; } convert(itersString, iters); - string temp = validParameter.validFile(parameters, "distance", false); if (temp == "not found") { temp = "false"; } - phylip = m->isTrue(temp); + string temp = validParameter.validFile(parameters, "distance", false); + if (temp == "not found") { phylip = false; outputForm = ""; } + else{ + if ((temp == "lt") || (temp == "column") || (temp == "square")) { phylip = true; outputForm = temp; } + else { m->mothurOut("Options for distance are: lt, square, or column. Using lt."); m->mothurOutEndLine(); phylip = true; outputForm = "lt"; } + } temp = validParameter.validFile(parameters, "random", false); if (temp == "not found") { temp = "f"; } random = m->isTrue(temp); + temp = validParameter.validFile(parameters, "root", false); if (temp == "not found") { temp = "F"; } + includeRoot = m->isTrue(temp); + + temp = validParameter.validFile(parameters, "processors", false); if (temp == "not found"){ temp = "1"; } + convert(temp, processors); + if (!random) { iters = 0; } //turn off random calcs //if user selects distance = true and no groups it won't calc the pairwise @@ -74,7 +143,7 @@ UnifracUnweightedCommand::UnifracUnweightedCommand(string option) { T = globaldata->gTree; tmap = globaldata->gTreemap; sumFile = outputDir + m->getSimpleName(globaldata->getTreeFile()) + ".uwsummary"; - outputNames.push_back(sumFile); + outputNames.push_back(sumFile); outputTypes["uwsummary"].push_back(sumFile); m->openOutputFile(sumFile, outSum); util = new SharedUtil(); @@ -83,7 +152,7 @@ UnifracUnweightedCommand::UnifracUnweightedCommand(string option) { if (numGroups == 1) { numComp++; groupComb.push_back(allGroups); } - unweighted = new Unweighted(tmap); + unweighted = new Unweighted(tmap, includeRoot); } @@ -101,11 +170,13 @@ UnifracUnweightedCommand::UnifracUnweightedCommand(string option) { void UnifracUnweightedCommand::help(){ try { m->mothurOut("The unifrac.unweighted command can only be executed after a successful read.tree command.\n"); - m->mothurOut("The unifrac.unweighted command parameters are groups, iters, distance and random. No parameters are required.\n"); + m->mothurOut("The unifrac.unweighted command parameters are groups, iters, distance, processors, root and random. No parameters are required.\n"); m->mothurOut("The groups parameter allows you to specify which of the groups in your groupfile you would like analyzed. You must enter at least 1 valid group.\n"); m->mothurOut("The group names are separated by dashes. The iters parameter allows you to specify how many random trees you would like compared to your tree.\n"); - m->mothurOut("The distance parameter allows you to create a distance file from the results. The default is false.\n"); + m->mothurOut("The distance parameter allows you to create a distance file from the results. The default is false. You may set distance to lt, square or column.\n"); m->mothurOut("The random parameter allows you to shut off the comparison to random trees. The default is false, meaning compare don't your trees with randomly generated trees.\n"); + m->mothurOut("The root parameter allows you to include the entire root in your calculations. The default is false, meaning stop at the root for this comparision instead of the root of the entire tree.\n"); + m->mothurOut("The processors parameter allows you to specify the number of processors to use. The default is 1.\n"); m->mothurOut("The unifrac.unweighted command should be in the following format: unifrac.unweighted(groups=yourGroups, iters=yourIters).\n"); m->mothurOut("Example unifrac.unweighted(groups=A-B-C, iters=500).\n"); m->mothurOut("The default value for groups is all the groups in your groupfile, and iters is 1000.\n"); @@ -123,7 +194,7 @@ void UnifracUnweightedCommand::help(){ int UnifracUnweightedCommand::execute() { try { - if (abort == true) { return 0; } + if (abort == true) { if (calledHelp) { return 0; } return 2; } int start = time(NULL); @@ -131,6 +202,8 @@ int UnifracUnweightedCommand::execute() { randomData.resize(numComp,0); //data[0] = unweightedscore //create new tree with same num nodes and leaves as users + if (numComp < processors) { processors = numComp; } + outSum << "Tree#" << '\t' << "Groups" << '\t' << "UWScore" <<'\t' << "UWSig" << endl; m->mothurOut("Tree#\tGroups\tUWScore\tUWSig"); m->mothurOutEndLine(); @@ -147,6 +220,7 @@ int UnifracUnweightedCommand::execute() { if (random) { output = new ColumnFile(outputDir + m->getSimpleName(globaldata->getTreeFile()) + toString(i+1) + ".unweighted", itersString); outputNames.push_back(outputDir + m->getSimpleName(globaldata->getTreeFile()) + toString(i+1) + ".unweighted"); + outputTypes["unweighted"].push_back(outputDir + m->getSimpleName(globaldata->getTreeFile()) + toString(i+1) + ".unweighted"); } @@ -156,14 +230,9 @@ int UnifracUnweightedCommand::execute() { utreeScores.resize(numComp); UWScoreSig.resize(numComp); - userData = unweighted->getValues(T[i]); //userData[0] = unweightedscore - - if (m->control_pressed) { - if (random) { delete output; } - outSum.close(); - for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } - return 0; - } + userData = unweighted->getValues(T[i], processors, outputDir); //userData[0] = unweightedscore + + if (m->control_pressed) { if (random) { delete output; } outSum.close(); for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); }return 0; } //output scores for each combination for(int k = 0; k < numComp; k++) { @@ -173,18 +242,14 @@ int UnifracUnweightedCommand::execute() { //add users score to validscores validScores[userData[k]] = userData[k]; } - + //get unweighted scores for random trees - if random is false iters = 0 for (int j = 0; j < iters; j++) { + //we need a different getValues because when we swap the labels we only want to swap those in each pairwise comparison - randomData = unweighted->getValues(T[i], "", ""); + randomData = unweighted->getValues(T[i], "", "", processors, outputDir); - if (m->control_pressed) { - if (random) { delete output; } - outSum.close(); - for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } - return 0; - } + if (m->control_pressed) { if (random) { delete output; } outSum.close(); for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; } for(int k = 0; k < numComp; k++) { //add trees unweighted score to map of scores @@ -198,6 +263,9 @@ int UnifracUnweightedCommand::execute() { //add randoms score to validscores validScores[randomData[k]] = randomData[k]; } + + //report progress +// m->mothurOut("Iter: " + toString(j+1)); m->mothurOutEndLine(); } for(int a = 0; a < numComp; a++) { @@ -218,13 +286,7 @@ int UnifracUnweightedCommand::execute() { } - - if (m->control_pressed) { - if (random) { delete output; } - outSum.close(); - for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } - return 0; - } + if (m->control_pressed) { if (random) { delete output; } outSum.close(); for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; } //print output files printUWSummaryFile(i); @@ -301,16 +363,16 @@ void UnifracUnweightedCommand::printUWSummaryFile(int i) { if (UWScoreSig[a][0] > (1/(float)iters)) { outSum << setprecision(6) << groupComb[a] << '\t' << utreeScores[a][0] << '\t' << setprecision(itersString.length()) << UWScoreSig[a][0] << endl; cout << setprecision(6) << groupComb[a] << '\t' << utreeScores[a][0] << '\t' << setprecision(itersString.length()) << UWScoreSig[a][0] << endl; - m->mothurOutJustToLog(groupComb[a] + "\t" + toString(utreeScores[a][0]) + "\t" + toString(UWScoreSig[a][0])); m->mothurOutEndLine(); + m->mothurOutJustToLog(groupComb[a] + "\t" + toString(utreeScores[a][0]) + "\t" + toString(UWScoreSig[a][0])+ "\n"); }else { outSum << setprecision(6) << groupComb[a] << '\t' << utreeScores[a][0] << '\t' << setprecision(itersString.length()) << "<" << (1/float(iters)) << endl; cout << setprecision(6) << groupComb[a] << '\t' << utreeScores[a][0] << '\t' << setprecision(itersString.length()) << "<" << (1/float(iters)) << endl; - m->mothurOutJustToLog(groupComb[a] + "\t" + toString(utreeScores[a][0]) + "\t<" + toString((1/float(iters)))); m->mothurOutEndLine(); + m->mothurOutJustToLog(groupComb[a] + "\t" + toString(utreeScores[a][0]) + "\t<" + toString((1/float(iters))) + "\n"); } }else{ outSum << setprecision(6) << groupComb[a] << '\t' << utreeScores[a][0] << '\t' << "0.00" << endl; cout << setprecision(6) << groupComb[a] << '\t' << utreeScores[a][0] << '\t' << "0.00" << endl; - m->mothurOutJustToLog(groupComb[a] + "\t" + toString(utreeScores[a][0]) + "\t0.00"); m->mothurOutEndLine(); + m->mothurOutJustToLog(groupComb[a] + "\t" + toString(utreeScores[a][0]) + "\t0.00\n"); } } @@ -323,15 +385,23 @@ void UnifracUnweightedCommand::printUWSummaryFile(int i) { /***********************************************************/ void UnifracUnweightedCommand::createPhylipFile(int i) { try { - string phylipFileName = outputDir + m->getSimpleName(globaldata->getTreeFile()) + toString(i+1) + ".unweighted.dist"; - outputNames.push_back(phylipFileName); + string phylipFileName; + if ((outputForm == "lt") || (outputForm == "square")) { + phylipFileName = outputDir + m->getSimpleName(globaldata->getTreeFile()) + toString(i+1) + ".unweighted.phylip.dist"; + outputNames.push_back(phylipFileName); outputTypes["phylip"].push_back(phylipFileName); + }else { //column + phylipFileName = outputDir + m->getSimpleName(globaldata->getTreeFile()) + toString(i+1) + ".unweighted.column.dist"; + outputNames.push_back(phylipFileName); outputTypes["column"].push_back(phylipFileName); + } ofstream out; m->openOutputFile(phylipFileName, out); - - //output numSeqs - out << globaldata->Groups.size() << endl; - + + if ((outputForm == "lt") || (outputForm == "square")) { + //output numSeqs + out << globaldata->Groups.size() << endl; + } + //make matrix with scores in it vector< vector > dists; dists.resize(globaldata->Groups.size()); for (int i = 0; i < globaldata->Groups.size(); i++) { @@ -341,7 +411,7 @@ void UnifracUnweightedCommand::createPhylipFile(int i) { //flip it so you can print it int count = 0; for (int r=0; rGroups.size(); r++) { - for (int l = r+1; l < globaldata->Groups.size(); l++) { + for (int l = 0; l < r; l++) { dists[r][l] = utreeScores[count][0]; dists[l][r] = utreeScores[count][0]; count++; @@ -355,11 +425,30 @@ void UnifracUnweightedCommand::createPhylipFile(int i) { if (name.length() < 10) { //pad with spaces to make compatible while (name.length() < 10) { name += " "; } } - out << name << '\t'; - //output distances - for (int l = 0; l < r; l++) { out << dists[r][l] << '\t'; } - out << endl; + if (outputForm == "lt") { + out << name << '\t'; + + //output distances + for (int l = 0; l < r; l++) { out << dists[r][l] << '\t'; } + out << endl; + }else if (outputForm == "square") { + out << name << '\t'; + + //output distances + for (int l = 0; l < globaldata->Groups.size(); l++) { out << dists[r][l] << '\t'; } + out << endl; + }else{ + //output distances + for (int l = 0; l < r; l++) { + string otherName = globaldata->Groups[l]; + if (otherName.length() < 10) { //pad with spaces to make compatible + while (otherName.length() < 10) { otherName += " "; } + } + + out << name << '\t' << otherName << dists[r][l] << endl; + } + } } out.close(); }