X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=unifracunweightedcommand.cpp;h=0749cb79ff41e03c55ca2a9defeb1b70e44645d8;hb=44f3a3c81a34fdee62550d98838a4b421e8df08e;hp=dbdee2ad942db51860cb0a7f403662a895def1bf;hpb=0caf3fbabaa3ece404f8ce77f4c883dc5b1bf1dc;p=mothur.git diff --git a/unifracunweightedcommand.cpp b/unifracunweightedcommand.cpp index dbdee2a..0749cb7 100644 --- a/unifracunweightedcommand.cpp +++ b/unifracunweightedcommand.cpp @@ -22,7 +22,7 @@ vector UnifracUnweightedCommand::setParameters(){ CommandParameter piters("iters", "Number", "", "1000", "", "", "",false,false); parameters.push_back(piters); CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors); CommandParameter prandom("random", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(prandom); - CommandParameter pdistance("distance", "Multiple", "column-lt-square", "column", "", "", "",false,false); parameters.push_back(pdistance); + CommandParameter pdistance("distance", "Multiple", "column-lt-square-phylip", "column", "", "", "",false,false); parameters.push_back(pdistance); CommandParameter psubsample("subsample", "String", "", "", "", "", "",false,false); parameters.push_back(psubsample); CommandParameter pconsensus("consensus", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pconsensus); CommandParameter proot("root", "Boolean", "F", "", "", "", "",false,false); parameters.push_back(proot); @@ -63,6 +63,31 @@ string UnifracUnweightedCommand::getHelpString(){ exit(1); } } +//********************************************************************************************************************** +string UnifracUnweightedCommand::getOutputFileNameTag(string type, string inputName=""){ + try { + string outputFileName = ""; + map >::iterator it; + + //is this a type this command creates + it = outputTypes.find(type); + if (it == outputTypes.end()) { m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n"); } + else { + if (type == "unweighted") { outputFileName = "unweighted"; } + else if (type == "uwsummary") { outputFileName = "uwsummary"; } + else if (type == "phylip") { outputFileName = "dist"; } + else if (type == "column") { outputFileName = "dist"; } + else if (type == "tree") { outputFileName = "tre"; } + else { m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n"); m->control_pressed = true; } + } + return outputFileName; + } + catch(exception& e) { + m->errorOut(e, "UnifracUnweightedCommand", "getOutputFileNameTag"); + exit(1); + } +} + //********************************************************************************************************************** UnifracUnweightedCommand::UnifracUnweightedCommand(){ try { @@ -179,6 +204,7 @@ UnifracUnweightedCommand::UnifracUnweightedCommand(string option) { string temp = validParameter.validFile(parameters, "distance", false); if (temp == "not found") { phylip = false; outputForm = ""; } else{ + if (temp=="phylip") { temp = "lt"; } if ((temp == "lt") || (temp == "column") || (temp == "square")) { phylip = true; outputForm = temp; } else { m->mothurOut("Options for distance are: lt, square, or column. Using lt."); m->mothurOutEndLine(); phylip = true; outputForm = "lt"; } } @@ -245,9 +271,10 @@ int UnifracUnweightedCommand::execute() { T = reader->getTrees(); tmap = T[0]->getTreeMap(); map nameMap = reader->getNames(); + map unique2Dup = reader->getNameMap(); delete reader; - sumFile = outputDir + m->getSimpleName(treefile) + ".uwsummary"; + sumFile = outputDir + m->getRootName(m->getSimpleName(treefile)) + getOutputFileNameTag("uwsummary"); outputNames.push_back(sumFile); outputTypes["uwsummary"].push_back(sumFile); m->openOutputFile(sumFile, outSum); @@ -281,7 +308,7 @@ int UnifracUnweightedCommand::execute() { int thisSize = thisGroupsSeqs.size(); if (thisSize >= subsampleSize) { Groups.push_back(newGroups[i]); } - else { m->mothurOut("You have selected a size that is larger than "+newGroups[i]+" number of sequences, removing "+newGroups[i]+".\n"); } + else { m->mothurOut("You have selected a size that is larger than "+newGroups[i]+" number of sequences, removing "+newGroups[i]+".\n"); } } m->setGroups(Groups); } @@ -305,12 +332,12 @@ int UnifracUnweightedCommand::execute() { for (int i = 0; i < T.size(); i++) { if (m->control_pressed) { delete tmap; for (int i = 0; i < T.size(); i++) { delete T[i]; }outSum.close(); for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; } - counter = 0; + counter = 0; if (random) { - output = new ColumnFile(outputDir + m->getSimpleName(treefile) + toString(i+1) + ".unweighted", itersString); - outputNames.push_back(outputDir + m->getSimpleName(treefile) + toString(i+1) + ".unweighted"); - outputTypes["unweighted"].push_back(outputDir + m->getSimpleName(treefile) + toString(i+1) + ".unweighted"); + output = new ColumnFile(outputDir + m->getSimpleName(treefile) + toString(i+1) + "." + getOutputFileNameTag("unweighted"), itersString); + outputNames.push_back(outputDir + m->getSimpleName(treefile) + toString(i+1) + "." + getOutputFileNameTag("unweighted")); + outputTypes["unweighted"].push_back(outputDir + m->getSimpleName(treefile) + toString(i+1) + "." + getOutputFileNameTag("unweighted")); } @@ -341,18 +368,23 @@ int UnifracUnweightedCommand::execute() { if (m->control_pressed) { delete tmap; for (int i = 0; i < T.size(); i++) { delete T[i]; }if (random) { delete output; } outSum.close(); for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; } + int startSubsample = time(NULL); + //subsample loop vector< vector > calcDistsTotals; //each iter, each groupCombos dists. this will be used to make .dist files for (int thisIter = 0; thisIter < subsampleIters; thisIter++) { //subsampleIters=0, if subsample=f. - if (m->control_pressed) { break; } //copy to preserve old one - would do this in subsample but memory cleanup becomes messy. TreeMap* newTmap = new TreeMap(); - newTmap->getCopy(*tmap); + //newTmap->getCopy(*tmap); + //SubSample sample; + //Tree* subSampleTree = sample.getSample(T[i], newTmap, nameMap, subsampleSize); + + //uses method of setting groups to doNotIncludeMe SubSample sample; - Tree* subSampleTree = sample.getSample(T[i], newTmap, nameMap, subsampleSize); + Tree* subSampleTree = sample.getSample(T[i], tmap, newTmap, subsampleSize, unique2Dup); //call new weighted function vector iterData; iterData.resize(numComp,0); @@ -367,6 +399,7 @@ int UnifracUnweightedCommand::execute() { if((thisIter+1) % 100 == 0){ m->mothurOut(toString(thisIter+1)); m->mothurOutEndLine(); } } + m->mothurOut("It took " + toString(time(NULL) - startSubsample) + " secs to run the subsampling."); m->mothurOutEndLine(); if (m->control_pressed) { delete tmap; for (int i = 0; i < T.size(); i++) { delete T[i]; }if (random) { delete output; } outSum.close(); for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; } @@ -473,15 +506,15 @@ int UnifracUnweightedCommand::getAverageSTDMatrices(vector< vector >& di } } - string aveFileName = outputDir + m->getSimpleName(treefile) + toString(treeNum+1) + ".unweighted.ave.dist"; - outputNames.push_back(aveFileName); outputTypes["phylip"].push_back(aveFileName); - + string aveFileName = outputDir + m->getSimpleName(treefile) + toString(treeNum+1) + ".unweighted.ave." + getOutputFileNameTag("phylip"); + if (outputForm != "column") { outputNames.push_back(aveFileName); outputTypes["phylip"].push_back(aveFileName); } + else { outputNames.push_back(aveFileName); outputTypes["column"].push_back(aveFileName); } ofstream out; m->openOutputFile(aveFileName, out); - string stdFileName = outputDir + m->getSimpleName(treefile) + toString(treeNum+1) + ".unweighted.std.dist"; - outputNames.push_back(stdFileName); outputTypes["phylip"].push_back(stdFileName); - + string stdFileName = outputDir + m->getSimpleName(treefile) + toString(treeNum+1) + ".unweighted.std." + getOutputFileNameTag("phylip"); + if (outputForm != "column") { outputNames.push_back(stdFileName); outputTypes["phylip"].push_back(stdFileName); } + else { outputNames.push_back(stdFileName); outputTypes["column"].push_back(stdFileName); } ofstream outStd; m->openOutputFile(stdFileName, outStd); @@ -562,7 +595,7 @@ int UnifracUnweightedCommand::getConsensusTrees(vector< vector >& dists, Tree* conTree = con.getTree(newTrees); //create a new filename - string conFile = outputDir + m->getRootName(m->getSimpleName(treefile)) + toString(treeNum+1) + ".unweighted.cons.tre"; + string conFile = outputDir + m->getRootName(m->getSimpleName(treefile)) + toString(treeNum+1) + ".unweighted.cons." + getOutputFileNameTag("tree"); outputNames.push_back(conFile); outputTypes["tree"].push_back(conFile); ofstream outTree; m->openOutputFile(conFile, outTree); @@ -586,7 +619,7 @@ vector UnifracUnweightedCommand::buildTrees(vector< vector >& dis vector trees; //create a new filename - string outputFile = outputDir + m->getRootName(m->getSimpleName(treefile)) + toString(treeNum+1) + ".unweighted.all.tre"; + string outputFile = outputDir + m->getRootName(m->getSimpleName(treefile)) + toString(treeNum+1) + ".unweighted.all." + getOutputFileNameTag("tree"); outputNames.push_back(outputFile); outputTypes["tree"].push_back(outputFile); ofstream outAll; @@ -754,10 +787,10 @@ void UnifracUnweightedCommand::createPhylipFile(int i) { try { string phylipFileName; if ((outputForm == "lt") || (outputForm == "square")) { - phylipFileName = outputDir + m->getSimpleName(treefile) + toString(i+1) + ".unweighted.phylip.dist"; + phylipFileName = outputDir + m->getSimpleName(treefile) + toString(i+1) + ".unweighted.phylip." + getOutputFileNameTag("phylip"); outputNames.push_back(phylipFileName); outputTypes["phylip"].push_back(phylipFileName); }else { //column - phylipFileName = outputDir + m->getSimpleName(treefile) + toString(i+1) + ".unweighted.column.dist"; + phylipFileName = outputDir + m->getSimpleName(treefile) + toString(i+1) + ".unweighted.column." + getOutputFileNameTag("column"); outputNames.push_back(phylipFileName); outputTypes["column"].push_back(phylipFileName); }