From 0cc0c01eb5127ef2b09b894e1f224ccc1d70bef0 Mon Sep 17 00:00:00 2001 From: westcott Date: Fri, 27 May 2011 17:59:12 +0000 Subject: [PATCH] added deunique.tree command --- Mothur.xcodeproj/project.pbxproj | 6 + commandfactory.cpp | 5 + deuniquetreecommand.cpp | 278 +++++++++++++++++++++++++++++++ deuniquetreecommand.h | 47 ++++++ matrixoutputcommand.cpp | 2 +- summarysharedcommand.cpp | 2 +- tree.cpp | 52 ++++++ treegroupscommand.cpp | 4 +- 8 files changed, 392 insertions(+), 4 deletions(-) create mode 100644 deuniquetreecommand.cpp create mode 100644 deuniquetreecommand.h diff --git a/Mothur.xcodeproj/project.pbxproj b/Mothur.xcodeproj/project.pbxproj index e80c733..546a2f6 100644 --- a/Mothur.xcodeproj/project.pbxproj +++ b/Mothur.xcodeproj/project.pbxproj @@ -40,6 +40,7 @@ A74D36B8137DAFAA00332B0C /* chimerauchimecommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A74D36B7137DAFAA00332B0C /* chimerauchimecommand.cpp */; }; A75790591301749D00A30DAB /* homovacommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A75790581301749D00A30DAB /* homovacommand.cpp */; }; A778FE6B134CA6CA00C0BA33 /* getcommandinfocommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A778FE6A134CA6CA00C0BA33 /* getcommandinfocommand.cpp */; }; + A77A221F139001B600B0BE70 /* deuniquetreecommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A77A221E139001B600B0BE70 /* deuniquetreecommand.cpp */; }; A799F5B91309A3E000AEEFA0 /* makefastqcommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A799F5B81309A3E000AEEFA0 /* makefastqcommand.cpp */; }; A7A61F2D130062E000E05B6B /* amovacommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7A61F2C130062E000E05B6B /* amovacommand.cpp */; }; A7E9B88112D37EC400DA6239 /* ace.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B64F12D37EC300DA6239 /* ace.cpp */; }; @@ -402,6 +403,8 @@ A75790581301749D00A30DAB /* homovacommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = homovacommand.cpp; sourceTree = ""; }; A778FE69134CA6CA00C0BA33 /* getcommandinfocommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = getcommandinfocommand.h; sourceTree = ""; }; A778FE6A134CA6CA00C0BA33 /* getcommandinfocommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = getcommandinfocommand.cpp; sourceTree = ""; }; + A77A221D139001B600B0BE70 /* deuniquetreecommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = deuniquetreecommand.h; sourceTree = ""; }; + A77A221E139001B600B0BE70 /* deuniquetreecommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = deuniquetreecommand.cpp; sourceTree = ""; }; A799F5B71309A3E000AEEFA0 /* makefastqcommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = makefastqcommand.h; sourceTree = ""; }; A799F5B81309A3E000AEEFA0 /* makefastqcommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = makefastqcommand.cpp; sourceTree = ""; }; A7A61F1A130035C800E05B6B /* LICENSE */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text; path = LICENSE; sourceTree = ""; }; @@ -1224,6 +1227,8 @@ A7E9B6C512D37EC400DA6239 /* degapseqscommand.cpp */, A7E9B6C812D37EC400DA6239 /* deuniqueseqscommand.h */, A7E9B6C712D37EC400DA6239 /* deuniqueseqscommand.cpp */, + A77A221D139001B600B0BE70 /* deuniquetreecommand.h */, + A77A221E139001B600B0BE70 /* deuniquetreecommand.cpp */, A7E9B6CC12D37EC400DA6239 /* distancecommand.h */, A7E9B6CB12D37EC400DA6239 /* distancecommand.cpp */, A7E9B6E412D37EC400DA6239 /* filterseqscommand.h */, @@ -2095,6 +2100,7 @@ A74D369B137DAB8400332B0C /* viterbifast.cpp in Sources */, A74D369C137DAB8400332B0C /* writechhit.cpp in Sources */, A74D36B8137DAFAA00332B0C /* chimerauchimecommand.cpp in Sources */, + A77A221F139001B600B0BE70 /* deuniquetreecommand.cpp in Sources */, ); runOnlyForDeploymentPostprocessing = 0; }; diff --git a/commandfactory.cpp b/commandfactory.cpp index 681e3e5..71558a9 100644 --- a/commandfactory.cpp +++ b/commandfactory.cpp @@ -118,6 +118,7 @@ #include "setcurrentcommand.h" #include "sharedcommand.h" #include "getcommandinfocommand.h" +#include "deuniquetreecommand.h" /*******************************************************/ @@ -239,6 +240,7 @@ CommandFactory::CommandFactory(){ commands["get.current"] = "get.current"; commands["set.current"] = "set.current"; commands["get.commandinfo"] = "get.commandinfo"; + commands["deunique.tree"] = "deunique.tree"; commands["pairwise.seqs"] = "MPIEnabled"; commands["pipeline.pds"] = "MPIEnabled"; commands["classify.seqs"] = "MPIEnabled"; @@ -409,6 +411,7 @@ Command* CommandFactory::getCommand(string commandName, string optionString){ else if(commandName == "anosim") { command = new AnosimCommand(optionString); } else if(commandName == "make.shared") { command = new SharedCommand(optionString); } else if(commandName == "get.commandinfo") { command = new GetCommandInfoCommand(optionString); } + else if(commandName == "deunique.tree") { command = new DeuniqueTreeCommand(optionString); } else { command = new NoCommand(optionString); } return command; @@ -545,6 +548,7 @@ Command* CommandFactory::getCommand(string commandName, string optionString, str else if(commandName == "set.current") { pipecommand = new SetCurrentCommand(optionString); } else if(commandName == "make.shared") { pipecommand = new SharedCommand(optionString); } else if(commandName == "get.commandinfo") { pipecommand = new GetCommandInfoCommand(optionString); } + else if(commandName == "deunique.tree") { pipecommand = new DeuniqueTreeCommand(optionString); } else { pipecommand = new NoCommand(optionString); } return pipecommand; @@ -669,6 +673,7 @@ Command* CommandFactory::getCommand(string commandName){ else if(commandName == "set.current") { shellcommand = new SetCurrentCommand(); } else if(commandName == "make.shared") { shellcommand = new SharedCommand(); } else if(commandName == "get.commandinfo") { shellcommand = new GetCommandInfoCommand(); } + else if(commandName == "deunique.tree") { shellcommand = new DeuniqueTreeCommand(); } else { shellcommand = new NoCommand(); } return shellcommand; diff --git a/deuniquetreecommand.cpp b/deuniquetreecommand.cpp new file mode 100644 index 0000000..7f33ad6 --- /dev/null +++ b/deuniquetreecommand.cpp @@ -0,0 +1,278 @@ +/* + * deuniquetreecommand.cpp + * Mothur + * + * Created by westcott on 5/27/11. + * Copyright 2011 Schloss Lab. All rights reserved. + * + */ + +#include "deuniquetreecommand.h" + +//********************************************************************************************************************** +vector DeuniqueTreeCommand::setParameters(){ + try { + CommandParameter ptree("tree", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(ptree); + CommandParameter pname("name", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pname); + CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir); + CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir); + + vector myArray; + for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); } + return myArray; + } + catch(exception& e) { + m->errorOut(e, "DeuniqueTreeCommand", "setParameters"); + exit(1); + } +} +//********************************************************************************************************************** +string DeuniqueTreeCommand::getHelpString(){ + try { + string helpString = ""; + helpString += "The deunique.tree command parameters are tree and name. Both parameters are required unless you have valid current files.\n"; + helpString += "The deunique.tree command should be in the following format: deunique.tree(tree=yourTreeFile, name=yourNameFile).\n"; + helpString += "Example deunique.tree(tree=abrecovery.tree, name=abrecovery.names).\n"; + helpString += "Note: No spaces between parameter labels (i.e. tree), '=' and parameters (i.e.yourTreeFile).\n"; + return helpString; + } + catch(exception& e) { + m->errorOut(e, "DeuniqueTreeCommand", "getHelpString"); + exit(1); + } +} +//********************************************************************************************************************** +DeuniqueTreeCommand::DeuniqueTreeCommand(){ + try { + abort = true; calledHelp = true; + setParameters(); + vector tempOutNames; + outputTypes["tree"] = tempOutNames; + } + catch(exception& e) { + m->errorOut(e, "DeuniqueTreeCommand", "DeuniqueTreeCommand"); + exit(1); + } +} +/***********************************************************/ +DeuniqueTreeCommand::DeuniqueTreeCommand(string option) { + try { + abort = false; calledHelp = false; + + //allow user to run help + if(option == "help") { help(); abort = true; calledHelp = true; } + else if(option == "citation") { citation(); abort = true; calledHelp = true;} + + else { + vector myArray = setParameters(); + + OptionParser parser(option); + map parameters = parser.getParameters(); + map::iterator it; + + ValidParameters validParameter; + + //check to make sure all parameters are valid for command + for (map::iterator it = parameters.begin(); it != parameters.end(); it++) { + if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; } + } + + //initialize outputTypes + vector tempOutNames; + outputTypes["tree"] = tempOutNames; + + //if the user changes the input directory command factory will send this info to us in the output parameter + string inputDir = validParameter.validFile(parameters, "inputdir", false); + if (inputDir == "not found"){ inputDir = ""; } + else { + string path; + it = parameters.find("tree"); + //user has given a template file + if(it != parameters.end()){ + path = m->hasPath(it->second); + //if the user has not given a path then, add inputdir. else leave path alone. + if (path == "") { parameters["tree"] = inputDir + it->second; } + } + + it = parameters.find("name"); + //user has given a template file + if(it != parameters.end()){ + path = m->hasPath(it->second); + //if the user has not given a path then, add inputdir. else leave path alone. + if (path == "") { parameters["name"] = inputDir + it->second; } + } + } + + m->runParse = true; + m->Groups.clear(); + m->namesOfGroups.clear(); + m->Treenames.clear(); + m->names.clear(); + + //check for required parameters + treefile = validParameter.validFile(parameters, "tree", true); + if (treefile == "not open") { abort = true; } + else if (treefile == "not found") { //if there is a current design file, use it + treefile = m->getTreeFile(); + if (treefile != "") { m->mothurOut("Using " + treefile + " as input file for the tree parameter."); m->mothurOutEndLine(); } + else { m->mothurOut("You have no current tree file and the tree parameter is required."); m->mothurOutEndLine(); abort = true; } + } + + namefile = validParameter.validFile(parameters, "name", true); + if (namefile == "not open") { abort = true; } + else if (namefile == "not found") { //if there is a current design file, use it + namefile = m->getNameFile(); + if (namefile != "") { m->mothurOut("Using " + namefile + " as input file for the name parameter."); m->mothurOutEndLine(); } + else { m->mothurOut("You have no current name file and the name parameter is required."); m->mothurOutEndLine(); abort = true; } + } + + outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = m->hasPath(treefile); } + } + + } + catch(exception& e) { + m->errorOut(e, "DeuniqueTreeCommand", "DeuniqueTreeCommand"); + exit(1); + } +} + +/***********************************************************/ +int DeuniqueTreeCommand::execute() { + try { + + if (abort == true) { if (calledHelp) { return 0; } return 2; } + + m->setTreeFile(treefile); + + //extracts names from tree to make faked out groupmap + Tree* tree = new Tree(treefile); delete tree; + tmap = new TreeMap(); + for (int i = 0; i < m->Treenames.size(); i++) { tmap->addSeq(m->Treenames[i], "Group1"); } + + if (m->control_pressed) { delete tmap; return 0; } + + readNamesFile(); + + if (m->control_pressed) { delete tmap; return 0; } + + ReadTree* read = new ReadNewickTree(treefile); + int readOk = read->read(tmap); + if (readOk != 0) { m->mothurOut("Read Terminated."); m->mothurOutEndLine(); delete tmap; delete read; return 0; } + + read->AssembleTrees(); + vector T = read->getTrees(); + delete read; + + //make sure all files match + //if you provide a namefile we will use the numNames in the namefile as long as the number of unique match the tree names size. + int numNamesInTree; + if (numUniquesInName == m->Treenames.size()) { numNamesInTree = nameMap.size(); } + else { numNamesInTree = m->Treenames.size(); } + + //output any names that are in group file but not in tree + if (numNamesInTree < tmap->getNumSeqs()) { + for (int i = 0; i < tmap->namesOfSeqs.size(); i++) { + //is that name in the tree? + int count = 0; + for (int j = 0; j < m->Treenames.size(); j++) { + if (tmap->namesOfSeqs[i] == m->Treenames[j]) { break; } //found it + count++; + } + + if (m->control_pressed) { + delete tmap; for (int i = 0; i < T.size(); i++) { delete T[i]; } + for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } outputTypes.clear(); + m->Groups.clear(); + return 0; + } + + //then you did not find it so report it + if (count == m->Treenames.size()) { + //if it is in your namefile then don't remove + map::iterator it = nameMap.find(tmap->namesOfSeqs[i]); + + if (it == nameMap.end()) { + m->mothurOut(tmap->namesOfSeqs[i] + " is in your groupfile and not in your tree. It will be disregarded."); m->mothurOutEndLine(); + tmap->removeSeq(tmap->namesOfSeqs[i]); + i--; //need this because removeSeq removes name from namesOfSeqs + } + } + } + } + + + //print new Tree + string outputFile = outputDir + m->getRootName(m->getSimpleName(treefile)) + "deunique.tre"; + outputNames.push_back(outputFile); outputTypes["tree"].push_back(outputFile); + ofstream out; + m->openOutputFile(outputFile, out); + T[0]->print(out, "deunique"); + out.close(); + + delete tmap; for (int i = 0; i < T.size(); i++) { delete T[i]; } + + //set phylip file as new current phylipfile + string current = ""; + itTypes = outputTypes.find("tree"); + if (itTypes != outputTypes.end()) { + if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setTreeFile(current); } + } + + m->mothurOutEndLine(); + m->mothurOut("Output File Names: "); m->mothurOutEndLine(); + for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); } + m->mothurOutEndLine(); + + return 0; + + } + catch(exception& e) { + m->errorOut(e, "DeuniqueTreeCommand", "execute"); + exit(1); + } +} +/*****************************************************************/ +int DeuniqueTreeCommand::readNamesFile() { + try { + m->names.clear(); + numUniquesInName = 0; + + ifstream in; + m->openInputFile(namefile, in); + + string first, second; + map::iterator itNames; + + while(!in.eof()) { + in >> first >> second; m->gobble(in); + + numUniquesInName++; + + itNames = m->names.find(first); + if (itNames == m->names.end()) { + m->names[first] = second; + + //we need a list of names in your namefile to use above when removing extra seqs above so we don't remove them + vector dupNames; + m->splitAtComma(second, dupNames); + + for (int i = 0; i < dupNames.size(); i++) { + nameMap[dupNames[i]] = dupNames[i]; + if (i != 0) { tmap->addSeq(dupNames[i], "Group1"); } + } + }else { m->mothurOut(first + " has already been seen in namefile, aborting."); m->mothurOutEndLine(); in.close(); m->names.clear(); m->control_pressed = true; return 1; } + } + in.close(); + + return 0; + } + catch(exception& e) { + m->errorOut(e, "DeuniqueTreeCommand", "readNamesFile"); + exit(1); + } +} +/***********************************************************/ + + + + diff --git a/deuniquetreecommand.h b/deuniquetreecommand.h new file mode 100644 index 0000000..5131c11 --- /dev/null +++ b/deuniquetreecommand.h @@ -0,0 +1,47 @@ +#ifndef DEUNIQUETREECOMMAND_H +#define DEUNIQUETREECOMMAND_H + +/* + * deuniquetreecommand.h + * Mothur + * + * Created by westcott on 5/27/11. + * Copyright 2011 Schloss Lab. All rights reserved. + * + */ + + +#include "command.hpp" +#include "treemap.h" +#include "sharedutilities.h" +#include "readtree.h" + +class DeuniqueTreeCommand : public Command { + +public: + DeuniqueTreeCommand(string); + DeuniqueTreeCommand(); + ~DeuniqueTreeCommand() {} + + vector setParameters(); + string getCommandName() { return "deunique.tree"; } + string getCommandCategory() { return "Hypothesis Testing"; } + string getHelpString(); + string getCitation() { return "http://www.mothur.org/wiki/Deunique.tree"; } + + int execute(); + void help() { m->mothurOut(getHelpString()); } + + +private: + TreeMap* tmap; + int numUniquesInName; + + bool abort; + string outputDir, treefile, namefile; + vector outputNames; + map nameMap; + int readNamesFile(); +}; + +#endif diff --git a/matrixoutputcommand.cpp b/matrixoutputcommand.cpp index adb145a..3b5d873 100644 --- a/matrixoutputcommand.cpp +++ b/matrixoutputcommand.cpp @@ -597,7 +597,7 @@ int MatrixOutputCommand::driver(vector thisLookup, int star if (m->control_pressed) { return 1; } - seqDist temp(l, k, (1.0 - tempdata[0])); + seqDist temp(l, k, tempdata[0]); calcDists[i].push_back(temp); } } diff --git a/summarysharedcommand.cpp b/summarysharedcommand.cpp index 4363eaa..cc1a374 100644 --- a/summarysharedcommand.cpp +++ b/summarysharedcommand.cpp @@ -733,7 +733,7 @@ int SummarySharedCommand::driver(vector thisLookup, int sta outputFileHandle << '\t'; sumCalculators[i]->print(outputFileHandle); - seqDist temp(l, k, (1.0 - tempdata[0])); + seqDist temp(l, k, tempdata[0]); calcDists[i].push_back(temp); } outputFileHandle << endl; diff --git a/tree.cpp b/tree.cpp index cd3d7a6..a7d7930 100644 --- a/tree.cpp +++ b/tree.cpp @@ -872,6 +872,11 @@ try { if (tree[node].getBranchLength() != -1) { out << ":" << tree[node].getBranchLength(); } + }else if (mode == "deunique") { + //if there is a branch length then print it + if (tree[node].getBranchLength() != -1) { + out << ":" << tree[node].getBranchLength(); + } } }else { //you are a leaf string leafGroup = tmap->getGroup(tree[node].getName()); @@ -897,6 +902,53 @@ try { if (tree[node].getBranchLength() != -1) { out << ":" << tree[node].getBranchLength(); } + }else if (mode == "deunique") { + map::iterator itNames = m->names.find(tree[node].getName()); + + string outputString = ""; + if (itNames != m->names.end()) { + + vector dupNames; + m->splitAtComma((itNames->second), dupNames); + + if (dupNames.size() == 1) { + outputString += tree[node].getName(); + if (tree[node].getBranchLength() != -1) { + outputString += ":" + toString(tree[node].getBranchLength()); + } + }else { + outputString += "("; + + for (int u = 0; u < dupNames.size()-1; u++) { + outputString += dupNames[u]; + + if (tree[node].getBranchLength() != -1) { + outputString += ":" + toString(0.0); + } + outputString += ","; + } + + outputString += dupNames[dupNames.size()-1]; + if (tree[node].getBranchLength() != -1) { + outputString += ":" + toString(0.0); + } + + outputString += ")"; + if (tree[node].getBranchLength() != -1) { + outputString += ":" + toString(tree[node].getBranchLength()); + } + } + }else { + outputString = tree[node].getName(); + //if there is a branch length then print it + if (tree[node].getBranchLength() != -1) { + outputString += ":" + toString(tree[node].getBranchLength()); + } + + m->mothurOut("[ERROR]: " + tree[node].getName() + " is not in your namefile, please correct."); m->mothurOutEndLine(); + } + + out << outputString; } } diff --git a/treegroupscommand.cpp b/treegroupscommand.cpp index 582a323..9c56d22 100644 --- a/treegroupscommand.cpp +++ b/treegroupscommand.cpp @@ -758,8 +758,8 @@ int TreeGroupCommand::process(vector thisLookup) { if (m->control_pressed) { return 1; } //save values in similarity matrix - simMatrix[k][l] = data[0]; - simMatrix[l][k] = data[0]; + simMatrix[k][l] = -(data[0]-1.0); + simMatrix[l][k] = -(data[0]-1.0); } } } -- 2.39.2