From 17261b0ad578de8aac463dd6977a2d6fdee565a1 Mon Sep 17 00:00:00 2001 From: westcott Date: Mon, 4 May 2009 12:33:27 +0000 Subject: [PATCH] added concensus command --- Mothur.xcodeproj/project.pbxproj | 62 +++++++++++ concensuscommand.cpp | 184 +++++++++++++++++++++++++++++++ concensuscommand.h | 43 ++++++++ 3 files changed, 289 insertions(+) create mode 100644 concensuscommand.cpp create mode 100644 concensuscommand.h diff --git a/Mothur.xcodeproj/project.pbxproj b/Mothur.xcodeproj/project.pbxproj index 6fd4a06..7da4ca2 100644 --- a/Mothur.xcodeproj/project.pbxproj +++ b/Mothur.xcodeproj/project.pbxproj @@ -35,6 +35,7 @@ 375AA1390F9E433D008EF9B8 /* readcolumn.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 375AA1330F9E433D008EF9B8 /* readcolumn.cpp */; }; 375AA13A0F9E433D008EF9B8 /* readotu.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 375AA1350F9E433D008EF9B8 /* readotu.cpp */; }; 375AA13B0F9E433D008EF9B8 /* readphylip.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 375AA1370F9E433D008EF9B8 /* readphylip.cpp */; }; + 377326650FAF16E0007ABB8B /* concensuscommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 377326630FAF16E0007ABB8B /* concensuscommand.cpp */; }; 379293C30F2DE73400B9034A /* treemap.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 379293C20F2DE73400B9034A /* treemap.cpp */; }; 379294700F2E191800B9034A /* parsimonycommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 3792946F0F2E191800B9034A /* parsimonycommand.cpp */; }; 3792948A0F2E258500B9034A /* parsimony.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 379294890F2E258500B9034A /* parsimony.cpp */; }; @@ -200,6 +201,20 @@ 375AA1360F9E433D008EF9B8 /* readotu.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = readotu.h; sourceTree = ""; }; 375AA1370F9E433D008EF9B8 /* readphylip.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = readphylip.cpp; sourceTree = ""; }; 375AA1380F9E433D008EF9B8 /* readphylip.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = readphylip.h; sourceTree = ""; }; + 377326560FAF16DF007ABB8B /* categories.pbxbtree */ = {isa = PBXFileReference; lastKnownFileType = file; path = categories.pbxbtree; sourceTree = ""; }; + 377326570FAF16DF007ABB8B /* cdecls.pbxbtree */ = {isa = PBXFileReference; lastKnownFileType = file; path = cdecls.pbxbtree; sourceTree = ""; }; + 377326580FAF16DF007ABB8B /* decls.pbxbtree */ = {isa = PBXFileReference; lastKnownFileType = file; path = decls.pbxbtree; sourceTree = ""; }; + 377326590FAF16DF007ABB8B /* files.pbxbtree */ = {isa = PBXFileReference; lastKnownFileType = file; path = files.pbxbtree; sourceTree = ""; }; + 3773265A0FAF16DF007ABB8B /* imports.pbxbtree */ = {isa = PBXFileReference; lastKnownFileType = file; path = imports.pbxbtree; sourceTree = ""; }; + 3773265B0FAF16DF007ABB8B /* pbxindex.header */ = {isa = PBXFileReference; lastKnownFileType = file; path = pbxindex.header; sourceTree = ""; }; + 3773265C0FAF16DF007ABB8B /* protocols.pbxbtree */ = {isa = PBXFileReference; lastKnownFileType = file; path = protocols.pbxbtree; sourceTree = ""; }; + 3773265D0FAF16DF007ABB8B /* refs.pbxbtree */ = {isa = PBXFileReference; lastKnownFileType = file; path = refs.pbxbtree; sourceTree = ""; }; + 3773265F0FAF16DF007ABB8B /* control */ = {isa = PBXFileReference; lastKnownFileType = file; path = control; sourceTree = ""; }; + 377326600FAF16DF007ABB8B /* strings */ = {isa = PBXFileReference; lastKnownFileType = file; path = strings; sourceTree = ""; }; + 377326610FAF16E0007ABB8B /* subclasses.pbxbtree */ = {isa = PBXFileReference; lastKnownFileType = file; path = subclasses.pbxbtree; sourceTree = ""; }; + 377326620FAF16E0007ABB8B /* symbols0.pbxsymbols */ = {isa = PBXFileReference; lastKnownFileType = file; path = symbols0.pbxsymbols; sourceTree = ""; }; + 377326630FAF16E0007ABB8B /* concensuscommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = concensuscommand.cpp; sourceTree = ""; }; + 377326640FAF16E0007ABB8B /* concensuscommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = concensuscommand.h; sourceTree = ""; }; 379293C10F2DE73400B9034A /* treemap.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = treemap.h; sourceTree = ""; }; 379293C20F2DE73400B9034A /* treemap.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = treemap.cpp; sourceTree = ""; }; 3792946E0F2E191800B9034A /* parsimonycommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = parsimonycommand.h; sourceTree = ""; }; @@ -411,6 +426,7 @@ 08FB7794FE84155DC02AAC07 /* Mothur */ = { isa = PBXGroup; children = ( + 377326530FAF16DF007ABB8B /* build */, 08FB7795FE84155DC02AAC07 /* Source */, C6859E8C029090F304C91782 /* Documentation */, 1AB674ADFE9D54B511CA2CBB /* Products */, @@ -504,6 +520,49 @@ name = Products; sourceTree = ""; }; + 377326530FAF16DF007ABB8B /* build */ = { + isa = PBXGroup; + children = ( + 377326540FAF16DF007ABB8B /* Mothur.build */, + ); + path = build; + sourceTree = ""; + }; + 377326540FAF16DF007ABB8B /* Mothur.build */ = { + isa = PBXGroup; + children = ( + 377326550FAF16DF007ABB8B /* Mothur.pbxindex */, + ); + path = Mothur.build; + sourceTree = ""; + }; + 377326550FAF16DF007ABB8B /* Mothur.pbxindex */ = { + isa = PBXGroup; + children = ( + 377326560FAF16DF007ABB8B /* categories.pbxbtree */, + 377326570FAF16DF007ABB8B /* cdecls.pbxbtree */, + 377326580FAF16DF007ABB8B /* decls.pbxbtree */, + 377326590FAF16DF007ABB8B /* files.pbxbtree */, + 3773265A0FAF16DF007ABB8B /* imports.pbxbtree */, + 3773265B0FAF16DF007ABB8B /* pbxindex.header */, + 3773265C0FAF16DF007ABB8B /* protocols.pbxbtree */, + 3773265D0FAF16DF007ABB8B /* refs.pbxbtree */, + 3773265E0FAF16DF007ABB8B /* strings.pbxstrings */, + 377326610FAF16E0007ABB8B /* subclasses.pbxbtree */, + 377326620FAF16E0007ABB8B /* symbols0.pbxsymbols */, + ); + path = Mothur.pbxindex; + sourceTree = ""; + }; + 3773265E0FAF16DF007ABB8B /* strings.pbxstrings */ = { + isa = PBXGroup; + children = ( + 3773265F0FAF16DF007ABB8B /* control */, + 377326600FAF16DF007ABB8B /* strings */, + ); + path = strings.pbxstrings; + sourceTree = ""; + }; 37D928A60F2133C0001D4494 /* calculators */ = { isa = PBXGroup; children = ( @@ -610,6 +669,8 @@ 37D927C70F21331F001D4494 /* collectcommand.cpp */, 37D927CC0F21331F001D4494 /* collectsharedcommand.h */, 37D927CB0F21331F001D4494 /* collectsharedcommand.cpp */, + 377326640FAF16E0007ABB8B /* concensuscommand.h */, + 377326630FAF16E0007ABB8B /* concensuscommand.cpp */, 37B28F660F27590100808A62 /* deconvolutecommand.h */, 37B28F670F27590100808A62 /* deconvolutecommand.cpp */, A70B53A50F4CD7AD0064797E /* getgroupcommand.h */, @@ -885,6 +946,7 @@ 375AA13B0F9E433D008EF9B8 /* readphylip.cpp in Sources */, 7EC3D4550FA0FFF900338DA5 /* coverage.cpp in Sources */, 7EC3D4560FA0FFF900338DA5 /* whittaker.cpp in Sources */, + 377326650FAF16E0007ABB8B /* concensuscommand.cpp in Sources */, ); runOnlyForDeploymentPostprocessing = 0; }; diff --git a/concensuscommand.cpp b/concensuscommand.cpp new file mode 100644 index 0000000..cc00cd0 --- /dev/null +++ b/concensuscommand.cpp @@ -0,0 +1,184 @@ +/* + * concensuscommand.cpp + * Mothur + * + * Created by Sarah Westcott on 4/29/09. + * Copyright 2009 Schloss Lab UMASS AMherst. All rights reserved. + * + */ + +#include "concensuscommand.h" + +//********************************************************************************************************************** + +ConcensusCommand::ConcensusCommand(){ + try { + globaldata = GlobalData::getInstance(); + t = globaldata->gTree; + } + catch(exception& e) { + cout << "Standard Error: " << e.what() << " has occurred in the ConcensusCommand class Function ConcensusCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + exit(1); + } + catch(...) { + cout << "An unknown error has occurred in the ConcensusCommand class function ConcensusCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + exit(1); + } +} +//********************************************************************************************************************** + +ConcensusCommand::~ConcensusCommand(){} + +//********************************************************************************************************************** + +int ConcensusCommand::execute(){ + try { + + if (t.size() == 0) { return 0; } + else { + numNodes = t[0]->getNumNodes(); + numLeaves = t[0]->getNumLeaves(); + } + + //initialize nodepairs + nodePairs.resize(numNodes-numLeaves); + + //process each tree and fill counts + for (int i = 0; i < t.size(); i++) { + //process each nonleaf node + for (int j = numLeaves; j < numNodes; j++) { + //get pairing + int leftChild = t[i]->tree[j].getLChild(); + int rightChild = t[i]->tree[j].getRChild(); + string pair = toString(leftChild) + "-" + toString(rightChild); + + //if this is an existing pairing for this node then increment the count otherwise add new pairing. + it = nodePairs[j-numLeaves].find(pair); + if (it != nodePairs[j-numLeaves].end()) {//already have that score + nodePairs[j-numLeaves][pair]++; + }else{//first time we have seen this score + nodePairs[j-numLeaves][pair] = 1; + } + } + } + + + //print out pairings for testing + /*for (int i = 0; i < nodePairs.size(); i++) { + cout << "pairs for node " << i+numLeaves << endl; + for (it = nodePairs[i].begin(); it != nodePairs[i].end(); it++) { + cout << " pair = " << it->first << " count = " << it->second << endl; + } + }*/ + + //open file for pairing not included in the tree + notIncluded = getRootName(globaldata->inputFileName) + "concensuspairs"; + openOutputFile(notIncluded, out2); + + concensusTree = new Tree(); + + //set relationships for nonleaf nodes + for (int j = numLeaves; j < numNodes; j++) { + + //find that nodes pairing with the highest count + int large = 0; + for (it = nodePairs[j-numLeaves].begin(); it != nodePairs[j-numLeaves].end(); it++) { + if (it->second > large) { large = it->second; it2 = it; } + } + + string pair = it2->first; + int pos = pair.find_first_of('-'); + string lefts = pair.substr(0, pos); + string rights = pair.substr(pos+1, pair.length()); + + //converts string to int + int left = atoi(lefts.c_str()); + int right = atoi(rights.c_str()); + + //set parents and children + concensusTree->tree[j].setChildren(left, right); + concensusTree->tree[left].setParent(j); + concensusTree->tree[right].setParent(j); + + // set Branchlengths // + //if your children are leaves remove their branchlengths + if (concensusTree->tree[left].getLChild() == -1) { concensusTree->tree[left].setBranchLength(1.0); } + if (concensusTree->tree[right].getLChild() == -1) { concensusTree->tree[right].setBranchLength(1.0); } + + //set your branch length to the percentage of trees with this pairing + float bLength = (float) it2->second / (float) t.size(); + concensusTree->tree[j].setBranchLength(bLength); + + //print out set used + string leftName, rightName; + getNames(it2->first, leftName, rightName); + + out2 << "Node " << j+1 << " in concensus tree: " << leftName << "-" << rightName << '\t' << (float)it2->second / (float) t.size() << endl; + out2 << "Node " << j+1 << " sets not included in concensus tree: " << endl; + + //print out sets not included + for (it = nodePairs[j-numLeaves].begin(); it != nodePairs[j-numLeaves].end(); it++) { + if (it != it2) { + getNames(it->first, leftName, rightName); + out2 << leftName << "-" << rightName << '\t' << (float)it->second / (float) t.size() << endl; + } + } + out2 << endl; + + } + + concensusTree->assembleTree(); + + outputFile = getRootName(globaldata->inputFileName) + "concensus.tre"; + openOutputFile(outputFile, out); + + concensusTree->print(out); + + out.close(); out2.close(); + + delete concensusTree; + + return 0; + } + catch(exception& e) { + cout << "Standard Error: " << e.what() << " has occurred in the ConcensusCommand class Function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + exit(1); + } + catch(...) { + cout << "An unknown error has occurred in the ConcensusCommand class function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + exit(1); + } +} +//********************************************************************************************************************** + +void ConcensusCommand::getNames(string pair, string& leftName, string& rightName) { + try { + int pos = pair.find_first_of('-'); + string lefts = pair.substr(0, pos); + string rights = pair.substr(pos+1, pair.length()); + + //converts string to int + int left = atoi(lefts.c_str()); + int right = atoi(rights.c_str()); + + //get name + leftName = concensusTree->tree[left].getName(); + rightName = concensusTree->tree[right].getName(); + + //if you are not a leaf use node number as name + if (leftName == "") { leftName = toString(left+1); } + if (rightName == "") { rightName = toString(right+1); } + } + catch(exception& e) { + cout << "Standard Error: " << e.what() << " has occurred in the ConcensusCommand class Function getNames. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + exit(1); + } + catch(...) { + cout << "An unknown error has occurred in the ConcensusCommand class function getNames. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + exit(1); + } +} + +//********************************************************************************************************************** + + diff --git a/concensuscommand.h b/concensuscommand.h new file mode 100644 index 0000000..5550b3b --- /dev/null +++ b/concensuscommand.h @@ -0,0 +1,43 @@ +#ifndef CONCENSUSCOMMAND_H +#define CONCENSUSCOMMAND_H +/* + * concensuscommand.h + * Mothur + * + * Created by Sarah Westcott on 4/29/09. + * Copyright 2009 Schloss Lab UMASS AMherst. All rights reserved. + * + */ + +#include "command.hpp" +#include "tree.h" +#include "treemap.h" +#include "sharedutilities.h" + +class GlobalData; + +class ConcensusCommand : public Command { + +public: + ConcensusCommand(); + ~ConcensusCommand(); + int execute(); + +private: + GlobalData* globaldata; + SharedUtil* util; + vector t; + Tree* concensusTree; + vector< map > nodePairs; // -one entry in vector for each internal node. + // i.e. if node 7's child pairs are 1,2 ten times and 1,3 20 times then the map would be [12, 10] and [13, 20]; + map::iterator it; + map::iterator it2; + string outputFile, notIncluded; + ofstream out, out2; + int numNodes, numLeaves; + + void getNames(string, string&, string&); +}; + +#endif + -- 2.39.5