CHANGES IN APE VERSION 2.2-3
+NEW FEATURES
+
+ o The new function extract.clade extracts a clade from a tree by
+ specifying a node number or label.
+
+ o fastme.bal() has two new options 'spr' and 'tbr' to perform tree
+ operations of the same names.
+
+ o dist.dna() can now return the number of site differences by
+ specifying model="N".
+
+
BUG FIXES
o chronopl() did not work with CV = TRUE.
the number of lineages with non-binary trees.
+OTHER CHANGES
+
+ o ape has now a namespace.
+
+ o drip.tip() has been improved: it should be much faster and work
+ better in some cases (e.g., see the example in ?zoom).
+
+
CHANGES IN APE VERSION 2.2-2
Package: ape
Version: 2.2-3
-Date: 2008-12-20
+Date: 2009-01-07
Title: Analyses of Phylogenetics and Evolution
Author: Emmanuel Paradis, Ben Bolker, Julien Claude, Hoa Sien Cuong,
Richard Desper, Benoit Durand, Julien Dutheil, Olivier Gascuel,
and clock-like trees using mean path lengths, non-parametric rate
smoothing and penalized likelihood, classifying genes in trees using
the Klastorin-Misawa-Tajima approach. Phylogeny estimation can be done
- with the NJ, BIONJ, ME, and ML methods.
+ with the NJ, BIONJ, and ME methods.
License: GPL (>= 2)
URL: http://ape.mpl.ird.fr/
--- /dev/null
+useDynLib(ape)
+
+export(as.DNAbin, as.phylo, base.freq, dist.dna, dist.nodes, drop.tip, ltt.plot, nj, rcoal, rtree, rmtree, read.dna, read.nexus, read.tree, vcv.phylo, write.dna, write.nexus, write.tree)
+
+import(gee, nlme)
+importFrom(lattice, xyplot, panel.lines, panel.points)
+
+S3method(print, phylo)
+S3method(plot, phylo)
+S3method(as.hclust, phylo)
+S3method(reorder, phylo)
+S3method(print, multiPhylo)
+S3method(plot, multiPhylo)
+S3method("[", multiPhylo)
+S3method("[[", multiPhylo)
+S3method("$", multiPhylo)
+S3method(unique, multiPhylo)
+S3method(print, DNAbin)
+S3method(cbind, DNAbin)
+S3method(rbind, DNAbin)
+S3method("[", DNAbin)
+S3method(summary, DNAbin)
+S3method(as.character, DNAbin)
-## DNA.R (2008-10-08)
+## DNA.R (2008-12-22)
## Manipulations and Comparisons of DNA Sequences
as.matrix = FALSE)
{
MODELS <- c("RAW", "JC69", "K80", "F81", "K81", "F84", "T92", "TN93",
- "GG95", "LOGDET", "BH87", "PARALIN")
+ "GG95", "LOGDET", "BH87", "PARALIN", "N")
imod <- which(MODELS == toupper(model))
if (imod == 11 && variance) {
warning("computing variance temporarily not available for model BH87.")
elen <- tree$edge.length
xmat <- tree$edge
## added by Elizabeth Purdom (2008-06-19):
- node.lab<-tree$node.label
- nnode<-tree$Nnode
- ntip<-length(tree$tip.label)
+ node.lab <- tree$node.label
+ nnode <- tree$Nnode
+ ntip <- length(tree$tip.label)
## end
singles <- NA
while (length(singles) > 0) {
## END
elen[prev.node] <- elen[prev.node] + elen[next.node]
## added by Elizabeth Purdom (2008-06-19):
- if(!is.null(node.lab)) node.lab<-node.lab[-c(i-ntip)]
- nnode<-nnode-1
+ if (!is.null(node.lab)) node.lab <- node.lab[-c(i - ntip)]
+ nnode <- nnode - 1
## end
elen <- elen[-next.node]
}
tree$edge <- xmat
tree$edge.length <- elen
## added by Elizabeth Purdom (2008-06-19):
- tree$node.label<-node.lab
- tree$Nnode<-nnode
+ tree$node.label <- node.lab
+ tree$Nnode <- nnode
## end
tree
}
-## drop.tip.R (2008-04-17)
+## drop.tip.R (2009-01-07)
## Remove Tips in a Phylogenetic Tree
-## Copyright 2003-2008 Emmanuel Paradis
+## Copyright 2003-2009 Emmanuel Paradis
## This file is part of the R-package `ape'.
## See the file ../COPYING for licensing issues.
-drop.tip <- function(phy, tip, trim.internal = TRUE, subtree = FALSE,
- root.edge = 0)
+extract.clade <- function(phy, node, root.edge = 0)
{
- if (class(phy) != "phylo") stop("object \"phy\" is not of class \"phylo\"")
- phy <- new2old.phylo(phy)
+ Ntip <- length(phy$tip.label)
+ ROOT <- Ntip + 1
+ Nedge <- dim(phy$edge)[1]
+ wbl <- !is.null(phy$edge.length)
+ if (length(node) > 1) {
+ node <- node[1]
+ warning("only the first value of 'node' has been considered")
+ }
+ if (is.character(node)) {
+ if (is.null(phy$node.label))
+ stop("the tree has no node labels")
+ node <- which(phy$node.label %in% node) + Ntip
+ }
+ if (node <= Ntip) stop("node number must be greater than the number of tips")
+ if (node == ROOT) return(phy)
+ phy <- reorder(phy) # insure it is in cladewise order
+ root.node <- which(phy$edge[, 2] == node)
+ start <- root.node + 1 # start of the clade looked for
+ anc <- phy$edge[root.node, 1] # the ancestor of 'node'
+ next.anc <- which(phy$edge[-(1:start), 1] == anc) # find the next occurence of 'anc'
+
+ keep <- if (length(next.anc)) start + 0:(next.anc[1] - 1) else start:Nedge
+
+ if (root.edge) {
+ NewRootEdge <- phy$edge.length[root.node]
+ root.edge <- root.edge - 1
+ while (root.edge) {
+ if (anc == ROOT) break
+ i <- which(phy$edge[, 2] == anc)
+ NewRootEdge <- NewRootEdge + phy$edge.length[i]
+ root.edge <- root.edge - 1
+ anc <- phy$edge[i, 1]
+ }
+ if (root.edge && !is.null(phy$root.edge))
+ NewRootEdge <- NewRootEdge + phy$root.edge
+ phy$root.edge <- NewRootEdge
+ }
+
+ phy$edge <- phy$edge[keep, ]
+ if (wbl) phy$edge.length <- phy$edge.length[keep]
+ TIPS <- phy$edge[, 2] <= Ntip
+ tip <- phy$edge[TIPS, 2]
+ phy$tip.label <- phy$tip.label[tip]
+ ## keep the ordering so no need to reorder tip.label:
+ phy$edge[TIPS, 2] <- order(tip)
+ if (!is.null(phy$node.label))
+ phy$node.label <- phy$node.label[sort(unique(phy$edge[, 1])) - Ntip]
+ Ntip <- length(phy$tip.label)
+ phy$Nnode <- dim(phy$edge)[1] - Ntip + 1L
+ ## The block below renumbers the nodes so that they conform
+ ## to the "phylo" format -- same as in root()
+ newNb <- integer(Ntip + phy$Nnode)
+ newNb[node] <- Ntip + 1L
+ sndcol <- phy$edge[, 2] > Ntip
+ ## executed from right to left, so newNb is modified before phy$edge:
+ phy$edge[sndcol, 2] <- newNb[phy$edge[sndcol, 2]] <-
+ (Ntip + 2):(Ntip + phy$Nnode)
+ phy$edge[, 1] <- newNb[phy$edge[, 1]]
+ phy
+}
+
+drop.tip <-
+ function(phy, tip, trim.internal = TRUE, subtree = FALSE, root.edge = 0)
+{
+ if (class(phy) != "phylo")
+ stop('object "phy" is not of class "phylo"')
+ Ntip <- length(phy$tip.label)
+ NEWROOT <- ROOT <- Ntip + 1
+ Nnode <- phy$Nnode
+ Nedge <- dim(phy$edge)[1]
if (subtree) {
trim.internal <- TRUE
- edge.bak <- phy$edge
+ tr <- reorder(phy, "pruningwise")
+ N <- .C("node_depth", as.integer(Ntip), as.integer(Nnode),
+ as.integer(tr$edge[, 1]), as.integer(tr$edge[, 2]),
+ as.integer(Nedge), double(Ntip + Nnode),
+ DUP = FALSE, PACKAGE = "ape")[[6]]
}
- tmp <- as.numeric(phy$edge)
- nb.tip <- max(tmp)
- ## fix by Yan Wong:
- nodes <- setdiff(tmp,1:nb.tip) #not sure if this also needs sorting into order
- ## end
- nobr <- is.null(phy$edge.length)
- if (is.numeric(tip)) tip <- phy$tip.label[tip]
- ## find the tips to drop...:
- del <- phy$tip.label %in% tip
- ## ... and the corresponding terminal branches:
- ind <- which(phy$edge[, 2] %in% as.character(which(del)))
- ## drop them...:
- phy$edge <- phy$edge[-ind, ]
- ## ... and the lengths if applies:
- if (!nobr) phy$edge.length <- phy$edge.length[-ind]
- ## drop the tip labels:
- phy$tip.label <- phy$tip.label[!del]
+ wbl <- !is.null(phy$edge.length)
+ edge1 <- phy$edge[, 1] # local copies
+ edge2 <- phy$edge[, 2] #
+ keep <- !logical(Nedge)
+ ## find the tips to drop:
+ if (is.character(tip))
+ tip <- which(phy$tip.label %in% tip)
+ trms <- edge2 <= Ntip
+ ## delete the terminal edges given by `tip':
+ keep[match(tip, edge2)] <- FALSE
+
if (trim.internal) {
- if (root.edge) {
- ## find the MRCA of the remaining tips:
- seq.nod <- list()
- ## This is modified since some tips were deleted!!
- for (i in phy$edge[, 2][as.numeric(phy$edge[, 2]) > 0]) {
- vec <- i
- j <- i
- while (j != "-1") {
- ind <- which(phy$edge[, 2] == j)
- j <- phy$edge[ind, 1]
- vec <- c(vec, j)
- }
- seq.nod[[i]] <- vec
- }
- sn <- lapply(seq.nod, rev)
- i <- 1
- x <- unlist(lapply(sn, function(x) x[i]))
- while (length(unique(x)) == 1) {
- x <- unlist(lapply(sn, function(x) x[i]))
- i <- i + 1
- }
- MRCA <- sn[[1]][i - 2]
- newrootedge <- if (is.null(phy$root.edge)) 0 else phy$root.edge
- for (i in 1:root.edge) {
- ind <- which(phy$edge[, 2] == MRCA)
- newrootedge <- newrootedge + phy$edge.length[ind]
- MRCA <- phy$edge[ind, 1]
- if (MRCA == "-1" && i < root.edge) {
- newrootedge <- newrootedge
- break
- }
- }
- phy$root.edge <- newrootedge
- } else {
- if (!is.null(phy$root.edge)) phy$root.edge <- NULL
+ ## delete the internal edges that do not have descendants
+ ## anymore (ie, they are in the 2nd column of `edge' but
+ ## not in the 1st one)
+ repeat {
+ sel <- !(edge2 %in% edge1[keep]) & !trms & keep
+ if (!sum(sel)) break
+ keep[sel] <- FALSE
}
- while (!all(phy$edge[, 2][as.numeric(phy$edge[, 2]) < 0] %in% phy$edge[, 1])) {
- temp <- phy$edge[, 2][as.numeric(phy$edge[, 2]) < 0]
- k <- temp %in% phy$edge[, 1]
- ind <- phy$edge[, 2] %in% temp[!k]
- phy$edge <- phy$edge[!ind, ]
- if (!nobr) phy$edge.length <- phy$edge.length[!ind]
+ if (subtree) {
+ ## keep the subtending edge(s):
+ subt <- edge1 %in% edge1[keep] & edge1 %in% edge1[!keep]
+ ## <FIXME> 'if (... ' needed below?
+ if (any(subt)) keep[which(subt)] <- TRUE
}
- } else {
- ## fix by Yan Wong:
- k <- nodes %in% phy$edge[, 1] #nodes that have descendants
- ind <- phy$edge[, 2] %in% nodes[!k]
- phy$edge[which(ind), 2] <- as.character(nb.tip + (1:sum(ind)))
- if (is.null(phy$node.label)) new.tip.label <- rep("NA", sum(ind))
- else new.tip.label <- phy$node.label[!k]
- phy$tip.label <- c(phy$tip.label, new.tip.label)
- #N.B. phy$node.label can be left: it is altered later
- ## end
- }
- useless.nodes <- names(which(table(phy$edge[, 1]) == 1))
- if (subtree) {
- if (!nobr) mnbr <- mean(phy$edge.length)
- if (length(useless.nodes) == 1) n <- length(tip) else {
- seq.nod <- list()
- wh <- numeric(0)
- for (i in as.character(which(del))) { # it is not needed to loop through all tips!
- vec <- i
- j <- i
- while (!(j %in% useless.nodes)) {
- ind <- which(edge.bak[, 2] == j)
- wh <- c(wh, ind)
- j <- edge.bak[ind, 1]
- vec <- c(vec, j)
+ if (root.edge && wbl) {
+ degree <- tabulate(edge1[keep])
+ if (degree[ROOT] == 1) {
+ j <- integer(0) # will store the indices of the edges below the new root
+ repeat {
+ i <- which(edge1 == NEWROOT & keep)
+ j <- c(i, j)
+ NEWROOT <- edge2[i]
+ degree <- tabulate(edge1[keep])
+ if (degree[NEWROOT] > 1) break
}
- seq.nod[[i]] <- vec
- }
- n <- table(unlist(lapply(seq.nod, function(x) rev(x)[1])))
- }
- new.lab <- paste("[", n, "_tips]", sep = "")
- for (i in 1:length(useless.nodes)) {
- wh <- which(phy$edge[, 1] == useless.nodes[i])
- phy$tip.label <- c(phy$tip.label, new.lab[i])
- if (wh == dim(phy$edge)[1]) {
- phy$edge <- rbind(phy$edge, c(useless.nodes[i], as.character(nb.tip + i)))
- if (!nobr) phy$edge.length <- c(phy$edge.length, mnbr)
- } else {
- phy$edge <- rbind(phy$edge[1:wh, ],
- c(useless.nodes[i], as.character(nb.tip + i)),
- phy$edge[(wh + 1):dim(phy$edge)[1], ])
- if (!nobr) phy$edge.length <- c(phy$edge.length[1:wh], mnbr,
- phy$edge.length[(wh + 1):(dim(phy$edge)[1] - 1)])
- }
- }
- } else {
- for (i in useless.nodes) {
- ind1 <- which(phy$edge[, 1] == i)
- ind2 <- which(phy$edge[, 2] == i)
- phy$edge[ind2, 2] <- phy$edge[ind1, 2]
- phy$edge <- phy$edge[-ind1, ]
- if (!nobr) {
- phy$edge.length[ind2] <- phy$edge.length[ind2] + phy$edge.length[ind1]
- phy$edge.length <- phy$edge.length[-ind1]
+ keep[j] <- FALSE
+ if (length(j) > root.edge) j <- 1:root.edge
+ NewRootEdge <- sum(phy$edge.length[j])
+ if (length(j) < root.edge && !is.null(phy$root.edge))
+ NewRootEdge <- NewRootEdge + phy$root.edge
+ phy$root.edge <- NewRootEdge
}
}
}
- tmp <- as.numeric(phy$edge)
- if (!is.null(phy$node.label)) {
- x <- unique(tmp)
- x <- x[x < 0]
- phy$node.label <- phy$node.label[-x]
- }
- n <- length(tmp)
- nodes <- tmp < 0
- ind.nodes <- (1:n)[nodes]
- ind.tips <- (1:n)[!nodes]
- new.nodes <- -as.numeric(factor(-tmp[nodes]))
- new.tips <- as.numeric(factor(tmp[!nodes]))
- tmp[ind.nodes] <- new.nodes
- tmp[ind.tips] <- new.tips
- dim(tmp) <- c(n / 2, 2)
- mode(tmp) <- "character"
- phy$edge <- tmp
- phy <- old2new.phylo(phy)
- if (!trim.internal || subtree) {
- S <- write.tree(phy)
- phy <- if (nobr) clado.build(S) else tree.build(S)
+
+ if (!root.edge) phy$root.edge <- NULL
+
+ ## upate the tree; 1) drop the edges and tip labels
+ phy$edge <- phy$edge[keep, ]
+ if (wbl) phy$edge.length <- phy$edge.length[keep]
+ phy$tip.label <- phy$tip.label[-tip]
+ ## 2) renumber the remaining tips now
+ TIPS <- phy$edge[, 2] <= Ntip
+ ## keep the ordering so no need to reorder tip.label:
+ phy$edge[TIPS, 2] <- order(phy$edge[TIPS, 2])
+ Ntip <- length(phy$tip.label) # update Ntip
+
+ ## make new tip labels if necessary
+ if (subtree || !trim.internal) {
+ new.trms <- !(phy$edge[, 2] %in% phy$edge[, 1]) & phy$edge[, 2] > Ntip
+ node2tip <- phy$edge[new.trms, 2]
+ if (subtree)
+ new.lab <- paste("[", N[node2tip], "_tips]", sep = "")
+ else {
+ new.lab <-
+ if (is.null(phy$node.label)) rep("NA", length(node2tip))
+ else phy$node.label[node2tip - Ntip]
+ }
+ ## change the #'s in the edge matrix
+ new.tip <- Ntip + 1:length(node2tip)
+ phy$edge[new.trms, 2] <- new.tip
+ phy$tip.label[new.tip] <- new.lab
+ Ntip <- length(phy$tip.label)
+ if (!is.null(phy$node.label))
+ phy$node.label <- phy$node.label[-(node2tip - Ntip)]
}
- phy
+ phy$Nnode <- dim(phy$edge)[1] - Ntip + 1L # 3) update phy$Nnode
+
+ ## The block below renumbers the nodes so that they conform
+ ## to the "phylo" format -- same as in root()
+ newNb <- integer(Ntip + phy$Nnode)
+ newNb[NEWROOT] <- Ntip + 1L
+ sndcol <- phy$edge[, 2] > Ntip
+ ## executed from right to left, so newNb is modified before phy$edge:
+ phy$edge[sndcol, 2] <- newNb[phy$edge[sndcol, 2]] <-
+ (Ntip + 2):(Ntip + phy$Nnode)
+ phy$edge[, 1] <- newNb[phy$edge[, 1]]
+
+ collapse.singles(phy)
}
-## me.R (2008-01-12)
+## me.R (2009-01-07)
## Tree Estimation Based on Minimum Evolution Algorithm
## Copyright 2007 Vincent Lefort with modifications by
-## Emmanuel Paradis (2008)
+## Emmanuel Paradis (2008-2009)
## This file is part of the R-package `ape'.
## See the file ../COPYING for licensing issues.
-.fastme <- function(X, nni, lib)
+fastme.bal <- function(X, nni = TRUE, spr = TRUE, tbr = TRUE)
{
if (is.matrix(X)) X <- as.dist(X)
N <- as.integer(attr(X, "Size"))
labels <- sprintf("%6s", 1:N)
edge1 <- edge2 <- integer(2*N - 3)
- ans <- .C(lib, as.double(X), N, labels, as.integer(nni),
+
+ ans <- .C("me_b", as.double(X), N, labels, as.integer(nni),
+ as.integer(spr), as.integer(tbr), edge1, edge2,
+ double(2*N - 3), character(N), PACKAGE = "ape")
+
+ labels <- substr(ans[[10]], 1, 6)
+ LABS <- attr(X, "Labels")
+ labels <- if (!is.null(LABS)) LABS[as.numeric(labels)]
+ else gsub("^ ", "", labels)
+ structure(list(edge = cbind(ans[[7]], ans[[8]]), edge.length = ans[[9]],
+ tip.label = labels, Nnode = N - 2L),
+ class = "phylo")
+}
+
+fastme.ols <- function(X, nni = TRUE)
+{
+ if (is.matrix(X)) X <- as.dist(X)
+ N <- as.integer(attr(X, "Size"))
+ labels <- sprintf("%6s", 1:N)
+ edge1 <- edge2 <- integer(2*N - 3)
+ ans <- .C("me_o", as.double(X), N, labels, as.integer(nni),
edge1, edge2, double(2*N - 3), character(N),
PACKAGE = "ape")
labels <- substr(ans[[8]], 1, 6)
labels <- if (!is.null(LABS)) LABS[as.numeric(labels)]
else gsub("^ ", "", labels)
structure(list(edge = cbind(ans[[5]], ans[[6]]), edge.length = ans[[7]],
- tip.label = labels, Nnode = N - 2),
+ tip.label = labels, Nnode = N - 2L),
class = "phylo")
}
-fastme.bal <- function(X, nni = TRUE) .fastme(X, nni, "me_b")
-
-fastme.ols <- function(X, nni = TRUE) .fastme(X, nni, "me_o")
-
bionj <- function(X)
{
if (is.matrix(X)) X <- as.dist(X)
labels <- if (!is.null(LABS)) LABS[as.numeric(labels)]
else gsub("^ ", "", labels)
structure(list(edge = cbind(ans[[4]], ans[[5]]), edge.length = ans[[6]],
- tip.label = labels, Nnode = N - 2),
+ tip.label = labels, Nnode = N - 2L),
class = "phylo")
}
comments, or bug reports: thanks to all of you!
Significant bug fixes were provided by Cécile Ané, James Bullard,
-Éric Durand, Olivier François, Bret Larget, Elizabeth Purdom,
-Klaus Schliep, Li-San Wang, and Yan Wong. Contact me if I forgot
-someone.
+Éric Durand, Olivier François, Bret Larget, Nick Matzke,
+Elizabeth Purdom, Klaus Schliep, Li-San Wang, and Yan Wong. Contact
+me if I forgot someone.
Kurt Hornik, of the R Core Team, helped in several occasions to
fix some problems and bugs.
\arguments{
\item{x}{a matrix or a list containing the DNA sequences.}
\item{model}{a character string specifying the evlutionary model to be
- used; must be one of \code{"raw"}, \code{"JC69"}, \code{"K80"} (the
- default), \code{"F81"}, \code{"K81"}, \code{"F84"}, \code{"BH87"},
- \code{"T92"}, \code{"TN93"}, \code{"GG95"}, \code{"logdet"}, or
- \code{"paralin"}.}
+ used; must be one of \code{"raw"}, \code{"N"}, \code{"JC69"},
+ \code{"K80"} (the default), \code{"F81"}, \code{"K81"},
+ \code{"F84"}, \code{"BH87"}, \code{"T92"}, \code{"TN93"},
+ \code{"GG95"}, \code{"logdet"}, or \code{"paralin"}.}
\item{variance}{a logical indicating whether to compute the variances
of the distances; defaults to \code{FALSE} so the variances are not
computed.}
brief description is given below; more details can be found in the
References.
- \item{``raw''}{This is simply the proportion of sites that differ
- between each pair of sequences. This may be useful to draw
- ``saturation plots''. The options \code{variance} and \code{gamma}
- have no effect, but \code{pairwise.deletion} can.}
+ \item{``raw'', ``N''}{This is simply the proportion or the number of
+ sites that differ between each pair of sequences. This may be useful
+ to draw ``saturation plots''. The options \code{variance} and
+ \code{gamma} have no effect, but \code{pairwise.deletion} can.}
\item{``JC69''}{This model was developed by Jukes and Cantor (1969). It
assumes that all substitutions (i.e. a change of a base by another
\name{drop.tip}
\alias{drop.tip}
+\alias{extract.clade}
\title{Remove Tips in a Phylogenetic Tree}
\usage{
drop.tip(phy, tip, trim.internal = TRUE, subtree = FALSE,
root.edge = 0)
+extract.clade(phy, node, root.edge = 0)
}
\arguments{
\item{phy}{an object of class \code{"phylo"}.}
\item{root.edge}{an integer giving the number of internal branches to
be used to build the new root edge. This has no effect if
\code{trim.internal = FALSE}.}
+ \item{node}{a node number or label.}
}
\description{
- This function removes the terminal branches of a phylogenetic tree,
+ \code{drop.tip} removes the terminal branches of a phylogenetic tree,
possibly removing the corresponding internal branches.
+
+ \code{extract.clade} does the inverse operation: it keeps all the tips
+ from a given node, and deletes all the other tips.
}
\details{
The argument \code{tip} can be either character or numeric. In the
second case the numbers of these labels in the vector
\code{phy$tip.label} are given.
+ This also applies to \code{node}, but if this argument is character
+ and the tree has no node label, this results in an error. If more than
+ one value is given with \code{node} (i.e., a vector of length two or
+ more), only the first one is used with a warning.
+
If \code{trim.internal = FALSE}, the new tips are given \code{"NA"} as
labels, unless there are node labels in the tree in which case they
are used.
Minimum Evolution algorithm of Desper and Gascuel (2002).
}
\usage{
- fastme.bal(X, nni = TRUE)
+ fastme.bal(X, nni = TRUE, spr = TRUE, tbr = TRUE)
fastme.ols(X, nni = TRUE)
}
\arguments{
\item{X}{a distance matrix; may be an object of class \code{"dist"}.}
\item{nni}{a boolean value; TRUE to do NNIs (default).}
+ \item{spr}{ditto for SPRs.}
+ \item{tbr}{ditto for TBRs.}
}
\value{
an object of class \code{"phylo"}.
-/*#include <stdio.h>
-#include <stdlib.h>
-#include <math.h>
-#include "graph.h"
-#include "main.h"*/
-
#include "me.h"
//boolean leaf(node *v);
edge *findBottomLeft(edge *e);
edge *topFirstTraverse(tree *T, edge *e);
edge *moveUpRight(edge *e);
-double wf(double lambda, double D_LR, double D_LU, double D_LD,
+double wf(double lambda, double D_LR, double D_LU, double D_LD,
double D_RU, double D_RD, double D_DU);*/
/*NNI functions for unweighted OLS topological switches*/
if (T->root == f->tail)
{
if (leaf(e->head))
- A[e->head->index][f->head->index] =
- A[f->head->index][e->head->index] =
+ A[e->head->index][f->head->index] =
+ A[f->head->index][e->head->index] =
D[e->head->index2][f->tail->index2];
else
{
g = e->head->leftEdge;
h = e->head->rightEdge;
- A[e->head->index][f->head->index] =
- A[f->head->index][e->head->index] =
+ A[e->head->index][f->head->index] =
+ A[f->head->index][e->head->index] =
(g->bottomsize*A[f->head->index][g->head->index]
+ h->bottomsize*A[f->head->index][h->head->index])
- /e->bottomsize;
+ /e->bottomsize;
}
}
- else
+ else
{
g = f->tail->parentEdge;
fillTableUp(e,g,A,D,T); /*recursive call*/
h = siblingEdge(f);
- A[e->head->index][f->head->index] =
- A[f->head->index][e->head->index] =
+ A[e->head->index][f->head->index] =
+ A[f->head->index][e->head->index] =
(g->topsize*A[e->head->index][g->head->index]
- + h->bottomsize*A[e->head->index][h->head->index])/f->topsize;
+ + h->bottomsize*A[e->head->index][h->head->index])/f->topsize;
}
}
double *lambda;
double D_LR, D_LU, D_LD, D_RD, D_RU, D_DU;
double w1,w2,w0;
-
+
if ((leaf(e->tail)) || (leaf(e->head)))
return(NONE);
lambda = (double *)malloc(3*sizeof(double));
a = e->tail->parentEdge->topsize;
f = siblingEdge(e);
- b = f->bottomsize;
+ b = f->bottomsize;
c = e->head->leftEdge->bottomsize;
d = e->head->rightEdge->bottomsize;
lambda[0] = ((double) b*c + a*d)/((a + b)*(c+d));
- lambda[1] = ((double) b*c + a*d)/((a + c)*(b+d));
+ lambda[1] = ((double) b*c + a*d)/((a + c)*(b+d));
lambda[2] = ((double) c*d + a*b)/((a + d)*(b+c));
-
+
D_LR = A[e->head->leftEdge->head->index][e->head->rightEdge->head->index];
D_LU = A[e->head->leftEdge->head->index][e->tail->index];
D_LD = A[e->head->leftEdge->head->index][f->head->index];
printf("Weight dropping by %lf.\n",w0 - w1);
printf("New weight should be %lf.\n",T->weight + w1 - w0);
}*/
- return(LEFT);
+ return(LEFT);
}
}
-
int *initPerm(int size);
-void NNIupdateAverages(double **A, edge *e, edge *par, edge *skew,
+void NNIupdateAverages(double **A, edge *e, edge *par, edge *skew,
edge *swap, edge *fixed, tree *T)
{
node *v;
edge *elooper;
v = e->head;
/*first, v*/
- A[e->head->index][e->head->index] =
- (swap->bottomsize*
+ A[e->head->index][e->head->index] =
+ (swap->bottomsize*
((skew->bottomsize*A[skew->head->index][swap->head->index]
- + fixed->bottomsize*A[fixed->head->index][swap->head->index])
+ + fixed->bottomsize*A[fixed->head->index][swap->head->index])
/ e->bottomsize) +
par->topsize*
((skew->bottomsize*A[skew->head->index][par->head->index]
- + fixed->bottomsize*A[fixed->head->index][par->head->index])
+ + fixed->bottomsize*A[fixed->head->index][par->head->index])
/ e->bottomsize)
- ) / e->topsize;
-
- elooper = findBottomLeft(e); /*next, we loop over all the edges
+ ) / e->topsize;
+
+ elooper = findBottomLeft(e); /*next, we loop over all the edges
which are below e*/
- while (e != elooper)
+ while (e != elooper)
{
- A[e->head->index][elooper->head->index] =
- A[elooper->head->index][v->index]
+ A[e->head->index][elooper->head->index] =
+ A[elooper->head->index][v->index]
= (swap->bottomsize*A[elooper->head->index][swap->head->index] +
- par->topsize*A[elooper->head->index][par->head->index])
+ par->topsize*A[elooper->head->index][par->head->index])
/ e->topsize;
elooper = depthFirstTraverse(T,elooper);
}
elooper = findBottomLeft(swap); /*next we loop over all the edges below and
- including swap*/
+ including swap*/
while (swap != elooper)
{
- A[e->head->index][elooper->head->index] =
+ A[e->head->index][elooper->head->index] =
A[elooper->head->index][e->head->index]
- = (skew->bottomsize * A[elooper->head->index][skew->head->index] +
- fixed->bottomsize*A[elooper->head->index][fixed->head->index])
+ = (skew->bottomsize * A[elooper->head->index][skew->head->index] +
+ fixed->bottomsize*A[elooper->head->index][fixed->head->index])
/ e->bottomsize;
elooper = depthFirstTraverse(T,elooper);
}
/*now elooper = skew */
- A[e->head->index][elooper->head->index] =
+ A[e->head->index][elooper->head->index] =
A[elooper->head->index][e->head->index]
- = (skew->bottomsize * A[elooper->head->index][skew->head->index] +
- fixed->bottomsize* A[elooper->head->index][fixed->head->index])
+ = (skew->bottomsize * A[elooper->head->index][skew->head->index] +
+ fixed->bottomsize* A[elooper->head->index][fixed->head->index])
/ e->bottomsize;
-
- /*finally, we loop over all the edges in the tree
- on the far side of parEdge*/
+
+ /*finally, we loop over all the edges in the tree
+ on the far side of parEdge*/
elooper = T->root->leftEdge;
while ((elooper != swap) && (elooper != e)) /*start a top-first traversal*/
{
- A[e->head->index][elooper->head->index] =
+ A[e->head->index][elooper->head->index] =
A[elooper->head->index][e->head->index]
- = (skew->bottomsize * A[elooper->head->index][skew->head->index]
- + fixed->bottomsize* A[elooper->head->index][fixed->head->index])
+ = (skew->bottomsize * A[elooper->head->index][skew->head->index]
+ + fixed->bottomsize* A[elooper->head->index][fixed->head->index])
/ e->bottomsize;
elooper = topFirstTraverse(T,elooper);
}
elooper = moveUpRight(par);
while (NULL != elooper)
{
- A[e->head->index][elooper->head->index]
+ A[e->head->index][elooper->head->index]
= A[elooper->head->index][e->head->index]
- = (skew->bottomsize * A[elooper->head->index][skew->head->index] +
- fixed->bottomsize* A[elooper->head->index][fixed->head->index])
+ = (skew->bottomsize * A[elooper->head->index][skew->head->index] +
+ fixed->bottomsize* A[elooper->head->index][fixed->head->index])
/ e->bottomsize;
elooper = topFirstTraverse(T,elooper);
}
-
}
{
edge *par, *fixed;
edge *skew, *swap;
-
+
/* if (verbose)
printf("Branch swap across edge %s.\n",e->label);*/
skew = siblingEdge(e);
fixed = siblingEdge(swap);
par = e->tail->parentEdge;
-
+
/* if (verbose)
{
printf("Branch swap: switching edges %s and %s.\n",skew->label,swap->label);
}*/
/*perform topological switch by changing f from (u,b) to (v,b)
and g from (v,c) to (u,c), necessitatates also changing parent fields*/
-
+
swap->tail = e->tail;
skew->tail = e->head;
-
+
if (LEFT == direction)
e->head->leftEdge = skew;
else
void popHeap(int *p, int *q, double *v, int length, int i);
-void NNIRetestEdge(int *p, int *q, edge *e,tree *T, double **avgDistArray,
+void NNIRetestEdge(int *p, int *q, edge *e,tree *T, double **avgDistArray,
double *weights, int *location, int *possibleSwaps)
{
int tloc;
tloc = location[e->head->index+1];
- location[e->head->index+1] =
+ location[e->head->index+1] =
NNIEdgeTest(e,T,avgDistArray,weights + e->head->index+1);
if (NONE == location[e->head->index+1])
{
if (NONE != tloc)
- popHeap(p,q,weights,(*possibleSwaps)--,q[e->head->index+1]);
+ popHeap(p,q,weights,(*possibleSwaps)--,q[e->head->index+1]);
}
else
{
edgeArray = (edge **) malloc((T->size+1)*sizeof(double));
weights = (double *) malloc((T->size+1)*sizeof(double));
location = (int *) malloc((T->size+1)*sizeof(int));
-
+
double epsilon = 0.0;
for (i=0; i<numSpecies; i++)
for (j=0; j<numSpecies; j++)
epsilon += D[i][j];
epsilon = (epsilon / (numSpecies * numSpecies)) * EPSILON;
-
+
for (i=0;i<T->size+1;i++)
{
weights[i] = 0.0;
location[i] = NONE;
}
- e = findBottomLeft(T->root->leftEdge);
+ e = findBottomLeft(T->root->leftEdge);
/* *count = 0; */
while (NULL != e)
{
edgeArray[e->head->index+1] = e;
- location[e->head->index+1] =
+ location[e->head->index+1] =
NNIEdgeTest(e,T,avgDistArray,weights + e->head->index + 1);
e = depthFirstTraverse(T,e);
- }
+ }
possibleSwaps = makeThreshHeap(p,q,weights,T->size+1,0.0);
permInverse(p,q,T->size+1);
/*we put the negative values of weights into a heap, indexed by p
with the minimum value pointed to by p[1]*/
- /*p[i] is index (in edgeArray) of edge with i-th position
+ /*p[i] is index (in edgeArray) of edge with i-th position
in the heap, q[j] is the position of edge j in the heap */
while (weights[p[1]] + epsilon < 0)
{
e = centerEdge->head->leftEdge;
NNIRetestEdge(p,q,e,T,avgDistArray,weights,location,&possibleSwaps);
e = centerEdge->head->rightEdge;
- NNIRetestEdge(p,q,e,T,avgDistArray,weights,location,&possibleSwaps);
+ NNIRetestEdge(p,q,e,T,avgDistArray,weights,location,&possibleSwaps);
e = siblingEdge(centerEdge);
NNIRetestEdge(p,q,e,T,avgDistArray,weights,location,&possibleSwaps);
e = centerEdge->tail->parentEdge;
--- /dev/null
+/* #include <stdio.h> */
+/* #include <stdlib.h> */
+/* #include <math.h> */
+/* #include "graph.h" */
+/* #include "fastme.h" */
+#include "me.h"
+
+/*functions from bNNI.c*/
+void makeBMEAveragesTable(tree *T, double **D, double **A);
+void assignBMEWeights(tree *T, double **A);
+
+/*from me.c*/
+edge *siblingEdge(edge *e);
+void weighTree(tree *T);
+void freeMatrix(double **D, int size);
+edge *depthFirstTraverse(tree *T, edge *e);
+double **initDoubleMatrix(int d);
+
+/*from below*/
+node *indexedNode(tree *T, int i);
+edge *indexedEdge(tree *T, int i);
+void assignSPRWeights(node *v, double **A, double ***swapWeights);
+void SPRTopShift(tree *T, node *vmove, edge *esplit, int UpOrDown);
+void assignDownWeightsUp(edge *etest, node *vtest, node *va, edge *back, node *cprev, double oldD_AB, double coeff, double **A, double ***swapWeights);
+void assignDownWeightsSkew(edge *etest, node *vtest, node *va, edge *back, node *cprev, double oldD_AB, double coeff, double **A, double ***swapWeights);
+void assignDownWeightsDown(edge *etest, node *vtest, node *va, edge *back, node *cprev, double oldD_AB, double coeff, double **A, double ***swapWeights);
+void assignUpWeights(edge *etest, node *vtest, node *va, edge *back, node *cprve, double oldD_AB, double coeff, double **A, double ***swapWeights);
+
+void zero3DMatrix(double ***X, int h, int l, int w)
+{
+ int i,j,k;
+ for(i=0;i<h;i++)
+ for(j=0;j<l;j++)
+ for(k=0;k<w;k++)
+ X[i][j][k] = 0.0;
+}
+
+
+void findTableMin(int *imin, int *jmin, int *kmin, int n, double ***X, double *min)
+{
+ int i,j,k;
+ for(i=0;i<2;i++)
+ for(j=0;j<n;j++)
+ for(k=0;k<n;k++)
+ {
+ if (X[i][j][k] < *min)
+ {
+ *min = X[i][j][k];
+ *imin = i;
+ *jmin = j;
+ *kmin = k;
+ }
+ }
+}
+
+
+void SPR(tree *T, double **D, double **A, int *count)
+{
+ int i,j,k;
+ node *v;
+ /*FILE *treefile;*/
+ edge *e,*f;
+ /* char filename[MAX_LABEL_LENGTH];*/
+ double ***swapWeights;
+ double swapValue = 0.0;
+ swapWeights = (double ***)malloc(2*sizeof(double **));
+ makeBMEAveragesTable(T,D,A);
+ assignBMEWeights(T,A);
+ weighTree(T);
+ /*if (verbose)
+ printf("Before SPRs: tree length is %lf.\n",T->weight);*/
+ for(i=0;i<2;i++)
+ swapWeights[i] = initDoubleMatrix(T->size);
+ do
+ {
+ swapValue=0.0;
+ zero3DMatrix(swapWeights,2,T->size,T->size);
+ i = j = k = 0;
+ for(e=depthFirstTraverse(T,NULL);NULL!=e;e=depthFirstTraverse(T,e))
+ assignSPRWeights(e->head,A,swapWeights);
+ findTableMin(&i,&j,&k,T->size,swapWeights,&swapValue);
+ swapValue = swapWeights[i][j][k];
+ if (swapValue < -EPSILON)
+ {
+// if (verbose)
+// printf("New tree weight should be %lf.\n",T->weight + 0.25*swapValue);
+ v = indexedNode(T,j);
+ f = indexedEdge(T,k);
+// if (verbose)
+// printf("Swapping tree below %s to split edge %s with head %s and tail %s\n",
+// v->parentEdge->label,f->label,f->head->label,f->tail->label);
+ SPRTopShift(T,v,f,2-i);
+ makeBMEAveragesTable(T,D,A);
+ assignBMEWeights(T,A);
+ weighTree(T);
+ (*count)++;
+ /*sprintf(filename,"tree%d.new",*count);*/
+// if (verbose)
+// printf("After %d SPRs, tree weight is %lf.\n\n",*count,T->weight);
+ /*treefile = fopen(filename,"w");
+ NewickPrintTree(T,treefile);
+ fclose(treefile);*/
+ }
+ } while (swapValue < -EPSILON);
+ for(i=0;i<2;i++)
+ freeMatrix(swapWeights[i],T->size);
+ free(swapWeights);
+ /*if (verbose)
+ readOffTree(T);*/
+}
+
+/*assigns values to array swapWeights*/
+/*swapWeights[0][j][k] will be the value of removing the tree below the edge whose head node has index j
+and reattaching it to split the edge whose head has the index k*/
+/*swapWeights[1][j][k] will be the value of removing the tree above the edge whose head node has index j
+and reattaching it to split the edge whose head has the index k*/
+void assignSPRWeights(node *vtest, double **A, double ***swapWeights)
+{
+ edge *etest, *left, *right, *sib, *par;
+ etest = vtest->parentEdge;
+ left = vtest->leftEdge;
+ right = vtest->rightEdge;
+ par = etest->tail->parentEdge;
+ sib = siblingEdge(etest);
+ if (NULL != par)
+ assignDownWeightsUp(par,vtest,sib->head,NULL,NULL,0.0,1.0,A,swapWeights);
+ if (NULL != sib)
+ assignDownWeightsSkew(sib,vtest,sib->tail,NULL,NULL,0.0,1.0,A,swapWeights);
+ /*assigns values for moving subtree rooted at vtest, starting with edge
+ parental to tail of edge parental to vtest*/
+ if (NULL != left)
+ {
+ assignUpWeights(left,vtest,right->head,NULL,NULL,0.0,1.0,A,swapWeights);
+ assignUpWeights(right,vtest,left->head,NULL,NULL,0.0,1.0,A,swapWeights);
+ }
+}
+
+
+/*recall NNI formula: change in tree length from AB|CD split to AC|BD split is
+proportional to D_AC + D_BD - D_AB - D_CD*/
+/*in our case B is the tree being moved (below vtest), A is the tree backwards below back, but
+ with the vtest subtree removed, C is the sibling tree of back and D is the tree above etest*/
+/*use va to denote the root of the sibling tree to B in the original tree*/
+/*please excuse the multiple uses of the same letters: A,D, etc.*/
+void assignDownWeightsUp(edge *etest, node *vtest, node *va, edge *back, node *cprev, double oldD_AB, double coeff, double **A, double ***swapWeights)
+{
+ edge *par, *sib, *skew;
+ double D_AC, D_BD, D_AB, D_CD;
+ par = etest->tail->parentEdge;
+ skew = siblingEdge(etest);
+ if (NULL == back) /*first recursive call*/
+ {
+ if (NULL == par)
+ return;
+ else /*start the process of assigning weights recursively*/
+ {
+ assignDownWeightsUp(par,vtest,va,etest,va,A[va->index][vtest->index],0.5,A,swapWeights);
+ assignDownWeightsSkew(skew,vtest,va,etest,va,A[va->index][vtest->index],0.5,A,swapWeights);
+ }
+ }
+ else /*second or later recursive call*/
+ {
+ sib = siblingEdge(back);
+ D_BD = A[vtest->index][etest->head->index]; /*straightforward*/
+ D_CD = A[sib->head->index][etest->head->index]; /*this one too*/
+ D_AC = A[sib->head->index][back->head->index] + coeff*(A[sib->head->index][va->index]
+ - A[sib->head->index][vtest->index]);
+ D_AB = 0.5*(oldD_AB + A[vtest->index][cprev->index]);
+ swapWeights[0][vtest->index][etest->head->index] = swapWeights[0][vtest->index][back->head->index] + (D_AC + D_BD - D_AB - D_CD);
+ if (NULL != par)
+ {
+ assignDownWeightsUp(par,vtest,va,etest,sib->head,D_AB,0.5*coeff,A,swapWeights);
+ assignDownWeightsSkew(skew,vtest,va,etest,sib->head,D_AB,0.5*coeff,A,swapWeights);
+ }
+ }
+}
+
+void assignDownWeightsSkew(edge *etest, node *vtest, node *va, edge *back, node *cprev, double oldD_AB, double coeff, double **A, double ***swapWeights)
+{
+ /*same general idea as assignDownWeights, except needing to keep track of things a bit differently*/
+ edge *par, *left, *right;
+ /*par here = sib before
+ left, right here = par, skew before*/
+ double D_AB, D_CD, D_AC, D_BD;
+ /*B is subtree being moved - below vtest
+ A is subtree remaining fixed - below va, unioned with all trees already passed by B*/
+ /*C is subtree being passed by B, in this case above par
+ D is subtree below etest, fixed on other side*/
+ par = etest->tail->parentEdge;
+ left = etest->head->leftEdge;
+ right = etest->head->rightEdge;
+ if (NULL == back)
+ {
+ if (NULL == left)
+ return;
+ else
+ {
+ assignDownWeightsDown(left,vtest,va,etest,etest->tail,A[vtest->index][etest->tail->index],0.5,A,swapWeights);
+ assignDownWeightsDown(right,vtest,va,etest,etest->tail,A[vtest->index][etest->tail->index],0.5,A,swapWeights);
+ }
+ }
+ else
+ {
+ D_BD = A[vtest->index][etest->head->index];
+ D_CD = A[par->head->index][etest->head->index];
+ D_AC = A[back->head->index][par->head->index] + coeff*(A[va->index][par->head->index] - A[vtest->index][par->head->index]);
+ D_AB = 0.5*(oldD_AB + A[vtest->index][cprev->index]);
+ swapWeights[0][vtest->index][etest->head->index] = swapWeights[0][vtest->index][back->head->index] + (D_AC + D_BD - D_AB - D_CD);
+ if (NULL != left)
+ {
+ assignDownWeightsDown(left,vtest, va, etest, etest->tail, D_AB, 0.5*coeff, A, swapWeights);
+ assignDownWeightsDown(right,vtest, va, etest, etest->tail, D_AB, 0.5*coeff, A, swapWeights);
+ }
+ }
+}
+
+void assignDownWeightsDown(edge *etest, node *vtest, node *va, edge *back, node *cprev, double oldD_AB, double coeff, double **A, double ***swapWeights)
+{
+ /*again the same general idea*/
+ edge *sib, *left, *right;
+ /*sib here = par in assignDownWeightsSkew
+ rest is the same as assignDownWeightsSkew*/
+ double D_AB, D_CD, D_AC, D_BD;
+ /*B is below vtest, A is below va unioned with all trees already passed by B*/
+ /*C is subtree being passed - below sib*/
+ /*D is tree below etest*/
+ sib = siblingEdge(etest);
+ left = etest->head->leftEdge;
+ right = etest->head->rightEdge;
+ D_BD = A[vtest->index][etest->head->index];
+ D_CD = A[sib->head->index][etest->head->index];
+ D_AC = A[sib->head->index][back->head->index] + coeff*(A[sib->head->index][va->index] - A[sib->head->index][vtest->index]);
+ D_AB = 0.5*(oldD_AB + A[vtest->index][cprev->index]);
+ swapWeights[0][vtest->index][etest->head->index] = swapWeights[0][vtest->index][back->head->index] + ( D_AC + D_BD - D_AB - D_CD);
+ if (NULL != left)
+ {
+ assignDownWeightsDown(left,vtest, va, etest, sib->head, D_AB, 0.5*coeff, A, swapWeights);
+ assignDownWeightsDown(right,vtest, va, etest, sib->head, D_AB, 0.5*coeff, A, swapWeights);
+ }
+}
+
+void assignUpWeights(edge *etest, node *vtest, node *va, edge *back, node *cprev, double oldD_AB, double coeff, double **A,
+ double ***swapWeights)
+{
+ /*SPR performed on tree above vtest...*/
+ /*same idea as above, with appropriate selections of edges and nodes*/
+ edge *sib, *left, *right;
+ /*B is above vtest, A is other tree below vtest unioned with trees in path to vtest*/
+ /*sib is tree C being passed by B*/
+ /*D is tree below etest*/
+ double D_AB, D_CD, D_AC, D_BD;
+ double thisWeight;
+ sib = siblingEdge(etest);
+ left = etest->head->leftEdge;
+ right = etest->head->rightEdge;
+ if (NULL == back) /*first recursive call*/
+ {
+ if (NULL == left)
+ return;
+ else /*start the process of assigning weights recursively*/
+ {
+ assignUpWeights(left,vtest,va,etest,va,A[va->index][vtest->index],0.5,A,swapWeights);
+ assignUpWeights(right,vtest,va,etest,va,A[va->index][vtest->index],0.5,A,swapWeights);
+ }
+ }
+ else /*second or later recursive call*/
+ {
+ D_BD = A[vtest->index][etest->head->index];
+ D_CD = A[sib->head->index][etest->head->index];
+ D_AC = A[back->head->index][sib->head->index] + coeff*(A[va->index][sib->head->index] - A[vtest->index][sib->head->index]);
+ D_AB = 0.5*(oldD_AB + A[vtest->index][cprev->index]);
+ thisWeight = swapWeights[1][vtest->index][etest->head->index] = swapWeights[1][vtest->index][back->head->index] + (D_AC + D_BD - D_AB - D_CD);
+ if (NULL != left)
+ {
+ assignUpWeights(left,vtest, va, etest, sib->head, D_AB, 0.5*coeff, A, swapWeights);
+ assignUpWeights(right,vtest, va, etest, sib->head, D_AB, 0.5*coeff, A, swapWeights);
+ }
+ }
+}
+
+void pruneSubtree(edge *p, edge *u, edge *d)
+/*starting with edge u above edges p, d*/
+/*removes p, d from tree, u connects to d->head to compensate*/
+{
+ p->tail->parentEdge = NULL;/*remove p subtree*/
+ u->head = d->head;
+ d->head->parentEdge = u; /*u connected to d->head*/
+ d->head = NULL; /*d removed from tree*/
+}
+
+void SPRsplitEdge(edge *e, edge *p, edge *d)
+/*splits edge e to make it parental to p,d. d is parental to what
+ previously was below e*/
+{
+ d->head = e->head;
+ e->head = p->tail;
+ p->tail->parentEdge = e;
+ d->head->parentEdge = d;
+}
+
+
+/*topological shift function*/
+/*removes subtree rooted at v and re-inserts to spilt e*/
+void SPRDownShift(tree *T, node *v, edge *e)
+{
+ edge *vup, *vdown, *vpar;
+ vpar = v->parentEdge;
+ vdown = siblingEdge(vpar);
+ vup = vpar->tail->parentEdge;
+ /*topological shift*/
+ pruneSubtree(vpar,vup,vdown);
+ /*removes v subtree and vdown, extends vup*/
+ SPRsplitEdge(e,vpar,vdown); /*splits e to make e sibling edge to vpar,
+ both below vup*/
+}
+
+void SPRUpShift(tree *T, node *vmove, edge *esplit)
+/*an inelegant iterative version*/
+{
+ edge *f;
+ edge **EPath;
+ edge **sib;
+ node **v;
+ int i;
+ int pathLength;
+ for(f=esplit->tail->parentEdge,pathLength=1;f->tail != vmove;f=f->tail->parentEdge)
+ pathLength++;
+ /*count number of edges to vmove*/
+ /*note that pathLength > 0 will hold*/
+
+ EPath = (edge **)malloc(pathLength*sizeof(edge *));
+ v = (node **)malloc(pathLength*sizeof(edge *));
+ sib = (edge **)malloc((pathLength+1)*sizeof(edge *));
+ /*there are pathLength + 1 side trees, one at the head and tail of each edge in the path*/
+
+ sib[pathLength] = siblingEdge(esplit);
+ i = pathLength;
+ f = esplit->tail->parentEdge;
+ while (i > 0)
+ {
+ i--;
+ EPath[i] = f;
+ sib[i] = siblingEdge(f);
+ v[i] = f->head;
+ f = f->tail->parentEdge;
+ }
+ /*indexed so head of Epath is v[i], tail is v[i-1] and sibling edge is sib[i]*/
+ /*need to assign head, tail of each edge in path
+ as well as have proper values for the left and right fields*/
+
+ if (esplit == esplit->tail->leftEdge)
+ {
+ vmove->leftEdge = esplit;
+ vmove->rightEdge = EPath[pathLength-1];
+ }
+ else
+ {
+ vmove->rightEdge = esplit;
+ vmove->leftEdge = EPath[pathLength-1];
+ }
+ esplit->tail = vmove;
+ /*espilt->head remains unchanged*/
+ /*vmove has the proper fields for left, right, and parentEdge*/
+
+ for(i=0;i<(pathLength-1);i++)
+ EPath[i]->tail = v[i+1];
+
+ /*this bit flips the orientation along the path
+ the tail of Epath[i] is now v[i+1] instead of v[i-1]*/
+
+ EPath[pathLength-1]->tail = vmove;
+
+ for(i=1;i<pathLength;i++)
+ {
+ if (sib[i+1] == v[i]->leftEdge)
+ v[i]->rightEdge = EPath[i-1];
+ else
+ v[i]->leftEdge = EPath[i-1];
+ }
+ if (sib[1] == v[0]->leftEdge)
+ v[0]->rightEdge = sib[0];
+ else
+ v[0]->leftEdge = sib[0];
+ sib[0]->tail = v[0];
+ free(EPath);
+ free(v);
+ free(sib);
+}
+
+
+void SPRTopShift(tree *T, node *vmove, edge *esplit, int UpOrDown)
+{
+ if (DOWN == UpOrDown)
+ SPRDownShift(T,vmove,esplit);
+ else
+ SPRUpShift(T,vmove,esplit);
+}
+
+node *indexedNode(tree *T, int i)
+{
+ edge *e;
+ for(e = depthFirstTraverse(T,NULL);NULL!=e;e=depthFirstTraverse(T,e))
+ if (i == e->head->index)
+ return(e->head);
+ if (i == T->root->index)
+ return(T->root);
+ return(NULL);
+}
+
+edge *indexedEdge(tree *T, int i)
+{
+ edge *e;
+ for(e = depthFirstTraverse(T,NULL);NULL!=e;e=depthFirstTraverse(T,e))
+ if (i == e->head->index)
+ return(e);
+ return(NULL);
+}
--- /dev/null
+/* #include <stdio.h> */
+/* #include <stdlib.h> */
+/* #include <math.h> */
+/* #include "graph.h" */
+/* #include <string.h> */
+#include "me.h"
+
+/*functions from me_balanced.c*/
+void makeBMEAveragesTable(tree *T, double **D, double **A);
+void assignBMEWeights(tree *T, double **A);
+
+/*from me.c*/
+edge *siblingEdge(edge *e);
+double **initDoubleMatrix(int d);
+void freeMatrix(double **D, int size);
+edge *depthFirstTraverse(tree *T, edge *e);
+edge *findBottomLeft(edge *e);
+
+/*from bnni.c*/
+void weighTree(tree *T);
+
+void freeTree(tree *T);
+
+/*from SPR.c*/
+void zero3DMatrix(double ***X, int h, int l, int w);
+
+void assignTBRDownWeightsUp(edge *etest, node *vtest, node *va, edge *back, node *cprev, double oldD_AB, double coeff, double **A, double ***swapWeights,
+ double *bestWeight, edge **eBestSplit, edge **eBestTop, edge **eBestBottom);
+void assignTBRDownWeightsSkew(edge *etest, node *vtest, node *va, edge *back, node *cprev, double oldD_AB, double coeff, double **A, double ***swapWeights,
+ double *bestWeight, edge **eBestSplit, edge **eBestTop, edge **eBestBototm);
+void assignTBRDownWeightsDown(edge *etest, node *vtest, node *va, edge *back, node *cprev, double oldD_AB, double coeff, double **A, double ***swapWeights,
+ double *bestWeight, edge **eBestSplit, edge **eBestTop, edge **eBestBottom);
+
+void assignTBRUpWeights(edge *ebottom, node *vtest, node *va, edge *back, node *cprev, double oldD_AB, double coeff, double **A,
+ double **dXTop, double ***swapWeights, edge *etop, double *bestWeight,
+ edge **bestSplit, edge **bestTop, edge **bestBottom)
+ /*function assigns the value for etop if the tree below vtest is moved to be below etop*/
+ /*and SPR for tree bottom tree splits ebottom*/
+ /*recursive function searching over values of ebottom*/
+ /*minor variant of SPR.c's assignUpWeights
+ difference is the index assignment in the array swapWeights, which has a different meaning
+ for the TBR routines*/
+/*also using dXTop to assign value of average distance to tree above vtest*/
+{ /*SPR performed on tree above vtest...*/
+ edge *sib, *left, *right;
+ /*B is above vtest, A is other tree below vtest unioned with trees in path to vtest*/
+ /*sib is tree C being passed by B*/
+ /*D is tree below etest*/
+ double D_AB, D_CD, D_AC, D_BD;
+ sib = siblingEdge(ebottom);
+ left = ebottom->head->leftEdge;
+ right = ebottom->head->rightEdge;
+ if (NULL == etop)
+ {
+ if (NULL == back) /*first recursive call*/
+ {
+ if (NULL == left)
+ return; /*no subtree below for SPR*/
+ else /*NULL == back and NULL == etop*/
+ {
+ assignTBRUpWeights(left,vtest,va,ebottom,va,A[va->index][vtest->index],0.5,A,dXTop,swapWeights,NULL,bestWeight,bestSplit,bestTop,bestBottom);
+ assignTBRUpWeights(right,vtest,va,ebottom,va,A[va->index][vtest->index],0.5,A,dXTop,swapWeights,NULL,bestWeight,bestSplit,bestTop,bestBottom);
+ }
+ }
+ else /*NULL != back*/
+ {
+ D_BD = A[ebottom->head->index][vtest->index];
+ /*B is tree above vtest, D is tree below ebottom*/
+ D_CD = A[sib->head->index][ebottom->head->index]; /*C is tree below sib*/
+ D_AC = A[back->head->index][sib->head->index] +
+ coeff*(A[va->index][sib->head->index] - A[vtest->index][sib->head->index]);
+ /*va is root of subtree skew back at vtest*/
+ /*A is union of tree below va and all subtrees already passed in path from vtest to ebottom*/
+ D_AB = 0.5*(oldD_AB + A[vtest->index][cprev->index]);
+ swapWeights[vtest->index][ebottom->head->index][ebottom->head->index] = swapWeights[vtest->index][back->head->index][back->head->index] + (D_AC + D_BD - D_AB - D_CD);
+ if (swapWeights[vtest->index][ebottom->head->index][ebottom->head->index] < *bestWeight)
+ {
+ *bestSplit = vtest->parentEdge;
+ *bestTop = NULL;
+ *bestBottom = ebottom;
+ *bestWeight = swapWeights[vtest->index][ebottom->head->index][ebottom->head->index];
+ }
+ if (NULL != left)
+ {
+ assignTBRUpWeights(left,vtest,va,ebottom,sib->head,D_AB,0.5*coeff,A,dXTop,swapWeights,NULL,bestWeight,bestSplit,bestTop,bestBottom);
+ assignTBRUpWeights(right,vtest,va,ebottom,sib->head,D_AB,0.5*coeff,A,dXTop,swapWeights,NULL,bestWeight,bestSplit,bestTop,bestBottom);
+ }
+ }
+ }
+ else /*NULL != etop*/
+ {
+ if (NULL == back) /*first recursive call*/
+ {
+ if (swapWeights[vtest->index][etop->head->index][etop->head->index]< *bestWeight)
+ /*this represents value of SPR from esplit to etop, with no SPR in bottom tree*/
+ {
+ *bestSplit = vtest->parentEdge;
+ *bestTop = etop;
+ *bestBottom = NULL;
+ *bestWeight = swapWeights[vtest->index][etop->head->index][etop->head->index];
+ }
+ if (NULL == left)
+ return; /*no subtree below for SPR*/
+ else if (NULL != etop)/*start the process of assigning weights recursively*/
+ {
+ assignTBRUpWeights(left,vtest,va,ebottom,va,dXTop[va->index][etop->head->index],0.5,A,dXTop,swapWeights,etop,bestWeight,bestSplit,bestTop,bestBottom);
+ assignTBRUpWeights(right,vtest,va,ebottom,va,dXTop[va->index][etop->head->index],0.5,A,dXTop,swapWeights,etop,bestWeight,bestSplit,bestTop,bestBottom);
+ }
+ } /*NULL == back*/
+ /*in following bit, any average distance of form A[vtest->index][x->index] is
+ replaced by dXTop[x->index][etop->head->index]*/
+ else /*second or later recursive call, NULL != etop*/
+ {
+ D_BD = dXTop[ebottom->head->index][etop->head->index]; /*B is tree above vtest - it is in configuration
+ indexed by etop*/
+ /*D is tree below ebottom*/
+ D_CD = A[sib->head->index][ebottom->head->index]; /*C is tree below sib*/
+ D_AC = A[back->head->index][sib->head->index] +
+ coeff*(A[va->index][sib->head->index] - A[sib->head->index][vtest->index]);
+ /*it is correct to use A[][] here because the bad average distances involving B from the first term will
+ be cancelled by the bad average distances involving B in the subtracted part*/
+ /*va is root of subtree skew back at vtest*/
+ /*A is union of tree below va and all subtrees already passed in path from vtest to ebottom*/
+ D_AB = 0.5*(oldD_AB + dXTop[cprev->index][etop->head->index]);
+ swapWeights[vtest->index][ebottom->head->index][etop->head->index] = swapWeights[vtest->index][back->head->index][etop->head->index] + (D_AC + D_BD - D_AB - D_CD);
+ if (swapWeights[vtest->index][ebottom->head->index][etop->head->index] + swapWeights[vtest->index][etop->head->index][etop->head->index]< *bestWeight)
+ /*first term is contribution of second SPR, second term is contribution of first SPR*/
+ {
+ *bestSplit = vtest->parentEdge;
+ *bestTop = etop;
+ *bestBottom = ebottom;
+ *bestWeight = swapWeights[vtest->index][ebottom->head->index][etop->head->index] + swapWeights[vtest->index][etop->head->index][etop->head->index];
+ }
+ if (NULL != left)
+ {
+ assignTBRUpWeights(left,vtest, va, ebottom, sib->head, D_AB, 0.5*coeff, A, dXTop, swapWeights,etop,bestWeight,bestSplit,bestTop,bestBottom);
+ assignTBRUpWeights(right,vtest, va, ebottom, sib->head, D_AB, 0.5*coeff, A, dXTop, swapWeights,etop,bestWeight,bestSplit,bestTop,bestBottom);
+ }
+ } /*else NULL != back, etop*/
+ }
+}
+
+/*recall NNI formula: change in tree length from AB|CD split to AC|BD split is
+proportional to D_AC + D_BD - D_AB - D_CD*/
+/*in our case B is the tree being moved (below vtest), A is the tree backwards below back, but
+with the vtest subtree removed, C is the sibling tree of back and D is the tree above vtest*/
+/*use va to denote the root of the sibling tree to B in the original tree*/
+/*please excuse the multiple uses of the same letters: A,D, etc.*/
+void assignTBRDownWeightsUp(edge *etest, node *vtest, node *va, edge *back, node *cprev, double oldD_AB, double coeff, double **A, double ***swapWeights,
+ double *bestWeight, edge **bestSplitEdge, edge **bestTop, edge **bestBottom)
+{
+ edge *par, *sib, *skew;
+ double D_AC, D_BD, D_AB, D_CD;
+ par = etest->tail->parentEdge;
+ skew = siblingEdge(etest);
+ if (NULL == back) /*first recursive call*/
+ {
+ if (NULL == par)
+ return;
+ else /*start the process of assigning weights recursively*/
+ {
+ assignTBRDownWeightsUp(par,vtest,va,etest,va,A[va->index][vtest->index],0.5,A,swapWeights,bestWeight,bestSplitEdge,bestTop,bestBottom);
+ assignTBRDownWeightsSkew(skew,vtest,va,etest,va,A[va->index][vtest->index],0.5,A,swapWeights,bestWeight,bestSplitEdge,bestTop,bestBottom);
+ }
+ }
+ else /*second or later recursive call*/
+ {
+ sib = siblingEdge(back);
+ D_BD = A[vtest->index][etest->head->index]; /*straightforward*/
+ D_CD = A[sib->head->index][etest->head->index]; /*this one too*/
+ D_AC = A[sib->head->index][back->head->index] + coeff*(A[sib->head->index][va->index]
+ - A[sib->head->index][vtest->index]);
+ D_AB = 0.5*(oldD_AB + A[vtest->index][cprev->index]);
+ swapWeights[vtest->index][etest->head->index][etest->head->index] = swapWeights[vtest->index][back->head->index][back->head->index] + (D_AC + D_BD - D_AB - D_CD);
+ /*using diagonal to store values for SPR swaps above the split edge*/
+ /*this is value of swapping tree below vtest to break etest*/
+ if (swapWeights[vtest->index][etest->head->index][etest->head->index] < *bestWeight)
+ {
+ *bestWeight = swapWeights[vtest->index][etest->head->index][etest->head->index];
+ *bestSplitEdge = vtest->parentEdge;
+ *bestTop = etest;
+ *bestBottom = NULL;
+ }
+ if (NULL != par)
+ {
+ assignTBRDownWeightsUp(par,vtest,va,etest,sib->head,D_AB,0.5*coeff,A,swapWeights,bestWeight,bestSplitEdge,bestTop,bestBottom);
+ assignTBRDownWeightsSkew(skew,vtest,va,etest,sib->head,D_AB,0.5*coeff,A,swapWeights,bestWeight,bestSplitEdge,bestTop,bestBottom);
+ }
+ }
+}
+
+
+void assignTBRDownWeightsSkew(edge *etest, node *vtest, node *va, edge *back, node *cprev, double oldD_AB, double coeff, double **A, double ***swapWeights,
+ double *bestWeight, edge **bestSplitEdge, edge **bestTop, edge **bestBottom)
+{
+ /*same general idea as assignDownWeights, except needing to keep track of things a bit differently*/
+ edge *par, *left, *right;
+ /*par here = sib before
+ left, right here = par, skew before*/
+ double D_AB, D_CD, D_AC, D_BD;
+ /*B is subtree being moved - below vtest
+ A is subtree remaining fixed - below va, unioned with all trees already passed by B*/
+ /*C is subtree being passed by B, in this case above par
+ D is subtree below etest, fixed on other side*/
+ par = etest->tail->parentEdge;
+ left = etest->head->leftEdge;
+ right = etest->head->rightEdge;
+ if (NULL == back)
+ {
+ if (NULL == left)
+ return;
+ else
+ {
+ assignTBRDownWeightsDown(left,vtest,va,etest,etest->tail,A[vtest->index][etest->tail->index],0.5,A,swapWeights,bestWeight,bestSplitEdge,bestTop,bestBottom);
+ assignTBRDownWeightsDown(right,vtest,va,etest,etest->tail,A[vtest->index][etest->tail->index],0.5,A,swapWeights,bestWeight,bestSplitEdge,bestTop,bestBottom);
+ }
+ }
+ else
+ {
+ D_BD = A[vtest->index][etest->head->index];
+ D_CD = A[par->head->index][etest->head->index];
+ D_AC = A[back->head->index][par->head->index] + coeff*(A[va->index][par->head->index] - A[vtest->index][par->head->index]);
+ D_AB = 0.5*(oldD_AB + A[vtest->index][cprev->index]);
+ swapWeights[vtest->index][etest->head->index][etest->head->index] = swapWeights[vtest->index][back->head->index][back->head->index] + (D_AC + D_BD - D_AB - D_CD);
+ if (swapWeights[vtest->index][etest->head->index][etest->head->index] < *bestWeight)
+ {
+ *bestWeight = swapWeights[vtest->index][etest->head->index][etest->head->index];
+ *bestSplitEdge = vtest->parentEdge;
+ *bestTop = etest;
+ *bestBottom = NULL;
+ }
+ if (NULL != left)
+ {
+ assignTBRDownWeightsDown(left,vtest, va, etest, etest->tail, D_AB, 0.5*coeff, A, swapWeights,bestWeight,bestSplitEdge,bestTop,bestBottom);
+ assignTBRDownWeightsDown(right,vtest, va, etest, etest->tail, D_AB, 0.5*coeff, A, swapWeights,bestWeight,bestSplitEdge,bestTop,bestBottom);
+ }
+ }
+}
+
+void assignTBRDownWeightsDown(edge *etest, node *vtest, node *va, edge *back, node *cprev, double oldD_AB, double coeff, double **A, double ***swapWeights,
+ double *bestWeight, edge **bestSplitEdge, edge **bestTop, edge **bestBottom)
+{
+ /*again the same general idea*/
+ edge *sib, *left, *right;
+ /*sib here = par in assignDownWeightsSkew
+ rest is the same as assignDownWeightsSkew*/
+ double D_AB, D_CD, D_AC, D_BD;
+ /*B is below vtest, A is below va unioned with all trees already passed by B*/
+ /*C is subtree being passed - below sib*/
+ /*D is tree below etest*/
+ sib = siblingEdge(etest);
+ left = etest->head->leftEdge;
+ right = etest->head->rightEdge;
+ D_BD = A[vtest->index][etest->head->index];
+ D_CD = A[sib->head->index][etest->head->index];
+ D_AC = A[sib->head->index][back->head->index] + coeff*(A[sib->head->index][va->index] - A[sib->head->index][vtest->index]);
+ D_AB = 0.5*(oldD_AB + A[vtest->index][cprev->index]);
+ swapWeights[vtest->index][etest->head->index][etest->head->index] = swapWeights[vtest->index][back->head->index][back->head->index] + ( D_AC + D_BD - D_AB - D_CD);
+ if (swapWeights[vtest->index][etest->head->index][etest->head->index] < *bestWeight)
+ {
+ *bestWeight = swapWeights[vtest->index][etest->head->index][etest->head->index];
+ *bestSplitEdge = vtest->parentEdge;
+ *bestTop = etest;
+ *bestBottom = NULL;
+ }
+ if (NULL != left)
+ {
+ assignTBRDownWeightsDown(left,vtest, va, etest, sib->head, D_AB, 0.5*coeff, A, swapWeights,bestWeight,bestSplitEdge,bestTop,bestBottom);
+ assignTBRDownWeightsDown(right,vtest, va, etest, sib->head, D_AB, 0.5*coeff, A, swapWeights,bestWeight,bestSplitEdge,bestTop,bestBottom);
+ }
+}
+
+/*general idea is to have a double loop for a given edge, testing all SPRs for the subtrees above and below a given edge.
+ Then that function loops over all the edges of a tree*/
+
+void TBRswitch(tree *T, edge *e1, edge *e2, edge *e3);
+
+/*vbottom is node below esplit for average calculations in matrix dXTop, A is matrix of average
+ distances from original tree, dXout is average distance from vbottom to tree rooted at far edge
+ of eback, if SPR breaking eback, UpOrDown indicates whether etop is in path above split edge
+ (Up) or not (Down)*/
+void calcTBRTopBottomAverage(node *vbottom, double **A, double **dXTop, double dXOut,
+ edge *esplit, edge *etop, edge *eback, int UpOrDown)
+{
+ edge *enew1, *enew2, *emove;
+ double newdXOut;
+ if (esplit == eback) /*top level call - means trivial SPR*/
+ dXTop[vbottom->index][etop->head->index] = A[vbottom->index][esplit->head->index];
+ else
+ dXTop[vbottom->index][etop->head->index] = dXTop[vbottom->index][eback->head->index] +
+ 0.25*(A[vbottom->index][etop->head->index] - dXOut);
+ /*by moving etop past the vbottom tree, everything in the etop tree is closer by coefficient of
+ 0.25, while everything in the old back tree is further by a coefficient of 0.25*/
+ /*everything in the tree that is being moved (emove) keeps the same relative weight in the average
+ distance calculation*/
+ if (UP == UpOrDown)
+ {
+ enew1 = etop->tail->parentEdge;
+ if (NULL != enew1) /*need to do recursive calls*/
+ {
+ enew2 = siblingEdge(etop);
+ emove = siblingEdge(eback); /*emove is third edge meeting at vertex with eback, etest*/
+ if (esplit == eback)
+ newdXOut = A[vbottom->index][emove->head->index];
+ else
+ newdXOut = 0.5*(dXOut + A[vbottom->index][emove->head->index]);
+ calcTBRTopBottomAverage(vbottom,A,dXTop,newdXOut,esplit, enew1,etop,UP); /*old etop is new value for back*/
+ calcTBRTopBottomAverage(vbottom,A,dXTop,newdXOut,esplit, enew2,etop,DOWN);
+ }
+ }
+ else /*moving down*/
+ {
+ enew1 = etop->head->leftEdge;
+ if (NULL != enew1)
+ {
+ enew2 = etop->head->rightEdge;
+ if (eback == siblingEdge(etop))
+ emove = etop->tail->parentEdge;
+ else
+ emove = siblingEdge(etop);
+ if (esplit == eback)
+ newdXOut = A[vbottom->index][emove->head->index];
+ else
+ newdXOut = 0.5*(dXOut + A[vbottom->index][emove->head->index]);
+ calcTBRTopBottomAverage(vbottom,A,dXTop,newdXOut,esplit,enew1,etop,DOWN);
+ calcTBRTopBottomAverage(vbottom,A,dXTop,newdXOut,esplit,enew2,etop,DOWN);
+ }
+ }
+}
+
+void calcTBRaverages(tree *T, edge *esplit, double **A, double **dXTop)
+{
+ edge *ebottom, *par, *sib;
+ for (ebottom = findBottomLeft(esplit); ebottom != esplit; ebottom = depthFirstTraverse(T,ebottom))
+ {
+ par = esplit->tail->parentEdge;
+ sib = siblingEdge(esplit);
+ calcTBRTopBottomAverage(ebottom->head,A, dXTop, 0.0, esplit, par,esplit,UP);
+ calcTBRTopBottomAverage(ebottom->head,A, dXTop, 0.0, esplit, sib,esplit,DOWN);
+ }
+}
+
+void TBR(tree *T, double **D, double **A)
+{
+ int i;
+ edge *esplit, *etop, *eBestTop, *eBestBottom, *eBestSplit;
+ edge *eout, *block;
+ edge *left, *right, *par, *sib;
+ double **dXTop; /*dXTop[i][j] is average distance from subtree rooted at i to tree above split edge, if
+ SPR above split edge cuts edge whose head has index j*/
+ double bestWeight;
+ double ***TBRWeights;
+ dXTop = initDoubleMatrix(T->size);
+ weighTree(T);
+ TBRWeights = (double ***)calloc(T->size,sizeof(double **));
+ for(i=0;i<T->size;i++)
+ TBRWeights[i] = initDoubleMatrix(T->size);
+ do
+ {
+ zero3DMatrix(TBRWeights,T->size,T->size,T->size);
+ bestWeight = 0.0;
+ eBestSplit = eBestTop = eBestBottom = NULL;
+ for(esplit=depthFirstTraverse(T,NULL);NULL!=esplit;esplit=depthFirstTraverse(T,esplit))
+ {
+ par = esplit->tail->parentEdge;
+ if (NULL != par)
+ {
+ sib = siblingEdge(esplit);
+ /*next two lines calculate the possible improvements for any SPR above esplit*/
+ assignTBRDownWeightsUp(par,esplit->head,sib->head,NULL,NULL,0.0,1.0,A,TBRWeights,&bestWeight,&eBestSplit,&eBestTop,&eBestBottom);
+ assignTBRDownWeightsSkew(sib,esplit->head,sib->tail,NULL,NULL,0.0,1.0,A,TBRWeights,&bestWeight,&eBestSplit,&eBestTop,&eBestBottom);
+ calcTBRaverages(T,esplit,A,dXTop); /*calculates the average distance from any subtree
+ below esplit to the entire subtree above esplit,
+ after any possible SPR above*/
+ /*for etop above esplit, we loop using information from above to calculate values
+ for all possible SPRs below esplit*/
+ }
+
+ right = esplit->head->rightEdge;
+ if (NULL != right)
+ {
+ left = esplit->head->leftEdge;
+ /*test case: etop = null means only do bottom SPR*/
+ assignTBRUpWeights(left,esplit->head,right->head,NULL,NULL,0.0,1.0,A,dXTop,TBRWeights,NULL,&bestWeight,&eBestSplit,&eBestTop,&eBestBottom);
+ assignTBRUpWeights(right,esplit->head,left->head,NULL,NULL,0.0,1.0,A,dXTop,TBRWeights,NULL,&bestWeight,&eBestSplit,&eBestTop,&eBestBottom);
+
+ block = esplit;
+ while (NULL != block)
+ {
+ if (block != esplit)
+ {
+ etop = block;
+ assignTBRUpWeights(left,esplit->head,right->head,NULL,NULL,0.0,1.0,A,dXTop,TBRWeights,etop,&bestWeight,&eBestSplit,&eBestTop,&eBestBottom);
+ assignTBRUpWeights(right,esplit->head,left->head,NULL,NULL,0.0,1.0,A,dXTop,TBRWeights,etop,&bestWeight,&eBestSplit,&eBestTop,&eBestBottom);
+ }
+ eout = siblingEdge(block);
+ if (NULL != eout)
+ {
+ etop = findBottomLeft(eout);
+ while (etop->tail != eout->tail)
+ {
+ /*for ebottom below esplit*/
+
+ assignTBRUpWeights(left,esplit->head,right->head,NULL,NULL,0.0,1.0,A,dXTop,TBRWeights,etop,&bestWeight,&eBestSplit,&eBestTop,&eBestBottom);
+ assignTBRUpWeights(right,esplit->head,left->head,NULL,NULL,0.0,1.0,A,dXTop,TBRWeights,etop,&bestWeight,&eBestSplit,&eBestTop,&eBestBottom);
+ etop = depthFirstTraverse(T,etop);
+ }
+
+ /*etop == eout*/
+
+ assignTBRUpWeights(left,esplit->head,right->head,NULL,NULL,0.0,1.0,A,dXTop,TBRWeights,etop,&bestWeight,&eBestSplit,&eBestTop,&eBestBottom);
+ assignTBRUpWeights(right,esplit->head,left->head,NULL,NULL,0.0,1.0,A,dXTop,TBRWeights,etop,&bestWeight,&eBestSplit,&eBestTop,&eBestBottom);
+ }
+ block = block->tail->parentEdge;
+ }
+ } /*if NULL != right*/
+ } /*for esplit*/
+ /*find bestWeight, best split edge, etc.*/
+ if (bestWeight < -EPSILON)
+ {
+// if (verbose)
+// {
+// printf("TBR #%d: Splitting edge %s: top edge %s, bottom edge %s\n",*count+1,
+// eBestSplit->label, eBestTop->label,eBestBottom->label);
+// printf("Old tree weight is %lf, new tree weight should be %lf\n",T->weight, T->weight + 0.25*bestWeight);
+// }
+ TBRswitch(T,eBestSplit,eBestTop,eBestBottom);
+ makeBMEAveragesTable(T,D,A);
+ assignBMEWeights(T,A);
+ weighTree(T);
+// if (verbose)
+// printf("TBR: new tree weight is %lf\n\n",T->weight);<
+// (*count)++;
+ }
+ else
+ bestWeight = 1.0;
+ } while (bestWeight < -EPSILON);
+ for(i=0;i<T->size;i++)
+ freeMatrix(TBRWeights[i],T->size);
+ freeMatrix(dXTop,T->size);
+}
+
+void SPRTopShift(tree *T, node *v, edge *e, int UpOrDown);
+
+void TBRswitch(tree *T, edge *es, edge *et, edge *eb)
+{
+ if (NULL != et)
+ SPRTopShift(T,es->head,et,DOWN); /*DOWN because tree being moved is below split edge*/
+ if (NULL != eb)
+ SPRTopShift(T,es->head,eb,UP); /*UP because tree being moved is above split edge*/
+}
-/*#include <stdio.h>
-#include <stdlib.h>
-#include <math.h>
-#include "graph.h"
-#include "main.h"
-*/
#include "me.h"
/*boolean leaf(node *v);
void popHeap(int *p, int *q, double *v, int length, int i);
-void bNNIRetestEdge(int *p, int *q, edge *e,tree *T, double **avgDistArray,
+void bNNIRetestEdge(int *p, int *q, edge *e,tree *T, double **avgDistArray,
double *weights, int *location, int *possibleSwaps)
{
int tloc;
tloc = location[e->head->index+1];
- location[e->head->index+1] =
+ location[e->head->index+1] =
bNNIEdgeTest(e,T,avgDistArray,weights + e->head->index+1);
if (NONE == location[e->head->index+1])
{
if (NONE != tloc)
- popHeap(p,q,weights,(*possibleSwaps)--,q[e->head->index+1]);
+ popHeap(p,q,weights,(*possibleSwaps)--,q[e->head->index+1]);
}
else
{
edgeArray = (edge **) malloc((T->size+1)*sizeof(double));
weights = (double *) malloc((T->size+1)*sizeof(double));
location = (int *) malloc((T->size+1)*sizeof(int));
-
+
double epsilon = 0.0;
for (i=0; i<numSpecies; i++)
for (j=0; j<numSpecies; j++)
assignBMEWeights(T,avgDistArray);
weighTree(T);
}*/
- e = findBottomLeft(T->root->leftEdge);
+ e = findBottomLeft(T->root->leftEdge);
while (NULL != e)
{
edgeArray[e->head->index+1] = e;
- location[e->head->index+1] =
+ location[e->head->index+1] =
bNNIEdgeTest(e,T,avgDistArray,weights + e->head->index + 1);
e = depthFirstTraverse(T,e);
- }
+ }
possibleSwaps = makeThreshHeap(p,q,weights,T->size+1,0.0);
permInverse(p,q,T->size+1);
/*we put the negative values of weights into a heap, indexed by p
with the minimum value pointed to by p[1]*/
- /*p[i] is index (in edgeArray) of edge with i-th position
+ /*p[i] is index (in edgeArray) of edge with i-th position
in the heap, q[j] is the position of edge j in the heap */
while (weights[p[1]] + epsilon < 0)
{
updateSubTreeAfterNNI(A, v, rootEdge->head->rightEdge, closer, further, 0.5*dcoeff,UP);
updatePair(A, rootEdge, rootEdge, closer, further, dcoeff, UP);
sib = siblingEdge(v->parentEdge);
- A[rootEdge->head->index][v->index] =
- A[v->index][rootEdge->head->index] =
+ A[rootEdge->head->index][v->index] =
+ A[v->index][rootEdge->head->index] =
0.5*A[rootEdge->head->index][sib->head->index] +
- 0.5*A[rootEdge->head->index][v->parentEdge->tail->index];
+ 0.5*A[rootEdge->head->index][v->parentEdge->tail->index];
break;
case DOWN: /*rootEdge is above the center edge of the NNI*/
sib = siblingEdge(rootEdge);
if (NULL != rootEdge->tail->parentEdge)
updateSubTreeAfterNNI(A, v, rootEdge->tail->parentEdge, closer, further, 0.5*dcoeff, DOWN);
updatePair(A, rootEdge, rootEdge, closer, further, dcoeff, DOWN);
- A[rootEdge->head->index][v->index] =
- A[v->index][rootEdge->head->index] =
+ A[rootEdge->head->index][v->index] =
+ A[v->index][rootEdge->head->index] =
0.5*A[rootEdge->head->index][v->leftEdge->head->index] +
0.5*A[rootEdge->head->index][v->rightEdge->head->index];
break;
if (NULL != rootEdge->head->rightEdge)
updateSubTreeAfterNNI(A, v, rootEdge->head->rightEdge, closer, further, 0.5*dcoeff,SKEW);
updatePair(A, rootEdge, rootEdge, closer, further, dcoeff, UP);
- A[rootEdge->head->index][v->index] =
- A[v->index][rootEdge->head->index] =
+ A[rootEdge->head->index][v->index] =
+ A[v->index][rootEdge->head->index] =
0.5*A[rootEdge->head->index][v->leftEdge->head->index] +
0.5*A[rootEdge->head->index][v->rightEdge->head->index];
break;
}
/*swapping across edge whose head is v*/
-void bNNIupdateAverages(double **A, node *v, edge *par, edge *skew,
+void bNNIupdateAverages(double **A, node *v, edge *par, edge *skew,
edge *swap, edge *fixed)
-{
- A[v->index][v->index] = 0.25*(A[fixed->head->index][par->head->index] +
- A[fixed->head->index][swap->head->index] +
- A[skew->head->index][par->head->index] +
+{
+ A[v->index][v->index] = 0.25*(A[fixed->head->index][par->head->index] +
+ A[fixed->head->index][swap->head->index] +
+ A[skew->head->index][par->head->index] +
A[skew->head->index][swap->head->index]);
updateSubTreeAfterNNI(A, v, fixed, skew->head, swap->head, 0.25, UP);
updateSubTreeAfterNNI(A, v, par, swap->head, skew->head, 0.25, DOWN);
- updateSubTreeAfterNNI(A, v, skew, fixed->head, par->head, 0.25, UP);
+ updateSubTreeAfterNNI(A, v, skew, fixed->head, par->head, 0.25, UP);
updateSubTreeAfterNNI(A, v, swap, par->head, fixed->head, 0.25, SKEW);
-
}
printf("Weight dropping by %lf.\n",w0 - w1);
printf("New weight should be %lf.\n",T->weight + w1 - w0);
}*/
- return(LEFT);
+ return(LEFT);
}
}
-
/*limitedFillTableUp fills all the entries in D associated with
e->head,f->head and those edges g->head above e->head, working
recursively and stopping when trigger is reached*/
if (f != trigger)
limitedFillTableUp(e,g,A,trigger);
h = siblingEdge(f);
- A[e->head->index][f->head->index] =
- A[f->head->index][e->head->index] =
- 0.5*(A[e->head->index][g->head->index] + A[e->head->index][h->head->index]);
+ A[e->head->index][f->head->index] =
+ A[f->head->index][e->head->index] =
+ 0.5*(A[e->head->index][g->head->index] + A[e->head->index][h->head->index]);
}
-
-/* dist_dna.c 2008-01-19 */
+/* dist_dna.c 2008-12-22 */
/* Copyright 2005-2008 Emmanuel Paradis
if (KnownBase(x[s1]) && KnownBase(x[s2])) L++;\
else continue;
-void distDNA_raw(unsigned char *x, int *n, int *s, double *d)
+void distDNA_raw(unsigned char *x, int *n, int *s, double *d, int scaled)
{
int i1, i2, s1, s2, target, Nd;
Nd = 0;
for (s1 = i1 - 1, s2 = i2 - 1; s1 < i1 + *n*(*s - 1); s1+= *n, s2 += *n)
if (DifferentBase(x[s1], x[s2])) Nd++;
- d[target] = ((double) Nd / *s);
+ if (scaled) d[target] = ((double) Nd / *s);
+ else d[target] = ((double) Nd);
target++;
}
}
}
-void distDNA_raw_pairdel(unsigned char *x, int *n, int *s, double *d)
+void distDNA_raw_pairdel(unsigned char *x, int *n, int *s, double *d, int scaled)
{
int i1, i2, s1, s2, target, Nd, L;
CHECK_PAIRWISE_DELETION
if (DifferentBase(x[s1], x[s2])) Nd++;
}
- d[target] = ((double) Nd/L);
+ if (scaled) d[target] = ((double) Nd/L);
+ else d[target] = ((double) Nd);
target++;
}
}
int *gamma, double *alpha)
{
switch (*model) {
- case 1 : if (pairdel) distDNA_raw_pairdel(x, n, s, d);
- else distDNA_raw(x, n, s, d); break;
+ case 1 : if (pairdel) distDNA_raw_pairdel(x, n, s, d, 1);
+ else distDNA_raw(x, n, s, d, 1); break;
case 2 : if (pairdel) distDNA_JC69_pairdel(x, n, s, d, variance, var, gamma, alpha);
else distDNA_JC69(x, n, s, d, variance, var, gamma, alpha); break;
case 12 : if (pairdel) distDNA_ParaLin_pairdel(x, n, s, d, variance, var);
else distDNA_ParaLin(x, n, s, d, variance, var); break;
+ case 13 : if (pairdel) distDNA_raw_pairdel(x, n, s, d, 0);
+ else distDNA_raw(x, n, s, d, 0); break;
}
}
-/*#include <stdio.h>
-#include <stdlib.h>
-#include <math.h>
-#include "main.h"*/
-
#include "me.h"
int *initPerm(int size)
/*The usual Heapify function, tailored for our use with a heap of scores*/
/*will use array p to keep track of indexes*/
-/*after scoreHeapify is called, the subtree rooted at i
+/*after scoreHeapify is called, the subtree rooted at i
will be a heap*/
/*p goes from heap to array, q goes from array to heap*/
if ((right <= n) && (HeapArray[p[right]] < HeapArray[p[smallest]]))
smallest = right;
if (smallest != i){
- swap(p,q,i, smallest);
- /*push smallest up the heap*/
+ swap(p,q,i, smallest);
+ /*push smallest up the heap*/
i = smallest; /*check next level down*/
}
else
} while(moreswap);
}
-/*heap is of indices of elements of v,
+/*heap is of indices of elements of v,
popHeap takes the index at position i and pushes it out of the heap
(by pushing it to the bottom of the heap, where it is not noticed)*/
void bNNI(tree *T, double **avgDistArray, int *count, double **D, int numSpecies);
//functions from NNI.c
void NNI(tree *T, double **avgDistArray, int *count, double **D, int numSpecies);
+//functions from SPR.c
+void SPR(tree *T, double **D, double **A, int *count);
+//functions from TBR.c
+void TBR(tree *T, double **D, double **A);
-void me_b(double *X, int *N, char **labels, int *nni,
+void me_b(double *X, int *N, char **labels,
+ int *nni, int *spr, int *tbr,
int *edge1, int *edge2, double *el, char **tl)
{
double **D, **A;
T = BMEaddSpecies(T, addNode, D, A);
}
// Compute bNNI
- if (*nni == 1)
- bNNI(T, A, &nniCount, D, n);
+ if (*nni) bNNI(T, A, &nniCount, D, n);
assignBMEWeights(T,A);
+ if (*spr) SPR(T, D, A, &nniCount);
+ if (*tbr) TBR(T, D, A);
+
tree2phylo(T, edge1, edge2, el, tl, n);
freeMatrix(D,n);
}
makeOLSAveragesTable(T,D,A);
// Compute NNI
- if (*nni == 1)
+ if (*nni)
NNI(T,A,&nniCount,D,n);
assignOLSWeights(T,A);
struct set *secondNode;
} set;
-void me_b(double *X, int *N, char **labels, int *nni, int *edge1, int *edge2, double *el, char **tl);
+void me_b(double *X, int *N, char **labels, int *nni, int *spr, int *tbr, int *edge1, int *edge2, double *el, char **tl);
void me_o(double *X, int *N, char **labels, int *nni, int *edge1, int *edge2, double *el, char **tl);
//int whiteSpace(char c);
double **initDoubleMatrix(int d);
-//#include <stdio.h>
-//#include <stdlib.h>
-//#include <math.h>
#include "me.h"
void BalWFext(edge *e, double **A) /*works except when e is the one edge
BalWFint(e,A);
e = depthFirstTraverse(T,e);
}
-}
+}
void BMEcalcDownAverage(tree *T, node *v, edge *e, double **D, double **A)
{
edge *left, *right;
if (leaf(e->head))
- A[e->head->index][v->index] = D[v->index2][e->head->index2];
+ A[e->head->index][v->index] = D[v->index2][e->head->index2];
else
{
left = e->head->leftEdge;
right = e->head->rightEdge;
- A[e->head->index][v->index] = 0.5 * A[left->head->index][v->index]
+ A[e->head->index][v->index] = 0.5 * A[left->head->index][v->index]
+ 0.5 * A[right->head->index][v->index];
}
}
BMEcalcDownAverage(T,v,e,D,A);
e = depthFirstTraverse(T,e);
}
-
- e = topFirstTraverse(T,e); /*the upward averages need to be calculated
+
+ e = topFirstTraverse(T,e); /*the upward averages need to be calculated
from top to bottom */
while(NULL != e)
{
switch(direction) /*the various cases refer to where the new vertex has
been inserted, in relation to the edge nearEdge*/
{
- case UP: /*this case is called when v has been inserted above
+ case UP: /*this case is called when v has been inserted above
or skew to farEdge*/
/*do recursive calls first!*/
if (NULL != farEdge->head->leftEdge)
= A[farEdge->head->index][nearEdge->head->index]
+ dcoeff*A[farEdge->head->index][v->index]
- dcoeff*A[farEdge->head->index][root->index];
- break;
+ break;
case DOWN: /*called when v has been inserted below farEdge*/
if (NULL != farEdge->tail->parentEdge)
updatePair(A,nearEdge,farEdge->tail->parentEdge,v,root,dcoeff,DOWN);
A[nearEdge->head->index][farEdge->head->index]
= A[farEdge->head->index][nearEdge->head->index]
+ dcoeff*A[v->index][farEdge->head->index]
- - dcoeff*A[farEdge->head->index][root->index];
+ - dcoeff*A[farEdge->head->index][root->index];
}
}
{
case UP: /*newNode is above the edge nearEdge*/
A[v->index][nearEdge->head->index] = A[nearEdge->head->index][v->index];
- A[newNode->index][nearEdge->head->index] =
+ A[newNode->index][nearEdge->head->index] =
A[nearEdge->head->index][newNode->index] =
- A[nearEdge->head->index][root->index];
+ A[nearEdge->head->index][root->index];
if (NULL != nearEdge->head->leftEdge)
updateSubTree(A, nearEdge->head->leftEdge, v, root, newNode, 0.5*dcoeff, UP);
if (NULL != nearEdge->head->rightEdge)
break;
case DOWN: /*newNode is below the edge nearEdge*/
A[nearEdge->head->index][v->index] = A[v->index][nearEdge->head->index];
- A[newNode->index][nearEdge->head->index] =
+ A[newNode->index][nearEdge->head->index] =
A[nearEdge->head->index][newNode->index] =
- 0.5*(A[nearEdge->head->index][root->index]
+ 0.5*(A[nearEdge->head->index][root->index]
+ A[v->index][nearEdge->head->index]);
sib = siblingEdge(nearEdge);
if (NULL != sib)
break;
case SKEW: /*newNode is neither above nor below nearEdge*/
A[v->index][nearEdge->head->index] = A[nearEdge->head->index][v->index];
- A[newNode->index][nearEdge->head->index] =
+ A[newNode->index][nearEdge->head->index] =
A[nearEdge->head->index][newNode->index] =
- 0.5*(A[nearEdge->head->index][root->index] +
- A[nearEdge->head->index][v->index]);
+ 0.5*(A[nearEdge->head->index][root->index] +
+ A[nearEdge->head->index][v->index]);
if (NULL != nearEdge->head->leftEdge)
updateSubTree(A, nearEdge->head->leftEdge, v, root, newNode, 0.5*dcoeff,SKEW);
if (NULL != nearEdge->head->rightEdge)
}
-/*we update all the averages for nodes (u1,u2), where the insertion point of
+/*we update all the averages for nodes (u1,u2), where the insertion point of
v is in "direction" from both u1 and u2 */
/*The general idea is to proceed in a direction from those edges already corrected
*/
/*first, update the v,newNode entries*/
A[newNode->index][newNode->index] = 0.5*(A[e->head->index][e->head->index]
+ A[v->index][e->head->index]);
- A[v->index][newNode->index] = A[newNode->index][v->index] =
+ A[v->index][newNode->index] = A[newNode->index][v->index] =
A[v->index][e->head->index];
- A[v->index][v->index] =
+ A[v->index][v->index] =
0.5*(A[e->head->index][v->index] + A[v->index][e->head->index]);
left = e->head->leftEdge;
right = e->head->rightEdge;
A[v->index][e->head->index] = A[e->head->index][v->index];
updatePair(A,e,e,v,e->head,0.5,UP); /*updates e->head fields only*/
-}
+}
/*A is tree below sibling, B is tree below edge, C is tree above edge*/
double wf3(double D_AB, double D_AC, double D_kB, double D_kC)
//sprintf(edgeLabel2,"E%d",T->size+1);
snprintf(edgeLabel1,EDGE_LABEL_LENGTH,"E%d",T->size);
snprintf(edgeLabel2,EDGE_LABEL_LENGTH,"E%d",T->size+1);
-
/*make the new node and edges*/
newNode = makeNewNode(nodeLabel,T->size+1);
v->parentEdge = newPendantEdge;
e->head = newNode;
- T->size = T->size + 2;
+ T->size = T->size + 2;
- if (e->tail->leftEdge == e)
+ if (e->tail->leftEdge == e)
/*actually this is totally arbitrary and probably unnecessary*/
{
newNode->leftEdge = newInternalEdge;
}
}
-tree *BMEaddSpecies(tree *T,node *v, double **D, double **A)
+tree *BMEaddSpecies(tree *T,node *v, double **D, double **A)
/*the key function of the program addSpeices inserts
the node v to the tree T. It uses testEdge to see what the relative
weight would be if v split a particular edge. Once insertion point
edge *e; /*loop variable*/
edge *e_min; /*points to best edge seen thus far*/
double w_min = 0.0; /*used to keep track of tree weights*/
-
+
/*initialize variables as necessary*/
-
+
/*CASE 1: T is empty, v is the first node*/
if (NULL == T) /*create a tree with v as only vertex, no edges*/
{
T_e = newTree();
- T_e->root = v;
- /*note that we are rooting T arbitrarily at a leaf.
+ T_e->root = v;
+ /*note that we are rooting T arbitrarily at a leaf.
T->root is not the phylogenetic root*/
v->index = 0;
T_e->size = 1;
- return(T_e);
+ return(T_e);
}
/*CASE 2: T is a single-vertex tree*/
if (1 == T->size)
A[v->index][v->index] = D[v->index2][T->root->index2];
T->root->leftEdge = v->parentEdge = e;
T->size = 2;
- return(T);
+ return(T);
}
/*CASE 3: T has at least two nodes and an edge. Insert new node
by breaking one of the edges*/
-
+
v->index = T->size;
BMEcalcNewvAverages(T,v,D,A);
- /*calcNewvAverages will update A for the row and column
+ /*calcNewvAverages will update A for the row and column
include the node v. Will do so using pre-existing averages in T and
information from A,D*/
e_min = T->root->leftEdge;
e = e_min->head->leftEdge;
while (NULL != e)
{
- BMEtestEdge(e,v,A);
- /*testEdge tests weight of tree if loop variable
+ BMEtestEdge(e,v,A);
+ /*testEdge tests weight of tree if loop variable
e is the edge split, places this value in the e->totalweight field */
if (e->totalweight < w_min)
{
else if (leaf(e->head))
{
if (leaf(f->head))
- A[e->head->index][f->head->index] =
+ A[e->head->index][f->head->index] =
A[f->head->index][e->head->index]
= D[e->head->index2][f->head->index2];
else
depth-first search, other values
have been calculated*/
v = f->head->rightEdge->head;
- A[e->head->index][f->head->index]
- = A[f->head->index][e->head->index]
+ A[e->head->index][f->head->index]
+ = A[f->head->index][e->head->index]
= 0.5*(A[e->head->index][u->index] + A[e->head->index][v->index]);
}
}
A[e->head->index][f->head->index] = A[f->head->index][e->head->index] = 0.5*(A[f->head->index][u->index] + A[f->head->index][v->index]);
}
f = depthFirstTraverse(T,f);
- }
+ }
e = depthFirstTraverse(T,e);
}
e = depthFirstTraverse(T,NULL);
while (T->root->leftEdge != e)
{
- calcUpAverages(D,A,e,e); /*calculates averages for
+ calcUpAverages(D,A,e,e); /*calculates averages for
A[e->head->index][g->head->index] for
- any edge g in path from e to root of tree*/
+ any edge g in path from e to root of tree*/
e = depthFirstTraverse(T,e);
}
} /*makeAveragesMatrix*/
-//#include <stdio.h>
-//#include <stdlib.h>
-//#include <math.h>
#include "me.h"
/*from NNI.c*/
if(leaf(e->head))
{
f = siblingEdge(e);
- e->distance = 0.5*(A[e->head->index][e->tail->index]
+ e->distance = 0.5*(A[e->head->index][e->tail->index]
+ A[e->head->index][f->head->index]
- A[f->head->index][e->tail->index]);
}
}
}
-double wf(double lambda, double D_LR, double D_LU, double D_LD,
+double wf(double lambda, double D_LR, double D_LU, double D_LD,
double D_RU, double D_RD, double D_DU)
{
double weight;
weight = 0.5*(lambda*(D_LU + D_RD) + (1 -lambda)*(D_LD + D_RU)
- - (D_LR + D_DU));
+ - (D_LR + D_DU));
return(weight);
}
left = e->head->leftEdge;
right = e->head->rightEdge;
sib = siblingEdge(e);
- lambda = ((double) sib->bottomsize*left->bottomsize +
+ lambda = ((double) sib->bottomsize*left->bottomsize +
right->bottomsize*e->tail->parentEdge->topsize) /
(e->bottomsize*e->topsize);
e->distance = wf(lambda,A[left->head->index][right->head->index],
if(leaf(e->head))
while (NULL != f)
{
- if (exclude != f)
+ if (exclude != f)
{
if (leaf(f->head))
A[e->head->index][f->head->index] = A[f->head->index][e->head->index] = D[e->head->index2][f->head->index2];
{
g = f->head->leftEdge;
h = f->head->rightEdge;
- A[e->head->index][f->head->index] = A[f->head->index][e->head->index] = (g->bottomsize*A[e->head->index][g->head->index] + h->bottomsize*A[e->head->index][h->head->index])/f->bottomsize;
+ A[e->head->index][f->head->index] = A[f->head->index][e->head->index] = (g->bottomsize*A[e->head->index][g->head->index] + h->bottomsize*A[e->head->index][h->head->index])/f->bottomsize;
}
}
else /*exclude == f*/
- exclude = exclude->tail->parentEdge;
+ exclude = exclude->tail->parentEdge;
f = depthFirstTraverse(T,f);
}
- else
+ else
/*e->head is not a leaf, so we go recursively to values calculated for
the nodes below e*/
while(NULL !=f )
{
- if (exclude != f)
+ if (exclude != f)
{
g = e->head->leftEdge;
h = e->head->rightEdge;
root*/
f = e->tail->parentEdge;
if (NULL != f)
- fillTableUp(e,f,A,D,T);
+ fillTableUp(e,f,A,D,T);
e = depthFirstTraverse(T,e);
- }
+ }
/*we are indexing this table by vertex indices, but really the
underlying object is the edge set. Thus, the array is one element
{
edge *left, *right;
if (leaf(e->head))
- A[e->head->index][v->index] = D[v->index2][e->head->index2];
+ A[e->head->index][v->index] = D[v->index2][e->head->index2];
else
{
left = e->head->leftEdge;
right = e->head->rightEdge;
- A[e->head->index][v->index] =
- ( left->bottomsize * A[left->head->index][v->index] +
- right->bottomsize * A[right->head->index][v->index])
+ A[e->head->index][v->index] =
+ ( left->bottomsize * A[left->head->index][v->index] +
+ right->bottomsize * A[right->head->index][v->index])
/ e->bottomsize;
}
}
{
up = e->tail->parentEdge;
down = siblingEdge(e);
- A[v->index][e->head->index] =
- (up->topsize * A[v->index][up->head->index] +
+ A[v->index][e->head->index] =
+ (up->topsize * A[v->index][up->head->index] +
down->bottomsize * A[down->head->index][v->index])
/ e->topsize;
}
GMEcalcDownAverage(v,e,D,A);
e = depthFirstTraverse(T,e);
}
-
- e = topFirstTraverse(T,e); /*the upward averages need to be calculated
+
+ e = topFirstTraverse(T,e); /*the upward averages need to be calculated
from top to bottom */
while(NULL != e)
{
}
}
-double wf4(double lambda, double lambda2, double D_AB, double D_AC,
+double wf4(double lambda, double lambda2, double D_AB, double D_AC,
double D_BC, double D_Av, double D_Bv, double D_Cv)
{
return(((1 - lambda) * (D_AC + D_Bv) + (lambda2 - 1)*(D_AB + D_Cv)
/*testEdge cacluates what the OLS weight would be if v were inserted into
- T along e. Compare against known values for inserting along
+ T along e. Compare against known values for inserting along
f = e->parentEdge */
/*edges are tested by a top-first, left-first scheme. we presume
- all distances are fixed to the correct weight for
+ all distances are fixed to the correct weight for
e->parentEdge, if e is a left-oriented edge*/
void testEdge(edge *e, node *v, double **A)
{
/ ((1 + par->topsize)*(par->bottomsize)));
lambda2 = (((double) (sib->bottomsize + e->bottomsize*par->topsize))
/ ((1 + e->bottomsize)*(e->topsize)));
- e->totalweight = par->totalweight
+ e->totalweight = par->totalweight
+ wf4(lambda,lambda2,A[e->head->index][sib->head->index],
A[sib->head->index][e->tail->index],
A[e->head->index][e->tail->index],
A[sib->head->index][v->index],A[e->head->index][v->index],
- A[v->index][e->tail->index]);
+ A[v->index][e->tail->index]);
}
void printDoubleTable(double **A, int d)
void GMEsplitEdge(tree *T, node *v, edge *e, double **A);
-tree *GMEaddSpecies(tree *T,node *v, double **D, double **A)
+tree *GMEaddSpecies(tree *T,node *v, double **D, double **A)
/*the key function of the program addSpeices inserts
the node v to the tree T. It uses testEdge to see what the
weight would be if v split a particular edge. Weights are assigned by
/* if (verbose)
printf("Adding %s.\n",v->label);*/
-
+
/*initialize variables as necessary*/
/*CASE 1: T is empty, v is the first node*/
if (NULL == T) /*create a tree with v as only vertex, no edges*/
{
T_e = newTree();
- T_e->root = v;
- /*note that we are rooting T arbitrarily at a leaf.
+ T_e->root = v;
+ /*note that we are rooting T arbitrarily at a leaf.
T->root is not the phylogenetic root*/
v->index = 0;
T_e->size = 1;
- return(T_e);
+ return(T_e);
}
/*CASE 2: T is a single-vertex tree*/
if (1 == T->size)
A[v->index][v->index] = D[v->index2][T->root->index2];
T->root->leftEdge = v->parentEdge = e;
T->size = 2;
- return(T);
+ return(T);
}
/*CASE 3: T has at least two nodes and an edge. Insert new node
by breaking one of the edges*/
-
+
v->index = T->size;
/*if (!(T->size % 100))
printf("T->size is %d\n",T->size);*/
/*calcNewvAverges will assign values to all the edge averages of T which
include the node v. Will do so using pre-existing averages in T and
information from A,D*/
- e_min = T->root->leftEdge;
- e = e_min->head->leftEdge;
+ e_min = T->root->leftEdge;
+ e = e_min->head->leftEdge;
while (NULL != e)
{
- testEdge(e,v,A);
- /*testEdge tests weight of tree if loop variable
+ testEdge(e,v,A);
+ /*testEdge tests weight of tree if loop variable
e is the edge split, places this weight in e->totalweight field */
if (e->totalweight < w_min)
{
/*we need to update the matrix A so all 1-distant, 2-distant, and
3-distant averages are correct*/
-
+
/*first, initialize the newNode entries*/
/*1-distant*/
- A[newNode->index][newNode->index] =
+ A[newNode->index][newNode->index] =
(e->bottomsize*A[e->head->index][e->head->index]
+ A[v->index][e->head->index])
/ (e->bottomsize + 1);
/*1-distant for v*/
- A[v->index][v->index] =
- (e->bottomsize*A[e->head->index][v->index]
+ A[v->index][v->index] =
+ (e->bottomsize*A[e->head->index][v->index]
+ e->topsize*A[v->index][e->head->index])
/ (e->bottomsize + e->topsize);
/*2-distant for v,newNode*/
- A[v->index][newNode->index] = A[newNode->index][v->index] =
+ A[v->index][newNode->index] = A[newNode->index][v->index] =
A[v->index][e->head->index];
-
+
/*second 2-distant for newNode*/
A[newNode->index][e->tail->index] = A[e->tail->index][newNode->index]
= (e->bottomsize*A[e->head->index][e->tail->index]
/*third 2-distant for newNode*/
A[newNode->index][e->head->index] = A[e->head->index][newNode->index]
= A[e->head->index][e->head->index];
-
+
if (NULL != sib)
{
/*fourth and last 2-distant for newNode*/
A[newNode->index][sib->head->index] =
- A[sib->head->index][newNode->index] =
+ A[sib->head->index][newNode->index] =
(e->bottomsize*A[sib->head->index][e->head->index]
+ A[sib->head->index][v->index]) / (e->bottomsize + 1);
updateSubTreeAverages(A,sib,v,SKEW); /*updates sib and below*/
if (NULL != left)
updateSubTreeAverages(A,left,v,UP); /*updates left and below*/
if (NULL != right)
- updateSubTreeAverages(A,right,v,UP); /*updates right and below*/
+ updateSubTreeAverages(A,right,v,UP); /*updates right and below*/
/*1-dist for e->head*/
- A[e->head->index][e->head->index] =
+ A[e->head->index][e->head->index] =
(e->topsize*A[e->head->index][e->head->index]
+ A[e->head->index][v->index]) / (e->topsize+1);
/*2-dist for e->head (v,newNode,left,right)
taken care of elsewhere*/
/*3-dist with e->head either taken care of elsewhere (below)
or unchanged (sib,e->tail)*/
-
+
/*symmetrize the matrix (at least for distant-2 subtrees) */
A[v->index][e->head->index] = A[e->head->index][v->index];
/*and distant-3 subtrees*/
if (NULL != sib)
A[v->index][sib->head->index] = A[sib->head->index][v->index];
-}
-
+}
+
void GMEsplitEdge(tree *T, node *v, edge *e, double **A)
{
char nodelabel[NODE_LABEL_LENGTH];
edge *newPendantEdge;
edge *newInternalEdge;
node *newNode;
-
+
snprintf(nodelabel,1,"");
- newNode = makeNewNode(nodelabel,T->size + 1);
-
+ newNode = makeNewNode(nodelabel,T->size + 1);
+
//sprintf(edgelabel,"E%d",T->size);
snprintf(edgelabel,EDGE_LABEL_LENGTH,"E%d",T->size);
- newPendantEdge = makeEdge(edgelabel,newNode,v,0.0);
-
+ newPendantEdge = makeEdge(edgelabel,newNode,v,0.0);
+
//sprintf(edgelabel,"E%d",T->size+1);
snprintf(edgelabel,EDGE_LABEL_LENGTH,"E%d",T->size+1);
- newInternalEdge = makeEdge(edgelabel,newNode,e->head,0.0);
-
+ newInternalEdge = makeEdge(edgelabel,newNode,e->head,0.0);
+
/* if (verbose)
printf("Inserting node %s on edge %s between nodes %s and %s.\n",
v->label,e->label,e->tail->label,e->head->label);*/
/*update the matrix of average distances*/
/*also updates the bottomsize, topsize fields*/
-
+
GMEupdateAveragesMatrix(A,e,v,newNode);
newNode->parentEdge = e;
e->head->parentEdge = newInternalEdge;
v->parentEdge = newPendantEdge;
e->head = newNode;
-
+
T->size = T->size + 2;
- if (e->tail->leftEdge == e)
+ if (e->tail->leftEdge == e)
{
newNode->leftEdge = newInternalEdge;
newNode->rightEdge = newPendantEdge;
newNode->leftEdge = newInternalEdge;
newNode->rightEdge = newPendantEdge;
}
-
+
/*assign proper topsize, bottomsize values to the two new Edges*/
-
- newPendantEdge->bottomsize = 1;
+
+ newPendantEdge->bottomsize = 1;
newPendantEdge->topsize = e->bottomsize + e->topsize;
-
+
newInternalEdge->bottomsize = e->bottomsize;
newInternalEdge->topsize = e->topsize; /*off by one, but we adjust
that below*/
-
+
/*and increment these fields for all other edges*/
updateSizes(newInternalEdge,UP);
updateSizes(e,DOWN);
par = e->tail->parentEdge;
switch(direction)
{
- /*want to preserve correctness of
- all 1-distant, 2-distant, and 3-distant averages*/
+ /*want to preserve correctness of
+ all 1-distant, 2-distant, and 3-distant averages*/
/*1-distant updated at edge splitting the two trees*/
/*2-distant updated:
(left->head,right->head) and (head,tail) updated at
(That would lead to multiple updating).*/
/*3-distant updated: at edge in center of quartet*/
case UP: /*point of insertion is above e*/
- /*1-distant average of nodes below e to
+ /*1-distant average of nodes below e to
nodes above e*/
- A[e->head->index][e->head->index] =
- (e->topsize*A[e->head->index][e->head->index] +
- A[e->head->index][v->index])/(e->topsize + 1);
- /*2-distant average of nodes below e to
+ A[e->head->index][e->head->index] =
+ (e->topsize*A[e->head->index][e->head->index] +
+ A[e->head->index][v->index])/(e->topsize + 1);
+ /*2-distant average of nodes below e to
nodes above parent of e*/
- A[e->head->index][par->head->index] =
- A[par->head->index][e->head->index] =
+ A[e->head->index][par->head->index] =
+ A[par->head->index][e->head->index] =
(par->topsize*A[par->head->index][e->head->index]
+ A[e->head->index][v->index]) / (par->topsize + 1);
/*must do both 3-distant averages involving par*/
}
break;
case SKEW: /*point of insertion is skew to e*/
- /*1-distant average of nodes below e to
+ /*1-distant average of nodes below e to
nodes above e*/
- A[e->head->index][e->head->index] =
- (e->topsize*A[e->head->index][e->head->index] +
- A[e->head->index][v->index])/(e->topsize + 1);
+ A[e->head->index][e->head->index] =
+ (e->topsize*A[e->head->index][e->head->index] +
+ A[e->head->index][v->index])/(e->topsize + 1);
/*no 2-distant averages to update in this case*/
/*updating 3-distant averages involving sib*/
if (NULL != left)
case LEFT: /*point of insertion is below the edge left*/
/*1-distant average*/
- A[e->head->index][e->head->index] =
- (e->bottomsize*A[e->head->index][e->head->index] +
- A[v->index][e->head->index])/(e->bottomsize + 1);
+ A[e->head->index][e->head->index] =
+ (e->bottomsize*A[e->head->index][e->head->index] +
+ A[v->index][e->head->index])/(e->bottomsize + 1);
/*2-distant averages*/
- A[e->head->index][e->tail->index] =
- A[e->tail->index][e->head->index] =
- (e->bottomsize*A[e->head->index][e->tail->index] +
- A[v->index][e->tail->index])/(e->bottomsize + 1);
- A[left->head->index][right->head->index] =
- A[right->head->index][left->head->index] =
+ A[e->head->index][e->tail->index] =
+ A[e->tail->index][e->head->index] =
+ (e->bottomsize*A[e->head->index][e->tail->index] +
+ A[v->index][e->tail->index])/(e->bottomsize + 1);
+ A[left->head->index][right->head->index] =
+ A[right->head->index][left->head->index] =
(left->bottomsize*A[right->head->index][left->head->index]
+ A[right->head->index][v->index]) / (left->bottomsize+1);
/*3-distant avereages involving left*/
= (left->bottomsize*A[left->head->index][par->head->index]
+ A[v->index][par->head->index]) / (left->bottomsize + 1);
}
- break;
+ break;
case RIGHT: /*point of insertion is below the edge right*/
/*1-distant average*/
- A[e->head->index][e->head->index] =
- (e->bottomsize*A[e->head->index][e->head->index] +
- A[v->index][e->head->index])/(e->bottomsize + 1);
+ A[e->head->index][e->head->index] =
+ (e->bottomsize*A[e->head->index][e->head->index] +
+ A[v->index][e->head->index])/(e->bottomsize + 1);
/*2-distant averages*/
- A[e->head->index][e->tail->index] =
- A[e->tail->index][e->head->index] =
- (e->bottomsize*A[e->head->index][e->tail->index] +
- A[v->index][e->tail->index])/(e->bottomsize + 1);
- A[left->head->index][right->head->index] =
- A[right->head->index][left->head->index] =
+ A[e->head->index][e->tail->index] =
+ A[e->tail->index][e->head->index] =
+ (e->bottomsize*A[e->head->index][e->tail->index] +
+ A[v->index][e->tail->index])/(e->bottomsize + 1);
+ A[left->head->index][right->head->index] =
+ A[right->head->index][left->head->index] =
(right->bottomsize*A[right->head->index][left->head->index]
+ A[left->head->index][v->index]) / (right->bottomsize+1);
/*3-distant avereages involving right*/