From: paradis Date: Thu, 8 Jan 2009 08:48:22 +0000 (+0000) Subject: various updates + adding SPR+TBR in fastme.bal() X-Git-Url: https://git.donarmstrong.com/?p=ape.git;a=commitdiff_plain;h=f3426364b40c7c0e6aadf6ea2690716425abdfc9 various updates + adding SPR+TBR in fastme.bal() git-svn-id: https://svn.mpl.ird.fr/ape/dev/ape@57 6e262413-ae40-0410-9e79-b911bd7a66b7 --- diff --git a/ChangeLog b/ChangeLog index 5a22435..5aab9bd 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,6 +1,18 @@ CHANGES IN APE VERSION 2.2-3 +NEW FEATURES + + o The new function extract.clade extracts a clade from a tree by + specifying a node number or label. + + o fastme.bal() has two new options 'spr' and 'tbr' to perform tree + operations of the same names. + + o dist.dna() can now return the number of site differences by + specifying model="N". + + BUG FIXES o chronopl() did not work with CV = TRUE. @@ -13,6 +25,14 @@ BUG FIXES the number of lineages with non-binary trees. +OTHER CHANGES + + o ape has now a namespace. + + o drip.tip() has been improved: it should be much faster and work + better in some cases (e.g., see the example in ?zoom). + + CHANGES IN APE VERSION 2.2-2 diff --git a/DESCRIPTION b/DESCRIPTION index 7c5ae46..029c78b 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: ape Version: 2.2-3 -Date: 2008-12-20 +Date: 2009-01-07 Title: Analyses of Phylogenetics and Evolution Author: Emmanuel Paradis, Ben Bolker, Julien Claude, Hoa Sien Cuong, Richard Desper, Benoit Durand, Julien Dutheil, Olivier Gascuel, @@ -22,6 +22,6 @@ Description: ape provides functions for reading, writing, plotting, and clock-like trees using mean path lengths, non-parametric rate smoothing and penalized likelihood, classifying genes in trees using the Klastorin-Misawa-Tajima approach. Phylogeny estimation can be done - with the NJ, BIONJ, ME, and ML methods. + with the NJ, BIONJ, and ME methods. License: GPL (>= 2) URL: http://ape.mpl.ird.fr/ diff --git a/NAMESPACE b/NAMESPACE new file mode 100644 index 0000000..18ed656 --- /dev/null +++ b/NAMESPACE @@ -0,0 +1,23 @@ +useDynLib(ape) + +export(as.DNAbin, as.phylo, base.freq, dist.dna, dist.nodes, drop.tip, ltt.plot, nj, rcoal, rtree, rmtree, read.dna, read.nexus, read.tree, vcv.phylo, write.dna, write.nexus, write.tree) + +import(gee, nlme) +importFrom(lattice, xyplot, panel.lines, panel.points) + +S3method(print, phylo) +S3method(plot, phylo) +S3method(as.hclust, phylo) +S3method(reorder, phylo) +S3method(print, multiPhylo) +S3method(plot, multiPhylo) +S3method("[", multiPhylo) +S3method("[[", multiPhylo) +S3method("$", multiPhylo) +S3method(unique, multiPhylo) +S3method(print, DNAbin) +S3method(cbind, DNAbin) +S3method(rbind, DNAbin) +S3method("[", DNAbin) +S3method(summary, DNAbin) +S3method(as.character, DNAbin) diff --git a/R/DNA.R b/R/DNA.R index 7bc0431..007547e 100644 --- a/R/DNA.R +++ b/R/DNA.R @@ -1,4 +1,4 @@ -## DNA.R (2008-10-08) +## DNA.R (2008-12-22) ## Manipulations and Comparisons of DNA Sequences @@ -297,7 +297,7 @@ dist.dna <- function(x, model = "K80", variance = FALSE, gamma = FALSE, as.matrix = FALSE) { MODELS <- c("RAW", "JC69", "K80", "F81", "K81", "F84", "T92", "TN93", - "GG95", "LOGDET", "BH87", "PARALIN") + "GG95", "LOGDET", "BH87", "PARALIN", "N") imod <- which(MODELS == toupper(model)) if (imod == 11 && variance) { warning("computing variance temporarily not available for model BH87.") diff --git a/R/collapse.singles.R b/R/collapse.singles.R index ad3cd98..4d477ab 100644 --- a/R/collapse.singles.R +++ b/R/collapse.singles.R @@ -12,9 +12,9 @@ collapse.singles <- function(tree) elen <- tree$edge.length xmat <- tree$edge ## added by Elizabeth Purdom (2008-06-19): - node.lab<-tree$node.label - nnode<-tree$Nnode - ntip<-length(tree$tip.label) + node.lab <- tree$node.label + nnode <- tree$Nnode + ntip <- length(tree$tip.label) ## end singles <- NA while (length(singles) > 0) { @@ -36,8 +36,8 @@ collapse.singles <- function(tree) ## END elen[prev.node] <- elen[prev.node] + elen[next.node] ## added by Elizabeth Purdom (2008-06-19): - if(!is.null(node.lab)) node.lab<-node.lab[-c(i-ntip)] - nnode<-nnode-1 + if (!is.null(node.lab)) node.lab <- node.lab[-c(i - ntip)] + nnode <- nnode - 1 ## end elen <- elen[-next.node] } @@ -45,8 +45,8 @@ collapse.singles <- function(tree) tree$edge <- xmat tree$edge.length <- elen ## added by Elizabeth Purdom (2008-06-19): - tree$node.label<-node.lab - tree$Nnode<-nnode + tree$node.label <- node.lab + tree$Nnode <- nnode ## end tree } diff --git a/R/drop.tip.R b/R/drop.tip.R index d65fd36..d819c75 100644 --- a/R/drop.tip.R +++ b/R/drop.tip.R @@ -1,160 +1,181 @@ -## drop.tip.R (2008-04-17) +## drop.tip.R (2009-01-07) ## Remove Tips in a Phylogenetic Tree -## Copyright 2003-2008 Emmanuel Paradis +## Copyright 2003-2009 Emmanuel Paradis ## This file is part of the R-package `ape'. ## See the file ../COPYING for licensing issues. -drop.tip <- function(phy, tip, trim.internal = TRUE, subtree = FALSE, - root.edge = 0) +extract.clade <- function(phy, node, root.edge = 0) { - if (class(phy) != "phylo") stop("object \"phy\" is not of class \"phylo\"") - phy <- new2old.phylo(phy) + Ntip <- length(phy$tip.label) + ROOT <- Ntip + 1 + Nedge <- dim(phy$edge)[1] + wbl <- !is.null(phy$edge.length) + if (length(node) > 1) { + node <- node[1] + warning("only the first value of 'node' has been considered") + } + if (is.character(node)) { + if (is.null(phy$node.label)) + stop("the tree has no node labels") + node <- which(phy$node.label %in% node) + Ntip + } + if (node <= Ntip) stop("node number must be greater than the number of tips") + if (node == ROOT) return(phy) + phy <- reorder(phy) # insure it is in cladewise order + root.node <- which(phy$edge[, 2] == node) + start <- root.node + 1 # start of the clade looked for + anc <- phy$edge[root.node, 1] # the ancestor of 'node' + next.anc <- which(phy$edge[-(1:start), 1] == anc) # find the next occurence of 'anc' + + keep <- if (length(next.anc)) start + 0:(next.anc[1] - 1) else start:Nedge + + if (root.edge) { + NewRootEdge <- phy$edge.length[root.node] + root.edge <- root.edge - 1 + while (root.edge) { + if (anc == ROOT) break + i <- which(phy$edge[, 2] == anc) + NewRootEdge <- NewRootEdge + phy$edge.length[i] + root.edge <- root.edge - 1 + anc <- phy$edge[i, 1] + } + if (root.edge && !is.null(phy$root.edge)) + NewRootEdge <- NewRootEdge + phy$root.edge + phy$root.edge <- NewRootEdge + } + + phy$edge <- phy$edge[keep, ] + if (wbl) phy$edge.length <- phy$edge.length[keep] + TIPS <- phy$edge[, 2] <= Ntip + tip <- phy$edge[TIPS, 2] + phy$tip.label <- phy$tip.label[tip] + ## keep the ordering so no need to reorder tip.label: + phy$edge[TIPS, 2] <- order(tip) + if (!is.null(phy$node.label)) + phy$node.label <- phy$node.label[sort(unique(phy$edge[, 1])) - Ntip] + Ntip <- length(phy$tip.label) + phy$Nnode <- dim(phy$edge)[1] - Ntip + 1L + ## The block below renumbers the nodes so that they conform + ## to the "phylo" format -- same as in root() + newNb <- integer(Ntip + phy$Nnode) + newNb[node] <- Ntip + 1L + sndcol <- phy$edge[, 2] > Ntip + ## executed from right to left, so newNb is modified before phy$edge: + phy$edge[sndcol, 2] <- newNb[phy$edge[sndcol, 2]] <- + (Ntip + 2):(Ntip + phy$Nnode) + phy$edge[, 1] <- newNb[phy$edge[, 1]] + phy +} + +drop.tip <- + function(phy, tip, trim.internal = TRUE, subtree = FALSE, root.edge = 0) +{ + if (class(phy) != "phylo") + stop('object "phy" is not of class "phylo"') + Ntip <- length(phy$tip.label) + NEWROOT <- ROOT <- Ntip + 1 + Nnode <- phy$Nnode + Nedge <- dim(phy$edge)[1] if (subtree) { trim.internal <- TRUE - edge.bak <- phy$edge + tr <- reorder(phy, "pruningwise") + N <- .C("node_depth", as.integer(Ntip), as.integer(Nnode), + as.integer(tr$edge[, 1]), as.integer(tr$edge[, 2]), + as.integer(Nedge), double(Ntip + Nnode), + DUP = FALSE, PACKAGE = "ape")[[6]] } - tmp <- as.numeric(phy$edge) - nb.tip <- max(tmp) - ## fix by Yan Wong: - nodes <- setdiff(tmp,1:nb.tip) #not sure if this also needs sorting into order - ## end - nobr <- is.null(phy$edge.length) - if (is.numeric(tip)) tip <- phy$tip.label[tip] - ## find the tips to drop...: - del <- phy$tip.label %in% tip - ## ... and the corresponding terminal branches: - ind <- which(phy$edge[, 2] %in% as.character(which(del))) - ## drop them...: - phy$edge <- phy$edge[-ind, ] - ## ... and the lengths if applies: - if (!nobr) phy$edge.length <- phy$edge.length[-ind] - ## drop the tip labels: - phy$tip.label <- phy$tip.label[!del] + wbl <- !is.null(phy$edge.length) + edge1 <- phy$edge[, 1] # local copies + edge2 <- phy$edge[, 2] # + keep <- !logical(Nedge) + ## find the tips to drop: + if (is.character(tip)) + tip <- which(phy$tip.label %in% tip) + trms <- edge2 <= Ntip + ## delete the terminal edges given by `tip': + keep[match(tip, edge2)] <- FALSE + if (trim.internal) { - if (root.edge) { - ## find the MRCA of the remaining tips: - seq.nod <- list() - ## This is modified since some tips were deleted!! - for (i in phy$edge[, 2][as.numeric(phy$edge[, 2]) > 0]) { - vec <- i - j <- i - while (j != "-1") { - ind <- which(phy$edge[, 2] == j) - j <- phy$edge[ind, 1] - vec <- c(vec, j) - } - seq.nod[[i]] <- vec - } - sn <- lapply(seq.nod, rev) - i <- 1 - x <- unlist(lapply(sn, function(x) x[i])) - while (length(unique(x)) == 1) { - x <- unlist(lapply(sn, function(x) x[i])) - i <- i + 1 - } - MRCA <- sn[[1]][i - 2] - newrootedge <- if (is.null(phy$root.edge)) 0 else phy$root.edge - for (i in 1:root.edge) { - ind <- which(phy$edge[, 2] == MRCA) - newrootedge <- newrootedge + phy$edge.length[ind] - MRCA <- phy$edge[ind, 1] - if (MRCA == "-1" && i < root.edge) { - newrootedge <- newrootedge - break - } - } - phy$root.edge <- newrootedge - } else { - if (!is.null(phy$root.edge)) phy$root.edge <- NULL + ## delete the internal edges that do not have descendants + ## anymore (ie, they are in the 2nd column of `edge' but + ## not in the 1st one) + repeat { + sel <- !(edge2 %in% edge1[keep]) & !trms & keep + if (!sum(sel)) break + keep[sel] <- FALSE } - while (!all(phy$edge[, 2][as.numeric(phy$edge[, 2]) < 0] %in% phy$edge[, 1])) { - temp <- phy$edge[, 2][as.numeric(phy$edge[, 2]) < 0] - k <- temp %in% phy$edge[, 1] - ind <- phy$edge[, 2] %in% temp[!k] - phy$edge <- phy$edge[!ind, ] - if (!nobr) phy$edge.length <- phy$edge.length[!ind] + if (subtree) { + ## keep the subtending edge(s): + subt <- edge1 %in% edge1[keep] & edge1 %in% edge1[!keep] + ## 'if (... ' needed below? + if (any(subt)) keep[which(subt)] <- TRUE } - } else { - ## fix by Yan Wong: - k <- nodes %in% phy$edge[, 1] #nodes that have descendants - ind <- phy$edge[, 2] %in% nodes[!k] - phy$edge[which(ind), 2] <- as.character(nb.tip + (1:sum(ind))) - if (is.null(phy$node.label)) new.tip.label <- rep("NA", sum(ind)) - else new.tip.label <- phy$node.label[!k] - phy$tip.label <- c(phy$tip.label, new.tip.label) - #N.B. phy$node.label can be left: it is altered later - ## end - } - useless.nodes <- names(which(table(phy$edge[, 1]) == 1)) - if (subtree) { - if (!nobr) mnbr <- mean(phy$edge.length) - if (length(useless.nodes) == 1) n <- length(tip) else { - seq.nod <- list() - wh <- numeric(0) - for (i in as.character(which(del))) { # it is not needed to loop through all tips! - vec <- i - j <- i - while (!(j %in% useless.nodes)) { - ind <- which(edge.bak[, 2] == j) - wh <- c(wh, ind) - j <- edge.bak[ind, 1] - vec <- c(vec, j) + if (root.edge && wbl) { + degree <- tabulate(edge1[keep]) + if (degree[ROOT] == 1) { + j <- integer(0) # will store the indices of the edges below the new root + repeat { + i <- which(edge1 == NEWROOT & keep) + j <- c(i, j) + NEWROOT <- edge2[i] + degree <- tabulate(edge1[keep]) + if (degree[NEWROOT] > 1) break } - seq.nod[[i]] <- vec - } - n <- table(unlist(lapply(seq.nod, function(x) rev(x)[1]))) - } - new.lab <- paste("[", n, "_tips]", sep = "") - for (i in 1:length(useless.nodes)) { - wh <- which(phy$edge[, 1] == useless.nodes[i]) - phy$tip.label <- c(phy$tip.label, new.lab[i]) - if (wh == dim(phy$edge)[1]) { - phy$edge <- rbind(phy$edge, c(useless.nodes[i], as.character(nb.tip + i))) - if (!nobr) phy$edge.length <- c(phy$edge.length, mnbr) - } else { - phy$edge <- rbind(phy$edge[1:wh, ], - c(useless.nodes[i], as.character(nb.tip + i)), - phy$edge[(wh + 1):dim(phy$edge)[1], ]) - if (!nobr) phy$edge.length <- c(phy$edge.length[1:wh], mnbr, - phy$edge.length[(wh + 1):(dim(phy$edge)[1] - 1)]) - } - } - } else { - for (i in useless.nodes) { - ind1 <- which(phy$edge[, 1] == i) - ind2 <- which(phy$edge[, 2] == i) - phy$edge[ind2, 2] <- phy$edge[ind1, 2] - phy$edge <- phy$edge[-ind1, ] - if (!nobr) { - phy$edge.length[ind2] <- phy$edge.length[ind2] + phy$edge.length[ind1] - phy$edge.length <- phy$edge.length[-ind1] + keep[j] <- FALSE + if (length(j) > root.edge) j <- 1:root.edge + NewRootEdge <- sum(phy$edge.length[j]) + if (length(j) < root.edge && !is.null(phy$root.edge)) + NewRootEdge <- NewRootEdge + phy$root.edge + phy$root.edge <- NewRootEdge } } } - tmp <- as.numeric(phy$edge) - if (!is.null(phy$node.label)) { - x <- unique(tmp) - x <- x[x < 0] - phy$node.label <- phy$node.label[-x] - } - n <- length(tmp) - nodes <- tmp < 0 - ind.nodes <- (1:n)[nodes] - ind.tips <- (1:n)[!nodes] - new.nodes <- -as.numeric(factor(-tmp[nodes])) - new.tips <- as.numeric(factor(tmp[!nodes])) - tmp[ind.nodes] <- new.nodes - tmp[ind.tips] <- new.tips - dim(tmp) <- c(n / 2, 2) - mode(tmp) <- "character" - phy$edge <- tmp - phy <- old2new.phylo(phy) - if (!trim.internal || subtree) { - S <- write.tree(phy) - phy <- if (nobr) clado.build(S) else tree.build(S) + + if (!root.edge) phy$root.edge <- NULL + + ## upate the tree; 1) drop the edges and tip labels + phy$edge <- phy$edge[keep, ] + if (wbl) phy$edge.length <- phy$edge.length[keep] + phy$tip.label <- phy$tip.label[-tip] + ## 2) renumber the remaining tips now + TIPS <- phy$edge[, 2] <= Ntip + ## keep the ordering so no need to reorder tip.label: + phy$edge[TIPS, 2] <- order(phy$edge[TIPS, 2]) + Ntip <- length(phy$tip.label) # update Ntip + + ## make new tip labels if necessary + if (subtree || !trim.internal) { + new.trms <- !(phy$edge[, 2] %in% phy$edge[, 1]) & phy$edge[, 2] > Ntip + node2tip <- phy$edge[new.trms, 2] + if (subtree) + new.lab <- paste("[", N[node2tip], "_tips]", sep = "") + else { + new.lab <- + if (is.null(phy$node.label)) rep("NA", length(node2tip)) + else phy$node.label[node2tip - Ntip] + } + ## change the #'s in the edge matrix + new.tip <- Ntip + 1:length(node2tip) + phy$edge[new.trms, 2] <- new.tip + phy$tip.label[new.tip] <- new.lab + Ntip <- length(phy$tip.label) + if (!is.null(phy$node.label)) + phy$node.label <- phy$node.label[-(node2tip - Ntip)] } - phy + phy$Nnode <- dim(phy$edge)[1] - Ntip + 1L # 3) update phy$Nnode + + ## The block below renumbers the nodes so that they conform + ## to the "phylo" format -- same as in root() + newNb <- integer(Ntip + phy$Nnode) + newNb[NEWROOT] <- Ntip + 1L + sndcol <- phy$edge[, 2] > Ntip + ## executed from right to left, so newNb is modified before phy$edge: + phy$edge[sndcol, 2] <- newNb[phy$edge[sndcol, 2]] <- + (Ntip + 2):(Ntip + phy$Nnode) + phy$edge[, 1] <- newNb[phy$edge[, 1]] + + collapse.singles(phy) } diff --git a/R/me.R b/R/me.R index 93cebd3..7dca6c0 100644 --- a/R/me.R +++ b/R/me.R @@ -1,20 +1,40 @@ -## me.R (2008-01-12) +## me.R (2009-01-07) ## Tree Estimation Based on Minimum Evolution Algorithm ## Copyright 2007 Vincent Lefort with modifications by -## Emmanuel Paradis (2008) +## Emmanuel Paradis (2008-2009) ## This file is part of the R-package `ape'. ## See the file ../COPYING for licensing issues. -.fastme <- function(X, nni, lib) +fastme.bal <- function(X, nni = TRUE, spr = TRUE, tbr = TRUE) { if (is.matrix(X)) X <- as.dist(X) N <- as.integer(attr(X, "Size")) labels <- sprintf("%6s", 1:N) edge1 <- edge2 <- integer(2*N - 3) - ans <- .C(lib, as.double(X), N, labels, as.integer(nni), + + ans <- .C("me_b", as.double(X), N, labels, as.integer(nni), + as.integer(spr), as.integer(tbr), edge1, edge2, + double(2*N - 3), character(N), PACKAGE = "ape") + + labels <- substr(ans[[10]], 1, 6) + LABS <- attr(X, "Labels") + labels <- if (!is.null(LABS)) LABS[as.numeric(labels)] + else gsub("^ ", "", labels) + structure(list(edge = cbind(ans[[7]], ans[[8]]), edge.length = ans[[9]], + tip.label = labels, Nnode = N - 2L), + class = "phylo") +} + +fastme.ols <- function(X, nni = TRUE) +{ + if (is.matrix(X)) X <- as.dist(X) + N <- as.integer(attr(X, "Size")) + labels <- sprintf("%6s", 1:N) + edge1 <- edge2 <- integer(2*N - 3) + ans <- .C("me_o", as.double(X), N, labels, as.integer(nni), edge1, edge2, double(2*N - 3), character(N), PACKAGE = "ape") labels <- substr(ans[[8]], 1, 6) @@ -22,14 +42,10 @@ labels <- if (!is.null(LABS)) LABS[as.numeric(labels)] else gsub("^ ", "", labels) structure(list(edge = cbind(ans[[5]], ans[[6]]), edge.length = ans[[7]], - tip.label = labels, Nnode = N - 2), + tip.label = labels, Nnode = N - 2L), class = "phylo") } -fastme.bal <- function(X, nni = TRUE) .fastme(X, nni, "me_b") - -fastme.ols <- function(X, nni = TRUE) .fastme(X, nni, "me_o") - bionj <- function(X) { if (is.matrix(X)) X <- as.dist(X) @@ -43,6 +59,6 @@ bionj <- function(X) labels <- if (!is.null(LABS)) LABS[as.numeric(labels)] else gsub("^ ", "", labels) structure(list(edge = cbind(ans[[4]], ans[[5]]), edge.length = ans[[6]], - tip.label = labels, Nnode = N - 2), + tip.label = labels, Nnode = N - 2L), class = "phylo") } diff --git a/Thanks b/Thanks index adf40fd..9c705ef 100644 --- a/Thanks +++ b/Thanks @@ -5,9 +5,9 @@ Many users gave important feed-back with their encouragements, comments, or bug reports: thanks to all of you! Significant bug fixes were provided by Cécile Ané, James Bullard, -Éric Durand, Olivier François, Bret Larget, Elizabeth Purdom, -Klaus Schliep, Li-San Wang, and Yan Wong. Contact me if I forgot -someone. +Éric Durand, Olivier François, Bret Larget, Nick Matzke, +Elizabeth Purdom, Klaus Schliep, Li-San Wang, and Yan Wong. Contact +me if I forgot someone. Kurt Hornik, of the R Core Team, helped in several occasions to fix some problems and bugs. diff --git a/man/dist.dna.Rd b/man/dist.dna.Rd index d5857e2..2cee8e2 100644 --- a/man/dist.dna.Rd +++ b/man/dist.dna.Rd @@ -9,10 +9,10 @@ dist.dna(x, model = "K80", variance = FALSE, \arguments{ \item{x}{a matrix or a list containing the DNA sequences.} \item{model}{a character string specifying the evlutionary model to be - used; must be one of \code{"raw"}, \code{"JC69"}, \code{"K80"} (the - default), \code{"F81"}, \code{"K81"}, \code{"F84"}, \code{"BH87"}, - \code{"T92"}, \code{"TN93"}, \code{"GG95"}, \code{"logdet"}, or - \code{"paralin"}.} + used; must be one of \code{"raw"}, \code{"N"}, \code{"JC69"}, + \code{"K80"} (the default), \code{"F81"}, \code{"K81"}, + \code{"F84"}, \code{"BH87"}, \code{"T92"}, \code{"TN93"}, + \code{"GG95"}, \code{"logdet"}, or \code{"paralin"}.} \item{variance}{a logical indicating whether to compute the variances of the distances; defaults to \code{FALSE} so the variances are not computed.} @@ -40,10 +40,10 @@ dist.dna(x, model = "K80", variance = FALSE, brief description is given below; more details can be found in the References. - \item{``raw''}{This is simply the proportion of sites that differ - between each pair of sequences. This may be useful to draw - ``saturation plots''. The options \code{variance} and \code{gamma} - have no effect, but \code{pairwise.deletion} can.} + \item{``raw'', ``N''}{This is simply the proportion or the number of + sites that differ between each pair of sequences. This may be useful + to draw ``saturation plots''. The options \code{variance} and + \code{gamma} have no effect, but \code{pairwise.deletion} can.} \item{``JC69''}{This model was developed by Jukes and Cantor (1969). It assumes that all substitutions (i.e. a change of a base by another diff --git a/man/drop.tip.Rd b/man/drop.tip.Rd index 53bc4f0..f0f2aeb 100644 --- a/man/drop.tip.Rd +++ b/man/drop.tip.Rd @@ -1,9 +1,11 @@ \name{drop.tip} \alias{drop.tip} +\alias{extract.clade} \title{Remove Tips in a Phylogenetic Tree} \usage{ drop.tip(phy, tip, trim.internal = TRUE, subtree = FALSE, root.edge = 0) +extract.clade(phy, node, root.edge = 0) } \arguments{ \item{phy}{an object of class \code{"phylo"}.} @@ -16,10 +18,14 @@ drop.tip(phy, tip, trim.internal = TRUE, subtree = FALSE, \item{root.edge}{an integer giving the number of internal branches to be used to build the new root edge. This has no effect if \code{trim.internal = FALSE}.} + \item{node}{a node number or label.} } \description{ - This function removes the terminal branches of a phylogenetic tree, + \code{drop.tip} removes the terminal branches of a phylogenetic tree, possibly removing the corresponding internal branches. + + \code{extract.clade} does the inverse operation: it keeps all the tips + from a given node, and deletes all the other tips. } \details{ The argument \code{tip} can be either character or numeric. In the @@ -27,6 +33,11 @@ drop.tip(phy, tip, trim.internal = TRUE, subtree = FALSE, second case the numbers of these labels in the vector \code{phy$tip.label} are given. + This also applies to \code{node}, but if this argument is character + and the tree has no node label, this results in an error. If more than + one value is given with \code{node} (i.e., a vector of length two or + more), only the first one is used with a warning. + If \code{trim.internal = FALSE}, the new tips are given \code{"NA"} as labels, unless there are node labels in the tree in which case they are used. diff --git a/man/fastme.Rd b/man/fastme.Rd index 897ffa0..794e6c8 100644 --- a/man/fastme.Rd +++ b/man/fastme.Rd @@ -10,12 +10,14 @@ Minimum Evolution algorithm of Desper and Gascuel (2002). } \usage{ - fastme.bal(X, nni = TRUE) + fastme.bal(X, nni = TRUE, spr = TRUE, tbr = TRUE) fastme.ols(X, nni = TRUE) } \arguments{ \item{X}{a distance matrix; may be an object of class \code{"dist"}.} \item{nni}{a boolean value; TRUE to do NNIs (default).} + \item{spr}{ditto for SPRs.} + \item{tbr}{ditto for TBRs.} } \value{ an object of class \code{"phylo"}. diff --git a/src/NNI.c b/src/NNI.c index f9848fc..a63c725 100644 --- a/src/NNI.c +++ b/src/NNI.c @@ -1,9 +1,3 @@ -/*#include -#include -#include -#include "graph.h" -#include "main.h"*/ - #include "me.h" //boolean leaf(node *v); @@ -12,7 +6,7 @@ edge *depthFirstTraverse(tree *T, edge *e); edge *findBottomLeft(edge *e); edge *topFirstTraverse(tree *T, edge *e); edge *moveUpRight(edge *e); -double wf(double lambda, double D_LR, double D_LU, double D_LD, +double wf(double lambda, double D_LR, double D_LU, double D_LD, double D_RU, double D_RD, double D_DU);*/ /*NNI functions for unweighted OLS topological switches*/ @@ -24,29 +18,29 @@ void fillTableUp(edge *e, edge *f, double **A, double **D, tree *T) if (T->root == f->tail) { if (leaf(e->head)) - A[e->head->index][f->head->index] = - A[f->head->index][e->head->index] = + A[e->head->index][f->head->index] = + A[f->head->index][e->head->index] = D[e->head->index2][f->tail->index2]; else { g = e->head->leftEdge; h = e->head->rightEdge; - A[e->head->index][f->head->index] = - A[f->head->index][e->head->index] = + A[e->head->index][f->head->index] = + A[f->head->index][e->head->index] = (g->bottomsize*A[f->head->index][g->head->index] + h->bottomsize*A[f->head->index][h->head->index]) - /e->bottomsize; + /e->bottomsize; } } - else + else { g = f->tail->parentEdge; fillTableUp(e,g,A,D,T); /*recursive call*/ h = siblingEdge(f); - A[e->head->index][f->head->index] = - A[f->head->index][e->head->index] = + A[e->head->index][f->head->index] = + A[f->head->index][e->head->index] = (g->topsize*A[e->head->index][g->head->index] - + h->bottomsize*A[e->head->index][h->head->index])/f->topsize; + + h->bottomsize*A[e->head->index][h->head->index])/f->topsize; } } @@ -84,20 +78,20 @@ int NNIEdgeTest(edge *e, tree *T, double **A, double *weight) double *lambda; double D_LR, D_LU, D_LD, D_RD, D_RU, D_DU; double w1,w2,w0; - + if ((leaf(e->tail)) || (leaf(e->head))) return(NONE); lambda = (double *)malloc(3*sizeof(double)); a = e->tail->parentEdge->topsize; f = siblingEdge(e); - b = f->bottomsize; + b = f->bottomsize; c = e->head->leftEdge->bottomsize; d = e->head->rightEdge->bottomsize; lambda[0] = ((double) b*c + a*d)/((a + b)*(c+d)); - lambda[1] = ((double) b*c + a*d)/((a + c)*(b+d)); + lambda[1] = ((double) b*c + a*d)/((a + c)*(b+d)); lambda[2] = ((double) c*d + a*b)/((a + d)*(b+c)); - + D_LR = A[e->head->leftEdge->head->index][e->head->rightEdge->head->index]; D_LU = A[e->head->leftEdge->head->index][e->tail->index]; D_LD = A[e->head->leftEdge->head->index][f->head->index]; @@ -148,69 +142,68 @@ int NNIEdgeTest(edge *e, tree *T, double **A, double *weight) printf("Weight dropping by %lf.\n",w0 - w1); printf("New weight should be %lf.\n",T->weight + w1 - w0); }*/ - return(LEFT); + return(LEFT); } } - int *initPerm(int size); -void NNIupdateAverages(double **A, edge *e, edge *par, edge *skew, +void NNIupdateAverages(double **A, edge *e, edge *par, edge *skew, edge *swap, edge *fixed, tree *T) { node *v; edge *elooper; v = e->head; /*first, v*/ - A[e->head->index][e->head->index] = - (swap->bottomsize* + A[e->head->index][e->head->index] = + (swap->bottomsize* ((skew->bottomsize*A[skew->head->index][swap->head->index] - + fixed->bottomsize*A[fixed->head->index][swap->head->index]) + + fixed->bottomsize*A[fixed->head->index][swap->head->index]) / e->bottomsize) + par->topsize* ((skew->bottomsize*A[skew->head->index][par->head->index] - + fixed->bottomsize*A[fixed->head->index][par->head->index]) + + fixed->bottomsize*A[fixed->head->index][par->head->index]) / e->bottomsize) - ) / e->topsize; - - elooper = findBottomLeft(e); /*next, we loop over all the edges + ) / e->topsize; + + elooper = findBottomLeft(e); /*next, we loop over all the edges which are below e*/ - while (e != elooper) + while (e != elooper) { - A[e->head->index][elooper->head->index] = - A[elooper->head->index][v->index] + A[e->head->index][elooper->head->index] = + A[elooper->head->index][v->index] = (swap->bottomsize*A[elooper->head->index][swap->head->index] + - par->topsize*A[elooper->head->index][par->head->index]) + par->topsize*A[elooper->head->index][par->head->index]) / e->topsize; elooper = depthFirstTraverse(T,elooper); } elooper = findBottomLeft(swap); /*next we loop over all the edges below and - including swap*/ + including swap*/ while (swap != elooper) { - A[e->head->index][elooper->head->index] = + A[e->head->index][elooper->head->index] = A[elooper->head->index][e->head->index] - = (skew->bottomsize * A[elooper->head->index][skew->head->index] + - fixed->bottomsize*A[elooper->head->index][fixed->head->index]) + = (skew->bottomsize * A[elooper->head->index][skew->head->index] + + fixed->bottomsize*A[elooper->head->index][fixed->head->index]) / e->bottomsize; elooper = depthFirstTraverse(T,elooper); } /*now elooper = skew */ - A[e->head->index][elooper->head->index] = + A[e->head->index][elooper->head->index] = A[elooper->head->index][e->head->index] - = (skew->bottomsize * A[elooper->head->index][skew->head->index] + - fixed->bottomsize* A[elooper->head->index][fixed->head->index]) + = (skew->bottomsize * A[elooper->head->index][skew->head->index] + + fixed->bottomsize* A[elooper->head->index][fixed->head->index]) / e->bottomsize; - - /*finally, we loop over all the edges in the tree - on the far side of parEdge*/ + + /*finally, we loop over all the edges in the tree + on the far side of parEdge*/ elooper = T->root->leftEdge; while ((elooper != swap) && (elooper != e)) /*start a top-first traversal*/ { - A[e->head->index][elooper->head->index] = + A[e->head->index][elooper->head->index] = A[elooper->head->index][e->head->index] - = (skew->bottomsize * A[elooper->head->index][skew->head->index] - + fixed->bottomsize* A[elooper->head->index][fixed->head->index]) + = (skew->bottomsize * A[elooper->head->index][skew->head->index] + + fixed->bottomsize* A[elooper->head->index][fixed->head->index]) / e->bottomsize; elooper = topFirstTraverse(T,elooper); } @@ -220,14 +213,13 @@ void NNIupdateAverages(double **A, edge *e, edge *par, edge *skew, elooper = moveUpRight(par); while (NULL != elooper) { - A[e->head->index][elooper->head->index] + A[e->head->index][elooper->head->index] = A[elooper->head->index][e->head->index] - = (skew->bottomsize * A[elooper->head->index][skew->head->index] + - fixed->bottomsize* A[elooper->head->index][fixed->head->index]) + = (skew->bottomsize * A[elooper->head->index][skew->head->index] + + fixed->bottomsize* A[elooper->head->index][fixed->head->index]) / e->bottomsize; elooper = topFirstTraverse(T,elooper); } - } @@ -235,7 +227,7 @@ void NNItopSwitch(tree *T, edge *e, int direction, double **A) { edge *par, *fixed; edge *skew, *swap; - + /* if (verbose) printf("Branch swap across edge %s.\n",e->label);*/ @@ -246,17 +238,17 @@ void NNItopSwitch(tree *T, edge *e, int direction, double **A) skew = siblingEdge(e); fixed = siblingEdge(swap); par = e->tail->parentEdge; - + /* if (verbose) { printf("Branch swap: switching edges %s and %s.\n",skew->label,swap->label); }*/ /*perform topological switch by changing f from (u,b) to (v,b) and g from (v,c) to (u,c), necessitatates also changing parent fields*/ - + swap->tail = e->tail; skew->tail = e->head; - + if (LEFT == direction) e->head->leftEdge = skew; else @@ -279,17 +271,17 @@ void pushHeap(int *p, int *q, double *v, int length, int i); void popHeap(int *p, int *q, double *v, int length, int i); -void NNIRetestEdge(int *p, int *q, edge *e,tree *T, double **avgDistArray, +void NNIRetestEdge(int *p, int *q, edge *e,tree *T, double **avgDistArray, double *weights, int *location, int *possibleSwaps) { int tloc; tloc = location[e->head->index+1]; - location[e->head->index+1] = + location[e->head->index+1] = NNIEdgeTest(e,T,avgDistArray,weights + e->head->index+1); if (NONE == location[e->head->index+1]) { if (NONE != tloc) - popHeap(p,q,weights,(*possibleSwaps)--,q[e->head->index+1]); + popHeap(p,q,weights,(*possibleSwaps)--,q[e->head->index+1]); } else { @@ -320,32 +312,32 @@ void NNI(tree *T, double **avgDistArray, int *count, double **D, int numSpecies) edgeArray = (edge **) malloc((T->size+1)*sizeof(double)); weights = (double *) malloc((T->size+1)*sizeof(double)); location = (int *) malloc((T->size+1)*sizeof(int)); - + double epsilon = 0.0; for (i=0; isize+1;i++) { weights[i] = 0.0; location[i] = NONE; } - e = findBottomLeft(T->root->leftEdge); + e = findBottomLeft(T->root->leftEdge); /* *count = 0; */ while (NULL != e) { edgeArray[e->head->index+1] = e; - location[e->head->index+1] = + location[e->head->index+1] = NNIEdgeTest(e,T,avgDistArray,weights + e->head->index + 1); e = depthFirstTraverse(T,e); - } + } possibleSwaps = makeThreshHeap(p,q,weights,T->size+1,0.0); permInverse(p,q,T->size+1); /*we put the negative values of weights into a heap, indexed by p with the minimum value pointed to by p[1]*/ - /*p[i] is index (in edgeArray) of edge with i-th position + /*p[i] is index (in edgeArray) of edge with i-th position in the heap, q[j] is the position of edge j in the heap */ while (weights[p[1]] + epsilon < 0) { @@ -361,7 +353,7 @@ void NNI(tree *T, double **avgDistArray, int *count, double **D, int numSpecies) e = centerEdge->head->leftEdge; NNIRetestEdge(p,q,e,T,avgDistArray,weights,location,&possibleSwaps); e = centerEdge->head->rightEdge; - NNIRetestEdge(p,q,e,T,avgDistArray,weights,location,&possibleSwaps); + NNIRetestEdge(p,q,e,T,avgDistArray,weights,location,&possibleSwaps); e = siblingEdge(centerEdge); NNIRetestEdge(p,q,e,T,avgDistArray,weights,location,&possibleSwaps); e = centerEdge->tail->parentEdge; diff --git a/src/SPR.c b/src/SPR.c new file mode 100644 index 0000000..82f0a67 --- /dev/null +++ b/src/SPR.c @@ -0,0 +1,417 @@ +/* #include */ +/* #include */ +/* #include */ +/* #include "graph.h" */ +/* #include "fastme.h" */ +#include "me.h" + +/*functions from bNNI.c*/ +void makeBMEAveragesTable(tree *T, double **D, double **A); +void assignBMEWeights(tree *T, double **A); + +/*from me.c*/ +edge *siblingEdge(edge *e); +void weighTree(tree *T); +void freeMatrix(double **D, int size); +edge *depthFirstTraverse(tree *T, edge *e); +double **initDoubleMatrix(int d); + +/*from below*/ +node *indexedNode(tree *T, int i); +edge *indexedEdge(tree *T, int i); +void assignSPRWeights(node *v, double **A, double ***swapWeights); +void SPRTopShift(tree *T, node *vmove, edge *esplit, int UpOrDown); +void assignDownWeightsUp(edge *etest, node *vtest, node *va, edge *back, node *cprev, double oldD_AB, double coeff, double **A, double ***swapWeights); +void assignDownWeightsSkew(edge *etest, node *vtest, node *va, edge *back, node *cprev, double oldD_AB, double coeff, double **A, double ***swapWeights); +void assignDownWeightsDown(edge *etest, node *vtest, node *va, edge *back, node *cprev, double oldD_AB, double coeff, double **A, double ***swapWeights); +void assignUpWeights(edge *etest, node *vtest, node *va, edge *back, node *cprve, double oldD_AB, double coeff, double **A, double ***swapWeights); + +void zero3DMatrix(double ***X, int h, int l, int w) +{ + int i,j,k; + for(i=0;iweight);*/ + for(i=0;i<2;i++) + swapWeights[i] = initDoubleMatrix(T->size); + do + { + swapValue=0.0; + zero3DMatrix(swapWeights,2,T->size,T->size); + i = j = k = 0; + for(e=depthFirstTraverse(T,NULL);NULL!=e;e=depthFirstTraverse(T,e)) + assignSPRWeights(e->head,A,swapWeights); + findTableMin(&i,&j,&k,T->size,swapWeights,&swapValue); + swapValue = swapWeights[i][j][k]; + if (swapValue < -EPSILON) + { +// if (verbose) +// printf("New tree weight should be %lf.\n",T->weight + 0.25*swapValue); + v = indexedNode(T,j); + f = indexedEdge(T,k); +// if (verbose) +// printf("Swapping tree below %s to split edge %s with head %s and tail %s\n", +// v->parentEdge->label,f->label,f->head->label,f->tail->label); + SPRTopShift(T,v,f,2-i); + makeBMEAveragesTable(T,D,A); + assignBMEWeights(T,A); + weighTree(T); + (*count)++; + /*sprintf(filename,"tree%d.new",*count);*/ +// if (verbose) +// printf("After %d SPRs, tree weight is %lf.\n\n",*count,T->weight); + /*treefile = fopen(filename,"w"); + NewickPrintTree(T,treefile); + fclose(treefile);*/ + } + } while (swapValue < -EPSILON); + for(i=0;i<2;i++) + freeMatrix(swapWeights[i],T->size); + free(swapWeights); + /*if (verbose) + readOffTree(T);*/ +} + +/*assigns values to array swapWeights*/ +/*swapWeights[0][j][k] will be the value of removing the tree below the edge whose head node has index j +and reattaching it to split the edge whose head has the index k*/ +/*swapWeights[1][j][k] will be the value of removing the tree above the edge whose head node has index j +and reattaching it to split the edge whose head has the index k*/ +void assignSPRWeights(node *vtest, double **A, double ***swapWeights) +{ + edge *etest, *left, *right, *sib, *par; + etest = vtest->parentEdge; + left = vtest->leftEdge; + right = vtest->rightEdge; + par = etest->tail->parentEdge; + sib = siblingEdge(etest); + if (NULL != par) + assignDownWeightsUp(par,vtest,sib->head,NULL,NULL,0.0,1.0,A,swapWeights); + if (NULL != sib) + assignDownWeightsSkew(sib,vtest,sib->tail,NULL,NULL,0.0,1.0,A,swapWeights); + /*assigns values for moving subtree rooted at vtest, starting with edge + parental to tail of edge parental to vtest*/ + if (NULL != left) + { + assignUpWeights(left,vtest,right->head,NULL,NULL,0.0,1.0,A,swapWeights); + assignUpWeights(right,vtest,left->head,NULL,NULL,0.0,1.0,A,swapWeights); + } +} + + +/*recall NNI formula: change in tree length from AB|CD split to AC|BD split is +proportional to D_AC + D_BD - D_AB - D_CD*/ +/*in our case B is the tree being moved (below vtest), A is the tree backwards below back, but + with the vtest subtree removed, C is the sibling tree of back and D is the tree above etest*/ +/*use va to denote the root of the sibling tree to B in the original tree*/ +/*please excuse the multiple uses of the same letters: A,D, etc.*/ +void assignDownWeightsUp(edge *etest, node *vtest, node *va, edge *back, node *cprev, double oldD_AB, double coeff, double **A, double ***swapWeights) +{ + edge *par, *sib, *skew; + double D_AC, D_BD, D_AB, D_CD; + par = etest->tail->parentEdge; + skew = siblingEdge(etest); + if (NULL == back) /*first recursive call*/ + { + if (NULL == par) + return; + else /*start the process of assigning weights recursively*/ + { + assignDownWeightsUp(par,vtest,va,etest,va,A[va->index][vtest->index],0.5,A,swapWeights); + assignDownWeightsSkew(skew,vtest,va,etest,va,A[va->index][vtest->index],0.5,A,swapWeights); + } + } + else /*second or later recursive call*/ + { + sib = siblingEdge(back); + D_BD = A[vtest->index][etest->head->index]; /*straightforward*/ + D_CD = A[sib->head->index][etest->head->index]; /*this one too*/ + D_AC = A[sib->head->index][back->head->index] + coeff*(A[sib->head->index][va->index] + - A[sib->head->index][vtest->index]); + D_AB = 0.5*(oldD_AB + A[vtest->index][cprev->index]); + swapWeights[0][vtest->index][etest->head->index] = swapWeights[0][vtest->index][back->head->index] + (D_AC + D_BD - D_AB - D_CD); + if (NULL != par) + { + assignDownWeightsUp(par,vtest,va,etest,sib->head,D_AB,0.5*coeff,A,swapWeights); + assignDownWeightsSkew(skew,vtest,va,etest,sib->head,D_AB,0.5*coeff,A,swapWeights); + } + } +} + +void assignDownWeightsSkew(edge *etest, node *vtest, node *va, edge *back, node *cprev, double oldD_AB, double coeff, double **A, double ***swapWeights) +{ + /*same general idea as assignDownWeights, except needing to keep track of things a bit differently*/ + edge *par, *left, *right; + /*par here = sib before + left, right here = par, skew before*/ + double D_AB, D_CD, D_AC, D_BD; + /*B is subtree being moved - below vtest + A is subtree remaining fixed - below va, unioned with all trees already passed by B*/ + /*C is subtree being passed by B, in this case above par + D is subtree below etest, fixed on other side*/ + par = etest->tail->parentEdge; + left = etest->head->leftEdge; + right = etest->head->rightEdge; + if (NULL == back) + { + if (NULL == left) + return; + else + { + assignDownWeightsDown(left,vtest,va,etest,etest->tail,A[vtest->index][etest->tail->index],0.5,A,swapWeights); + assignDownWeightsDown(right,vtest,va,etest,etest->tail,A[vtest->index][etest->tail->index],0.5,A,swapWeights); + } + } + else + { + D_BD = A[vtest->index][etest->head->index]; + D_CD = A[par->head->index][etest->head->index]; + D_AC = A[back->head->index][par->head->index] + coeff*(A[va->index][par->head->index] - A[vtest->index][par->head->index]); + D_AB = 0.5*(oldD_AB + A[vtest->index][cprev->index]); + swapWeights[0][vtest->index][etest->head->index] = swapWeights[0][vtest->index][back->head->index] + (D_AC + D_BD - D_AB - D_CD); + if (NULL != left) + { + assignDownWeightsDown(left,vtest, va, etest, etest->tail, D_AB, 0.5*coeff, A, swapWeights); + assignDownWeightsDown(right,vtest, va, etest, etest->tail, D_AB, 0.5*coeff, A, swapWeights); + } + } +} + +void assignDownWeightsDown(edge *etest, node *vtest, node *va, edge *back, node *cprev, double oldD_AB, double coeff, double **A, double ***swapWeights) +{ + /*again the same general idea*/ + edge *sib, *left, *right; + /*sib here = par in assignDownWeightsSkew + rest is the same as assignDownWeightsSkew*/ + double D_AB, D_CD, D_AC, D_BD; + /*B is below vtest, A is below va unioned with all trees already passed by B*/ + /*C is subtree being passed - below sib*/ + /*D is tree below etest*/ + sib = siblingEdge(etest); + left = etest->head->leftEdge; + right = etest->head->rightEdge; + D_BD = A[vtest->index][etest->head->index]; + D_CD = A[sib->head->index][etest->head->index]; + D_AC = A[sib->head->index][back->head->index] + coeff*(A[sib->head->index][va->index] - A[sib->head->index][vtest->index]); + D_AB = 0.5*(oldD_AB + A[vtest->index][cprev->index]); + swapWeights[0][vtest->index][etest->head->index] = swapWeights[0][vtest->index][back->head->index] + ( D_AC + D_BD - D_AB - D_CD); + if (NULL != left) + { + assignDownWeightsDown(left,vtest, va, etest, sib->head, D_AB, 0.5*coeff, A, swapWeights); + assignDownWeightsDown(right,vtest, va, etest, sib->head, D_AB, 0.5*coeff, A, swapWeights); + } +} + +void assignUpWeights(edge *etest, node *vtest, node *va, edge *back, node *cprev, double oldD_AB, double coeff, double **A, + double ***swapWeights) +{ + /*SPR performed on tree above vtest...*/ + /*same idea as above, with appropriate selections of edges and nodes*/ + edge *sib, *left, *right; + /*B is above vtest, A is other tree below vtest unioned with trees in path to vtest*/ + /*sib is tree C being passed by B*/ + /*D is tree below etest*/ + double D_AB, D_CD, D_AC, D_BD; + double thisWeight; + sib = siblingEdge(etest); + left = etest->head->leftEdge; + right = etest->head->rightEdge; + if (NULL == back) /*first recursive call*/ + { + if (NULL == left) + return; + else /*start the process of assigning weights recursively*/ + { + assignUpWeights(left,vtest,va,etest,va,A[va->index][vtest->index],0.5,A,swapWeights); + assignUpWeights(right,vtest,va,etest,va,A[va->index][vtest->index],0.5,A,swapWeights); + } + } + else /*second or later recursive call*/ + { + D_BD = A[vtest->index][etest->head->index]; + D_CD = A[sib->head->index][etest->head->index]; + D_AC = A[back->head->index][sib->head->index] + coeff*(A[va->index][sib->head->index] - A[vtest->index][sib->head->index]); + D_AB = 0.5*(oldD_AB + A[vtest->index][cprev->index]); + thisWeight = swapWeights[1][vtest->index][etest->head->index] = swapWeights[1][vtest->index][back->head->index] + (D_AC + D_BD - D_AB - D_CD); + if (NULL != left) + { + assignUpWeights(left,vtest, va, etest, sib->head, D_AB, 0.5*coeff, A, swapWeights); + assignUpWeights(right,vtest, va, etest, sib->head, D_AB, 0.5*coeff, A, swapWeights); + } + } +} + +void pruneSubtree(edge *p, edge *u, edge *d) +/*starting with edge u above edges p, d*/ +/*removes p, d from tree, u connects to d->head to compensate*/ +{ + p->tail->parentEdge = NULL;/*remove p subtree*/ + u->head = d->head; + d->head->parentEdge = u; /*u connected to d->head*/ + d->head = NULL; /*d removed from tree*/ +} + +void SPRsplitEdge(edge *e, edge *p, edge *d) +/*splits edge e to make it parental to p,d. d is parental to what + previously was below e*/ +{ + d->head = e->head; + e->head = p->tail; + p->tail->parentEdge = e; + d->head->parentEdge = d; +} + + +/*topological shift function*/ +/*removes subtree rooted at v and re-inserts to spilt e*/ +void SPRDownShift(tree *T, node *v, edge *e) +{ + edge *vup, *vdown, *vpar; + vpar = v->parentEdge; + vdown = siblingEdge(vpar); + vup = vpar->tail->parentEdge; + /*topological shift*/ + pruneSubtree(vpar,vup,vdown); + /*removes v subtree and vdown, extends vup*/ + SPRsplitEdge(e,vpar,vdown); /*splits e to make e sibling edge to vpar, + both below vup*/ +} + +void SPRUpShift(tree *T, node *vmove, edge *esplit) +/*an inelegant iterative version*/ +{ + edge *f; + edge **EPath; + edge **sib; + node **v; + int i; + int pathLength; + for(f=esplit->tail->parentEdge,pathLength=1;f->tail != vmove;f=f->tail->parentEdge) + pathLength++; + /*count number of edges to vmove*/ + /*note that pathLength > 0 will hold*/ + + EPath = (edge **)malloc(pathLength*sizeof(edge *)); + v = (node **)malloc(pathLength*sizeof(edge *)); + sib = (edge **)malloc((pathLength+1)*sizeof(edge *)); + /*there are pathLength + 1 side trees, one at the head and tail of each edge in the path*/ + + sib[pathLength] = siblingEdge(esplit); + i = pathLength; + f = esplit->tail->parentEdge; + while (i > 0) + { + i--; + EPath[i] = f; + sib[i] = siblingEdge(f); + v[i] = f->head; + f = f->tail->parentEdge; + } + /*indexed so head of Epath is v[i], tail is v[i-1] and sibling edge is sib[i]*/ + /*need to assign head, tail of each edge in path + as well as have proper values for the left and right fields*/ + + if (esplit == esplit->tail->leftEdge) + { + vmove->leftEdge = esplit; + vmove->rightEdge = EPath[pathLength-1]; + } + else + { + vmove->rightEdge = esplit; + vmove->leftEdge = EPath[pathLength-1]; + } + esplit->tail = vmove; + /*espilt->head remains unchanged*/ + /*vmove has the proper fields for left, right, and parentEdge*/ + + for(i=0;i<(pathLength-1);i++) + EPath[i]->tail = v[i+1]; + + /*this bit flips the orientation along the path + the tail of Epath[i] is now v[i+1] instead of v[i-1]*/ + + EPath[pathLength-1]->tail = vmove; + + for(i=1;ileftEdge) + v[i]->rightEdge = EPath[i-1]; + else + v[i]->leftEdge = EPath[i-1]; + } + if (sib[1] == v[0]->leftEdge) + v[0]->rightEdge = sib[0]; + else + v[0]->leftEdge = sib[0]; + sib[0]->tail = v[0]; + free(EPath); + free(v); + free(sib); +} + + +void SPRTopShift(tree *T, node *vmove, edge *esplit, int UpOrDown) +{ + if (DOWN == UpOrDown) + SPRDownShift(T,vmove,esplit); + else + SPRUpShift(T,vmove,esplit); +} + +node *indexedNode(tree *T, int i) +{ + edge *e; + for(e = depthFirstTraverse(T,NULL);NULL!=e;e=depthFirstTraverse(T,e)) + if (i == e->head->index) + return(e->head); + if (i == T->root->index) + return(T->root); + return(NULL); +} + +edge *indexedEdge(tree *T, int i) +{ + edge *e; + for(e = depthFirstTraverse(T,NULL);NULL!=e;e=depthFirstTraverse(T,e)) + if (i == e->head->index) + return(e); + return(NULL); +} diff --git a/src/TBR.c b/src/TBR.c new file mode 100644 index 0000000..d52f4b2 --- /dev/null +++ b/src/TBR.c @@ -0,0 +1,451 @@ +/* #include */ +/* #include */ +/* #include */ +/* #include "graph.h" */ +/* #include */ +#include "me.h" + +/*functions from me_balanced.c*/ +void makeBMEAveragesTable(tree *T, double **D, double **A); +void assignBMEWeights(tree *T, double **A); + +/*from me.c*/ +edge *siblingEdge(edge *e); +double **initDoubleMatrix(int d); +void freeMatrix(double **D, int size); +edge *depthFirstTraverse(tree *T, edge *e); +edge *findBottomLeft(edge *e); + +/*from bnni.c*/ +void weighTree(tree *T); + +void freeTree(tree *T); + +/*from SPR.c*/ +void zero3DMatrix(double ***X, int h, int l, int w); + +void assignTBRDownWeightsUp(edge *etest, node *vtest, node *va, edge *back, node *cprev, double oldD_AB, double coeff, double **A, double ***swapWeights, + double *bestWeight, edge **eBestSplit, edge **eBestTop, edge **eBestBottom); +void assignTBRDownWeightsSkew(edge *etest, node *vtest, node *va, edge *back, node *cprev, double oldD_AB, double coeff, double **A, double ***swapWeights, + double *bestWeight, edge **eBestSplit, edge **eBestTop, edge **eBestBototm); +void assignTBRDownWeightsDown(edge *etest, node *vtest, node *va, edge *back, node *cprev, double oldD_AB, double coeff, double **A, double ***swapWeights, + double *bestWeight, edge **eBestSplit, edge **eBestTop, edge **eBestBottom); + +void assignTBRUpWeights(edge *ebottom, node *vtest, node *va, edge *back, node *cprev, double oldD_AB, double coeff, double **A, + double **dXTop, double ***swapWeights, edge *etop, double *bestWeight, + edge **bestSplit, edge **bestTop, edge **bestBottom) + /*function assigns the value for etop if the tree below vtest is moved to be below etop*/ + /*and SPR for tree bottom tree splits ebottom*/ + /*recursive function searching over values of ebottom*/ + /*minor variant of SPR.c's assignUpWeights + difference is the index assignment in the array swapWeights, which has a different meaning + for the TBR routines*/ +/*also using dXTop to assign value of average distance to tree above vtest*/ +{ /*SPR performed on tree above vtest...*/ + edge *sib, *left, *right; + /*B is above vtest, A is other tree below vtest unioned with trees in path to vtest*/ + /*sib is tree C being passed by B*/ + /*D is tree below etest*/ + double D_AB, D_CD, D_AC, D_BD; + sib = siblingEdge(ebottom); + left = ebottom->head->leftEdge; + right = ebottom->head->rightEdge; + if (NULL == etop) + { + if (NULL == back) /*first recursive call*/ + { + if (NULL == left) + return; /*no subtree below for SPR*/ + else /*NULL == back and NULL == etop*/ + { + assignTBRUpWeights(left,vtest,va,ebottom,va,A[va->index][vtest->index],0.5,A,dXTop,swapWeights,NULL,bestWeight,bestSplit,bestTop,bestBottom); + assignTBRUpWeights(right,vtest,va,ebottom,va,A[va->index][vtest->index],0.5,A,dXTop,swapWeights,NULL,bestWeight,bestSplit,bestTop,bestBottom); + } + } + else /*NULL != back*/ + { + D_BD = A[ebottom->head->index][vtest->index]; + /*B is tree above vtest, D is tree below ebottom*/ + D_CD = A[sib->head->index][ebottom->head->index]; /*C is tree below sib*/ + D_AC = A[back->head->index][sib->head->index] + + coeff*(A[va->index][sib->head->index] - A[vtest->index][sib->head->index]); + /*va is root of subtree skew back at vtest*/ + /*A is union of tree below va and all subtrees already passed in path from vtest to ebottom*/ + D_AB = 0.5*(oldD_AB + A[vtest->index][cprev->index]); + swapWeights[vtest->index][ebottom->head->index][ebottom->head->index] = swapWeights[vtest->index][back->head->index][back->head->index] + (D_AC + D_BD - D_AB - D_CD); + if (swapWeights[vtest->index][ebottom->head->index][ebottom->head->index] < *bestWeight) + { + *bestSplit = vtest->parentEdge; + *bestTop = NULL; + *bestBottom = ebottom; + *bestWeight = swapWeights[vtest->index][ebottom->head->index][ebottom->head->index]; + } + if (NULL != left) + { + assignTBRUpWeights(left,vtest,va,ebottom,sib->head,D_AB,0.5*coeff,A,dXTop,swapWeights,NULL,bestWeight,bestSplit,bestTop,bestBottom); + assignTBRUpWeights(right,vtest,va,ebottom,sib->head,D_AB,0.5*coeff,A,dXTop,swapWeights,NULL,bestWeight,bestSplit,bestTop,bestBottom); + } + } + } + else /*NULL != etop*/ + { + if (NULL == back) /*first recursive call*/ + { + if (swapWeights[vtest->index][etop->head->index][etop->head->index]< *bestWeight) + /*this represents value of SPR from esplit to etop, with no SPR in bottom tree*/ + { + *bestSplit = vtest->parentEdge; + *bestTop = etop; + *bestBottom = NULL; + *bestWeight = swapWeights[vtest->index][etop->head->index][etop->head->index]; + } + if (NULL == left) + return; /*no subtree below for SPR*/ + else if (NULL != etop)/*start the process of assigning weights recursively*/ + { + assignTBRUpWeights(left,vtest,va,ebottom,va,dXTop[va->index][etop->head->index],0.5,A,dXTop,swapWeights,etop,bestWeight,bestSplit,bestTop,bestBottom); + assignTBRUpWeights(right,vtest,va,ebottom,va,dXTop[va->index][etop->head->index],0.5,A,dXTop,swapWeights,etop,bestWeight,bestSplit,bestTop,bestBottom); + } + } /*NULL == back*/ + /*in following bit, any average distance of form A[vtest->index][x->index] is + replaced by dXTop[x->index][etop->head->index]*/ + else /*second or later recursive call, NULL != etop*/ + { + D_BD = dXTop[ebottom->head->index][etop->head->index]; /*B is tree above vtest - it is in configuration + indexed by etop*/ + /*D is tree below ebottom*/ + D_CD = A[sib->head->index][ebottom->head->index]; /*C is tree below sib*/ + D_AC = A[back->head->index][sib->head->index] + + coeff*(A[va->index][sib->head->index] - A[sib->head->index][vtest->index]); + /*it is correct to use A[][] here because the bad average distances involving B from the first term will + be cancelled by the bad average distances involving B in the subtracted part*/ + /*va is root of subtree skew back at vtest*/ + /*A is union of tree below va and all subtrees already passed in path from vtest to ebottom*/ + D_AB = 0.5*(oldD_AB + dXTop[cprev->index][etop->head->index]); + swapWeights[vtest->index][ebottom->head->index][etop->head->index] = swapWeights[vtest->index][back->head->index][etop->head->index] + (D_AC + D_BD - D_AB - D_CD); + if (swapWeights[vtest->index][ebottom->head->index][etop->head->index] + swapWeights[vtest->index][etop->head->index][etop->head->index]< *bestWeight) + /*first term is contribution of second SPR, second term is contribution of first SPR*/ + { + *bestSplit = vtest->parentEdge; + *bestTop = etop; + *bestBottom = ebottom; + *bestWeight = swapWeights[vtest->index][ebottom->head->index][etop->head->index] + swapWeights[vtest->index][etop->head->index][etop->head->index]; + } + if (NULL != left) + { + assignTBRUpWeights(left,vtest, va, ebottom, sib->head, D_AB, 0.5*coeff, A, dXTop, swapWeights,etop,bestWeight,bestSplit,bestTop,bestBottom); + assignTBRUpWeights(right,vtest, va, ebottom, sib->head, D_AB, 0.5*coeff, A, dXTop, swapWeights,etop,bestWeight,bestSplit,bestTop,bestBottom); + } + } /*else NULL != back, etop*/ + } +} + +/*recall NNI formula: change in tree length from AB|CD split to AC|BD split is +proportional to D_AC + D_BD - D_AB - D_CD*/ +/*in our case B is the tree being moved (below vtest), A is the tree backwards below back, but +with the vtest subtree removed, C is the sibling tree of back and D is the tree above vtest*/ +/*use va to denote the root of the sibling tree to B in the original tree*/ +/*please excuse the multiple uses of the same letters: A,D, etc.*/ +void assignTBRDownWeightsUp(edge *etest, node *vtest, node *va, edge *back, node *cprev, double oldD_AB, double coeff, double **A, double ***swapWeights, + double *bestWeight, edge **bestSplitEdge, edge **bestTop, edge **bestBottom) +{ + edge *par, *sib, *skew; + double D_AC, D_BD, D_AB, D_CD; + par = etest->tail->parentEdge; + skew = siblingEdge(etest); + if (NULL == back) /*first recursive call*/ + { + if (NULL == par) + return; + else /*start the process of assigning weights recursively*/ + { + assignTBRDownWeightsUp(par,vtest,va,etest,va,A[va->index][vtest->index],0.5,A,swapWeights,bestWeight,bestSplitEdge,bestTop,bestBottom); + assignTBRDownWeightsSkew(skew,vtest,va,etest,va,A[va->index][vtest->index],0.5,A,swapWeights,bestWeight,bestSplitEdge,bestTop,bestBottom); + } + } + else /*second or later recursive call*/ + { + sib = siblingEdge(back); + D_BD = A[vtest->index][etest->head->index]; /*straightforward*/ + D_CD = A[sib->head->index][etest->head->index]; /*this one too*/ + D_AC = A[sib->head->index][back->head->index] + coeff*(A[sib->head->index][va->index] + - A[sib->head->index][vtest->index]); + D_AB = 0.5*(oldD_AB + A[vtest->index][cprev->index]); + swapWeights[vtest->index][etest->head->index][etest->head->index] = swapWeights[vtest->index][back->head->index][back->head->index] + (D_AC + D_BD - D_AB - D_CD); + /*using diagonal to store values for SPR swaps above the split edge*/ + /*this is value of swapping tree below vtest to break etest*/ + if (swapWeights[vtest->index][etest->head->index][etest->head->index] < *bestWeight) + { + *bestWeight = swapWeights[vtest->index][etest->head->index][etest->head->index]; + *bestSplitEdge = vtest->parentEdge; + *bestTop = etest; + *bestBottom = NULL; + } + if (NULL != par) + { + assignTBRDownWeightsUp(par,vtest,va,etest,sib->head,D_AB,0.5*coeff,A,swapWeights,bestWeight,bestSplitEdge,bestTop,bestBottom); + assignTBRDownWeightsSkew(skew,vtest,va,etest,sib->head,D_AB,0.5*coeff,A,swapWeights,bestWeight,bestSplitEdge,bestTop,bestBottom); + } + } +} + + +void assignTBRDownWeightsSkew(edge *etest, node *vtest, node *va, edge *back, node *cprev, double oldD_AB, double coeff, double **A, double ***swapWeights, + double *bestWeight, edge **bestSplitEdge, edge **bestTop, edge **bestBottom) +{ + /*same general idea as assignDownWeights, except needing to keep track of things a bit differently*/ + edge *par, *left, *right; + /*par here = sib before + left, right here = par, skew before*/ + double D_AB, D_CD, D_AC, D_BD; + /*B is subtree being moved - below vtest + A is subtree remaining fixed - below va, unioned with all trees already passed by B*/ + /*C is subtree being passed by B, in this case above par + D is subtree below etest, fixed on other side*/ + par = etest->tail->parentEdge; + left = etest->head->leftEdge; + right = etest->head->rightEdge; + if (NULL == back) + { + if (NULL == left) + return; + else + { + assignTBRDownWeightsDown(left,vtest,va,etest,etest->tail,A[vtest->index][etest->tail->index],0.5,A,swapWeights,bestWeight,bestSplitEdge,bestTop,bestBottom); + assignTBRDownWeightsDown(right,vtest,va,etest,etest->tail,A[vtest->index][etest->tail->index],0.5,A,swapWeights,bestWeight,bestSplitEdge,bestTop,bestBottom); + } + } + else + { + D_BD = A[vtest->index][etest->head->index]; + D_CD = A[par->head->index][etest->head->index]; + D_AC = A[back->head->index][par->head->index] + coeff*(A[va->index][par->head->index] - A[vtest->index][par->head->index]); + D_AB = 0.5*(oldD_AB + A[vtest->index][cprev->index]); + swapWeights[vtest->index][etest->head->index][etest->head->index] = swapWeights[vtest->index][back->head->index][back->head->index] + (D_AC + D_BD - D_AB - D_CD); + if (swapWeights[vtest->index][etest->head->index][etest->head->index] < *bestWeight) + { + *bestWeight = swapWeights[vtest->index][etest->head->index][etest->head->index]; + *bestSplitEdge = vtest->parentEdge; + *bestTop = etest; + *bestBottom = NULL; + } + if (NULL != left) + { + assignTBRDownWeightsDown(left,vtest, va, etest, etest->tail, D_AB, 0.5*coeff, A, swapWeights,bestWeight,bestSplitEdge,bestTop,bestBottom); + assignTBRDownWeightsDown(right,vtest, va, etest, etest->tail, D_AB, 0.5*coeff, A, swapWeights,bestWeight,bestSplitEdge,bestTop,bestBottom); + } + } +} + +void assignTBRDownWeightsDown(edge *etest, node *vtest, node *va, edge *back, node *cprev, double oldD_AB, double coeff, double **A, double ***swapWeights, + double *bestWeight, edge **bestSplitEdge, edge **bestTop, edge **bestBottom) +{ + /*again the same general idea*/ + edge *sib, *left, *right; + /*sib here = par in assignDownWeightsSkew + rest is the same as assignDownWeightsSkew*/ + double D_AB, D_CD, D_AC, D_BD; + /*B is below vtest, A is below va unioned with all trees already passed by B*/ + /*C is subtree being passed - below sib*/ + /*D is tree below etest*/ + sib = siblingEdge(etest); + left = etest->head->leftEdge; + right = etest->head->rightEdge; + D_BD = A[vtest->index][etest->head->index]; + D_CD = A[sib->head->index][etest->head->index]; + D_AC = A[sib->head->index][back->head->index] + coeff*(A[sib->head->index][va->index] - A[sib->head->index][vtest->index]); + D_AB = 0.5*(oldD_AB + A[vtest->index][cprev->index]); + swapWeights[vtest->index][etest->head->index][etest->head->index] = swapWeights[vtest->index][back->head->index][back->head->index] + ( D_AC + D_BD - D_AB - D_CD); + if (swapWeights[vtest->index][etest->head->index][etest->head->index] < *bestWeight) + { + *bestWeight = swapWeights[vtest->index][etest->head->index][etest->head->index]; + *bestSplitEdge = vtest->parentEdge; + *bestTop = etest; + *bestBottom = NULL; + } + if (NULL != left) + { + assignTBRDownWeightsDown(left,vtest, va, etest, sib->head, D_AB, 0.5*coeff, A, swapWeights,bestWeight,bestSplitEdge,bestTop,bestBottom); + assignTBRDownWeightsDown(right,vtest, va, etest, sib->head, D_AB, 0.5*coeff, A, swapWeights,bestWeight,bestSplitEdge,bestTop,bestBottom); + } +} + +/*general idea is to have a double loop for a given edge, testing all SPRs for the subtrees above and below a given edge. + Then that function loops over all the edges of a tree*/ + +void TBRswitch(tree *T, edge *e1, edge *e2, edge *e3); + +/*vbottom is node below esplit for average calculations in matrix dXTop, A is matrix of average + distances from original tree, dXout is average distance from vbottom to tree rooted at far edge + of eback, if SPR breaking eback, UpOrDown indicates whether etop is in path above split edge + (Up) or not (Down)*/ +void calcTBRTopBottomAverage(node *vbottom, double **A, double **dXTop, double dXOut, + edge *esplit, edge *etop, edge *eback, int UpOrDown) +{ + edge *enew1, *enew2, *emove; + double newdXOut; + if (esplit == eback) /*top level call - means trivial SPR*/ + dXTop[vbottom->index][etop->head->index] = A[vbottom->index][esplit->head->index]; + else + dXTop[vbottom->index][etop->head->index] = dXTop[vbottom->index][eback->head->index] + + 0.25*(A[vbottom->index][etop->head->index] - dXOut); + /*by moving etop past the vbottom tree, everything in the etop tree is closer by coefficient of + 0.25, while everything in the old back tree is further by a coefficient of 0.25*/ + /*everything in the tree that is being moved (emove) keeps the same relative weight in the average + distance calculation*/ + if (UP == UpOrDown) + { + enew1 = etop->tail->parentEdge; + if (NULL != enew1) /*need to do recursive calls*/ + { + enew2 = siblingEdge(etop); + emove = siblingEdge(eback); /*emove is third edge meeting at vertex with eback, etest*/ + if (esplit == eback) + newdXOut = A[vbottom->index][emove->head->index]; + else + newdXOut = 0.5*(dXOut + A[vbottom->index][emove->head->index]); + calcTBRTopBottomAverage(vbottom,A,dXTop,newdXOut,esplit, enew1,etop,UP); /*old etop is new value for back*/ + calcTBRTopBottomAverage(vbottom,A,dXTop,newdXOut,esplit, enew2,etop,DOWN); + } + } + else /*moving down*/ + { + enew1 = etop->head->leftEdge; + if (NULL != enew1) + { + enew2 = etop->head->rightEdge; + if (eback == siblingEdge(etop)) + emove = etop->tail->parentEdge; + else + emove = siblingEdge(etop); + if (esplit == eback) + newdXOut = A[vbottom->index][emove->head->index]; + else + newdXOut = 0.5*(dXOut + A[vbottom->index][emove->head->index]); + calcTBRTopBottomAverage(vbottom,A,dXTop,newdXOut,esplit,enew1,etop,DOWN); + calcTBRTopBottomAverage(vbottom,A,dXTop,newdXOut,esplit,enew2,etop,DOWN); + } + } +} + +void calcTBRaverages(tree *T, edge *esplit, double **A, double **dXTop) +{ + edge *ebottom, *par, *sib; + for (ebottom = findBottomLeft(esplit); ebottom != esplit; ebottom = depthFirstTraverse(T,ebottom)) + { + par = esplit->tail->parentEdge; + sib = siblingEdge(esplit); + calcTBRTopBottomAverage(ebottom->head,A, dXTop, 0.0, esplit, par,esplit,UP); + calcTBRTopBottomAverage(ebottom->head,A, dXTop, 0.0, esplit, sib,esplit,DOWN); + } +} + +void TBR(tree *T, double **D, double **A) +{ + int i; + edge *esplit, *etop, *eBestTop, *eBestBottom, *eBestSplit; + edge *eout, *block; + edge *left, *right, *par, *sib; + double **dXTop; /*dXTop[i][j] is average distance from subtree rooted at i to tree above split edge, if + SPR above split edge cuts edge whose head has index j*/ + double bestWeight; + double ***TBRWeights; + dXTop = initDoubleMatrix(T->size); + weighTree(T); + TBRWeights = (double ***)calloc(T->size,sizeof(double **)); + for(i=0;isize;i++) + TBRWeights[i] = initDoubleMatrix(T->size); + do + { + zero3DMatrix(TBRWeights,T->size,T->size,T->size); + bestWeight = 0.0; + eBestSplit = eBestTop = eBestBottom = NULL; + for(esplit=depthFirstTraverse(T,NULL);NULL!=esplit;esplit=depthFirstTraverse(T,esplit)) + { + par = esplit->tail->parentEdge; + if (NULL != par) + { + sib = siblingEdge(esplit); + /*next two lines calculate the possible improvements for any SPR above esplit*/ + assignTBRDownWeightsUp(par,esplit->head,sib->head,NULL,NULL,0.0,1.0,A,TBRWeights,&bestWeight,&eBestSplit,&eBestTop,&eBestBottom); + assignTBRDownWeightsSkew(sib,esplit->head,sib->tail,NULL,NULL,0.0,1.0,A,TBRWeights,&bestWeight,&eBestSplit,&eBestTop,&eBestBottom); + calcTBRaverages(T,esplit,A,dXTop); /*calculates the average distance from any subtree + below esplit to the entire subtree above esplit, + after any possible SPR above*/ + /*for etop above esplit, we loop using information from above to calculate values + for all possible SPRs below esplit*/ + } + + right = esplit->head->rightEdge; + if (NULL != right) + { + left = esplit->head->leftEdge; + /*test case: etop = null means only do bottom SPR*/ + assignTBRUpWeights(left,esplit->head,right->head,NULL,NULL,0.0,1.0,A,dXTop,TBRWeights,NULL,&bestWeight,&eBestSplit,&eBestTop,&eBestBottom); + assignTBRUpWeights(right,esplit->head,left->head,NULL,NULL,0.0,1.0,A,dXTop,TBRWeights,NULL,&bestWeight,&eBestSplit,&eBestTop,&eBestBottom); + + block = esplit; + while (NULL != block) + { + if (block != esplit) + { + etop = block; + assignTBRUpWeights(left,esplit->head,right->head,NULL,NULL,0.0,1.0,A,dXTop,TBRWeights,etop,&bestWeight,&eBestSplit,&eBestTop,&eBestBottom); + assignTBRUpWeights(right,esplit->head,left->head,NULL,NULL,0.0,1.0,A,dXTop,TBRWeights,etop,&bestWeight,&eBestSplit,&eBestTop,&eBestBottom); + } + eout = siblingEdge(block); + if (NULL != eout) + { + etop = findBottomLeft(eout); + while (etop->tail != eout->tail) + { + /*for ebottom below esplit*/ + + assignTBRUpWeights(left,esplit->head,right->head,NULL,NULL,0.0,1.0,A,dXTop,TBRWeights,etop,&bestWeight,&eBestSplit,&eBestTop,&eBestBottom); + assignTBRUpWeights(right,esplit->head,left->head,NULL,NULL,0.0,1.0,A,dXTop,TBRWeights,etop,&bestWeight,&eBestSplit,&eBestTop,&eBestBottom); + etop = depthFirstTraverse(T,etop); + } + + /*etop == eout*/ + + assignTBRUpWeights(left,esplit->head,right->head,NULL,NULL,0.0,1.0,A,dXTop,TBRWeights,etop,&bestWeight,&eBestSplit,&eBestTop,&eBestBottom); + assignTBRUpWeights(right,esplit->head,left->head,NULL,NULL,0.0,1.0,A,dXTop,TBRWeights,etop,&bestWeight,&eBestSplit,&eBestTop,&eBestBottom); + } + block = block->tail->parentEdge; + } + } /*if NULL != right*/ + } /*for esplit*/ + /*find bestWeight, best split edge, etc.*/ + if (bestWeight < -EPSILON) + { +// if (verbose) +// { +// printf("TBR #%d: Splitting edge %s: top edge %s, bottom edge %s\n",*count+1, +// eBestSplit->label, eBestTop->label,eBestBottom->label); +// printf("Old tree weight is %lf, new tree weight should be %lf\n",T->weight, T->weight + 0.25*bestWeight); +// } + TBRswitch(T,eBestSplit,eBestTop,eBestBottom); + makeBMEAveragesTable(T,D,A); + assignBMEWeights(T,A); + weighTree(T); +// if (verbose) +// printf("TBR: new tree weight is %lf\n\n",T->weight);< +// (*count)++; + } + else + bestWeight = 1.0; + } while (bestWeight < -EPSILON); + for(i=0;isize;i++) + freeMatrix(TBRWeights[i],T->size); + freeMatrix(dXTop,T->size); +} + +void SPRTopShift(tree *T, node *v, edge *e, int UpOrDown); + +void TBRswitch(tree *T, edge *es, edge *et, edge *eb) +{ + if (NULL != et) + SPRTopShift(T,es->head,et,DOWN); /*DOWN because tree being moved is below split edge*/ + if (NULL != eb) + SPRTopShift(T,es->head,eb,UP); /*UP because tree being moved is above split edge*/ +} diff --git a/src/bNNI.c b/src/bNNI.c index fe7fcce..9363ea5 100644 --- a/src/bNNI.c +++ b/src/bNNI.c @@ -1,9 +1,3 @@ -/*#include -#include -#include -#include "graph.h" -#include "main.h" -*/ #include "me.h" /*boolean leaf(node *v); @@ -27,17 +21,17 @@ void pushHeap(int *p, int *q, double *v, int length, int i); void popHeap(int *p, int *q, double *v, int length, int i); -void bNNIRetestEdge(int *p, int *q, edge *e,tree *T, double **avgDistArray, +void bNNIRetestEdge(int *p, int *q, edge *e,tree *T, double **avgDistArray, double *weights, int *location, int *possibleSwaps) { int tloc; tloc = location[e->head->index+1]; - location[e->head->index+1] = + location[e->head->index+1] = bNNIEdgeTest(e,T,avgDistArray,weights + e->head->index+1); if (NONE == location[e->head->index+1]) { if (NONE != tloc) - popHeap(p,q,weights,(*possibleSwaps)--,q[e->head->index+1]); + popHeap(p,q,weights,(*possibleSwaps)--,q[e->head->index+1]); } else { @@ -74,7 +68,7 @@ void bNNI(tree *T, double **avgDistArray, int *count, double **D, int numSpecies edgeArray = (edge **) malloc((T->size+1)*sizeof(double)); weights = (double *) malloc((T->size+1)*sizeof(double)); location = (int *) malloc((T->size+1)*sizeof(int)); - + double epsilon = 0.0; for (i=0; iroot->leftEdge); + e = findBottomLeft(T->root->leftEdge); while (NULL != e) { edgeArray[e->head->index+1] = e; - location[e->head->index+1] = + location[e->head->index+1] = bNNIEdgeTest(e,T,avgDistArray,weights + e->head->index + 1); e = depthFirstTraverse(T,e); - } + } possibleSwaps = makeThreshHeap(p,q,weights,T->size+1,0.0); permInverse(p,q,T->size+1); /*we put the negative values of weights into a heap, indexed by p with the minimum value pointed to by p[1]*/ - /*p[i] is index (in edgeArray) of edge with i-th position + /*p[i] is index (in edgeArray) of edge with i-th position in the heap, q[j] is the position of edge j in the heap */ while (weights[p[1]] + epsilon < 0) { @@ -156,10 +150,10 @@ void updateSubTreeAfterNNI(double **A, node *v, edge *rootEdge, node *closer, no updateSubTreeAfterNNI(A, v, rootEdge->head->rightEdge, closer, further, 0.5*dcoeff,UP); updatePair(A, rootEdge, rootEdge, closer, further, dcoeff, UP); sib = siblingEdge(v->parentEdge); - A[rootEdge->head->index][v->index] = - A[v->index][rootEdge->head->index] = + A[rootEdge->head->index][v->index] = + A[v->index][rootEdge->head->index] = 0.5*A[rootEdge->head->index][sib->head->index] + - 0.5*A[rootEdge->head->index][v->parentEdge->tail->index]; + 0.5*A[rootEdge->head->index][v->parentEdge->tail->index]; break; case DOWN: /*rootEdge is above the center edge of the NNI*/ sib = siblingEdge(rootEdge); @@ -168,8 +162,8 @@ void updateSubTreeAfterNNI(double **A, node *v, edge *rootEdge, node *closer, no if (NULL != rootEdge->tail->parentEdge) updateSubTreeAfterNNI(A, v, rootEdge->tail->parentEdge, closer, further, 0.5*dcoeff, DOWN); updatePair(A, rootEdge, rootEdge, closer, further, dcoeff, DOWN); - A[rootEdge->head->index][v->index] = - A[v->index][rootEdge->head->index] = + A[rootEdge->head->index][v->index] = + A[v->index][rootEdge->head->index] = 0.5*A[rootEdge->head->index][v->leftEdge->head->index] + 0.5*A[rootEdge->head->index][v->rightEdge->head->index]; break; @@ -179,8 +173,8 @@ void updateSubTreeAfterNNI(double **A, node *v, edge *rootEdge, node *closer, no if (NULL != rootEdge->head->rightEdge) updateSubTreeAfterNNI(A, v, rootEdge->head->rightEdge, closer, further, 0.5*dcoeff,SKEW); updatePair(A, rootEdge, rootEdge, closer, further, dcoeff, UP); - A[rootEdge->head->index][v->index] = - A[v->index][rootEdge->head->index] = + A[rootEdge->head->index][v->index] = + A[v->index][rootEdge->head->index] = 0.5*A[rootEdge->head->index][v->leftEdge->head->index] + 0.5*A[rootEdge->head->index][v->rightEdge->head->index]; break; @@ -188,18 +182,17 @@ void updateSubTreeAfterNNI(double **A, node *v, edge *rootEdge, node *closer, no } /*swapping across edge whose head is v*/ -void bNNIupdateAverages(double **A, node *v, edge *par, edge *skew, +void bNNIupdateAverages(double **A, node *v, edge *par, edge *skew, edge *swap, edge *fixed) -{ - A[v->index][v->index] = 0.25*(A[fixed->head->index][par->head->index] + - A[fixed->head->index][swap->head->index] + - A[skew->head->index][par->head->index] + +{ + A[v->index][v->index] = 0.25*(A[fixed->head->index][par->head->index] + + A[fixed->head->index][swap->head->index] + + A[skew->head->index][par->head->index] + A[skew->head->index][swap->head->index]); updateSubTreeAfterNNI(A, v, fixed, skew->head, swap->head, 0.25, UP); updateSubTreeAfterNNI(A, v, par, swap->head, skew->head, 0.25, DOWN); - updateSubTreeAfterNNI(A, v, skew, fixed->head, par->head, 0.25, UP); + updateSubTreeAfterNNI(A, v, skew, fixed->head, par->head, 0.25, UP); updateSubTreeAfterNNI(A, v, swap, par->head, fixed->head, 0.25, SKEW); - } @@ -309,11 +302,10 @@ int bNNIEdgeTest(edge *e, tree *T, double **A, double *weight) printf("Weight dropping by %lf.\n",w0 - w1); printf("New weight should be %lf.\n",T->weight + w1 - w0); }*/ - return(LEFT); + return(LEFT); } } - /*limitedFillTableUp fills all the entries in D associated with e->head,f->head and those edges g->head above e->head, working recursively and stopping when trigger is reached*/ @@ -324,8 +316,7 @@ void limitedFillTableUp(edge *e, edge *f, double **A, edge *trigger) if (f != trigger) limitedFillTableUp(e,g,A,trigger); h = siblingEdge(f); - A[e->head->index][f->head->index] = - A[f->head->index][e->head->index] = - 0.5*(A[e->head->index][g->head->index] + A[e->head->index][h->head->index]); + A[e->head->index][f->head->index] = + A[f->head->index][e->head->index] = + 0.5*(A[e->head->index][g->head->index] + A[e->head->index][h->head->index]); } - diff --git a/src/dist_dna.c b/src/dist_dna.c index f10bc4d..d56c794 100644 --- a/src/dist_dna.c +++ b/src/dist_dna.c @@ -1,4 +1,4 @@ -/* dist_dna.c 2008-01-19 */ +/* dist_dna.c 2008-12-22 */ /* Copyright 2005-2008 Emmanuel Paradis @@ -64,7 +64,7 @@ double detFourByFour(double *x) if (KnownBase(x[s1]) && KnownBase(x[s2])) L++;\ else continue; -void distDNA_raw(unsigned char *x, int *n, int *s, double *d) +void distDNA_raw(unsigned char *x, int *n, int *s, double *d, int scaled) { int i1, i2, s1, s2, target, Nd; @@ -74,13 +74,14 @@ void distDNA_raw(unsigned char *x, int *n, int *s, double *d) Nd = 0; for (s1 = i1 - 1, s2 = i2 - 1; s1 < i1 + *n*(*s - 1); s1+= *n, s2 += *n) if (DifferentBase(x[s1], x[s2])) Nd++; - d[target] = ((double) Nd / *s); + if (scaled) d[target] = ((double) Nd / *s); + else d[target] = ((double) Nd); target++; } } } -void distDNA_raw_pairdel(unsigned char *x, int *n, int *s, double *d) +void distDNA_raw_pairdel(unsigned char *x, int *n, int *s, double *d, int scaled) { int i1, i2, s1, s2, target, Nd, L; @@ -92,7 +93,8 @@ void distDNA_raw_pairdel(unsigned char *x, int *n, int *s, double *d) CHECK_PAIRWISE_DELETION if (DifferentBase(x[s1], x[s2])) Nd++; } - d[target] = ((double) Nd/L); + if (scaled) d[target] = ((double) Nd/L); + else d[target] = ((double) Nd); target++; } } @@ -1009,8 +1011,8 @@ void dist_dna(unsigned char *x, int *n, int *s, int *model, double *d, int *gamma, double *alpha) { switch (*model) { - case 1 : if (pairdel) distDNA_raw_pairdel(x, n, s, d); - else distDNA_raw(x, n, s, d); break; + case 1 : if (pairdel) distDNA_raw_pairdel(x, n, s, d, 1); + else distDNA_raw(x, n, s, d, 1); break; case 2 : if (pairdel) distDNA_JC69_pairdel(x, n, s, d, variance, var, gamma, alpha); else distDNA_JC69(x, n, s, d, variance, var, gamma, alpha); break; @@ -1043,5 +1045,7 @@ void dist_dna(unsigned char *x, int *n, int *s, int *model, double *d, case 12 : if (pairdel) distDNA_ParaLin_pairdel(x, n, s, d, variance, var); else distDNA_ParaLin(x, n, s, d, variance, var); break; + case 13 : if (pairdel) distDNA_raw_pairdel(x, n, s, d, 0); + else distDNA_raw(x, n, s, d, 0); break; } } diff --git a/src/heap.c b/src/heap.c index 8d48335..fa007f3 100644 --- a/src/heap.c +++ b/src/heap.c @@ -1,8 +1,3 @@ -/*#include -#include -#include -#include "main.h"*/ - #include "me.h" int *initPerm(int size) @@ -35,7 +30,7 @@ void swap(int *p, int *q, int i, int j) /*The usual Heapify function, tailored for our use with a heap of scores*/ /*will use array p to keep track of indexes*/ -/*after scoreHeapify is called, the subtree rooted at i +/*after scoreHeapify is called, the subtree rooted at i will be a heap*/ /*p goes from heap to array, q goes from array to heap*/ @@ -55,8 +50,8 @@ void heapify(int *p, int *q, double *HeapArray, int i, int n) if ((right <= n) && (HeapArray[p[right]] < HeapArray[p[smallest]])) smallest = right; if (smallest != i){ - swap(p,q,i, smallest); - /*push smallest up the heap*/ + swap(p,q,i, smallest); + /*push smallest up the heap*/ i = smallest; /*check next level down*/ } else @@ -64,7 +59,7 @@ void heapify(int *p, int *q, double *HeapArray, int i, int n) } while(moreswap); } -/*heap is of indices of elements of v, +/*heap is of indices of elements of v, popHeap takes the index at position i and pushes it out of the heap (by pushing it to the bottom of the heap, where it is not noticed)*/ diff --git a/src/me.c b/src/me.c index 69d1cb2..21a2396 100644 --- a/src/me.c +++ b/src/me.c @@ -20,9 +20,14 @@ void makeOLSAveragesTable(tree *T, double **D, double **A); void bNNI(tree *T, double **avgDistArray, int *count, double **D, int numSpecies); //functions from NNI.c void NNI(tree *T, double **avgDistArray, int *count, double **D, int numSpecies); +//functions from SPR.c +void SPR(tree *T, double **D, double **A, int *count); +//functions from TBR.c +void TBR(tree *T, double **D, double **A); -void me_b(double *X, int *N, char **labels, int *nni, +void me_b(double *X, int *N, char **labels, + int *nni, int *spr, int *tbr, int *edge1, int *edge2, double *el, char **tl) { double **D, **A; @@ -46,10 +51,12 @@ void me_b(double *X, int *N, char **labels, int *nni, T = BMEaddSpecies(T, addNode, D, A); } // Compute bNNI - if (*nni == 1) - bNNI(T, A, &nniCount, D, n); + if (*nni) bNNI(T, A, &nniCount, D, n); assignBMEWeights(T,A); + if (*spr) SPR(T, D, A, &nniCount); + if (*tbr) TBR(T, D, A); + tree2phylo(T, edge1, edge2, el, tl, n); freeMatrix(D,n); @@ -85,7 +92,7 @@ void me_o(double *X, int *N, char **labels, int *nni, } makeOLSAveragesTable(T,D,A); // Compute NNI - if (*nni == 1) + if (*nni) NNI(T,A,&nniCount,D,n); assignOLSWeights(T,A); diff --git a/src/me.h b/src/me.h index f049ef6..0616592 100644 --- a/src/me.h +++ b/src/me.h @@ -109,7 +109,7 @@ typedef struct set struct set *secondNode; } set; -void me_b(double *X, int *N, char **labels, int *nni, int *edge1, int *edge2, double *el, char **tl); +void me_b(double *X, int *N, char **labels, int *nni, int *spr, int *tbr, int *edge1, int *edge2, double *el, char **tl); void me_o(double *X, int *N, char **labels, int *nni, int *edge1, int *edge2, double *el, char **tl); //int whiteSpace(char c); double **initDoubleMatrix(int d); diff --git a/src/me_balanced.c b/src/me_balanced.c index f78595d..4484d18 100644 --- a/src/me_balanced.c +++ b/src/me_balanced.c @@ -1,6 +1,3 @@ -//#include -//#include -//#include #include "me.h" void BalWFext(edge *e, double **A) /*works except when e is the one edge @@ -48,18 +45,18 @@ void assignBMEWeights(tree *T, double **A) BalWFint(e,A); e = depthFirstTraverse(T,e); } -} +} void BMEcalcDownAverage(tree *T, node *v, edge *e, double **D, double **A) { edge *left, *right; if (leaf(e->head)) - A[e->head->index][v->index] = D[v->index2][e->head->index2]; + A[e->head->index][v->index] = D[v->index2][e->head->index2]; else { left = e->head->leftEdge; right = e->head->rightEdge; - A[e->head->index][v->index] = 0.5 * A[left->head->index][v->index] + A[e->head->index][v->index] = 0.5 * A[left->head->index][v->index] + 0.5 * A[right->head->index][v->index]; } } @@ -95,8 +92,8 @@ void BMEcalcNewvAverages(tree *T, node *v, double **D, double **A) BMEcalcDownAverage(T,v,e,D,A); e = depthFirstTraverse(T,e); } - - e = topFirstTraverse(T,e); /*the upward averages need to be calculated + + e = topFirstTraverse(T,e); /*the upward averages need to be calculated from top to bottom */ while(NULL != e) { @@ -117,7 +114,7 @@ void updatePair(double **A, edge *nearEdge, edge *farEdge, node *v, switch(direction) /*the various cases refer to where the new vertex has been inserted, in relation to the edge nearEdge*/ { - case UP: /*this case is called when v has been inserted above + case UP: /*this case is called when v has been inserted above or skew to farEdge*/ /*do recursive calls first!*/ if (NULL != farEdge->head->leftEdge) @@ -129,7 +126,7 @@ void updatePair(double **A, edge *nearEdge, edge *farEdge, node *v, = A[farEdge->head->index][nearEdge->head->index] + dcoeff*A[farEdge->head->index][v->index] - dcoeff*A[farEdge->head->index][root->index]; - break; + break; case DOWN: /*called when v has been inserted below farEdge*/ if (NULL != farEdge->tail->parentEdge) updatePair(A,nearEdge,farEdge->tail->parentEdge,v,root,dcoeff,DOWN); @@ -140,7 +137,7 @@ void updatePair(double **A, edge *nearEdge, edge *farEdge, node *v, A[nearEdge->head->index][farEdge->head->index] = A[farEdge->head->index][nearEdge->head->index] + dcoeff*A[v->index][farEdge->head->index] - - dcoeff*A[farEdge->head->index][root->index]; + - dcoeff*A[farEdge->head->index][root->index]; } } @@ -152,9 +149,9 @@ void updateSubTree(double **A, edge *nearEdge, node *v, node *root, { case UP: /*newNode is above the edge nearEdge*/ A[v->index][nearEdge->head->index] = A[nearEdge->head->index][v->index]; - A[newNode->index][nearEdge->head->index] = + A[newNode->index][nearEdge->head->index] = A[nearEdge->head->index][newNode->index] = - A[nearEdge->head->index][root->index]; + A[nearEdge->head->index][root->index]; if (NULL != nearEdge->head->leftEdge) updateSubTree(A, nearEdge->head->leftEdge, v, root, newNode, 0.5*dcoeff, UP); if (NULL != nearEdge->head->rightEdge) @@ -163,9 +160,9 @@ void updateSubTree(double **A, edge *nearEdge, node *v, node *root, break; case DOWN: /*newNode is below the edge nearEdge*/ A[nearEdge->head->index][v->index] = A[v->index][nearEdge->head->index]; - A[newNode->index][nearEdge->head->index] = + A[newNode->index][nearEdge->head->index] = A[nearEdge->head->index][newNode->index] = - 0.5*(A[nearEdge->head->index][root->index] + 0.5*(A[nearEdge->head->index][root->index] + A[v->index][nearEdge->head->index]); sib = siblingEdge(nearEdge); if (NULL != sib) @@ -176,10 +173,10 @@ void updateSubTree(double **A, edge *nearEdge, node *v, node *root, break; case SKEW: /*newNode is neither above nor below nearEdge*/ A[v->index][nearEdge->head->index] = A[nearEdge->head->index][v->index]; - A[newNode->index][nearEdge->head->index] = + A[newNode->index][nearEdge->head->index] = A[nearEdge->head->index][newNode->index] = - 0.5*(A[nearEdge->head->index][root->index] + - A[nearEdge->head->index][v->index]); + 0.5*(A[nearEdge->head->index][root->index] + + A[nearEdge->head->index][v->index]); if (NULL != nearEdge->head->leftEdge) updateSubTree(A, nearEdge->head->leftEdge, v, root, newNode, 0.5*dcoeff,SKEW); if (NULL != nearEdge->head->rightEdge) @@ -189,7 +186,7 @@ void updateSubTree(double **A, edge *nearEdge, node *v, node *root, } -/*we update all the averages for nodes (u1,u2), where the insertion point of +/*we update all the averages for nodes (u1,u2), where the insertion point of v is in "direction" from both u1 and u2 */ /*The general idea is to proceed in a direction from those edges already corrected */ @@ -202,9 +199,9 @@ void BMEupdateAveragesMatrix(double **A, edge *e, node *v,node *newNode) /*first, update the v,newNode entries*/ A[newNode->index][newNode->index] = 0.5*(A[e->head->index][e->head->index] + A[v->index][e->head->index]); - A[v->index][newNode->index] = A[newNode->index][v->index] = + A[v->index][newNode->index] = A[newNode->index][v->index] = A[v->index][e->head->index]; - A[v->index][v->index] = + A[v->index][v->index] = 0.5*(A[e->head->index][v->index] + A[v->index][e->head->index]); left = e->head->leftEdge; right = e->head->rightEdge; @@ -226,7 +223,7 @@ void BMEupdateAveragesMatrix(double **A, edge *e, node *v,node *newNode) A[v->index][e->head->index] = A[e->head->index][v->index]; updatePair(A,e,e,v,e->head,0.5,UP); /*updates e->head fields only*/ -} +} /*A is tree below sibling, B is tree below edge, C is tree above edge*/ double wf3(double D_AB, double D_AC, double D_kB, double D_kC) @@ -259,7 +256,6 @@ void BMEsplitEdge(tree *T, node *v, edge *e, double **A) //sprintf(edgeLabel2,"E%d",T->size+1); snprintf(edgeLabel1,EDGE_LABEL_LENGTH,"E%d",T->size); snprintf(edgeLabel2,EDGE_LABEL_LENGTH,"E%d",T->size+1); - /*make the new node and edges*/ newNode = makeNewNode(nodeLabel,T->size+1); @@ -275,9 +271,9 @@ void BMEsplitEdge(tree *T, node *v, edge *e, double **A) v->parentEdge = newPendantEdge; e->head = newNode; - T->size = T->size + 2; + T->size = T->size + 2; - if (e->tail->leftEdge == e) + if (e->tail->leftEdge == e) /*actually this is totally arbitrary and probably unnecessary*/ { newNode->leftEdge = newInternalEdge; @@ -290,7 +286,7 @@ void BMEsplitEdge(tree *T, node *v, edge *e, double **A) } } -tree *BMEaddSpecies(tree *T,node *v, double **D, double **A) +tree *BMEaddSpecies(tree *T,node *v, double **D, double **A) /*the key function of the program addSpeices inserts the node v to the tree T. It uses testEdge to see what the relative weight would be if v split a particular edge. Once insertion point @@ -302,19 +298,19 @@ tree *BMEaddSpecies(tree *T,node *v, double **D, double **A) edge *e; /*loop variable*/ edge *e_min; /*points to best edge seen thus far*/ double w_min = 0.0; /*used to keep track of tree weights*/ - + /*initialize variables as necessary*/ - + /*CASE 1: T is empty, v is the first node*/ if (NULL == T) /*create a tree with v as only vertex, no edges*/ { T_e = newTree(); - T_e->root = v; - /*note that we are rooting T arbitrarily at a leaf. + T_e->root = v; + /*note that we are rooting T arbitrarily at a leaf. T->root is not the phylogenetic root*/ v->index = 0; T_e->size = 1; - return(T_e); + return(T_e); } /*CASE 2: T is a single-vertex tree*/ if (1 == T->size) @@ -326,22 +322,22 @@ tree *BMEaddSpecies(tree *T,node *v, double **D, double **A) A[v->index][v->index] = D[v->index2][T->root->index2]; T->root->leftEdge = v->parentEdge = e; T->size = 2; - return(T); + return(T); } /*CASE 3: T has at least two nodes and an edge. Insert new node by breaking one of the edges*/ - + v->index = T->size; BMEcalcNewvAverages(T,v,D,A); - /*calcNewvAverages will update A for the row and column + /*calcNewvAverages will update A for the row and column include the node v. Will do so using pre-existing averages in T and information from A,D*/ e_min = T->root->leftEdge; e = e_min->head->leftEdge; while (NULL != e) { - BMEtestEdge(e,v,A); - /*testEdge tests weight of tree if loop variable + BMEtestEdge(e,v,A); + /*testEdge tests weight of tree if loop variable e is the edge split, places this value in the e->totalweight field */ if (e->totalweight < w_min) { @@ -405,7 +401,7 @@ void makeBMEAveragesTable(tree *T, double **D, double **A) else if (leaf(e->head)) { if (leaf(f->head)) - A[e->head->index][f->head->index] = + A[e->head->index][f->head->index] = A[f->head->index][e->head->index] = D[e->head->index2][f->head->index2]; else @@ -414,8 +410,8 @@ void makeBMEAveragesTable(tree *T, double **D, double **A) depth-first search, other values have been calculated*/ v = f->head->rightEdge->head; - A[e->head->index][f->head->index] - = A[f->head->index][e->head->index] + A[e->head->index][f->head->index] + = A[f->head->index][e->head->index] = 0.5*(A[e->head->index][u->index] + A[e->head->index][v->index]); } } @@ -426,15 +422,15 @@ void makeBMEAveragesTable(tree *T, double **D, double **A) A[e->head->index][f->head->index] = A[f->head->index][e->head->index] = 0.5*(A[f->head->index][u->index] + A[f->head->index][v->index]); } f = depthFirstTraverse(T,f); - } + } e = depthFirstTraverse(T,e); } e = depthFirstTraverse(T,NULL); while (T->root->leftEdge != e) { - calcUpAverages(D,A,e,e); /*calculates averages for + calcUpAverages(D,A,e,e); /*calculates averages for A[e->head->index][g->head->index] for - any edge g in path from e to root of tree*/ + any edge g in path from e to root of tree*/ e = depthFirstTraverse(T,e); } } /*makeAveragesMatrix*/ diff --git a/src/me_ols.c b/src/me_ols.c index d4a8fcb..3c002c9 100644 --- a/src/me_ols.c +++ b/src/me_ols.c @@ -1,6 +1,3 @@ -//#include -//#include -//#include #include "me.h" /*from NNI.c*/ @@ -15,7 +12,7 @@ void OLSext(edge *e, double **A) if(leaf(e->head)) { f = siblingEdge(e); - e->distance = 0.5*(A[e->head->index][e->tail->index] + e->distance = 0.5*(A[e->head->index][e->tail->index] + A[e->head->index][f->head->index] - A[f->head->index][e->tail->index]); } @@ -29,12 +26,12 @@ void OLSext(edge *e, double **A) } } -double wf(double lambda, double D_LR, double D_LU, double D_LD, +double wf(double lambda, double D_LR, double D_LU, double D_LD, double D_RU, double D_RD, double D_DU) { double weight; weight = 0.5*(lambda*(D_LU + D_RD) + (1 -lambda)*(D_LD + D_RU) - - (D_LR + D_DU)); + - (D_LR + D_DU)); return(weight); } @@ -45,7 +42,7 @@ void OLSint(edge *e, double **A) left = e->head->leftEdge; right = e->head->rightEdge; sib = siblingEdge(e); - lambda = ((double) sib->bottomsize*left->bottomsize + + lambda = ((double) sib->bottomsize*left->bottomsize + right->bottomsize*e->tail->parentEdge->topsize) / (e->bottomsize*e->topsize); e->distance = wf(lambda,A[left->head->index][right->head->index], @@ -88,7 +85,7 @@ void makeOLSAveragesTable(tree *T, double **D, double **A) if(leaf(e->head)) while (NULL != f) { - if (exclude != f) + if (exclude != f) { if (leaf(f->head)) A[e->head->index][f->head->index] = A[f->head->index][e->head->index] = D[e->head->index2][f->head->index2]; @@ -96,19 +93,19 @@ void makeOLSAveragesTable(tree *T, double **D, double **A) { g = f->head->leftEdge; h = f->head->rightEdge; - A[e->head->index][f->head->index] = A[f->head->index][e->head->index] = (g->bottomsize*A[e->head->index][g->head->index] + h->bottomsize*A[e->head->index][h->head->index])/f->bottomsize; + A[e->head->index][f->head->index] = A[f->head->index][e->head->index] = (g->bottomsize*A[e->head->index][g->head->index] + h->bottomsize*A[e->head->index][h->head->index])/f->bottomsize; } } else /*exclude == f*/ - exclude = exclude->tail->parentEdge; + exclude = exclude->tail->parentEdge; f = depthFirstTraverse(T,f); } - else + else /*e->head is not a leaf, so we go recursively to values calculated for the nodes below e*/ while(NULL !=f ) { - if (exclude != f) + if (exclude != f) { g = e->head->leftEdge; h = e->head->rightEdge; @@ -126,9 +123,9 @@ void makeOLSAveragesTable(tree *T, double **D, double **A) root*/ f = e->tail->parentEdge; if (NULL != f) - fillTableUp(e,f,A,D,T); + fillTableUp(e,f,A,D,T); e = depthFirstTraverse(T,e); - } + } /*we are indexing this table by vertex indices, but really the underlying object is the edge set. Thus, the array is one element @@ -144,14 +141,14 @@ void GMEcalcDownAverage(node *v, edge *e, double **D, double **A) { edge *left, *right; if (leaf(e->head)) - A[e->head->index][v->index] = D[v->index2][e->head->index2]; + A[e->head->index][v->index] = D[v->index2][e->head->index2]; else { left = e->head->leftEdge; right = e->head->rightEdge; - A[e->head->index][v->index] = - ( left->bottomsize * A[left->head->index][v->index] + - right->bottomsize * A[right->head->index][v->index]) + A[e->head->index][v->index] = + ( left->bottomsize * A[left->head->index][v->index] + + right->bottomsize * A[right->head->index][v->index]) / e->bottomsize; } } @@ -165,8 +162,8 @@ void GMEcalcUpAverage(node *v, edge *e, double **D, double **A) { up = e->tail->parentEdge; down = siblingEdge(e); - A[v->index][e->head->index] = - (up->topsize * A[v->index][up->head->index] + + A[v->index][e->head->index] = + (up->topsize * A[v->index][up->head->index] + down->bottomsize * A[down->head->index][v->index]) / e->topsize; } @@ -187,8 +184,8 @@ void GMEcalcNewvAverages(tree *T, node *v, double **D, double **A) GMEcalcDownAverage(v,e,D,A); e = depthFirstTraverse(T,e); } - - e = topFirstTraverse(T,e); /*the upward averages need to be calculated + + e = topFirstTraverse(T,e); /*the upward averages need to be calculated from top to bottom */ while(NULL != e) { @@ -197,7 +194,7 @@ void GMEcalcNewvAverages(tree *T, node *v, double **D, double **A) } } -double wf4(double lambda, double lambda2, double D_AB, double D_AC, +double wf4(double lambda, double lambda2, double D_AB, double D_AC, double D_BC, double D_Av, double D_Bv, double D_Cv) { return(((1 - lambda) * (D_AC + D_Bv) + (lambda2 - 1)*(D_AB + D_Cv) @@ -206,10 +203,10 @@ double wf4(double lambda, double lambda2, double D_AB, double D_AC, /*testEdge cacluates what the OLS weight would be if v were inserted into - T along e. Compare against known values for inserting along + T along e. Compare against known values for inserting along f = e->parentEdge */ /*edges are tested by a top-first, left-first scheme. we presume - all distances are fixed to the correct weight for + all distances are fixed to the correct weight for e->parentEdge, if e is a left-oriented edge*/ void testEdge(edge *e, node *v, double **A) { @@ -223,12 +220,12 @@ void testEdge(edge *e, node *v, double **A) / ((1 + par->topsize)*(par->bottomsize))); lambda2 = (((double) (sib->bottomsize + e->bottomsize*par->topsize)) / ((1 + e->bottomsize)*(e->topsize))); - e->totalweight = par->totalweight + e->totalweight = par->totalweight + wf4(lambda,lambda2,A[e->head->index][sib->head->index], A[sib->head->index][e->tail->index], A[e->head->index][e->tail->index], A[sib->head->index][v->index],A[e->head->index][v->index], - A[v->index][e->tail->index]); + A[v->index][e->tail->index]); } void printDoubleTable(double **A, int d) @@ -244,7 +241,7 @@ void printDoubleTable(double **A, int d) void GMEsplitEdge(tree *T, node *v, edge *e, double **A); -tree *GMEaddSpecies(tree *T,node *v, double **D, double **A) +tree *GMEaddSpecies(tree *T,node *v, double **D, double **A) /*the key function of the program addSpeices inserts the node v to the tree T. It uses testEdge to see what the weight would be if v split a particular edge. Weights are assigned by @@ -257,18 +254,18 @@ tree *GMEaddSpecies(tree *T,node *v, double **D, double **A) /* if (verbose) printf("Adding %s.\n",v->label);*/ - + /*initialize variables as necessary*/ /*CASE 1: T is empty, v is the first node*/ if (NULL == T) /*create a tree with v as only vertex, no edges*/ { T_e = newTree(); - T_e->root = v; - /*note that we are rooting T arbitrarily at a leaf. + T_e->root = v; + /*note that we are rooting T arbitrarily at a leaf. T->root is not the phylogenetic root*/ v->index = 0; T_e->size = 1; - return(T_e); + return(T_e); } /*CASE 2: T is a single-vertex tree*/ if (1 == T->size) @@ -282,11 +279,11 @@ tree *GMEaddSpecies(tree *T,node *v, double **D, double **A) A[v->index][v->index] = D[v->index2][T->root->index2]; T->root->leftEdge = v->parentEdge = e; T->size = 2; - return(T); + return(T); } /*CASE 3: T has at least two nodes and an edge. Insert new node by breaking one of the edges*/ - + v->index = T->size; /*if (!(T->size % 100)) printf("T->size is %d\n",T->size);*/ @@ -294,12 +291,12 @@ tree *GMEaddSpecies(tree *T,node *v, double **D, double **A) /*calcNewvAverges will assign values to all the edge averages of T which include the node v. Will do so using pre-existing averages in T and information from A,D*/ - e_min = T->root->leftEdge; - e = e_min->head->leftEdge; + e_min = T->root->leftEdge; + e = e_min->head->leftEdge; while (NULL != e) { - testEdge(e,v,A); - /*testEdge tests weight of tree if loop variable + testEdge(e,v,A); + /*testEdge tests weight of tree if loop variable e is the edge split, places this weight in e->totalweight field */ if (e->totalweight < w_min) { @@ -329,23 +326,23 @@ void GMEupdateAveragesMatrix(double **A, edge *e, node *v, node *newNode) /*we need to update the matrix A so all 1-distant, 2-distant, and 3-distant averages are correct*/ - + /*first, initialize the newNode entries*/ /*1-distant*/ - A[newNode->index][newNode->index] = + A[newNode->index][newNode->index] = (e->bottomsize*A[e->head->index][e->head->index] + A[v->index][e->head->index]) / (e->bottomsize + 1); /*1-distant for v*/ - A[v->index][v->index] = - (e->bottomsize*A[e->head->index][v->index] + A[v->index][v->index] = + (e->bottomsize*A[e->head->index][v->index] + e->topsize*A[v->index][e->head->index]) / (e->bottomsize + e->topsize); /*2-distant for v,newNode*/ - A[v->index][newNode->index] = A[newNode->index][v->index] = + A[v->index][newNode->index] = A[newNode->index][v->index] = A[v->index][e->head->index]; - + /*second 2-distant for newNode*/ A[newNode->index][e->tail->index] = A[e->tail->index][newNode->index] = (e->bottomsize*A[e->head->index][e->tail->index] @@ -353,12 +350,12 @@ void GMEupdateAveragesMatrix(double **A, edge *e, node *v, node *newNode) /*third 2-distant for newNode*/ A[newNode->index][e->head->index] = A[e->head->index][newNode->index] = A[e->head->index][e->head->index]; - + if (NULL != sib) { /*fourth and last 2-distant for newNode*/ A[newNode->index][sib->head->index] = - A[sib->head->index][newNode->index] = + A[sib->head->index][newNode->index] = (e->bottomsize*A[sib->head->index][e->head->index] + A[sib->head->index][v->index]) / (e->bottomsize + 1); updateSubTreeAverages(A,sib,v,SKEW); /*updates sib and below*/ @@ -373,17 +370,17 @@ void GMEupdateAveragesMatrix(double **A, edge *e, node *v, node *newNode) if (NULL != left) updateSubTreeAverages(A,left,v,UP); /*updates left and below*/ if (NULL != right) - updateSubTreeAverages(A,right,v,UP); /*updates right and below*/ + updateSubTreeAverages(A,right,v,UP); /*updates right and below*/ /*1-dist for e->head*/ - A[e->head->index][e->head->index] = + A[e->head->index][e->head->index] = (e->topsize*A[e->head->index][e->head->index] + A[e->head->index][v->index]) / (e->topsize+1); /*2-dist for e->head (v,newNode,left,right) taken care of elsewhere*/ /*3-dist with e->head either taken care of elsewhere (below) or unchanged (sib,e->tail)*/ - + /*symmetrize the matrix (at least for distant-2 subtrees) */ A[v->index][e->head->index] = A[e->head->index][v->index]; /*and distant-3 subtrees*/ @@ -395,8 +392,8 @@ void GMEupdateAveragesMatrix(double **A, edge *e, node *v, node *newNode) if (NULL != sib) A[v->index][sib->head->index] = A[sib->head->index][v->index]; -} - +} + void GMEsplitEdge(tree *T, node *v, edge *e, double **A) { char nodelabel[NODE_LABEL_LENGTH]; @@ -404,34 +401,34 @@ void GMEsplitEdge(tree *T, node *v, edge *e, double **A) edge *newPendantEdge; edge *newInternalEdge; node *newNode; - + snprintf(nodelabel,1,""); - newNode = makeNewNode(nodelabel,T->size + 1); - + newNode = makeNewNode(nodelabel,T->size + 1); + //sprintf(edgelabel,"E%d",T->size); snprintf(edgelabel,EDGE_LABEL_LENGTH,"E%d",T->size); - newPendantEdge = makeEdge(edgelabel,newNode,v,0.0); - + newPendantEdge = makeEdge(edgelabel,newNode,v,0.0); + //sprintf(edgelabel,"E%d",T->size+1); snprintf(edgelabel,EDGE_LABEL_LENGTH,"E%d",T->size+1); - newInternalEdge = makeEdge(edgelabel,newNode,e->head,0.0); - + newInternalEdge = makeEdge(edgelabel,newNode,e->head,0.0); + /* if (verbose) printf("Inserting node %s on edge %s between nodes %s and %s.\n", v->label,e->label,e->tail->label,e->head->label);*/ /*update the matrix of average distances*/ /*also updates the bottomsize, topsize fields*/ - + GMEupdateAveragesMatrix(A,e,v,newNode); newNode->parentEdge = e; e->head->parentEdge = newInternalEdge; v->parentEdge = newPendantEdge; e->head = newNode; - + T->size = T->size + 2; - if (e->tail->leftEdge == e) + if (e->tail->leftEdge == e) { newNode->leftEdge = newInternalEdge; newNode->rightEdge = newPendantEdge; @@ -441,16 +438,16 @@ void GMEsplitEdge(tree *T, node *v, edge *e, double **A) newNode->leftEdge = newInternalEdge; newNode->rightEdge = newPendantEdge; } - + /*assign proper topsize, bottomsize values to the two new Edges*/ - - newPendantEdge->bottomsize = 1; + + newPendantEdge->bottomsize = 1; newPendantEdge->topsize = e->bottomsize + e->topsize; - + newInternalEdge->bottomsize = e->bottomsize; newInternalEdge->topsize = e->topsize; /*off by one, but we adjust that below*/ - + /*and increment these fields for all other edges*/ updateSizes(newInternalEdge,UP); updateSizes(e,DOWN); @@ -466,8 +463,8 @@ void updateSubTreeAverages(double **A, edge *e, node *v, int direction) par = e->tail->parentEdge; switch(direction) { - /*want to preserve correctness of - all 1-distant, 2-distant, and 3-distant averages*/ + /*want to preserve correctness of + all 1-distant, 2-distant, and 3-distant averages*/ /*1-distant updated at edge splitting the two trees*/ /*2-distant updated: (left->head,right->head) and (head,tail) updated at @@ -475,15 +472,15 @@ void updateSubTreeAverages(double **A, edge *e, node *v, int direction) (That would lead to multiple updating).*/ /*3-distant updated: at edge in center of quartet*/ case UP: /*point of insertion is above e*/ - /*1-distant average of nodes below e to + /*1-distant average of nodes below e to nodes above e*/ - A[e->head->index][e->head->index] = - (e->topsize*A[e->head->index][e->head->index] + - A[e->head->index][v->index])/(e->topsize + 1); - /*2-distant average of nodes below e to + A[e->head->index][e->head->index] = + (e->topsize*A[e->head->index][e->head->index] + + A[e->head->index][v->index])/(e->topsize + 1); + /*2-distant average of nodes below e to nodes above parent of e*/ - A[e->head->index][par->head->index] = - A[par->head->index][e->head->index] = + A[e->head->index][par->head->index] = + A[par->head->index][e->head->index] = (par->topsize*A[par->head->index][e->head->index] + A[e->head->index][v->index]) / (par->topsize + 1); /*must do both 3-distant averages involving par*/ @@ -506,11 +503,11 @@ void updateSubTreeAverages(double **A, edge *e, node *v, int direction) } break; case SKEW: /*point of insertion is skew to e*/ - /*1-distant average of nodes below e to + /*1-distant average of nodes below e to nodes above e*/ - A[e->head->index][e->head->index] = - (e->topsize*A[e->head->index][e->head->index] + - A[e->head->index][v->index])/(e->topsize + 1); + A[e->head->index][e->head->index] = + (e->topsize*A[e->head->index][e->head->index] + + A[e->head->index][v->index])/(e->topsize + 1); /*no 2-distant averages to update in this case*/ /*updating 3-distant averages involving sib*/ if (NULL != left) @@ -534,16 +531,16 @@ void updateSubTreeAverages(double **A, edge *e, node *v, int direction) case LEFT: /*point of insertion is below the edge left*/ /*1-distant average*/ - A[e->head->index][e->head->index] = - (e->bottomsize*A[e->head->index][e->head->index] + - A[v->index][e->head->index])/(e->bottomsize + 1); + A[e->head->index][e->head->index] = + (e->bottomsize*A[e->head->index][e->head->index] + + A[v->index][e->head->index])/(e->bottomsize + 1); /*2-distant averages*/ - A[e->head->index][e->tail->index] = - A[e->tail->index][e->head->index] = - (e->bottomsize*A[e->head->index][e->tail->index] + - A[v->index][e->tail->index])/(e->bottomsize + 1); - A[left->head->index][right->head->index] = - A[right->head->index][left->head->index] = + A[e->head->index][e->tail->index] = + A[e->tail->index][e->head->index] = + (e->bottomsize*A[e->head->index][e->tail->index] + + A[v->index][e->tail->index])/(e->bottomsize + 1); + A[left->head->index][right->head->index] = + A[right->head->index][left->head->index] = (left->bottomsize*A[right->head->index][left->head->index] + A[right->head->index][v->index]) / (left->bottomsize+1); /*3-distant avereages involving left*/ @@ -566,19 +563,19 @@ void updateSubTreeAverages(double **A, edge *e, node *v, int direction) = (left->bottomsize*A[left->head->index][par->head->index] + A[v->index][par->head->index]) / (left->bottomsize + 1); } - break; + break; case RIGHT: /*point of insertion is below the edge right*/ /*1-distant average*/ - A[e->head->index][e->head->index] = - (e->bottomsize*A[e->head->index][e->head->index] + - A[v->index][e->head->index])/(e->bottomsize + 1); + A[e->head->index][e->head->index] = + (e->bottomsize*A[e->head->index][e->head->index] + + A[v->index][e->head->index])/(e->bottomsize + 1); /*2-distant averages*/ - A[e->head->index][e->tail->index] = - A[e->tail->index][e->head->index] = - (e->bottomsize*A[e->head->index][e->tail->index] + - A[v->index][e->tail->index])/(e->bottomsize + 1); - A[left->head->index][right->head->index] = - A[right->head->index][left->head->index] = + A[e->head->index][e->tail->index] = + A[e->tail->index][e->head->index] = + (e->bottomsize*A[e->head->index][e->tail->index] + + A[v->index][e->tail->index])/(e->bottomsize + 1); + A[left->head->index][right->head->index] = + A[right->head->index][left->head->index] = (right->bottomsize*A[right->head->index][left->head->index] + A[left->head->index][v->index]) / (right->bottomsize+1); /*3-distant avereages involving right*/