]> git.donarmstrong.com Git - ape.git/commitdiff
big update with files from Andrei
authorparadis <paradis@6e262413-ae40-0410-9e79-b911bd7a66b7>
Thu, 20 Oct 2011 08:13:38 +0000 (08:13 +0000)
committerparadis <paradis@6e262413-ae40-0410-9e79-b911bd7a66b7>
Thu, 20 Oct 2011 08:13:38 +0000 (08:13 +0000)
git-svn-id: https://svn.mpl.ird.fr/ape/dev/ape@171 6e262413-ae40-0410-9e79-b911bd7a66b7

33 files changed:
DESCRIPTION
NEWS
R/SDM.R [new file with mode: 0644]
R/additive.R [new file with mode: 0644]
R/as.bitsplits.R [new file with mode: 0644]
R/is.compatible.R [new file with mode: 0644]
R/mvr.R [new file with mode: 0644]
R/njs.R [new file with mode: 0644]
R/treePop.R [new file with mode: 0644]
R/triangMtd.R [new file with mode: 0644]
Thanks
man/SDM.Rd [new file with mode: 0644]
man/additive.Rd [new file with mode: 0644]
man/all.equal.phylo.Rd
man/as.bitsplits.Rd [new file with mode: 0644]
man/bd.time.Rd
man/is.compatible.Rd [new file with mode: 0644]
man/mvr.Rd [new file with mode: 0644]
man/njs.Rd [new file with mode: 0644]
man/treePop.Rd [new file with mode: 0644]
man/triangMtd.Rd [new file with mode: 0644]
src/additive.c [new file with mode: 0644]
src/ape.c [new file with mode: 0644]
src/ape.h [new file with mode: 0644]
src/bionjs.c [new file with mode: 0644]
src/mvr.c [new file with mode: 0644]
src/mvrs.c [new file with mode: 0644]
src/nj.c
src/njs.c [new file with mode: 0644]
src/treePop.c [new file with mode: 0644]
src/triangMtd.c [new file with mode: 0644]
src/triangMtds.c [new file with mode: 0644]
src/ultrametric.c [new file with mode: 0644]

index 6b51928739bc9db83ab87917cc3e334b612aba70..5882a71bc76c9fbf65d6ce68d22adc93e42f3a75 100644 (file)
@@ -1,8 +1,8 @@
 Package: ape
 Version: 2.8
-Date: 2011-09-24
+Date: 2011-10-20
 Title: Analyses of Phylogenetics and Evolution
-Author: Emmanuel Paradis, Ben Bolker, Julien Claude, Hoa Sien Cuong, Richard Desper, Benoit Durand, Julien Dutheil, Olivier Gascuel, Christoph Heibl, Daniel Lawson, Vincent Lefort, Pierre Legendre, Jim Lemon, Yvonnick Noel, Johan Nylander, Rainer Opgen-Rhein, Klaus Schliep, Korbinian Strimmer, Damien de Vienne
+Author: Emmanuel Paradis, Ben Bolker, Julien Claude, Hoa Sien Cuong, Richard Desper, Benoit Durand, Julien Dutheil, Olivier Gascuel, Christoph Heibl, Daniel Lawson, Vincent Lefort, Pierre Legendre, Jim Lemon, Yvonnick Noel, Johan Nylander, Rainer Opgen-Rhein, Andrei-Alin Popescu, Klaus Schliep, Korbinian Strimmer, Damien de Vienne
 Maintainer: Emmanuel Paradis <Emmanuel.Paradis@ird.fr>
 Depends: R (>= 2.6.0)
 Suggests: gee
@@ -17,6 +17,8 @@ Description: ape provides functions for reading, writing, plotting,
   skyline plots, estimation of absolute evolutionary rates and
   clock-like trees using mean path lengths, non-parametric rate
   smoothing and penalized likelihood. Phylogeny estimation can be
-  done with the NJ, BIONJ, and ME methods.
+  done with the NJ, BIONJ, ME, MVR, SDM, and triangle methods, and
+  several methods handling incomplete distance matrices (NJ*, BIONJ*,
+  MVR*, and the corresponding triangle method).
 License: GPL (>= 2)
 URL: http://ape.mpl.ird.fr/
diff --git a/NEWS b/NEWS
index 776fd4ae533120c384bbe3cfca87a8bbac66b698..17f28e25af52fa9bf1440a6c698b0179ac4f1a94 100644 (file)
--- a/NEWS
+++ b/NEWS
@@ -3,14 +3,24 @@
 
 NEW FEATURES
 
+    o Twelve new functions have been written by Andrei-Alin Popescu:
+      additive, ultrametric, is.compatible, arecompatible, mvr, mvrs,
+      njs, bionjs, SDM, treePop, triangMtd, triangMtd*.
+
+    o A new class "bitsplits" has been created by Andrei-Alin Popescu
+      to code splits (aka, bipartition).
+
+    o There is a new generic function as.bitsplits with a method to
+      convert from the class "prop.part" to the class "bitsplits".
+
+    o The new function ltt.coplot plots on the same scales a tree and
+      the derived LTT plot.
+
     o ltt.plot() has two new options: backward and tol. It can now
       handle non-ultrametic trees and its internal coding has been
       improved. The coordinates of the plot can now be computed with
       the new function ltt.plot.coords.
 
-    o The new function ltt.coplot plots on the same scales a tree and
-      its LTT plot.
-
 
 
                CHANGES IN APE VERSION 2.7-3
diff --git a/R/SDM.R b/R/SDM.R
new file mode 100644 (file)
index 0000000..94a8f31
--- /dev/null
+++ b/R/SDM.R
@@ -0,0 +1,183 @@
+## SDM.R (2011-10-11)
+
+## Construction of Consensus Distance Matrix With SDM
+
+## Copyright 2011 Andrei-Alin Popescu
+
+## This file is part of the R-package `ape'.
+## See the file ../COPYING for licensing issues.
+
+SDM <- function(...)
+{
+    st <- list(...) # first half contains matrices, second half s_p
+    k <- length(st)/2
+    labels <- attr(as.dist(st[[1]]), "Labels")
+    tot <- length(rownames(st[[1]]))
+    for (i in 2:k) {
+        labels <- union(labels, attr(as.dist(st[[i]]), "Labels"))
+        tot <- tot + length(rownames(st[[i]]))
+    }
+    sp <- mat.or.vec(1,k)
+    for (i in c(k+1:k))
+        sp[i - k] <- st[[i]]
+
+    astart <- mat.or.vec(1, tot) # start of aip, astart[p] is start of aip
+    astart[1] <- k
+    for (i in 2:k)
+        astart[i] <- astart[i - 1] + length(rownames(st[[i - 1]]))
+
+    a <- mat.or.vec(1, k + tot + k + length(labels))
+    ## first k are alphas, subseqeunt ones aip
+    ## each matrix p starting at astart[p], next are
+    ## Lagrange multipliers, miu, niu, lambda in that order
+    n <- length(labels)
+    miustart <- k + tot
+    niustart <- miustart + n
+    lambstart <- niustart + k - 1
+
+    X <- mat.or.vec(n, n)
+    V <- mat.or.vec(n, n)
+    w <- mat.or.vec(n, n)
+
+    col <- mat.or.vec(k + tot + k + length(labels), 1) # free terms of system
+
+    dimnames(X) <- list(labels, labels)
+    dimnames(V) <- list(labels, labels)
+    dimnames(W) <- list(labels, labels)
+
+    for (i in 1:(n - 1)) {
+        for (j in (i + 1):n) {
+            for (p in 1:k) {
+                d <- st[[p]]
+                if (is.element(rownames(X)[i], rownames(d)) & is.element(colnames(X)[j], colnames(d))) {
+                    w[i, j] <- w[j, i] <- w[i, j] + sp[p]
+                }
+            }
+        }
+    }
+
+    Q <- mat.or.vec(length(labels) + k + k + tot, length(labels) + k + k + tot)
+    ## first decompose first sum in paper
+    for (p in 1:k) {
+        d_p <- st[[p]]
+        for (l in 1:k) { # first compute coeficients of alphas
+            sum <- 0
+            dijp <- -1
+            if (l == p) { # calculate alpha_p
+                for (i in 1:n) {
+                    for (j in 1:n) { #check if {i,j}\subset L_l
+                        d <- st[[l]]
+                        if (i != j & is.element(labels[i],rownames(d)) & is.element(labels[j],colnames(d))) {
+                            dij <- d[rownames(d) == labels[i], colnames(d) == labels[j]]
+                            sum <- sum + ((dij*dij) - sp[l]*dij*dij/w[i,j])
+                            ipos <- which(rownames(d) == labels[i])
+                            jpos <- which(rownames(d) == labels[j])
+                            Q[p, astart[l] + ipos] <- Q[p, astart[l] + ipos] + (dij - (sp[l]*dij/w[i,j]))
+                            Q[p, astart[l] + jpos] <- Q[p, astart[l] + jpos] + (dij - (sp[l]*dij/w[i,j]))
+                        }
+                    }
+                }
+            } else {
+                for (i in 1:n) {
+                    for (j in 1:n) { #check if {i,j}\subset L_l
+                        d <- st[[l]]
+                        if (i != j & is.element(labels[i], rownames(d)) & is.element(labels[j], colnames(d)) & is.element(labels[i], rownames(d_p)) & is.element(labels[j], colnames(d_p))) {
+                            dij <- d[rownames(d) == labels[i], colnames(d) == labels[j]]
+                            dijp <- d_p[rownames(d_p) == labels[i], colnames(d_p) == labels[j]]
+                            sum <- sum - sp[l]*dij*dijp/w[i, j]
+                            ipos <- which(rownames(d) == labels[i])
+                            jpos <- which(rownames(d) == labels[j])
+                            Q[p,astart[l] + ipos] <- Q[p, astart[l] + ipos] - sp[l]*dijp/w[i, j]
+                            Q[p,astart[l] + jpos] <- Q[p, astart[l] + jpos] - sp[l]*dijp/w[i, j]
+                        }
+                    }
+                }
+            }
+            Q[p, l] <- sum
+        }
+        Q[p, lambstart + 1] <- 1
+    }
+    r <- k
+    for (p in 1:k) {
+        dp <- st[[p]]
+        for (i in 1:n) {
+            if (is.element(labels[i], rownames(dp))) {
+                r <- r + 1
+                for (l in 1:k) {
+                    d <- st[[l]]
+                    if (l == p) {
+                        for (j in 1:n) {
+                            if (i != j & is.element(labels[j], rownames(dp))) {
+                                dij <- d[rownames(d) == labels[i], colnames(d) == labels[j]]
+                                Q[r, l] <- Q[r, l] + (dij - sp[l]*dij/w[i, j])
+                                ipos <- which(rownames(d) == labels[i])
+                                jpos <- which(rownames(d) == labels[j])
+                                Q[r, astart[l] + ipos] <- Q[r, astart[l] + ipos] + (1 - sp[l]/w[i, j])
+                                Q[r, astart[l] + jpos] <- Q[r, astart[l] + jpos] + (1 - sp[l]/w[i, j])
+                            }
+                        }
+                    } else {
+                        for (j in 1:n) {
+                            if (i != j & is.element(labels[j], rownames(dp)) & is.element(labels[i], rownames(d)) & is.element(labels[j], colnames(d))) {
+                                dij <- d[rownames(d) == labels[i], colnames(d) == labels[j]]
+                                Q[r,l] <- Q[r,l] - sp[l]*dij/w[i, j]
+                                ipos <- which(rownames(d) == labels[i])
+                                jpos <- which(rownames(d) == labels[j])
+                                Q[r, astart[l] + ipos] <- Q[r, astart[l] + ipos] - sp[l]/w[i, j]
+                                Q[r, astart[l] + jpos] <- Q[r, astart[l] + jpos] - sp[l]/w[i, j]
+                            }
+                        }
+                    }
+                }
+                if (p < k) Q[r, ] <- Q[r, ] * sp[p]
+                Q[r, miustart + i] <- 1
+                if (p < k) Q[r, niustart + p] <- 1
+            }
+        }
+    }
+    r <- r + 1
+    col[r] <- k
+    for (i in 1:k) Q[r,i] <- 1
+
+    for (i in 1:n) {
+        r <- r + 1
+        for (p in 1:k) {
+            d <- st[[p]]
+            if (is.element(labels[i], rownames(d))) {
+                ipos <- which(rownames(d) == labels[i])
+                Q[r, astart[p] + ipos] <- 1
+            }
+        }
+    }
+    for (p in 1:(k - 1)) {
+        r <- r + 1
+        for (i in 1:n) {
+            d <- st[[p]]
+            if (is.element(labels[i], rownames(d))) {
+                ipos <- which(rownames(d) == labels[i])
+                Q[r, astart[p] + ipos] <- 1
+            }
+        }
+    }
+    a <- solve(Q, col, 1e-19)
+    for(i in 1:n) {
+        for(j in 1:n) {
+            sum <- 0
+            sumv <- 0
+            for(p in 1:k) {
+                d <- st[[p]]
+                if (is.element(labels[i], rownames(d)) & is.element(labels[j], rownames(d))) {
+                    ipos <- which(rownames(d) == labels[i])
+                    jpos <- which(rownames(d) == labels[j])
+                    sum <- sum + sp[p]*(a[p]*d[ipos, jpos] + a[astart[p] + ipos] + a[astart[p] + jpos])
+                    sumv <- sumv + sp[p]*(a[p]*d[ipos, jpos])*(a[p]*d[ipos, jpos])
+                }
+            }
+            X[i, j] <- sum/w[i, j]
+            V[i, j] <- sumv/(w[i, j] * w[i, j])
+            if (i == j)
+                X[i, j] <- V[i, j] <- 0
+        }
+    }
+    list(X, V)
+}
diff --git a/R/additive.R b/R/additive.R
new file mode 100644 (file)
index 0000000..efd72f9
--- /dev/null
@@ -0,0 +1,27 @@
+## additive.R (2011-10-11)
+
+##   Incomplete Distance Matrix Filling
+
+## Copyright 2011 Andrei-Alin Popescu
+
+## This file is part of the R-package `ape'.
+## See the file ../COPYING for licensing issues.
+
+.addit_ultra <- function(libs, X)
+{
+    if (is.matrix(X)) X <- as.dist(X)
+    X[is.na(X)] <- -1
+    X[X < 0] <- -1
+    X[is.nan(X)] <- -1
+    N <- attr(X, "Size")
+    labels <- attr(X, "Labels")
+    if (is.null(labels)) labels <- as.character(1:N)
+    m <- sum(X == -1)
+    ans <- .C(libs, as.double(X), as.integer(N),
+              as.integer(m), double(N*N), PACKAGE = "ape")
+    matrix(ans[[4]], N, N)
+}
+
+additive <- function(X) .addit_ultra("additive", X)
+
+ultrametric <- function(X) .addit_ultra("ultrametric", X)
diff --git a/R/as.bitsplits.R b/R/as.bitsplits.R
new file mode 100644 (file)
index 0000000..ace9af4
--- /dev/null
@@ -0,0 +1,58 @@
+## as.bitsplits.R (2011-10-19)
+
+##   Conversion Among Split Classes
+
+## Copyright 2011 Emmanuel Paradis
+
+## This file is part of the R-package `ape'.
+## See the file ../COPYING for licensing issues.
+
+as.bitsplits <- function(x) UseMethod("as.bitsplits")
+
+as.bitsplits.prop.part <- function(x)
+{
+    foo <- function(vect, RAWVECT) {
+        res <- RAWVECT
+        for (y in vect) {
+            i <- ceiling(y/8)
+            res[i] <- res[i] | as.raw(2^(8 - ((y - 1) %% 8) - 1))
+        }
+        res
+    }
+
+    N <- length(x) # number of splits
+    n <- length(x[[1]]) # number of tips
+
+    nr <- ceiling(n/8)
+    mat <- raw(N * nr)
+    dim(mat) <- c(nr, N)
+
+    RAWVECT <- raw(nr)
+
+    for (i in 1:N) mat[, i] <- foo(x[[i]], RAWVECT)
+
+    ## add the n trivial splits of size 1... :
+    mat.bis <- raw(n * nr)
+    dim(mat.bis) <- c(nr, n)
+    for (i in 1:n) mat.bis[, i] <- foo(i, RAWVECT)
+
+    ## ... drop the trivial split of size n... :
+    mat <- cbind(mat.bis, mat[, -1, drop = FALSE])
+
+    ## ... update the split frequencies... :
+    freq <- attr(x, "number")
+    freq <- c(rep(freq[1L], n), freq[-1L])
+
+    ## ... and numbers:
+    N <- N + n - 1L
+
+    structure(list(matsplit = mat, labels = attr(x, "labels"),
+                   freq =  freq), class = "bitsplits")
+}
+
+print.bitsplits <- function(x, ...)
+{
+    cat('Object of class "bitsplits"\n')
+    cat('   ', length(x$labels), 'tips\n')
+    cat('   ', length(x$freq), 'partitions\n\n')
+}
diff --git a/R/is.compatible.R b/R/is.compatible.R
new file mode 100644 (file)
index 0000000..522e4f7
--- /dev/null
@@ -0,0 +1,36 @@
+## is.compatible.R (2011-10-11)
+
+##   Check Compatibility of Splits
+
+## Copyright 2011 Andrei-Alin Popescu
+
+## This file is part of the R-package `ape'.
+## See the file ../COPYING for licensing issues.
+
+is.compatible <- function(obj) UseMethod("is.compatible")
+
+is.compatible.bitsplits <- function(obj)
+{
+    m <- obj$matsplit
+    n <- ncol(m)
+    ntaxa <- length(obj$labels)
+    for (i in 1:(n - 1))
+        for (j in (i + 1):n)
+            if (!arecompatible(m[, i], m[, j], ntaxa))
+                return(FALSE)
+    TRUE
+}
+
+arecompatible <-function(x, y, n)
+{
+    msk <- !as.raw(2^(8 - (n %% 8)) - 1)
+
+    foo <- function(v) {
+        lv <- length(v)
+        v[lv] <- v[lv] & msk
+        as.integer(all(v == as.raw(0)))
+    }
+
+    nE <- foo(x & y) + foo(x & !y) + foo(!x & y) + foo(!x & !y)
+    if (nE == 1) TRUE else FALSE
+}
diff --git a/R/mvr.R b/R/mvr.R
new file mode 100644 (file)
index 0000000..a2218fc
--- /dev/null
+++ b/R/mvr.R
@@ -0,0 +1,49 @@
+## mvr.R (2011-10-11)
+
+##   Minimum Variance Reduction
+
+## Copyright 2011 Andrei-Alin Popescu
+
+## This file is part of the R-package `ape'.
+## See the file ../COPYING for licensing issues.
+
+mvr <- function(X, V)
+{
+    if (is.matrix(X)) X <- as.dist(X)
+    if (is.matrix(V)) V <- as.dist(V)
+    if (any(is.na(X)))
+        stop("missing values are not allowed in the distance matrix")
+    if (any(is.na(V)))
+        stop("missing values are not allowed in the variance matrix")
+    N <- attr(X, "Size")
+    labels <- attr(X, "Labels")
+    if (is.null(labels)) labels <- as.character(1:N)
+    ans <- .C("mvr", as.double(X), as.double(V), as.integer(N),
+              integer(2*N - 3), integer(2*N - 3), double(2*N - 3),
+              DUP = FALSE, NAOK = TRUE, PACKAGE = "ape")
+    obj <- list(edge = cbind(ans[[4]], ans[[5]]), edge.length = ans[[6]],
+                tip.label = labels, Nnode = N - 2L)
+    class(obj) <- "phylo"
+    reorder(obj)
+}
+
+mvrs <- function(X, V, fs = 15)
+{
+    if (is.matrix(X)) X <- as.dist(X)
+    if (is.matrix(V)) V <- as.dist(V)
+
+    X[is.na(X)] <- -1
+    X[X < 0] <- -1
+    X[is.nan(X)] <- -1
+
+    N <- attr(X, "Size")
+    labels <- attr(X, "Labels")
+    if (is.null(labels)) labels <- as.character(1:N)
+    ans <- .C("mvrs", as.double(X), as.double(V), as.integer(N),
+              integer(2*N - 3), integer(2*N - 3), double(2*N - 3),
+              as.integer(fs), DUP = FALSE, NAOK = TRUE, PACKAGE = "ape")
+    obj <- list(edge = cbind(ans[[4]], ans[[5]]), edge.length = ans[[6]],
+                tip.label = labels, Nnode = N - 2L)
+    class(obj) <- "phylo"
+    reorder(obj)
+}
diff --git a/R/njs.R b/R/njs.R
new file mode 100644 (file)
index 0000000..1f7ee8a
--- /dev/null
+++ b/R/njs.R
@@ -0,0 +1,31 @@
+## njs.R (2011-10-11)
+
+## Tree Reconstruction from Incomplete Distances With NJ* or bio-NJ*
+
+## Copyright 2011 Andrei-Alin Popescu
+
+## This file is part of the R-package `ape'.
+## See the file ../COPYING for licensing issues.
+
+.NJS_BIONJS <- function(libs, X, fs = 15)
+{
+    if (is.matrix(X)) X <- as.dist(X)
+    X[is.na(X)] <- -1
+    X[X < 0] <- -1
+    X[is.nan(X)] <- -1
+    N <- attr(X, "Size")
+    labels <- attr(X, "Labels")
+    if (is.null(labels)) labels <- as.character(1:N)
+    ans <- .C(libs, as.double(X), as.integer(N), integer(2*N - 3),
+              integer(2*N - 3), double(2*N - 3), as.integer(fs),
+              DUP = FALSE, NAOK = TRUE, PACKAGE = "ape")
+    obj <- list(edge = cbind(ans[[3]], ans[[4]]), edge.length = ans[[5]],
+                tip.label = labels, Nnode = N - 2L)
+    class(obj) <- "phylo"
+    reorder(obj)
+}
+
+
+njs <- function(X, fs = 15) .NJS_BIONJS("njs", X = X, fs = fs)
+
+bionjs <- function(X, fs = 15) .NJS_BIONJS("bionjs", X = X, fs = fs)
diff --git a/R/treePop.R b/R/treePop.R
new file mode 100644 (file)
index 0000000..673af88
--- /dev/null
@@ -0,0 +1,25 @@
+## treePop.R (2011-10-11)
+
+##   Tree Popping
+
+## Copyright 2011 Andrei-Alin Popescu
+
+## This file is part of the R-package `ape'.
+## See the file ../COPYING for licensing issues.
+
+treePop <- function(obj)
+{
+    mf <- obj$matsplit
+    labels <- obj$labels
+    n <- length(labels)
+    imf <- as.integer(mf)
+    freq <- obj$freq
+    mimf <- matrix(imf, nrow(mf), ncol(mf))
+    ans <- .C("treePop", mimf, as.double(freq), as.integer(ncol(mf)),
+              as.integer(n), integer(2*n - 3), integer(2*n - 3),
+              double(2*n - 3), DUP = FALSE, NAOK = TRUE, PACKAGE = "ape")
+    obj <- list(edge = cbind(ans[[5]], ans[[6]]), edge.length = ans[[7]],
+                tip.label = labels, Nnode = n - 2L)
+    class(obj) <- "phylo"
+    reorder(obj)
+}
diff --git a/R/triangMtd.R b/R/triangMtd.R
new file mode 100644 (file)
index 0000000..3f2d891
--- /dev/null
@@ -0,0 +1,42 @@
+## treePop.R (2011-10-11)
+
+## Tree Reconstruction With the Triangles Method
+
+## Copyright 2011 Andrei-Alin Popescu
+
+## This file is part of the R-package `ape'.
+## See the file ../COPYING for licensing issues.
+
+triangMtd <- function(X)
+{
+    if (is.matrix(X)) X <- as.dist(X)
+    if (any(is.na(X)))
+        stop("missing values are not allowed in the distance matrix")
+    N <- attr(X, "Size")
+    labels <- attr(X, "Labels")
+    if (is.null(labels)) labels <- as.character(1:N)
+    ans <- .C("triangMtd", as.double(X), as.integer(N), integer(2*N - 3),
+              integer(2*N - 3), double(2*N - 3),
+              DUP = FALSE, NAOK = TRUE, PACKAGE = "ape")
+    obj <- list(edge = cbind(ans[[3]], ans[[4]]), edge.length = ans[[5]],
+                tip.label = labels, Nnode = N - 2L)
+    class(obj) <- "phylo"
+    reorder(obj)
+}
+
+triangMtds <- function(X)
+{
+    if (is.matrix(X)) X <- as.dist(X)
+    X[is.na(X)] <- -1
+    X[X < 0] <- -1
+    N <- attr(X, "Size")
+    labels <- attr(X, "Labels")
+    if (is.null(labels)) labels <- as.character(1:N)
+    ans <- .C("triangMtds", as.double(X), as.integer(N), integer(2*N - 3),
+              integer(2*N - 3), double(2*N - 3),
+              DUP = FALSE, NAOK = TRUE, PACKAGE = "ape")
+    obj <- list(edge = cbind(ans[[3]], ans[[4]]), edge.length = ans[[5]],
+                tip.label = labels, Nnode = N - 2L)
+    class(obj) <- "phylo"
+    reorder(obj)
+}
diff --git a/Thanks b/Thanks
index b979f72f70942b62a3935ec15f8d751337e44d4d..0d9b4736e7f3d7c17000bb753c45e1373475a8b0 100644 (file)
--- a/Thanks
+++ b/Thanks
@@ -19,3 +19,6 @@ the French "Programme inter-EPST Bioinformatique" (2001-2003).
 
 Financial support was provided by the Department of Information
 Systems of IRD in form a "SPIRALES" project (2006).
+
+Financial support was provided by Google to Andrei-Alin Popescu during
+the 2011 Google Summer of Code (GSoC 2011).
\ No newline at end of file
diff --git a/man/SDM.Rd b/man/SDM.Rd
new file mode 100644 (file)
index 0000000..bceedf4
--- /dev/null
@@ -0,0 +1,34 @@
+\name{SDM}
+\alias{SDM}
+\title{Construction of Consensus Distance Matrix With SDM}
+\description{
+  This function implements the SDM method of Criscuolo et al. (2006) for
+  a set of n distance matrices.
+}
+\usage{
+SDM(...)
+}
+\arguments{
+  \item{\dots}{2n elements (with n > 1), the first n elements are the
+    distance matrices, the next n elements are the sequence length from
+    which the matrices have been estimated (can be seen as a degree of
+    confidence in matrices).}
+}
+\details{
+  Reconstructs a consensus distance matrix from a set of input distance
+  matrices on overlapping sets of taxa. Potentially missing values in
+  the supermatrix are represented by \code{NA}. An error is returned if
+  the input distance matrices can not resolve to a consensus matrix.
+}
+\value{
+  a 2-element list containing a distance matrix labelled by the union of
+  the set of taxa of the input distance matrices, and a variance matrix
+  associated to the returned distance matrix.
+}
+\references{
+  Criscuolo, A., Berry, V., Douzery, E. J. P. , and Gascuel, O. (2006)
+  SDM: A fast distance-based approach for (super)tree building in
+  phylogenomics. \emph{Systematic Biology}, \bold{55}, 740--755.
+}
+\author{Andrei Popescu \email{niteloserpopescu@gmail.com}}
+\keyword{models}
diff --git a/man/additive.Rd b/man/additive.Rd
new file mode 100644 (file)
index 0000000..df8c649
--- /dev/null
@@ -0,0 +1,26 @@
+\name{additive}
+\alias{additive}
+\alias{ultrametric}
+\title{Incomplete Distance Matrix Filling}
+\description{
+  Fills missing entries from incomplete distance matrix using the
+  additive or the ultrametric procedure (see reference for details).
+}
+\usage{
+additive(X)
+ultrametric(X)
+}
+\arguments{
+  \item{X}{a distance matrix or an object of class \code{"dist"}.}
+}
+\value{
+  a distance matrix.
+}
+\references{
+  Makarenkov, V. and Lapointe, F.-J. (2004) A weighted least-squares
+  approach for inferring phylogenies from incomplete distance
+  matrices. \emph{Bioinformatics}, \bold{20}, 2113--2121.
+  \url{http://dx.doi.org/10.1093/bioinformatics/bth211}
+}
+\author{Andrei Popescu \email{niteloserpopescu@gmail.com}}
+\keyword{manip}
index 41351daa6a8dbf856511793080cff09ceec308fc..9242c9f6450a093e4b16784ad78d781bf30064c9 100644 (file)
@@ -4,9 +4,9 @@
 \title{Global Comparison of two Phylogenies}
 \usage{
 \method{all.equal}{phylo}(target, current, use.edge.length = TRUE,
-                          use.tip.label = TRUE, index.return = FALSE,
-                          tolerance = .Machine$double.eps ^ 0.5,
-                          scale = NULL, \dots)
+                   use.tip.label = TRUE, index.return = FALSE,
+                   tolerance = .Machine$double.eps ^ 0.5,
+                   scale = NULL, \dots)
 }
 \arguments{
   \item{target}{an object of class \code{"phylo"}.}
diff --git a/man/as.bitsplits.Rd b/man/as.bitsplits.Rd
new file mode 100644 (file)
index 0000000..fed91dd
--- /dev/null
@@ -0,0 +1,28 @@
+\name{as.bitsplits}
+\alias{as.bitsplits}
+\alias{as.bitsplits.prop.part}
+\alias{print.bitsplits}
+\title{Conversion Among Split Classes}
+\description{
+  \code{as.bitsplits} is a generic function with a method for objects of
+  class \code{"prop.part"}.
+}
+\usage{
+as.bitsplits(x)
+\method{as.bitsplits}{prop.part}(x)
+\method{print}{bitsplits}(x, ...)
+}
+\arguments{
+  \item{x}{an object of the appropriate class.}
+  \item{\dots}{further arguments passed to or from other methods.}
+}
+\value{
+  an object of class \code{"bitsplits"} or NULL for \code{print}.
+}
+\author{Emmanuel Paradis}
+\examples{
+tr <- rtree(20)
+pp <- prop.part(tr)
+as.bitsplits(pp)
+}
+\keyword{manip}
index 6e0975e1ff3de650e251565d70ab743f6d9cdf73..11ab1d217426952ab911c4da510bf3f8fe759169 100644 (file)
@@ -35,7 +35,7 @@ bd.time(phy, birth, death, BIRTH = NULL, DEATH = NULL,
   primitives can be found in the help page of \code{\link{yule.time}}.
 
   The model is fitted by minimizing the least squares deviation between
-  the observed and predicted distributions of branching times. These
+  the observed and the predicted distributions of branching times. These
   computations rely heavily on numerical integrations. If \code{fast =
   FALSE}, integrations are done with R's \code{\link[stats]{integrate}}
   function. If \code{fast = TRUE}, a faster but less accurate function
diff --git a/man/is.compatible.Rd b/man/is.compatible.Rd
new file mode 100644 (file)
index 0000000..e8f2af7
--- /dev/null
@@ -0,0 +1,25 @@
+\name{is.compatible}
+\alias{is.compatible}
+\alias{is.compatible.bitsplits}
+\alias{arecompatible}
+\title{Check Compatibility of Splits}
+\description{
+  \code{is.compatible} is a generic function with a method for the class
+  \code{"bitsplits"}. It checks whether a set of splits is compatible
+  using the \code{arecompatible} function.
+}
+\usage{
+is.compatible(obj)
+\method{is.compatible}{bitsplits}(obj)
+arecompatible(x, y, n)
+}
+\arguments{
+  \item{obj}{an object of class \code{"bitsplits"}.}
+  \item{x, y}{a vector of mode raw\code{}.}
+  \item{n}{the number of taxa in the splits.}
+}
+\value{
+  \code{TRUE} if the splits are compatible, \code{FALSE} otherwise.
+}
+\author{Andrei Popescu \email{niteloserpopescu@gmail.com}}
+\keyword{manip}
diff --git a/man/mvr.Rd b/man/mvr.Rd
new file mode 100644 (file)
index 0000000..68f0bc9
--- /dev/null
@@ -0,0 +1,33 @@
+\name{mvr}
+\alias{mvr}
+\alias{mvrs}
+\title{Minimum Variance Reduction}
+\description{
+  Phylogenetic tree construction based on the minimum variance reduction.
+}
+\usage{
+mvr(X, V)
+mvrs(X, V, fs = 15)
+}
+\arguments{
+  \item{X}{a distance matrix.}
+  \item{V}{a variance matrix.}
+  \item{fs}{agglomeration criterion parameter.}
+}
+\details{
+  The MVR method can be seen as a version of BIONJ which is not
+  restricted to the Poison model of variance (Gascuel 2000).
+}
+\value{
+  an object of class \code{"phylo"}.
+}
+\references{
+  Criscuolo, A. and Gascuel, O. (2008). Fast NJ-like algorithms to deal
+  with incomplete distance matrices. \emph{BMC Bioinformatics}, 9.
+
+  Gascuel, O. (2000). Data model and classification by trees: the
+  minimum variance reduction (MVR) method. \emph{Journal of
+    Classification}, \bold{17}, 67--99.
+}
+\author{Andrei Popescu \email{niteloserpopescu@gmail.com}}
+\keyword{models}
diff --git a/man/njs.Rd b/man/njs.Rd
new file mode 100644 (file)
index 0000000..90a3444
--- /dev/null
@@ -0,0 +1,32 @@
+\name{njs}
+\alias{njs}
+\alias{bionjs}
+\title{Tree Reconstruction from Incomplete Distances With NJ* or bio-NJ*}
+\description{
+  Reconstructs a phylogenetic tree from a distance matrix with possibly
+  missing values.
+}
+\usage{
+njs(X, fs = 15)
+bionjs(X, fs = 15)
+}
+\arguments{
+  \item{X}{a distance matrix.}
+  \item{fs}{argument \emph{s} of the agglomerative criterion.}
+}
+\details{
+  Missing values represented by either \code{NA} or any negative number.
+
+  Basically, the Q* criterion is applied to all the pairs of leaves, and
+  the \emph{s} highest scoring ones are chosen for further analysis by
+  the agglomeration criteria that better handle handle missing
+  distances (see references for details).
+}
+\value{
+  an object of class \code{"phylo"}.
+}
+\references{
+  \url{http://www.biomedcentral.com/1471-2105/9/166}
+}
+\author{Andrei Popescu \email{niteloserpopescu@gmail.com}}
+\keyword{models}
diff --git a/man/treePop.Rd b/man/treePop.Rd
new file mode 100644 (file)
index 0000000..1262414
--- /dev/null
@@ -0,0 +1,19 @@
+\name{treePop}
+\alias{treePop}
+\title{Tree Popping}
+\description{
+  Method for reconstructing phylogenetic trees from an object of class
+  splits using tree popping.
+}
+\usage{
+treePop(obj)
+}
+\arguments{
+  \item{obj}{an object of class \code{"bitsplit"}.}
+}
+\value{
+  an object of class "phylo" which displays all the splits
+  in the input object.
+}
+\author{Andrei Popescu \email{niteloserpopescu@gmail.com}}
+\keyword{models}
diff --git a/man/triangMtd.Rd b/man/triangMtd.Rd
new file mode 100644 (file)
index 0000000..d90ff1e
--- /dev/null
@@ -0,0 +1,23 @@
+\name{triangMtd}
+\alias{triangMtd}
+\alias{triangMtds}
+\title{Tree Reconstruction Based on the Triangles Method}
+\usage{
+triangMtd(X)
+triangMtds(X)
+}
+\arguments{
+  \item{X}{a distance matrix}.
+}
+\description{
+  Fast distance-based costruction method. Should only be used when
+  distance measures are fairly reliable.
+}
+\value{
+  an object of class \code{"phylo"}.
+}
+\references{
+  \url{http://archive.numdam.org/ARCHIVE/RO/RO_2001__35_2/RO_2001__35_2_283_0/RO_2001__35_2_283_0.pdf}
+}
+\author{Andrei Popescu \email{niteloserpopescu@gmail.com}}
+\keyword{models}
diff --git a/src/additive.c b/src/additive.c
new file mode 100644 (file)
index 0000000..d2fcfaf
--- /dev/null
@@ -0,0 +1,66 @@
+/* additive.c    2011-10-11 */
+
+/* Copyright 2011 Andrei-Alin Popescu */
+
+/* This file is part of the R-package `ape'. */
+/* See the file ../COPYING for licensing issues. */
+
+#include "ape.h"
+
+void additive(double *dd, int* np,int* mp,double *ret)//d received as dist object, -1 for missing entries
+{
+    int n=*np;
+    int m=*mp;
+    int i=0,j=0;
+    double max=dd[0];
+    double d[n][n];
+    for(i=1;i<n;i++)
+    {d[i-1][i-1]=0;
+     for(j=i+1;j<=n;j++)
+      {
+         d[i-1][j-1]=d[j-1][i-1]=dd[give_index(i,j,n)];
+         if(dd[give_index(i,j,n)]>max)
+          {
+            max=dd[give_index(i,j,n)];
+          }
+      }
+    }
+    d[n-1][n-1]=0;
+
+  int entrCh=0;
+   do{
+    entrCh=0;
+    for(i=0;i<n-1;i++)
+     for(j=i+1;j<n;j++)
+      {
+         if(d[i][j]!=-1)continue;
+         double minimax=max;
+         int k=0;
+         int sw=0;
+         for(k=0;k<n;k++)
+          {int l=0;
+             if(d[i][k]==-1 || d[j][k]==-1)continue;
+           for(l=0;l<n;l++)
+            {
+             if(k==l || d[k][l]==-1 || d[i][l]==-1 || d[j][l]==-1)continue;
+             sw=1;
+             double mx=(((d[i][k]+d[j][l])>(d[i][l]+d[j][k]))?(d[i][k]+d[j][l]):(d[i][l]+d[j][k]));
+             mx-=d[k][l];
+             if(mx<minimax){minimax=mx;}
+            }
+          }
+        if(sw==1)
+          {
+            d[i][j]=d[j][i]=minimax;
+            m--;
+            entrCh=1;
+          }
+      }
+   }while(entrCh==1);
+  int ij=0;
+  for(i=0;i<n;i++)
+   for(j=0;j<n;j++)
+    {
+       ret[ij++]=d[i][j];
+    }
+}
diff --git a/src/ape.c b/src/ape.c
new file mode 100644 (file)
index 0000000..c507b65
--- /dev/null
+++ b/src/ape.c
@@ -0,0 +1,14 @@
+/* ape.c    2011-10-11 */
+
+/* Copyright 2011 Emmanuel Paradis */
+
+/* This file is part of the R-package `ape'. */
+/* See the file ../COPYING for licensing issues. */
+
+#include "ape.h"
+
+int give_index(int i, int j, int n)
+{
+       if (i > j) return(DINDEX(j, i));
+       else return(DINDEX(i, j));
+}
diff --git a/src/ape.h b/src/ape.h
new file mode 100644 (file)
index 0000000..a0d0720
--- /dev/null
+++ b/src/ape.h
@@ -0,0 +1,26 @@
+/* ape.h    2011-10-11 */
+
+/* Copyright 2011 Emmanuel Paradis */
+
+/* This file is part of the R-package `ape'. */
+/* See the file ../COPYING for licensing issues. */
+
+#include <R.h>
+
+#define DINDEX(i, j) n*(i - 1) - i*(i - 1)/2 + j - i - 1
+
+/* in ape.c */
+int give_index(int i, int j, int n);
+
+/* in njs.c */
+void choosePair(double* D, int n, double* R, int* s, int* sw, int* x, int* y, int fS);
+double cnxy(int x, int y, int n, double* D);
+int mxy(int x,int y, int n, double* D);
+double nxy(int x, int y, int n, double* D);
+int cxy(int x, int y, int n, double* D);
+
+/* in triangMtd.c */
+void triangMtd(double* d, int* np, int* ed1, int* ed2, double* edLen);
+int * getPathBetween(int x, int y, int n, int* ed1, int* ed2, int numEdges);
+int give_indexx(int i, int j, int n); /* a variant of the above */
+
diff --git a/src/bionjs.c b/src/bionjs.c
new file mode 100644 (file)
index 0000000..b3f599b
--- /dev/null
@@ -0,0 +1,394 @@
+/* bionjs.c    2011-10-11 */
+
+/* Copyright 2011 Andrei-Alin Popescu */
+
+/* This file is part of the R-package `ape'. */
+/* See the file ../COPYING for licensing issues. */
+
+#include "ape.h"
+
+void bionjs(double *D, int *N, int *edge1, int *edge2, double *edge_length,int* fsS)
+{       //assume missing values are denoted by -1
+       double *S,*R , *v,*new_v, Sdist, Ndist, *new_dist, A, B, smallest_S, x, y;
+       int n, i, j, k, ij, smallest, OTU1, OTU2, cur_nod, o_l, *otu_label;
+        /*for(i=0;i<n*(n-1)/2;i++)
+          {if(isNA(D[i])){D[i]=-1;}
+          }*/
+        int *s;//s contains |Sxy|, which is all we need for agglomeration
+        double *newR;
+        int *newS;
+        int fS=*fsS;
+
+       R = &Sdist;
+       new_dist = &Ndist;
+       otu_label = &o_l;
+        n = *N;
+       cur_nod = 2*n - 2;
+
+       R = (double*)R_alloc(n*(n - 1)/2, sizeof(double));
+        v = (double*)R_alloc(n*(n - 1)/2, sizeof(double));
+        new_v = (double*)R_alloc(n*(n - 1)/2, sizeof(double));
+        S = (double*)R_alloc(n + 1, sizeof(double));
+        newR = (double*)R_alloc(n*(n - 1)/2, sizeof(double));
+       new_dist = (double*)R_alloc(n*(n - 1)/2, sizeof(double));
+       otu_label = (int*)R_alloc(n + 1, sizeof(int));
+        s = (int*)R_alloc(n*(n - 1)/2, sizeof(int));
+        newS = (int*)R_alloc(n*(n - 1)/2, sizeof(int));
+
+       for (i = 1; i <= n; i++) otu_label[i] = i; /* otu_label[0] is not used */
+
+       k = 0;
+        //populate the v matrix
+        for(i=1;i<n;i++)
+         for(j=i+1;j<=n;j++)
+          {
+           v[give_index(i,j,n)]=D[give_index(i,j,n)];
+          }
+        //compute Sxy and Rxy
+        for(i=0;i<n*(n-1)/2;i++)
+          {newR[i]=0;
+           newS[i]=0;
+           s[i]=0;
+           R[i]=0;
+          }
+
+        for(i=1;i<n;i++)
+         for(j=i+1;j<=n;j++)
+         {//algorithm assumes i,j /in Sij, so skip pair if it is not known
+          if(D[give_index(i,j,n)]==-1)
+            {
+              continue;
+            }
+          for(k=1;k<=n;k++)
+           {//ij is the pair for which we compute
+            //skip k if we do not know the distances between it and i AND j
+
+             if(k==i || k==j)
+               {
+                  s[give_index(i,j,n)]++;
+                  continue;
+               }
+              if(D[give_index(i,k,n)]==-1 || D[give_index(j,k,n)]==-1)continue;
+              //Rprintf("%i\n",k);
+              s[give_index(i,j,n)]++;
+              R[give_index(i,j,n)]+=D[give_index(i,k,n)];
+              R[give_index(i,j,n)]+=D[give_index(j,k,n)];
+           }
+         }
+
+        /*for(i=1;i<n;i++)
+                  {
+                    for(j=i+1;j<=n;j++)
+                      {
+                        Rprintf("R[%i,%i]=%f ",i,j,R[give_index(i,j,n)]);
+                      }
+                    Rprintf("\n");
+                  }
+
+                for(i=1;i<n;i++)
+                  {
+                    for(j=i+1;j<=n;j++)
+                      {
+                        Rprintf("s[%i,%i]=%i ",i,j,s[give_index(i,j,n)]);
+                      }
+                    Rprintf("\n");
+                  }*/
+
+        k=0;
+        int sw=1;//if 1 then incomplete
+       while (n > 3) {
+
+               ij = 0;
+                for(i=1;i<n;i++)
+                 for(j=i+1;j<=n;j++)
+                  {newR[give_index(i,j,n)]=0;
+                   newS[give_index(i,j,n)]=0;
+                  }
+               smallest_S = -1e50;
+                if(sw==0)
+                    for(i=1;i<=n;i++)
+                       {S[i]=0;
+                       }
+
+               B=n-2;
+                if(sw==1)
+                     {
+                      choosePair(D,n,R,s,&sw,&OTU1,&OTU2,fS);
+                     }
+                 else{ //Rprintf("distance matrix is now complete\n");
+                        for (i=1;i<=n;i++)
+                         for(j=1;j<=n;j++)
+                           {if(i==j)continue;
+                             //Rprintf("give_index(%i,%i)=%i\n",i,j,give_index(i,j,n));
+                             //Rprintf("D[%i,%i]=%f\n",i,j,D[give_index(i,j,n)]);
+                             S[i]+=D[give_index(i,j,n)];
+                           }
+                        B=n-2;
+                        //Rprintf("n=%i,B=%f",n,B);
+                       for (i = 1; i < n; i++) {
+                        for (j = i + 1; j <= n; j++) {
+                             //Rprintf("S[%i]=%f, S[%i]=%f, D[%i,%i]=%f, B=%f",i,S[i],j,S[j],i,j,D[give_index(i,j,n)],B);
+                                A=S[i]+S[j]-B*D[give_index(i,j,n)];
+                                //Rprintf("Q[%i,%i]=%f\n",i,j,A);
+                               if (A > smallest_S) {
+                                       OTU1 = i;
+                                       OTU2 = j;
+                                       smallest_S = A;
+                                       smallest = ij;
+                               }
+                               ij++;
+                       }
+                       }
+                     }
+
+                /*Rprintf("agglomerating %i and %i, Q=%f \n",OTU1,OTU2,smallest_S);
+
+                for(i=1;i<n;i++)
+                  {
+                    for(j=i+1;j<=n;j++)
+                      {
+                        Rprintf("R[%i,%i]=%f ",i,j,R[give_index(i,j,n)]);
+                      }
+                    Rprintf("\n");
+                  }
+
+                for(i=1;i<n;i++)
+                  {
+                    for(j=i+1;j<=n;j++)
+                      {
+                        Rprintf("s[%i,%i]=%i ",i,j,s[give_index(i,j,n)]);
+                      }
+                    Rprintf("\n");
+                  }
+
+                for(i=1;i<n;i++)
+                  {
+                    for(j=i+1;j<=n;j++)
+                      {
+                        Rprintf("d[%i,%i]=%f ",i,j,D[give_index(i,j,n)]);
+                      }
+                    Rprintf("\n");
+                  }*/
+                //update R and S, only if matrix still incomplete
+                if(sw==1)
+                for(i=1;i<n;i++)
+                {if(i==OTU1 || i==OTU2)continue;
+                 for(j=i+1;j<=n;j++)
+                  {if(j==OTU1 || j==OTU2)continue;
+                    if(D[give_index(i,j,n)]==-1)continue;
+                     if(D[give_index(i,OTU1,n)]!=-1 && D[give_index(j,OTU1,n)]!=-1)
+                      {//OTU1 was considered for Rij, so now subtract
+                       R[give_index(i,j,n)]-=(D[give_index(i,OTU1,n)]+D[give_index(j,OTU1,n)]);
+                       s[give_index(i,j,n)]--;
+                      }
+                     if(D[give_index(i,OTU2,n)]!=-1 && D[give_index(j,OTU2,n)]!=-1)
+                      {//OTU2 was considered for Rij, so now subtract
+                       R[give_index(i,j,n)]-=(D[give_index(i,OTU2,n)]+D[give_index(j,OTU2,n)]);
+                       s[give_index(i,j,n)]--;
+                      }
+                  }
+                }
+
+               edge2[k] = otu_label[OTU1];
+               edge2[k + 1] = otu_label[OTU2];
+               edge1[k] = edge1[k + 1] = cur_nod;
+
+                double sum=0;
+                double lamb=0;//the parameter used for matrix reduction
+                double lambSum=0;
+                for(i=1;i<=n;i++)
+                {if(i==OTU1 || i==OTU2)continue;
+                 if(D[give_index(OTU1,i,n)]==-1 || D[give_index(OTU2,i,n)]==-1)continue;
+                 sum+=(D[give_index(OTU1,i,n)]-D[give_index(OTU2,i,n)]);
+                 lambSum+=(v[give_index(OTU2,i,n)]-v[give_index(OTU1,i,n)]);
+                }
+                //if we stil have incomplete distances
+                if(sw==1)
+                {
+                 lamb=0.5+(1/(2*(s[give_index(OTU1,OTU2,n)]-2)*v[give_index(OTU2,OTU1,n)]))*lambSum;
+                }else{
+                 lamb=0.5+(1/(2*(n-2)*v[give_index(OTU2,OTU1,n)]))*lambSum;
+                     }
+
+                //although s was updated above, s[otu1,otu2] has remained unchanged
+                //so it is safe to use it here
+                //if complete distanes, use N-2, else use S
+                int down=B;
+                if(sw==1){down=s[give_index(OTU1,OTU2,n)]-2;}
+                if(down==0)
+                  {error("distance information insufficient to construct a tree, leaves %i and %i isolated from tree",OTU1,OTU2);
+                  }
+                //Rprintf("down=%f\n",B);
+                sum*=(1.0/(2*(down)));
+                //Rprintf("sum=%f\n",sum);
+                double dxy=D[give_index(OTU1,OTU2,n)]/2;
+
+                //Rprintf("R[%i,%i]:%f \n",OTU1,OTU2,sum);
+               edge_length[k] = dxy+sum;//OTU1
+                //Rprintf("l1:%f \n",edge_length[k]);
+               edge_length[k + 1] = dxy-sum;//OTU2
+                //Rprintf("l2:%f \n",edge_length[k+1]);
+               //no need to change distance matrix update for complete distance
+               //case, as pairs will automatically fall in the right cathegory
+
+                //OTU1=x, OTU2=y from formulas
+               A = D[give_index(OTU1,OTU2,n)];
+               ij = 0;
+               for (i = 1; i <= n; i++) {
+                       if (i == OTU1 || i == OTU2) continue;
+                        if(D[give_index(OTU1,i,n)]!=-1 && D[give_index(OTU2,i,n)]!=-1)
+                         {
+                            new_dist[ij]= lamb*(D[give_index(OTU1,i,n)]-edge_length[k])+(1-lamb)*(D[give_index(OTU2,i,n)]-edge_length[k+1]);
+                            new_v[ij]=lamb*v[give_index(OTU1,i,n)]+(1-lamb)*v[give_index(OTU2,i,n)]-lamb*(1-lamb)*v[give_index(OTU1,OTU2,n)];
+                         }else{
+                         if(D[give_index(OTU1,i,n)]!=-1)
+                                {
+                                 new_dist[ij]=D[give_index(OTU1,i,n)]-edge_length[k];
+                                 new_v[ij]=v[give_index(OTU1,i,n)];
+                                }else{
+                                      if(D[give_index(OTU2,i,n)]!=-1)
+                                        {
+                                            new_dist[ij]=D[give_index(OTU2,i,n)]-edge_length[k+1];
+                                            new_v[ij]=v[give_index(OTU2,i,n)];
+                                        }else{new_dist[ij]=-1;new_v[ij]=-1;}
+                                     }
+                              }
+
+                       ij++;
+               }
+
+                for (i = 1; i < n; i++) {
+                       if (i == OTU1 || i == OTU2) continue;
+                       for (j = i + 1; j <= n; j++) {
+                               if (j == OTU1 || j == OTU2) continue;
+                               new_dist[ij] = D[DINDEX(i, j)];
+                                new_v[ij]=v[give_index(i,j,n)];
+                               ij++;
+                       }
+               }
+
+                /*for(i=1;i<n-1;i++)
+                {
+                  for(j=i+1;j<=n-1;j++)
+                   {Rprintf("%f ",new_dist[give_index(i,j,n-1)]);
+                   }
+                  Rprintf("\n");
+                }*/
+                //compute Rui, only if distance matrix is still incomplete
+                ij=0;
+                if(sw==1)
+                for(i=2;i<n;i++)
+                  {
+                   ij++;
+                   if(new_dist[give_index(i,1,n-1)]==-1)continue;
+
+                   for(j=1;j<n;j++)
+                     {
+                       if(j==1 || j==i)
+                       {
+                         newS[give_index(1,i,n-1)]++;
+                         continue;
+                       }
+                       if(new_dist[give_index(i,j,n-1)]!=-1 && new_dist[give_index(1,j,n-1)]!=-1)
+                        {
+                          newS[give_index(1,i,n-1)]++;
+                          newR[give_index(1,i,n-1)]+=new_dist[give_index(i,j,n-1)];
+                          newR[give_index(1,i,n-1)]+=new_dist[give_index(1,j,n-1)];
+                        }
+                     }
+                  }
+                //fill in the rest of R and S, again only if distance matrix still
+                //incomplete
+                if(sw==1)
+                for(i=1;i<n;i++)
+                {if(i==OTU1 || i==OTU2)continue;
+                 for(j=i+1;j<=n;j++)
+                  {if(j==OTU1 || j==OTU2)continue;
+                   newR[ij]=R[give_index(i,j,n)];
+                   newS[ij]=s[give_index(i,j,n)];
+                   ij++;
+                  }
+                }
+                //update newR and newS with the new taxa, again only if distance
+                //matrix is still incomplete
+                if(sw==1)
+                for(i=2;i<n-1;i++)
+                {if(new_dist[give_index(1,i,n-1)]==-1)continue;
+                 for(j=i+1;j<=n-1;j++)
+                  {if(new_dist[give_index(1,j,n-1)]==-1)continue;
+                   newR[give_index(i,j,n-1)]+=(new_dist[give_index(1,i,n-1)]+new_dist[give_index(1,j,n-1)]);
+                   newS[give_index(i,j,n-1)]++;
+                  }
+                }
+               /* compute the branch lengths */
+
+
+
+               /* update before the next loop
+                  (we are sure that OTU1 < OTU2) */
+               if (OTU1 != 1)
+                       for (i = OTU1; i > 1; i--)
+                               otu_label[i] = otu_label[i - 1];
+               if (OTU2 != n)
+                       for (i = OTU2; i < n; i++)
+                               otu_label[i] = otu_label[i + 1];
+               otu_label[1] = cur_nod;
+
+
+
+               n--;
+               for (i = 0; i < n*(n - 1)/2; i++)
+                  {
+                    D[i] = new_dist[i];
+                    v[i] = new_v[i];
+                    if(sw==1)
+                       {
+                        R[i] = newR[i];
+                        s[i] = newS[i];
+                       }
+                  }
+               cur_nod--;
+               k = k + 2;
+       }
+        int dK=0;//number of known distances in final distance matrix
+        int iUK=-1;//index of unkown distance, if we have one missing distance
+        int iK=-1;//index of only known distance, only needed if dK==1
+        for (i = 0; i < 3; i++) {
+               edge1[*N*2 - 4 - i] = cur_nod;
+               edge2[*N*2 - 4 - i] = otu_label[i + 1];
+                if(D[i]!=-1){dK++;iK=i;}else{iUK=i;}
+       }
+        if(dK==2)
+         {//if two distances are known: assume our leaves are x,y,z, d(x,z) unknown
+          //and edge weights of three edges are a,b,c, then any b,c>0 that
+          //satisfy c-b=d(y,z)-d(x,y) a+c=d(y,z) are good edge weights, but for
+          //simplicity we assume a=c if d(yz)<d(xy) a=b otherwise, and after some
+          //algebra we get that we can set the missing distance equal to the
+          //maximum of the already present distances
+            double max=-1e50;
+          for(i=0;i<3;i++)
+            {if(i==iUK)continue;
+             if(D[i]>max)max=D[i];
+            }
+          D[iUK]=max;
+         }
+        if(dK==1)
+         {//through similar motivation as above, if we have just one known distance
+          //we set the other two distances equal to it
+          for(i=0;i<3;i++)
+            {if(i==iK)continue;
+             D[i]=D[iK];
+            }
+         }
+        if(dK==0)
+         {//no distances are known, we just set them to 1
+          for(i=0;i<3;i++)
+           {D[i]=1;
+           }
+         }
+        edge_length[*N*2 - 4] = (D[0] + D[1] - D[2])/2;
+       edge_length[*N*2 - 5] = (D[0] + D[2] - D[1])/2;
+       edge_length[*N*2 - 6] = (D[2] + D[1] - D[0])/2;
+}
+
+
diff --git a/src/mvr.c b/src/mvr.c
new file mode 100644 (file)
index 0000000..5f9fc0c
--- /dev/null
+++ b/src/mvr.c
@@ -0,0 +1,189 @@
+/* mvr.c    2011-10-11 */
+
+/* Copyright 2011 Andrei-Alin Popescu */
+
+/* This file is part of the R-package `ape'. */
+/* See the file ../COPYING for licensing issues. */
+
+#include "ape.h"
+
+void mvr(double *D, double* v,int *N, int *edge1, int *edge2, double *edge_length)
+{
+       double *S, Sdist, *new_v, Ndist, *new_dist, A, B, smallest_S, x, y;
+       int n, i, j, k, ij, smallest, OTU1, OTU2, cur_nod, o_l, *otu_label;
+
+       S = &Sdist;
+       new_dist = &Ndist;
+       otu_label = &o_l;
+        n = *N;
+       cur_nod = 2*n - 2;
+
+       S = (double*)R_alloc(n + 1, sizeof(double));
+       new_dist = (double*)R_alloc(n*(n - 1)/2, sizeof(double));
+        new_v = (double*)R_alloc(n*(n - 1)/2, sizeof(double));
+       otu_label = (int*)R_alloc(n + 1, sizeof(int));
+
+       for (i = 1; i <= n; i++) otu_label[i] = i; /* otu_label[0] is not used */
+
+       k = 0;
+
+       while (n > 3) {
+
+                /*for(i=1;i<n;i++)
+                  {
+                    for(j=i+1;j<=n;j++)
+                      {
+                        Rprintf("d[%i,%i]=%f ",i,j,D[give_index(i,j,n)]);
+                      }
+                    Rprintf("\n");
+                  } */
+
+               for (i = 1; i <= n; i++)
+                {double sum=0;
+                  for( j=1;j<=n;j++)
+                   {if(i==j)continue;
+                     //Rprintf("index(%i,%i)=%i\n",i,j,give_index(i,j,n));
+                     //Rprintf("D[%i,%i]=%f\n",i,j,D[give_index(i,j,n)]);
+                     sum+=D[give_index(i,j,n)];
+                   }
+                S[i]=sum;
+                //Rprintf("\n");
+                //Rprintf("S[%i]=%f\n",i,S[i]);
+                //Rprintf("\n");
+                }
+               ij = 0;
+               smallest_S = 1e50;
+               B = n - 2;
+               for (i = 1; i < n; i++) {
+                       for (j = i + 1; j <= n; j++) {
+
+                               A = B*D[ij] - S[i] - S[j];
+                                /*Rprintf("D[ij]=%f\n",D[ij]);
+                                Rprintf("S[%i]=%f\n",i,S[i]);
+                                Rprintf("S[%i]=%f\n",j,S[j]);
+                                Rprintf("B=%f\n",B);
+                                Rprintf("A=%f\n",(B*D[ij] - S[i] - S[j]));
+                                Rprintf("Q[%i,%i]=%f\n",i,j,A);*/
+                               if (A < smallest_S) {
+                                       OTU1 = i;
+                                       OTU2 = j;
+                                       smallest_S = A;
+                                       smallest = ij;
+                               }
+                               ij++;
+                       }
+               }
+
+                /*Rprintf("agglomerating %i and %i, Q=%f \n",OTU1,OTU2,smallest_S);
+
+                for(i=1;i<n;i++)
+                  {
+                    for(j=i+1;j<=n;j++)
+                      {
+                        Rprintf("d[%i,%i]=%f ",i,j,D[give_index(i,j,n)]);
+                      }
+                    Rprintf("\n");
+                  }
+
+                for(i=1;i<n;i++)
+                  {
+                    for(j=i+1;j<=n;j++)
+                      {
+                        Rprintf("v[%i,%i]=%f ",i,j,v[give_index(i,j,n)]);
+                      }
+                    Rprintf("\n");
+                  }*/
+
+               edge2[k] = otu_label[OTU1];
+               edge2[k + 1] = otu_label[OTU2];
+               edge1[k] = edge1[k + 1] = cur_nod;
+
+               /* get the distances between all OTUs but the 2 selected ones
+                  and the latter:
+                  a) get the sum for both
+                  b) compute the distances for the new OTU */
+                double miu=0;
+                double miuSum=0;
+                for(i=1;i<=n;i++)
+                 {
+                   if(i == OTU1 || i==OTU2)continue;
+                   //Rprintf("index(%i,%i)=%i index(%i,%i)=%i",i,OTU1,give_index(i,OTU1,n),i,OTU2,give_index(i,OTU2,n));
+                   miuSum+=(1/(v[give_index(i,OTU1,n)]+v[give_index(i,OTU2,n)]));
+                 }
+                miuSum=1/miuSum;
+                miu=miuSum/2;
+
+                double eLenSum=0;
+                for(i=1;i<=n;i++)
+                 {
+                   if(i == OTU1 || i==OTU2)continue;
+
+                   double wi=miu/(v[give_index(i,OTU1,n)]+v[give_index(i,OTU2,n)]);
+                   eLenSum+=wi*(D[give_index(i,OTU1,n)]-D[give_index(i,OTU2,n)]);
+                 }
+
+                edge_length[k]=D[give_index(OTU1,OTU2,n)]/2 + eLenSum;
+
+                eLenSum=0;
+                for(i=1;i<=n;i++)
+                 {
+                   if(i == OTU1 || i==OTU2)continue;
+
+                   double wi=miu/(v[give_index(i,OTU1,n)]+v[give_index(i,OTU2,n)]);
+                   eLenSum+=wi*(D[give_index(i,OTU2,n)]-D[give_index(i,OTU1,n)]);
+                 }
+
+                edge_length[k+1]=D[give_index(OTU1,OTU2,n)]/2 + eLenSum;
+
+               A = D[smallest];
+               ij = 0;
+               for (i = 1; i <= n; i++) {
+                       if (i == OTU1 || i == OTU2) continue;
+                       double xi = D[give_index(i, OTU1, n)]; /* dist between OTU1 and i */
+                       double yi = D[give_index(i, OTU2, n)]; /* dist between OTU2 and i */
+                        double lamb=v[give_index(i,OTU2,n)]/(v[give_index(i,OTU2,n)]+v[give_index(i,OTU1,n)]);
+                       new_dist[ij] = lamb*(xi-edge_length[k])+(1-lamb)*(yi-edge_length[k]);
+                        new_v[ij]=(v[give_index(i,OTU2,n)]*v[give_index(i,OTU1,n)])/(v[give_index(i,OTU2,n)]+v[give_index(i,OTU1,n)]);
+                       ij++;
+               }
+               /* compute the branch lengths */
+                //Rprintf("l2:%f \n",edge_length[k+1]);
+               /* update before the next loop
+                  (we are sure that OTU1 < OTU2) */
+               if (OTU1 != 1)
+                       for (i = OTU1; i > 1; i--)
+                               otu_label[i] = otu_label[i - 1];
+               if (OTU2 != n)
+                       for (i = OTU2; i < n; i++)
+                               otu_label[i] = otu_label[i + 1];
+               otu_label[1] = cur_nod;
+
+               for (i = 1; i < n; i++) {
+                       if (i == OTU1 || i == OTU2) continue;
+                       for (j = i + 1; j <= n; j++) {
+                               if (j == OTU1 || j == OTU2) continue;
+                               new_dist[ij] = D[DINDEX(i, j)];
+                                new_v[ij]=v[give_index(i,j,n)];
+                               ij++;
+                       }
+               }
+
+               n--;
+               for (i = 0; i < n*(n - 1)/2; i++)
+                 {D[i] = new_dist[i];
+                  v[i] = new_v[i];
+                 }
+               cur_nod--;
+               k = k + 2;
+       }
+
+       for (i = 0; i < 3; i++) {
+               edge1[*N*2 - 4 - i] = cur_nod;
+               edge2[*N*2 - 4 - i] = otu_label[i + 1];
+       }
+
+       edge_length[*N*2 - 4] = (D[0] + D[1] - D[2])/2;
+       edge_length[*N*2 - 5] = (D[0] + D[2] - D[1])/2;
+       edge_length[*N*2 - 6] = (D[2] + D[1] - D[0])/2;
+}
+
diff --git a/src/mvrs.c b/src/mvrs.c
new file mode 100644 (file)
index 0000000..d2aa342
--- /dev/null
@@ -0,0 +1,397 @@
+/* mvrs.c    2011-10-11 */
+
+/* Copyright 2011 Andrei-Alin Popescu */
+
+/* This file is part of the R-package `ape'. */
+/* See the file ../COPYING for licensing issues. */
+
+#include "ape.h"
+
+void mvrs(double *D, double* v,int *N, int *edge1, int *edge2, double *edge_length,int* fsS)
+{       //assume missing values are denoted by -1
+
+        double *S,*R ,*new_v, Sdist, Ndist, *new_dist, A, B, smallest_S, x, y;
+       int n, i, j, k, ij, smallest, OTU1, OTU2, cur_nod, o_l, *otu_label;
+        /*for(i=0;i<n*(n-1)/2;i++)
+          {if(isNA(D[i])){D[i]=-1;}
+          }*/
+        int *s;//s contains |Sxy|, which is all we need for agglomeration
+        double *newR;
+        int *newS;
+        int fS=*fsS;
+
+       R = &Sdist;
+       new_dist = &Ndist;
+       otu_label = &o_l;
+        n = *N;
+       cur_nod = 2*n - 2;
+
+       R = (double*)R_alloc(n*(n - 1)/2, sizeof(double));
+        new_v = (double*)R_alloc(n*(n - 1)/2, sizeof(double));
+        S = (double*)R_alloc(n + 1, sizeof(double));
+        newR = (double*)R_alloc(n*(n - 1)/2, sizeof(double));
+       new_dist = (double*)R_alloc(n*(n - 1)/2, sizeof(double));
+       otu_label = (int*)R_alloc(n + 1, sizeof(int));
+        s = (int*)R_alloc(n*(n - 1)/2, sizeof(int));
+        newS = (int*)R_alloc(n*(n - 1)/2, sizeof(int));
+
+       for (i = 1; i <= n; i++) otu_label[i] = i; /* otu_label[0] is not used */
+
+       k = 0;
+        //compute Sxy and Rxy
+
+        for(i=0;i<n*(n-1)/2;i++)
+          {newR[i]=0;
+           newS[i]=0;
+           s[i]=0;
+           R[i]=0;
+          }
+
+        for(i=1;i<n;i++)
+         for(j=i+1;j<=n;j++)
+         {//algorithm assumes i,j /in Sij, so skip pair if it is not known
+          if(D[give_index(i,j,n)]==-1)
+            {
+              continue;
+            }
+          for(k=1;k<=n;k++)
+           {//ij is the pair for which we compute
+            //skip k if we do not know the distances between it and i AND j
+
+             if(k==i || k==j)
+               {
+                  s[give_index(i,j,n)]++;
+                  //Rprintf("%i",s[give_index(i,j,n)]);
+
+                  continue;
+               }
+              if(D[give_index(i,k,n)]==-1 || D[give_index(j,k,n)]==-1)continue;
+              //Rprintf("%i\n",k);
+              s[give_index(i,j,n)]++;
+              R[give_index(i,j,n)]+=D[give_index(i,k,n)];
+              R[give_index(i,j,n)]+=D[give_index(j,k,n)];
+              //Rprintf("%i",s[give_index(i,j,n)]);
+              //Rprintf("%f",R[give_index(i,j,n)]);
+
+           }
+         }
+
+        /*for(i=1;i<n;i++)
+                  {
+                    for(j=i+1;j<=n;j++)
+                      {
+                        Rprintf("R[%i,%i]=%f ",i,j,R[give_index(i,j,n)]);
+                      }
+                    Rprintf("\n");
+                  }
+
+                for(i=1;i<n;i++)
+                  {
+                    for(j=i+1;j<=n;j++)
+                      {
+                        Rprintf("s[%i,%i]=%i ",i,j,s[give_index(i,j,n)]);
+                      }
+                    Rprintf("\n");
+                  }*/
+        k=0;
+        int sw=1;//if 1 then incomplete
+       while (n > 3) {
+
+               ij = 0;
+                for(i=1;i<n;i++)
+                 for(j=i+1;j<=n;j++)
+                  {newR[give_index(i,j,n)]=0;
+                   newS[give_index(i,j,n)]=0;
+                  }
+
+               smallest_S = -1e50;
+                if(sw==0)
+                    for(i=1;i<=n;i++)
+                       {S[i]=0;
+                       }
+
+               B=n-2;
+                if(sw==1)
+                     {
+                      choosePair(D,n,R,s,&sw,&OTU1,&OTU2,fS);
+                     }
+                 else{ //Rprintf("distance matrix is now complete\n");
+                        for (i=1;i<=n;i++)
+                         for(j=1;j<=n;j++)
+                           {if(i==j)continue;
+                          //Rprintf("give_index(%i,%i)=%i\n",i,j,give_index(i,j,n));
+                          //Rprintf("D[%i,%i]=%f\n",i,j,D[give_index(i,j,n)]);
+                             S[i]+=D[give_index(i,j,n)];
+                           }
+                        B=n-2;
+                        //Rprintf("n=%i,B=%f",n,B);
+                       for (i = 1; i < n; i++) {
+                        for (j = i + 1; j <= n; j++) {
+                                //Rprintf("S[%i]=%f, S[%i]=%f, D[%i,%i]=%f, B=%f",i,S[i],j,S[j],i,j,D[give_index(i,j,n)],B);
+                                A=S[i]+S[j]-B*D[give_index(i,j,n)];
+                                Rprintf("Q[%i,%i]=%f\n",i,j,A);
+                               if (A > smallest_S) {
+                                       OTU1 = i;
+                                       OTU2 = j;
+                                       smallest_S = A;
+                                       smallest = ij;
+                               }
+                               ij++;
+                       }
+                       }
+                     }
+                if(s[give_index(OTU1,OTU2,n)]<=2)
+                  {error("distance information insufficient to construct a tree, leaves %i and %i isolated from tree",OTU1,OTU2);
+                  }
+                //Rprintf("agglomerating %i and %i, Q=%f \n",OTU1,OTU2,smallest_S);
+
+                /*for(i=1;i<n;i++)
+                  {
+                    for(j=i+1;j<=n;j++)
+                      {
+                        Rprintf("R[%i,%i]=%f ",i,j,R[give_index(i,j,n)]);
+                      }
+                    Rprintf("\n");
+                  }
+
+                for(i=1;i<n;i++)
+                  {
+                    for(j=i+1;j<=n;j++)
+                      {
+                        Rprintf("s[%i,%i]=%i ",i,j,s[give_index(i,j,n)]);
+                      }
+                    Rprintf("\n");
+                  }
+
+                for(i=1;i<n;i++)
+                  {
+                    for(j=i+1;j<=n;j++)
+                      {
+                        Rprintf("d[%i,%i]=%f ",i,j,D[give_index(i,j,n)]);
+                      }
+                    Rprintf("\n");
+                  }*/
+
+                //update R and S, only if matrix still incomplete
+                if(sw==1)
+                for(i=1;i<n;i++)
+                {if(i==OTU1 || i==OTU2)continue;
+                 for(j=i+1;j<=n;j++)
+                  {if(j==OTU1 || j==OTU2)continue;
+                    if(D[give_index(i,j,n)]==-1)continue;
+                     if(D[give_index(i,OTU1,n)]!=-1 && D[give_index(j,OTU1,n)]!=-1)
+                      {//OTU1 was considered for Rij, so now subtract
+                       R[give_index(i,j,n)]-=(D[give_index(i,OTU1,n)]+D[give_index(j,OTU1,n)]);
+                       s[give_index(i,j,n)]--;
+                      }
+                     if(D[give_index(i,OTU2,n)]!=-1 && D[give_index(j,OTU2,n)]!=-1)
+                      {//OTU2 was considered for Rij, so now subtract
+                       R[give_index(i,j,n)]-=(D[give_index(i,OTU2,n)]+D[give_index(j,OTU2,n)]);
+                       s[give_index(i,j,n)]--;
+                      }
+                  }
+                }
+
+               edge2[k] = otu_label[OTU1];
+               edge2[k + 1] = otu_label[OTU2];
+               edge1[k] = edge1[k + 1] = cur_nod;
+
+                double miu=0;
+                double miuSum=0;
+                for(i=1;i<=n;i++)
+                 {
+                   if(i == OTU1 || i==OTU2)continue;
+                   if(D[give_index(i,OTU1,n)]==-1 || D[give_index(i,OTU2,n)]==-1)continue;
+                   //Rprintf("index(%i,%i)=%i index(%i,%i)=%i",i,OTU1,give_index(i,OTU1,n),i,OTU2,give_index(i,OTU2,n));
+                   miuSum+=(1/(v[give_index(i,OTU1,n)]+v[give_index(i,OTU2,n)]));
+                 }
+                miuSum=1/miuSum;
+                miu=miuSum/2;
+
+                double eLenSum=0;
+                for(i=1;i<=n;i++)
+                 {
+                   if(i == OTU1 || i==OTU2)continue;
+                   if(D[give_index(i,OTU1,n)]==-1 || D[give_index(i,OTU2,n)]==-1)continue;
+                   double wi=miu/(v[give_index(i,OTU1,n)]+v[give_index(i,OTU2,n)]);
+                   eLenSum+=wi*(D[give_index(i,OTU1,n)]-D[give_index(i,OTU2,n)]);
+                 }
+
+                edge_length[k]=D[give_index(OTU1,OTU2,n)]/2 + eLenSum;
+
+                eLenSum=0;
+                for(i=1;i<=n;i++)
+                 {
+                   if(i == OTU1 || i==OTU2)continue;
+                   if(D[give_index(i,OTU1,n)]==-1 || D[give_index(i,OTU2,n)]==-1)continue;
+                   double wi=miu/(v[give_index(i,OTU1,n)]+v[give_index(i,OTU2,n)]);
+                   eLenSum+=wi*(D[give_index(i,OTU2,n)]-D[give_index(i,OTU1,n)]);
+                 }
+
+                edge_length[k+1]=D[give_index(OTU1,OTU2,n)]/2 + eLenSum;
+
+               //no need to change distance matrix update for complete distance
+               //case, as pairs will automatically fall in the right cathegory
+
+                //OTU1=x, OTU2=y from formulas
+               A = D[give_index(OTU1,OTU2,n)];
+               ij = 0;
+               for (i = 1; i <= n; i++) {
+                       if (i == OTU1 || i == OTU2) continue;
+                        if(D[give_index(OTU1,i,n)]!=-1 && D[give_index(OTU2,i,n)]!=-1)
+                         {  double lamb=v[give_index(i,OTU2,n)]/(v[give_index(i,OTU2,n)]+v[give_index(i,OTU1,n)]);
+                            new_dist[ij]= lamb*(D[give_index(OTU1,i,n)]-edge_length[k])+(1-lamb)*(D[give_index(OTU2,i,n)]-edge_length[k+1]);
+                            new_v[ij]=(v[give_index(i,OTU2,n)]*v[give_index(i,OTU1,n)])/(v[give_index(i,OTU2,n)]+v[give_index(i,OTU1,n)]);
+                         }else{
+                         if(D[give_index(OTU1,i,n)]!=-1)
+                                {
+                                 new_dist[ij]=D[give_index(OTU1,i,n)]-edge_length[k];
+                                 new_v[ij]=v[give_index(OTU1,i,n)];
+                                }else{
+                                      if(D[give_index(OTU2,i,n)]!=-1)
+                                        {
+                                            new_dist[ij]=D[give_index(OTU2,i,n)]-edge_length[k+1];
+                                            new_v[ij]=v[give_index(OTU2,i,n)];
+                                        }else{new_dist[ij]=-1;new_v[ij]=-1;}
+                                     }
+                              }
+
+                       ij++;
+               }
+
+                for (i = 1; i < n; i++) {
+                       if (i == OTU1 || i == OTU2) continue;
+                       for (j = i + 1; j <= n; j++) {
+                               if (j == OTU1 || j == OTU2) continue;
+                               new_dist[ij] = D[DINDEX(i, j)];
+                                new_v[ij]=v[give_index(i,j,n)];
+                               ij++;
+                       }
+               }
+
+                /*for(i=1;i<n-1;i++)
+                {
+                  for(j=i+1;j<=n-1;j++)
+                   {Rprintf("%f ",new_dist[give_index(i,j,n-1)]);
+                   }
+                  Rprintf("\n");
+                }*/
+                //compute Rui, only if distance matrix is still incomplete
+                ij=0;
+                if(sw==1)
+                for(i=2;i<n;i++)
+                  {
+                   ij++;
+                   if(new_dist[give_index(i,1,n-1)]==-1)continue;
+
+                   for(j=1;j<n;j++)
+                     {
+                       if(j==1 || j==i)
+                       {
+                         newS[give_index(1,i,n-1)]++;
+                         continue;
+                       }
+                       if(new_dist[give_index(i,j,n-1)]!=-1 && new_dist[give_index(1,j,n-1)]!=-1)
+                        {
+                          newS[give_index(1,i,n-1)]++;
+                          newR[give_index(1,i,n-1)]+=new_dist[give_index(i,j,n-1)];
+                          newR[give_index(1,i,n-1)]+=new_dist[give_index(1,j,n-1)];
+                        }
+                     }
+                  }
+                //fill in the rest of R and S, again only if distance matrix still
+                //incomplete
+                if(sw==1)
+                for(i=1;i<n;i++)
+                {if(i==OTU1 || i==OTU2)continue;
+                 for(j=i+1;j<=n;j++)
+                  {if(j==OTU1 || j==OTU2)continue;
+                   newR[ij]=R[give_index(i,j,n)];
+                   newS[ij]=s[give_index(i,j,n)];
+                   ij++;
+                  }
+                }
+                //update newR and newS with the new taxa, again only if distance
+                //matrix is still incomplete
+                if(sw==1)
+                for(i=2;i<n-1;i++)
+                {if(new_dist[give_index(1,i,n-1)]==-1)continue;
+                 for(j=i+1;j<=n-1;j++)
+                  {if(new_dist[give_index(1,j,n-1)]==-1)continue;
+                   newR[give_index(i,j,n-1)]+=(new_dist[give_index(1,i,n-1)]+new_dist[give_index(1,j,n-1)]);
+                   newS[give_index(i,j,n-1)]++;
+                  }
+                }
+               /* compute the branch lengths */
+
+
+
+               /* update before the next loop
+                  (we are sure that OTU1 < OTU2) */
+               if (OTU1 != 1)
+                       for (i = OTU1; i > 1; i--)
+                               otu_label[i] = otu_label[i - 1];
+               if (OTU2 != n)
+                       for (i = OTU2; i < n; i++)
+                               otu_label[i] = otu_label[i + 1];
+               otu_label[1] = cur_nod;
+
+
+
+               n--;
+               for (i = 0; i < n*(n - 1)/2; i++)
+                  {
+                    D[i] = new_dist[i];
+                    v[i] = new_v[i];
+                    if(sw==1)
+                       {
+                        R[i] = newR[i];
+                        s[i] = newS[i];
+                       }
+                  }
+               cur_nod--;
+               k = k + 2;
+       }
+        int dK=0;//number of known distances in final distance matrix
+        int iUK=-1;//index of unkown distance, if we have one missing distance
+        int iK=-1;//index of only known distance, only needed if dK==1
+        for (i = 0; i < 3; i++) {
+               edge1[*N*2 - 4 - i] = cur_nod;
+               edge2[*N*2 - 4 - i] = otu_label[i + 1];
+                if(D[i]!=-1){dK++;iK=i;}else{iUK=i;}
+       }
+        if(dK==2)
+         {//if two distances are known: assume our leaves are x,y,z, d(x,z) unknown
+          //and edge weights of three edges are a,b,c, then any b,c>0 that
+          //satisfy c-b=d(y,z)-d(x,y) a+c=d(y,z) are good edge weights, but for
+          //simplicity we assume a=c if d(yz)<d(xy) a=b otherwise, and after some
+          //algebra we get that we can set the missing distance equal to the
+          //maximum of the already present distances
+            double max=-1e50;
+          for(i=0;i<3;i++)
+            {if(i==iUK)continue;
+             if(D[i]>max)max=D[i];
+            }
+          D[iUK]=max;
+         }
+        if(dK==1)
+         {//through similar motivation as above, if we have just one known distance
+          //we set the other two distances equal to it
+          for(i=0;i<3;i++)
+            {if(i==iK)continue;
+             D[i]=D[iK];
+            }
+         }
+        if(dK==0)
+         {//no distances are known, we just set them to 1
+          for(i=0;i<3;i++)
+           {D[i]=1;
+           }
+         }
+        edge_length[*N*2 - 4] = (D[0] + D[1] - D[2])/2;
+       edge_length[*N*2 - 5] = (D[0] + D[2] - D[1])/2;
+       edge_length[*N*2 - 6] = (D[2] + D[1] - D[0])/2;
+}
+
+
+
index 9d0b8f66a7ae5df84fb072fb228cc3a969ee8476..21b4c1e00f0cbbd164a9b10986262d54da8fb26c 100644 (file)
--- a/src/nj.c
+++ b/src/nj.c
@@ -1,21 +1,11 @@
-/* nj.c       2010-04-21 */
+/* nj.c       2011-10-20 */
 
-/* Copyright 2006-2010 Emmanuel Paradis
+/* Copyright 2006-2011 Emmanuel Paradis
 
 /* This file is part of the R-package `ape'. */
 /* See the file ../COPYING for licensing issues. */
 
-#include <R.h>
-
-#define DINDEX(i, j) n*(i - 1) - i*(i - 1)/2 + j - i - 1
-/* works if i < j strictly, and i = 1, ...;
-   see give_index() below */
-
-int give_index(int i, int j, int n)
-{
-       if (i > j) return(DINDEX(j, i));
-       else return(DINDEX(i, j));
-}
+#include "ape.h"
 
 double sum_dist_to_i(int n, double *D, int i)
 /* returns the sum of all distances D_ij between i and j
diff --git a/src/njs.c b/src/njs.c
new file mode 100644 (file)
index 0000000..2ccfdef
--- /dev/null
+++ b/src/njs.c
@@ -0,0 +1,667 @@
+/* njs.c    2011-10-11 */
+
+/* Copyright 2011 Andrei-Alin Popescu */
+
+/* This file is part of the R-package `ape'. */
+/* See the file ../COPYING for licensing issues. */
+
+#include "ape.h"
+
+int H(double t)
+{
+       if (t >= 0) return 1;
+       return 0;
+}
+
+void choosePair(double* D,int n,double* R,int* s, int* sw, int* x, int* y,int fS)
+{
+    int i=0,j=0,k=0;
+    int sww=0;
+    double cFS[fS];
+    int iFS[fS];
+    int jFS[fS];
+    for(k=0;k<fS;k++)
+              {cFS[k]=-1e50;
+               iFS[k]=0;
+               jFS[k]=0;
+              }
+    double max=-1e50;
+    for (i = 1; i < n; i++)
+      {
+       for (j = i + 1; j <= n; j++)
+         {if(D[give_index(i,j,n)]==-1){sww=1;continue;}
+           //Rprintf("R[%i,%i]=%f\n",i,j,R[give_index(i,j,n)]);
+           //Rprintf("s[%i,%i]=%i\n",i,j,s[give_index(i,j,n)]);
+           //Rprintf("D[%i,%i]=%f\n",i,j,D[give_index(i,j,n)]);
+           int tr=0;
+            double numb=((R[give_index(i,j,n)])/(s[give_index(i,j,n)]-2))-D[give_index(i,j,n)];
+           //Rprintf("numb(%i,%i)=%f\n",i,j,numb);
+           for(k=0;k<fS, cFS[k]>numb;k++);
+           //Rprintf("k=%i ",k);
+           for(tr=fS-1;tr>k;tr--)
+             {cFS[tr]=cFS[tr-1];
+              iFS[tr]=iFS[tr-1];
+              jFS[tr]=jFS[tr-1];
+             }
+
+            if(k<fS){cFS[k]=numb;
+                     iFS[k]=i;
+                     jFS[k]=j;
+                    }
+          }
+      }
+
+    //no missing distances, just return the one with maximal Q-criterion
+   if(sww==0){*x=iFS[0];*y=jFS[0];*sw=0;return;
+             }
+   /*for(k=0;k<fS;k++)
+              {Rprintf("value[%i]=%f ",k,cFS[k]);
+               Rprintf("i=%i ",iFS[k]);
+               Rprintf("j=%i ",jFS[k]);
+               Rprintf("\n");
+              }*/
+  //calculate N*(x,y)
+  for(i=0;i<fS;i++)
+   {
+    double nb=nxy(iFS[i],jFS[i],n,D);
+    if(nb>max){max=nb;}
+    cFS[i]=nb;
+   }
+  /*Rprintf("all N*(x,y)\n");
+  for(k=0;k<fS;k++)
+              {Rprintf("value[%i]=%f ",k,cFS[k]);
+               Rprintf("i=%i ",iFS[k]);
+               Rprintf("j=%i ",jFS[k]);
+               Rprintf("\n");
+              }*/
+  int dk=0;
+  //shift the max N*xy to the front of the array
+  for(i=0;i<fS;i++)
+   {
+      if(cFS[i]==max)
+        {
+          cFS[dk]=cFS[i];
+          iFS[dk]=iFS[i];
+          jFS[dk]=jFS[i];
+          dk++;
+        }
+   }
+  //if just one pair realises max N*xy, return it
+  if(dk==1){*x=iFS[0];*y=jFS[0];return;}
+  fS=dk;
+ /*Rprintf("max n*xy values\n");
+ for(k=0;k<fS;k++)
+              {Rprintf("value[%i]=%f ",k,cFS[k]);
+               Rprintf("i=%i ",iFS[k]);
+               Rprintf("j=%i ",jFS[k]);
+               Rprintf("\n");
+              }*/
+ max=-1e50;
+ //on the fron of the array containing max N*xy values compute cxy
+ for(i=0;i<fS;i++)
+  {
+    double nb=cxy(iFS[i],jFS[i],n,D);
+    if(nb>max){max=nb;}
+    cFS[i]=nb;
+  }
+ /*Rprintf("all c*xy values\n");
+ for(k=0;k<fS;k++)
+              {Rprintf("value[%i]=%f ",k,cFS[k]);
+               Rprintf("i=%i ",iFS[k]);
+               Rprintf("j=%i ",jFS[k]);
+               Rprintf("\n");
+              }*/
+  //and again shift maximal C*xy values at the fron of the array
+  dk=0;
+  for(i=0;i<fS;i++)
+   {
+      if(cFS[i]==max)
+        {
+          cFS[dk]=cFS[i];
+          iFS[dk]=iFS[i];
+          jFS[dk]=jFS[i];
+          dk++;
+        }
+   }
+ //if just one C*xy with maximal value, return pair realising it
+ if(dk==1){*x=iFS[0];*y=jFS[0];return;}
+ fS=dk;
+ /*Rprintf("max c*xy values\n");
+ for(k=0;k<fS;k++)
+              {Rprintf("value[%i]=%f ",k,cFS[k]);
+               Rprintf("i=%i ",iFS[k]);
+               Rprintf("j=%i ",jFS[k]);
+               Rprintf("\n");
+              }*/
+ max=-1e50;
+ //on the front of the array containing max C*xy compute m*xy
+ for(i=0;i<fS;i++)
+  {
+    double nb=mxy(iFS[i],jFS[i],n,D);
+    if(nb>max){max=nb;}
+    cFS[i]=nb;
+  }
+ /*Rprintf("all m*xy values\n");
+ for(k=0;k<fS;k++)
+              {Rprintf("value[%i]=%f ",k,cFS[k]);
+               Rprintf("i=%i ",iFS[k]);
+               Rprintf("j=%i ",jFS[k]);
+               Rprintf("\n");
+              }*/
+  //again shift maximal m*xy values to the fron of the array
+  dk=0;
+  for(i=0;i<fS;i++)
+   {
+      if(cFS[i]==max)
+        {
+          cFS[dk]=cFS[i];
+          iFS[dk]=iFS[i];
+          jFS[dk]=jFS[i];
+          dk++;
+        }
+   }
+ //if just one maximal value for m*xy return the pair realising it, found at 0
+ if(dk==1){*x=iFS[0];*y=jFS[0];return;}
+ fS=dk;
+ /*Rprintf("max m*xy values\n");
+ for(k=0;k<fS;k++)
+              {Rprintf("value[%i]=%f ",k,cFS[k]);
+               Rprintf("i=%i ",iFS[k]);
+               Rprintf("j=%i ",jFS[k]);
+               Rprintf("\n");
+              }*/
+ //and calculate cnxy on these values, but this time we do not shift, but simply
+ //return the pair realising the maximum, stored at iPos
+ max=-1e50;
+ int iPos=-1;
+ for(i=0;i<fS;i++)
+  {
+    double nb=cnxy(iFS[i],jFS[i],n,D);
+    if(nb>max){max=nb;iPos=i;}
+    cFS[i]=nb;
+  }
+ /*Rprintf("cN*xy\n");
+ Rprintf("value[%i]=%f ",0,cFS[0]);
+ Rprintf("i=%i ",iFS[0]);
+ Rprintf("j=%i ",jFS[0]);
+ Rprintf("\n");*/
+ *x=iFS[iPos];*y=jFS[iPos];
+}
+
+double cnxy(int x, int y, int n,double* D)
+{
+    int i=0;
+    int j=0;
+    double nMeanXY=0;
+    //Rprintf("cN[%i,%i]\n",x,y);
+    for(i=1;i<=n;i++)
+     {if(i==x || i==y)continue;
+      for(j=1;j<=n;j++)
+      {if(i==j)continue;
+       if(j==y || j==x)continue;
+       double n1=0;
+       double n2=0;
+       n1=D[give_index(i,x,n)];
+       n2=D[give_index(j,y,n)];
+       if(n1==-1 || n2==-1 || D[give_index(i,j,n)]==-1)continue;
+       nMeanXY+=(n1+n2-D[give_index(x,y,n)]-D[give_index(i,j,n)]);
+       //Rprintf("cnMeanXY after (%i,%i)=%f\n",i,j,nMeanXY);
+      }
+     }
+    return nMeanXY;
+}
+
+int mxy(int x,int y,int n,double* D)
+{
+    int i=0;
+    int mx[n+1];
+    int my[n+1];
+    for(i=1;i<=n;i++)
+      {
+        mx[i]=0;my[i]=0;
+      }
+    for(i=1;i<=n;i++)
+      {
+        if(i!=x && D[give_index(x,i,n)]==-1)
+          {
+            mx[i]=1;
+          }
+        if(i!=y && D[give_index(y,i,n)]==-1)
+          {
+            my[i]=1;
+          }
+      }
+    /*for(i=1;i<=n;i++)
+      {
+        Rprintf("mx[%i]=%i ",i,mx[i]);
+      }
+    Rprintf("\n");
+
+    for(i=1;i<=n;i++)
+      {
+        Rprintf("my[%i]=%i ",i,my[i]);
+      }
+    Rprintf("\n");*/
+
+    int xmy=0;
+    int ymx=0;
+    for(i=1;i<=n;i++)
+      {
+        if(mx[i]==1 && my[i]==0)
+          {
+            xmy++;
+          }
+        if(my[i]==1 && mx[i]==0)
+          {
+            ymx++;
+          }
+      }
+    //Rprintf("xmy=%i, ymx=%i, xmy+ymx=%i\n",xmy,ymx,xmy+ymx);
+    return xmy+ymx;
+}
+double nxy(int x, int y, int n,double* D)
+{
+    int sCXY=0;
+    int i=0;
+    int j=0;
+    double nMeanXY=0;
+    //Rprintf("N[%i,%i]\n",x,y);
+    for(i=1;i<=n;i++)
+     {if(i==x || i==y)continue;
+      for(j=1;j<=n;j++)
+      {if(i==j)continue;
+       if(j==x || j==y)continue;
+       double n1=0;
+       double n2=0;
+       n1=D[give_index(i,x,n)];
+       n2=D[give_index(j,y,n)];
+       if(n1==-1 || n2==-1 || D[give_index(i,j,n)]==-1)continue;
+       sCXY++;
+       //Rprintf("considered pair (%i,%i)\n",i,j);
+       nMeanXY+=H(n1+n2-D[give_index(x,y,n)]-D[give_index(i,j,n)]);
+       //Rprintf("nMeanXY after (%i,%i)=%f\n",i,j,nMeanXY);
+      }
+     }
+    //Rprintf("sCXY=%i",sCXY);
+    return nMeanXY/sCXY;
+}
+
+int cxy(int x, int y, int n,double* D)
+{
+    int sCXY=0;
+    int i=0;
+    int j=0;
+
+    for(i=1;i<=n;i++)
+     {if(i==x || i==y)continue;
+      for(j=1;j<=n;j++)
+      {if(i==j)continue;
+       if(j==x || j==y)continue;
+       double n1=0;
+       double n2=0;
+       if(i!=x)n1=D[give_index(i,x,n)];
+       if(j!=y)n2=D[give_index(j,y,n)];
+       if(n1==-1 || n2==-1 || D[give_index(i,j,n)]==-1)continue;
+       sCXY++;
+      }
+     }
+    return sCXY;
+}
+
+void njs(double *D, int *N, int *edge1, int *edge2, double *edge_length,int *fsS)
+{       //assume missing values are denoted by -1
+       double *S,*R, Sdist, Ndist, *new_dist, A, B, smallest_S, x, y;
+       int n, i, j, k, ij, smallest, OTU1, OTU2, cur_nod, o_l, *otu_label;
+        /*for(i=0;i<n*(n-1)/2;i++)
+          {if(isNA(D[i])){D[i]=-1;}
+          }*/
+        int *s;//s contains |Sxy|, which is all we need for agglomeration
+        double *newR;
+        int *newS;
+        int fS=*fsS;
+
+       R = &Sdist;
+       new_dist = &Ndist;
+       otu_label = &o_l;
+        n = *N;
+       cur_nod = 2*n - 2;
+
+       R = (double*)R_alloc(n*(n - 1)/2, sizeof(double));
+        S = (double*)R_alloc(n + 1, sizeof(double));
+        newR = (double*)R_alloc(n*(n - 1)/2, sizeof(double));
+       new_dist = (double*)R_alloc(n*(n - 1)/2, sizeof(double));
+       otu_label = (int*)R_alloc(n + 1, sizeof(int));
+        s = (int*)R_alloc(n*(n - 1)/2, sizeof(int));
+        newS = (int*)R_alloc(n*(n - 1)/2, sizeof(int));
+
+       for (i = 1; i <= n; i++) otu_label[i] = i; /* otu_label[0] is not used */
+
+       k = 0;
+
+        //compute Sxy and Rxy
+        for(i=0;i<n*(n-1)/2;i++)
+          {newR[i]=0;
+           newS[i]=0;
+           s[i]=0;
+           R[i]=0;
+          }
+
+        for(i=1;i<n;i++)
+         for(j=i+1;j<=n;j++)
+         {//algorithm assumes i,j /in Sij, so skip pair if it is not known
+          if(D[give_index(i,j,n)]==-1)
+            {
+              continue;
+            }
+          //Rprintf("for %i and %i :\n",i,j);
+          for(k=1;k<=n;k++)
+           {//ij is the pair for which we compute
+            //skip k if we do not know the distances between it and i AND j
+              if(k==i || k==j)
+               {
+                  s[give_index(i,j,n)]++;
+                  continue;
+               }
+              if(D[give_index(i,k,n)]==-1 || D[give_index(j,k,n)]==-1)continue;
+              //Rprintf("%i\n",k);
+              s[give_index(i,j,n)]++;
+              R[give_index(i,j,n)]+=D[give_index(i,k,n)];
+              R[give_index(i,j,n)]+=D[give_index(j,k,n)];
+           }
+         }
+
+        /*for(i=1;i<n;i++)
+                  {
+                    for(j=i+1;j<=n;j++)
+                      {
+                        Rprintf("R[%i,%i]=%f ",i,j,R[give_index(i,j,n)]);
+                      }
+                    Rprintf("\n");
+                  }
+
+                for(i=1;i<n;i++)
+                  {
+                    for(j=i+1;j<=n;j++)
+                      {
+                        Rprintf("s[%i,%i]=%i ",i,j,s[give_index(i,j,n)]);
+                      }
+                    Rprintf("\n");
+                  }*/
+
+        k=0;
+        int sw=1;//if 1 then incomplete
+       while (n > 3) {
+               ij = 0;
+                for(i=1;i<n;i++)
+                 for(j=i+1;j<=n;j++)
+                  {newR[give_index(i,j,n)]=0;
+                   newS[give_index(i,j,n)]=0;
+                  }
+               smallest_S = -1e50;
+                if(sw==0)
+                    for(i=1;i<=n;i++)
+                       {S[i]=0;
+                       }
+               B=n-2;
+                if(sw==1)
+                     {
+                      choosePair(D,n,R,s,&sw,&OTU1,&OTU2,fS);
+                     }
+                 else{ //Rprintf("distance matrix is now complete\n");
+                        for (i=1;i<=n;i++)
+                         for(j=1;j<=n;j++)
+                           {if(i==j)continue;
+                             //Rprintf("give_index(%i,%i)=%i\n",i,j,give_index(i,j,n));
+                             //Rprintf("D[%i,%i]=%f\n",i,j,D[give_index(i,j,n)]);
+                             S[i]+=D[give_index(i,j,n)];
+                           }
+                        B=n-2;
+                        //Rprintf("n=%i,B=%f",n,B);
+                       for (i = 1; i < n; i++) {
+                        for (j = i + 1; j <= n; j++) {
+                             //Rprintf("S[%i]=%f, S[%i]=%f, D[%i,%i]=%f, B=%f",i,S[i],j,S[j],i,j,D[give_index(i,j,n)],B);
+                                A=S[i]+S[j]-B*D[give_index(i,j,n)];
+                                //Rprintf("Q[%i,%i]=%f\n",i,j,A);
+                               if (A > smallest_S) {
+                                       OTU1 = i;
+                                       OTU2 = j;
+                                       smallest_S = A;
+                                       smallest = ij;
+                               }
+                               ij++;
+                       }
+                       }
+                     }
+
+                /*Rprintf("agglomerating %i and %i, Q=%f \n",OTU1,OTU2,smallest_S);
+
+                for(i=1;i<n;i++)
+                  {
+                    for(j=i+1;j<=n;j++)
+                      {
+                        Rprintf("R[%i,%i]=%f ",i,j,R[give_index(i,j,n)]);
+                      }
+                    Rprintf("\n");
+                  }
+
+                for(i=1;i<n;i++)
+                  {
+                    for(j=i+1;j<=n;j++)
+                      {
+                        Rprintf("s[%i,%i]=%i ",i,j,s[give_index(i,j,n)]);
+                      }
+                    Rprintf("\n");
+                  }
+
+                for(i=1;i<n;i++)
+                  {
+                    for(j=i+1;j<=n;j++)
+                      {
+                        Rprintf("d[%i,%i]=%f ",i,j,D[give_index(i,j,n)]);
+                      }
+                    Rprintf("\n");
+                  }*/
+                //update Rxy and Sxy, only if matrix still incomplete
+                if(sw==1)
+                for(i=1;i<n;i++)
+                {if(i==OTU1 || i==OTU2)continue;
+                 for(j=i+1;j<=n;j++)
+                  {if(j==OTU1 || j==OTU2)continue;
+                    if(D[give_index(i,j,n)]==-1)continue;
+                     if(D[give_index(i,OTU1,n)]!=-1 && D[give_index(j,OTU1,n)]!=-1)
+                      {//OTU1 was considered for Rij, so now subtract
+                       R[give_index(i,j,n)]-=(D[give_index(i,OTU1,n)]+D[give_index(j,OTU1,n)]);
+                       s[give_index(i,j,n)]--;
+                      }
+                     if(D[give_index(i,OTU2,n)]!=-1 && D[give_index(j,OTU2,n)]!=-1)
+                      {//OTU2 was considered for Rij, so now subtract
+                       R[give_index(i,j,n)]-=(D[give_index(i,OTU2,n)]+D[give_index(j,OTU2,n)]);
+                       s[give_index(i,j,n)]--;
+                      }
+                  }
+                }
+
+               edge2[k] = otu_label[OTU1];
+               edge2[k + 1] = otu_label[OTU2];
+               edge1[k] = edge1[k + 1] = cur_nod;
+
+               /* get the distances between all OTUs but the 2 selected ones
+                  and the latter:
+                  a) get the sum for both
+                  b) compute the distances for the new OTU */
+
+                double sum=0;
+
+                for(i=1;i<=n;i++)
+                 {if(i==OTU1 || i==OTU2)continue;
+                 if(D[give_index(OTU1,i,n)]==-1 || D[give_index(OTU2,i,n)]==-1)continue;
+                 sum+=(D[give_index(OTU1,i,n)]-D[give_index(OTU2,i,n)]);
+                 }
+                //although s was updated above, s[otu1,otu2] has remained unchanged
+                //so it is safe to use it here
+                //if complete distanes, use N-2, else use S
+                int down=B;
+                if(sw==1){down=s[give_index(OTU1,OTU2,n)]-2;}
+                if(down==0)
+                  {error("distance information insufficient to construct a tree, leaves %i and %i isolated from tree",OTU1,OTU2);
+                  }
+                //Rprintf("down=%i\n",down);
+                sum*=(1.0/(2*(down)));
+                //Rprintf("sum=%f\n",sum);
+                double dxy=D[give_index(OTU1,OTU2,n)]/2;
+
+                //Rprintf("R[%i,%i]:%f \n",OTU1,OTU2,sum);
+               edge_length[k] = dxy+sum;//OTU1
+                //Rprintf("l1:%f \n",edge_length[k]);
+               edge_length[k + 1] = dxy-sum;//OTU2
+                //Rprintf("l2:%f \n",edge_length[k+1]);
+               //no need to change distance matrix update for complete distance
+               //case, as pairs will automatically fall in the right cathegory
+               A = D[give_index(OTU1,OTU2,n)];
+               ij = 0;
+               for (i = 1; i <= n; i++) {
+                       if (i == OTU1 || i == OTU2) continue;
+                        if(D[give_index(OTU1,i,n)]!=-1 && D[give_index(OTU2,i,n)]!=-1)
+                         {
+                            new_dist[ij]=0.5*(D[give_index(OTU1,i,n)]-edge_length[k]+D[give_index(OTU2,i,n)]-edge_length[k+1]);
+                         }else{
+                         if(D[give_index(OTU1,i,n)]!=-1)
+                                {
+                                 new_dist[ij]=D[give_index(OTU1,i,n)]-edge_length[k];
+                                }else{
+                                      if(D[give_index(OTU2,i,n)]!=-1)
+                                        {
+                                            new_dist[ij]=D[give_index(OTU2,i,n)]-edge_length[k+1];
+                                        }else{new_dist[ij]=-1;}
+                                     }
+                              }
+
+                       ij++;
+               }
+
+                for (i = 1; i < n; i++) {
+                       if (i == OTU1 || i == OTU2) continue;
+                       for (j = i + 1; j <= n; j++) {
+                               if (j == OTU1 || j == OTU2) continue;
+                               new_dist[ij] = D[DINDEX(i, j)];
+                               ij++;
+                       }
+               }
+
+                /*for(i=1;i<n-1;i++)
+                {
+                  for(j=i+1;j<=n-1;j++)
+                   {Rprintf("%f ",new_dist[give_index(i,j,n-1)]);
+                   }
+                  Rprintf("\n");
+                }*/
+                //compute Rui, only if distance matrix is still incomplete
+                ij=0;
+                if(sw==1)
+                for(i=2;i<n;i++)
+                  {
+                   ij++;
+                   if(new_dist[give_index(i,1,n-1)]==-1)continue;
+
+                   for(j=1;j<n;j++)
+                     { if(j==1 || j==i)
+                       {
+                         newS[give_index(1,i,n-1)]++;
+                         continue;
+                       }
+                       if(new_dist[give_index(i,j,n-1)]!=-1 && new_dist[give_index(1,j,n-1)]!=-1)
+                        {
+                          newS[give_index(1,i,n-1)]++;
+                          newR[give_index(1,i,n-1)]+=new_dist[give_index(i,j,n-1)];
+                          newR[give_index(1,i,n-1)]+=new_dist[give_index(1,j,n-1)];
+                        }
+                     }
+                  }
+                //fill in the rest of R and S, again only if distance matrix still
+                //incomplete
+                for(i=1;i<n;i++)
+                {if(i==OTU1 || i==OTU2)continue;
+                 for(j=i+1;j<=n;j++)
+                  {if(j==OTU1 || j==OTU2)continue;
+                   newR[ij]=R[give_index(i,j,n)];
+                   newS[ij]=s[give_index(i,j,n)];
+                   ij++;
+                  }
+                }
+                //update newR and newS with the new taxa, again only if distance
+                //matrix is still incomplete
+                if(sw==1)
+                for(i=2;i<n-1;i++)
+                {if(new_dist[give_index(1,i,n-1)]==-1)continue;
+                 for(j=i+1;j<=n-1;j++)
+                  {if(new_dist[give_index(1,j,n-1)]==-1)continue;
+                    if(new_dist[give_index(i,j,n-1)]==-1)continue;
+                   newR[give_index(i,j,n-1)]+=(new_dist[give_index(1,i,n-1)]+new_dist[give_index(1,j,n-1)]);
+                   newS[give_index(i,j,n-1)]++;
+                  }
+                }
+
+               /* update before the next loop
+                  (we are sure that OTU1 < OTU2) */
+               if (OTU1 != 1)
+                       for (i = OTU1; i > 1; i--)
+                               otu_label[i] = otu_label[i - 1];
+               if (OTU2 != n)
+                       for (i = OTU2; i < n; i++)
+                               otu_label[i] = otu_label[i + 1];
+               otu_label[1] = cur_nod;
+
+               n--;
+               for (i = 0; i < n*(n - 1)/2; i++)
+                  {
+                    D[i] = new_dist[i];
+                    if(sw==1)
+                       {
+                        R[i] = newR[i];
+                        s[i] = newS[i];
+                       }
+                  }
+               cur_nod--;
+               k = k + 2;
+       }
+        int dK=0;//number of known distances in final distance matrix
+        int iUK=-1;//index of unkown distance, if we have one missing distance
+        int iK=-1;//index of only known distance, only needed if dK==1
+        for (i = 0; i < 3; i++) {
+               edge1[*N*2 - 4 - i] = cur_nod;
+               edge2[*N*2 - 4 - i] = otu_label[i + 1];
+                if(D[i]!=-1){dK++;iK=i;}else{iUK=i;}
+       }
+        if(dK==2)
+         {//if two distances are known: assume our leaves are x,y,z, d(x,z) unknown
+          //and edge weights of three edges are a,b,c, then any b,c>0 that
+          //satisfy c-b=d(y,z)-d(x,y) a+c=d(y,z) are good edge weights, but for
+          //simplicity we assume a=c if d(yz)<d(xy) a=b otherwise, and after some
+          //algebra we get that we can set the missing distance equal to the
+          //maximum of the already present distances
+            double max=-1e50;
+          for(i=0;i<3;i++)
+            {if(i==iUK)continue;
+             if(D[i]>max)max=D[i];
+            }
+          D[iUK]=max;
+         }
+        if(dK==1)
+         {//through similar motivation as above, if we have just one known distance
+          //we set the other two distances equal to it
+          for(i=0;i<3;i++)
+            {if(i==iK)continue;
+             D[i]=D[iK];
+            }
+         }
+        if(dK==0)
+         {//no distances are known, we just set them to 1
+          for(i=0;i<3;i++)
+           {D[i]=1;
+           }
+         }
+        edge_length[*N*2 - 4] = (D[0] + D[1] - D[2])/2;
+       edge_length[*N*2 - 5] = (D[0] + D[2] - D[1])/2;
+       edge_length[*N*2 - 6] = (D[2] + D[1] - D[0])/2;
+}
+
diff --git a/src/treePop.c b/src/treePop.c
new file mode 100644 (file)
index 0000000..5dfaad4
--- /dev/null
@@ -0,0 +1,242 @@
+/* treePop.c    2011-10-11 */
+
+/* Copyright 2011 Andrei-Alin Popescu */
+
+/* This file is part of the R-package `ape'. */
+/* See the file ../COPYING for licensing issues. */
+
+#include <math.h>
+#include <R.h>
+#include <stdint.h>
+
+int lsb(char* a)
+{
+       int i = 0;
+       while (a[i] == 0) i++; /* count number of elements = 0 */
+
+       int b = 7;
+       while ((a[i] & (1 << b)) == 0) b--;
+
+       return i*8 + (8 - b);
+}
+
+short count_bits(uint8_t n)
+{
+       short c; /* c accumulates the total bits set in v */
+       for (c = 0; n; c++)
+               n &= n - 1; /* clear the least significant bit set */
+       return c;
+}
+
+/* Print n as a binary number */
+/*void printbits(uint8_t n)
+{
+       uint8_t i;
+       i = 1 << (sizeof(n) * 8 - 1);
+
+       while (i > 0) {
+               if (n & i) Rprintf("1");
+               else Rprintf("0");
+               i >>= 1;
+       }
+}*/
+
+uint8_t* setdiff(uint8_t* x, uint8_t *y, int nrow) //x-y
+{
+       int i = 0;
+       uint8_t* ret = (uint8_t*)R_alloc(nrow, sizeof(uint8_t));
+       for (i = 0; i < nrow; i++) {
+               uint8_t tmp = (~y[i]);
+
+/*             Rprintf("x[%i]=",i);
+               printbits(x[i]);
+               Rprintf("\n");
+               Rprintf("y[%i]=",i);
+               printbits(y[i]);
+               Rprintf("\n");
+               Rprintf("tmp=\n");
+               printbits(tmp);
+               Rprintf("\n"); */
+
+               ret[i] = (x[i] & tmp);
+       }
+       return ret;
+}
+
+void treePop(int* splits, double* w,int* ncolp,int* np, int* ed1, int* ed2, double* edLen)
+  {
+    int n=*np;
+    int ncol=*ncolp;
+    int nrow=ceil(n/(double)8);
+    uint8_t split[nrow][ncol];
+    int i=0,j=0;
+
+    /*Rprintf("n=%i nrow=%i ncol=%i\n",n,nrow,ncol);
+    Rprintf("got\n");
+    for(i=0;i<ncol;i++)
+    {
+     for(j=0;j<nrow;j++)
+       {
+          Rprintf("%i ",splits[i*nrow+j]);
+       }
+     Rprintf("\n");
+    }*/
+
+    for(i=0;i<ncol;i++)
+    {
+     for(j=0;j<nrow;j++)
+       {
+         split[j][i]=(uint8_t)splits[i*nrow+j];
+       }
+    }
+    /*Rprintf("short-ed\n");
+    for(i=0;i<nrow;i++)
+    {
+      for(j=0;j<ncol;j++)
+       {
+         printbits(split[i][j]);
+         Rprintf("\n");
+       }
+      Rprintf("\n");
+    }*/
+
+   uint8_t vlabs[2*n-1][nrow];
+   for(i=0;i<nrow-1;i++)
+    {
+       vlabs[n+1][i]=255;
+    }
+   vlabs[n+1][nrow-1]=~((uint8_t)(pow(2,8-(n%8))-1));
+
+   int bitt_count[ncol];
+   uint8_t msk=~((uint8_t)(pow(2,8-(n%8))-1));//mask out trailing bits
+   //printbits(msk);
+   for(i=0;i<ncol;i++)
+    {
+      int sum=0;
+      for(j=0;j<nrow-1;j++)
+       { //Rprintf("countbits(%i)=%i ",split[j][i],count_bits(split[j][i]));
+         sum+=count_bits(split[j][i]);
+       }
+      uint8_t bt=split[nrow-1][i];
+      bt&=msk;
+      //Rprintf("countbits(%i)=%i ",bt,count_bits(bt));
+      sum+=count_bits(bt);
+     // Rprintf("bit_count[%i]=%i ",i,sum);
+      //Rprintf("\n");
+      if(sum>n/2)
+       {
+         for(j=0;j<nrow;j++)
+          {
+             split[j][i]=~split[j][i];
+          }
+         split[nrow-1][i]&=msk;
+         sum=n-sum;
+       }
+      bitt_count[i]=sum;
+    }
+   int ind[ncol];
+   for(i=0;i<ncol;i++)
+     {
+       ind[i]=i;
+     }
+
+   for(i=0;i<ncol-1;i++)
+     {
+       for(j=i+1;j<ncol;j++)
+        {
+          if(bitt_count[i]<bitt_count[j])
+            { int aux;
+              aux=bitt_count[i];
+              bitt_count[i]=bitt_count[j];
+              bitt_count[j]=aux;
+              aux=ind[i];
+              ind[i]=ind[j];
+              ind[j]=aux;
+            }
+        }
+     }
+   int nNodes=n+1;
+   int numEdges=0;
+   for(i=0;i<ncol;i++)
+    {  int ii=0;
+       //Rprintf("split %i\n",i);
+       uint8_t sp[nrow];
+       for(ii=0;ii<nrow;ii++)
+         {//copy split into sp
+          sp[ii]=split[ii][ind[i]];
+         }
+      //search for node whose labellings are a superset of the current split
+      for(j=n+1;j<=nNodes;j++)
+       {
+          uint8_t vl[nrow];
+          for(ii=0;ii<nrow;ii++)
+            {//copy vertex labeling into vl
+              vl[ii]=vlabs[j][ii];
+            }
+         int sw=0;//print current split
+         for(ii=0;ii<nrow;ii++)
+           {
+             //Rprintf("sp[%i]= ",ii);
+            //printbits(sp[ii]);
+           }
+         //Rprintf("\n");//print current label
+         for(ii=0;ii<nrow;ii++)
+           {
+            // Rprintf("vl[%i]= ",ii);
+           //  printbits(vl[ii]);
+           }
+         //Rprintf("\n");
+         uint8_t* sd=setdiff(sp,vl,nrow);
+         //print the setdifference
+         for(ii=0;ii<nrow/*-1*/;ii++)
+           { //Rprintf("sd[%i]=%i ",ii,sd[ii]);
+             if(sd[ii]!=0)sw=1;
+           }
+        // Rprintf("\n");
+
+         sd[nrow-1]&=msk;
+         //Rprintf("sd[%i]=%i ",nrow-1,sd[nrow-1]);
+         if(sd[nrow-1]!=0)sw=1;
+         if(sw==0)//setdiff==0, so we split vertex j
+          { // Rprintf("vertex %i",j);
+             ed1[numEdges]=j;
+
+             int gn=-1;
+            // Rprintf("bitt_count[%i]=%i\n",i,bitt_count[i]);
+             if(bitt_count[i]>=2)//if not trivial split
+             {nNodes++;
+              gn=nNodes;
+             }
+             else
+             {
+               gn=lsb(sp);
+             }
+            // Rprintf("gn=%i\n",gn);
+             ed2[numEdges]=gn;
+             edLen[numEdges]=w[ind[i]];
+             numEdges++;
+             uint8_t* sdd=setdiff(vl,sp,nrow);
+             for(ii=0;ii<nrow;ii++)//label current vertex byset difference
+                                   //and new vertex by actual split
+               {
+                 vlabs[j][ii]=sdd[ii];
+                 vlabs[gn][ii]=sp[ii];
+               }
+             //print new labels
+            // Rprintf("new v\n");
+             int jj=0;
+             for(ii=1;ii<=nNodes;ii++)
+              {//Rprintf("node %i : ",ii);
+               for(jj=0;jj<nrow;jj++)
+                {
+                  //printbits(vlabs[ii][jj]);
+                 // Rprintf(" ");
+                }
+              // Rprintf("\n");
+              }
+             break;
+          }
+
+       }
+    }
+  }
diff --git a/src/triangMtd.c b/src/triangMtd.c
new file mode 100644 (file)
index 0000000..90b9424
--- /dev/null
@@ -0,0 +1,357 @@
+/* triangMtd.c    2011-10-11 */
+
+/* Copyright 2011 Andrei-Alin Popescu */
+
+/* This file is part of the R-package `ape'. */
+/* See the file ../COPYING for licensing issues. */
+
+/*
+ * leafs labelled 1 to n. root labelled n+1. other nodes labelled n+1 to m
+ */
+
+#include <R.h>
+
+int give_indexx(int i, int j, int n)
+{       if (i == j) return 0;
+       if (i > j) return(n*(j - 1) - j*(j - 1)/2 + i - j - 1);
+       else return(n*(i - 1) - i*(i - 1)/2 + j - i - 1);
+}
+
+int pred(int k, int* ed1, int* ed2, int numEdges)
+/* find the predecesor of vertex k */
+{
+       int i = 0;
+       for (i = 0; i <= numEdges; i++)
+               if (ed2[i] == k) return ed1[i];
+       return -1;
+}
+
+int* getPathBetween(int x, int y, int n, int* ed1, int* ed2, int numEdges)
+//get the path between vertices x and y in an array ord.
+//ord[i]=j means that we go between i and j on the path between x and y
+ {  int i=0;
+    int k=x;
+    int ch[2*n-1];//ch[i]==1 implies {k,pred(k)} on path between x and y
+    for(i=1;i<=2*n-2;i++)
+      {ch[i]=0;
+      }
+
+    while(k!=n+1)
+      {
+        ch[k]=1;
+        k=pred(k,ed1,ed2,numEdges);
+      }
+    k=y;
+
+    while(k!=n+1)
+      {
+        ch[k]++;
+        k=pred(k,ed1,ed2,numEdges);
+      }
+        int *ord=(int*)malloc(sizeof(int)*(2*n-1));
+        //starting from x, fill ord
+
+
+        int p=x;
+
+        while(ch[p]==1)
+          {
+            int aux=p;
+            p=pred(p,ed1,ed2,numEdges);
+            ord[aux]=p;
+          }
+        p=y;
+
+        while(ch[p]==1)
+          {
+            int aux=p;
+            p=pred(p,ed1,ed2,numEdges);
+            ord[p]=aux;//other way
+          }
+
+      return ord;
+ }
+
+int getLength(int x, int y, int* ed1, int* ed2, int numEdges, int* edLen)
+/* get length of edge {x,y}, -1 if edge does not exist */
+{
+       int i = 0;
+       for (i = 0; i <= numEdges; i++)
+               if ((ed1[i] == x && ed2[i] == y) || (ed1[i] == y && ed2[i] == x))
+                       return edLen[i];
+       return -1;
+}
+
+void triangMtd(double* d, int* np, int* ed1,int* ed2, double* edLen)
+{
+    int n=*np;
+    int i=0;
+    int j=0;
+    int ij=-1;
+
+    for(i=0;i<n-1;i++)
+    {
+     for(j=i+1;j<n;j++)
+       {ij++;
+        //error("%f ,giveindex= %f",d[ij],d[give_indexx(i,j,n)]);
+       }
+    }
+
+    /*double d[n*n];
+
+
+    for(i=0;i<n;i++)
+    {d[i*n+i]=0;
+     for(j=i;j<n;j++)
+       {d[i*n+j]=d[j*n+i];
+       }
+    }
+    for(i=0;i<n;i++)
+    {
+     for(j=0;j<n;j++)
+       {error("%f ",d[i*n+j]);
+       }
+     error("\n");
+    }*/
+
+    double minW=0;
+    int x=-1,y=-1,numEdges=0;
+    int st[n+1];//array holding at position i the starting point of attachment path of leaf i
+    int ed[n+1];//array holding at position i the ending point of attachment path of leaf i
+    double l[n+1];//array holding at position i the distance of leaf i from the partially constructed tree
+    int w[n+1];//presence array:w[i]=1 if leaf i is added to the tree, 0 otherwise
+    int wSize=0;//number of leaves added so far
+
+    for(i=1;i<=n;i++){w[i]=0;}
+    //find the minimum distance in our matrix
+
+    int sw=0;
+    //choose two leaves x,y such that d[x][y] is minimal
+    for(i=1;i<n;i++)
+    {
+     for(j=i+1;j<=n;j++)
+       {if(d[give_indexx(i,j,n)]<=0)continue;
+        minW=d[give_indexx(i,j,n)];
+        x=i;
+        y=j;
+        sw=1;
+        break;
+       }
+     if(sw)break;
+    }
+    for(i=1;i<n;i++)
+     for(j=i+1;j<=n;j++)
+       { if(i==j)continue;
+         if(d[give_indexx(i,j,n)]<minW)
+           {
+             minW=d[give_indexx(i,j,n)];
+             x=i;
+             y=j;
+           }
+       }
+
+    w[x]=1; w[y]=1;//mark x and y as added
+
+    //calculate the distance between leaves not in w and leaves in w
+    for(i=1;i<=n;i++)
+      {
+        if(w[i]){continue;}
+        st[i]=x;ed[i]=y;
+        l[i]=0.5*(d[give_indexx(i,x,n)]+d[give_indexx(i,y,n)]-d[give_indexx(x,y,n)]);//distance of leaf i from the
+//initial tree
+      }
+
+
+    wSize+=3;//since we construct a star tree on three leaves
+    int nv=n+1;//since first n numbers are reserved for leaves
+    double minDist=1000;
+    int x3=1;
+    //search for x3 that is closest to the tree
+    for(i=1;i<=n;i++)
+          {if(w[i])continue;//skip if leaf already added
+            if(l[i]<minDist)
+              {
+                minDist=l[i];
+                x3=i;
+              }
+          }
+    //construct initial star tree on three leaves:x,y,x3. nv is the interior
+    //vertex and also the root of the tree
+    ed1[numEdges]=nv;
+    ed2[numEdges]=x;
+    edLen[numEdges]=0.5*(d[give_indexx(x3,x,n)]+d[give_indexx(y,x,n)]-d[give_indexx(y,x3,n)]);
+    numEdges++;
+    ed1[numEdges]=nv;
+    ed2[numEdges]=y;
+    edLen[numEdges]=0.5*(d[give_indexx(y,x3,n)]+d[give_indexx(y,x,n)]-d[give_indexx(x,x3,n)]);
+    numEdges++;
+    ed1[numEdges]=nv;
+    ed2[numEdges]=x3;
+    edLen[numEdges]=0.5*(d[give_indexx(x3,x,n)]+d[give_indexx(y,x3,n)]-d[give_indexx(y,x,n)]);
+    w[x3]=1;
+
+        /*Rprintf("new tree\n");
+        for(i=0;i<2*n-3;i++)
+        {
+        Rprintf("%i->%i of length:%f \n",ed1[i],ed2[i],edLen[i]);
+        }
+        Rprintf("end new tree\n");*/
+
+    //calculate distance of leaves not yet added to the star tree
+    int s;
+    for(s=1;s<=n;s++)
+         {if(w[s])continue;
+           for(i=1;i<=n;i++)
+            {  if(i==x3)continue;
+               if(!w[i])continue;
+               double newL=0.5*(d[give_indexx(i,s,n)]+d[give_indexx(s,x3,n)]-d[give_indexx(i,x3,n)]);
+               if(newL<l[s])
+                  {
+                   l[s]=newL;
+                   st[s]=i;
+                   ed[s]=x3;
+                  }
+            }
+         }
+
+    //partial tree construction begins here
+
+    while(wSize<n)
+      { double minDist=1e50;
+        int z=1;
+
+        //search for leaf z which is closest to partial tree
+        for(i=1;i<=n;i++)
+          {if(w[i])continue;//skip if leaf already added
+            if(l[i]<minDist)
+              {
+                minDist=l[i];
+                z=i;
+              }
+          }
+        //x and y are the leaves on the path between which z is attached
+        x=st[z];y=ed[z];
+
+
+        int* ord=getPathBetween(x,y,n,ed1,ed2,numEdges);
+       /* Rprintf("ord\n");
+        for(i=1;i<=2*n-2;i++)
+          {Rprintf("ord[%i]=%i ",i,ord[i]);
+          }
+        Rprintf("\n");*/
+        //order the path from x to y, in an array ord, ord[i]=j means vertex i comes before j
+        //first count number of edges on path (i.e count all i s.t ch[i]==1)
+
+
+        //look for the edge on the path x to y to subdivide
+        int p=x;
+        double sum=0;
+        double prevSum=0;
+        int aux=0;
+        int subdiv=-1;//index of edge to subdivide
+        //error("d[y,x]=%f,d[z,x]=%f,d[z,y]=%f\n",d[give_indexx(y,x,n)],d[give_indexx(z,x,n)],d[give_indexx(z,y,n)]);
+        double lx=0.5*(d[give_indexx(y,x,n)]+d[give_indexx(z,x,n)]-d[give_indexx(z,y,n)]);//distance of attachment
+       // Rprintf("adding %i on the path between %i and %i at a distance from x of %f and a distance of %f from tree",z,x,y,lx,minDist);
+        //point from x
+        //Rprintf("Adding leaf %i, between %i and %i\n",z,x,y);
+       // Rprintf("%i situated at a distance of %d from tree",z,minDist);
+        int sw=0;
+        //Rprintf("path between %i and %i\n",x,y);
+
+        while(p!=y && sum<lx)
+          {
+            aux=p;
+            //error("%i -> %i ",p,ord[p]);
+            p=ord[p];
+            prevSum=sum;
+            for(i=0;i<=numEdges;i++)
+              {
+                if((ed1[i]==aux && ed2[i]==p)||(ed2[i]==aux && ed1[i]==p))
+                  {
+                    if(ed2[i]==aux && ed1[i]==p){sw=1;}
+                    subdiv=i;
+                    sum+=edLen[i];
+                  }
+              }
+          }
+
+        nv++;
+        //subdivide subdiv with a node labelled nv
+        //length calculation
+        //multifurcating vertices
+
+        int edd=ed2[subdiv];
+        ed2[subdiv]=nv;
+        edLen[subdiv]= (sw==1)?(lx-prevSum):(sum-lx);//check which 'half' of the
+                                                     //path the leaf belongs to
+                                                     //and updates accordingly
+        //error("sum=%f, prevsum=%f\n",sum,prevSum);
+        //error("lx-prevSum=%f, sum-lx=%f, minDist=%f",lx-prevSum,sum-lx,minDist);
+        numEdges++;
+        ed1[numEdges]=nv;
+        ed2[numEdges]=edd;
+        edLen[numEdges]= (sw==1)?(sum-lx):(lx-prevSum);
+        numEdges++;
+        edLen[numEdges]=minDist;
+        ed1[numEdges]=nv;
+        ed2[numEdges]=z;
+
+        wSize++;
+        w[z]=1;
+
+        /*update distance matrix, only needed for incomplete distances
+        int ii;
+      for(ii=0;ii<n;ii++)
+      {if(!w[ii])continue;
+       error("path between %i and %i\n",ii,z);
+        int* ord=getPathBetween(ii,z,n,ed1,ed2,numEdges);
+
+        p=ii;
+        int newDist=0;
+        error("path");
+        for(i=0;i<2*n-2;i++)
+          {error("ord[%i]=%i ",i,ord[i]);
+          }
+       error("\n");
+       error("innn\n");
+        while(p!=z)
+          { //error("p=%i\n",p);
+            int aux=p;
+            p=ord[p];
+            newDist+=getLength(ii,z,ed1,ed2,numEdges,edLen);
+          }
+
+        error("outt\n");
+
+        d[ii][z]=d[z][ii]=newDist;
+       }*/
+        //update l[s] for all s not yet added
+        int s;
+        for(s=1;s<=n;s++)
+         {if(w[s])continue;
+           for(i=1;i<=n;i++)
+            {if(i==z)continue;
+               if(i!=x && i!=y)continue;//we only consider x and y as being other leaf
+                                        //and take the minimum of them as being new distance
+               double newL=0.5*(d[give_indexx(i,s,n)]+d[give_indexx(z,s,n)]-d[give_indexx(i,z,n)]);//one of leaves is
+                                                      //z, since
+                                                      //all pairs not cotaining z
+                                                      //will remain unchanged
+               if(newL<l[s])
+                  {
+                   l[s]=newL;
+                   st[s]=i;
+                   ed[s]=z;
+                  }
+            }
+         }
+        free(ord);
+        /*Rprintf("new tree\n");
+        for(i=0;i<2*n-3;i++)
+        {
+        Rprintf("%i->%i of length:%f \n",ed1[i],ed2[i],edLen[i]);
+        }
+        Rprintf("end new tree\n");*/
+      }
+   //for(i=0;i<=numEdges;i++){ed1[i]++;ed2[i]++;}
+ }
diff --git a/src/triangMtds.c b/src/triangMtds.c
new file mode 100644 (file)
index 0000000..7ec0bd7
--- /dev/null
@@ -0,0 +1,223 @@
+/* triangMtds.c    2011-10-11 */
+
+/* Copyright 2011 Andrei-Alin Popescu */
+
+/* This file is part of the R-package `ape'. */
+/* See the file ../COPYING for licensing issues. */
+
+#include "ape.h"
+
+void triangMtds(double* d, int* np, int* ed1,int* ed2, double* edLen)
+ {
+    int n=*np;
+    int k=0;
+    int i=0;
+    int j=0;
+    int ij=-1;
+    int m[n+1];
+    int c[n+1];
+    int s[n+1];
+    int o[n+1];
+    for(i=1;i<=n;i++)
+    {m[i]=0;
+     c[i]=i;
+     s[i]=0;
+     for(j=1;j<=n;j++)
+      {
+        if(i==j){m[i]++;continue;}
+        if(d[give_index(i,j,n)]==-1)continue;
+        m[i]++;
+      }
+    }
+
+   for(i=1;i<n;i++)
+    for(j=i+1;j<=n;j++)
+      {
+        if(m[i]<m[j])
+         {
+            int aux=m[i];
+            m[i]=m[j];
+            m[j]=aux;
+            aux=c[i];
+            c[i]=c[j];
+            c[j]=aux;
+         }
+      }
+   ij=1;
+   while(m[ij]==n){ij++;}
+   for(i=ij;i<n;i++)
+     {if(s[c[i]]==1)continue;
+      for(j=i+1;j<=n;j++)
+      {
+       if(s[c[j]]==1)continue;
+       if(d[give_index(c[i],c[j],n)]==-1)
+         {s[c[j]]=1;
+         }
+      }
+     }
+
+   for(i=1;i<=n;i++)
+     {
+       if(s[i]==0)
+         {
+          k++;
+          o[k]=i;
+         }
+     }
+   double* sub_d=(double*)R_alloc(k*(k - 1)/2, sizeof(double));
+   ij=0;
+   for(i=1;i<n;i++)
+   {if(s[i]==1)continue;
+    for(j=i+1;j<=n;j++)
+     {
+      if(s[j]==1)continue;
+      sub_d[ij++]=d[give_index(i,j,n)];
+     }
+   }
+
+   triangMtd(sub_d,&k,ed1,ed2,edLen);
+   for(i=0;i<2*k-3;i++)
+     {
+       if(ed1[i]>k)
+        {
+          ed1[i]+=(n-k);
+        }
+       if(ed2[i]>k)
+        {
+          ed2[i]+=(n-k);
+        }
+     }
+   for(i=0;i<2*k-3;i++)
+     {
+      if(ed2[i]<=k)
+       {
+          ed2[i]=o[ed2[i]];
+       }
+     }
+   for(i=1;i<=n;i++)
+    {
+      if(s[i]==0)continue;//take only leaves not in Y
+      m[i]=0;
+      for(j=1;j<=n;j++)
+       {
+         if(s[j]==1)continue;//take only leaves already in Y
+         if(d[give_index(i,j,n)]==-1)continue;//igonore if distance unknown
+         m[i]++;
+       }
+    }
+ int numEdges=2*k-4;//0-based, so subtract 1
+ //Rprintf("numEdge=%i",numEdges);
+ int nv=(k-2)+n;
+ while(k<n)
+ {
+   //now find element in X\Y such that it has most known distances to already
+   //built tree until all leaves are added or we can not find a place to attach
+   //the new element
+   //s[i]=1 => i not added to tree
+    int max=-1;
+    int maxPos=-1;
+    for(i=1;i<=n;i++)
+    {
+     if(s[i]==0)continue;
+     if(m[i]>max)
+      {
+        max=m[i];
+        maxPos=i;
+      }
+    }
+   s[maxPos]=0;//mark maxPos as added
+   //calculate new m values for leaves not added, i.e we just increment any
+   //already present value by 1 if we know the distance between i and maxPos
+   for(i=1;i<=n;i++)
+    {
+      if(s[i]==0)continue;
+      if(d[give_index(i,maxPos,n)]==-1)continue;
+      m[i]++;
+    }
+
+   //find path to attach maxPos to, grow tree
+        double minDist=1e50;
+        int z=maxPos;
+        int x=-1,y=-1;
+        for(i=1;i<n;i++)
+        {if(s[i]==1 || d[give_index(i,z,n)]==-1 || i==z)continue;
+         for(j=i+1;j<=n;j++)
+          {
+            if(s[j]==1 || d[give_index(j,z,n)]==-1 || j==z)continue;
+            double tDist=(d[give_index(i,z,n)]+d[give_index(j,z,n)]-d[give_index(i,j,n)])/2;
+            if(tDist<minDist)
+             {
+                minDist=tDist;
+                x=i;
+                y=j;
+             }
+          }
+        }
+        if(x==-1 || y==-1)
+         {error("could not build tree from given distance matrix");
+         }
+        int* ord=getPathBetween(x,y,n,ed1,ed2,numEdges);
+        /*for(i=1;i<=2*n-2;i++)
+         {Rprintf("ord[%i]=%i ",i,ord[i]);
+         }
+        Rprintf("\n");*/
+        //look for the edge on the path x to y to subdivide
+
+        int p=x;
+        double sum=0;
+        double prevSum=0;
+        int aux=0;
+
+        int subdiv=-1;//index of edge to subdivide
+        //error("d[y,x]=%f,d[z,x]=%f,d[z,y]=%f\n",d[give_indexx(y,x,n)],d[give_indexx(z,x,n)],d[give_indexx(z,y,n)]);
+        double lx=0.5*(d[give_indexx(y,x,n)]+d[give_indexx(z,x,n)]-d[give_indexx(z,y,n)]);//distance of attachment
+       // Rprintf("adding %i on the path between %i and %i at a distance from x of %f and a distance of %f from tree",z,x,y,lx,minDist);
+        //point from x
+        //Rprintf("Adding leaf %i, between %i and %i\n",z,x,y);
+       // Rprintf("%i situated at a distance of %d from tree",z,minDist);
+        int sw=0;
+        //Rprintf("path between %i and %i\n",x,y);
+        //int cc=0;
+        while(p!=y && sum<lx)
+          { //cc++;
+            aux=p;
+            //error("%i -> %i ",p,ord[p]);
+            p=ord[p];
+            prevSum=sum;
+            for(i=0;i<=numEdges;i++)
+              {
+                if((ed1[i]==aux && ed2[i]==p)||(ed2[i]==aux && ed1[i]==p))
+                  {
+                    if(ed2[i]==aux && ed1[i]==p){sw=1;}
+                    subdiv=i;
+                    sum+=edLen[i];
+                  }
+              }
+            //if(cc>1000)error("failed to follow path between x=%i y=%i\n",x,y);
+          }
+
+
+        nv++;
+        //subdivide subdiv with a node labelled nv
+        //length calculation
+
+        int edd=ed2[subdiv];
+        ed2[subdiv]=nv;
+        edLen[subdiv]= (sw==1)?(lx-prevSum):(sum-lx);//check which 'half' of the
+                                                     //path the leaf belongs to
+                                                     //and updates accordingly
+        //error("sum=%f, prevsum=%f\n",sum,prevSum);
+        //error("lx-prevSum=%f, sum-lx=%f, minDist=%f",lx-prevSum,sum-lx,minDist);
+        //Rprintf("adding %i on path %i %i, at distance %f from %i, and %f from tree\n",z,x,y,lx,x,minDist);
+       // Rprintf("subdividing edge %i\n",subdiv);
+        numEdges++;
+        ed1[numEdges]=nv;
+        ed2[numEdges]=edd;
+        edLen[numEdges]= (sw==1)?(sum-lx):(lx-prevSum);
+        numEdges++;
+        edLen[numEdges]=minDist;
+        ed1[numEdges]=nv;
+        ed2[numEdges]=z;
+   k++;
+ }
+ }
diff --git a/src/ultrametric.c b/src/ultrametric.c
new file mode 100644 (file)
index 0000000..1c1d57a
--- /dev/null
@@ -0,0 +1,61 @@
+/* ultrametric.c    2011-10-11 */
+
+/* Copyright 2011 Andrei-Alin Popescu */
+
+/* This file is part of the R-package `ape'. */
+/* See the file ../COPYING for licensing issues. */
+
+#include "ape.h"
+
+void ultrametric(double *dd, int* np,int* mp,double *ret)//d received as dist object, -1 for missing entries
+{
+    int n=*np;
+    int m=*mp;
+    int i=0,j=0;
+    double max=dd[0];
+    double d[n][n];
+    for(i=1;i<n;i++)
+    {d[i-1][i-1]=0;
+     for(j=i+1;j<=n;j++)
+      {
+         d[i-1][j-1]=d[j-1][i-1]=dd[give_index(i,j,n)];
+         if(dd[give_index(i,j,n)]>max)
+          {
+            max=dd[give_index(i,j,n)];
+          }
+      }
+    }
+    d[n-1][n-1]=0;
+
+  int entrCh=0;
+   do{
+    entrCh=0;
+    for(i=0;i<n-1;i++)
+     for(j=i+1;j<n;j++)
+      {
+         if(d[i][j]!=-1)continue;
+         double minimax=max;
+         int k=0;
+         int sw=0;
+         for(k=0;k<n;k++)
+          {
+             if(d[i][k]==-1 || d[j][k]==-1)continue;
+             sw=1;
+             double mx = d[i][k] > d[j][k] ? d[i][k] : d[j][k];
+             if(mx<minimax){minimax=mx;}
+          }
+        if(sw==1)
+          {
+            d[i][j]=d[j][i]=minimax;
+            m--;
+            entrCh=1;
+          }
+      }
+   }while(entrCh==1);
+  int ij=0;
+  for(i=0;i<n;i++)
+   for(j=0;j<n;j++)
+    {
+       ret[ij++]=d[i][j];
+    }
+}