many changes!

author paradis <paradis@6e262413-ae40-0410-9e79-b911bd7a66b7>

Thu, 16 Sep 2010 15:12:58 +0000 (15:12 +0000)

committer paradis <paradis@6e262413-ae40-0410-9e79-b911bd7a66b7>

Thu, 16 Sep 2010 15:12:58 +0000 (15:12 +0000)
author paradis <paradis@6e262413-ae40-0410-9e79-b911bd7a66b7>
Thu, 16 Sep 2010 15:12:58 +0000 (15:12 +0000)
committer paradis <paradis@6e262413-ae40-0410-9e79-b911bd7a66b7>
Thu, 16 Sep 2010 15:12:58 +0000 (15:12 +0000)
diff --git a/ChangeLog b/ChangeLog

index f92c51c79ab6a4b03291edf8ebd528884cd2884d..5a6817e06a65c287771809a14472393a88a7fe2c 100644 (file)
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,8 +1,21 @@
-               CHANGES IN APE VERSION 2.5-4
+               CHANGES IN APE VERSION 2.6
  
  
  NEW FEATURES
  
+    o The new functions rlineage and rbdtree simulate phylogenies under
+      any user-defined time-dependent speciation-extinction model. They
+      use new continuous time algorithms.
+
+    o The new function drop.fossil removes the extinct species from a
+      phylogeny.
+
+    o The new function MPR does most parsimonious reconstruction of
+      discrete characters.
+
+    o The new function Ftab computes the contingency table of base
+      frequencies from a pair of sequences.
+
      o There is now an 'as.list' method for the class "DNAbin".
  
      o dist.dna() can compute the number of transitions or transversions
@@ -15,13 +28,31 @@ NEW FEATURES
      o compar.gee() has been improved with the new option 'corStruct' as an
        alternative to 'phy' to specify the correlation structure, and
        calculation of the QIC (Pan 2001, Biometrics). The display of the
-      results have also been improved.
+      results has also been improved.
+
+    o read.GenBank() has a new option 'gene.names' to return the name of
+      the gene (FALSE by default).
  
  
  BUG FIXES
  
      o extract.clade() sometimes shuffled the tip labels.
  
+    o plot.phylo(type = "unrooted") did not force asp = 1 (thanks to Klaus
+      Schliep for the fix)
+
+    o dist.dna(model = "logdet") used to divide distances by 4. The
+      documentation has been clarified on the formulae used.
+
+
+OTHER CHANGES
+
+    o rTraitCont(model = "OU") has an option 'linear = TRUE' to possibly
+      change the parameterisation (see ?rTraitCont for details).
+
+    o pic() now returns a vector with the node labels of the tree (if
+      available) as names.
+
  
  
                 CHANGES IN APE VERSION 2.5-3
diff --git a/DESCRIPTION b/DESCRIPTION

index 26e5e3a7e07540d7fb21ee8c534f2f394909f87c..410085c4e01911bf602d52b508f63733d3d2e8fd 100644 (file)
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,6 +1,6 @@
  Package: ape
-Version: 2.5-4
-Date: 2010-07-13
+Version: 2.6
+Date: 2010-09-16
  Title: Analyses of Phylogenetics and Evolution
  Author: Emmanuel Paradis, Ben Bolker, Julien Claude, Hoa Sien Cuong, Richard Desper, Benoit Durand, Julien Dutheil, Olivier Gascuel, Christoph Heibl, Daniel Lawson, Vincent Lefort, Pierre Legendre, Jim Lemon, Yvonnick Noel, Johan Nylander, Rainer Opgen-Rhein, Korbinian Strimmer, Damien de Vienne
  Maintainer: Emmanuel Paradis <Emmanuel.Paradis@ird.fr>
diff --git a/R/CDF.birth.death.R b/R/CDF.birth.death.R

new file mode 100644 (file)

index 0000000..6f58fa4
--- /dev/null
+++ b/R/CDF.birth.death.R
@@ -0,0 +1,394 @@
+## CDF.birth.death.R (2010-08-09)
+
+##    Functions to simulate and fit
+##  Time-Dependent Birth-Death Models
+
+## Copyright 2010 Emmanuel Paradis
+
+## This file is part of the R-package `ape'.
+## See the file ../COPYING for licensing issues.
+
+## compute an integral with a simple trapeze method
+## (apparently, Vectorize doesn't give faster calculation)
+integrateTrapeze <- function(FUN, from, to, nint = 10)
+{
+    x <- seq(from = from, to = to, length.out = nint + 1)
+    ## reorganized to minimize the calls to FUN:
+    out <- FUN(x[1]) + FUN(x[length(x)])
+    for (i in 2:nint) out <- out + 2 * FUN(x[i])
+    (x[2] - x[1]) * out/2 # (x[2] - x[1]) is the width of the trapezes
+}
+
+## case:
+## 1: birth and death rates constant
+## 2: no primitive available
+## 3: primitives are available
+## 4: death rate constant, no primitive available
+## 5: birth rate constant, no primitive available
+## 6: death rate constant, primitive available for birth(t)
+## 7: birth rate constant, primitive available for death(t)
+
+.getCase <- function(birth, death, BIRTH = NULL, DEATH = NULL)
+{
+    if (is.numeric(birth)) {
+        if (is.numeric(death)) 1 else {
+            if (is.null(DEATH)) 5 else 7
+        }
+    } else {
+        if (is.numeric(death)) {
+            if (is.null(BIRTH)) 4 else 6
+        } else if (is.null(BIRTH) || is.null(DEATH)) 2 else 3
+    }
+}
+
+###.getRHO <- function(birth, death, BIRTH = NULL, DEATH = NULL, case)
+###{
+###    ## build the RHO(), \rho(t), function
+###    switch (case,
+###            function(t1, t2) (t2 - t1)*(death - birth), # case 1
+###            function(t1, t2) # case 2
+###                integrate(function(t) death(t) - birth(t), t1, t2)$value,
+###            function(t1, t2)
+###                DEATH(t2) - BIRTH(t2) - DEATH(t1) + BIRTH(t1), # case 3
+###            function(t1, t2)
+###                death - integrate(birth, t1, t2)$value, # case 4
+###            function(t1, t2)
+###                integrate(death, t1, t2)$value - birth, # case 5
+###            function(t1, t2) death - BIRTH(t2) + BIRTH(t1), # case 6
+###            function(t1, t2) DEATH(t2) - DEATH(t1) - birth # case 7
+###        )
+###}
+
+.getRHOetINT <- function(birth, death, BIRTH = NULL, DEATH = NULL, case, fast)
+{
+    ## build the RHO(), \rho(t), and INT(), I(t), functions
+    switch (case,
+        { # case 1:
+            RHO <- function(t1, t2) (t2 - t1)*(death - birth)
+            INT <- function(t) {
+                rho <- death - birth
+                death*(exp(rho*(Tmax - t)) - 1)/rho
+            }
+        },{ # case 2:
+            if (fast) {
+                RHO <- function(t1, t2)
+                    integrateTrapeze(function(t) death(t) - birth(t), t1, t2)
+                INT <- function(t) {
+                    FOO <- function(u) exp(RHO(t, u)) * death(u)
+                    integrateTrapeze(FOO, t, Tmax)
+                }
+            } else {
+                RHO <- function(t1, t2)
+                    integrate(function(t) death(t) - birth(t), t1, t2)$value
+                INT <- function(t) {
+                    FOO <- function(u) exp(RHO(t, u)) * death(u)
+                    integrate(Vectorize(FOO), t, Tmax)$value # Vectorize required
+                }
+            }
+        },{ # case 3:
+            RHO <- function(t1, t2)
+                DEATH(t2) - BIRTH(t2) - DEATH(t1) + BIRTH(t1)
+            INT <- function(t) { # vectorized
+                FOO <- function(u) exp(RHO(tt, u)) * death(u)
+                out <- t
+                for (i in 1:length(t)) {
+                    tt <- t[i]
+                    out[i] <- integrate(FOO, tt, Tmax)$value
+                }
+                out
+            }
+        },{ # case 4:
+            if (fast) {
+                RHO <- function(t1, t2)
+                    death * (t2 - t1) - integrateTrapeze(birth, t1, t2)
+                INT <- function(t) {
+                    FOO <- function(u) exp(RHO(t, u)) * death
+                    integrateTrapeze(Vectorize(FOO), t, Tmax)
+                }
+            } else {
+                RHO <- function(t1, t2)
+                    death * (t2 - t1) - integrate(birth, t1, t2)$value
+                INT <- function(t) {
+                    FOO <- function(u) exp(RHO(t, u)) * death
+                    integrate(Vectorize(FOO), t, Tmax)$value
+                }
+            }
+        },{ # case 5:
+            RHO <- function(t1, t2)
+                integrate(death, t1, t2)$value - birth * (t2 - t1)
+            if (fast) {
+                INT <- function(t) {
+                    FOO <- function(u) exp(RHO(t, u)) * death(u)
+                    integrateTrapeze(FOO, t, Tmax)
+                }
+            } else {
+                INT <- function(t) {
+                    FOO <- function(u) exp(RHO(t, u)) * death(u)
+                    integrate(Vectorize(FOO), t, Tmax)$value
+                }
+            }
+        },{ # case 6:
+            RHO <- function(t1, t2) death * (t2 - t1) - BIRTH(t2) + BIRTH(t1)
+            INT <- function(t) { # vectorized
+                FOO <- function(u) exp(RHO(tt, u)) * death
+                out <- t
+                for (i in 1:length(t)) {
+                    tt <- t[i]
+                    out[i] <- integrate(FOO, tt, Tmax)$value
+                }
+                out
+            }
+        },{ # case 7:
+            RHO <- function(t1, t2) DEATH(t2) - DEATH(t1) - birth * (t2 - t1)
+            if (fast) {
+                INT <- function(t) {
+                    FOO <- function(u) exp(RHO(t, u)) * death(u)
+                    integrateTrapeze(FOO, t, Tmax)
+                }
+            } else {
+                INT <- function(t) {
+                    FOO <- function(u) exp(RHO(t, u)) * death(u)
+                    integrate(Vectorize(FOO), t, Tmax)$value
+                }
+            }
+        })
+    list(RHO, INT)
+}
+
+CDF.birth.death <-
+    function(birth, death, BIRTH = NULL, DEATH = NULL, Tmax, x, case, fast = FALSE)
+{
+    ff <- .getRHOetINT(birth, death, BIRTH, DEATH, case, fast)
+    RHO <- ff[[1]]
+    INT <- ff[[2]]
+    environment(INT) <- environment() # so that INT() can find Tmax
+    .CDF.birth.death2(RHO, INT, birth, death, BIRTH, DEATH,
+                      Tmax, x, case, fast)
+}
+
+.CDF.birth.death2 <-
+    function(RHO, INT, birth, death, BIRTH, DEATH, Tmax, x, case, fast)
+{
+    Pi <- if (case %in% c(1, 5, 7))
+        function(t) (1/(1 + INT(t)))^2 * 2 * exp(-RHO(0, t)) * birth
+    else
+        function(t) (1/(1 + INT(t)))^2 * 2 * exp(-RHO(0, t)) * birth(t)
+
+    if (!case %in% c(1, 3, 6)) Pi <- Vectorize(Pi)
+
+    denom <- if (fast) integrateTrapeze(Pi, 0, Tmax) else integrate(Pi, 0, Tmax)$value
+    n <- length(x)
+    p <- numeric(n)
+    if (fast) {
+        for (i in 1:n) p[i] <- integrateTrapeze(Pi, 0, x[i])
+    } else {
+        for (i in 1:n) p[i] <- integrate(Pi, 0, x[i])$value
+    }
+    p/denom
+}
+
+.makePhylo <- function(edge, edge.length, i)
+{
+    NODES <- edge > 0
+    edge[NODES] <- edge[NODES] + i + 1L
+    edge[!NODES] <- 1:(i + 1L)
+
+    phy <- list(edge = edge, edge.length = edge.length,
+                tip.label = paste("t", 1:(i + 1), sep = ""), Nnode = i)
+    class(phy) <- "phylo"
+    phy
+}
+
+rlineage <-
+    function(birth, death, Tmax = 50, BIRTH = NULL, DEATH = NULL, eps = 1e-6)
+{
+    case <- .getCase(birth, death, BIRTH, DEATH)
+
+    rTimeToEvent <- function(t)
+    {
+        ## CDF of the times to event (speciation or extinction):
+        switch (case,
+            { # case 1:
+                Foo <- function(t, x)
+                    1 - exp(-(birth + death)*(x - t))
+            },{ # case 2:
+                Foo <- function(t, x) {
+                    if (t == x) return(0)
+                    1 - exp(-integrate(function(t) birth(t) + death(t),
+                                       t, x)$value)
+                }
+            },{ # case 3:
+                Foo <- function(t, x) {
+                    if (t == x) return(0)
+                    1 - exp(-(BIRTH(x) - BIRTH(t) + DEATH(x) - DEATH(t)))
+                }
+            },{ # case 4:
+                Foo <- function(t, x) {
+                    if (t == x) return(0)
+                    1 - exp(-(integrate(function(t) birth(t), t, x)$value
+                              + death*(x - t)))
+                }
+
+            },{ # case 5:
+                Foo <- function(t, x) {
+                    if (t == x) return(0)
+                    1 - exp(-(birth*(x - t) +
+                              integrate(function(t) death(t), t, x)$value))
+                }
+
+            },{ # case 6:
+                Foo <- function(t, x) {
+                    if (t == x) return(0)
+                    1 - exp(-(BIRTH(x) - BIRTH(t) + death*(x - t)))
+                }
+
+            },{ # case 7:
+                Foo <- function(t, x) {
+                    if (t == x) return(0)
+                    1 - exp(-(birth*(x - t) + DEATH(x) - DEATH(t) ))
+                }
+            })
+
+        ## generate a random time to event by the inverse method:
+        P <- runif(1)
+        ## in case speciation probability is so low
+        ## that time to speciation is infinite:
+        if (Foo(t, Tmax) < P) return(Tmax + 1)
+        inc <- 10
+        x <- t + inc
+        while (inc > eps) { # la précision influe sur le temps de calcul
+            if (Foo(t, x) > P) {
+                x <- x - inc
+                inc <- inc/10
+            } else x <- x + inc
+        }
+        x - t
+    }
+
+    if (case == 1)
+        speORext <- function(t) birth/(birth + death)
+    if (case == 2 || case == 3)
+        speORext <- function(t) birth(t)/(birth(t) + death(t))
+    if (case == 4 || case == 6)
+        speORext <- function(t) birth(t)/(birth(t) + death)
+    if (case == 5 || case == 7)
+        speORext <- function(t) birth/(birth + death(t))
+
+    ## the recursive function implementing algorithm 1
+    foo <- function(node) {
+        for (k in 0:1) {
+            X <- rTimeToEvent(t[node])
+            tmp <- t[node] + X
+            ## is the event a speciation or an extinction?
+            if (tmp >= Tmax) {
+                Y <- 0
+                tmp <- Tmax
+            } else Y <- rbinom(1, size = 1, prob = speORext(tmp))
+            j <<- j + 1L
+            edge.length[j] <<- tmp - t[node]
+            if (Y) {
+                i <<- i + 1L
+                t[i] <<- tmp
+                ## set internal edge:
+                edge[j, ] <<- c(node, i)
+                foo(i)
+            } else
+                ## set terminal edge:
+                edge[j, ] <<- c(node, 0L)
+        }
+    }
+
+    edge <- matrix(0L, 1e5, 2)
+    edge.length <- numeric(1e5)
+    j <- 0L; i <- 1; t <- 0
+    foo(1L)
+    .makePhylo(edge[1:j, ], edge.length[1:j], i)
+}
+
+drop.fossil <- function(phy, tol = 0)
+{
+    n <- Ntip(phy)
+    x <- dist.nodes(phy)[n + 1, ][1:n]
+    drop.tip(phy, which(x < max(x) - tol))
+}
+
+rbdtree <-
+    function(birth, death, Tmax = 50, BIRTH = NULL, DEATH = NULL, eps = 1e-6)
+{
+    case <- .getCase(birth, death, BIRTH, DEATH)
+    ff <- .getRHOetINT(birth, death, BIRTH, DEATH, case, FALSE)
+    RHO <- ff[[1]]
+    INT <- ff[[2]]
+    ## so that RHO() and INT() can find Tmax:
+    environment(RHO) <- environment(INT) <- environment()
+
+    rtimetospe <- function(t)
+    {
+        ## CDF of the times to speciation:
+        Foo <- if (case %in% c(1, 5, 7))
+            function(t, x) 1 - exp(-birth*(x - t))
+        else {
+            if (case %in% c(3, 6))
+                function(t, x) 1 - exp(-(BIRTH(x) - BIRTH(t)))
+            else {
+                function(t, x) {
+                    if (t == x) return(0)
+                    1 - exp(-integrate(birth, t, x)$value)
+                }
+            }
+        }
+        ## generate a random time to speciation by the inverse method:
+        P <- runif(1)
+        ## in case speciation probability is so low
+        ## that time to speciation is infinite:
+        if (Foo(t, Tmax) < P) return(Tmax + 1)
+        inc <- 10
+        x <- t + inc
+        while (inc > eps) { # la précision influe sur le temps de calcul
+            if (Foo(t, x) > P) {
+                x <- x - inc
+                inc <- inc/10
+            } else x <- x + inc
+        }
+        x - t
+    }
+
+    ## the recursive function implementing algorithm 2
+    foo <- function(node, start) {
+        node <- node # make a local copy
+        for (k in 0:1) {
+            tau <- start # because tau is changed below
+            NoDesc <- TRUE
+            X <- rtimetospe(tau)
+            while (X < Tmax - tau) {
+                tau <- tau + X
+                ## does the new lineage survive until Tmax?
+                Y <- rbinom(1, size = 1, prob = 1/(1 + INT(tau)))
+                if (Y) {
+                    i <<- i + 1L
+                    t[i] <<- tau
+                    ## set internal edge:
+                    j <<- j + 1L
+                    edge[j, ] <<- c(node, i)
+                    edge.length[j] <<- tau - t[node]
+                    foo(i, t[i])
+                    NoDesc <- FALSE
+                    break
+                }
+                X <- rtimetospe(tau)
+            }
+            ## set terminal edge:
+            if (NoDesc) {
+                j <<- j + 1L
+                edge[j, 1] <<- node # the 2nd column is already set to 0
+                edge.length[j] <<- Tmax - t[node]
+            }
+        }
+    }
+
+    edge <- matrix(0L, 1e5, 2)
+    edge.length <- numeric(1e5)
+    j <- 0L; i <- 1L; t <- 0
+    foo(1L, 0)
+    .makePhylo(edge[1:j, ], edge.length[1:j], i)
+}
diff --git a/R/DNA.R b/R/DNA.R

index 1f4f5d6ae78e8367635651d4507a126594a581f6..a7285170ec3aef68cf4874043ae6386edd034d30 100644 (file)
--- a/R/DNA.R
+++ b/R/DNA.R
@@ -1,4 +1,4 @@
-## DNA.R (2010-07-14)
+## DNA.R (2010-09-01)
  
  ##   Manipulations and Comparisons of DNA Sequences
  
@@ -291,6 +291,37 @@ base.freq <- function(x, freq = FALSE)
      BF
  }
  
+Ftab <- function(x, y = NULL)
+{
+    if (is.null(y)) {
+        if (is.list(x)) {
+            y <- x[[2]]
+            x <- x[[1]]
+            if (length(x) != length(y))
+                stop("'x' and 'y' not of same lenght")
+        } else { # 'x' is a matrix
+            y <- x[2, , drop = TRUE]
+            x <- x[1, , drop = TRUE]
+        }
+    } else {
+        x <- as.vector(x)
+        y <- as.vector(y)
+        if (length(x) != length(y))
+            stop("'x' and 'y' not of same lenght")
+    }
+    out <- matrix(0, 4, 4)
+    k <- c(136, 40, 72, 24)
+    for (i in 1:4) {
+        a <- x == k[i]
+        for (j in 1:4) {
+            b <- y == k[j]
+            out[i, j] <- sum(a & b)
+        }
+    }
+    dimnames(out)[1:2] <- list(c("a", "c", "g", "t"))
+    out
+}
+
  GC.content <- function(x) sum(base.freq(x)[2:3])
  
  seg.sites <- function(x)
diff --git a/R/MPR.R b/R/MPR.R

new file mode 100644 (file)

index 0000000..d160c48
--- /dev/null
+++ b/R/MPR.R
@@ -0,0 +1,68 @@
+## MPR.R (2010-08-10)
+
+##   Most Parsimonious Reconstruction
+
+## Copyright 2010 Emmanuel Paradis
+
+## This file is part of the R-package `ape'.
+## See the file ../COPYING for licensing issues.
+
+MPR <- function(x, phy, outgroup)
+{
+    if (is.rooted(phy))
+        stop("the tree must be unrooted")
+    if (!is.binary.tree(phy))
+        stop("the tree must be fully dichotomous")
+    if (length(outgroup) > 1L)
+        stop("outgroup must be a single tip")
+    if (is.character(outgroup))
+        outgroup <- which(phy$tip.label == outgroup)
+
+    if (!is.null(names(x))) {
+        if (all(names(x) %in% phy$tip.label))
+            x <- x[phy$tip.label]
+        else warning("the names of 'x' and the tip labels of the tree do not match: the former were ignored in the analysis.")
+    }
+
+    n <- length(phy$tip.label)
+    if (is.null(phy$node.label))
+        phy$node.label <- n + 1:(phy$Nnode)
+
+    phy <- drop.tip(root(phy, outgroup), outgroup)
+    n <- n - 1L
+    m <- phy$Nnode
+    phy <- reorder(phy, "pruningwise")
+
+    root.state <- x[outgroup]
+    I <- as.integer(x[-outgroup])
+    I[n + 1:m] <- NA
+    I <- cbind(I, I) # interval map
+
+    med <- function(x) {
+        i <- length(x)/2
+        sort(x)[c(i, i + 1L)]
+    }
+
+    ## 1st pass
+    s <- seq(from = 1, by = 2, length.out = m)
+    anc <- phy$edge[s, 1]
+    des <- matrix(phy$edge[, 2], ncol = 2, byrow = TRUE)
+    for (i in 1:m) I[anc[i], ] <- med(I[des[i, ], ])
+
+    ## 2nd pass
+    out <- matrix(NA, m, 2)
+    colnames(out) <- c("lower", "upper")
+    ## do the most basal node before looping
+    Iw <- as.vector(I[des[m, ], ]) # interval maps of the descendants
+    out[anc[m] - n, ] <- range(med(c(root.state, root.state, Iw)))
+    for (i in (m - 1):1) {
+        j <- anc[i]
+        Iw <- as.vector(I[des[i, ], ]) # interval maps of the descendants
+        k <- which(phy$edge[, 2] == j) # find the ancestor
+        tmp <- out[phy$edge[k, 1] - n, ]
+        out[j - n, 1] <- min(med(c(tmp[1], tmp[1], Iw)))
+        out[j - n, 2] <- max(med(c(tmp[2], tmp[2], Iw)))
+    }
+    rownames(out) <- phy$node.label
+    out
+}
diff --git a/R/ace.R b/R/ace.R

index 87ed1f2e60503d1e8de35d00ff94aafcd82582b6..12c5f99f0ec83befd8400dadb3694b0acff7e1ec 100644 (file)
--- a/R/ace.R
+++ b/R/ace.R
@@ -12,7 +12,7 @@ ace <- function(x, phy, type = "continuous", method = "ML", CI = TRUE,
                  scaled = TRUE, kappa = 1, corStruct = NULL, ip = 0.1)
  {
      if (!inherits(phy, "phylo"))
-      stop('object "phy" is not of class "phylo".')
+        stop('object "phy" is not of class "phylo".')
      if (is.null(phy$edge.length))
          stop("tree has no branch lengths")
      type <- match.arg(type, c("continuous", "discrete"))
@@ -25,8 +25,7 @@ ace <- function(x, phy, type = "continuous", method = "ML", CI = TRUE,
      if (!is.null(names(x))) {
          if(all(names(x) %in% phy$tip.label))
            x <- x[phy$tip.label]
-        else warning('the names of argument "x" and the tip labels of the tree
-did not match: the former were ignored in the analysis.')
+        else warning("the names of 'x' and the tip labels of the tree do not match: the former were ignored in the analysis.")
      }
      obj <- list()
      if (kappa != 1) phy$edge.length <- phy$edge.length^kappa
@@ -164,7 +163,7 @@ as the number of categories in `x'")
              Q[] <- c(p, 0)[rate]
              diag(Q) <- -rowSums(Q)
              for (i  in seq(from = 1, by = 2, length.out = nb.node)) {
-                j <- i + 1
+                j <- i + 1L
                  anc <- phy$edge[i, 1]
                  des1 <- phy$edge[i, 2]
                  des2 <- phy$edge[j, 2]
@@ -233,11 +232,12 @@ anova.ace <- function(object, ...)
  
  print.ace <- function(x, digits = 4, ...)
  {
-    cat("\n    Ancestral Character Reconstruction\n\n")
+    cat("\n    Ancestral Character Estimation\n\n")
      cat("Call: ")
      print(x$call)
      cat("\n")
-    cat("    Log-likelihood:", x$loglik, "\n\n")
+    if (!is.null(x$loglik))
+        cat("    Log-likelihood:", x$loglik, "\n\n")
      ratemat <- x$index.matrix
      if (is.null(ratemat)) { # to be improved
          class(x) <- NULL
diff --git a/R/collapse.singles.R b/R/collapse.singles.R

index 9762bb0538c4f91cb8fad224a9df0701d7869695..bbde0a17e003ce464a68dab76afa7d4aa412b416 100644 (file)
--- a/R/collapse.singles.R
+++ b/R/collapse.singles.R
@@ -1,4 +1,4 @@
-## collapse.singles.R (2008-06-19)
+## collapse.singles.R (2010-07-23)
  
  ##    Collapse "Single" Nodes
  
@@ -32,7 +32,7 @@ collapse.singles <- function(tree)
              xmat <- xmat[xmat[, 1] != i, ] # drop
              ## changed by EP for the new coding of "phylo" (2006-10-05):
              ## xmat[xmat < i] <- xmat[xmat < i] + 1 ## adjust indices
-            xmat[xmat > i] <- xmat[xmat > i] - 1 ## adjust indices
+            xmat[xmat > i] <- xmat[xmat > i] - 1L ## adjust indices # changed '1' by '1L' (2010-07-23)
              ## END
              elen[prev.node] <- elen[prev.node] + elen[next.node]
              ## added by Elizabeth Purdom (2008-06-19):
diff --git a/R/compar.gee.R b/R/compar.gee.R

index caead0776b8dc6161a14034805071bdc89fbcc45..b2a9c1eb7eefa9acead17c37b1519b4507848b90 100644 (file)
--- a/R/compar.gee.R
+++ b/R/compar.gee.R
@@ -57,7 +57,7 @@ compar.gee <-
      ## <FIXME>
      ## maybe need to refine below in case of non-Brownian corStruct
      if (!missing(corStruct)) phy <- attr(corStruct, "tree")
-    dfP <- sum(phy$edge.length)*N / sum(diag(R))
+    dfP <- sum(phy$edge.length)*N / sum(diag(vcv(phy))) # need the variances
      ## </FIXME>
  
      ## compute QIC:
diff --git a/R/dist.topo.R b/R/dist.topo.R

index eb9ba6f81e6f0410d3b68d16c5d6335f4739ff7f..86b396b9aa9814abade98b2c5bd86c3f75fe25ee 100644 (file)
--- a/R/dist.topo.R
+++ b/R/dist.topo.R
@@ -11,7 +11,7 @@
  dist.topo <- function(x, y, method = "PH85")
  {
      if (method == "score" && (is.null(x$edge.length) || is.null(y$edge.length)))
-        stop("trees must have branch lengths for Billera et al.'s distance.")
+        stop("trees must have branch lengths for branch score distance.")
      nx <- length(x$tip.label)
      x <- unroot(x)
      y <- unroot(y)
@@ -211,7 +211,10 @@ boot.phylo <- function(phy, x, FUN, B = 100, block = 1, trees = FALSE)
      storage.mode(phy$Nnode) <- "integer"
      ans <- attr(.Call("prop_part", c(list(phy), boot.tree),
                        B + 1, FALSE, PACKAGE = "ape"), "number") - 1
-    if (trees) ans <- list(BP = ans, trees = boot.tree)
+    if (trees) {
+        class(boot.tree) <- "multiPhylo"
+        ans <- list(BP = ans, trees = boot.tree)
+    }
      ans
  }
  
diff --git a/R/pic.R b/R/pic.R

index b3d2f6680edd0dc9a94d5ff8cabf66c45d81487a..140743987e9757bd605470f08be45cca1190194d 100644 (file)
--- a/R/pic.R
+++ b/R/pic.R
@@ -1,8 +1,8 @@
-## pic.R (2009-05-10)
+## pic.R (2010-08-18)
  
  ##   Phylogenetically Independent Contrasts
  
-## Copyright 2002-2009 Emmanuel Paradis
+## Copyright 2002-2010 Emmanuel Paradis
  
  ## This file is part of the R-package `ape'.
  ## See the file ../COPYING for licensing issues.
@@ -64,9 +64,12 @@ pic <- function(x, phy, scaled = TRUE, var.contrasts = FALSE)
      ##
      ##}
      contr <- ans[[7]]
+    lbls <-
+        if (is.null(phy$node.label)) as.character(1:nb.node + nb.tip)
+        else phy$node.label
      if (var.contrasts) {
          contr <- cbind(contr, ans[[8]])
-        dimnames(contr) <- list(1:nb.node + nb.tip, c("contrasts", "variance"))
-    } else names(contr) <- 1:nb.node + nb.tip
+        dimnames(contr) <- list(lbls, c("contrasts", "variance"))
+    } else names(contr) <- lbls
      contr
  }
diff --git a/R/plot.phylo.R b/R/plot.phylo.R

index 631fb4dc35029d0b52f94a361cb789cf1a35e0ae..df246bd7399787afb9294fcf3e7d5f029889142e 100644 (file)
--- a/R/plot.phylo.R
+++ b/R/plot.phylo.R
@@ -1,4 +1,4 @@
-## plot.phylo.R (2010-03-19)
+## plot.phylo.R (2010-08-12)
  
  ##   Plot Phylogenies
  
@@ -276,7 +276,7 @@ plot.phylo <- function(x, type = "phylogram", use.edge.length = TRUE,
          if (direction == "leftwards") x.lim[2] <- x.lim[2] + x$root.edge
          if (direction == "downwards") y.lim[2] <- y.lim[2] + x$root.edge
      }
-    asp <- if (type %in% c("fan", "radial")) 1 else NA # fix by Klaus Schliep (2008-03-28)
+    asp <- if (type %in% c("fan", "radial", "unrooted")) 1 else NA # fixes by Klaus Schliep (2008-03-28 and 2010-08-12)
      plot(0, type = "n", xlim = x.lim, ylim = y.lim, ann = FALSE, axes = FALSE, asp = asp, ...)
      if (is.null(adj))
          adj <- if (phyloORclado && direction == "leftwards") 1 else 0
diff --git a/R/rTrait.R b/R/rTrait.R

index 8cc48e17c9e45296c082b33ca33f29e01c3f3296..2c04dc6dd0855e246de03107d4b70017386baab3 100644 (file)
--- a/R/rTrait.R
+++ b/R/rTrait.R
@@ -1,4 +1,4 @@
-## rTrait.R (2010-05-06)
+## rTrait.R (2010-07-26)
  
  ##   Trait Evolution
  
@@ -18,7 +18,7 @@ rTraitDisc <-
          stop("at least one branch length negative")
  
      if (is.character(model)) {
-        switch(model, "ER" = {
+        switch(toupper(model), "ER" = {
                     if (length(rate) != 1)
                         stop("`rate' must have one element")
                     Q <- matrix(rate, k, k)
@@ -81,8 +81,8 @@ rTraitDisc <-
  }
  
  rTraitCont <-
-    function(phy, model = "BM", sigma = 0.1, alpha = 1,
-             theta = 0, ancestor = FALSE, root.value = 0)
+    function(phy, model = "BM", sigma = 0.1, alpha = 1, theta = 0,
+             ancestor = FALSE, root.value = 0, linear = TRUE)
  {
      if (is.null(phy$edge.length))
          stop("tree has no branch length")
@@ -104,7 +104,7 @@ rTraitCont <-
          environment(model) <- environment()
          for (i in N:1) x[des[i]] <- model(x[anc[i]], el[i])
      } else {
-        model <- pmatch(model, c("BM", "OU"))
+        model <- pmatch(toupper(model), c("BM", "OU"))
          if (length(sigma) == 1) sigma <- rep(sigma, N)
          else if (length(sigma) != N)
              stop("'sigma' must have one or Nedge(phy) elements")
@@ -115,6 +115,7 @@ rTraitCont <-
              if (length(theta) == 1) theta <- rep(theta, N)
              else if (length(theta) != N)
                  stop("'theta' must have one or Nedge(phy) elements")
+            if (!linear) model <- model + 1L
          }
          .C("rTraitCont", as.integer(model), as.integer(N), as.integer(anc - 1L), as.integer(des - 1L), el, sigma, alpha, theta, x, DUP = FALSE, NAOK = TRUE, PACKAGE = "ape")
      }
diff --git a/R/read.GenBank.R b/R/read.GenBank.R

index e84213b895351736dea0061c0c7579b357cba6c6..527f2dd51474cae6ef9b29a5bb9fcdfd2042b54c 100644 (file)
--- a/R/read.GenBank.R
+++ b/R/read.GenBank.R
@@ -1,14 +1,15 @@
-## read.GenBank.R (2007-06-27)
+## read.GenBank.R (2010-07-22)
  
  ##   Read DNA Sequences from GenBank via Internet
  
-## Copyright 2002-2007 Emmanuel Paradis
+## Copyright 2002-2010 Emmanuel Paradis
  
  ## This file is part of the R-package `ape'.
  ## See the file ../COPYING for licensing issues.
  
-read.GenBank <- function(access.nb, seq.names = access.nb,
-                         species.names = TRUE, as.character = FALSE)
+read.GenBank <-
+    function(access.nb, seq.names = access.nb, species.names = TRUE,
+             gene.names = FALSE, as.character = FALSE)
  {
      N <- length(access.nb)
      ## If there are more than 400 sequences, we need to break down the
@@ -26,8 +27,7 @@ read.GenBank <- function(access.nb, seq.names = access.nb,
      }
      FI <- grep("^ {0,}ORIGIN", X) + 1
      LA <- which(X == "//") - 1
-    obj <- list()
-    length(obj) <- N
+    obj <- vector("list", N)
      for (i in 1:N) {
          ## remove all spaces and digits
          tmp <- gsub("[[:digit:] ]", "", X[FI[i]:LA[i]])
@@ -39,8 +39,15 @@ read.GenBank <- function(access.nb, seq.names = access.nb,
          tmp <- character(N)
          sp <- grep("ORGANISM", X)
          for (i in 1:N)
-          tmp[i] <- unlist(strsplit(X[sp[i]], " +ORGANISM +"))[2]
+            tmp[i] <- unlist(strsplit(X[sp[i]], " +ORGANISM +"))[2]
          attr(obj, "species") <- gsub(" ", "_", tmp)
      }
+    if (gene.names) {
+        tmp <- character(N)
+        sp <- grep(" +gene +<", X)
+        for (i in 1:N)
+            tmp[i] <- unlist(strsplit(X[sp[i + 1L]], " +/gene=\""))[2]
+        attr(obj, "gene") <- gsub("\"$", "", tmp)
+    }
      obj
  }
diff --git a/man/DNAbin.Rd b/man/DNAbin.Rd

index 1b0ad705bccbc7c3a31da72528d462719bec7a62..bce0df2da8998c237f3a22dc7c78bd19627e1669 100644 (file)
--- a/man/DNAbin.Rd
+++ b/man/DNAbin.Rd
@@ -6,6 +6,7 @@
  \alias{cbind.DNAbin}
  \alias{as.matrix.DNAbin}
  \alias{c.DNAbin}
+\alias{as.list.DNAbin}
  \alias{labels.DNAbin}
  \title{Manipulate DNA Sequences in Bit-Level Format}
  \description{
@@ -20,6 +21,7 @@
  \method{[}{DNAbin}(x, i, j, drop = FALSE)
  \method{as.matrix}{DNAbin}(x, \dots)
  \method{c}{DNAbin}(\dots, recursive = FALSE)
+\method{as.list}{DNAbin}(x, \dots)
  \method{labels}{DNAbin}(object, \dots)
  }
  \arguments{
@@ -70,7 +72,7 @@
  
    \code{as.matrix} may be used to convert DNA sequences (of the same
    length) stored in a list into a matrix while keeping the names and the
-  class.
+  class. \code{as.list} does the reverse operation.
  }
  \value{
    an object of class \code{"DNAbin"} in the case of \code{rbind},
diff --git a/man/MPR.Rd b/man/MPR.Rd

new file mode 100644 (file)

index 0000000..98b5a0c
--- /dev/null
+++ b/man/MPR.Rd
@@ -0,0 +1,69 @@
+\name{MPR}
+\alias{MPR}
+\title{Most Parsimonious Reconstruction}
+\description{
+  This function does ancestral character reconstruction by parsimony as
+  described in Hanazawa et al. (1995) and modified by Narushima and
+  Hanazawa (1997).
+}
+\usage{
+MPR(x, phy, outgroup)
+}
+\arguments{
+  \item{x}{a vector of integers.}
+  \item{phy}{an object of class \code{"phylo"}; the tree must be
+    unrooted and fully dichotomous.}
+  \item{outgroup}{an integer or a character string giving the tip of
+    \code{phy} used as outgroup.}
+}
+\details{
+  Hanazawa et al. (1995) and Narushima and Hanazawa (1997) used Farris's
+  (1970) and Swofford and Maddison's (1987) framework to reconstruct
+  ancestral states using parsimony. The character is assumed to take
+  integer values. The algorithm finds the sets of values for each node
+  as intervals with lower and upper values.
+
+  It is recommended to root the tree with the outgroup before the
+  analysis, so plotting the values with \code{\link{nodelabels}} is
+  simple.
+}
+\value{
+  a matrix of integers with two columns named ``lower'' and ``upper''
+  giving the lower and upper values of the reconstructed sets for each
+  node.
+}
+\references{
+  Farris, J. M. (1970) Methods for computing Wagner trees.
+  \emph{Systematic Zoology}, \bold{19}, 83--92.
+
+  Hanazawa, M., Narushima, H. and Minaka, N. (1995) Generating most
+  parsimonious reconstructions on a tree: a generalization of the
+  Farris--Swofford--Maddison method. \emph{Discrete Applied
+    Mathematics}, \bold{56}, 245--265.
+
+  Narushima, H. and Hanazawa, M. (1997) A more efficient algorithm for
+  MPR problems in phylogeny. \emph{Discrete Applied Mathematics},
+  \bold{80}, 231--238.
+
+  Swofford, D. L. and Maddison, W. P. (1987) Reconstructing ancestral
+  character states under Wagner parsimony. \emph{Mathematical
+    Biosciences}, \bold{87}, 199--229.
+}\author{Emmanuel Paradis}
+\seealso{
+  \code{\link{ace}}, \code{\link{root}}, \code{\link{nodelabels}}
+}
+\examples{
+## the example in Narushima and Hanazawa (1997):
+tr <- read.tree(text = "(((i,j)c,(k,l)b)a,(h,g)e,f)d;")
+x <- c(1, 3, 0, 6, 5, 2, 4)
+names(x) <- letters[6:12]
+(o <- MPR(x, tr, "f"))
+plot(tr)
+nodelabels(paste("[", o[, 1], ",", o[, 2], "]", sep = ""))
+tiplabels(rev(x), adj = -2)
+## some random data:
+x <- rpois(30, 1)
+tr <- rtree(30, rooted = FALSE)
+MPR(x, tr, "t1")
+}
+\keyword{models}
diff --git a/man/ape-internal.Rd b/man/ape-internal.Rd

index 7aaad77b8a88f34d42c05b38c9666c6e4b72f501..73d53e6cf85a1c829dc85cff67aaa6d46e4aeba5 100644 (file)
--- a/man/ape-internal.Rd
+++ b/man/ape-internal.Rd
@@ -27,6 +27,8 @@
  \alias{getMRCA}
  \alias{plotCophylo2}
  \alias{plotPhyloCoor}
+\alias{integrateTrapeze}
+\alias{CDF.birth.death}
  \title{Internal Ape Functions}
  \description{
    Internal \pkg{ape} functions.
diff --git a/man/base.freq.Rd b/man/base.freq.Rd

index dcb564e6329f6263eb9d1ca4f69f7708b48c7e46..bef852991d3c322fce28f83d32895196edfd1361 100644 (file)
--- a/man/base.freq.Rd
+++ b/man/base.freq.Rd
@@ -1,32 +1,52 @@
  \name{base.freq}
  \alias{base.freq}
+\alias{Ftab}
  \title{Base frequencies from DNA Sequences}
+\description{
+  \code{base.freq} computes the frequencies (absolute or relative) of
+  the four DNA bases (adenine, cytosine, guanine, and thymidine) from a
+  sample of sequences.
+
+  \code{Ftab} computes the contingency table with the absolute
+  frequencies of the DNA bases from a pair of sequences.
+}
  \usage{
  base.freq(x, freq = FALSE)
+Ftab(x, y = NULL)
  }
  \arguments{
    \item{x}{a vector, a matrix, or a list which contains the DNA
      sequences.}
+  \item{y}{a vector with a single DNA sequence.}
    \item{freq}{a logical specifying whether to return the proportions
      (the default) or the absolute frequencies (counts).}
  }
-\description{
-  This function computes the relative frequencies (i.e. proportions) of
-  the four DNA bases (adenine, cytosine, guanine, and thymidine) from a
-  sample of sequences.
-}
  \details{
    The base frequencies are computed over all sequences in the
    sample. All missing or unknown sites are discarded from the
    computations.
+
+  For \code{Ftab}, if the argument \code{y} is given then both \code{x}
+  and \code{y} are coerced as vectors and must be of equal length. If
+  \code{y} is not given, \code{x} must be a matrix or a list and only
+  the two first sequences are used.
  }
  \value{
-  A numeric vector storing the relative frequencies with names
-  \code{c("a", "c", "g", "t")}.
+  A numeric vector with names \code{c("a", "c", "g", "t")}, or a four by
+  four matrix with similar dimnames.
  }
  \author{Emmanuel Paradis}
  \seealso{
    \code{\link{GC.content}}, \code{\link{seg.sites}},
    \code{\link{nuc.div}}, \code{\link{DNAbin}}
  }
+\examples{
+data(woodmouse)
+base.freq(woodmouse)
+base.freq(woodmouse, TRUE)
+Ftab(woodmouse)
+Ftab(woodmouse[1, ], woodmouse[2, ]) # same than above
+Ftab(woodmouse[14:15, ]) # between the last two
+}
  \keyword{univar}
+\keyword{manip}
diff --git a/man/dist.dna.Rd b/man/dist.dna.Rd

index 394baf57ba2ec75423650b190c66d505033c2341..3a5e3e027cb27d599bac0fd9d4b7ba04894e5b93 100644 (file)
--- a/man/dist.dna.Rd
+++ b/man/dist.dna.Rd
@@ -109,7 +109,11 @@ dist.dna(x, model = "K80", variance = FALSE,
      transitons and transversions.}
  
    \item{``logdet''}{The Log-Det distance, developed by Lockhart et
-    al. (1994), is related to BH87. However, this distance is symmetric.}
+    al. (1994), is related to BH87. However, this distance is
+    symmetric. Formulae from Gu and Li (1996) are used.
+    \code{dist.logdet} in \pkg{phangorn} uses a different
+    implementation that gives substantially different distances for
+    low-diverging sequences.}
  
    \item{``paralin''}{Lake (1994) developed the paralinear distance which
      can be viewed as another variant of the Barry--Hartigan distance.}
@@ -139,6 +143,11 @@ dist.dna(x, model = "K80", variance = FALSE,
    sequences of unequal base compositions. \emph{Proceedings of the
      National Academy of Sciences USA}, \bold{92}, 11317--11321.
  
+  Gu, X. and Li, W.-H. (1996) Bias-corrected paralinear and LogDet
+  distances and tests of molecular clocks and phylogenies under
+  nonstationary nucleotide frequencies. \emph{Molecular Biology and
+    Evolution}, \bold{13}, 1375--1383.
+
    Jukes, T. H. and Cantor, C. R. (1969) Evolution of protein
    molecules. in \emph{Mammalian Protein Metabolism}, ed. Munro, H. N.,
    pp. 21--132, New York: Academic Press.
diff --git a/man/dist.topo.Rd b/man/dist.topo.Rd

index 2017246d41cee719e9fe27e391175866f7ee57dc..caf1b5b6f4eaf5d1757cde044a7a843f9003aa68 100644 (file)
--- a/man/dist.topo.Rd
+++ b/man/dist.topo.Rd
@@ -34,7 +34,8 @@ dist.topo(x, y, method = "PH85")
    similar bipartitions (or splits) in both trees.
  }
  \note{
-  The geodesic distance of Billera et al. (2001) has been disabled.
+  The geodesic distance of Billera et al. (2001) has been disabled: see
+  the package \pkg{distory} on CRAN.
  }
  \references{
    Billera, L. J., Holmes, S. P. and Vogtmann, K. (2001) Geometry of the
diff --git a/man/mixedFontLabel.Rd b/man/mixedFontLabel.Rd

index 16997156e6aadf3ff935996031273d2716b20083..1736627b9472b12a54a3e259960e4caeeb279887 100644 (file)
--- a/man/mixedFontLabel.Rd
+++ b/man/mixedFontLabel.Rd
@@ -55,7 +55,7 @@ tr$tip.label <- mixedFontLabel(genus, species, geo, italic = 1:2,
    parenthesis = 3)
  layout(matrix(c(1, 2), 2))
  plot(tr)
-tr$tip.label <- mixedFontLabel(genus, species, geo, sep = c(" ", " - "),
+tr$tip.label <- mixedFontLabel(genus, species, geo, sep = c(" ", " | "),
    italic = 1:2, bold = 3)
  plot(tr)
  layout(1)
diff --git a/man/rTraitCont.Rd b/man/rTraitCont.Rd

index 01d82e2bb60c936342b31872d53ba31101c8c487..6ff347846cad82bbb8a98139980b18864bb18dc2 100644 (file)
--- a/man/rTraitCont.Rd
+++ b/man/rTraitCont.Rd
@@ -2,8 +2,8 @@
  \alias{rTraitCont}
  \title{Continuous Character Simulation}
  \usage{
-rTraitCont(phy, model = "BM", sigma = 0.1, alpha = 1,
-           theta = 0, ancestor = FALSE, root.value = 0)
+rTraitCont(phy, model = "BM", sigma = 0.1, alpha = 1, theta = 0,
+           ancestor = FALSE, root.value = 0, linear = TRUE)
  }
  \arguments{
    \item{phy}{an object of class \code{"phylo"}.}
@@ -20,6 +20,8 @@ rTraitCont(phy, model = "BM", sigma = 0.1, alpha = 1,
      values at the nodes as well (by default, only the values at the tips
      are returned).}
    \item{root.value}{a numeric giving the value at the root.}
+  \item{linear}{a logical indicating which parameterisation of the OU
+    model to use (see details).}
  }
  \description{
    This function simulates the evolution of a continuous character along a
@@ -41,18 +43,28 @@ rTraitCont(phy, model = "BM", sigma = 0.1, alpha = 1,
    \item{\code{"OU"}:}{an Ornstein-Uhlenbeck model is used. The above
    indexing rule is used for the three parameters \code{sigma},
    \code{alpha}, and \code{theta}. This may be more interesting for the
-  last one to model varying phenotypic optima. Be careful that large
-  values of \code{alpha} may give unrealistic output.}
+  last one to model varying phenotypic optima.
+
+  By default the following formula is used:
+
+  \deqn{x_{t''} = x_{t'} - \alpha l (x_{t'} - \theta) + \sigma
+    l \epsilon}{x(t'') = x(t') - alpha l (x(t') - theta) + sigma
+    l epsilon}
+
+  where \eqn{l (= t'' - t')} is the branch length, and \eqn{\epsilon
+  \sim N(0, 1)}{\epsilon ~ N(0, 1)}. If \eqn{\alpha > 1}{alpha > 1},
+  this may lead to chaotic oscillations. Thus an alternative
+  parameterisation is used if \code{linear = FALSE}:
+
+  \deqn{x_{t''} = x_{t'} - (1 - exp(-\alpha l)) * (x_{t'} - \theta) +
+    \sigma l \epsilon}{x(t'') = x(t') - (1 - exp(-alpha l)) * (x(t') -
+    theta) + sigma l epsilon}}
  
    \item{A function:}{it must be of the form \code{foo(x, l)} where
    \code{x} is the trait of the ancestor and \code{l} is the branch
    length. It must return the value of the descendant. The arguments
    \code{sigma}, \code{alpha}, and \code{theta} are ignored.}
  }}
-\note{
-  Currently, the OU model is a bit difficult to tune. Hopefully, this
-  may be improved in the future.
-}
  \value{
    A numeric vector with names taken from the tip labels of
    \code{phy}. If \code{ancestor = TRUE}, the node labels are used if
diff --git a/man/read.GenBank.Rd b/man/read.GenBank.Rd

index 9783e1232f763d0788331f14bbd445664406f2bb..d7ea055d2fa9ab61b5ab705f6ecd5676c7d2a0bc 100644 (file)
--- a/man/read.GenBank.Rd
+++ b/man/read.GenBank.Rd
@@ -2,8 +2,8 @@
  \alias{read.GenBank}
  \title{Read DNA Sequences from GenBank via Internet}
  \usage{
-read.GenBank(access.nb, seq.names = access.nb,
-             species.names = TRUE, as.character = FALSE)
+read.GenBank(access.nb, seq.names = access.nb, species.names = TRUE,
+             gene.names = FALSE, as.character = FALSE)
  }
  \arguments{
    \item{access.nb}{a vector of mode character giving the accession numbers.}
@@ -11,6 +11,10 @@ read.GenBank(access.nb, seq.names = access.nb,
      accession numbers are used.}
    \item{species.names}{a logical indicating whether to attribute the
      species names to the returned object.}
+  \item{gene.names}{a logical indicating whether to attribute the
+    gene names to the returned object. It is \code{FALSE} by default
+    because this will work correctly only when reading sequences with a
+    single gene.}
    \item{as.character}{a logical controlling whether to return the
      sequences as an object of class \code{"DNAbin"} (the default).}
  }
@@ -29,6 +33,11 @@ read.GenBank(access.nb, seq.names = access.nb,
    If \code{species.names = TRUE}, the returned list has an attribute
    \code{"species"} containing the names of the species taken from the
    field ``ORGANISM'' in GenBank.
+
+  If \code{gene.names = TRUE}, the returned list has an attribute
+  \code{"gene"} containing the names of the gene. This will not work
+  correctly if reading a sequence with multiple genes (e.g., a
+  mitochondrial genome).
  }
  \seealso{
    \code{\link{read.dna}}, \code{\link{write.dna}},
diff --git a/man/rlineage.Rd b/man/rlineage.Rd

new file mode 100644 (file)

index 0000000..d441245
--- /dev/null
+++ b/man/rlineage.Rd
@@ -0,0 +1,68 @@
+\name{rlineage}
+\alias{rlineage}
+\alias{rbdtree}
+\alias{drop.fossil}
+\title{Tree Simulation Under the Time-Dependent Birth--Death Models}
+\description{
+  These two functions simulate phylogenies under any time-dependent
+  birth--death model. \code{lineage} generates a complete tree including
+  the species that go extinct; \code{rbdtree} generates a tree with only
+  the species until present; \code{drop.fossil} is a utility function to
+  remove the extinct species.
+}
+\usage{
+rlineage(birth, death, Tmax = 50, BIRTH = NULL,
+         DEATH = NULL, eps = 1e-6)
+rbdtree(birth, death, Tmax = 50, BIRTH = NULL,
+        DEATH = NULL, eps = 1e-6)
+drop.fossil(phy, tol = 0)
+}
+\arguments{
+  \item{birth, death}{a numeric value or a (vectorized) function
+    specifying how speciation and extinction through time.}
+  \item{Tmax}{a numeric value giving the length of the simulation.}
+  \item{BIRTH, DEATH}{a (vectorized) function which is the primitive
+    of \code{birth} or \code{death}. This can be used to speed-up the
+    computation. By default, numerical integration is done.}
+  \item{eps}{a numeric value giving the time resolution of the
+    simulation; this may be increased (e.g., 0.001) to shorten
+    computation times.}
+  \item{phy}{an object of class \code{"phylo"}.}
+  \item{tol}{a numeric value giving the tolerance to consider a species
+    as extinct.}
+}
+\details{
+  Both functions use continuous-time algorithms described in the
+  references. The models are time-dependent birth--death models as
+  described in Kendall (1948). Speciation (birth) and extinction (death)
+  rates may be constant or vary through time according to an R function
+  specified by the user. In the latter case, \code{BIRTH} and/or
+  \code{DEATH} may be used of the primitives of \code{birth} and
+  \code{death} are known. In these functions time is the formal argument
+  and must be named \code{t}.
+}
+\value{
+  An object of class \code{"phylo"}.
+}
+\references{
+  Kendall, D. G. (1948) On the generalized ``birth-and-death''
+  process. \emph{Annals of Mathematical Statistics}, \bold{19}, 1--15.
+
+  Paradis, E. (2010) Time-dependent speciation and extinction from
+  phylogenies: a least squares approach. (under revision)
+  %\emph{Evolution}, \bold{59}, 1--12.
+}
+\author{Emmanuel Paradis}
+\seealso{
+  \code{\link{yule}}, \code{\link{yule.time}}, \code{\link{birthdeath}},
+  \code{\link{rtree}}, \code{\link{stree}}
+}
+\examples{
+plot(rlineage(0.1, 0)) # Yule process with lambda = 0.1
+plot(rlineage(0.1, 0.05)) # simple birth-death process
+b <- function(t) 1/(1 + exp(0.1*t - 2)) # logistic
+layout(matrix(1:2, 1))
+plot(rlineage(b, 0.01))
+plot(rbdtree(b, 0.01))
+}
+\keyword{datagen}
diff --git a/man/yule.time.Rd b/man/yule.time.Rd

index e5904e215420e391f4c13b2e7e9d409ec1ba5abf..fdbe0e7c3a385e3c737fd7f5f669ffaed916b3ca 100644 (file)
--- a/man/yule.time.Rd
+++ b/man/yule.time.Rd
@@ -44,7 +44,7 @@ yule.time(phy, birth, BIRTH = NULL, root.time = 0, opti = "nlm", start = 0.01)
    the log-likelihood function.
  }
  \value{
-  An object of class "yule" (see \code{\link{yule}}).
+  An object of class \code{"yule"} (see \code{\link{yule}}).
  }
  \author{Emmanuel Paradis}
  \seealso{
diff --git a/src/dist_dna.c b/src/dist_dna.c

index a289f159e5bafad3c353845b7a364c3c99d3c6e2..b8797e955b0647d88ed547e68c22119c9a0112fd 100644 (file)
--- a/src/dist_dna.c
+++ b/src/dist_dna.c
@@ -1,4 +1,4 @@
-/* dist_dna.c       2010-07-14 */
+/* dist_dna.c       2010-09-16 */
  
  /* Copyright 2005-2010 Emmanuel Paradis
  
@@ -742,7 +742,7 @@ void distDNA_GG95_pairdel(unsigned char *x, int *n, int *s, double *d,
  
  #define COMPUTE_DIST_LogDet\
      for (k = 0; k < 16; k++) Ftab[k] = ((double) Ntab[k]/L);\
-    d[target] = (-log(detFourByFour(Ftab))/4 - LN4)/4;\
+    d[target] = -log(detFourByFour(Ftab))/4 - LN4;\
      if (*variance) {\
          /* For the inversion, we first make U an identity matrix */\
          for (k = 1; k < 15; k++) U[k] = 0;\
diff --git a/src/rTrait.c b/src/rTrait.c

index e5922030774e2a76fcb662c44ebaf96dc3ceddcc..11cc63e2a0bdb2a66105873e690ab1e03d115074 100644 (file)
--- a/src/rTrait.c
+++ b/src/rTrait.c
@@ -1,4 +1,4 @@
-/* rTrait.c       2010-06-23 */
+/* rTrait.c       2010-07-26 */
  
  /* Copyright 2010 Emmanuel Paradis */
  
@@ -22,7 +22,13 @@ void rTraitCont(int *model, int *Nedge, int *edge1, int *edge2, double *el,
                 break;
         case 2 : for (i = *Nedge - 1; i >= 0; i--) {
                         GetRNGstate();
-                       x[edge2[i]] = x[edge1[i]] + sqrt(el[i])*(sigma[i]*norm_rand() - alpha[i]*(x[edge1[i]] - theta[i]));
+                       x[edge2[i]] = x[edge1[i]] - alpha[i]*el[i]*(x[edge1[i]] - theta[i]) + sqrt(el[i])*sigma[i]*norm_rand();
+                       PutRNGstate();
+               }
+               break;
+       case 3 : for (i = *Nedge - 1; i >= 0; i--) {
+                       GetRNGstate();
+                       x[edge2[i]] = x[edge1[i]] - (1 - exp(alpha[i]*el[i]))*(x[edge1[i]] - theta[i]) + sqrt(el[i])*sigma[i]*norm_rand();
                         PutRNGstate();
                 }
                 break;
author	paradis <paradis@6e262413-ae40-0410-9e79-b911bd7a66b7>
	Thu, 16 Sep 2010 15:12:58 +0000 (15:12 +0000)
committer	paradis <paradis@6e262413-ae40-0410-9e79-b911bd7a66b7>
	Thu, 16 Sep 2010 15:12:58 +0000 (15:12 +0000)
ChangeLog		patch \| blob \| history
DESCRIPTION		patch \| blob \| history
R/CDF.birth.death.R	[new file with mode: 0644]	patch \| blob
R/DNA.R		patch \| blob \| history
R/MPR.R	[new file with mode: 0644]	patch \| blob
R/ace.R		patch \| blob \| history
R/collapse.singles.R		patch \| blob \| history
R/compar.gee.R		patch \| blob \| history
R/dist.topo.R		patch \| blob \| history
R/pic.R		patch \| blob \| history
R/plot.phylo.R		patch \| blob \| history
R/rTrait.R		patch \| blob \| history
R/read.GenBank.R		patch \| blob \| history
man/DNAbin.Rd		patch \| blob \| history
man/MPR.Rd	[new file with mode: 0644]	patch \| blob
man/ape-internal.Rd		patch \| blob \| history
man/base.freq.Rd		patch \| blob \| history
man/dist.dna.Rd		patch \| blob \| history
man/dist.topo.Rd		patch \| blob \| history
man/mixedFontLabel.Rd		patch \| blob \| history
man/rTraitCont.Rd		patch \| blob \| history
man/read.GenBank.Rd		patch \| blob \| history
man/rlineage.Rd	[new file with mode: 0644]	patch \| blob
man/yule.time.Rd		patch \| blob \| history
src/dist_dna.c		patch \| blob \| history
src/rTrait.c		patch \| blob \| history