stat-poly-eq.R

   1 #' @title Add a curve from a fitted linear model and a label to a plot.\r
   2 #'\r
   3 #' @description \code{stat_poly_eq} fits a polynomial and generates a label with an equation\r
   4 #' and/or coefficient of determination (R^2).\r
   5 #'\r
   6 #' @param mapping The aesthetic mapping, usually constructed with\r
   7 #'   \code{\link[ggplot2]{aes}} or \code{\link[ggplot2]{aes_string}}. Only needs\r
   8 #'   to be set at the layer level if you are overriding the plot defaults.\r
   9 #' @param data A layer specific dataset - only needed if you want to override\r
  10 #'   the plot defaults.\r
  11 #' @param geom The geometric object to use display the data\r
  12 #' @param position The position adjustment to use for overlapping points on this\r
  13 #'   layer\r
  14 #' @param show.legend logical. Should this layer be included in the legends?\r
  15 #'   \code{NA}, the default, includes if any aesthetics are mapped. \code{FALSE}\r
  16 #'   never includes, and \code{TRUE} always includes.\r
  17 #' @param inherit.aes If \code{FALSE}, overrides the default aesthetics, rather\r
  18 #'   than combining with them. This is most useful for helper functions that\r
  19 #'   define both data and aesthetics and shouldn't inherit behaviour from the\r
  20 #'   default plot specification, e.g. \code{\link[ggplot2]{borders}}.\r
  21 #' @param ... other arguments passed on to \code{\link[ggplot2]{layer}}. This\r
  22 #'   can include aesthetics whose values you want to set, not map. See\r
  23 #'   \code{\link[ggplot2]{layer}} for more details.\r
  24 #' @param na.rm a logical indicating whether NA values should be stripped\r
  25 #'   before the computation proceeds.\r
  26 #' @param formula a formula object\r
  27 #' @param eq.with.lhs logical indicating whether lhs of equation is to be\r
  28 #'   included in label.\r
  29 #'\r
  30 #' @details This stat can be used to automatically annotate a plot with R^2,\r
  31 #' adjusted R^2 or the fitted model equation. It supports only linear models\r
  32 #' fitted with function \code{lm()}. The R^2 and adjusted R^2 annotations can be\r
  33 #' used with any linear model formula. The fitted equation label is correclty\r
  34 #' generated for polynomials or quasi-polynomials through the origin. Model\r
  35 #' formulas can use \code{poly()} or be defined algebraically with terms of\r
  36 #' powers of increasing magnitude with no missing intermediate terms, except\r
  37 #' possibly for the intercept indicated by "- 1" or "-1" in the formula. The\r
  38 #' validity of the \code{formula} is not checked in the current implementation,\r
  39 #' and for this reason the default aesthetics sets R^2 as label for the\r
  40 #' annotation. This stat only generates the label, the predicted values need\r
  41 #' to be sepearately added to the plot, so to make sure that the same model\r
  42 #' formula is used in all steps it is best to save the formula as an object\r
  43 #' and supply this object as argument to the different statistics.\r
  44 #'\r
  45 #' @section Computed variables:\r
  46 #'   \describe{ \item{x}{x position for left edge}\r
  47 #'   \item{y}{y position near upper edge}\r
  48 #'   \item{eq.label}{equation for the\r
  49 #'   fitted polynomial as a character string to be parsed}\r
  50 #'   \item{rr.label}{\eqn{R^2} of the fitted model as a character string to be parsed}\r
  51 #'   \item{adj.rr.label}{Adjusted \eqn{R^2} of the fitted model as a character string\r
  52 #'   to be parsed}\r
  53 #'   \item{hjust}{Set to zero to override the default of the "text" geom.}}\r
  54 #'\r
  55 #' @examples\r
  56 #' library(ggplot2)\r
  57 #' # generate artificial data\r
  58 #' set.seed(4321)\r
  59 #' x <- 1:100\r
  60 #' y <- (x + x^2 + x^3) + rnorm(length(x), mean = 0, sd = mean(x^3) / 4)\r
  61 #' my.data <- data.frame(x, y, group = c("A", "B"), y2 = y * c(0.5,2))\r
  62 #' # give a name to a formula\r
  63 #' formula <- y ~ poly(x, 3, raw = TRUE)\r
  64 #' # plot\r
  65 #' ggplot(my.data, aes(x, y)) +\r
  66 #'   geom_point() +\r
  67 #'   geom_smooth(method = "lm", formula = formula) +\r
  68 #'   stat_poly_eq(formula = formula, parse = TRUE)\r
  69 #'\r
  70 #' @export\r
  71 #'\r
  72 stat_poly_eq <- function(mapping = NULL, data = NULL, geom = "text",\r
  73                          formula = NULL, eq.with.lhs = TRUE,\r
  74                          position = "identity",\r
  75                          na.rm = FALSE, show.legend = FALSE,\r
  76                          inherit.aes = TRUE, ...) {\r
  77   ggplot2::layer(\r
  78     stat = StatPolyEq, data = data, mapping = mapping, geom = geom,\r
  79     position = position, show.legend = show.legend, inherit.aes = inherit.aes,\r
  80     params = list(formula = formula, eq.with.lhs = eq.with.lhs,\r
  81                   na.rm = na.rm,\r
  82                   ...)\r
  83   )\r
  84 }\r
  85 \r
  86 #' @rdname ggpmisc-ggproto\r
  87 #' @format NULL\r
  88 #' @usage NULL\r
  89 #' @export\r
  90 StatPolyEq <-\r
  91   ggplot2::ggproto("StatPolyEq", ggplot2::Stat,\r
  92                    compute_group = function(data,\r
  93                                             scales,\r
  94                                             formula,\r
  95                                             eq.with.lhs) {\r
  96                      mf <- lm(formula, data)\r
  97                      coefs <- coef(mf)\r
  98                      formula.rhs.chr <- as.character(formula)[3]\r
  99                      if (grepl("-1", formula.rhs.chr) || grepl("- 1", formula.rhs.chr)) {\r
 100                         coefs <- c(0, coefs)\r
 101                      }\r
 102                      rr <- summary(mf)$r.squared\r
 103                      adj.rr <- summary(mf)$adj.r.squared\r
 104                      eq.char <- as.character(signif(polynom::as.polynomial(coefs), 3))\r
 105                      if (eq.with.lhs) {\r
 106                        eq.char <- paste("italic(y)", eq.char, sep = "~`=`~")\r
 107                      }\r
 108                      rr.char <- format(rr, digits = 2)\r
 109                      adj.rr.char <- format(adj.rr, digits = 2)\r
 110                      data.frame(x = min(data$x),\r
 111                                 y = max(data$y) - 0.1 * diff(range(data$y)),\r
 112                                 eq.label = gsub("x", "~italic(x)", eq.char, fixed = TRUE),\r
 113                                 rr.label = paste("italic(R)^2", rr.char, sep = "~`=`~"),\r
 114                                 adj.rr.label = paste("italic(R)[adj]^2",\r
 115                                                      adj.rr.char, sep = "~`=`~"),\r
 116                                 hjust = 0)\r
 117                    },\r
 118                    default_aes =\r
 119                      ggplot2::aes(label = ..rr.label.., hjust = ..hjust..),\r
 120                    required_aes = c("x", "y")\r
 121   )\r
 122 \r