From 570e43f262a255d2da94a6f95decb10d550a170d Mon Sep 17 00:00:00 2001
From: Don Armstrong <don@donarmstrong.com>
Date: Mon, 8 Aug 2016 13:54:19 -0500
Subject: [PATCH] add common_r_code

---
 README.md       |  15 ++++
 array_to_text.R |  13 +++
 simple_grange.R |   7 ++
 stat-poly-eq.R  | 122 +++++++++++++++++++++++++++
 to_latex.R      | 215 ++++++++++++++++++++++++++++++++++++++++++++++++
 5 files changed, 372 insertions(+)
 create mode 100644 README.md
 create mode 100644 array_to_text.R
 create mode 100644 simple_grange.R
 create mode 100644 stat-poly-eq.R
 create mode 100644 to_latex.R

diff --git a/README.md b/README.md
new file mode 100644
index 0000000..270bf43
--- /dev/null
+++ b/README.md
@@ -0,0 +1,15 @@
+# Common R Code
+
+This repository contains a selection of R snippets which are not
+currently part of an existing library (and are in general fairly
+trivial)
+
+# Components
+
+1. `array_to_text.R` converts data.frames and data.tables into tab
+   delimited files; suitable for using in scripts and Makefile rules
+2. `simple_grange.R` generates very simple genetic ranges
+3. `to_latex.R` converts tables to LaTeX; you probably want xtable
+   instead of the code in this file.
+4. `stat-poly-eq.R` is code from Pedro Aphalo to plot equations; you
+   probably actually want the version in ggpmisc, not this code
diff --git a/array_to_text.R b/array_to_text.R
new file mode 100644
index 0000000..48aa7ab
--- /dev/null
+++ b/array_to_text.R
@@ -0,0 +1,13 @@
+### $(R) $(ROPTS) -f array_to_text.R --args data_file_name array_name array_file.txt
+
+arguments <- commandArgs(trailingOnly=TRUE)
+data.file.name <- arguments[1]
+data.array <- arguments[2]
+text.file.name <- arguments[3]
+
+load(data.file.name)
+eval(parse(text=c("write.table(",data.array,",",
+             "file=text.file.name,",
+             'sep="\\t")')
+           )
+     )
diff --git a/simple_grange.R b/simple_grange.R
new file mode 100644
index 0000000..982657b
--- /dev/null
+++ b/simple_grange.R
@@ -0,0 +1,7 @@
+library("GenomicRanges")
+simple.grange <- function(chr,start,end) {
+    return(GRanges(seqnames=Rle(paste0("chr",gsub("^chr","",as.character(chr))),
+                       rep(1,length(chr))),
+                   ranges=IRanges(start=start,
+                       end=end)))
+}
diff --git a/stat-poly-eq.R b/stat-poly-eq.R
new file mode 100644
index 0000000..82f418e
--- /dev/null
+++ b/stat-poly-eq.R
@@ -0,0 +1,122 @@
+#' @title Add a curve from a fitted linear model and a label to a plot.
+#'
+#' @description \code{stat_poly_eq} fits a polynomial and generates a label with an equation
+#' and/or coefficient of determination (R^2).
+#'
+#' @param mapping The aesthetic mapping, usually constructed with
+#'   \code{\link[ggplot2]{aes}} or \code{\link[ggplot2]{aes_string}}. Only needs
+#'   to be set at the layer level if you are overriding the plot defaults.
+#' @param data A layer specific dataset - only needed if you want to override
+#'   the plot defaults.
+#' @param geom The geometric object to use display the data
+#' @param position The position adjustment to use for overlapping points on this
+#'   layer
+#' @param show.legend logical. Should this layer be included in the legends?
+#'   \code{NA}, the default, includes if any aesthetics are mapped. \code{FALSE}
+#'   never includes, and \code{TRUE} always includes.
+#' @param inherit.aes If \code{FALSE}, overrides the default aesthetics, rather
+#'   than combining with them. This is most useful for helper functions that
+#'   define both data and aesthetics and shouldn't inherit behaviour from the
+#'   default plot specification, e.g. \code{\link[ggplot2]{borders}}.
+#' @param ... other arguments passed on to \code{\link[ggplot2]{layer}}. This
+#'   can include aesthetics whose values you want to set, not map. See
+#'   \code{\link[ggplot2]{layer}} for more details.
+#' @param na.rm	a logical indicating whether NA values should be stripped
+#'   before the computation proceeds.
+#' @param formula a formula object
+#' @param eq.with.lhs logical indicating whether lhs of equation is to be
+#'   included in label.
+#'
+#' @details This stat can be used to automatically annotate a plot with R^2,
+#' adjusted R^2 or the fitted model equation. It supports only linear models
+#' fitted with function \code{lm()}. The R^2 and adjusted R^2 annotations can be
+#' used with any linear model formula. The fitted equation label is correclty
+#' generated for polynomials or quasi-polynomials through the origin. Model
+#' formulas can use \code{poly()} or be defined algebraically with terms of
+#' powers of increasing magnitude with no missing intermediate terms, except
+#' possibly for the intercept indicated by "- 1" or "-1" in the formula. The
+#' validity of the \code{formula} is not checked in the current implementation,
+#' and for this reason the default aesthetics sets R^2 as label for the
+#' annotation. This stat only generates the label, the predicted values need
+#' to be sepearately added to the plot, so to make sure that the same model
+#' formula is used in all steps it is best to save the formula as an object
+#' and supply this object as argument to the different statistics.
+#'
+#' @section Computed variables:
+#'   \describe{ \item{x}{x position for left edge}
+#'   \item{y}{y position near upper edge}
+#'   \item{eq.label}{equation for the
+#'   fitted polynomial as a character string to be parsed}
+#'   \item{rr.label}{\eqn{R^2} of the fitted model as a character string to be parsed}
+#'   \item{adj.rr.label}{Adjusted \eqn{R^2} of the fitted model as a character string
+#'   to be parsed}
+#'   \item{hjust}{Set to zero to override the default of the "text" geom.}}
+#'
+#' @examples
+#' library(ggplot2)
+#' # generate artificial data
+#' set.seed(4321)
+#' x <- 1:100
+#' y <- (x + x^2 + x^3) + rnorm(length(x), mean = 0, sd = mean(x^3) / 4)
+#' my.data <- data.frame(x, y, group = c("A", "B"), y2 = y * c(0.5,2))
+#' # give a name to a formula
+#' formula <- y ~ poly(x, 3, raw = TRUE)
+#' # plot
+#' ggplot(my.data, aes(x, y)) +
+#'   geom_point() +
+#'   geom_smooth(method = "lm", formula = formula) +
+#'   stat_poly_eq(formula = formula, parse = TRUE)
+#'
+#' @export
+#'
+stat_poly_eq <- function(mapping = NULL, data = NULL, geom = "text",
+                         formula = NULL, eq.with.lhs = TRUE,
+                         position = "identity",
+                         na.rm = FALSE, show.legend = FALSE,
+                         inherit.aes = TRUE, ...) {
+  ggplot2::layer(
+    stat = StatPolyEq, data = data, mapping = mapping, geom = geom,
+    position = position, show.legend = show.legend, inherit.aes = inherit.aes,
+    params = list(formula = formula, eq.with.lhs = eq.with.lhs,
+                  na.rm = na.rm,
+                  ...)
+  )
+}
+
+#' @rdname ggpmisc-ggproto
+#' @format NULL
+#' @usage NULL
+#' @export
+StatPolyEq <-
+  ggplot2::ggproto("StatPolyEq", ggplot2::Stat,
+                   compute_group = function(data,
+                                            scales,
+                                            formula,
+                                            eq.with.lhs) {
+                     mf <- lm(formula, data)
+                     coefs <- coef(mf)
+                     formula.rhs.chr <- as.character(formula)[3]
+                     if (grepl("-1", formula.rhs.chr) || grepl("- 1", formula.rhs.chr)) {
+                        coefs <- c(0, coefs)
+                     }
+                     rr <- summary(mf)$r.squared
+                     adj.rr <- summary(mf)$adj.r.squared
+                     eq.char <- as.character(signif(polynom::as.polynomial(coefs), 3))
+                     if (eq.with.lhs) {
+                       eq.char <- paste("italic(y)", eq.char, sep = "~`=`~")
+                     }
+                     rr.char <- format(rr, digits = 2)
+                     adj.rr.char <- format(adj.rr, digits = 2)
+                     data.frame(x = min(data$x),
+                                y = max(data$y) - 0.1 * diff(range(data$y)),
+                                eq.label = gsub("x", "~italic(x)", eq.char, fixed = TRUE),
+                                rr.label = paste("italic(R)^2", rr.char, sep = "~`=`~"),
+                                adj.rr.label = paste("italic(R)[adj]^2",
+                                                     adj.rr.char, sep = "~`=`~"),
+                                hjust = 0)
+                   },
+                   default_aes =
+                     ggplot2::aes(label = ..rr.label.., hjust = ..hjust..),
+                   required_aes = c("x", "y")
+  )
+
diff --git a/to_latex.R b/to_latex.R
new file mode 100644
index 0000000..2adfafe
--- /dev/null
+++ b/to_latex.R
@@ -0,0 +1,215 @@
+to.org <- function(x){
+    sub("([0-9.-]+)([eE])\\+?(-?)0*([0-9]+)","$\\1\\\\times 10^{\\3\\4}$",
+        x)
+}
+to.latex <- function(x){
+  sub("([0-9]+)([eE])\\+?(-?)0*([0-9]+)","\\1\\\\ensuremath{\\\\times 10^{\\3\\4}}",
+      x)
+}
+cleanup.tolatex <- function(output) {
+  gsub("\\\\textrm\\{e\\}(-?)0*(\\d+)","\\\\ensuremath{\\\\times 10^{\\1\\2}}",output);
+}
+
+trimws <- function(x,left=TRUE,right=TRUE){
+  result <- x
+  if(left)
+    result <- gsub('^\\s+','',x,perl=TRUE)
+  if(right)
+    result <- gsub('\\s+$','',x,perl=TRUE)
+
+  return(result)
+}
+
+
+print.summary.glm.xtable <- function (x, digits = max(3, getOption("digits") - 3), symbolic.cor = x$symbolic.cor, signif.stars = getOption("show.signif.stars"), ...) {
+
+    cat("\\begin{verbatim}\n")
+    cat("\nCall:\n", paste(deparse(x$call), sep = "\n", collapse = "\n"), 
+        "\n\n", sep = "")
+    cat("Deviance Residuals: \n")
+    if (x$df.residual > 5) {
+        x$deviance.resid <- quantile(x$deviance.resid, na.rm = TRUE)
+        names(x$deviance.resid) <- c("Min", "1Q", "Median", "3Q", 
+            "Max")
+    }
+    xx <- zapsmall(x$deviance.resid, digits + 1)
+    cat("\\end{verbatim}\n")
+    print(table.to.latex(xx,digits=digits))
+    cat("\\begin{verbatim}\n")
+    if (length(x$aliased) == 0L) {
+        cat("\nNo Coefficients\n")
+    }
+    else {
+        df <- if ("df" %in% names(x)) 
+            x[["df"]]
+        else NULL
+        if (!is.null(df) && (nsingular <- df[3L] - df[1L])) 
+            cat("\nCoefficients: (", nsingular, " not defined because of singularities)\n", 
+                sep = "")
+        else cat("\nCoefficients:\n")
+        coefs <- x$coefficients
+        if (!is.null(aliased <- x$aliased) && any(aliased)) {
+            cn <- names(aliased)
+            coefs <- matrix(NA, length(aliased), 4L, dimnames = list(cn, 
+                colnames(coefs)))
+            coefs[!aliased, ] <- x$coefficients
+        }
+        cat("\\end{verbatim}\n")
+        colnames(coefs) <- gsub("(Pr\\(>\\|z\\|\\))","$\\1$",colnames(coefs),perl=TRUE)
+        if (nrow(coefs) > 15) {
+          print(table.to.latex(coefs,longtable=TRUE))
+        } else {
+          print(table.to.latex(coefs))
+        }
+        cat("\\begin{verbatim}\n")
+    }
+    cat("\n(Dispersion parameter for ", x$family$family, " family taken to be ", 
+        format(x$dispersion), ")\n\n", apply(cbind(paste(format(c("Null", 
+            "Residual"), justify = "right"), "deviance:"), format(unlist(x[c("null.deviance", 
+            "deviance")]), digits = max(5, digits + 1)), " on", 
+            format(unlist(x[c("df.null", "df.residual")])), " degrees of freedom\n"), 
+            1L, paste, collapse = " "), sep = "")
+    if (nzchar(mess <- naprint(x$na.action))) 
+        cat("  (", mess, ")\n", sep = "")
+    cat("AIC: ", format(x$aic, digits = max(4, digits + 1)), 
+        "\n\n", "Number of Fisher Scoring iterations: ", x$iter, 
+        "\n", sep = "")
+    correl <- x$correlation
+    if (!is.null(correl)) {
+        p <- NCOL(correl)
+        if (p > 1) {
+            cat("\nCorrelation of Coefficients:\n")
+            if (is.logical(symbolic.cor) && symbolic.cor) {
+                print(symnum(correl, abbr.colnames = NULL))
+            }
+            else {
+                correl <- format(round(correl, 2), nsmall = 2, 
+                  digits = digits)
+                correl[!lower.tri(correl)] <- ""
+                print(correl[-1, -p, drop = FALSE], quote = FALSE)
+            }
+        }
+    }
+    cat("\\end{verbatim}\n")
+    cat("\n")
+    invisible(x)
+}
+
+
+
+table.to.latex <- function(table,
+                           digits=NULL,
+                           format=NULL,
+                           scientific=NA,
+                           nsmall=0,
+                           colspec="r",
+                           useBooktabs=TRUE,
+                           longtable=FALSE,
+                           caption=NULL,
+                           label=NULL,
+                           rownames=TRUE,
+                           centering=FALSE,
+                           latex.pos="",
+                           toprule=if(useBooktabs) "\\toprule" else "\\hline\\hline",
+                           midrule=if(useBooktabs) "\\midrule" else "\\hline",
+                           bottomrule=if(useBooktabs) "\\bottomrule" else "\\hline\\hline",
+                           cols.as.is=FALSE,
+                           ...) {
+  ans <- ""
+  header <- c(if(rownames){""}else{NULL},colnames(table));
+  .format.function <- function(x){
+    format(if(is.null(format)||is.null(scientific)) {x} else {
+      if(!is.na(suppressWarnings(as.numeric(x)))) as.numeric(x) else x},
+           digits=digits,nsmall=nsmall,scientific=scientific,...)
+  }
+  if(is.null(dim(table))){
+    header <- names(table)
+    rownames <- FALSE
+  }
+  res <- rbind(header,
+               cbind(if(rownames){rownames(table)}else{NULL},
+                     if(is.null(dim(table))){t(sapply(table,.format.function))} else
+                     {apply(table,1:2,.format.function)}
+                     ))
+  res[,] <- gsub("_","\\\\_",res[,])
+  if (!missing(cols.as.is)) {
+      res.no.change <-
+          rbind(header,
+                cbind(if(rownames){rownames(table)}else{NULL},
+                      if(is.null(dim(table))){
+                          t(sapply(table,function(x){x}))
+                      } else {
+                          apply(table,1:2,function(x){x})
+                      }
+                      ))
+      res[,cols.as.is] <- res.no.change[,cols.as.is]
+  }
+  ignore.rows <- -1
+  coefrows <- 2:NROW(res)
+  if(rownames) {
+      coefrows <- 1:NROW(res)
+      ignore.rows <- TRUE
+  }
+##  res[coefrows,ignore.rows] <- sub("(\\*+)","$^{\\1}$",
+##                                   res[coefrows,ignore.rows])
+  if (NROW(res) > 1) {
+      res[coefrows,ignore.rows] <- sub("([0-9]+)([eE])\\+?(-?)0*([0-9]+)","\\1\\\\ensuremath{\\\\times 10^{\\3\\4}}",
+                                       res[coefrows,ignore.rows])
+  }
+  tabspec <- rep("l",ncol(res))
+  if (length(colspec)==length(tabspec) &&
+      length(colspec) > 1
+      ) {
+    tabspec <- colspec
+  }
+
+  tabbegin <- paste("\\begin{tabular}{",paste(tabspec,collapse=""),"}",sep="")
+  tabend <- "\\end{tabular}\n"
+  if (longtable) {
+    tabbegin <- paste("\\begin{longtable}{",paste(tabspec,collapse=""),"}",sep="")
+    tabend <- "\\end{longtable}\n"
+    if (!is.null(label)) {
+      tabend <- paste(sep="","\\label{",label,"}\\\\","\n",
+                      tabend
+                      )
+    }
+    if(!is.null(caption))
+      tabend <- paste(sep="","\\caption{",caption,"}\\\\","\n",
+                      tabend
+                      )
+  } else if (!is.null(label)) {
+      tabbegin <- paste0("\\begin{table}",latex.pos,"\n",
+                         if (centering) {"\\centering\n"} else {""},
+                         tabbegin)
+      if (!is.null(caption)) {
+          tabend <- paste0(tabend,"\\caption{",caption,"}\n")
+      }
+      tabend <- paste0(tabend,"\\label{",label,"}\n",
+                       "\\end{table}","\n")
+  }
+  ans <- c(ans,tabbegin)
+  header <- NULL
+  if(length(toprule))
+    header <- c(header,toprule)
+  if (!rownames)
+    header <- c(header,paste("\\multicolumn{1}{c}{",gsub("\\_"," ",trimws(res[1,1])),"}",sep=""))
+  for(j in 2:ncol(res))
+    header <- c(header,paste(" & \\multicolumn{1}{c}{",gsub("\\_"," ",trimws(res[1,j])),"}",sep=""))
+  header <- c(header,"\\\\")
+  if(length(midrule))
+    header <- c(header,midrule)
+  if (longtable)
+    header <- c(header,"\\endhead")
+  ans <- c(ans,header)
+  if (NROW(res) > 1) {
+      for(i in 2:NROW(res)) {
+          ans <- c(ans,
+                   paste(paste(res[i,],collapse=" & "),"\\\\"))
+      }
+  }
+  if(length(bottomrule))
+    ans <- c(ans,bottomrule)
+  ans <- c(ans,tabend)
+  structure(ans,class="Latex")
+  
+}
-- 
2.39.5