From 570e43f262a255d2da94a6f95decb10d550a170d Mon Sep 17 00:00:00 2001 From: Don Armstrong Date: Mon, 8 Aug 2016 13:54:19 -0500 Subject: [PATCH] add common_r_code --- README.md | 15 ++++ array_to_text.R | 13 +++ simple_grange.R | 7 ++ stat-poly-eq.R | 122 +++++++++++++++++++++++++++ to_latex.R | 215 ++++++++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 372 insertions(+) create mode 100644 README.md create mode 100644 array_to_text.R create mode 100644 simple_grange.R create mode 100644 stat-poly-eq.R create mode 100644 to_latex.R diff --git a/README.md b/README.md new file mode 100644 index 0000000..270bf43 --- /dev/null +++ b/README.md @@ -0,0 +1,15 @@ +# Common R Code + +This repository contains a selection of R snippets which are not +currently part of an existing library (and are in general fairly +trivial) + +# Components + +1. `array_to_text.R` converts data.frames and data.tables into tab + delimited files; suitable for using in scripts and Makefile rules +2. `simple_grange.R` generates very simple genetic ranges +3. `to_latex.R` converts tables to LaTeX; you probably want xtable + instead of the code in this file. +4. `stat-poly-eq.R` is code from Pedro Aphalo to plot equations; you + probably actually want the version in ggpmisc, not this code diff --git a/array_to_text.R b/array_to_text.R new file mode 100644 index 0000000..48aa7ab --- /dev/null +++ b/array_to_text.R @@ -0,0 +1,13 @@ +### $(R) $(ROPTS) -f array_to_text.R --args data_file_name array_name array_file.txt + +arguments <- commandArgs(trailingOnly=TRUE) +data.file.name <- arguments[1] +data.array <- arguments[2] +text.file.name <- arguments[3] + +load(data.file.name) +eval(parse(text=c("write.table(",data.array,",", + "file=text.file.name,", + 'sep="\\t")') + ) + ) diff --git a/simple_grange.R b/simple_grange.R new file mode 100644 index 0000000..982657b --- /dev/null +++ b/simple_grange.R @@ -0,0 +1,7 @@ +library("GenomicRanges") +simple.grange <- function(chr,start,end) { + return(GRanges(seqnames=Rle(paste0("chr",gsub("^chr","",as.character(chr))), + rep(1,length(chr))), + ranges=IRanges(start=start, + end=end))) +} diff --git a/stat-poly-eq.R b/stat-poly-eq.R new file mode 100644 index 0000000..82f418e --- /dev/null +++ b/stat-poly-eq.R @@ -0,0 +1,122 @@ +#' @title Add a curve from a fitted linear model and a label to a plot. +#' +#' @description \code{stat_poly_eq} fits a polynomial and generates a label with an equation +#' and/or coefficient of determination (R^2). +#' +#' @param mapping The aesthetic mapping, usually constructed with +#' \code{\link[ggplot2]{aes}} or \code{\link[ggplot2]{aes_string}}. Only needs +#' to be set at the layer level if you are overriding the plot defaults. +#' @param data A layer specific dataset - only needed if you want to override +#' the plot defaults. +#' @param geom The geometric object to use display the data +#' @param position The position adjustment to use for overlapping points on this +#' layer +#' @param show.legend logical. Should this layer be included in the legends? +#' \code{NA}, the default, includes if any aesthetics are mapped. \code{FALSE} +#' never includes, and \code{TRUE} always includes. +#' @param inherit.aes If \code{FALSE}, overrides the default aesthetics, rather +#' than combining with them. This is most useful for helper functions that +#' define both data and aesthetics and shouldn't inherit behaviour from the +#' default plot specification, e.g. \code{\link[ggplot2]{borders}}. +#' @param ... other arguments passed on to \code{\link[ggplot2]{layer}}. This +#' can include aesthetics whose values you want to set, not map. See +#' \code{\link[ggplot2]{layer}} for more details. +#' @param na.rm a logical indicating whether NA values should be stripped +#' before the computation proceeds. +#' @param formula a formula object +#' @param eq.with.lhs logical indicating whether lhs of equation is to be +#' included in label. +#' +#' @details This stat can be used to automatically annotate a plot with R^2, +#' adjusted R^2 or the fitted model equation. It supports only linear models +#' fitted with function \code{lm()}. The R^2 and adjusted R^2 annotations can be +#' used with any linear model formula. The fitted equation label is correclty +#' generated for polynomials or quasi-polynomials through the origin. Model +#' formulas can use \code{poly()} or be defined algebraically with terms of +#' powers of increasing magnitude with no missing intermediate terms, except +#' possibly for the intercept indicated by "- 1" or "-1" in the formula. The +#' validity of the \code{formula} is not checked in the current implementation, +#' and for this reason the default aesthetics sets R^2 as label for the +#' annotation. This stat only generates the label, the predicted values need +#' to be sepearately added to the plot, so to make sure that the same model +#' formula is used in all steps it is best to save the formula as an object +#' and supply this object as argument to the different statistics. +#' +#' @section Computed variables: +#' \describe{ \item{x}{x position for left edge} +#' \item{y}{y position near upper edge} +#' \item{eq.label}{equation for the +#' fitted polynomial as a character string to be parsed} +#' \item{rr.label}{\eqn{R^2} of the fitted model as a character string to be parsed} +#' \item{adj.rr.label}{Adjusted \eqn{R^2} of the fitted model as a character string +#' to be parsed} +#' \item{hjust}{Set to zero to override the default of the "text" geom.}} +#' +#' @examples +#' library(ggplot2) +#' # generate artificial data +#' set.seed(4321) +#' x <- 1:100 +#' y <- (x + x^2 + x^3) + rnorm(length(x), mean = 0, sd = mean(x^3) / 4) +#' my.data <- data.frame(x, y, group = c("A", "B"), y2 = y * c(0.5,2)) +#' # give a name to a formula +#' formula <- y ~ poly(x, 3, raw = TRUE) +#' # plot +#' ggplot(my.data, aes(x, y)) + +#' geom_point() + +#' geom_smooth(method = "lm", formula = formula) + +#' stat_poly_eq(formula = formula, parse = TRUE) +#' +#' @export +#' +stat_poly_eq <- function(mapping = NULL, data = NULL, geom = "text", + formula = NULL, eq.with.lhs = TRUE, + position = "identity", + na.rm = FALSE, show.legend = FALSE, + inherit.aes = TRUE, ...) { + ggplot2::layer( + stat = StatPolyEq, data = data, mapping = mapping, geom = geom, + position = position, show.legend = show.legend, inherit.aes = inherit.aes, + params = list(formula = formula, eq.with.lhs = eq.with.lhs, + na.rm = na.rm, + ...) + ) +} + +#' @rdname ggpmisc-ggproto +#' @format NULL +#' @usage NULL +#' @export +StatPolyEq <- + ggplot2::ggproto("StatPolyEq", ggplot2::Stat, + compute_group = function(data, + scales, + formula, + eq.with.lhs) { + mf <- lm(formula, data) + coefs <- coef(mf) + formula.rhs.chr <- as.character(formula)[3] + if (grepl("-1", formula.rhs.chr) || grepl("- 1", formula.rhs.chr)) { + coefs <- c(0, coefs) + } + rr <- summary(mf)$r.squared + adj.rr <- summary(mf)$adj.r.squared + eq.char <- as.character(signif(polynom::as.polynomial(coefs), 3)) + if (eq.with.lhs) { + eq.char <- paste("italic(y)", eq.char, sep = "~`=`~") + } + rr.char <- format(rr, digits = 2) + adj.rr.char <- format(adj.rr, digits = 2) + data.frame(x = min(data$x), + y = max(data$y) - 0.1 * diff(range(data$y)), + eq.label = gsub("x", "~italic(x)", eq.char, fixed = TRUE), + rr.label = paste("italic(R)^2", rr.char, sep = "~`=`~"), + adj.rr.label = paste("italic(R)[adj]^2", + adj.rr.char, sep = "~`=`~"), + hjust = 0) + }, + default_aes = + ggplot2::aes(label = ..rr.label.., hjust = ..hjust..), + required_aes = c("x", "y") + ) + diff --git a/to_latex.R b/to_latex.R new file mode 100644 index 0000000..2adfafe --- /dev/null +++ b/to_latex.R @@ -0,0 +1,215 @@ +to.org <- function(x){ + sub("([0-9.-]+)([eE])\\+?(-?)0*([0-9]+)","$\\1\\\\times 10^{\\3\\4}$", + x) +} +to.latex <- function(x){ + sub("([0-9]+)([eE])\\+?(-?)0*([0-9]+)","\\1\\\\ensuremath{\\\\times 10^{\\3\\4}}", + x) +} +cleanup.tolatex <- function(output) { + gsub("\\\\textrm\\{e\\}(-?)0*(\\d+)","\\\\ensuremath{\\\\times 10^{\\1\\2}}",output); +} + +trimws <- function(x,left=TRUE,right=TRUE){ + result <- x + if(left) + result <- gsub('^\\s+','',x,perl=TRUE) + if(right) + result <- gsub('\\s+$','',x,perl=TRUE) + + return(result) +} + + +print.summary.glm.xtable <- function (x, digits = max(3, getOption("digits") - 3), symbolic.cor = x$symbolic.cor, signif.stars = getOption("show.signif.stars"), ...) { + + cat("\\begin{verbatim}\n") + cat("\nCall:\n", paste(deparse(x$call), sep = "\n", collapse = "\n"), + "\n\n", sep = "") + cat("Deviance Residuals: \n") + if (x$df.residual > 5) { + x$deviance.resid <- quantile(x$deviance.resid, na.rm = TRUE) + names(x$deviance.resid) <- c("Min", "1Q", "Median", "3Q", + "Max") + } + xx <- zapsmall(x$deviance.resid, digits + 1) + cat("\\end{verbatim}\n") + print(table.to.latex(xx,digits=digits)) + cat("\\begin{verbatim}\n") + if (length(x$aliased) == 0L) { + cat("\nNo Coefficients\n") + } + else { + df <- if ("df" %in% names(x)) + x[["df"]] + else NULL + if (!is.null(df) && (nsingular <- df[3L] - df[1L])) + cat("\nCoefficients: (", nsingular, " not defined because of singularities)\n", + sep = "") + else cat("\nCoefficients:\n") + coefs <- x$coefficients + if (!is.null(aliased <- x$aliased) && any(aliased)) { + cn <- names(aliased) + coefs <- matrix(NA, length(aliased), 4L, dimnames = list(cn, + colnames(coefs))) + coefs[!aliased, ] <- x$coefficients + } + cat("\\end{verbatim}\n") + colnames(coefs) <- gsub("(Pr\\(>\\|z\\|\\))","$\\1$",colnames(coefs),perl=TRUE) + if (nrow(coefs) > 15) { + print(table.to.latex(coefs,longtable=TRUE)) + } else { + print(table.to.latex(coefs)) + } + cat("\\begin{verbatim}\n") + } + cat("\n(Dispersion parameter for ", x$family$family, " family taken to be ", + format(x$dispersion), ")\n\n", apply(cbind(paste(format(c("Null", + "Residual"), justify = "right"), "deviance:"), format(unlist(x[c("null.deviance", + "deviance")]), digits = max(5, digits + 1)), " on", + format(unlist(x[c("df.null", "df.residual")])), " degrees of freedom\n"), + 1L, paste, collapse = " "), sep = "") + if (nzchar(mess <- naprint(x$na.action))) + cat(" (", mess, ")\n", sep = "") + cat("AIC: ", format(x$aic, digits = max(4, digits + 1)), + "\n\n", "Number of Fisher Scoring iterations: ", x$iter, + "\n", sep = "") + correl <- x$correlation + if (!is.null(correl)) { + p <- NCOL(correl) + if (p > 1) { + cat("\nCorrelation of Coefficients:\n") + if (is.logical(symbolic.cor) && symbolic.cor) { + print(symnum(correl, abbr.colnames = NULL)) + } + else { + correl <- format(round(correl, 2), nsmall = 2, + digits = digits) + correl[!lower.tri(correl)] <- "" + print(correl[-1, -p, drop = FALSE], quote = FALSE) + } + } + } + cat("\\end{verbatim}\n") + cat("\n") + invisible(x) +} + + + +table.to.latex <- function(table, + digits=NULL, + format=NULL, + scientific=NA, + nsmall=0, + colspec="r", + useBooktabs=TRUE, + longtable=FALSE, + caption=NULL, + label=NULL, + rownames=TRUE, + centering=FALSE, + latex.pos="", + toprule=if(useBooktabs) "\\toprule" else "\\hline\\hline", + midrule=if(useBooktabs) "\\midrule" else "\\hline", + bottomrule=if(useBooktabs) "\\bottomrule" else "\\hline\\hline", + cols.as.is=FALSE, + ...) { + ans <- "" + header <- c(if(rownames){""}else{NULL},colnames(table)); + .format.function <- function(x){ + format(if(is.null(format)||is.null(scientific)) {x} else { + if(!is.na(suppressWarnings(as.numeric(x)))) as.numeric(x) else x}, + digits=digits,nsmall=nsmall,scientific=scientific,...) + } + if(is.null(dim(table))){ + header <- names(table) + rownames <- FALSE + } + res <- rbind(header, + cbind(if(rownames){rownames(table)}else{NULL}, + if(is.null(dim(table))){t(sapply(table,.format.function))} else + {apply(table,1:2,.format.function)} + )) + res[,] <- gsub("_","\\\\_",res[,]) + if (!missing(cols.as.is)) { + res.no.change <- + rbind(header, + cbind(if(rownames){rownames(table)}else{NULL}, + if(is.null(dim(table))){ + t(sapply(table,function(x){x})) + } else { + apply(table,1:2,function(x){x}) + } + )) + res[,cols.as.is] <- res.no.change[,cols.as.is] + } + ignore.rows <- -1 + coefrows <- 2:NROW(res) + if(rownames) { + coefrows <- 1:NROW(res) + ignore.rows <- TRUE + } +## res[coefrows,ignore.rows] <- sub("(\\*+)","$^{\\1}$", +## res[coefrows,ignore.rows]) + if (NROW(res) > 1) { + res[coefrows,ignore.rows] <- sub("([0-9]+)([eE])\\+?(-?)0*([0-9]+)","\\1\\\\ensuremath{\\\\times 10^{\\3\\4}}", + res[coefrows,ignore.rows]) + } + tabspec <- rep("l",ncol(res)) + if (length(colspec)==length(tabspec) && + length(colspec) > 1 + ) { + tabspec <- colspec + } + + tabbegin <- paste("\\begin{tabular}{",paste(tabspec,collapse=""),"}",sep="") + tabend <- "\\end{tabular}\n" + if (longtable) { + tabbegin <- paste("\\begin{longtable}{",paste(tabspec,collapse=""),"}",sep="") + tabend <- "\\end{longtable}\n" + if (!is.null(label)) { + tabend <- paste(sep="","\\label{",label,"}\\\\","\n", + tabend + ) + } + if(!is.null(caption)) + tabend <- paste(sep="","\\caption{",caption,"}\\\\","\n", + tabend + ) + } else if (!is.null(label)) { + tabbegin <- paste0("\\begin{table}",latex.pos,"\n", + if (centering) {"\\centering\n"} else {""}, + tabbegin) + if (!is.null(caption)) { + tabend <- paste0(tabend,"\\caption{",caption,"}\n") + } + tabend <- paste0(tabend,"\\label{",label,"}\n", + "\\end{table}","\n") + } + ans <- c(ans,tabbegin) + header <- NULL + if(length(toprule)) + header <- c(header,toprule) + if (!rownames) + header <- c(header,paste("\\multicolumn{1}{c}{",gsub("\\_"," ",trimws(res[1,1])),"}",sep="")) + for(j in 2:ncol(res)) + header <- c(header,paste(" & \\multicolumn{1}{c}{",gsub("\\_"," ",trimws(res[1,j])),"}",sep="")) + header <- c(header,"\\\\") + if(length(midrule)) + header <- c(header,midrule) + if (longtable) + header <- c(header,"\\endhead") + ans <- c(ans,header) + if (NROW(res) > 1) { + for(i in 2:NROW(res)) { + ans <- c(ans, + paste(paste(res[i,],collapse=" & "),"\\\\")) + } + } + if(length(bottomrule)) + ans <- c(ans,bottomrule) + ans <- c(ans,tabend) + structure(ans,class="Latex") + +} -- 2.39.2