R/lmorigin.R

   1 'lmorigin' <-
   2 function(formula, data=NULL, origin=TRUE, nperm=999, method=NULL, silent=FALSE)
   3 #
   4 # This program computes a multiple linear regression and performs tests
   5 # of significance of the equation parameters using permutations.
   6 #
   7 # origin=TRUE: the regression line can be forced through the origin. Testing
   8 # the significance in that case requires a special permutation procedure.
   9 #
  10 # Permutation methods: raw data or residuals of full model
  11 #    Default method in regression through the origin: raw data
  12 #    Default method in ordinary multiple regression: residuals of full model
  13 #    - In ordinary multiple regression when m = 1: raw data
  14 #
  15 #         Pierre Legendre, March 2009
  16 {
  17 if(!is.null(method)) method <- match.arg(method, c("raw", "residuals"))
  18 if(is.null(method) & origin==TRUE) method <- "raw"
  19 if(is.null(method) & origin==FALSE) method <- "residuals"
  20 if(nperm < 0) stop("Incorrect value for 'nperm'")
  21
  22 ## From the formula, find the variables and the number of observations 'n'
  23 toto <- lm(formula, data)
  24 mf <- match.call(expand.dots = FALSE)
  25 m <- match(c("formula", "data", "offset"), names(mf), 0)
  26 mf <- mf[c(1, m)]
  27 mf$drop.unused.levels <- TRUE
  28 mf[[1]] <- as.name("model.frame")
  29 mf <- eval(mf, parent.frame())
  30 var.names = colnames(mf)   # Noms des variables
  31 y <- as.matrix(mf[,1])
  32 colnames(y) <- var.names[1]
  33 X <- as.matrix(mf[,-1])
  34 n <- nrow(mf)
  35 m <- ncol(X)
  36
  37 a <- system.time({
  38 mm<- m           # No. regression coefficients, possibly including the intercept
  39 if(m == 1) method <- "raw"
  40 if(nrow(X) != n) stop("Unequal number of rows in y and X")
  41
  42 if(origin) {
  43         if(!silent) cat("Regression through the origin",'\n')
  44         reg <- lm(y ~ 0 + X)
  45         } else {
  46         if(!silent) cat("Multiple regression with estimation of intercept",'\n')
  47         reg <- lm(y ~ X)
  48         mm <- mm+1
  49         }
  50
  51 if(!silent) {
  52         if(nperm > 0) {
  53                 if(method == "raw") {
  54                         cat("Permutation method =",method,"data",'\n')
  55                         } else {
  56                         cat("Permutation method =",method,"of full model",'\n')
  57                         }
  58                 }
  59         }
  60 t.vec      <- summary(reg)$coefficients[,3]
  61 p.param.t  <- summary(reg)$coefficients[,4]
  62 df1        <- summary(reg)$fstatistic[[2]]
  63 df2        <- summary(reg)$fstatistic[[3]]
  64 F          <- summary(reg)$fstatistic[[1]]
  65 y.res      <- summary(reg)$residuals
  66 # b.vec      <- summary(reg)$coefficients[,1]
  67 # r.sq       <- summary(reg)$r.squared
  68 # adj.r.sq   <- summary(reg)$adj.r.squared
  69 # p.param.F  <- pf(F, df1, df2, lower.tail=FALSE)
  70
  71 if(df1 < m) stop("\nCollinearity among the X variables. Check using 'lm'")
  72
  73 # Permutation tests
  74 if(nperm > 0) {
  75
  76         nGT.F  <- 1
  77         nGT1.t <- rep(1,mm)
  78         nGT2.t <- rep(1,mm)
  79         sign.t <- sign(t.vec)
  80
  81         for(i in 1:nperm)  # Permute raw data. Always use this method for F-test
  82                 {
  83                 if(origin) {   # Regression through the origin
  84                         dia.bin <- diag((rbinom(n,1,0.5)*2)-1)
  85                         y.perm <- dia.bin %*% sample(y)
  86                         reg.perm <- lm(y.perm ~ 0 + X)
  87                         } else {   # Multiple linear regression
  88                         y.perm <- sample(y,n)
  89                         reg.perm <- lm(y.perm ~ X)
  90                         }
  91
  92                 # Permutation test of the F-statistic
  93                 F.perm <- summary(reg.perm)$fstatistic[1]
  94                 if(F.perm >= F) nGT.F <- nGT.F+1
  95
  96                 # Permutation tests of the t-statistics: permute raw data
  97                 if(method == "raw") {
  98                         t.perm <- summary(reg.perm)$coefficients[,3]
  99                         if(nperm <= 5) cat(t.perm,'\n')
 100                         for(j in 1:mm) {
 101                                 # One-tailed test in direction of sign
 102                                 if(t.perm[j]*sign.t[j] >= t.vec[j]*sign.t[j]) nGT1.t[j] <- nGT1.t[j]+1
 103                                 # Two-tailed test
 104                                 if( abs(t.perm[j]) >= abs(t.vec[j]) ) nGT2.t[j] <- nGT2.t[j]+1
 105                                 }
 106                         }
 107                 }
 108
 109         if(method == "residuals") {
 110         # Permute residuals of full model
 111                 for(i in 1:nperm) {
 112                         if(origin) {   # Regression through the origin
 113                                 dia.bin <- diag((rbinom(n,1,0.5)*2)-1)
 114                                 y.perm <- dia.bin %*% sample(y.res)
 115                                 reg.perm <- lm(y.perm ~ 0 + X)
 116                                 } else {   # Multiple linear regression
 117                                 y.perm <- sample(y.res,n)
 118                                 reg.perm <- lm(y.perm ~ X)
 119                                 }
 120
 121                         # Permutation tests of the t-statistics: permute residuals
 122                         t.perm <- summary(reg.perm)$coefficients[,3]
 123                         if(nperm <= 5) cat(t.perm,'\n')
 124                         for(j in 1:mm) {
 125                                 # One-tailed test in direction of sign
 126                                 if(t.perm[j]*sign.t[j] >= t.vec[j]*sign.t[j]) nGT1.t[j] <- nGT1.t[j]+1
 127                                 # Two-tailed test
 128                                 if( abs(t.perm[j]) >= abs(t.vec[j]) ) nGT2.t[j] <- nGT2.t[j]+1
 129                                 }
 130                         }
 131                 }
 132         # Compute the permutational probabilities
 133         p.perm.F <- nGT.F/(nperm+1)
 134         p.perm.t1 <- nGT1.t/(nperm+1)
 135         p.perm.t2 <- nGT2.t/(nperm+1)
 136
 137         ### Do not test intercept by permutation of residuals in multiple regression
 138         if(!origin & method=="residuals") {
 139                 if(silent) {   # Note: silent==TRUE in simulation programs
 140                         p.perm.t1[1] <- p.perm.t2[1] <- 1
 141                         } else {
 142                         p.perm.t1[1] <- p.perm.t2[1] <- NA
 143                         }
 144                 }
 145         }
 146
 147 })
 148 a[3] <- sprintf("%2f",a[3])
 149 if(!silent) cat("Computation time =",a[3]," sec",'\n')
 150 #
 151 if(nperm == 0) {
 152
 153         out <- list(reg=reg, p.param.t.2tail=p.param.t, p.param.t.1tail=p.param.t/2, origin=origin, nperm=nperm, var.names=var.names, call=match.call())
 154
 155         } else {
 156
 157         out <- list(reg=reg, p.param.t.2tail=p.param.t, p.param.t.1tail=p.param.t/2, p.perm.t.2tail=p.perm.t2, p.perm.t.1tail=p.perm.t1, p.perm.F=p.perm.F, origin=origin, nperm=nperm, method=method, var.names=var.names, call=match.call())
 158
 159         }
 160 #
 161 class(out) <- "lmorigin"
 162 out
 163 }