##----------------------------------------------------------## ## Script for Statistical Modeling Functions and Packages ## ## John Fox ## ## Programming in R ## ## ICPSR Summer Program 2009 ## ##----------------------------------------------------------## # Writing statistical modeling functions # Example: logistic regression # note standard arguments: formula, data, etc. lreg <- function(formula, data, subset, na.action, model = TRUE, contrasts = NULL, max.iter=10, tol=1E-6) { lreg.fit <- function() # to do the computations { b <- b.last <- rep(0, ncol(X)) it <- 1 while (it <= max.iter){ p <- as.vector(1/(1 + exp(-X %*% b))) var.b <- solve(crossprod(X, p * (1 - p) * X)) b <- b + var.b %*% crossprod(X, y - p) if (max(abs(b - b.last)/(abs(b.last) + 0.01*tol)) < tol) break b.last <- b it <- it + 1 } if (it > max.iter) warning("maximum iterations exceeded") dev <- -2*sum(y*log(p) + (1 - y)*log(1 - p)) b <- as.vector(b) names(b) <- rownames(var.b) <- colnames(var.b) <- colnames(X) list(coefficients=b, var=var.b, iterations=it, deviance=dev, converged=it <= max.iter) } call <- match.call() # returns the function call mf <- match.call(expand.dots = FALSE) # the function call w/o ... args <- match(c("formula", "data", "subset", "na.action"), names(mf), 0) # which arguments are present? mf <- mf[c(1, args)] mf$drop.unused.levels <- TRUE mf[[1]] <- as.name("model.frame") mf <- eval.parent(mf) # create a model frame terms <- attr(mf, "terms") # terms object for the model y <- model.response(mf) # response variable values <- sort(unique(y)) if (length(values) != 2) stop("the response variable is not binary") y <- as.numeric(y == values[2]) X <- model.matrix(terms, mf, contrasts) # model matrix mod <- lreg.fit() class(mod) <- "lreg" mod$na.action <- attr(mf, "na.action") mod$contrasts <- attr(X, "contrasts") mod$xlevels <- .getXlevels(terms, mf) mod$call <- call mod$terms <- terms if (model) mod$model <- mf mod } # define standard methods: print, summary, coef, residuals, anova, etc. print.lreg <- function(x, ...) { cat("\nCall:\n") print(x$call) cat("\nCoefficients:\n") print(x$coefficients) cat("\nDeviance = ", x$deviance, "\n\n") if (!x$converged) cat("\n *** lreg did not converge ***\n\n") invisible(x) } vcov.lreg <- function(object, ...) { object$var } library(car) # for data mod.lreg <- lreg(lfp ~ ., data=Mroz) mod.lreg vcov(mod.lreg) coef(mod.lreg) # inherits coef.default coef stats:::coef.default debug(lreg) lreg(lfp ~ ., data=Mroz) undebug(lreg) # Writing and building packages remove(list=objects(all=TRUE)) # a blank slate objects() # Functions for a simple matrix-algebra package source( "http://socserv.socsci.mcmaster.ca/jfox/Courses/R-programming/matrixDemos.R") objects() # some examples A <- matrix(c(1,2,3,4,5,6,7,8,10), 3, 3) # a nonsingular matrix A RREF(A) # the reduced row-echelon form of A b <- 1:3 RREF(A, b) # solving the matrix equation Ax = b RREF(A, diag(3)) # inverting A B <- matrix(1:9, 3, 3) # a singular matrix B RREF(B) RREF(B, b) RREF(B, diag(3)) Ginv(A, fractions=TRUE) # a generalized inverse of A = inverse of A round(Ginv(A) %*% A, 6) # check Ginv(B, fractions=TRUE) # a generalized inverse of B B %*% Ginv(B) %*% B # check B C <- matrix(c(1,2,3,2,5,6,3,6,10), 3, 3) # nonsingular, symmetric C cholesky(C) cholesky(C) %*% t(cholesky(C)) # check # Create the package "skeleton" objects() remove(A, b, B, C) objects() setwd("c:/temp") # make a mess in the temp directory package.skeleton("matrixDemos") # to check package (at OS prompt): R CMD check matrixDemos # to install package (at OS prompt): R CMD INSTALL matrixDemos