##--------------------------------------------------------------##
##                  Script for Lecture 8:                       ##
##           R Programming 2: Beyond the Basics                 ##
##                        John Fox                              ##
##   Introduction to the R Statistical Computing Environment    ##
##                          ICPSR                               ##
##                          2021                                ##
##--------------------------------------------------------------##

# Non-trivial programming example:
#   implementation of Newton-Rapson algorithm for logistic regression

  # first attempt, bugged!

lreg <- function(X, y, max.iter=10, tol=1E-6, verbose=FALSE) {
  X <- cbind(1, X)
  b <- b.last <- rep(0, ncol(X))
  it <- 1
  while (it <= max.iter){
    if (verbose) cat("\niteration = ", it, ": ", b)
    p <- 1/(1 + exp(-X %*% b))
    V <- diag(p * (1 - p))
    var.b <- solve(t(X) %*% V %*% X)
    b <- b + var.b %*% t(X) %*% (y - p)
    if (max(abs(b - b.last)/(abs(b.last) + 0.01*tol)) < tol) break
    b.last <- b
    it <- it + 1
  }
  if (it > max.iter) warning("maximum iterations exceeded")
  if (verbose) cat("\n")
  list(coef=as.vector(b), vcov=var.b, iterations=it)
}

      # prepare data to test

library("carData") # for data
head(Mroz, 10)
Mroz$lfp <- with(Mroz, ifelse(lfp == "yes", 1, 0))
Mroz$wc <- with(Mroz, ifelse(wc == "yes", 1, 0))
Mroz$hc <- with(Mroz, ifelse(hc == "yes", 1, 0))
head(Mroz, 10)
xtabs(~lfp, data=Mroz)

mod.mroz <- with(Mroz, # fails!
                 lreg(cbind(k5, k618, age, wc, hc, lwg, inc), lfp))

      # debugging (a digression)

          # call stack to error: traceback

traceback()

          # debug(): the step-through debugger
debug(lreg)
mod.mroz <- with(Mroz,
                 lreg(cbind(k5, k618, age, wc, hc, lwg, inc), lfp))
undebug(lreg)

          # also see ?browser, ?debugonce

  # logistic regression by Newton-Raphson (corrected):

lreg <- function(X, y, max.iter=10, tol=1E-6, verbose=FALSE){
  # X is the model matrix
  # y is the response vector of 0s and 1s
  # max.iter is the maximum number of iterations
  # tol is a convergence criterion
  # verbose: show iteration history?
  X <- cbind(1, X)  # add constant
  b <- b.last <- rep(0, ncol(X))  # initialize coefficients
  it <- 1  # initialize iteration counter
  while (it <= max.iter){
    if (verbose) cat("\niteration = ", it, ": ", b)
    p <- as.vector(1/(1 + exp(-X %*% b)))
    V <- diag(p * (1 - p))
    var.b <- solve(t(X) %*% V %*% X)
    b <- b + var.b %*% t(X) %*% (y - p)  # update coefficients
    if (max(abs(b - b.last)/(abs(b.last) + 0.01*tol)) < tol) break
    b.last <- b  # update previous coefficients
    it <- it + 1  # increment counter
  }
  if (verbose) cat("\n")  # newline
  if (it > max.iter) warning("maximum iterations exceeded")
  list(coefficients=as.vector(b), vcov=var.b, iterations=it)
}

mod.mroz <- with(Mroz,
                 lreg(cbind(k5, k618, age, wc, hc, lwg, inc), lfp,
                      verbose=TRUE))
coef(mod.mroz)
sqrt(diag(mod.mroz$vcov))
mod.mroz$iterations
summary(glm(lfp ~ ., data=Mroz, family=binomial)) # check

    # optimizing code (another digression)
        
        # a larger (but not large) problem

set.seed(12345)  # for reproducibility
X <- matrix(rnorm(10000*10), 10000, 10)
y <- rbinom(10000, 1, prob=1/(1 + exp(-cbind(1, X) %*% rep(1, 11))))
head(X)
head(y)
table(y)

            # measuring time: system.time (also see microbenchmark package)

system.time(mod.1 <- lreg(X, y))
system.time(mod.glm <- glm(y ~ X, family=binomial))

rbind(coef(mod.1), coef(mod.glm))
all.equal(coef(mod.1), coef(mod.glm))

            # profiling code for time and memory usage

(tmp <- tempfile()) # create temporary file
Rprof(tmp, memory.profiling=TRUE) # turn on profiling
mod.1 <- lreg(X, y)
Rprof() # turn off profiling
summaryRprof(tmp, memory="both") # summarize results
unlink(tmp) # delete temporary file (note not mnemonic!)

      # making the program more efficient
      #  (could use sparse matrix arithmetic via the Matrix package)

diagprod <- function(d, X){
  if (!is.matrix(X) || !nrow(X) == length(d)) stop("X and d not conformable")
  d*X
}

# explanation:

XX <- matrix(1, 5, 3) # don't want to overwrite X
XX
(V <- diag(1:5))
V %*% XX
(1:5) * XX # same thing!

diagprod(1:5, XX) # same thing

lreg2 <- function(X, y, max.iter=10, tol=1E-6) {
  X <- cbind(1, X)
  b <- b.last <- rep(0, ncol(X))
  it <- 1
  while (it <= max.iter){
    p <- as.vector(1/(1 + exp(-X %*% b)))
    var.b <- solve(crossprod(X, diagprod(p*(1 - p), X)))
    b <- b + var.b %*% crossprod(X, y - p)
    if (max(abs(b - b.last)/(abs(b.last) + 0.01*tol)) < tol) break
    b.last <- b
    it <- it + 1
  }
  if (it > max.iter) warning("maximum iterations exceeded")
  list(coefficients=as.vector(b), vcov=var.b, iterations=it)
}

system.time(mod.2 <- lreg2(X, y))
all.equal(coef(mod.2), coef(mod.1)) # check

tmp <- tempfile() # create temporary file
Rprof(tmp, memory.profiling=TRUE, interval=0.002) 
  # profiling with smaller sampling interval
mod.2 <- lreg2(X, y)
Rprof() # turn off profiling
summaryRprof(tmp, memory="both") # summarize results
unlink(tmp) # delete temporary file


# The S3 object system 

    # S3 classes

mod.prestige <- lm(prestige ~ income + education + women,
    data=Prestige)
attributes(mod.prestige)
class(mod.prestige)


v <- 1:10
v
attributes(v)
class(v)

class(v) <- "character"
attributes(v)
v
class(v)

    # S3 generic functions and methods

print # the print generic

stats:::print.lm # print method for "lm" objects (from the stats package namespace)

print(mod.prestige)
mod.prestige # print() is invoked implicitly
stats:::print.lm(mod.prestige)  # equivalent, but bad form
print.lm(mod.prestige)  # doesn't work, because method not "exported"

methods("print") # print methods
methods(class="lm") # methods for objects of class "lm"

    # S3 "inheritance"

mod.mroz <- glm(lfp ~ ., family=binomial, data=Mroz)
class(mod.mroz)

dfbeta # generic function
methods("dfbeta") # only an "lm" method
head(dfbeta(mod.prestige)) # for an "lm" object
head(dfbeta(mod.mroz)) # for a "glm" object, inheriting "lm" method