##--------------------------------------##
##     Script for Basic R Review        ##
##             John Fox                 ##
##         Programming in R             ##
##     ICPSR Summer Program 2009        ##
##--------------------------------------##

# This script is meant to be stepped through interactively, one statement
# at a time: it opens help pages, a file-chooser dialog and a data viewer,
# waits for mouse clicks in identify(), and contains statements that raise
# errors as part of the demonstration.

# Basics

# arithmetic, interacting with the interpreter

# basic arithmetic operations

2+3
2-3
2*3
2/3
2^3   # exponentiation
2**3  # exponentiation

# precedence of operators

4^2-3*2
(4^2) - (3*2)  # use parentheses to group, spaces to clarify
1 - 6 + 4
2^-3
-2--3
-2 - -3        # use spaces to clarify

4 - 2 ^2
(4 - 2)^2      # parentheses to group

# functions, arguments to functions, obtaining help

log(100)
log(100, base=10)
log(100, b=10)   # argument name abbreviated (partial matching of "base")
help(log)
?log
log(100, 10)     # arguments matched by position
apropos("log")
help.search("log")
RSiteSearch("loglinear", "functions")

"+"(2, 3)  # even operators are functions
`+`(2, 3)  # "backtick" preferred in this context (to mean name of object)

# vectors

c(1,2,3,4)  # combine
1:4         # sequence operator
4:1
-1:2        # note precedence
seq(1,4)
seq(2, 8, by=2)
seq(0, 1, by=.1)
seq(0, 1, length.out=11)    # full argument name rather than "length"
seq_along(seq(2, 8, by=2))  # to get indices for vector of unknown length

# vectorized arithmetic

c(1,2,3,4)/2
c(1,2,3,4)/c(4,3,2,1)
log(c(0.1,1,10,100), 10)
c(1,2,3,4) + c(4,3)    # "recycling" rule
c(1,2,3,4) + c(4,3,2)  # recycles with a warning: 4 is not a multiple of 3

# variables

x <- c(1,2,3,4)
x
x/2
(y <- sqrt(x))  # assign and print
x <- rnorm(100)
x
summary(x)  # a "generic" function
`a vector` <- 1:5  # a non-standard name
`a vector`
z = 1:10  # alternative assignment operator
z
1:10 -> w  # also works
w
assign("u", sqrt(w))  # as does this
u

# basic indexing

x[21]
x[11:20]
x[-(11:100)]  # careful here!

# comparison and logical operators

1 == 2
1 < 2
"b" < c("a", "c")
# full set: < <= == >= > !=

TRUE && FALSE           # AND
TRUE & c(TRUE, FALSE)   # vectorized AND
TRUE || FALSE           # OR
FALSE | c(TRUE, FALSE)  # vectorized OR
! c(T, F)  # NOT (abbreviations of TRUE and FALSE, best avoided!)

# T and F are ordinary variables and can be reassigned, which is why the
# abbreviations are best avoided
T <- FALSE
T
T || F
remove(T)  # drop the global T so the built-in value is visible again

z <- x[1:10]
z
abs(z) > 0.5
z[abs(z) > 0.5]  # indexing by a logical vector

# user-defined functions

mean(x)
sum(x)/length(x)

my.mean <- function(x) sum(x)/length(x)  # function definition
my.mean(x)
my.mean(y)
my.mean(1:100)
x  # "global" value of x unaffected

# cleaning up

objects()
remove(`a vector`, u, w, x, y, z)
objects()

# using traceback()

letters
my.var <- function(x) sum((x - my.mean(x))^2)/(length(x) - 1)
my.var(rnorm(1e6))
my.var(letters)  # presumably meant to fail (character input), to set up traceback()
traceback()

# Duncan example

# creating a data frame from data stored in a file

Duncan <- read.table(file.choose(), header=TRUE)  # opens a file-chooser dialog
Duncan
View(Duncan)
head(Duncan)
summary(Duncan)

# attaching a data frame (not best practice!)

prestige  # presumably meant to fail: Duncan is not yet on the search path
attach(Duncan)
prestige

# the search path

search()

# distributions and bivariate relationships

hist(prestige)

pairs(cbind(prestige, income, education),
    panel=function(x, y){
        points(x, y)
        abline(lm(y ~ x), lty=2)  # least-squares line (broken)
        lines(lowess(x, y))       # lowess smooth
    },
    diag.panel=function(x){
        par(new=TRUE)
        hist(x, main="", axes=FALSE)
    }
)

plot(income, education)
identify(income, education, row.names(Duncan))  # click points to label them
row.names(Duncan)[c(6,16,27)]

# fitting a regression

duncan.model <- lm(prestige ~ income + education)
duncan.model
summary(duncan.model)

# regression diagnostics

library(car)
search()

# cooks.distance() (stats) replaces car's cookd(), which is defunct in
# current releases of car
plot(cooks.distance(duncan.model))
# cutoff 4/(n - k - 1) with k = 2 predictors; n is taken from the data
# instead of being hard-coded
abline(h = 4/(nrow(Duncan) - 2 - 1))
identify(seq_len(nrow(Duncan)), cooks.distance(duncan.model),
    row.names(Duncan))

# NOTE(review): car >= 3.0 takes point labels through the id= list; with an
# older car (2.x) pass labels=row.names(Duncan) directly instead
influencePlot(duncan.model, id=list(labels=row.names(Duncan)))

# avPlots() is the current name of the defunct av.plots()
avPlots(duncan.model, id=list(labels=row.names(Duncan)))

# refitting the model

# whichNames() is the current car spelling of which.names().
# The variable is called "remove" like the base function; calls such as
# remove(x) still find the function, but the name is easy to misread.
remove <- whichNames(c("minister", "conductor"), Duncan)
remove
duncan.model.2 <- update(duncan.model, subset=-remove)
summary(duncan.model.2)