##-----------------------------------------------------------------##
##  Script for Day 1a:                                             ##
##  Getting Started With R                                         ##
##  John Fox                                                       ##
##  The R Statistical Computing Environment: The Basics and Beyond ##
##  ICPSR Summer Program, Berkeley                                 ##
##  2016                                                           ##
##-----------------------------------------------------------------##

# NOTE: this is an interactive teaching script. Some commands wait for user
# input (file.choose(), identify()) and a few are marked as failing on
# purpose ("# fails", "# error!"), so it is meant to be stepped through
# rather than source()d in one go.

# Basics

# arithmetic, interacting with the interpreter

# basic arithmetic operations

2 + 3 # addition
2 - 3 # subtraction
2*3   # multiplication
2/3   # division
2^3   # exponentiation

# precedence of operators

4^2 - 3*2
1 - 6 + 4
2^-3

(4^2) - (3*2) # use parentheses to group, clarify
4 + 3^2
(4 + 3)^2

-2--3
-2 - -3 # use spaces to clarify

# functions, arguments to functions, obtaining help and information

log(100)
log(100, base=10)
log10(100) # equivalent
log(100, b=10) # argument abbreviation

help(log)
?log # equivalent
args(log)

log(100, 10) # arguments by position

example("log")

apropos("log")
help.search("log")
RSiteSearch("loglinear", "functions")

`+`(2, 3) # even operators are functions

# vectors

c(1, 2, 3, 4) # combine
1:4 # integer-sequence operator
4:1
-1:2 # note precedence
-(1:2)
seq(1, 4)
seq(2, 8, by=2) # specify interval
seq(0, 1, by=0.1) # non-integer sequence
seq(0, 1, length=11) # specify number of elements

# vectorized arithmetic

c(1, 2, 3, 4)/2
c(1, 2, 3, 4)/c(4, 3, 2, 1)
log(c(0.1, 1, 10, 100), 10)

c(1, 2, 3, 4) + c(4, 3) # no warning
c(1, 2, 3, 4) + c(4, 3, 2) # produces warning

# variables

x <- c(1, 2, 3, 4) # assignment
x # print

x = c(1, 2, 3, 4) # can use = for assignment (best avoided)
x

x/2
(y <- sqrt(x)) # assign and print

(x <- rnorm(100))
head(x) # first few
summary(x) # a "generic" function

# character and logical data

(words <- c("To", "be", "or", "not", "to", "be"))
paste(words, collapse=" ")

(vals <- c(TRUE, TRUE, FALSE, TRUE))
!vals # not operator

sum(vals) # coercion to numeric
sum(!vals)

c("A", FALSE, 3.0) # coercion to character

# basic indexing

x[12] # 12th element
words[2] # second element
vals[3] # third element
x[6:15] # elements 6 through 15
x[-(11:100)] # omit elements 11 through 100 (note parentheses)
x[1:10] # same!

# comparison and logical operators

1 == 2 # note == to test equality
1 != 2
1 <= 2
1 < 1:3 # vectorized
3:1 > 1:3
3:1 >= 1:3

TRUE & c(TRUE, FALSE) # and (vectorized)
c(TRUE, FALSE, FALSE) | c(TRUE, TRUE, FALSE) # or (vectorized)

! c(T, F) # abbreviations of TRUE and FALSE, best avoided!
T <- FALSE # perverse! (but in most cases innocuous)
T
remove(T)
TRUE <- FALSE # fails

(z <- x[1:10])
z < -0.5
z > 0.5
z < -0.5 | z > 0.5 # < and > of higher precedence than |
abs(z) > 0.5 # absolute value
z[abs(z) > 0.5] # indexing by a logical vector

# user-defined functions

mean(x)
sum(x)/length(x)

# myMean: arithmetic mean of x, computed as the sum of the elements
# divided by the number of elements
myMean <- function(x) sum(x)/length(x)
myMean # can be printed like any object
myMean(x)

y # from sqrt(c(1, 2, 3, 4))
myMean(y)
myMean(1:100)
head(x) # global x undisturbed

# myVar: sum of squared deviations from myMean(x), divided by
# length(x) - 1; compared with var() below as a check
myVar <- function(x) sum((x - myMean(x))^2)/(length(x) - 1)
myVar(1:100)
var(1:100) # check

# cleaning up

objects()
remove(x, y, z, vals, words)
objects()

# using traceback()

letters
myVar(letters) # character input: the resulting error is what traceback() inspects
traceback()

# Duncan example

# Using the R Commander

library(Rcmdr)

# creating a data frame from data stored in a file

Duncan <- read.table(file.choose(), header=TRUE)
file.choose() # explanation
Duncan
head(Duncan)
summary(Duncan) # generic summary function

# attaching a data frame (best avoided)

prestige # error!

attach(Duncan) # not the best approach
prestige

# the search path

search()

# distributions and bivariate relationships

windows() # for demo, not necessary in RStudio; on Mac OS X, quartz()

hist(prestige)

pairs(cbind(prestige, income, education))

pairs(cbind(prestige, income, education),
    panel=function(x, y){
        points(x, y)
        abline(lm(y ~ x), lty="dashed")
        lines(lowess(x, y))
    },
    diag.panel=function(x){
        par(new=TRUE)
        hist(x, main="", axes=FALSE)
    }
)

# scatmat: scatterplot matrix of the variables supplied through `...`
# (they are combined with cbind() and passed to pairs()).
#   - off-diagonal panels: the points, the lm(y ~ x) line drawn with
#     line type 2, and a lowess(x, y) smooth
#   - diagonal panels: a histogram of the variable, without title or axes
scatmat <- function(...) { # user-defined function
    pairs(cbind(...),
        panel=function(x, y){
            points(x, y)
            abline(lm(y ~ x), lty=2)
            lines(lowess(x, y))
        },
        diag.panel=function(x){
            par(new=TRUE) # let hist() draw into the current panel
            hist(x, main="", axes=FALSE)
        }
    )
}

scatmat(prestige, income, education)

plot(education, income)
identify(education, income, row.names(Duncan)) # must exit from identify mode!
row.names(Duncan)[c(6, 16, 27)]

# fitting a regression

(duncan.model <- lm(prestige ~ income + education))

summary(duncan.model) # again, summary generic

detach("Duncan")