##--------------------------------------------------------------##
##                  Script for Lecture 1:                       ##
##                  Getting Started With R                      ##
##                        John Fox                              ##
##   Introduction to the R Statistical Computing Environment    ##
##                          ICPSR                               ##
##                          2021                                ##
##--------------------------------------------------------------##

# NOTE: one statement per line. A '#' comment extends to the end of the
#   physical line, so every statement must start on its own line or it is
#   silently treated as part of the preceding comment.

# An Illustrative Data Analysis: Duncan's Occupational Prestige Regression
    # we'll revisit this example tomorrow in greater detail

library("car") # load car package (for programs and data in carData package)
brief(Duncan)  # abbreviated output
help("Duncan") # codebook for the data set
View(Duncan)   # in the RStudio data viewer

    # Examining the Data

scatterplotMatrix( ~ prestige + education + income, # uses a 'one-sided' formula
                  smooth=list(spread=FALSE), id=list(n=3), data=Duncan)

    # Duncan's regression

duncan.model <- lm(prestige ~ education + income, data=Duncan)
    # note data argument, two-sided formula, return "lm" object
duncan.model
summary(duncan.model)  # more detailed report

    # added-variable plots (influence diagnostic)

avPlots(duncan.model, id=list(n=3, method="mahal"))

    # refit without ministers and conductors

whichNames(c("minister", "conductor"), Duncan) # row indices used in subset below
duncan.model.2 <- update(duncan.model, subset=-c(6, 16))
compareCoefs(duncan.model, duncan.model.2)


# Basics

    # arithmetic, interacting with the interpreter

        # basic arithmetic operations

2 + 3 # addition
2 - 3 # subtraction
2*3   # multiplication
2/3   # division
2^3   # exponentiation

        # precedence of operators

4^2 - 3*2
1 - 6 + 4
2^-3
(4^2) - (3*2) # use parentheses to group, clarify
4 + 3^2
(4 + 3)^2

-2--3
-2 - -3 # use spaces around binary operators to clarify

    # functions, arguments to functions, obtaining help and information

log(100)          # natural log (i.e., e^log(100) = 100)
log(100, base=10) # log base-10 (i.e., 10^log10(100) = 10^2 = 100)
log10(100)        # equivalent
log(100, b=10)    # argument abbreviation

args(log) # arguments of the log() function

exp(1)    # e^1 = e (Euler's number)

help("log") # documentation
?log        # equivalent
# NOTE: one statement per line. A '#' comment extends to the end of the
#   physical line, so every statement must start on its own line. The script
#   is meant to be stepped through interactively: two statements below raise
#   errors on purpose and would stop a non-interactive run.

example("log")  # execute examples in help page

log(100, 10)  # specifying arguments by position

apropos("log")
help.search("log") # also see search in RStudio Help tab

RSiteSearch("loglinear", "functions") # internet search

`+`(2, 3) # even operators are functions

    # creating vectors

c(1, 2, 3, 4)  # combine

1:4     # integer-sequence operator
4:1
-1:2    # note precedence
-(1:2)

seq(1, 4)
seq(2, 8, by=2)           # specify interval
seq(0, 1, by=0.1)         # non-integer sequence
seq(0, 1, length.out=11)  # specify number of elements

    # vectorized arithmetic

c(1, 2, 3, 4)/2
c(1, 2, 3, 4)/c(4, 3, 2, 1)
log(c(0.1, 1, 10, 100), 10)

c(1, 2, 3, 4) + c(4, 3)    # no warning
c(1, 2, 3, 4) + c(4, 3, 2) # produces warning

    # creating variables (named objects) by assignment

x <- c(1, 2, 3, 4) # assignment
x                  # print

x = c(1, 2, 3, 4) # can use = for assignment (best avoided but opinions vary)
x

x/2            # equivalent to c(1, 2, 3, 4)/2
(y <- sqrt(x)) # parentheses to assign and print trick

(x <- rnorm(100))
head(x)     # first few
summary(x)  # a "generic" function

    # character and logical data

(words <- c("To", "be", "or", "not", "to", "be"))
paste(words, collapse=" ")

(logical.values <- c(TRUE, TRUE, FALSE, TRUE))
!logical.values # negation (not operator)

    # coercion

sum(x)
sum(logical.values)      # number of TRUEs (coercion to numeric)
sum(!logical.values)     # number of FALSEs (TRUE -> 1, FALSE -> 0)

c("A", FALSE, 3.0)       # coerced to character
c(10, FALSE, -6.5, TRUE) # coerced to numeric

    # basic indexing

x[12]             # 12th element
words[2]          # second element
logical.values[3] # third element
x[6:15]           # elements 6 through 15
x[c(1, 3, 5)]     # 1st, 3rd, 5th elements (note use of c() )

x[-(11:100)] # omit elements 11 through 100
x[1:10]      # same!

v <- 1:4
v[c(TRUE, FALSE, FALSE, TRUE)] # logical indexing

    # comparison and logical operators

1 == 2       # equal to
1 != 2       # not equal to
1 <= 2       # less than or equal to
1 < 1:3      # less than (vectorized)
3:1 > 1:3    # greater than
3:1 >= 1:3   # greater than or equal to

TRUE & c(TRUE, FALSE)                        # logical and
c(TRUE, FALSE, FALSE) | c(TRUE, TRUE, FALSE) # logical or
TRUE && FALSE  # unvectorized and (for programming)
TRUE || FALSE  # unvectorized or

! c(T, F)   # abbreviations of TRUE and FALSE, best avoided!
T <- FALSE  # perverse! (but in most cases innocuous)
T
remove(T)
TRUE <- FALSE  # fails (deliberate error: TRUE is a reserved word)

(z <- x[1:10])      # first 10 elements of x
z < -0.5            # is each element less than -0.5?
z > 0.5             # is each element greater than 0.5
z < -0.5 | z > 0.5  # < and > are of higher precedence than |
abs(z) > 0.5        # absolute value, equivalent to last expression
z[abs(z) > 0.5]     # values of z for which |z| > 0.5

    # user-defined functions

mean(x)  # of 100 random-normal numbers
sum(x)/length(x)  # equivalent

# arithmetic mean of x: sum of the elements divided by their number
myMean <- function(x) {
    sum(x)/length(x)
}

myMean  # can be printed like any object

myMean(x)
y        # from sqrt(c(1, 2, 3, 4))
myMean(y)
myMean(1:100)
head(x)  # global x undisturbed

# standard deviation of x with the n - 1 denominator; calls myMean()
mySD <- function(x) {
    sqrt(sum((x - myMean(x))^2)/(length(x) - 1))
}

mySD(1:100)
sd(1:100)  # check

    # cleaning up

objects()
remove(v, x, y, z, logical.values, words)
objects()

    # using traceback()

letters
mySD(letters)  # deliberate error (sum() of a character vector), left uncaught
traceback()    # shows the call stack of the error just raised