##-----------------------------------------------------------------##
##                    Script for Day 1a:                           ##
##                 Getting Started With R                          ##
##                        John Fox                                 ##
## The R Statistical Computing Environment: The Basics and Beyond  ##
##              ICPSR Summer Program, Berkeley                     ##
##                          2016                                   ##
##-----------------------------------------------------------------##

# Basics

    # arithmetic, interacting with the interpreter
    # (an expression entered at top level is evaluated and its value is printed)

        # basic arithmetic operations

2 + 3 # addition
2 - 3 # subtraction
2*3   # multiplication
2/3   # division
2^3   # exponentiation

        # precedence of operators: ^ is evaluated before * and /,
        # which are evaluated before + and -
        
4^2 - 3*2  # 16 - 6
1 - 6 + 4  # + and - have equal precedence and go left to right: (1 - 6) + 4
2^-3       # the unary minus applies to the exponent: 2^(-3)

(4^2) - (3*2) # use parentheses to group, clarify
4 + 3^2       # 4 + 9
(4 + 3)^2     # 7^2

-2--3   # legal but hard to read: (-2) - (-3)
-2 - -3 # use spaces to clarify


    # functions, arguments to functions, obtaining help and information

log(100)  # natural log: the base argument defaults to e
log(100, base=10)  # argument supplied by name
log10(100) # equivalent
log(100, b=10)  # argument abbreviation ("b" partially matches "base")

help(log)  # open the help page for log
?log  # equivalent
args(log)  # display the argument names and defaults of log

log(100, 10)  # arguments by position

example("log")  # run the examples from the help page

apropos("log")  # names of objects on the search path that match "log"
help.search("log")  # search the installed help pages for "log"

RSiteSearch("loglinear", "functions")  # web search restricted to function help pages; needs an internet connection

`+`(2, 3) # even operators are functions

    # vectors

c(1, 2, 3, 4)  # combine

1:4     # integer-sequence operator
4:1     # descending sequence
-1:2    # note precedence: unary minus is applied before :, giving -1, 0, 1, 2
-(1:2)  # parentheses force the sequence first, giving -1, -2
seq(1, 4)  # same values as 1:4
seq(2, 8, by=2) # specify interval
seq(0, 1, by=0.1) # non-integer sequence
seq(0, 1, length=11) # specify number of elements ("length" abbreviates the length.out argument)

    # vectorized arithmetic: operations are applied element by element
    
c(1, 2, 3, 4)/2  # the single value 2 is reused for every element
c(1, 2, 3, 4)/c(4, 3, 2, 1)  # element-wise division of equal-length vectors
log(c(0.1, 1, 10, 100), 10)  # functions are vectorized too

    # when lengths differ, the shorter vector is recycled

c(1, 2, 3, 4) + c(4, 3) # no warning: 4 is a multiple of 2
c(1, 2, 3, 4) + c(4, 3, 2) # produces warning: 4 is not a multiple of 3

    # variables

x <- c(1, 2, 3, 4) # assignment
x # print

x = c(1, 2, 3, 4) # can use = for assignment (best avoided)
x

x/2  # computes a new vector; x itself is not changed

(y <- sqrt(x)) # assign and print

(x <- rnorm(100))  # x is replaced by 100 standard-normal random draws (no seed is set, so values vary by run)
head(x) # first few
summary(x)  # a "generic" function

    # character and logical data
    
(words <- c("To", "be", "or", "not", "to", "be"))
paste(words, collapse=" ")  # join the elements into one string, separated by spaces

(vals <- c(TRUE, TRUE, FALSE, TRUE))
!vals  # not operator

sum(vals) # coercion to numeric (TRUE counts as 1, FALSE as 0), so this counts the TRUEs
sum(!vals)  # number of FALSEs
c("A", FALSE, 3.0) # coercion to character

    # basic indexing
    
x[12]    # 12th element
words[2] # second element
vals[3]     # third element

x[6:15] # elements 6 through 15
x[-(11:100)] # omit elements 11 through 100 (note parentheses: -11:100 would run from -11 to 100)
x[1:10] # same!

    # comparison and logical operators

1 == 2   # note == to test equality
1 != 2   # not equal
1 <= 2   # less than or equal
1 < 1:3  # vectorized
3:1 > 1:3   # element-wise comparison of two vectors
3:1 >= 1:3
TRUE & c(TRUE, FALSE)  # and (vectorized)
c(TRUE, FALSE, FALSE) | c(TRUE, TRUE, FALSE)  # or (vectorized)

! c(T, F)   # abbreviations of TRUE and FALSE, best avoided!
T <- FALSE  # perverse! (but in most cases innocuous); possible because T is an ordinary variable
T
remove(T)   # delete the redefinition so that T means TRUE again
TRUE <- FALSE  # fails: TRUE is a reserved word (deliberate error)

(z <- x[1:10])
z < -0.5  # the space matters: z<-0.5 would assign 0.5 to z
z > 0.5
z < -0.5 | z > 0.5  #  < and > of higher precedence than |
abs(z) > 0.5  # absolute value
z[abs(z) > 0.5] # indexing by a logical vector


    # user-defined functions

mean(x)  # built-in mean
sum(x)/length(x)  # the same quantity computed by hand

# myMean: arithmetic mean of a vector, computed as total divided by count.
#   x: a numeric (or logical) vector
#   returns: a single number; NaN for a zero-length vector (0/0)
myMean <- function(x) {
    total <- sum(x)
    count <- length(x)
    total/count
}
myMean # can be printed like any object

myMean(x)  # agrees with mean(x)
y # from sqrt(c(1, 2, 3, 4))
myMean(y)
myMean(1:100)
head(x) # global x undisturbed: the x inside myMean is local to each call

# myVar: sample variance of a vector, using the n - 1 denominator.
#   x: a numeric vector
#   returns: the sum of squared deviations from myMean(x), divided by length(x) - 1
myVar <- function(x) {
    deviations <- x - myMean(x)
    sum(deviations^2)/(length(x) - 1)
}
myVar(1:100)
var(1:100) # check against the built-in variance function

    # cleaning up

objects()  # list the objects currently in the workspace
remove(x, y, z, vals, words)  # delete the objects that are no longer needed
objects()  # the functions myMean and myVar remain

    # using traceback()

letters  # built-in character vector of the lower-case letters
myVar(letters)  # deliberate error: arithmetic is not defined for character data

traceback()  # show the sequence of calls that led to the most recent error


# Duncan example

# Using the R Commander

library(Rcmdr)  # load the Rcmdr package (must be installed); in an interactive session this normally starts the R Commander GUI

    # creating a data frame from data stored in a file

# file.choose() opens a file-selection dialog and returns the chosen path;
# header=TRUE says that the first line of the file holds the variable names.
# NOTE(review): the file is presumably the Duncan occupational-prestige data
# with columns prestige, income and education (used below) -- confirm.
Duncan <- read.table(file.choose(), header=TRUE)
file.choose() # explanation: called alone, it just returns the path of the selected file

Duncan  # print the whole data frame
head(Duncan)  # first few rows
summary(Duncan)  # generic summary function

    # attaching a data frame (best avoided)

prestige # error! prestige is a column of Duncan, not an object on the search path

attach(Duncan)  # not the best approach: puts the columns of Duncan on the search path
prestige  # now found via the attached data frame

        # the search path

search()  # lists the attached packages and data frames, including Duncan

    # distributions and bivariate relationships
    # (prestige, income and education are found via the attached Duncan data frame)

# Open a new graphics window for the demo (not necessary in RStudio).
# dev.new() works on every platform, unlike windows() (Windows only)
# or quartz() (macOS only).
dev.new()

hist(prestige)  # histogram of a single variable

pairs(cbind(prestige, income, education))  # default scatterplot matrix

# Customized scatterplot matrix:
#   panel      draws each off-diagonal plot: the points, a dashed
#              least-squares line, and a lowess smooth
#   diag.panel draws a histogram of each variable in the diagonal cells;
#              par(new=TRUE) makes hist() draw in the current panel
#              instead of starting a new plot
pairs(cbind(prestige, income, education), 
    panel=function(x, y){
        points(x, y)
        abline(lm(y ~ x), lty="dashed")
        lines(lowess(x, y))
        },
    diag.panel=function(x){
        par(new=TRUE)
        hist(x, main="", axes=FALSE)
        }
    )

# scatmat: scatterplot matrix of the variables supplied in ...
#   ...: vectors to be column-bound and plotted against one another
#   Off-diagonal panels show the points, a dashed least-squares line and a
#   lowess smooth; diagonal panels show a histogram of each variable.
scatmat <- function(...) { # user-defined function
    # off-diagonal panel: scatterplot with linear and lowess fits
    scatterPanel <- function(x, y) {
        points(x, y)
        abline(lm(y ~ x), lty=2)
        lines(lowess(x, y))
    }
    # diagonal panel: histogram drawn in the current panel
    histogramPanel <- function(x) {
        par(new=TRUE)
        hist(x, main="", axes=FALSE)
    }
    pairs(cbind(...), panel=scatterPanel, diag.panel=histogramPanel)
}

scatmat(prestige, income, education)  # same kind of display as the customized pairs() call, via the function

plot(education, income)  # scatterplot with education on the horizontal axis
# identify() is interactive: click near points to label them with their row names
identify(education, income, row.names(Duncan)) # must exit from identify mode!
row.names(Duncan)[c(6, 16, 27)]  # row names of observations 6, 16 and 27 (presumably the points picked out in the demo)

    # fitting a regression

(duncan.model <- lm(prestige ~ income + education))  # least-squares regression; parentheses print the fitted model
summary(duncan.model)  # again, summary generic

detach("Duncan")  # remove Duncan from the search path, undoing attach(Duncan)