##--------------------------------------------------------------## ## Script for Lecture 9: ## ## R Graphics ## ## John Fox ## ## Introduction to the R Statistical Computing Environment ## ## ICPSR ## ## 2021 ## ##--------------------------------------------------------------## # Graphics Basics (traditional S/R graphics) args(plot.default) # default plot method ?plot # points, lines, axes, frames plot(c(0, 1), c(0, 1), type="n", xlab="", ylab="") # coordinate system plot(c(0, 1), c(0, 1), type="n", xlab="", ylab="", axes=FALSE) # w/o axes par("col") # graphical parameters names(par()) ?par plot(1:25, pch=1:25, xlab="Symbol Number", ylab="") # symbols lines(1:25, type="h", lty="dashed") plot(26:1, xlab="letters", ylab="", pch=letters, axes=FALSE, frame.plot=TRUE) plot(c(1, 7), c(0, 1), type="n", axes=FALSE, # lines xlab="Line Type (lty)", ylab="") box() # add frame axis(1, at=1:6) # x-axis for (lty in 1:6){ lines(c(lty, lty, lty + 1), c(0, 0.5, 1), lty=lty) } plot(c(0, 1), c(0, 1), type="n", xlab="", ylab="") abline(0, 1) # intercept and slope abline(c(1, -1), lty="dashed") # horizontal and vertical lines: abline(h=seq(0, 1, by=0.1), v=seq(0, 1, by=0.1), col="gray") # text par(mfrow=c(1, 2)) # array of plots plot(c(0, 1), c(0, 1), axes=FALSE, type="n", xlab="", ylab="", frame.plot=TRUE, main="(a)") text(x=c(0.2, 0.5), y=c(0.2, 0.7), c("example text", "another string")) plot(c(0, 1), c(0, 1), axes=FALSE, type="n", xlab="", ylab="", frame.plot=TRUE, main="(b)") text(locator(3), c("one", "two", "three")) # left-click 3 times locator() # returns mouse coordinates, remember to exit # plot layout par(mfrow=c(1, 1)) # back to one panel plot(1:10, type="n", xlab="", ylab="", axes=FALSE, frame=TRUE) box("outer", col="magenta", lwd=3) text(5.5, 5.5, "plotting region", cex=1.5) text(5.5, 4.5, "standard par() settings") for (side in 1:4){ for (line in 0:4){ mtext(paste("line", line), side=side, line=line, adj=0.65) } } for (side in 1:4){ mtext(paste("margin, side", side), side=side, line=1, adj=0.1, cex=1.25) } # mult-panel plots (oldpar <- par(mfrow=c(2, 2), oma=c(1, 1, 2, 1), mar=c(2, 2, 2, 1))) n.lines <- par("mar") - 1 for (plot in 1:4){ plot(1:10, type="n", xlab="", ylab="", axes=FALSE, frame=TRUE) box("figure") text(5.5, 5.5, paste("panel", plot), cex=1.5) if (plot == 1){ text(5.5, 3.5, "par(mfrow=c(2, 2),\n oma=c(1, 1, 2, 1),\n mar=c(2, 2, 2, 1))", cex=1.25) } for (side in 1:4){ for (line in 0:n.lines[side]){ mtext(paste("inner margin line", line), side=side, line=line, cex=0.75) } } } box("inner") box("outer") n.lines <- par("oma") - 1 for (side in 1:4){ for (line in 0:n.lines[side]){ mtext(paste("outer margin line", line), side=side, line=line, outer=TRUE) } } # arrows and line segments par(mfrow=c(1, 2)) plot(c(1, 5), c(0, 1), axes=FALSE, type="n", xlab="", ylab="") arrows(x0=1:5, y0=rep(0.1, 5), x1=1:5, y1=seq(0.3, 0.9, len=5), code=3) title("(a) arrows") plot(c(1, 5), c(0, 1), axes=FALSE, type="n", xlab="", ylab="") segments(x0=1:5, y0=rep(0.1, 5), x1=1:5, y1=seq(0.3, 0.9, len=5)) title("(b) segments") # nicer arrows: p.arrows() in the sfsmisc package # note different arguments, unidirectional arrows # question: how can you easily get bidirectional arrows? library(sfsmisc) par(mfrow=c(1, 1)) # restore single panel plot(c(1, 5), c(0, 1), axes=FALSE, type="n", xlab="", ylab="") p.arrows(x1=1:5, y1=rep(0.1, 5), x2=1:5, y2=seq(0.3, 0.9, len=5), fill="black") # polygons plot(c(0, 1), c(0, 1), type="n", xlab="", ylab="") polygon(c(0.2, 0.8, 0.8), c(0.2, 0.2, 0.8), col="blue") polygon(c(0.2, 0.2, 0.8), c(0.2, 0.8, 0.8)) # legend plot(c(1,5), c(0,1), axes=FALSE, type="n", xlab="", ylab="", frame.plot=TRUE) legend(locator(1), legend=c("group A", "group B", "group C"), lty=1:3, pch=1:3, col=c("blue", "magenta", "cyan")) legend("topright", legend=c("group A", "group B", "group C"), lty=1:3, pch=1:3, col=c("blue", "magenta", "cyan"), inset=0.01) # curve curve(x*cos(25/x), 0.01, pi, n=1000) curve(sin, 0, 2*pi, ann=FALSE, axes=FALSE, lwd=2) axis(1, pos=0, at=c(0, pi/2, pi, 3*pi/2, 2*pi), labels=c(0, expression(pi/2), expression(pi), expression(3*pi/2), expression(2*pi)), hadj=-0.2, padj=-0.4) axis(2, pos=0) curve(cos, add=TRUE, lty="dashed", lwd=2) legend(pi, 1, lty=1:2, lwd=2, legend=c("sine", "cosine"), bty="n") # colors pie(rep(1, length(palette())), col=palette()) palette() rainbow(10) # "#RRGGBBTT" (red, green, blue, transparency) pie(rep(1, 10), col=rainbow(10)) gray(0:9/9) # "#RRGGBB" pie(rep(1, 10), col=gray(0:9/9)) length(colors()) head(colors(), 20) # first 20 named colors pie(rep(1, 20), labels=head(colors(), 20), col=head(colors(), 20)) # for palettes based on psychophysical principles, see colorspace package # Putting it together (as time permits) # diagrams of the standard normal density function # showing the area above 1.96 oldpar <- par(mar = c(5, 6, 4, 2) + 0.1) # leave room on the left oldpar # old parameter saved z <- seq(-4, 4, length=1000) p <- dnorm(z) plot(z, p, type="l", lwd=2, main=expression("The Standard Normal Density Function" ~~ phi(z)), ylab=expression(phi(z) == frac(1, sqrt(2*pi)) * ~~ e^- ~~ frac(z^2, 2))) abline(h=0, col="gray") abline(v=0, col="gray") z0 <- z[z >= 1.96] # define region to fill z0 <- c(z0[1], z0) p0 <- p[z >= 1.96] p0 <- c(0, p0) polygon(z0, p0, col="gray") coords <- locator(2) # locate head and tail of arrow arrows(coords$x[1], coords$y[1], coords$x[2], coords$y[2], code=1, length=0.125) text(coords$x[2], coords$y[2], pos=3, # text above tail of arrow expression(integral(phi(z)*dz, 1.96, infinity) == .025)) # with lines at z = -3:3 (on your own time) par(oldpar) # restore graphics parameters plot(z, p, type="n", xlab="", ylab="", axes=FALSE, main=expression("The Standard Normal Density Function" ~~ phi(z))) axis(1, pos=0, at=-3:3) abline(h=0) axis(2, pos=0, at=.1*1:3) abline(v=0) curve(dnorm, -4, 4, n=1000, add=TRUE, lwd=2) text(locator(2), c("z", expression(phi(z))), xpd=TRUE) for (z0 in -3:3) lines(c(z0, z0), c(0, dnorm(z0)), lty=2) # explaining nearest-neighbour local linear regression quartz(height=8, width=8) # graphics device for macOS # use (e.g.) windows() for Windows or X11() on Linux/Unix oldpar <- par(mfrow=c(2, 2), las=1) # 2 x 2 array of graphs data("UN", package="carData") names(UN) UN <- na.omit(UN[, c("ppgdp", "infantMortality")]) # filter NAs for variables used gdp <- UN$ppgdp/1000 # rescale to $1000s infant <- UN$infantMortality ord <- order(gdp) # sort data by gdp gdp <- gdp[ord] infant <- infant[ord] x0 <- gdp[150] # focal x = x_(150) dist <- abs(gdp - x0) # distance from focal x h <- sort(dist)[97] # bandwidth for span of 0.5 (where n = 193) pick <- dist <= h # observations within window plot(gdp, infant, xlab="GDP per Capita ($1000s)", ylab="Infant Mortality Rate per 1000", type="n", main="(a) Observations Within the Window\nspan = 0.5") points(gdp[pick], infant[pick], col="blue") points(gdp[!pick], infant[!pick], col="gray") points(gdp[150], infant[150], pch=16, cex=1.5, col="magenta") # focal point abline(v=x0, col="magenta") # at focal x abline(v=c(x0 - h, x0 + h), lty=2, col="blue") # window text(x0, par("usr")[4] + 5, expression(x[(150)]), xpd=TRUE, col="magenta") plot(range(gdp), c(0,1), xlab="GDP per Capita ($1000s)", ylab="Tricube Kernel Weight", type="n", main="(b) Tricube Weights") abline(v=x0, col="magenta") abline(v=c(x0 - h, x0 + h), lty=2, col="blue") tricube <- function(x, x0, h) { z <- abs(x - x0)/h ifelse(z < 1, (1 - z^3)^3, 0) } tc <- function(x) tricube(x, x0, h) # to use with curve curve(tc, min(gdp), max(gdp), n=1000, lwd=2, add=TRUE, col="blue") points(gdp[pick], tricube(gdp, x0, h)[pick], pch=16, col="blue") abline(h=c(0, 1), col="gray") plot(gdp, infant, xlab="GDP per Capita ($1000s)", ylab="Infant Mortality Rate per 1000", type="n", main="(c) Local Linear Regression") points(gdp[pick], infant[pick], col="blue") points(gdp[!pick], infant[!pick], col="gray") abline(v=x0, col="magenta") abline(v=c(x0 - h, x0 + h), lty=2, col="blue") mod <- lm(infant ~ gdp, weights=tricube(gdp, x0, h)) new <- data.frame(gdp=c(x0 - h, x0, x0 + h)) fit <- predict(mod, newdata=new) lines(c(x0 - h, x0 + h), fit[c(1, 3)], lwd=3, col="magenta") # local regression line points(x0, fit[2], pch=16, cex=2, col="magenta") text(x0, par("usr")[4] + 5, expression(x[(150)]), xpd=TRUE, col="magenta") text(x0 + 1, fit[2] + 2.5, expression(hat(y)[(150)]), adj=c(0, 0), col="magenta") plot(gdp, infant, xlab="GDP per Capita ($1000s)", ylab="Infant Mortality Rate (per 1000)", main="(d) Complete Local-Linear Estimate", col=gray(0.25)) yhat <- numeric(length(gdp)) for (i in 1:length(gdp)){ # kernel estimate at each x x0 <- gdp[i] dist <- abs(gdp - x0) h <- sort(dist)[97] mod <- update(mod, weights=tricube(gdp, x0, h)) yhat[i] <- predict(mod, newdata=data.frame(gdp=x0)) } lines(gdp, yhat, lwd=2, col="magenta") text(gdp[149], infant[149], paste0(" ", rownames(UN)[149]), adj=c(0, 0)) par(oldpar) # more on positioning panels in a multi-panel plot (on your own time) # using par(mfrow=c(m, n)) par(mfrow=c(2, 2)) x <- seq(0, 1, length=200) Ey <- rev(1 - x^2) y <- Ey + 0.1*rnorm(200) plot(x, y, axes=FALSE, frame=TRUE, main="(a) monotone, simple", cex.main=1, xlab="", ylab="") lines(x, Ey, lwd=2) mtext("x", side=1, adj=1) mtext("y ", side=2, at=max(y), las=1) x <- seq(0.02, 0.99, length=200) Ey <- log(x/(1 - x)) y <- Ey + 0.5*rnorm(200) plot (x, y, axes=FALSE, frame=TRUE, main="(b) monotone, not simple", cex.main=1, xlab="", ylab="") lines(x, Ey, lwd=2) mtext("x", side=1, adj=1) mtext("y ", side=2, at=max(y), las=1) x <- seq(0.2, 1, length=200) Ey <- (x - 0.5)^2 y <- Ey + 0.04*rnorm(200) plot(x, y, axes=FALSE, frame=TRUE, main="(c) non-monotone, simple", cex.main=1, xlab="", ylab="") lines(x, Ey, lwd=2) mtext("x", side=1, adj=1) mtext("y ", side=2, at=max(y), las=1) # finer control using par(fig=c(x1, x2, y1, y2)) par(oma=c(0, 0, 1, 0), mar=c(2, 3, 3, 2)) # leave room in top outer margin par(fig=c(0, .5, .5, 1)) # top-left panel x <- seq(0, 1, length=200) Ey <- rev(1 - x^2) y <- Ey + 0.1*rnorm(200) plot(x, y, axes=FALSE, frame=TRUE, main="(a) monotone, simple", cex.main=1, xlab="", ylab="", col="gray", cex=0.75) lines(x, Ey, lwd=2) mtext("x", side=1, adj=1) mtext("y ", side=2, at=max(y), las=1) par(fig=c(.5, 1, .5, 1)) # top-right panel par(new=TRUE) x <- seq(0.02, 0.99, length=200) Ey <- log(x/(1 - x)) y <- Ey + 0.5*rnorm(200) plot (x, y, axes=FALSE, frame=TRUE, main="(b) monotone, not simple", cex.main=1, xlab="", ylab="", col="gray", cex=0.75) lines(x, Ey, lwd=2) mtext("x", side=1, adj=1) mtext("y ", side=2, at=max(y), las=1) par(fig=c(.25, .75, 0, .5)) # bottom panel par(new=TRUE) x <- seq(0.2, 1, length=200) Ey <- (x - 0.5)^2 y <- Ey + 0.04*rnorm(200) plot(x, y, axes=FALSE, frame=TRUE, main="(c) non-monotone, simple", cex.main=1, xlab="", ylab="", col="gray", cex=0.75) lines(x, Ey, lwd=2) mtext("x", side=1, adj=1) mtext("y ", side=2, at=max(y), las=1) title("Nonlinear Relationships", outer=TRUE) remove(list=objects()) # clean up # Popular graphics packages # lattice for trellis graphs (part of the standard R distribution) library("lattice") data("Prestige", package="carData") Prestige$type <- factor(Prestige$type, levels=c("bc", "wc", "prof")) xyplot(prestige ~ income | type, data=Prestige, panel=function(x, y){ panel.grid() # grid lines panel.xyplot(x, y) # points panel.lmline(x, y, lwd=2) # LS line panel.loess(x, y, span=0.8, lwd=2, lty=2, col="magenta") # smooth } ) # ggplot2 ("grammar of graphics") library("ggplot2") Prestige2 <- na.omit(Prestige) # removing missing data # try line-by-line initially omitting +s: ggplot(data=Prestige2, aes(x=income, y=prestige), show.legend=FALSE) + geom_point() + geom_smooth(method="lm", se=FALSE, lty="dashed", col="magenta") + geom_smooth(method="loess", span=0.8, se=FALSE) + facet_grid(. ~ type)