July 1, 2014

… and teachers

Goals and challenges

Goals

  • #useR2014
  • "Authentic data-science experience." - Amelia
  • "Make statisticians popular at parties." - James

Challenges

  • Inexperienced teachers
  • Limited time frames
  • Curricular materials
  • Learning to #useR2014 can be hard

Lessons

1. Simple syntax

xyplot(mpg ~ wt | as.factor(cyl), data = mtcars)

vs.

par(mfrow = c(1,3))
plot(mtcars$wt[mtcars$cyl == 4], mtcars$mpg[mtcars$cyl == 4])
plot(mtcars$wt[mtcars$cyl == 6], mtcars$mpg[mtcars$cyl == 6])
plot(mtcars$wt[mtcars$cyl == 8], mtcars$mpg[mtcars$cyl == 8])

plot of chunk unnamed-chunk-3

plot of chunk unnamed-chunk-3

## null device 
##           1

2. Wrappers

MakeWordBar
## function (text, min.freq = 2, top = 50, format = "count", col = "steelblue", 
##     color, ...) 
## {
##     if (!missing(color)) {
##         stop("Remember to use the argument 'col' and not 'color'.")
##     }
##     if (class(text)[1] != "VCorpus") {
##         stop("Remember to initialize text using initializeText()")
##     }
##     if (!missing(format) & (format != "count" & format != "percent")) {
##         stop("invalid 'format' argument. 'format' must either be 'count' or 'percent'")
##     }
##     old.par <- par(no.readonly = TRUE)$mar
##     tdm <- TermDocumentMatrix(text)
##     m <- as.matrix(tdm)
##     v <- sort(rowSums(m), decreasing = T)
##     d <- data.frame(word = names(v), freq = v, row.names = NULL)
##     d <- d[d$freq >= min.freq, ]
##     if (format == "count") {
##         if (!missing(top) & (top <= 0)) {
##             stop("'top' must be a number greater than 0")
##         }
##         top.words <- head(d$freq, n = top)
##         top.words.names <- head(d$word, n = top)
##         largest.word <- max(nchar(as.character(top.words.names)))
##         if (largest.word > 8) {
##             adj = (largest.word - 7)/3
##             par(mar = old.par + c(adj, 0, 0, 0))
##         }
##         else {
##             par(mar = old.par)
##         }
##     }
##     if (format == "percent") {
##         if (missing(top)) {
##             stop("'top' argument must be specificed when using format = 'percent'")
##         }
##         if (!missing(top) & (top <= 0 | top > 100)) {
##             stop("'top' must be a number greater than 0 and less than or equal to 100")
##         }
##         n <- floor(nrow(d) * top/100)
##         top.words <- head(d$freq, n = n)
##         top.words.names <- head(d$word, n = n)
##         largest.word <- max(nchar(as.character(top.words.names)))
##         if (largest.word > 8) {
##             adj = (largest.word - 7)/3
##             par(mar = old.par + c(adj, 0, 0, 0))
##         }
##         else {
##             par(mar = old.par)
##         }
##     }
##     barplot(top.words, names.arg = top.words.names, las = 2, 
##         col = col, border = "white", ...)
##     par(mar = old.par)
## }
## <environment: namespace:MobilizeSimple>

data("crude")
crude <- ProcessText(crude, removestopwords = TRUE)
MakeWordBar(crude, top=10)

plot of chunk unnamed-chunk-5

  • MobilizeSimple package
    • www.github.com/mobilizingcs

3. Labs & RStudio

Labs!

Labs!