Matrix activity

# Build the matrix by binding its three rows together
m = rbind(
  c(1, 12, 8, 6),
  c(4, 10, 2, 9),
  c(11, 3, 5, 7)
)
m
##      [,1] [,2] [,3] [,4]
## [1,]    1   12    8    6
## [2,]    4   10    2    9
## [3,]   11    3    5    7
# Reconstruct the matrix from a single vector, filled row by row
n = matrix(
  c(1, 12, 8, 6,
    4, 10, 2, 9,
    11, 3, 5, 7),
  ncol = 4, byrow = TRUE
)
n
##      [,1] [,2] [,3] [,4]
## [1,]    1   12    8    6
## [2,]    4   10    2    9
## [3,]   11    3    5    7
# Confirm that both constructions give the same matrix
all.equal(m, n)
## [1] TRUE
# Print the element in the 3rd row and 4th column
n[3, 4]
## [1] 7
# Print the 2nd column
n[, 2]
## [1] 12 10  3
# Print all but the 3rd row
n[-3, ]
##      [,1] [,2] [,3] [,4]
## [1,]    1   12    8    6
## [2,]    4   10    2    9

Factor activity

library('ISDSWorkshop')
library('dplyr')
library('ggplot2')

# Load the GI data set and round-trip it through a CSV file
data(GI)
# Writing first guarantees the file exists for read.csv();
# note that this overwrites any existing GI.csv
write.csv(GI, file="GI.csv", row.names=FALSE)
GI = read.csv("GI.csv")

# Bin age into (-Inf,5], (5,18], (18,45], (45,60], (60,Inf]
GI$ageC = cut(GI$age, c(-Inf, 5, 18, 45, 60, Inf))

# Create icd9code
# right=FALSE makes every interval closed on the left: [0,140), [140,240), ...
cuts = c(0, 140, 240, 280, 290, 320, 360, 390, 460, 520, 580, 630, 680, 710, 740, 760, 780, 800, 1000, Inf)
GI$icd9code = cut(GI$icd9, cuts, right=FALSE)

# Find the icd9code that is most numerous
# There are many ways to do this
table(GI$icd9code)
## 
##     [0,140)   [140,240)   [240,280)   [280,290)   [290,320)   [320,360) 
##        1611           0           0           0           0           0 
##   [360,390)   [390,460)   [460,520)   [520,580)   [580,630)   [630,680) 
##           0           0           0        7242           0           0 
##   [680,710)   [710,740)   [740,760)   [760,780)   [780,800) [800,1e+03) 
##           0           0           0           0       12229           0 
## [1e+03,Inf) 
##         162
# The table only lists the counts; which.max() picks out the level
# with the largest one
names(which.max(table(GI$icd9code)))
## [1] "[780,800)"
# Eliminate zeros by dropping the levels that never occur
GI$icd9code = droplevels(GI$icd9code)
table(GI$icd9code)
## 
##     [0,140)   [520,580)   [780,800) [1e+03,Inf) 
##        1611        7242       12229         162

Aggregate activity

# Count the visits in every gender / ageC / icd9code combination
# (ageC and icd9code are the factors created in the factor activity).
summarise(
  group_by(GI, gender, ageC, icd9code),
  total = n()
)
## Source: local data frame [40 x 4]
## Groups: gender, ageC [?]
## 
##    gender     ageC    icd9code total
##    <fctr>   <fctr>      <fctr> <int>
## 1  Female (-Inf,5]     [0,140)   150
## 2  Female (-Inf,5]   [520,580)   771
## 3  Female (-Inf,5]   [780,800)  1235
## 4  Female (-Inf,5] [1e+03,Inf)     5
## 5  Female   (5,18]     [0,140)   140
## 6  Female   (5,18]   [520,580)   677
## 7  Female   (5,18]   [780,800)  1109
## 8  Female   (5,18] [1e+03,Inf)    16
## 9  Female  (18,45]     [0,140)   283
## 10 Female  (18,45]   [520,580)  1289
## # ... with 30 more rows

ggplot2 activity

Construct a histogram and boxplot for age at facility 37 using ggplot2.

# Histogram of age for the visits at facility 37, one bin per year of age.
GI %>%
  filter(facility == 37) %>%
  ggplot(aes(x = age)) +
  geom_histogram(binwidth = 1)

# Boxplot of age for the same facility; the constant x = 1 yields a single box.
GI %>%
  filter(facility == 37) %>%
  ggplot(aes(x = 1, y = age)) +
  geom_boxplot()

Construct a bar chart for the zipcode at facility 37 using ggplot2.

# Bar chart of zipcode for the visits at facility 37.
# trunc(zipcode/100) drops the last two digits, so zipcodes sharing
# their leading digits fall into the same bar.
GI %>%
  filter(facility == 37) %>%
  ggplot(aes(x = trunc(zipcode/100))) +
  geom_bar()