# Build a 3 x 4 matrix by stacking three row vectors.
m <- rbind(
  c(1, 12, 8, 6),
  c(4, 10, 2, 9),
  c(11, 3, 5, 7)
)
m
##      [,1] [,2] [,3] [,4]
## [1,]    1   12    8    6
## [2,]    4   10    2    9
## [3,]   11    3    5    7

# Reconstruct the matrix from one flat vector, filled row by row.
n <- matrix(
  c(
    1, 12, 8, 6,
    4, 10, 2, 9,
    11, 3, 5, 7
  ),
  nrow = 3, ncol = 4, byrow = TRUE
)
n
##      [,1] [,2] [,3] [,4]
## [1,]    1   12    8    6
## [2,]    4   10    2    9
## [3,]   11    3    5    7

# Check that both constructions produce the same matrix.
all.equal(m, n)
## [1] TRUE

# Print the element in the 3rd row and 4th column
n[3, 4]
## [1] 7

# Print the 2nd column
n[, 2]
## [1] 12 10  3

# Print all but the 3rd row (a negative index excludes that row)
n[-3, ]
##      [,1] [,2] [,3] [,4]
## [1,]    1   12    8    6
## [2,]    4   10    2    9
# Attach the packages this script relies on. dplyr provides %>%, group_by(),
# summarize(), n() and filter(); ggplot2 provides ggplot(), aes() and the
# geom_*() layers used by the plotting code further down, which would fail
# with "could not find function" if ggplot2 were not attached.
# GI is loaded with data(); presumably it ships with MWBDSSworkshop.
library("MWBDSSworkshop")
library("dplyr")
library("ggplot2")

data(GI)

# Round-trip GI through a CSV file. write.csv() overwrites any existing
# GI.csv, so the read.csv() below always finds a file to read.
write.csv(GI, file = "GI.csv", row.names = FALSE)
GI <- read.csv("GI.csv")

# Create ageC: bin age into the right-closed intervals
# (-Inf,5], (5,18], (18,45], (45,60], (60,Inf].
GI$ageC <- cut(GI$age, c(-Inf, 5, 18, 45, 60, Inf))

# Create icd9code: bin icd9 into left-closed intervals [a,b) (right = FALSE).
cuts <- c(
  0, 140, 240, 280, 290, 320, 360, 390, 460, 520,
  580, 630, 680, 710, 740, 760, 780, 800, 1000, Inf
)
GI$icd9code <- cut(GI$icd9, cuts, right = FALSE)

# Find the icd9code that is most numerous
# There are many ways to do this; here the counts per interval are tabulated.
table(GI$icd9code)
##
##     [0,140)   [140,240)   [240,280)   [280,290)   [290,320)   [320,360)
##        1611           0           0           0           0           0
##   [360,390)   [390,460)   [460,520)   [520,580)   [580,630)   [630,680)
##           0           0           0        7242           0           0
##   [680,710)   [710,740)   [740,760)   [760,780)   [780,800) [800,1e+03)
##           0           0           0           0       12229           0
## [1e+03,Inf)
##         162

# Eliminate zeros: drop the interval levels that no observation falls into.
GI$icd9code <- droplevels(GI$icd9code)
table(GI$icd9code)
##
##     [0,140)   [520,580)   [780,800) [1e+03,Inf)
##        1611        7242       12229         162
# Count the rows of GI in every gender x ageC x icd9code combination
# (ageC and icd9code are the columns created in the last activity).
# The result is printed, not stored.
GI %>%
  group_by(gender, ageC, icd9code) %>%
  summarise(total = n())
## # A tibble: 40 x 4
## # Groups:   gender, ageC [?]
##    gender ageC     icd9code    total
##    <fct>  <fct>    <fct>       <int>
##  1 Female (-Inf,5] [0,140)       150
##  2 Female (-Inf,5] [520,580)     771
##  3 Female (-Inf,5] [780,800)    1235
##  4 Female (-Inf,5] [1e+03,Inf)     5
##  5 Female (5,18]   [0,140)       140
##  6 Female (5,18]   [520,580)     677
##  7 Female (5,18]   [780,800)    1109
##  8 Female (5,18]   [1e+03,Inf)    16
##  9 Female (18,45]  [0,140)       283
## 10 Female (18,45]  [520,580)    1289
## # … with 30 more rows
# Construct a histogram and boxplot for age at facility 37 using ggplot2.
# Histogram of age for the rows of GI with facility == 37, using bins of
# width 1.
GI %>%
  filter(facility == 37) %>%
  ggplot(aes(x = age)) +
  geom_histogram(binwidth = 1)

# Boxplot of age for the same facility; the constant x = 1 puts the single
# box at one x position.
GI %>%
  filter(facility == 37) %>%
  ggplot(aes(x = 1, y = age)) +
  geom_boxplot()
# Construct a bar chart for the zipcode at facility 37 using ggplot2.
# Bar chart of row counts for facility 37 by trunc(zipcode / 100), which for
# a non-negative integer zipcode is the zipcode with its last two digits
# dropped.
GI %>%
  filter(facility == 37) %>%
  ggplot(aes(x = trunc(zipcode / 100))) +
  geom_bar()