# Fit a simple linear regression of y on x.
x <- 1:10                    # predictor: the integers 1 through 10
y <- rep(c(1, 2), each = 5)  # response: five 1s followed by five 2s
m <- lm(y ~ x)               # least-squares fit of y on x
s <- summary(m)              # full fit summary (coefficients, R^2, F-test)
Now, look at the result of each line:
# Echo the predictor vector.
x
## [1] 1 2 3 4 5 6 7 8 9 10
# Echo the response vector.
y
## [1] 1 1 1 1 1 2 2 2 2 2
# Printing the model object shows only the call and the fitted coefficients.
m
##
## Call:
## lm(formula = y ~ x)
##
## Coefficients:
## (Intercept) x
## 0.6667 0.1515
# The summary adds residual quantiles, standard errors, t-tests, R^2,
# and the overall F-statistic.
s
##
## Call:
## lm(formula = y ~ x)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.4242 -0.1667 0.0000 0.1667 0.4242
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.6667 0.1880 3.546 0.00756 **
## x 0.1515 0.0303 5.000 0.00105 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2752 on 8 degrees of freedom
## Multiple R-squared: 0.7576, Adjusted R-squared: 0.7273
## F-statistic: 25 on 1 and 8 DF, p-value: 0.001053
# Individual components of the summary can be extracted with $.
s$r.squared
## [1] 0.7575758
For those who know linear regression, this output should look familiar.
Calculate the probability that an individual has the disease, given a positive test, when:
# Test characteristics and disease prevalence.
specificity <- 0.95  # P(negative test | no disease)
sensitivity <- 0.99  # P(positive test | disease)
prevalence  <- 0.001 # P(disease)
# Bayes' theorem: P(disease | positive) =
#   P(positive | disease) * P(disease) / P(positive),
# where P(positive) sums the true-positive and false-positive paths.
probability <- (sensitivity * prevalence) /
  (sensitivity * prevalence + (1 - specificity) * (1 - prevalence))
probability
## [1] 0.01943463
Yes, it is only about 2%!
Read in the `fluTrends.csv` file.
# Read in the CSV file; by default read.csv() sanitizes column names,
# replacing spaces and punctuation with dots (check.names = TRUE).
fluTrends <- read.csv("fluTrends.csv")
# List the (sanitized) column names.
names(fluTrends)
## [1] "Date"
## [2] "United.States"
## [3] "Alabama"
## [4] "Alaska"
## [5] "Arizona"
## [6] "Arkansas"
## [7] "California"
## [8] "Colorado"
## [9] "Connecticut"
## [10] "Delaware"
## [11] "District.of.Columbia"
## [12] "Florida"
## [13] "Georgia"
## [14] "Hawaii"
## [15] "Idaho"
## [16] "Illinois"
## [17] "Indiana"
## [18] "Iowa"
## [19] "Kansas"
## [20] "Kentucky"
## [21] "Louisiana"
## [22] "Maine"
## [23] "Maryland"
## [24] "Massachusetts"
## [25] "Michigan"
## [26] "Minnesota"
## [27] "Mississippi"
## [28] "Missouri"
## [29] "Montana"
## [30] "Nebraska"
## [31] "Nevada"
## [32] "New.Hampshire"
## [33] "New.Jersey"
## [34] "New.Mexico"
## [35] "New.York"
## [36] "North.Carolina"
## [37] "North.Dakota"
## [38] "Ohio"
## [39] "Oklahoma"
## [40] "Oregon"
## [41] "Pennsylvania"
## [42] "Rhode.Island"
## [43] "South.Carolina"
## [44] "South.Dakota"
## [45] "Tennessee"
## [46] "Texas"
## [47] "Utah"
## [48] "Vermont"
## [49] "Virginia"
## [50] "Washington"
## [51] "West.Virginia"
## [52] "Wisconsin"
## [53] "Wyoming"
## [54] "HHS.Region.1..CT..ME..MA..NH..RI..VT."
## [55] "HHS.Region.2..NJ..NY."
## [56] "HHS.Region.3..DE..DC..MD..PA..VA..WV."
## [57] "HHS.Region.4..AL..FL..GA..KY..MS..NC..SC..TN."
## [58] "HHS.Region.5..IL..IN..MI..MN..OH..WI."
## [59] "HHS.Region.6..AR..LA..NM..OK..TX."
## [60] "HHS.Region.7..IA..KS..MO..NE."
## [61] "HHS.Region.8..CO..MT..ND..SD..UT..WY."
## [62] "HHS.Region.9..AZ..CA..HI..NV."
## [63] "HHS.Region.10..AK..ID..OR..WA."
## [64] "Anchorage..AK"
## [65] "Birmingham..AL"
## [66] "Little.Rock..AR"
## [67] "Mesa..AZ"
## [68] "Phoenix..AZ"
## [69] "Scottsdale..AZ"
## [70] "Tempe..AZ"
## [71] "Tucson..AZ"
## [72] "Berkeley..CA"
## [73] "Fresno..CA"
## [74] "Irvine..CA"
## [75] "Los.Angeles..CA"
## [76] "Oakland..CA"
## [77] "Sacramento..CA"
## [78] "San.Diego..CA"
## [79] "San.Francisco..CA"
## [80] "San.Jose..CA"
## [81] "Santa.Clara..CA"
## [82] "Sunnyvale..CA"
## [83] "Colorado.Springs..CO"
## [84] "Denver..CO"
## [85] "Washington..DC"
## [86] "Gainesville..FL"
## [87] "Jacksonville..FL"
## [88] "Miami..FL"
## [89] "Orlando..FL"
## [90] "Tampa..FL"
## [91] "Atlanta..GA"
## [92] "Roswell..GA"
## [93] "Honolulu..HI"
## [94] "Des.Moines..IA"
## [95] "Boise..ID"
## [96] "Chicago..IL"
## [97] "Indianapolis..IN"
## [98] "Wichita..KS"
## [99] "Lexington..KY"
## [100] "Baton.Rouge..LA"
## [101] "New.Orleans..LA"
## [102] "Boston..MA"
## [103] "Somerville..MA"
## [104] "Baltimore..MD"
## [105] "Grand.Rapids..MI"
## [106] "St.Paul..MN"
## [107] "Kansas.City..MO"
## [108] "Springfield..MO"
## [109] "St.Louis..MO"
## [110] "Jackson..MS"
## [111] "Cary..NC"
## [112] "Charlotte..NC"
## [113] "Durham..NC"
## [114] "Greensboro..NC"
## [115] "Raleigh..NC"
## [116] "Lincoln..NE"
## [117] "Omaha..NE"
## [118] "Newark..NJ"
## [119] "Albuquerque..NM"
## [120] "Las.Vegas..NV"
## [121] "Reno..NV"
## [122] "Albany..NY"
## [123] "Buffalo..NY"
## [124] "New.York..NY"
## [125] "Rochester..NY"
## [126] "Cleveland..OH"
## [127] "Columbus..OH"
## [128] "Dayton..OH"
## [129] "Oklahoma.City..OK"
## [130] "Tulsa..OK"
## [131] "Beaverton..OR"
## [132] "Eugene..OR"
## [133] "Portland..OR"
## [134] "Philadelphia..PA"
## [135] "Pittsburgh..PA"
## [136] "State.College..PA"
## [137] "Providence..RI"
## [138] "Columbia..SC"
## [139] "Greenville..SC"
## [140] "Knoxville..TN"
## [141] "Memphis..TN"
## [142] "Nashville..TN"
## [143] "Austin..TX"
## [144] "Dallas..TX"
## [145] "Ft.Worth..TX"
## [146] "Houston..TX"
## [147] "Irving..TX"
## [148] "Lubbock..TX"
## [149] "Plano..TX"
## [150] "San.Antonio..TX"
## [151] "Salt.Lake.City..UT"
## [152] "Arlington..VA"
## [153] "Norfolk..VA"
## [154] "Reston..VA"
## [155] "Richmond..VA"
## [156] "Bellevue..WA"
## [157] "Seattle..WA"
## [158] "Spokane..WA"
## [159] "Madison..WI"
## [160] "Milwaukee..WI"
# To keep the original (pretty) column names from the CSV header,
# disable name sanitizing with check.names = FALSE.
# NOTE(review): the names() output below still shows dotted names
# (e.g. "United.States"), which contradicts check.names = FALSE --
# the transcript may have been copied from the earlier read. Re-run
# and verify against the actual CSV header.
fluTrends <- read.csv("fluTrends.csv", check.names = FALSE)
names(fluTrends)
## [1] "Date"
## [2] "United.States"
## [3] "Alabama"
## [4] "Alaska"
## [5] "Arizona"
## [6] "Arkansas"
## [7] "California"
## [8] "Colorado"
## [9] "Connecticut"
## [10] "Delaware"
## [11] "District.of.Columbia"
## [12] "Florida"
## [13] "Georgia"
## [14] "Hawaii"
## [15] "Idaho"
## [16] "Illinois"
## [17] "Indiana"
## [18] "Iowa"
## [19] "Kansas"
## [20] "Kentucky"
## [21] "Louisiana"
## [22] "Maine"
## [23] "Maryland"
## [24] "Massachusetts"
## [25] "Michigan"
## [26] "Minnesota"
## [27] "Mississippi"
## [28] "Missouri"
## [29] "Montana"
## [30] "Nebraska"
## [31] "Nevada"
## [32] "New.Hampshire"
## [33] "New.Jersey"
## [34] "New.Mexico"
## [35] "New.York"
## [36] "North.Carolina"
## [37] "North.Dakota"
## [38] "Ohio"
## [39] "Oklahoma"
## [40] "Oregon"
## [41] "Pennsylvania"
## [42] "Rhode.Island"
## [43] "South.Carolina"
## [44] "South.Dakota"
## [45] "Tennessee"
## [46] "Texas"
## [47] "Utah"
## [48] "Vermont"
## [49] "Virginia"
## [50] "Washington"
## [51] "West.Virginia"
## [52] "Wisconsin"
## [53] "Wyoming"
## [54] "HHS.Region.1..CT..ME..MA..NH..RI..VT."
## [55] "HHS.Region.2..NJ..NY."
## [56] "HHS.Region.3..DE..DC..MD..PA..VA..WV."
## [57] "HHS.Region.4..AL..FL..GA..KY..MS..NC..SC..TN."
## [58] "HHS.Region.5..IL..IN..MI..MN..OH..WI."
## [59] "HHS.Region.6..AR..LA..NM..OK..TX."
## [60] "HHS.Region.7..IA..KS..MO..NE."
## [61] "HHS.Region.8..CO..MT..ND..SD..UT..WY."
## [62] "HHS.Region.9..AZ..CA..HI..NV."
## [63] "HHS.Region.10..AK..ID..OR..WA."
## [64] "Anchorage..AK"
## [65] "Birmingham..AL"
## [66] "Little.Rock..AR"
## [67] "Mesa..AZ"
## [68] "Phoenix..AZ"
## [69] "Scottsdale..AZ"
## [70] "Tempe..AZ"
## [71] "Tucson..AZ"
## [72] "Berkeley..CA"
## [73] "Fresno..CA"
## [74] "Irvine..CA"
## [75] "Los.Angeles..CA"
## [76] "Oakland..CA"
## [77] "Sacramento..CA"
## [78] "San.Diego..CA"
## [79] "San.Francisco..CA"
## [80] "San.Jose..CA"
## [81] "Santa.Clara..CA"
## [82] "Sunnyvale..CA"
## [83] "Colorado.Springs..CO"
## [84] "Denver..CO"
## [85] "Washington..DC"
## [86] "Gainesville..FL"
## [87] "Jacksonville..FL"
## [88] "Miami..FL"
## [89] "Orlando..FL"
## [90] "Tampa..FL"
## [91] "Atlanta..GA"
## [92] "Roswell..GA"
## [93] "Honolulu..HI"
## [94] "Des.Moines..IA"
## [95] "Boise..ID"
## [96] "Chicago..IL"
## [97] "Indianapolis..IN"
## [98] "Wichita..KS"
## [99] "Lexington..KY"
## [100] "Baton.Rouge..LA"
## [101] "New.Orleans..LA"
## [102] "Boston..MA"
## [103] "Somerville..MA"
## [104] "Baltimore..MD"
## [105] "Grand.Rapids..MI"
## [106] "St.Paul..MN"
## [107] "Kansas.City..MO"
## [108] "Springfield..MO"
## [109] "St.Louis..MO"
## [110] "Jackson..MS"
## [111] "Cary..NC"
## [112] "Charlotte..NC"
## [113] "Durham..NC"
## [114] "Greensboro..NC"
## [115] "Raleigh..NC"
## [116] "Lincoln..NE"
## [117] "Omaha..NE"
## [118] "Newark..NJ"
## [119] "Albuquerque..NM"
## [120] "Las.Vegas..NV"
## [121] "Reno..NV"
## [122] "Albany..NY"
## [123] "Buffalo..NY"
## [124] "New.York..NY"
## [125] "Rochester..NY"
## [126] "Cleveland..OH"
## [127] "Columbus..OH"
## [128] "Dayton..OH"
## [129] "Oklahoma.City..OK"
## [130] "Tulsa..OK"
## [131] "Beaverton..OR"
## [132] "Eugene..OR"
## [133] "Portland..OR"
## [134] "Philadelphia..PA"
## [135] "Pittsburgh..PA"
## [136] "State.College..PA"
## [137] "Providence..RI"
## [138] "Columbia..SC"
## [139] "Greenville..SC"
## [140] "Knoxville..TN"
## [141] "Memphis..TN"
## [142] "Nashville..TN"
## [143] "Austin..TX"
## [144] "Dallas..TX"
## [145] "Ft.Worth..TX"
## [146] "Houston..TX"
## [147] "Irving..TX"
## [148] "Lubbock..TX"
## [149] "Plano..TX"
## [150] "San.Antonio..TX"
## [151] "Salt.Lake.City..UT"
## [152] "Arlington..VA"
## [153] "Norfolk..VA"
## [154] "Reston..VA"
## [155] "Richmond..VA"
## [156] "Bellevue..WA"
## [157] "Seattle..WA"
## [158] "Spokane..WA"
## [159] "Madison..WI"
## [160] "Milwaukee..WI"
# unfortunately these names won't work with the
# fluTrends$colname syntax, but you can use back-ticks
# NOTE(review): the output below is summary(NULL) -- $ returned NULL,
# meaning no column named "United States" exists. The names() listing
# above shows "United.States" (dotted), so either check.names = FALSE
# did not take effect or the correct access here is
# fluTrends$United.States. Verify and re-run.
summary(fluTrends$`United States`)
## Length Class Mode
## 0 NULL NULL
# Min, max, mean, and median age for zipcode 20032.
# Keep only the records whose zipcode is 20032.
GI_20032 <- filter(GI, zipcode == 20032)
# Youngest patient in this zipcode.
min(GI_20032$age)
## [1] 0
# Oldest patient in this zipcode.
max(GI_20032$age)
## [1] 93
# Average age.
mean(GI_20032$age)
## [1] 28.47843
# Median age.
median(GI_20032$age)
## [1] 26.5
Alternatively, `summary()` reports all of these at once:
# One call gives the minimum, quartiles, median, and mean together.
summary(GI_20032$age)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.00 9.00 26.50 28.48 41.00 93.00
Construct a histogram and boxplot for age at facility 37.
# Construct a histogram and boxplot for age at facility 37.
# Keep only the records from facility 37.
GI_37 <- filter(GI, facility == 37)
# Histogram of patient age at facility 37.
hist(GI_37$age)
# Boxplot of patient age at facility 37.
boxplot(GI_37$age)
Construct a bar chart for the zipcode at facility 37.
# Construct a bar chart for the zipcode at facility 37.
# Tabulate how often each zipcode appears, then plot the counts.
zip_counts <- table(GI_37$zipcode)
barplot(zip_counts)
Perhaps this plot isn’t so useful; it may be better to use only the first three zipcode digits.
# Construct a bar chart for the first three digits of zipcode at facility 37.
# Drop the last two digits of each (five-digit) zipcode, then tabulate.
zip3 <- trunc(GI_37$zipcode / 100)
barplot(table(zip3))