library("tidyverse")
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
## ✔ ggplot2 3.4.0 ✔ purrr 0.3.5
## ✔ tibble 3.1.8 ✔ dplyr 1.0.10
## ✔ tidyr 1.2.1 ✔ stringr 1.4.1
## ✔ readr 2.1.3 ✔ forcats 0.5.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
# Build one example vector per atomic type: double, integer, character, logical.
nums <- c(2.5, -3.2, 9, pi, 2, 0.2)
# The L suffix makes each literal an integer rather than a double.
ints <- c(1L, 5L, 99L, 103L)
chrs <- c("my character vector")
lgls <- c(TRUE, FALSE, FALSE, TRUE)
# All four objects are plain vectors.
is.vector(nums)
## [1] TRUE
is.vector(ints)
## [1] TRUE
is.vector(chrs)
## [1] TRUE
is.vector(lgls)
## [1] TRUE
# length() counts elements; chrs holds one string, so its length is 1.
length(nums)
## [1] 6
length(ints)
## [1] 4
length(chrs)
## [1] 1
length(lgls)
## [1] 4
# typeof() reports the internal storage type of each vector.
typeof(nums)
## [1] "double"
typeof(ints)
## [1] "integer"
typeof(chrs)
## [1] "character"
typeof(lgls)
## [1] "logical"
# Integers print without any L suffix ...
ints
## [1] 1 5 99 103
# ... but the same values written without the L suffix are stored as double.
typeof(c(1,5,99,103))
## [1] "double"
# mode() reports both the double and the integer vector as "numeric".
mode(nums)
## [1] "numeric"
mode(ints)
## [1] "numeric"
mode(chrs)
## [1] "character"
mode(lgls)
## [1] "logical"
# storage.mode() separates double from integer again, matching typeof() here.
storage.mode(nums)
## [1] "double"
storage.mode(ints)
## [1] "integer"
storage.mode(chrs)
## [1] "character"
storage.mode(lgls)
## [1] "logical"
# class() reports "numeric" for the double vector and "integer" for ints.
class(nums)
## [1] "numeric"
class(ints)
## [1] "integer"
class(chrs)
## [1] "character"
class(lgls)
## [1] "logical"
This section in the R manual attempts to disambiguate these different functions.
# Before any attribute is assigned, attributes() returns NULL.
attributes(nums)
## NULL
# Label each element with a capital letter. seq_along() is used instead of
# 1:length(nums) so that a zero-length vector would give an empty index
# rather than c(1, 0).
names(nums) <- LETTERS[seq_along(nums)]
# Named vectors print with the names above the values.
nums
## A B C D E F
## 2.500000 -3.200000 9.000000 3.141593 2.000000 0.200000
names(nums)
## [1] "A" "B" "C" "D" "E" "F"
# The names are stored as the vector's only attribute.
attributes(nums)
## $names
## [1] "A" "B" "C" "D" "E" "F"
# A plain vector has no dim attribute.
dim(nums)
## NULL
# Assigning dim reshapes the six values into a 2 x 3 matrix, filled column by column.
dim(nums) <- c(2,3)
nums
## [,1] [,2] [,3]
## [1,] 2.5 9.000000 2.0
## [2,] -3.2 3.141593 0.2
# After the reshape the object no longer has element names.
names(nums)
## NULL
dim(nums)
## [1] 2 3
# dim is now the only attribute.
attributes(nums)
## $dim
## [1] 2 3
# With a dim attribute the object is no longer reported as a vector ...
is.vector(nums)
## [1] FALSE
# ... although the underlying storage type is unchanged ...
typeof(nums)
## [1] "double"
# ... and it is now a matrix.
is.matrix(nums)
## [1] TRUE
# Label columns with capital letters and rows with lower-case letters.
# seq_len() is used instead of 1:ncol(nums) / 1:nrow(nums) so that a
# zero-extent dimension would give an empty index rather than c(1, 0).
colnames(nums) <- LETTERS[seq_len(ncol(nums))]
rownames(nums) <- letters[seq_len(nrow(nums))]
nums
## A B C
## a 2.5 9.000000 2.0
## b -3.2 3.141593 0.2
# Row and column names are stored together in the dimnames attribute:
# element 1 holds the row names, element 2 the column names.
attributes(nums)
## $dim
## [1] 2 3
##
## $dimnames
## $dimnames[[1]]
## [1] "a" "b"
##
## $dimnames[[2]]
## [1] "A" "B" "C"
# The storage type is still double; only class() reflects the matrix structure.
typeof(nums)
## [1] "double"
mode(nums)
## [1] "numeric"
storage.mode(nums)
## [1] "double"
class(nums)
## [1] "matrix" "array"
# Convert the matrix to a data.frame, overwriting nums.
nums <- as.data.frame(nums)
is.matrix(nums)
## [1] FALSE
is.data.frame(nums)
## [1] TRUE
# It prints the same as the matrix did.
nums
## A B C
## a 2.5 9.000000 2.0
## b -3.2 3.141593 0.2
# The attributes differ, though: column names are stored in names, row names
# in row.names, and a class attribute is present.
attributes(nums)
## $names
## [1] "A" "B" "C"
##
## $class
## [1] "data.frame"
##
## $row.names
## [1] "a" "b"
# A data.frame is stored as a list; only class() identifies it as a data.frame.
typeof(nums)
## [1] "list"
mode(nums)
## [1] "list"
storage.mode(nums)
## [1] "list"
class(nums)
## [1] "data.frame"
# Convert the data.frame to a plain list, overwriting nums.
nums <- as.list(nums)
is.data.frame(nums)
## [1] FALSE
is.list(nums)
## [1] TRUE
# One list element per former column.
length(nums)
## [1] 3
nums
## $A
## [1] 2.5 -3.2
##
## $B
## [1] 9.000000 3.141593
##
## $C
## [1] 2.0 0.2
# Extract a single element by position with [[ ]] ...
nums[[1]]
## [1] 2.5 -3.2
# ... or by name with $.
nums$B
## [1] 9.000000 3.141593
# Only the names attribute remains; the class and row.names attributes are gone.
attributes(nums)
## $names
## [1] "A" "B" "C"
typeof(nums)
## [1] "list"
mode(nums)
## [1] "list"
storage.mode(nums)
## [1] "list"
class(nums)
## [1] "list"
# Build a list directly: an integer sequence and ten random normal draws.
# NOTE(review): no set.seed() call is visible in this section, so y and the
# coefficients printed below will differ from run to run.
l <- list(
x = 1:10,
y = rnorm(10)
)
# Add a third element: a linear model of y on x fitted from the list's own elements.
l$model <- lm(l$y ~ l$x)
# Printing the list prints each element in turn, including the fitted model.
l
## $x
## [1] 1 2 3 4 5 6 7 8 9 10
##
## $y
## [1] 0.1986962 -0.4909643 0.7522005 -0.8336690 -0.5337518 0.9960090
## [7] 1.1726551 -1.1686314 1.1338426 2.1317878
##
## $model
##
## Call:
## lm(formula = l$y ~ l$x)
##
## Coefficients:
## (Intercept) l$x
## -0.5547 0.1619
# The model element on its own.
l$model
##
## Call:
## lm(formula = l$y ~ l$x)
##
## Coefficients:
## (Intercept) l$x
## -0.5547 0.1619
# Inspect the list `l` built above. The original statements re-inspected
# `nums`, repeating an earlier block verbatim; `l` is the object this section
# has just constructed.
# A named list carries only a names attribute.
attributes(l)
## $names
## [1] "x" "y" "model"
# Every type/mode/class function reports "list", even though the elements
# are an integer vector, a double vector, and a fitted model.
typeof(l)
## [1] "list"
mode(l)
## [1] "list"
storage.mode(l)
## [1] "list"
class(l)
## [1] "list"
# The fitted model has a names attribute listing its components and a class attribute.
attributes(l$model)
## $names
## [1] "coefficients" "residuals" "effects" "rank"
## [5] "fitted.values" "assign" "qr" "df.residual"
## [9] "xlevels" "call" "terms" "model"
##
## $class
## [1] "lm"
# It is stored as a list; only class() identifies it as an "lm" object.
typeof(l$model)
## [1] "list"
mode(l$model)
## [1] "list"
storage.mode(l$model)
## [1] "list"
class(l$model)
## [1] "lm"
# ToothGrowth (from R's bundled datasets package) is a data.frame with three
# columns and 60 rows; its row.names are the integers 1 to 60.
attributes(ToothGrowth)
## $names
## [1] "len" "supp" "dose"
##
## $class
## [1] "data.frame"
##
## $row.names
## [1] 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25
## [26] 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50
## [51] 51 52 53 54 55 56 57 58 59 60
# It is stored as a list; only class() reports "data.frame".
typeof(ToothGrowth)
## [1] "list"
mode(ToothGrowth)
## [1] "list"
storage.mode(ToothGrowth)
## [1] "list"
class(ToothGrowth)
## [1] "data.frame"
Columns in a data.frame may have their own attributes.
# The supp column carries its own levels and class attributes.
attributes(ToothGrowth$supp)
## $levels
## [1] "OJ" "VC"
##
## $class
## [1] "factor"
# Complex vectors: the i suffix marks the imaginary part.
comps <- c(1i, 2+2i, 3+4i)
attributes(comps)
## NULL
# All four functions agree on "complex".
typeof(comps)
## [1] "complex"
mode(comps)
## [1] "complex"
storage.mode(comps)
## [1] "complex"
class(comps)
## [1] "complex"
# Raw vectors: raw(3) creates a raw vector of length 3.
raws <- raw(3)
attributes(raws)
## NULL
# All four functions agree on "raw".
typeof(raws)
## [1] "raw"
mode(raws)
## [1] "raw"
storage.mode(raws)
## [1] "raw"
class(raws)
## [1] "raw"
Factors are a special type of object for categorical data: each value is stored internally as an integer code, and a lookup table (the levels) maps those codes to character labels.
# The supp column of ToothGrowth is a factor.
is.factor(ToothGrowth$supp)
## [1] TRUE
attributes(ToothGrowth$supp)
## $levels
## [1] "OJ" "VC"
##
## $class
## [1] "factor"
# The underlying storage is integer; only class() reports "factor".
typeof(ToothGrowth$supp)
## [1] "integer"
mode(ToothGrowth$supp)
## [1] "numeric"
storage.mode(ToothGrowth$supp)
## [1] "integer"
class(ToothGrowth$supp)
## [1] "factor"
# Printing a factor shows the labels, followed by the set of levels.
ToothGrowth$supp
## [1] VC VC VC VC VC VC VC VC VC VC VC VC VC VC VC VC VC VC VC VC VC VC VC VC VC
## [26] VC VC VC VC VC OJ OJ OJ OJ OJ OJ OJ OJ OJ OJ OJ OJ OJ OJ OJ OJ OJ OJ OJ OJ
## [51] OJ OJ OJ OJ OJ OJ OJ OJ OJ OJ
## Levels: OJ VC
# summary() on a factor gives the count per level.
summary(ToothGrowth$supp)
## OJ VC
## 30 30
# The codes index into levels(): 2 corresponds to "VC", 1 to "OJ".
# Note that as.numeric() returns these codes as a double vector.
as.numeric(ToothGrowth$supp) # integer representation
## [1] 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1
## [39] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
# as.character() returns the labels as a character vector.
as.character(ToothGrowth$supp)
## [1] "VC" "VC" "VC" "VC" "VC" "VC" "VC" "VC" "VC" "VC" "VC" "VC" "VC" "VC" "VC"
## [16] "VC" "VC" "VC" "VC" "VC" "VC" "VC" "VC" "VC" "VC" "VC" "VC" "VC" "VC" "VC"
## [31] "OJ" "OJ" "OJ" "OJ" "OJ" "OJ" "OJ" "OJ" "OJ" "OJ" "OJ" "OJ" "OJ" "OJ" "OJ"
## [46] "OJ" "OJ" "OJ" "OJ" "OJ" "OJ" "OJ" "OJ" "OJ" "OJ" "OJ" "OJ" "OJ" "OJ" "OJ"
# Number of distinct levels, and the levels themselves.
nlevels(ToothGrowth$supp)
## [1] 2
levels(ToothGrowth$supp) # LOOKUP table
## [1] "OJ" "VC"
By default, the order of levels is alphabetical.
# A character vector holding three lower-case then three upper-case letters.
my_char <- c(letters[1:3], LETTERS[1:3])
my_char
## [1] "a" "b" "c" "A" "B" "C"
# as.factor() keeps the values in their original order but sorts the levels,
# so the levels come out as a A b B c C rather than in order of appearance.
my_fact <- as.factor(my_char)
my_fact
## [1] a b c A B C
## Levels: a A b B c C
levels(my_fact)
## [1] "a" "A" "b" "B" "c" "C"
To rearrange the levels, use factor() with the levels argument.
# Rebuild the factor with an explicit level order: lower-case letters first,
# then upper-case. The values are unchanged; only the level order differs.
my_fact2 <- factor(my_fact, levels = c(letters[1:3], LETTERS[1:3]))
my_fact2
## [1] a b c A B C
## Levels: a b c A B C
levels(my_fact2)
## [1] "a" "b" "c" "A" "B" "C"
The ordering of levels in a ggplot is based on the ordering in the factor.
# Boxplot of len for each supp group, with the factor supp on the x-axis.
ggplot(ToothGrowth, aes(x = supp, y = len)) +
geom_boxplot()
If you use a character vector, the ordering will be alphabetical.
This is particularly important when the values contain numbers, because alphabetical ordering is generally not what you want.
# Overwrites my_fact. The levels are sorted as strings, so "a10" lands
# before "a2" instead of after it.
my_fact <- factor(c("a1", "a2", "a10"))
levels(my_fact)
## [1] "a1" "a10" "a2"
By default, R will use the first level as the reference level in a regression model.
# Regress len on the factor supp. The only non-intercept coefficient is
# suppVC, so OJ (the first level) is acting as the reference level.
m <- lm(len ~ supp, data = ToothGrowth)
coef(m)
## (Intercept) suppVC
## 20.66333 -3.70000
If we want a different level to be first, we can just move one level to the beginning rather than setting all levels.
# Copy ToothGrowth with "VC" moved to the front of supp's levels;
# relevel() takes the new reference level via ref.
d <- ToothGrowth %>%
mutate(supp = relevel(supp, ref = "VC"))
# Refit on the releveled data: the coefficient is now suppOJ, its sign is
# flipped, and the intercept has changed accordingly.
m <- lm(len ~ supp, data = d)
coef(m)
## (Intercept) suppOJ
## 16.96333 3.70000