R code

library("tidyverse")
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
## ✔ ggplot2 3.4.0      ✔ purrr   0.3.5 
## ✔ tibble  3.1.8      ✔ dplyr   1.0.10
## ✔ tidyr   1.2.1      ✔ stringr 1.4.1 
## ✔ readr   2.1.3      ✔ forcats 0.5.2 
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()

Vectors

# Four small atomic vectors, one per basic type, reused by the examples below.
# Plain numeric literals (and pi) give a double vector.
nums <- c(2.5, -3.2, 9, pi, 2, 0.2)
# The L suffix makes each literal an integer rather than a double.
ints <- c(1L, 5L, 99L, 103L)
# A single string: a character vector of length 1.
chrs <- c("my character vector")
# Logical vector.
lgls <- c(TRUE, FALSE, FALSE, TRUE)

is.vector()

is.vector(nums)
## [1] TRUE
is.vector(ints)
## [1] TRUE
is.vector(chrs)
## [1] TRUE
is.vector(lgls)
## [1] TRUE

length()

length(nums)
## [1] 6
length(ints)
## [1] 4
length(chrs)
## [1] 1
length(lgls)
## [1] 4

typeof()

typeof(nums)
## [1] "double"
typeof(ints)
## [1] "integer"
typeof(chrs)
## [1] "character"
typeof(lgls)
## [1] "logical"
ints
## [1]   1   5  99 103
typeof(c(1,5,99,103))
## [1] "double"

mode()

mode(nums)
## [1] "numeric"
mode(ints)
## [1] "numeric"
mode(chrs)
## [1] "character"
mode(lgls)
## [1] "logical"

storage.mode()

storage.mode(nums)
## [1] "double"
storage.mode(ints)
## [1] "integer"
storage.mode(chrs)
## [1] "character"
storage.mode(lgls)
## [1] "logical"

class()

class(nums)
## [1] "numeric"
class(ints)
## [1] "integer"
class(chrs)
## [1] "character"
class(lgls)
## [1] "logical"

This section in the R manual attempts to disambiguate these different functions.

Attributes

attributes(nums)
## NULL

names()

# Label each element with a capital letter. seq_along() is used instead of
# 1:length(nums) because the latter yields c(1, 0) for a zero-length vector.
names(nums) <- LETTERS[seq_along(nums)]
nums
##         A         B         C         D         E         F 
##  2.500000 -3.200000  9.000000  3.141593  2.000000  0.200000
names(nums)
## [1] "A" "B" "C" "D" "E" "F"
attributes(nums)
## $names
## [1] "A" "B" "C" "D" "E" "F"

dim()

dim(nums)
## NULL
# Assigning dim reshapes the vector into a 2 x 3 matrix, filled column by
# column; the names attribute set earlier is dropped (names(nums) is NULL
# afterwards).
dim(nums) <- c(2,3)
nums
##      [,1]     [,2] [,3]
## [1,]  2.5 9.000000  2.0
## [2,] -3.2 3.141593  0.2
names(nums)
## NULL
dim(nums)
## [1] 2 3
attributes(nums)
## $dim
## [1] 2 3
is.vector(nums)
## [1] FALSE
typeof(nums)
## [1] "double"

Matrices

is.matrix(nums)
## [1] TRUE
# Name the columns A, B, ... and the rows a, b, ...
# seq_len() is used instead of 1:ncol()/1:nrow() so that a zero-extent
# dimension produces an empty index rather than c(1, 0).
colnames(nums) <- LETTERS[seq_len(ncol(nums))]
rownames(nums) <- letters[seq_len(nrow(nums))]
nums
##      A        B   C
## a  2.5 9.000000 2.0
## b -3.2 3.141593 0.2
attributes(nums)
## $dim
## [1] 2 3
## 
## $dimnames
## $dimnames[[1]]
## [1] "a" "b"
## 
## $dimnames[[2]]
## [1] "A" "B" "C"
typeof(nums)
## [1] "double"
mode(nums)
## [1] "numeric"
storage.mode(nums)
## [1] "double"
class(nums)
## [1] "matrix" "array"

Data.frame

# Convert the matrix to a data.frame: the column dimnames become names and the
# row dimnames become row.names (see the attributes() output below).
nums <- as.data.frame(nums)
is.matrix(nums)
## [1] FALSE
is.data.frame(nums)
## [1] TRUE
nums
##      A        B   C
## a  2.5 9.000000 2.0
## b -3.2 3.141593 0.2
attributes(nums)
## $names
## [1] "A" "B" "C"
## 
## $class
## [1] "data.frame"
## 
## $row.names
## [1] "a" "b"
typeof(nums)
## [1] "list"
mode(nums)
## [1] "list"
storage.mode(nums)
## [1] "list"
class(nums)
## [1] "data.frame"

List

# Convert the data.frame to a plain list with one element per column; the
# class and row.names attributes are dropped and only names remains.
nums <- as.list(nums)
is.data.frame(nums)
## [1] FALSE
is.list(nums)
## [1] TRUE
length(nums)
## [1] 3
nums
## $A
## [1]  2.5 -3.2
## 
## $B
## [1] 9.000000 3.141593
## 
## $C
## [1] 2.0 0.2
nums[[1]]
## [1]  2.5 -3.2
nums$B
## [1] 9.000000 3.141593
attributes(nums)
## $names
## [1] "A" "B" "C"
typeof(nums)
## [1] "list"
mode(nums)
## [1] "list"
storage.mode(nums)
## [1] "list"
class(nums)
## [1] "list"
# A list can hold elements of different types: here an integer sequence,
# random draws, and (added afterwards) a fitted model object.
l <- list(
  x = 1:10,
  # rnorm() is called without set.seed(), so y and the fitted coefficients
  # shown below differ from run to run
  y = rnorm(10)
)
# Store the fitted linear model as a third element of the list
l$model <- lm(l$y ~ l$x)

l
## $x
##  [1]  1  2  3  4  5  6  7  8  9 10
## 
## $y
##  [1]  0.1986962 -0.4909643  0.7522005 -0.8336690 -0.5337518  0.9960090
##  [7]  1.1726551 -1.1686314  1.1338426  2.1317878
## 
## $model
## 
## Call:
## lm(formula = l$y ~ l$x)
## 
## Coefficients:
## (Intercept)          l$x  
##     -0.5547       0.1619
l$model
## 
## Call:
## lm(formula = l$y ~ l$x)
## 
## Coefficients:
## (Intercept)          l$x  
##     -0.5547       0.1619
# Inspect the mixed-content list `l` itself (nums was already inspected above)
attributes(l)
## $names
## [1] "x"     "y"     "model"
typeof(l)
## [1] "list"
mode(l)
## [1] "list"
storage.mode(l)
## [1] "list"
class(l)
## [1] "list"
attributes(l$model)
## $names
##  [1] "coefficients"  "residuals"     "effects"       "rank"         
##  [5] "fitted.values" "assign"        "qr"            "df.residual"  
##  [9] "xlevels"       "call"          "terms"         "model"        
## 
## $class
## [1] "lm"
typeof(l$model)
## [1] "list"
mode(l$model)
## [1] "list"
storage.mode(l$model)
## [1] "list"
class(l$model)
## [1] "lm"

Data frames

attributes(ToothGrowth)
## $names
## [1] "len"  "supp" "dose"
## 
## $class
## [1] "data.frame"
## 
## $row.names
##  [1]  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25
## [26] 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50
## [51] 51 52 53 54 55 56 57 58 59 60
typeof(ToothGrowth)
## [1] "list"
mode(ToothGrowth)
## [1] "list"
storage.mode(ToothGrowth)
## [1] "list"
class(ToothGrowth)
## [1] "data.frame"

Columns in a data.frame may have their own attributes.

attributes(ToothGrowth$supp)
## $levels
## [1] "OJ" "VC"
## 
## $class
## [1] "factor"

Complex

comps <- c(1i, 2+2i, 3+4i)
attributes(comps)
## NULL
typeof(comps)
## [1] "complex"
mode(comps)
## [1] "complex"
storage.mode(comps)
## [1] "complex"
class(comps)
## [1] "complex"

Raw

raws <- raw(3)
attributes(raws)
## NULL
typeof(raws)
## [1] "raw"
mode(raws)
## [1] "raw"
storage.mode(raws)
## [1] "raw"
class(raws)
## [1] "raw"

Factor

Factors represent categorical data. Internally a factor is an integer vector (see typeof() below) with a levels attribute that acts as a lookup table from each integer code to its character label.

is.factor(ToothGrowth$supp)
## [1] TRUE
attributes(ToothGrowth$supp)
## $levels
## [1] "OJ" "VC"
## 
## $class
## [1] "factor"
typeof(ToothGrowth$supp)
## [1] "integer"
mode(ToothGrowth$supp)
## [1] "numeric"
storage.mode(ToothGrowth$supp)
## [1] "integer"
class(ToothGrowth$supp)
## [1] "factor"
ToothGrowth$supp
##  [1] VC VC VC VC VC VC VC VC VC VC VC VC VC VC VC VC VC VC VC VC VC VC VC VC VC
## [26] VC VC VC VC VC OJ OJ OJ OJ OJ OJ OJ OJ OJ OJ OJ OJ OJ OJ OJ OJ OJ OJ OJ OJ
## [51] OJ OJ OJ OJ OJ OJ OJ OJ OJ OJ
## Levels: OJ VC
summary(ToothGrowth$supp)
## OJ VC 
## 30 30

Lookup table

# as.integer() returns the underlying codes in their stored type;
# as.numeric() prints the same values but coerces them to double.
as.integer(ToothGrowth$supp)    # integer representation
##  [1] 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1
## [39] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
as.character(ToothGrowth$supp)
##  [1] "VC" "VC" "VC" "VC" "VC" "VC" "VC" "VC" "VC" "VC" "VC" "VC" "VC" "VC" "VC"
## [16] "VC" "VC" "VC" "VC" "VC" "VC" "VC" "VC" "VC" "VC" "VC" "VC" "VC" "VC" "VC"
## [31] "OJ" "OJ" "OJ" "OJ" "OJ" "OJ" "OJ" "OJ" "OJ" "OJ" "OJ" "OJ" "OJ" "OJ" "OJ"
## [46] "OJ" "OJ" "OJ" "OJ" "OJ" "OJ" "OJ" "OJ" "OJ" "OJ" "OJ" "OJ" "OJ" "OJ" "OJ"
nlevels(ToothGrowth$supp)
## [1] 2
levels(ToothGrowth$supp)   # LOOKUP table
## [1] "OJ" "VC"

Reorder levels

By default, levels are sorted alphabetically using the current locale's collation order, which here interleaves lower-case and upper-case letters.

my_char <- c(letters[1:3], LETTERS[1:3])
my_char
## [1] "a" "b" "c" "A" "B" "C"
my_fact <- as.factor(my_char)
my_fact
## [1] a b c A B C
## Levels: a A b B c C
levels(my_fact)
## [1] "a" "A" "b" "B" "c" "C"

To rearrange the levels, use factor() with the levels argument.

my_fact2 <- factor(my_fact, levels = c(letters[1:3], LETTERS[1:3]))
my_fact2
## [1] a b c A B C
## Levels: a b c A B C
levels(my_fact2)
## [1] "a" "b" "c" "A" "B" "C"

The ordering of levels in a ggplot is based on the ordering of the levels in the factor.

ggplot(ToothGrowth, aes(x = supp, y = len)) + 
  geom_boxplot()

If you use a character vector, the ordering will be alphabetical.

This is particularly important when the labels contain numbers, as alphabetical ordering is generally not what you want: in the example below, "a10" sorts before "a2".

my_fact <- factor(c("a1", "a2", "a10"))
levels(my_fact)
## [1] "a1"  "a10" "a2"

Reference level

By default, R will use the first level as the reference level in a regression model.

m <- lm(len ~ supp, data = ToothGrowth)
coef(m)
## (Intercept)      suppVC 
##    20.66333    -3.70000

If we want a different level to be first, we can just move one level to the beginning rather than setting all levels.

# Copy of ToothGrowth in which "VC" is moved to the front of the supp levels,
# so that it becomes the reference level in the model fitted below.
d <- mutate(ToothGrowth, supp = relevel(supp, ref = "VC"))

m <- lm(len ~ supp, data = d)
coef(m)
## (Intercept)      suppOJ 
##    16.96333     3.70000