library("tidyverse")

## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
## ✔ ggplot2 3.4.0      ✔ purrr   0.3.5 
## ✔ tibble  3.1.8      ✔ dplyr   1.0.10
## ✔ tidyr   1.2.1      ✔ stringr 1.4.1 
## ✔ readr   2.1.3      ✔ forcats 0.5.2 
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()

Vectors

# One small example vector for each of the four common atomic types.
nums <- c(2.5, -3.2, 9, pi, 2, 0.2)  # double
ints <- c(1L, 5L, 99L, 103L)         # integer (note the L suffix)
chrs <- "my character vector"        # character; a single string is a length-1 vector
lgls <- c(TRUE, FALSE, FALSE, TRUE)  # logical

is.vector()

is.vector(nums)

## [1] TRUE

is.vector(ints)

## [1] TRUE

is.vector(chrs)

## [1] TRUE

is.vector(lgls)

## [1] TRUE

length()

length(nums)

## [1] 6

length(ints)

## [1] 4

length(chrs)

## [1] 1

length(lgls)

## [1] 4

typeof()

typeof(nums)

## [1] "double"

typeof(ints)

## [1] "integer"

typeof(chrs)

## [1] "character"

typeof(lgls)

## [1] "logical"

ints

## [1]   1   5  99 103

typeof(c(1,5,99,103))

## [1] "double"

mode()

mode(nums)

## [1] "numeric"

mode(ints)

## [1] "numeric"

mode(chrs)

## [1] "character"

mode(lgls)

## [1] "logical"

storage.mode()

storage.mode(nums)

## [1] "double"

storage.mode(ints)

## [1] "integer"

storage.mode(chrs)

## [1] "character"

storage.mode(lgls)

## [1] "logical"

class()

class(nums)

## [1] "numeric"

class(ints)

## [1] "integer"

class(chrs)

## [1] "character"

class(lgls)

## [1] "logical"

This section in the R manual attempts to disambiguate these different functions.

Attributes

attributes(nums)

## NULL

names()

# Label each element with a capital letter. seq_along() is used instead of
# 1:length(nums) because the latter yields c(1, 0) for a zero-length vector.
names(nums) <- LETTERS[seq_along(nums)]
nums

##         A         B         C         D         E         F 
##  2.500000 -3.200000  9.000000  3.141593  2.000000  0.200000

names(nums)

## [1] "A" "B" "C" "D" "E" "F"

attributes(nums)

## $names
## [1] "A" "B" "C" "D" "E" "F"

dim()

dim(nums)

## NULL

# Giving the vector a dim attribute turns it into a 2 x 3 matrix; the values
# are laid out column by column.
dim(nums) <- c(2L, 3L)
nums

##      [,1]     [,2] [,3]
## [1,]  2.5 9.000000  2.0
## [2,] -3.2 3.141593  0.2

names(nums)

## NULL

dim(nums)

## [1] 2 3

attributes(nums)

## $dim
## [1] 2 3

is.vector(nums)

## [1] FALSE

typeof(nums)

## [1] "double"

Matrices

is.matrix(nums)

## [1] TRUE

# Label columns with capital letters and rows with lower-case letters.
# seq_len() is used instead of 1:n so that a zero-extent dimension gives an
# empty index rather than c(1, 0).
colnames(nums) <- LETTERS[seq_len(ncol(nums))]
rownames(nums) <- letters[seq_len(nrow(nums))]
nums

##      A        B   C
## a  2.5 9.000000 2.0
## b -3.2 3.141593 0.2

attributes(nums)

## $dim
## [1] 2 3
## 
## $dimnames
## $dimnames[[1]]
## [1] "a" "b"
## 
## $dimnames[[2]]
## [1] "A" "B" "C"

typeof(nums)

## [1] "double"

mode(nums)

## [1] "numeric"

storage.mode(nums)

## [1] "double"

class(nums)

## [1] "matrix" "array"

Data.frame

nums <- as.data.frame(nums)
is.matrix(nums)

## [1] FALSE

is.data.frame(nums)

## [1] TRUE

nums

##      A        B   C
## a  2.5 9.000000 2.0
## b -3.2 3.141593 0.2

attributes(nums)

## $names
## [1] "A" "B" "C"
## 
## $class
## [1] "data.frame"
## 
## $row.names
## [1] "a" "b"

typeof(nums)

## [1] "list"

mode(nums)

## [1] "list"

storage.mode(nums)

## [1] "list"

class(nums)

## [1] "data.frame"

List

nums <- as.list(nums)
is.data.frame(nums)

## [1] FALSE

is.list(nums)

## [1] TRUE

length(nums)

## [1] 3

nums

## $A
## [1]  2.5 -3.2
## 
## $B
## [1] 9.000000 3.141593
## 
## $C
## [1] 2.0 0.2

nums[[1]]

## [1]  2.5 -3.2

nums$B

## [1] 9.000000 3.141593

attributes(nums)

## $names
## [1] "A" "B" "C"

typeof(nums)

## [1] "list"

mode(nums)

## [1] "list"

storage.mode(nums)

## [1] "list"

class(nums)

## [1] "list"

# A list can hold elements of different kinds: here an integer sequence, a
# vector of random normal draws, and (added afterwards) a fitted model object.
l <- list(x = 1:10, y = rnorm(10))

# The formula refers to l$x and l$y directly, so the stored call and the
# coefficient name keep the `l$` prefix when the model is printed.
l$model <- lm(l$y ~ l$x)

l

## $x
##  [1]  1  2  3  4  5  6  7  8  9 10
## 
## $y
##  [1]  0.1986962 -0.4909643  0.7522005 -0.8336690 -0.5337518  0.9960090
##  [7]  1.1726551 -1.1686314  1.1338426  2.1317878
## 
## $model
## 
## Call:
## lm(formula = l$y ~ l$x)
## 
## Coefficients:
## (Intercept)          l$x  
##     -0.5547       0.1619

l$model

## 
## Call:
## lm(formula = l$y ~ l$x)
## 
## Coefficients:
## (Intercept)          l$x  
##     -0.5547       0.1619

# Inspect the list `l` itself before looking at its `model` element.
attributes(l)

## $names
## [1] "x"     "y"     "model"

typeof(l)

## [1] "list"

mode(l)

## [1] "list"

storage.mode(l)

## [1] "list"

class(l)

## [1] "list"

attributes(l$model)

## $names
##  [1] "coefficients"  "residuals"     "effects"       "rank"         
##  [5] "fitted.values" "assign"        "qr"            "df.residual"  
##  [9] "xlevels"       "call"          "terms"         "model"        
## 
## $class
## [1] "lm"

typeof(l$model)

## [1] "list"

mode(l$model)

## [1] "list"

storage.mode(l$model)

## [1] "list"

class(l$model)

## [1] "lm"

Data frames

attributes(ToothGrowth)

## $names
## [1] "len"  "supp" "dose"
## 
## $class
## [1] "data.frame"
## 
## $row.names
##  [1]  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25
## [26] 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50
## [51] 51 52 53 54 55 56 57 58 59 60

typeof(ToothGrowth)

## [1] "list"

mode(ToothGrowth)

## [1] "list"

storage.mode(ToothGrowth)

## [1] "list"

class(ToothGrowth)

## [1] "data.frame"

Columns in a data.frame may have their own attributes

attributes(ToothGrowth$supp)

## $levels
## [1] "OJ" "VC"
## 
## $class
## [1] "factor"

Complex

comps <- c(1i, 2+2i, 3+4i)
attributes(comps)

## NULL

typeof(comps)

## [1] "complex"

mode(comps)

## [1] "complex"

storage.mode(comps)

## [1] "complex"

class(comps)

## [1] "complex"

Raw

raws <- raw(3)
attributes(raws)

## NULL

typeof(raws)

## [1] "raw"

mode(raws)

## [1] "raw"

storage.mode(raws)

## [1] "raw"

class(raws)

## [1] "raw"

Factor

Factors represent categorical data: they behave much like character vectors, but are stored internally as integer codes together with a lookup table (the levels) that maps each code to its label.

is.factor(ToothGrowth$supp)

## [1] TRUE

attributes(ToothGrowth$supp)

## $levels
## [1] "OJ" "VC"
## 
## $class
## [1] "factor"

typeof(ToothGrowth$supp)

## [1] "integer"

mode(ToothGrowth$supp)

## [1] "numeric"

storage.mode(ToothGrowth$supp)

## [1] "integer"

class(ToothGrowth$supp)

## [1] "factor"

ToothGrowth$supp

##  [1] VC VC VC VC VC VC VC VC VC VC VC VC VC VC VC VC VC VC VC VC VC VC VC VC VC
## [26] VC VC VC VC VC OJ OJ OJ OJ OJ OJ OJ OJ OJ OJ OJ OJ OJ OJ OJ OJ OJ OJ OJ OJ
## [51] OJ OJ OJ OJ OJ OJ OJ OJ OJ OJ
## Levels: OJ VC

summary(ToothGrowth$supp)

## OJ VC 
## 30 30

Lookup table

as.numeric(ToothGrowth$supp)    # integer representation

##  [1] 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1
## [39] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1

as.character(ToothGrowth$supp)

##  [1] "VC" "VC" "VC" "VC" "VC" "VC" "VC" "VC" "VC" "VC" "VC" "VC" "VC" "VC" "VC"
## [16] "VC" "VC" "VC" "VC" "VC" "VC" "VC" "VC" "VC" "VC" "VC" "VC" "VC" "VC" "VC"
## [31] "OJ" "OJ" "OJ" "OJ" "OJ" "OJ" "OJ" "OJ" "OJ" "OJ" "OJ" "OJ" "OJ" "OJ" "OJ"
## [46] "OJ" "OJ" "OJ" "OJ" "OJ" "OJ" "OJ" "OJ" "OJ" "OJ" "OJ" "OJ" "OJ" "OJ" "OJ"

nlevels(ToothGrowth$supp)

## [1] 2

levels(ToothGrowth$supp)   # LOOKUP table

## [1] "OJ" "VC"

Reorder levels

By default, the order of levels is alphabetical.

my_char <- c(letters[1:3], LETTERS[1:3])
my_char

## [1] "a" "b" "c" "A" "B" "C"

my_fact <- as.factor(my_char)
my_fact

## [1] a b c A B C
## Levels: a A b B c C

levels(my_fact)

## [1] "a" "A" "b" "B" "c" "C"

To rearrange the levels, use factor() with the levels argument

my_fact2 <- factor(my_fact, levels = c(letters[1:3], LETTERS[1:3]))
my_fact2

## [1] a b c A B C
## Levels: a b c A B C

levels(my_fact2)

## [1] "a" "b" "c" "A" "B" "C"

The ordering of levels in a ggplot is based on the ordering in the factor.

ggplot(ToothGrowth, aes(x = supp, y = len)) + 
  geom_boxplot()

If you use a character vector, the ordering will be alphabetical.

This is particularly important when the values contain numbers, because alphabetical ordering is generally not what you want.

my_fact <- factor(c("a1", "a2", "a10"))
levels(my_fact)

## [1] "a1"  "a10" "a2"

Reference level

By default, R will use the first level as the reference level in a regression model.

m <- lm(len ~ supp, data = ToothGrowth)
coef(m)

## (Intercept)      suppVC 
##    20.66333    -3.70000

If we want a different level to be first, we can just move one level to the beginning rather than setting all levels.

d <- ToothGrowth %>%
  mutate(supp = relevel(supp, ref = "VC"))

m <- lm(len ~ supp, data = d)
coef(m)

## (Intercept)      suppOJ 
##    16.96333     3.70000

STAT 486/586

Jarad Niemi

2023-02-12