library(tidyverse)
Solution: Summarize and visualize
Get started
Count
- Count the number of penguins on each island.
# Tally how many penguins were recorded on each island.
penguins |>
  count(island)
# A tibble: 3 × 2
island n
<fct> <int>
1 Biscoe 168
2 Dream 124
3 Torgersen 52
- Count the number of penguins of each species on each island.
# Tally penguins for every island/species combination that occurs.
penguins |>
  count(island, species)
# A tibble: 5 × 3
island species n
<fct> <fct> <int>
1 Biscoe Adelie 44
2 Biscoe Gentoo 124
3 Dream Adelie 56
4 Dream Chinstrap 68
5 Torgersen Adelie 52
Summarize
- Calculate mean flipper length and body mass for the 3 species separately.
# Per-species averages of flipper length and body mass. Missing
# measurements are skipped (na.rm = TRUE) so they do not turn a mean into NA.
summarise(
  penguins,
  .by = species,
  mean_flipper = mean(flipper_len, na.rm = TRUE),
  mean_body = mean(body_mass, na.rm = TRUE)
)
# A tibble: 3 × 3
species mean_flipper mean_body
<fct> <dbl> <dbl>
1 Adelie 190. 3701.
2 Gentoo 217. 5076.
3 Chinstrap 196. 3733.
- Calculate mean flipper length and body mass by species and sex. Remove penguins with unknown sex first.
# Same averages, now split by species and sex. Rows with unknown sex are
# dropped first so that no NA group shows up in the result.
penguins |>
  drop_na(sex) |>
  summarise(
    .by = c(species, sex),
    mean_flipper = mean(flipper_len, na.rm = TRUE),
    mean_body = mean(body_mass, na.rm = TRUE)
  )
# A tibble: 6 × 4
species sex mean_flipper mean_body
<fct> <fct> <dbl> <dbl>
1 Adelie male 192. 4043.
2 Adelie female 188. 3369.
3 Gentoo female 213. 4680.
4 Gentoo male 222. 5485.
5 Chinstrap female 192. 3527.
6 Chinstrap male 200. 3939.
Combine dplyr and ggplot
- Remove penguins with missing `sex`, then pipe into a boxplot of body mass with `sex` on the x-axis.
# Boxplot of body mass for each sex, after discarding penguins whose sex
# is missing.
penguins |>
  drop_na(sex) |>
  ggplot(mapping = aes(sex, body_mass)) +
  geom_boxplot()
- Remove penguins with missing `sex`, then make a scatterplot of bill length vs. bill depth, colored by species.
# Scatterplot of bill length (x) against bill depth (y), one colour per
# species, using only penguins with a recorded sex.
penguins |>
  drop_na(sex) |>
  ggplot(mapping = aes(bill_len, bill_dep, colour = species)) +
  geom_point()
For the fast ones
Summarize mean body mass by species, then pipe the result into a bar chart with geom_col().
# Compute the mean body mass per species, then hand the summary table
# straight to ggplot to draw one bar per species.
penguins |>
  summarise(
    .by = species,
    mean_body = mean(body_mass, na.rm = TRUE)
  ) |>
  ggplot(mapping = aes(species, mean_body)) +
  geom_col()
Calculate the min, max, and mean flipper length per species.
# Smallest, largest and average flipper length within each species.
# Missing measurements are ignored in all three statistics.
summarise(
  penguins,
  .by = species,
  min_flipper = min(flipper_len, na.rm = TRUE),
  max_flipper = max(flipper_len, na.rm = TRUE),
  mean_flipper = mean(flipper_len, na.rm = TRUE)
)
# A tibble: 3 × 4
species min_flipper max_flipper mean_flipper
<fct> <int> <int> <dbl>
1 Adelie 172 210 190.
2 Gentoo 203 231 217.
3 Chinstrap 178 212 196.
Sort the result by mean flipper length using arrange().
# Same per-species flipper statistics, ordered from the shortest to the
# longest mean flipper length.
penguins |>
  summarise(
    .by = species,
    min_flipper = min(flipper_len, na.rm = TRUE),
    max_flipper = max(flipper_len, na.rm = TRUE),
    mean_flipper = mean(flipper_len, na.rm = TRUE)
  ) |>
  arrange(mean_flipper)
# A tibble: 3 × 4
species min_flipper max_flipper mean_flipper
<fct> <int> <int> <dbl>
1 Adelie 172 210 190.
2 Chinstrap 178 212 196.
3 Gentoo 203 231 217.