Solution: Summarize and visualize

Get started

library(tidyverse)

Count

  1. Count the number of penguins on each island.
# Tally the penguins recorded on each island.
penguins |>
  count(island)
# A tibble: 3 × 2
  island        n
  <fct>     <int>
1 Biscoe      168
2 Dream       124
3 Torgersen    52
  2. Count the number of penguins of each species on each island.
# Tally the penguins per island and species.
penguins |>
  count(island, species)
# A tibble: 5 × 3
  island    species       n
  <fct>     <fct>     <int>
1 Biscoe    Adelie       44
2 Biscoe    Gentoo      124
3 Dream     Adelie       56
4 Dream     Chinstrap    68
5 Torgersen Adelie       52

Summarize

  1. Calculate mean flipper length and body mass for the 3 species separately.
# Average flipper length and body mass within each species.
# na.rm = TRUE keeps missing measurements from turning a mean into NA.
summarize(
  penguins,
  .by = species,
  mean_flipper = mean(flipper_len, na.rm = TRUE),
  mean_body = mean(body_mass, na.rm = TRUE)
)
# A tibble: 3 × 3
  species   mean_flipper mean_body
  <fct>            <dbl>     <dbl>
1 Adelie            190.     3701.
2 Gentoo            217.     5076.
3 Chinstrap         196.     3733.
  2. Calculate mean flipper length and body mass by species and sex. Remove penguins with unknown sex first.
# Keep only penguins whose sex is recorded, then average flipper length
# and body mass within each species/sex group.
penguins |>
  filter(!is.na(sex)) |>
  summarize(
    .by = c(species, sex),
    mean_flipper = mean(flipper_len, na.rm = TRUE),
    mean_body = mean(body_mass, na.rm = TRUE)
  )
# A tibble: 6 × 4
  species   sex    mean_flipper mean_body
  <fct>     <fct>         <dbl>     <dbl>
1 Adelie    male           192.     4043.
2 Adelie    female         188.     3369.
3 Gentoo    female         213.     4680.
4 Gentoo    male           222.     5485.
5 Chinstrap female         192.     3527.
6 Chinstrap male           200.     3939.

Combine dplyr and ggplot

  1. Remove penguins with missing sex, then pipe into a boxplot of body mass with sex on the x-axis.
# Keep only penguins whose sex is recorded, then draw one body-mass
# boxplot per sex.
penguins |>
  filter(!is.na(sex)) |>
  ggplot(mapping = aes(x = sex, y = body_mass)) +
  geom_boxplot()

  2. Remove penguins with missing sex, then make a scatterplot of bill length vs. bill depth, colored by species.
# Keep only penguins whose sex is recorded, then plot bill length against
# bill depth with one colour per species.
penguins |>
  filter(!is.na(sex)) |>
  ggplot(mapping = aes(x = bill_len, y = bill_dep, color = species)) +
  geom_point()

For the fast ones

Summarize mean body mass by species, then pipe the result into a bar chart with geom_col().

# Compute the mean body mass per species and feed the summary table
# straight into a bar chart (one bar per species).
penguins |>
  summarize(
    .by = species,
    mean_body = mean(body_mass, na.rm = TRUE)
  ) |>
  ggplot(mapping = aes(x = species, y = mean_body)) +
  geom_col()

Calculate the min, max, and mean flipper length per species.

# Smallest, largest, and average flipper length within each species,
# ignoring missing measurements.
summarize(
  penguins,
  .by = species,
  min_flipper = min(flipper_len, na.rm = TRUE),
  max_flipper = max(flipper_len, na.rm = TRUE),
  mean_flipper = mean(flipper_len, na.rm = TRUE)
)
# A tibble: 3 × 4
  species   min_flipper max_flipper mean_flipper
  <fct>           <int>       <int>        <dbl>
1 Adelie            172         210         190.
2 Gentoo            203         231         217.
3 Chinstrap         178         212         196.

Sort the result by mean flipper length using arrange().

# Same per-species flipper summary as before, then order the rows from
# the shortest to the longest mean flipper length.
penguins |>
  summarize(
    .by = species,
    min_flipper = min(flipper_len, na.rm = TRUE),
    max_flipper = max(flipper_len, na.rm = TRUE),
    mean_flipper = mean(flipper_len, na.rm = TRUE)
  ) |>
  arrange(mean_flipper)
# A tibble: 3 × 4
  species   min_flipper max_flipper mean_flipper
  <fct>           <int>       <int>        <dbl>
1 Adelie            172         210         190.
2 Chinstrap         178         212         196.
3 Gentoo            203         231         217.