Solution: Filter, select, and mutate

Get started

library(tidyverse)

Filter penguins

Find all penguins that …

  1. … have a bill length between 40 and 45 mm.
# between() is inclusive on both ends, so this keeps 40 <= bill_len <= 45.
filter(penguins, between(bill_len, 40, 45))
# A tibble: 77 × 8
   species island    bill_len bill_dep flipper_len body_mass sex     year
   <fct>   <fct>        <dbl>    <dbl>       <int>     <int> <fct>  <int>
 1 Adelie  Torgersen     40.3     18           195      3250 female  2007
 2 Adelie  Torgersen     42       20.2         190      4250 <NA>    2007
 3 Adelie  Torgersen     41.1     17.6         182      3200 female  2007
 4 Adelie  Torgersen     42.5     20.7         197      4500 male    2007
 5 Adelie  Biscoe        40.6     18.6         183      3550 male    2007
 6 Adelie  Biscoe        40.5     17.9         187      3200 female  2007
 7 Adelie  Biscoe        40.5     18.9         180      3950 male    2007
 8 Adelie  Dream         40.9     18.9         184      3900 male    2007
 9 Adelie  Dream         42.2     18.5         180      3550 female  2007
10 Adelie  Dream         40.8     18.4         195      3900 male    2007
# ℹ 67 more rows
  2. … are of the species Adelie or Gentoo.
# %in% is TRUE for rows whose species equals any element of the vector.
filter(penguins, species %in% c("Adelie", "Gentoo"))
# A tibble: 276 × 8
   species island    bill_len bill_dep flipper_len body_mass sex     year
   <fct>   <fct>        <dbl>    <dbl>       <int>     <int> <fct>  <int>
 1 Adelie  Torgersen     39.1     18.7         181      3750 male    2007
 2 Adelie  Torgersen     39.5     17.4         186      3800 female  2007
 3 Adelie  Torgersen     40.3     18           195      3250 female  2007
 4 Adelie  Torgersen     NA       NA            NA        NA <NA>    2007
 5 Adelie  Torgersen     36.7     19.3         193      3450 female  2007
 6 Adelie  Torgersen     39.3     20.6         190      3650 male    2007
 7 Adelie  Torgersen     38.9     17.8         181      3625 female  2007
 8 Adelie  Torgersen     39.2     19.6         195      4675 male    2007
 9 Adelie  Torgersen     34.1     18.1         193      3475 <NA>    2007
10 Adelie  Torgersen     42       20.2         190      4250 <NA>    2007
# ℹ 266 more rows
# or
# filter(penguins, species == "Adelie" | species == "Gentoo")
  3. … lived on the island Dream in the year 2007.
# Conditions separated by commas are combined with AND.
filter(penguins, island == "Dream", year == 2007)
# A tibble: 46 × 8
   species island bill_len bill_dep flipper_len body_mass sex     year
   <fct>   <fct>     <dbl>    <dbl>       <int>     <int> <fct>  <int>
 1 Adelie  Dream      39.5     16.7         178      3250 female  2007
 2 Adelie  Dream      37.2     18.1         178      3900 male    2007
 3 Adelie  Dream      39.5     17.8         188      3300 female  2007
 4 Adelie  Dream      40.9     18.9         184      3900 male    2007
 5 Adelie  Dream      36.4     17           195      3325 female  2007
 6 Adelie  Dream      39.2     21.1         196      4150 male    2007
 7 Adelie  Dream      38.8     20           190      3950 male    2007
 8 Adelie  Dream      42.2     18.5         180      3550 female  2007
 9 Adelie  Dream      37.6     19.3         181      3300 female  2007
10 Adelie  Dream      39.8     19.1         184      4650 male    2007
# ℹ 36 more rows

Remove missing values

  1. Remove all penguins with missing values for sex.
# Only the sex column is checked for missing values here.
drop_na(penguins, sex)
# A tibble: 333 × 8
   species island    bill_len bill_dep flipper_len body_mass sex     year
   <fct>   <fct>        <dbl>    <dbl>       <int>     <int> <fct>  <int>
 1 Adelie  Torgersen     39.1     18.7         181      3750 male    2007
 2 Adelie  Torgersen     39.5     17.4         186      3800 female  2007
 3 Adelie  Torgersen     40.3     18           195      3250 female  2007
 4 Adelie  Torgersen     36.7     19.3         193      3450 female  2007
 5 Adelie  Torgersen     39.3     20.6         190      3650 male    2007
 6 Adelie  Torgersen     38.9     17.8         181      3625 female  2007
 7 Adelie  Torgersen     39.2     19.6         195      4675 male    2007
 8 Adelie  Torgersen     41.1     17.6         182      3200 female  2007
 9 Adelie  Torgersen     38.6     21.2         191      3800 male    2007
10 Adelie  Torgersen     34.6     21.1         198      4400 male    2007
# ℹ 323 more rows

Select columns

  1. Select only the variables species, sex, and year.
# Keep just the three named columns, in the order they are listed.
select(penguins, species, sex, year)
# A tibble: 344 × 3
   species sex     year
   <fct>   <fct>  <int>
 1 Adelie  male    2007
 2 Adelie  female  2007
 3 Adelie  female  2007
 4 Adelie  <NA>    2007
 5 Adelie  female  2007
 6 Adelie  male    2007
 7 Adelie  female  2007
 8 Adelie  male    2007
 9 Adelie  <NA>    2007
10 Adelie  <NA>    2007
# ℹ 334 more rows
  2. Select only columns that start with "bill".
# starts_with() selects columns by name prefix: here bill_len and bill_dep.
select(penguins, starts_with("bill"))
# A tibble: 344 × 2
   bill_len bill_dep
      <dbl>    <dbl>
 1     39.1     18.7
 2     39.5     17.4
 3     40.3     18  
 4     NA       NA  
 5     36.7     19.3
 6     39.3     20.6
 7     38.9     17.8
 8     39.2     19.6
 9     34.1     18.1
10     42       20.2
# ℹ 334 more rows

Add new columns

  1. Add a column with the ratio of bill length to bill depth.
# Element-wise division; rows with missing bill measurements get an NA ratio.
mutate(penguins, ratio = bill_len / bill_dep)
# A tibble: 344 × 9
   species island    bill_len bill_dep flipper_len body_mass sex     year ratio
   <fct>   <fct>        <dbl>    <dbl>       <int>     <int> <fct>  <int> <dbl>
 1 Adelie  Torgersen     39.1     18.7         181      3750 male    2007  2.09
 2 Adelie  Torgersen     39.5     17.4         186      3800 female  2007  2.27
 3 Adelie  Torgersen     40.3     18           195      3250 female  2007  2.24
 4 Adelie  Torgersen     NA       NA            NA        NA <NA>    2007 NA   
 5 Adelie  Torgersen     36.7     19.3         193      3450 female  2007  1.90
 6 Adelie  Torgersen     39.3     20.6         190      3650 male    2007  1.91
 7 Adelie  Torgersen     38.9     17.8         181      3625 female  2007  2.19
 8 Adelie  Torgersen     39.2     19.6         195      4675 male    2007  2   
 9 Adelie  Torgersen     34.1     18.1         193      3475 <NA>    2007  1.88
10 Adelie  Torgersen     42       20.2         190      4250 <NA>    2007  2.08
# ℹ 334 more rows
  2. Add a column with abbreviations for the species (Adelie = A, Gentoo = G, Chinstrap = C).
# Map each listed species to its one-letter code. A value matching none of
# the conditions falls through to NA, which is case_when()'s default.
mutate(
  penguins,
  species_short = case_when(
    species == "Adelie" ~ "A",
    species == "Chinstrap" ~ "C",
    species == "Gentoo" ~ "G"
  )
)
# A tibble: 344 × 9
   species island    bill_len bill_dep flipper_len body_mass sex     year
   <fct>   <fct>        <dbl>    <dbl>       <int>     <int> <fct>  <int>
 1 Adelie  Torgersen     39.1     18.7         181      3750 male    2007
 2 Adelie  Torgersen     39.5     17.4         186      3800 female  2007
 3 Adelie  Torgersen     40.3     18           195      3250 female  2007
 4 Adelie  Torgersen     NA       NA            NA        NA <NA>    2007
 5 Adelie  Torgersen     36.7     19.3         193      3450 female  2007
 6 Adelie  Torgersen     39.3     20.6         190      3650 male    2007
 7 Adelie  Torgersen     38.9     17.8         181      3625 female  2007
 8 Adelie  Torgersen     39.2     19.6         195      4675 male    2007
 9 Adelie  Torgersen     34.1     18.1         193      3475 <NA>    2007
10 Adelie  Torgersen     42       20.2         190      4250 <NA>    2007
# ℹ 334 more rows
# ℹ 1 more variable: species_short <chr>

Combine with the pipe

  1. Use the pipe to: remove rows with missing sex, keep only Adelie penguins, and select species, sex, and body_mass.
# Each step receives the result of the previous one as its first argument.
penguins |>
  drop_na(sex) |>                  # drop rows where sex is missing
  filter(species == "Adelie") |>   # keep Adelie penguins only
  select(species, sex, body_mass)  # keep the three requested columns
# A tibble: 146 × 3
   species sex    body_mass
   <fct>   <fct>      <int>
 1 Adelie  male        3750
 2 Adelie  female      3800
 3 Adelie  female      3250
 4 Adelie  female      3450
 5 Adelie  male        3650
 6 Adelie  female      3625
 7 Adelie  male        4675
 8 Adelie  female      3200
 9 Adelie  male        3800
10 Adelie  male        4400
# ℹ 136 more rows

For the fast ones

Use filter_out() to exclude penguins from Torgersen island, then select only species, island, and flipper_len.

# filter_out() is the complement of filter(): rows where the condition is
# TRUE are removed instead of kept.
penguins |>
  filter_out(island == "Torgersen") |>
  select(species, island, flipper_len)
# A tibble: 292 × 3
   species island flipper_len
   <fct>   <fct>        <int>
 1 Adelie  Biscoe         174
 2 Adelie  Biscoe         180
 3 Adelie  Biscoe         189
 4 Adelie  Biscoe         185
 5 Adelie  Biscoe         180
 6 Adelie  Biscoe         187
 7 Adelie  Biscoe         183
 8 Adelie  Biscoe         187
 9 Adelie  Biscoe         172
10 Adelie  Biscoe         180
# ℹ 282 more rows

Create a size_category column with case_when() based on body mass, in a pipe that also removes rows with missing body mass and selects only species, body_mass, and size_category.

# case_when() uses the first matching condition, so "medium" effectively means
# 3500 <= body_mass < 5000. With missing body masses already dropped, every
# remaining row (5000 and above) falls through to the "large" default.
penguins |>
  drop_na(body_mass) |>
  mutate(
    size_category = case_when(
      body_mass < 3500 ~ "small",
      body_mass < 5000 ~ "medium",
      .default = "large"
    )
  ) |>
  select(species, body_mass, size_category)
# A tibble: 342 × 3
   species body_mass size_category
   <fct>       <int> <chr>        
 1 Adelie       3750 medium       
 2 Adelie       3800 medium       
 3 Adelie       3250 small        
 4 Adelie       3450 small        
 5 Adelie       3650 medium       
 6 Adelie       3625 medium       
 7 Adelie       4675 medium       
 8 Adelie       3475 small        
 9 Adelie       4250 medium       
10 Adelie       3300 small        
# ℹ 332 more rows