2 Der Tidyverse Approach

Import -> Tidy -> Transform / Visualise / Model -> Communicate

2.1 readr for import

library("readr")

Paket zum importieren verschiedener Formate Import der Dateien (nur mit working directory in “Einführungs” Folder)

#normal CSV
flights_csv <- read_csv("data/flights.csv")
#STATA
flights_dta <- read_dta("data/flights.dta")
#SPSS
flights_sav <- read_sav("data/flights.sav")

load("data/flights.RData")

=> verschiedene praktische Sachen, alles auch im cheatsheet

2.2 working with tidyr

library("tidyr")

different functions to wrangle datasets and clean them

gather: gather columns into key value pairs

#pipe the table into a function
table4a %>% 
  gather("1999","2000", key="year", value="cases")

## # A tibble: 6 × 3
##   country     year   cases
##   <chr>       <chr>  <int>
## 1 Afghanistan 1999     745
## 2 Brazil      1999   37737
## 3 China       1999  212258
## 4 Afghanistan 2000    2666
## 5 Brazil      2000   80488
## 6 China       2000  213766

spread: spread key value pair across columns

#pipe the table into a function
table2 %>% 
  spread(key="type", value="count", convert = T)

## # A tibble: 6 × 4
##   country      year  cases population
##   <chr>       <int>  <int>      <int>
## 1 Afghanistan  1999    745   19987071
## 2 Afghanistan  2000   2666   20595360
## 3 Brazil       1999  37737  172006362
## 4 Brazil       2000  80488  174504898
## 5 China        1999 212258 1272915272
## 6 China        2000 213766 1280428583

separate: turn single character column into multiple columns

table3 %>% 
  separate(rate, into = c("cases", "population"), convert = T)

## # A tibble: 6 × 4
##   country      year  cases population
##   <chr>       <int>  <int>      <int>
## 1 Afghanistan  1999    745   19987071
## 2 Afghanistan  2000   2666   20595360
## 3 Brazil       1999  37737  172006362
## 4 Brazil       2000  80488  174504898
## 5 China        1999 212258 1272915272
## 6 China        2000 213766 1280428583

unite: unit columns into one

table5 %>% 
  unite("century", "year", col = "year", sep="")

## # A tibble: 6 × 3
##   country     year  rate             
##   <chr>       <chr> <chr>            
## 1 Afghanistan 1999  745/19987071     
## 2 Afghanistan 2000  2666/20595360    
## 3 Brazil      1999  37737/172006362  
## 4 Brazil      2000  80488/174504898  
## 5 China       1999  212258/1272915272
## 6 China       2000  213766/1280428583

2.3 dplyr

Datentransformation mit dplyr

library("dplyr")
library("nycflights13")

Einladen des Datensatzes filter: einzelne Zeilen aus dem Datensatz raúsziehen

flights %>%
  filter(month == 1, day == 1)

## # A tibble: 842 × 19
##     year month   day dep_time sched_dep_time dep_delay
##    <int> <int> <int>    <int>          <int>     <dbl>
##  1  2013     1     1      517            515         2
##  2  2013     1     1      533            529         4
##  3  2013     1     1      542            540         2
##  4  2013     1     1      544            545        -1
##  5  2013     1     1      554            600        -6
##  6  2013     1     1      554            558        -4
##  7  2013     1     1      555            600        -5
##  8  2013     1     1      557            600        -3
##  9  2013     1     1      557            600        -3
## 10  2013     1     1      558            600        -2
## # … with 832 more rows, and 13 more variables:
## #   arr_time <int>, sched_arr_time <int>,
## #   arr_delay <dbl>, carrier <chr>, flight <int>,
## #   tailnum <chr>, origin <chr>, dest <chr>,
## #   air_time <dbl>, distance <dbl>, hour <dbl>,
## #   minute <dbl>, time_hour <dttm>

flights %>%
  filter(arr_delay >= 60)

## # A tibble: 28,317 × 19
##     year month   day dep_time sched_dep_time dep_delay
##    <int> <int> <int>    <int>          <int>     <dbl>
##  1  2013     1     1      811            630       101
##  2  2013     1     1      848           1835       853
##  3  2013     1     1      957            733       144
##  4  2013     1     1     1114            900       134
##  5  2013     1     1     1120            944        96
##  6  2013     1     1     1255           1200        55
##  7  2013     1     1     1301           1150        71
##  8  2013     1     1     1337           1220        77
##  9  2013     1     1     1342           1320        22
## 10  2013     1     1     1356           1259        57
## # … with 28,307 more rows, and 13 more variables:
## #   arr_time <int>, sched_arr_time <int>,
## #   arr_delay <dbl>, carrier <chr>, flight <int>,
## #   tailnum <chr>, origin <chr>, dest <chr>,
## #   air_time <dbl>, distance <dbl>, hour <dbl>,
## #   minute <dbl>, time_hour <dttm>

arrange: ordnen von Daten

flights %>%
  arrange(dep_time)

## # A tibble: 336,776 × 19
##     year month   day dep_time sched_dep_time dep_delay
##    <int> <int> <int>    <int>          <int>     <dbl>
##  1  2013     1    13        1           2249        72
##  2  2013     1    31        1           2100       181
##  3  2013    11    13        1           2359         2
##  4  2013    12    16        1           2359         2
##  5  2013    12    20        1           2359         2
##  6  2013    12    26        1           2359         2
##  7  2013    12    30        1           2359         2
##  8  2013     2    11        1           2100       181
##  9  2013     2    24        1           2245        76
## 10  2013     3     8        1           2355         6
## # … with 336,766 more rows, and 13 more variables:
## #   arr_time <int>, sched_arr_time <int>,
## #   arr_delay <dbl>, carrier <chr>, flight <int>,
## #   tailnum <chr>, origin <chr>, dest <chr>,
## #   air_time <dbl>, distance <dbl>, hour <dbl>,
## #   minute <dbl>, time_hour <dttm>

select: einzelne spalten

flights %>%
  dplyr::select(year : day)

## # A tibble: 336,776 × 3
##     year month   day
##    <int> <int> <int>
##  1  2013     1     1
##  2  2013     1     1
##  3  2013     1     1
##  4  2013     1     1
##  5  2013     1     1
##  6  2013     1     1
##  7  2013     1     1
##  8  2013     1     1
##  9  2013     1     1
## 10  2013     1     1
## # … with 336,766 more rows

rename: Umbenennung von Spalten namen

dplyr::rename(flights, arriving = arr_time)

## # A tibble: 336,776 × 19
##     year month   day dep_time sched_dep_time dep_delay
##    <int> <int> <int>    <int>          <int>     <dbl>
##  1  2013     1     1      517            515         2
##  2  2013     1     1      533            529         4
##  3  2013     1     1      542            540         2
##  4  2013     1     1      544            545        -1
##  5  2013     1     1      554            600        -6
##  6  2013     1     1      554            558        -4
##  7  2013     1     1      555            600        -5
##  8  2013     1     1      557            600        -3
##  9  2013     1     1      557            600        -3
## 10  2013     1     1      558            600        -2
## # … with 336,766 more rows, and 13 more variables:
## #   arriving <int>, sched_arr_time <int>,
## #   arr_delay <dbl>, carrier <chr>, flight <int>,
## #   tailnum <chr>, origin <chr>, dest <chr>,
## #   air_time <dbl>, distance <dbl>, hour <dbl>,
## #   minute <dbl>, time_hour <dttm>

mutate: eurezugt neue Variable aus alten

flights %>%
  dplyr::select(distance,air_time) %>%
  dplyr::mutate(speed = distance/air_time * 60)

## # A tibble: 336,776 × 3
##    distance air_time speed
##       <dbl>    <dbl> <dbl>
##  1     1400      227  370.
##  2     1416      227  374.
##  3     1089      160  408.
##  4     1576      183  517.
##  5      762      116  394.
##  6      719      150  288.
##  7     1065      158  404.
##  8      229       53  259.
##  9      944      140  405.
## 10      733      138  319.
## # … with 336,766 more rows

summarize und count

flights %>%
  dplyr::summarize(delay_average = mean(dep_delay, na.rm = T) )

## # A tibble: 1 × 1
##   delay_average
##           <dbl>
## 1          12.6

groupby : Gruppieren von Daten

flights %>% 
  group_by(carrier) %>%
  summarize(del_avg = mean(dep_delay, na.rm = T), 
            del_sd= sd(dep_delay, na.rm = T),
            del_median = median(dep_delay, na.rm = T)
            )

## # A tibble: 16 × 4
##    carrier del_avg del_sd del_median
##    <chr>     <dbl>  <dbl>      <dbl>
##  1 9E        16.7    45.9       -2  
##  2 AA         8.59   37.4       -3  
##  3 AS         5.80   31.4       -3  
##  4 B6        13.0    38.5       -1  
##  5 DL         9.26   39.7       -2  
##  6 EV        20.0    46.6       -1  
##  7 F9        20.2    58.4        0.5
##  8 FL        18.7    52.7        1  
##  9 HA         4.90   74.1       -4  
## 10 MQ        10.6    39.2       -3  
## 11 OO        12.6    43.1       -6  
## 12 UA        12.1    35.7        0  
## 13 US         3.78   28.1       -4  
## 14 VX        12.9    44.8        0  
## 15 WN        17.7    43.3        1  
## 16 YV        19.0    49.2       -2

join: zusammenfügen verschiedener Datensätze

flights %>%
  dplyr::select(carrier, dep_time) %>%
  left_join(airlines, by = "carrier")

## # A tibble: 336,776 × 3
##    carrier dep_time name                    
##    <chr>      <int> <chr>                   
##  1 UA           517 United Air Lines Inc.   
##  2 UA           533 United Air Lines Inc.   
##  3 AA           542 American Airlines Inc.  
##  4 B6           544 JetBlue Airways         
##  5 DL           554 Delta Air Lines Inc.    
##  6 UA           554 United Air Lines Inc.   
##  7 B6           555 JetBlue Airways         
##  8 EV           557 ExpressJet Airlines Inc.
##  9 B6           557 JetBlue Airways         
## 10 AA           558 American Airlines Inc.  
## # … with 336,766 more rows

2.4 Aufgabenblatt

Starwars - Datensatz einladen

2.4.1 tidyr-arbeit:

load("data/starwars.RData")

#1 
starwars2 %>% 
  tidyr::gather("feminine", "masculine", , key = "gender", value = "mass", na.rm = T)

## # A tibble: 58 × 6
##    name           height hair_color `<NA>` gender   mass
##    <chr>           <int> <chr>       <dbl> <chr>   <dbl>
##  1 Adi Gallia        184 none           NA femini…  50  
##  2 Ayla Secura       178 none           NA femini…  55  
##  3 Barriss Offee     166 black          NA femini…  50  
##  4 Beru Whitesun…    165 brown          NA femini…  75  
##  5 Leia Organa       150 brown          NA femini…  49  
##  6 Luminara Undu…    170 black          NA femini…  56.2
##  7 Padmé Amidala     165 brown          NA femini…  45  
##  8 Shaak Ti          178 none           NA femini…  57  
##  9 Zam Wesell        168 blonde         NA femini…  55  
## 10 Ackbar            180 none           NA mascul…  83  
## # … with 48 more rows

#2 !Not finished
tidyr::unite(starwars2, "feminine", "masculine", col = "gender" , na.rm = T)

## # A tibble: 87 × 5
##    name                height hair_color gender `<NA>`
##    <chr>                <int> <chr>      <chr>   <dbl>
##  1 Ackbar                 180 none       "83"       NA
##  2 Adi Gallia             184 none       "50"       NA
##  3 Anakin Skywalker       188 blond      "84"       NA
##  4 Arvel Crynyd            NA brown      ""         NA
##  5 Ayla Secura            178 none       "55"       NA
##  6 Bail Prestor Organa    191 black      ""         NA
##  7 Barriss Offee          166 black      "50"       NA
##  8 BB8                     NA none       ""         NA
##  9 Ben Quadinaros         163 none       "65"       NA
## 10 Beru Whitesun lars     165 brown      "75"       NA
## # … with 77 more rows

#3
tidyr::spread(starwars3, key=body_measure, value = body_values, convert = T)

## # A tibble: 87 × 5
##    name                hair_color gender    height  mass
##    <chr>               <chr>      <chr>      <int> <dbl>
##  1 Ackbar              none       masculine    180    83
##  2 Adi Gallia          none       feminine     184    50
##  3 Anakin Skywalker    blond      masculine    188    84
##  4 Arvel Crynyd        brown      masculine     NA    NA
##  5 Ayla Secura         none       feminine     178    55
##  6 Bail Prestor Organa black      masculine    191    NA
##  7 Barriss Offee       black      feminine     166    50
##  8 BB8                 none       masculine     NA    NA
##  9 Ben Quadinaros      none       masculine    163    65
## 10 Beru Whitesun lars  brown      feminine     165    75
## # … with 77 more rows

#4 problems with doulbes and integers
tidyr::separate(starwars4, mass_height, into=c("mass", "height"),convert = T)

## Warning: Expected 2 pieces. Additional pieces discarded
## in 2 rows [21, 61].

## # A tibble: 87 × 5
##    name               hair_color    gender   mass height
##    <chr>              <chr>         <chr>   <int>  <int>
##  1 Luke Skywalker     blond         mascul…    77    172
##  2 C-3PO              <NA>          mascul…    75    167
##  3 R2-D2              <NA>          mascul…    32     96
##  4 Darth Vader        none          mascul…   136    202
##  5 Leia Organa        brown         femini…    49    150
##  6 Owen Lars          brown, grey   mascul…   120    178
##  7 Beru Whitesun lars brown         femini…    75    165
##  8 R5-D4              <NA>          mascul…    32     97
##  9 Biggs Darklighter  black         mascul…    84    183
## 10 Obi-Wan Kenobi     auburn, white mascul…    77    182
## # … with 77 more rows

#5 
tidyr::unite(starwars5, hair_color1, hair_color2, col = "haircolor", na.rm = T, sep="-")

## # A tibble: 87 × 5
##    name               height  mass haircolor      gender
##    <chr>               <int> <dbl> <chr>          <chr> 
##  1 Luke Skywalker        172    77 "blond"        mascu…
##  2 C-3PO                 167    75 ""             mascu…
##  3 R2-D2                  96    32 ""             mascu…
##  4 Darth Vader           202   136 "none"         mascu…
##  5 Leia Organa           150    49 "brown"        femin…
##  6 Owen Lars             178   120 "brown-grey"   mascu…
##  7 Beru Whitesun lars    165    75 "brown"        femin…
##  8 R5-D4                  97    32 ""             mascu…
##  9 Biggs Darklighter     183    84 "black"        mascu…
## 10 Obi-Wan Kenobi        182    77 "auburn-white" mascu…
## # … with 77 more rows

2.4.2 dplyr-arbeit:

#1 
starwars %>%
  filter(mass > 75, mass < 100, height > 180, height < 190) %>%
  count(eye_color)

## # A tibble: 5 × 2
##   eye_color     n
##   <chr>     <int>
## 1 black         1
## 2 blue          1
## 3 blue-gray     1
## 4 brown         6
## 5 yellow        1

#2 
starwars %>%
  filter(is.na(height) | is.na(height))

## # A tibble: 6 × 16
##   name      height  mass hair_color skin_color eye_color
##   <chr>      <int> <dbl> <chr>      <chr>      <chr>    
## 1 Arvel Cr…     NA    NA brown      fair       brown    
## 2 Finn          NA    NA black      dark       dark     
## 3 Rey           NA    NA brown      light      hazel    
## 4 Poe Dame…     NA    NA brown      light      brown    
## 5 BB8           NA    NA none       none       black    
## 6 Captain …     NA    NA unknown    unknown    unknown  
## # … with 10 more variables: birth_year <dbl>,
## #   sex <chr>, gender <chr>, homeworld <chr>,
## #   species <chr>, films <list>, vehicles <list>,
## #   starships <list>, star_string <chr>,
## #   films_low <chr>

#3
dplyr::arrange(starwars, -desc(mass))

## # A tibble: 87 × 16
##    name     height  mass hair_color skin_color eye_color
##    <chr>     <int> <dbl> <chr>      <chr>      <chr>    
##  1 Ratts T…     79    15 none       grey, blue unknown  
##  2 Yoda         66    17 white      green      brown    
##  3 Wicket …     88    20 brown      brown      brown    
##  4 R2-D2        96    32 <NA>       white, bl… red      
##  5 R5-D4        97    32 <NA>       white, red red      
##  6 Sebulba     112    40 none       grey, red  orange   
##  7 Dud Bolt     94    45 none       blue, grey yellow   
##  8 Padmé A…    165    45 brown      light      brown    
##  9 Wat Tam…    193    48 none       green, gr… unknown  
## 10 Sly Moo…    178    48 none       pale       white    
## # … with 77 more rows, and 10 more variables:
## #   birth_year <dbl>, sex <chr>, gender <chr>,
## #   homeworld <chr>, species <chr>, films <list>,
## #   vehicles <list>, starships <list>,
## #   star_string <chr>, films_low <chr>

dplyr::arrange(starwars, desc(height))

## # A tibble: 87 × 16
##    name    height  mass hair_color skin_color  eye_color
##    <chr>    <int> <dbl> <chr>      <chr>       <chr>    
##  1 Yarael…    264    NA none       white       yellow   
##  2 Tarfful    234   136 brown      brown       blue     
##  3 Lama Su    229    88 none       grey        black    
##  4 Chewba…    228   112 brown      unknown     blue     
##  5 Roos T…    224    82 none       grey        orange   
##  6 Grievo…    216   159 none       brown, whi… green, y…
##  7 Taun We    213    NA none       grey        black    
##  8 Rugor …    206    NA none       green       orange   
##  9 Tion M…    206    80 none       grey        black    
## 10 Darth …    202   136 none       white       yellow   
## # … with 77 more rows, and 10 more variables:
## #   birth_year <dbl>, sex <chr>, gender <chr>,
## #   homeworld <chr>, species <chr>, films <list>,
## #   vehicles <list>, starships <list>,
## #   star_string <chr>, films_low <chr>

#Darth Vader height = 202

#4
starwars %>%
  dplyr::filter(species != "Droid") %>%
  dplyr::select(name:mass) %>%
  dplyr::mutate(bmi = mass/((height/100)**2))

## # A tibble: 77 × 4
##    name               height  mass   bmi
##    <chr>               <int> <dbl> <dbl>
##  1 Luke Skywalker        172    77  26.0
##  2 Darth Vader           202   136  33.3
##  3 Leia Organa           150    49  21.8
##  4 Owen Lars             178   120  37.9
##  5 Beru Whitesun lars    165    75  27.5
##  6 Biggs Darklighter     183    84  25.1
##  7 Obi-Wan Kenobi        182    77  23.2
##  8 Anakin Skywalker      188    84  23.8
##  9 Wilhuff Tarkin        180    NA  NA  
## 10 Chewbacca             228   112  21.5
## # … with 67 more rows

#5 
starwars %>% 
  dplyr::rename("größe" = "height" ) %>%
  rename("gewicht"= "mass") %>%
  rename("haarfarbe"= "hair_color" ) %>%
  rename("hautfarbe"= "skin_color")

## # A tibble: 87 × 16
##    name     größe gewicht haarfarbe  hautfarbe eye_color
##    <chr>    <int>   <dbl> <chr>      <chr>     <chr>    
##  1 Luke Sk…   172      77 blond      fair      blue     
##  2 C-3PO      167      75 <NA>       gold      yellow   
##  3 R2-D2       96      32 <NA>       white, b… red      
##  4 Darth V…   202     136 none       white     yellow   
##  5 Leia Or…   150      49 brown      light     brown    
##  6 Owen La…   178     120 brown, gr… light     blue     
##  7 Beru Wh…   165      75 brown      light     blue     
##  8 R5-D4       97      32 <NA>       white, r… red      
##  9 Biggs D…   183      84 black      light     brown    
## 10 Obi-Wan…   182      77 auburn, w… fair      blue-gray
## # … with 77 more rows, and 10 more variables:
## #   birth_year <dbl>, sex <chr>, gender <chr>,
## #   homeworld <chr>, species <chr>, films <list>,
## #   vehicles <list>, starships <list>,
## #   star_string <chr>, films_low <chr>

#6
starwars %>%
  dplyr::filter(species != "Droid") %>%
  dplyr::filter(!is.na(height) , !is.na(mass)) %>%
  dplyr::select(name:mass) %>%
  dplyr::mutate(bmi = mass/((height/100)**2)) %>%
  dplyr::summarize(
    mass_mean = mean(mass),
    mass_median = median(mass),
    mass_sd = sd(mass),
    height_mean = mean(height),
    height_median = median(height),
    height_sd = sd(height),
    bmi_mean = mean(bmi),
    bmi_median = median(bmi),
    bmi_sd = sd(bmi),
  )

## # A tibble: 1 × 9
##   mass_mean mass_median mass_sd height_mean
##       <dbl>       <dbl>   <dbl>       <dbl>
## 1      100.          79    177.        176.
## # … with 5 more variables: height_median <dbl>,
## #   height_sd <dbl>, bmi_mean <dbl>, bmi_median <dbl>,
## #   bmi_sd <dbl>

#7 
starwars %>%
  dplyr::filter(!is.na(height) , !is.na(mass)) %>%
  dplyr::select(name:mass, species) %>%
  dplyr::mutate(bmi = mass/((height/100)**2)) %>%
  dplyr::group_by(species) %>%
  dplyr::summarize(
    mass_mean = mean(mass),
    mass_median = median(mass),
    mass_sd = sd(mass),
    height_mean = mean(height),
    height_median = median(height),
    height_sd = sd(height),
    bmi_mean = mean(bmi),
    bmi_median = median(bmi),
    bmi_sd = sd(bmi),
  )

## # A tibble: 32 × 10
##    species   mass_mean mass_median mass_sd height_mean
##    <chr>         <dbl>       <dbl>   <dbl>       <dbl>
##  1 Aleena         15          15      NA           79 
##  2 Besalisk      102         102      NA          198 
##  3 Cerean         82          82      NA          198 
##  4 Clawdite       55          55      NA          168 
##  5 Droid          69.8        53.5    51.0        140 
##  6 Dug            40          40      NA          112 
##  7 Ewok           20          20      NA           88 
##  8 Geonosian      80          80      NA          183 
##  9 Gungan         74          74      11.3        210 
## 10 Human          82.8        79      19.4        180.
## # … with 22 more rows, and 5 more variables:
## #   height_median <dbl>, height_sd <dbl>,
## #   bmi_mean <dbl>, bmi_median <dbl>, bmi_sd <dbl>

#8 
starwars %>%
  dplyr::filter(!is.na(height) , !is.na(mass)) %>%
  dplyr::select(name:mass, gender, species) %>%
  dplyr::mutate(bmi = mass/((height/100)**2)) %>%
  dplyr::group_by(gender, species) %>%
  dplyr::summarize(
    mass_mean = mean(mass),
    mass_median = median(mass),
    mass_sd = sd(mass),
    height_mean = mean(height),
    height_median = median(height),
    height_sd = sd(height),
    bmi_mean = mean(bmi),
    bmi_median = median(bmi),
    bmi_sd = sd(bmi),
  )

## `summarise()` has grouped output by 'gender'. You can override using the `.groups` argument.

## # A tibble: 33 × 11
## # Groups:   gender [3]
##    gender    species    mass_mean mass_median mass_sd
##    <chr>     <chr>          <dbl>       <dbl>   <dbl>
##  1 feminine  Clawdite        55          55     NA   
##  2 feminine  Human           56.3        49     16.3 
##  3 feminine  Mirialan        53.1        53.1    4.38
##  4 feminine  Tholothian      50          50     NA   
##  5 feminine  Togruta         57          57     NA   
##  6 feminine  Twi'lek         55          55     NA   
##  7 masculine Aleena          15          15     NA   
##  8 masculine Besalisk       102         102     NA   
##  9 masculine Cerean          82          82     NA   
## 10 masculine Droid           69.8        53.5   51.0 
## # … with 23 more rows, and 6 more variables:
## #   height_mean <dbl>, height_median <dbl>,
## #   height_sd <dbl>, bmi_mean <dbl>, bmi_median <dbl>,
## #   bmi_sd <dbl>

#9 
count(starwars, gender)

## # A tibble: 3 × 2
##   gender        n
##   <chr>     <int>
## 1 feminine     17
## 2 masculine    66
## 3 <NA>          4

#10
dplyr::left_join(starwars6,starwars7, on = "name")

## Joining, by = "name"

## # A tibble: 87 × 14
##    name    height  mass birth_year hair_color skin_color
##    <chr>    <int> <dbl>      <dbl> <chr>      <chr>     
##  1 Luke S…    172    77       19   blond      fair      
##  2 C-3PO      167    75      112   <NA>       gold      
##  3 R2-D2       96    32       33   <NA>       white, bl…
##  4 Darth …    202   136       41.9 none       white     
##  5 Leia O…    150    49       19   brown      light     
##  6 Owen L…    178   120       52   brown, gr… light     
##  7 Beru W…    165    75       47   brown      light     
##  8 R5-D4       97    32       NA   <NA>       white, red
##  9 Biggs …    183    84       24   black      light     
## 10 Obi-Wa…    182    77       57   auburn, w… fair      
## # … with 77 more rows, and 8 more variables:
## #   eye_color <chr>, sex <chr>, gender <chr>,
## #   homeworld <chr>, species <chr>, films <list>,
## #   vehicles <list>, starships <list>

yeehaw, fertig