Data

Regression von AfD-Wahlergebnissen auf den Anteil der Mindestlohnbezieher je Kreis

Import der Daten

library(tidyverse)
── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
✔ ggplot2 3.3.5     ✔ purrr   0.3.4
✔ tibble  3.1.5     ✔ dplyr   1.0.7
✔ tidyr   1.1.4     ✔ stringr 1.4.0
✔ readr   2.0.2     ✔ forcats 0.5.1
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
# Minimum-wage data (WSI): semicolon-delimited file with comma decimals
mindestlohn <- read_csv2(file = "data/WSI_mindestlohn.csv")
ℹ Using "','" as decimal and "'.'" as grouping mark. Use `read_delim()` for more control.
New names:
* `` -> ...1
* `` -> ...6
* `` -> ...7
Rows: 417 Columns: 7
── Column specification ────────────────────────────────────────────────────────
Delimiter: ";"
chr (1): Region
dbl (2): ...1, Anteil in Prozent der Beschäftigten mit Mindestlohn-anspruch
lgl (2): ...6, ...7

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Election results: semicolon-delimited file; every column is read as text
wahlergebnisse <- read_csv2(file = "data/Wahlergebnisse.csv")
ℹ Using "','" as decimal and "'.'" as grouping mark. Use `read_delim()` for more control.
New names:
* `` -> ...2
* `` -> ...3
* `` -> ...4
* `` -> ...5
* `` -> ...6
* ...
Rows: 407 Columns: 94
── Column specification ────────────────────────────────────────────────────────
Delimiter: ";"
chr (94): © Der Bundeswahlleiter, Wiesbaden 2022, ...2, ...3, ...4, ...5, .....

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

Clean up the data

# Reduce the raw election table to one row per district (kreis) with the
# AfD and Linke vote shares. Shares are stored as fractions of the `waehler`
# column, not as percentages.
wahl <- wahlergebnisse %>%
  # give the positionally named columns readable names
  rename(
    land = "© Der Bundeswahlleiter, Wiesbaden 2022",
    kreis = "...2",
    name = "...3",
    waehler = "...5",
    afd_stimmen = "...57",
    linke_stimmen = "...59"
  ) %>%
  # drop the first five rows (metadata and the total for Germany) and rows
  # without a district code *before* converting to numeric, so those rows
  # no longer trigger "NAs introduced by coercion" warnings
  slice(-(1:5)) %>%
  filter(!is.na(kreis)) %>%
  mutate(
    # all columns were imported as character; convert each count once
    across(c(waehler, afd_stimmen, linke_stimmen), as.numeric),
    afd_prozent = afd_stimmen / waehler,
    linke_prozent = linke_stimmen / waehler
  ) %>%
  select(kreis, name, land, waehler, afd_stimmen, afd_prozent, linke_prozent)
Warning in mask$eval_all_mutate(quo): NAs introduced by coercion

Warning in mask$eval_all_mutate(quo): NAs introduced by coercion

Warning in mask$eval_all_mutate(quo): NAs introduced by coercion

Warning in mask$eval_all_mutate(quo): NAs introduced by coercion

Warning in mask$eval_all_mutate(quo): NAs introduced by coercion

Warning in mask$eval_all_mutate(quo): NAs introduced by coercion

Warning in mask$eval_all_mutate(quo): NAs introduced by coercion
# Calculate Berlin as a whole (code 11000) from its two parts, east (11100)
# and west (11200), then remove the two partial rows.
berlin_parts <- wahl %>%
  filter(kreis %in% c("11100", "11200"))

# Both parts must be present, otherwise the totals below would be wrong.
stopifnot(nrow(berlin_parts) == 2)

wahl <- wahl %>%
  filter(!kreis %in% c("11100", "11200")) %>% # remove east and west berlin
  add_row(
    kreis = "11000",
    name = "Berlin",
    land = "BE",
    waehler = sum(berlin_parts$waehler),
    afd_stimmen = sum(berlin_parts$afd_stimmen),
    afd_prozent = afd_stimmen / waehler,
    # linke_stimmen is no longer in `wahl`, so rebuild the Linke vote count
    # of each part as share * voters; without this the Berlin row would get
    # NA for linke_prozent
    linke_prozent =
      sum(berlin_parts$linke_prozent * berlin_parts$waehler) / waehler
  )
# Build the minimum-wage table keyed by the five-character district code.
lohn <- mindestlohn %>%
  # keep and rename the three columns that are used later
  select(
    kreis = "...1",
    name = "Region",
    lohn_prozent = "Anteil in Prozent der Beschäftigten mit Mindestlohn-anspruch"
  ) %>%
  # keep only codes with 4 or 5 digits; readr parsed the code column as a
  # number, so compare numerically instead of counting the characters of
  # R's default number formatting (e.g. 100000 prints as "1e+05")
  filter(between(kreis, 1000, 99999)) %>%
  # store the code as text, left-padded with a zero to five characters,
  # so it matches the codes in the election data
  mutate(kreis = sprintf("%05d", as.integer(kreis)))

Show the districts in the election data that have no match in the minimum-wage data

# Districts that appear in the election data but not in the wage data
wahl %>%
  anti_join(lohn, by = "kreis") %>%
  select(kreis, name) %>%
  arrange(kreis)

Hamburg and Berlin are missing from the minimum-wage data; add their values by hand, taken from the WSI website

# Append the manually looked-up values for Hamburg and Berlin in one call
lohn <- lohn %>%
  add_row(
    kreis = c("02000", "11000"),
    name = c("Hamburg", "Berlin"),
    lohn_prozent = c(14.7, 17.8)
  )

Merge the two datasets

# Attach the minimum-wage share to every election district. Only the join key
# and the wage column are taken from `lohn`, so the district name comes from
# the election data.
wahl_lohn <- wahl %>%
  left_join(select(lohn, kreis, lohn_prozent), by = "kreis") %>%
  transmute(
    kreis,
    name,
    land,
    afd_prozent,
    lohn_prozent = lohn_prozent / 100, # percent -> fraction
    linke_prozent
  )

The regression is run in the next file, so save the merged data frame here

# RDS keeps column types, so the leading zeros in kreis survive re-importing
saveRDS(object = wahl_lohn, file = "data/wahl_lohn.rds")