In this YouTube video, I explain how to use the code below to get the IMDB ratings for each episode of Stranger Things and produce this awesome-looking graph.
Big thanks to Ansgar Wolsing (bydata), who did the heavy lifting by producing the original code, which I have adapted. The IMDB non-commercial datasets page details what is in each dataset, and the datasets themselves can be downloaded from https://datasets.imdbws.com/
Subscribe for updates on my latest videos, courses, and more.
# load packages
library(tidyverse)
library(shadowtext)
library(ggfx)
library(ggtext)
# download title.basics title.episode and title.ratings
# from https://datasets.imdbws.com/ and unzip
title_basics <- read_tsv("title.basics.tsv", na = "\\N", quote = '')
title_episode <- read_tsv("title.episode.tsv", na = "\\N", quote = '')
title_ratings <- read_tsv("title.ratings.tsv", na = "\\N", quote = '')
# filter Stranger Things
basics_st <- title_basics %>%
filter(primaryTitle == "Stranger Things", titleType == "tvSeries", startYear == "2016")
# Get find episodes and merge ratings
parent_title_id <- basics_st$tconst
episodes_st <- title_episode %>%
filter(parentTconst == parent_title_id) %>%
inner_join(title_ratings, by = "tconst") %>%
arrange(seasonNumber, episodeNumber) %>%
collect() %>% mutate(across(c(seasonNumber, episodeNumber), as.numeric))
# average season ratings
average_rating_season <- episodes_st %>%
group_by(seasonNumber) %>%
mutate(rating_votes = averageRating * numVotes) %>%
summarize(wgt_avg_season_rating = sum(rating_votes) / sum(numVotes),
avg_season_rating = mean(averageRating))
# episode ratings
episodes_st_cont <- episodes_st %>%
arrange(seasonNumber, episodeNumber) %>%
mutate(ep_cont = row_number()) %>%
inner_join(average_rating_season, by = "seasonNumber")
# min and median episode counts for positioning labels
episodes_st_cont_summary <- episodes_st_cont %>%
group_by(seasonNumber) %>%
summarize(ep_cont_min = min(ep_cont),
ep_cont_median = median(ep_cont))
## -------
## Credit to Ansgar Wolsing (https://github.com/bydata)
## for the original plot I have adapted from
## https://github.com/bydata/tidytuesday/tree/main/2022/30/R
## -------
# function for consistent format text annotation
annotate_richtext <- function(label, ...) {
annotate("richtext", label = label,
family = "Montserrat", size = 2.75,
fill = NA, label.color = NA, color = "grey94", label.padding = unit(0.05, "mm"),
hjust = 0, ...)}
# default colours
main_color <- "#B1281E"
bg_color <- "grey9"
title_pos <- 12.5
# Title Text
titles <- list(
"title" = "___________\nSTRANGER\n‾‾ THINGS ‾‾\n\n",
"subtitle" = "Each dot represents the average IMDB rating of an episode. The horizontal bars indicate
average season ratings (weighted by the number of votes).", "caption"="")
# jitters first and last episode coordinate
episodes_st_cont %>%
group_by(seasonNumber) %>%
mutate(ep_cont_extended = case_when(
ep_cont == min(ep_cont) ~ as.numeric(ep_cont) - 0.25,
ep_cont == max(ep_cont) ~ as.numeric(ep_cont) + 0.25,
TRUE ~ as.numeric(ep_cont)
)) %>%
ungroup() %>%
# main plot
ggplot(aes(ep_cont, averageRating, group = factor(seasonNumber))) +
geom_curve(
aes(xend = ep_cont, y = wgt_avg_season_rating, yend = averageRating),
col = main_color, lty = "solid", size = 0.5, curvature = 0.2) +
with_shadow(
geom_line(
aes(ep_cont_extended, y = wgt_avg_season_rating),
col = main_color, size = 2, lty = "solid"),
colour = "grey2", expand = 0.75, lineend = "butt",
) +
with_outer_glow(
geom_point(color = "grey80", size = 3),
expand = 15, colour = main_color, sigma = 21
) +
# geom_point(color = "grey80", size = 3) +
geom_richtext(
data = episodes_st_cont_summary,
aes(
x = ep_cont_median, y = 10.25,
label = glue::glue(
"<span style='font-size:9pt; color: grey72'>Season</span>
<span style='font-size:24pt; color: #84251D'>{seasonNumber}</span>"
)
),
stat = "unique", hjust = 0.5, vjust = 0.5,
family = "Benguiat", fill = NA, label.size = 0
) +
# Annotations
annotate_richtext(
label = "S2 E7 (The Lost Sister)",
x = 9.5, y = 6) +
# Custom title with shadowtext
shadowtext::geom_shadowtext(
data = NULL,
aes(x = nrow(episodes_st_cont) / 2, y = title_pos, label = titles$title),
family = "Benguiat", color = bg_color, bg.color = "#B1281E", size = 9,
hjust = 0.5, vjust = 0.7, inherit.aes = FALSE, lineheight = 0.8) +
# Custom subtitle
annotate(GeomTextBox, x = nrow(episodes_st_cont) / 2, y = title_pos - 0.75,
label = titles$subtitle, color = "grey82",
width = 0.8, hjust = 0.5, halign = 0.5, vjust = 1, size = 3.5,
lineheight = 1.25, family = "Montserrat", fill = NA, box.size = 0) +
scale_y_continuous(breaks = seq(6, 10, 1), minor_breaks = seq(6, 10, 0.5)) +
coord_cartesian(ylim = c(6, title_pos), clip = "off") +
guides(color = "none") +
labs(caption = titles["caption"], y = "Average Rating") +
theme_minimal(base_family = "Montserrat") +
theme(
plot.background = element_rect(color = NA, fill = bg_color),
axis.title.x = element_blank(),
axis.text.x = element_blank(),
axis.ticks.x = element_blank(),
axis.text.y = element_text(color = "grey62"),
panel.background = element_rect(color = NA, fill = NA),
text = element_text(color = "grey82"),
plot.caption = element_markdown(),
panel.grid = element_blank(),
panel.grid.major.y = element_line(color = "grey20", size = 0.2),
panel.grid.minor.y = element_line(color = "grey20", size = 0.1)
)