if (!require("pacman")) install.packages("pacman")
pacman::p_load(
here,
magrittr, janitor,
ggpubr,
gt, gtExtras,
countdown,
quanteda, # quanteda text processing
quanteda.textplots, quanteda.textstats, quanteda.textmodels,
tidytext,
udpipe, spacyr, # POS tagging
stm, stminsights,
easystats, tidyverse
)đ¨ Topic Modeling in R
Tutorial - Session 09
Background
Preparation
# Import base data
chats <- qs::qread(here("local_data/chat-debates_full.qs"))$correct
# Import corpora
chats_spacyr <- qs::qread(here("local_data/chat-corpus_spacyr.qs"))
stm_search <- qs::qread(here("local_data/stm-majority_report-search.qs"))
stm_results <- qs::qread(here("local_data/stm-majority_report-results.qs"))Codechunks aus der Sitzung
Vorverarbeitung der Daten
chats_valid <- chats %>%
mutate(
across(c(debate, platform), ~as.factor(.x))
) Vorverarbeitung des Korpus
# spacyr-Korpus zu Tokens
chat_spacyr_toks <- chats_spacyr %>%
as.tokens(
use_lemma = TRUE
) %>%
tokens(
remove_punct = TRUE,
remove_symbols = TRUE,
remove_numbers = FALSE,
remove_url = FALSE,
split_hyphens = FALSE,
split_tags = FALSE,
) %>%
tokens_remove(
pattern = stopwords("en")
) %>%
tokens_ngrams(n = 1:3) Add docvars
# Get document names from the original data
original_docnames <- chats$message_id
# Get document names from the tokens object
token_docnames <- docnames(chat_spacyr_toks)
# Identify missing documents
missing_docs <- setdiff(original_docnames, token_docnames)
# Exclude "empty" messages
chats_filtered <- chats_valid %>%
filter(!message_id %in% missing_docs)
# Add docvars
docvars(chat_spacyr_toks) <- chats_filteredFokus on The Majority Report
# Subset tokens based on docvars
majority_report_chat_toks <- tokens_subset(
chat_spacyr_toks, streamer == "the_majority_report")# Convert to DFM
majority_report_chat_dfm <- majority_report_chat_toks %>%
dfm()
# Pruning
majority_report_chat_trim <- majority_report_chat_dfm %>%
dfm_trim(
min_docfreq = 50/nrow(chats),
max_docfreq = 0.99,
docfreq_type = "prop"
)
# Convert for stm topic modeling
majority_report_chat_stm <- majority_report_chat_trim %>%
convert(to = "stm")Estimate models
# Define parameters
future::plan(future::multisession()) # use multiple sessions
topic_range <- seq(from = 4, to = 20, by = 2)
# Initiate notifications & time tracking
tictoc::tic("STM chats - Majority Report")
# Estimate models
stm_search <- tibble(k = topic_range) %>%
mutate(
mdl = furrr::future_map(
k,
~stm::stm(
documents = majority_report_chat_stm$documents,
vocab = majority_report_chat_stm$vocab,
prevalence =~ platform + debate + message_during_debate,
K = .,
seed = 42,
max.em.its = 1000,
data = majority_report_chat_stm$meta,
init.type = "Spectral",
verbose = TRUE),
.options = furrr::furrr_options(seed = 42)
)
)
# Sent status update and finish time tracking
tictoc::toc(log = TRUE)# Create heldout
heldout <- make.heldout(
majority_report_chat_stm$documents,
majority_report_chat_stm$vocab,
seed = 42)
# Create model diagnostics
stm_results <- stm_search %>%
mutate(
exclusivity = map(mdl, exclusivity),
semantic_coherence = map(mdl, semanticCoherence, majority_report_chat_stm$documents),
eval_heldout = map(mdl, eval.heldout, heldout$missing),
residual = map(mdl, checkResiduals, majority_report_chat_stm$documents),
bound = map_dbl(mdl, function(x) max(x$convergence$bound)),
lfact = map_dbl(mdl, function(x) lfactorial(x$settings$dim$K)),
lbound = bound + lfact,
iterations = map_dbl(mdl, function(x) length(x$convergence$bound))
)Vergleich des statistischen Fits
stm_results %>%
transmute(
k,
`Lower bound` = lbound,
Residuals = map_dbl(residual, "dispersion"),
`Semantic coherence` = map_dbl(semantic_coherence, mean),
`Held-out likelihood` = map_dbl(eval_heldout, "expected.heldout")) %>%
gather(Metric, Value, -k) %>%
ggplot(aes(k, Value, color = Metric)) +
geom_line(size = 1.5, alpha = 0.7, show.legend = FALSE) +
geom_point(size = 3) +
scale_x_continuous(breaks = seq(from = 4, to = 20, by = 2)) +
facet_wrap(~Metric, scales = "free_y") +
labs(x = "K (Anzahl der Themen)",
y = NULL,
title = "Statistischer Fit der STM-Modelle",
subtitle = "Kohärenz sollte hoch, Residuen niedrig sein"
) +
theme_pubr()
Hohe Kohärenz bei hoher Exklusivität
# Models for comparison
models_for_comparison = c(12, 14, 18)
# Create figures
fig_excl <- stm_results %>%
# Edit data
select(k, exclusivity, semantic_coherence) %>%
filter(k %in% models_for_comparison) %>%
unnest(cols = c(exclusivity, semantic_coherence)) %>%
mutate(k = as.factor(k)) %>%
# Build graph
ggplot(aes(semantic_coherence, exclusivity, color = k)) +
geom_point(size = 2, alpha = 0.7) +
labs(
x = "Semantic coherence",
y = "Exclusivity"
# title = "Comparing exclusivity and semantic coherence",
# subtitle = "Models with fewer topics have higher semantic coherence for more topics, but lower exclusivity"
) +
theme_pubr()
# Create plotly
fig_excl %>% plotly::ggplotly()Extraktion der Beta- & Gamma-Matrix
# Define model
tpm_k14 <- stm_results %>%
filter(k == 14) |>
pull(mdl) %>% .[[1]]
# Beta matrix
tpm_k14 %>%
tidy(., matrix = "frex") # A tibble: 154,490 Ă 2
topic term
<int> <chr>
1 1 look_like
2 1 look_like_go
3 1 like_go
4 1 hahahahahahaha
5 1 look_like_s
6 1 think_go
7 1 check_moderator
8 1 fact_check_moderator
9 1 moderator_fact
10 1 moderator_fact_check
# âš 154,480 more rows
# Gamma matrix
tpm_k14 %>%
tidy(.,matrix = "gamma",
document_names = names(majority_report_chat_stm$documents)
) # A tibble: 322,840 Ă 3
document topic gamma
<chr> <int> <dbl>
1 ChwKGkNJR2poT3pVdVlnREZha1FyUVlkblNrWS1B 1 0.0261
2 ChwKGkNLbXd3LXpVdVlnREZWd0wxZ0FkYW9FSWdB 1 0.0265
3 ChwKGkNKR1RsdV9VdVlnREZkNFhyUVlkZ2d3Tk5n 1 0.0123
4 ChwKGkNOQ3kxUExVdVlnREZVS1k1UWNkQ0t3Mlhn 1 0.0200
5 ChwKGkNPcW5fZkxVdVlnREZlSFJsQWtkbThZaUtR 1 0.0232
6 ChwKGkNNUHZzdlhVdVlnREZha1FyUVlkblNrWS1B 1 0.0236
7 ChwKGkNLT1JuX2pVdVlnREZZX0FsQWtkcEw4Wmd3 1 0.434
8 ChwKGkNLRElvZmpVdVlnREZaWExGZ2tkTy1ZSXVR 1 0.0118
9 ChwKGkNNblNqZm5VdVlnREZhX0l3Z1FkZUg0bHZn 1 0.0356
10 ChwKGkNMeUkyUHZVdVlnREZXQUhyUVlkTUJvZ193 1 0.00307
# âš 322,830 more rows
Extraktion der Top Features nach Thema
# Create gamma data
top_gamma_k14 <- tpm_k14 %>%
tidy(matrix = "gamma") %>%
dplyr::group_by(topic) %>%
dplyr::summarise(
gamma = mean(gamma),
.groups = "drop") %>%
dplyr::arrange(desc(gamma))
# Create beta data
top_beta_k14 <- tpm_k14 %>%
tidytext::tidy(.) %>%
dplyr::group_by(topic) %>%
dplyr::arrange(-beta) %>%
dplyr::top_n(7, wt = beta) %>%
dplyr::select(topic, term) %>%
dplyr::summarise(
terms_beta = toString(term),
.groups = "drop")
# Merge gamma & beta data
top_topics_terms_k14 <- top_beta_k14 %>%
dplyr::left_join(
top_gamma_k14,
by = "topic") %>%
dplyr::mutate(
topic = paste0("Topic ", topic),
topic = reorder(topic, gamma)
)
# Create output
top_topics_terms_k14 %>%
mutate(across(gamma, ~round(.,3))) %>%
dplyr::arrange(-gamma) %>%
gt() %>%
gtExtras::gt_theme_538() %>%
gt::tab_options(
table.width = gt::pct(100),
table.font.size = "12px"
)| topic | terms_beta | gamma |
|---|---|---|
| Topic 8 | make, 's, lul, emma, fuchsia, liar, kekl | 0.115 |
| Topic 7 | good, right, now, yes, plan, lie, bad | 0.113 |
| Topic 12 | kamala, want, biden, eat, take, vote, god | 0.109 |
| Topic 5 | get, s, wow, mad, omg, thank, nice | 0.101 |
| Topic 4 | lmao, omegalul, red, green, orange, baby, kekw | 0.079 |
| Topic 3 | time, sam, love, man, need, old, big | 0.075 |
| Topic 11 | say, oh, ..., know, look, shit, yeah | 0.075 |
| Topic 1 | go, like, fact, debate, look, check, keep | 0.058 |
| Topic 13 | trump, just, donald, lose, racist, win, can | 0.056 |
| Topic 9 | lol, one, give, ...., wtf, china, okay | 0.051 |
| Topic 6 | people, think, go, back, work, try, change | 0.050 |
| Topic 10 | let, talk, âs, can, like, sound, see | 0.045 |
| Topic 2 | stop, start, please, israel, use, laugh, agree | 0.039 |
| Topic 14 | face, guy, don, bring, real, country, rolling_on_the_floor_laughe | 0.034 |
Extraktion & ZusammenfĂźhrung der Daten
# Prepare for merging
topic_gammas_k14 <- tpm_k14 %>%
tidy(matrix = "gamma") %>%
dplyr::group_by(document) %>%
tidyr::pivot_wider(
id_cols = document,
names_from = "topic",
names_prefix = "gamma_topic_",
values_from = "gamma")
gammas_k14 <- tpm_k14 %>%
tidytext::tidy(matrix = "gamma") %>%
dplyr::group_by(document) %>%
dplyr::slice_max(gamma) %>%
dplyr::mutate(
main_topic = ifelse(
gamma > 0.5, topic, NA)) %>%
rename(
top_topic = topic,
top_gamma = gamma) %>%
ungroup() %>%
left_join(.,
topic_gammas_k14,
by = join_by(document))
# Identify empty documents
empty_docs <- Matrix::rowSums(
as(majority_report_chat_trim, "Matrix")) == 0
empty_docs_ids <- majority_report_chat_trim@docvars$docname[empty_docs]
# Merge with original data
chats_topics <- chats_filtered %>%
filter(!(message_id %in% empty_docs_ids)) %>%
filter(streamer == "the_majority_report") %>%
bind_cols(gammas_k14) %>%
select(-document)
# Preview
chats_topics %>% glimpseRows: 23,060
Columns: 50
$ streamer <chr> "the_majority_report", "the_majority_report", "tâŚ
$ url <chr> "https://www.youtube.com/watch?v=lzobJil9Sgc", "âŚ
$ platform <fct> youtube, youtube, youtube, youtube, youtube, youâŚ
$ debate <fct> presidential, presidential, presidential, presidâŚ
$ user_name <chr> "Scott Plant", "Rebecca W", "Galactic News NetwoâŚ
$ user_id <chr> "UC4mxlnk193JrXVAp6K-vEpQ", "UCeenHJ1v62biyOyKwLâŚ
$ user_display_name <chr> "Scott Plant", "Rebecca W", "Galactic News NetwoâŚ
$ user_badges <list> [], [], [], [], [], [], [], [], [], [], [], [],âŚ
$ message_timestamp <dbl> -152, -151, -145, -138, -137, -132, -126, -126, âŚ
$ message_id <chr> "ChwKGkNJR2poT3pVdVlnREZha1FyUVlkblNrWS1B", "ChwâŚ
$ message_type <chr> "text_message", "text_message", "text_message", âŚ
$ message_content <chr> "Donnie will say, \"That is my own sperm.\"", "wâŚ
$ message_emotes <list> [], [], [["UCkszU2WH9gy1mb0dV-11UJg/ssIfY7OFG5OâŚ
$ message_length <int> 40, 45, 52, 38, 10, 32, 8, 14, 2, 90, 20, 36, 20âŚ
$ message_timecode <Period> -2M -32S, -2M -31S, -2M -25S, -2M -18S, -2M -âŚ
$ message_time <chr> "23:57:28", "23:57:29", "23:57:35", "23:57:42", âŚ
$ message_during_debate <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, âŚ
$ user_has_badge <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, âŚ
$ user_is_premium <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, âŚ
$ user_is_subscriber <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, âŚ
$ user_is_turbo <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, âŚ
$ user_is_moderator <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, âŚ
$ user_is_partner <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, âŚ
$ user_is_subgifter <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, âŚ
$ user_is_broadcaster <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, âŚ
$ user_is_vip <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, âŚ
$ user_is_twitchdj <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, âŚ
$ user_is_founder <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, âŚ
$ user_is_staff <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, âŚ
$ user_is_game_dev <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, âŚ
$ user_is_ambassador <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, âŚ
$ user_no_audio <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, âŚ
$ user_no_video <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, âŚ
$ top_topic <int> 11, 7, 4, 3, 4, 3, 1, 4, 9, 4, 8, 3, 1, 1, 3, 13âŚ
$ top_gamma <dbl> 0.4435422, 0.3412468, 0.7627751, 0.5663056, 0.46âŚ
$ main_topic <int> NA, NA, 4, 3, NA, NA, NA, 4, NA, 4, NA, NA, NA, âŚ
$ gamma_topic_1 <dbl> 0.026098022, 0.026450828, 0.012260893, 0.0200424âŚ
$ gamma_topic_2 <dbl> 0.014058480, 0.016014175, 0.006519458, 0.1322111âŚ
$ gamma_topic_3 <dbl> 0.043655546, 0.066001729, 0.018185091, 0.5663056âŚ
$ gamma_topic_4 <dbl> 0.03876696, 0.14976529, 0.76277514, 0.03011074, âŚ
$ gamma_topic_5 <dbl> 0.186801763, 0.043801244, 0.020095565, 0.0373451âŚ
$ gamma_topic_6 <dbl> 0.021470740, 0.024622665, 0.009041711, 0.0174135âŚ
$ gamma_topic_7 <dbl> 0.036282513, 0.341246826, 0.017882159, 0.0289225âŚ
$ gamma_topic_8 <dbl> 0.04538521, 0.14311198, 0.06168206, 0.03558740, âŚ
$ gamma_topic_9 <dbl> 0.021910232, 0.023552979, 0.012143933, 0.0165138âŚ
$ gamma_topic_10 <dbl> 0.020656194, 0.020843309, 0.013562820, 0.0161670âŚ
$ gamma_topic_11 <dbl> 0.443542243, 0.027314995, 0.019249172, 0.0206369âŚ
$ gamma_topic_12 <dbl> 0.044397591, 0.044680183, 0.019709728, 0.0343713âŚ
$ gamma_topic_13 <dbl> 0.027631227, 0.037326873, 0.011473224, 0.0214522âŚ
$ gamma_topic_14 <dbl> 0.029343282, 0.035266919, 0.015419046, 0.0229199âŚ
Themen im Fokus
Top Topic
chats_topics %>%
filter(top_topic == 8) %>%
arrange(-top_gamma) %>%
slice_head(n = 10) %>%
select(message_id, user_name, message_time, message_content, top_gamma, top_topic) %>%
gt() %>%
gtExtras::gt_theme_538() %>%
gt::tab_options(table.font.size = "10px")| message_id | user_name | message_time | message_content | top_gamma | top_topic |
|---|---|---|---|---|---|
| ChwKGkNKdlRqY1BwdVlnREZRREV3Z1FkV2I4U1hn | David Davis | 01:29:52 | :watermelon: :watermelon: :watermelon: :watermelon: :watermelon: :watermelon: :watermelon: :watermelon: :watermelon: :watermelon: :watermelon: :watermelon: :watermelon: :watermelon: :watermelon: :watermelon: | 0.9628609 | 8 |
| ChwKGkNLMlp3cUxzdVlnREZVN0NsQWtkT0JBRTN3 | Jules Winnfeild đłď¸ââ§ď¸ | 01:42:09 | :face-fuchsia-poop-shape::face-fuchsia-poop-shape::face-fuchsia-poop-shape::face-fuchsia-poop-shape::face-fuchsia-poop-shape::face-fuchsia-poop-shape::face-fuchsia-poop-shape: | 0.9583042 | 8 |
| ChwKGkNNclk0dFBZdVlnREZZYWg1UWNkUlhvNVB3 | CanalEduge | 00:14:24 | :face-fuchsia-poop-shape::face-fuchsia-poop-shape::face-fuchsia-poop-shape::face-fuchsia-poop-shape::face-fuchsia-poop-shape: | 0.9437816 | 8 |
| ChwKGkNJaVdpNVBmdVlnREZTV1Q1UWNkUWg0dEJn | Jules Winnfeild đłď¸ââ§ď¸ | 00:43:27 | :face-fuchsia-poop-shape::face-fuchsia-poop-shape::face-fuchsia-poop-shape::face-fuchsia-poop-shape: | 0.9315330 | 8 |
| ChwKGkNORG1uTWpvdVlnREZkd3VyUVlkSVFrVU5R | #BobbleHead | 01:25:34 | WORLDSTAR own's the Trademark on the Algorithm that identified all the Pedophiles = Blame T.M.Z. #ReleaseTheBlackBaby | 0.9313871 | 8 |
| ChwKGkNPV09fYmJwdVlnREZXc3ByUVlkbk9Vc3d3 | David Davis | 01:29:26 | :watermelon: :watermelon: :watermelon: :watermelon: :watermelon: :watermelon: :watermelon: :watermelon: | 0.9303014 | 8 |
| ChwKGkNMV244NkRvdVlnREZUMFRyUVlkYmZzUmpB | #BobbleHead | 01:24:12 | WORLDSTAR own's the Trademark on the Algorithm that identified all the Pedophiles = Blame T.M.Z. | 0.9287878 | 8 |
| ChwKGkNLN0N4ZTdldVlnREZWNDZyUVlkRmZJRklR | Corporations8MyBaby | 00:42:10 | :face-fuchsia-tongue-out::face-fuchsia-tongue-out::face-fuchsia-tongue-out::face-fuchsia-tongue-out: | 0.9269656 | 8 |
| 9c014ab4-89a7-4f9d-97c8-be3da2868f58 | nightbot | 00:10:47 | Join the official Majority Report discord community! https://discord.gg/majority | 0.9215614 | 8 |
| fcb53a8b-4b75-4557-b3eb-d273b7069d88 | nightbot | 00:26:14 | Join the official Majority Report discord community! https://discord.gg/majority | 0.9215614 | 8 |
Thema 12
chats_topics %>%
filter(top_topic == 12) %>%
arrange(-top_gamma) %>%
slice_head(n = 10) %>%
select(message_id, user_name, message_time, message_content, top_gamma, top_topic) %>%
gt() %>%
gtExtras::gt_theme_538() %>%
gt::tab_options(table.font.size = "10px")| message_id | user_name | message_time | message_content | top_gamma | top_topic |
|---|---|---|---|---|---|
| ChwKGkNNZXg5LUxxdVlnREZkcVc1UWNkeGpNTDJB | SamSedersLeftTeste | 01:35:27 | The vice president is BLACK BLACK BLACK BLACK BLACK BLACK | 0.9227423 | 12 |
| ChwKGkNNdVU0NTNndVlnREZkNEwxZ0FkbWxFSFN3 | Rilly Kewl | 00:48:18 | Hey Hey Hey Hey SHUT UP:red_exclamation_mark: | 0.9121621 | 12 |
| ChwKGkNJU1BtS0RndVlnREZRREV3Z1FkV2I4U1hn | Rilly Kewl | 00:48:23 | Hey Hey Hey Hey SHUT UP:red_exclamation_mark: | 0.9121621 | 12 |
| ChwKGkNJS1NvcVRndVlnREZRMHUxZ0FkU1FFSzZB | Rilly Kewl | 00:48:31 | Hey Hey Hey Hey SHUT UP:red_exclamation_mark: | 0.9121621 | 12 |
| ChwKGkNLSG9uYUxodVlnREZhY0cxZ0FkSVJjSGdB | Rilly Kewl | 00:52:56 | Hey Hey Hey Hey SHUT UP:red_exclamation_mark: | 0.9121621 | 12 |
| ChwKGkNPbU90YV9odVlnREZWZ3FyUVlkaUpnNUpn | Rilly Kewl | 00:53:23 | Hey Hey Hey Hey SHUT UP:red_exclamation_mark: | 0.9121621 | 12 |
| ChwKGkNLYmxuZjdzdVlnREZiMHUxZ0FkT0owN0h3 | Rilly Kewl | 01:45:21 | Hey Hey Hey Hey SHUT UP:red_exclamation_mark: | 0.9121621 | 12 |
| ChwKGkNLX3F4cl90dVlnREZWbzAxZ0FkdzVFTTR3 | Rilly Kewl | 01:47:38 | Hey Hey Hey Hey SHUT UP:red_exclamation_mark: | 0.9121621 | 12 |
| ChwKGkNOYm1yTVB0dVlnREZWb0gxZ0FkQnF3QWRR | Rilly Kewl | 01:47:46 | Hey Hey Hey Hey SHUT UP:red_exclamation_mark: | 0.9121621 | 12 |
| bad4de96-6c3f-4495-9bd5-da395d9af90b | grandshadowfox | 01:07:37 | Grandshadowfox subscribed with Prime. They've subscribed for 16 months! 15 months | 0.9064436 | 12 |
Thema 4
chats_topics %>%
filter(top_topic == 4) %>%
arrange(-top_gamma) %>%
slice_head(n = 10) %>%
select(message_id, user_name, message_time, message_content, top_gamma, top_topic) %>%
gt() %>%
gtExtras::gt_theme_538() %>%
gt::tab_options(table.font.size = "10px")| message_id | user_name | message_time | message_content | top_gamma | top_topic |
|---|---|---|---|---|---|
| ChwKGkNLX3Z3SzdkdVlnREZhb0NyUVlkVER3aVRn | rhys | 00:35:28 | :text-green-game-over::text-green-game-over::text-green-game-over::text-green-game-over::text-green-game-over::text-green-game-over: | 0.9687024 | 4 |
| ChwKGkNJN1hsSXJudVlnREZWTGNGZ2tkYnFnYmJB | Jules Winnfeild đłď¸ââ§ď¸ | 01:18:56 | :fish-orange-wide-eyes::fish-orange-wide-eyes::fish-orange-wide-eyes::fish-orange-wide-eyes::fish-orange-wide-eyes: | 0.9649579 | 4 |
| ChwKGkNJQ3ZzYTdXdVlnREZSek1GZ2tkMndnZ1Bn | fish Monger | 00:04:09 | ideas:finger-red-number-one::finger-red-number-one::finger-red-number-one::finger-red-number-one::finger-red-number-one::finger-red-number-one::finger-red-number-one::finger-red-number-one: | 0.9647715 | 4 |
| ChwKGkNNcmVxb0RmdVlnREZhb0NyUVlkVER3aVRn | rhys | 00:42:48 | :text-green-game-over::text-green-game-over::text-green-game-over::text-green-game-over::text-green-game-over: | 0.9630820 | 4 |
| 32d36382-5eaf-4da6-a2dc-c9683b98162b | nightbot | 00:01:27 | Libertarians, call into the show! 646 257-3920. Phones open after 1pm EST. Download the Majority Report app to IM into the show. Go to JoinTheMajorityReport.com to become a member and help support the show. | 0.9629138 | 4 |
| 4ffbae78-db39-40e9-bcf8-b5c0965fe2a4 | nightbot | 00:09:42 | Libertarians, call into the show! 646 257-3920. Phones open after 1pm EST. Download the Majority Report app to IM into the show. Go to JoinTheMajorityReport.com to become a member and help support the show. | 0.9629138 | 4 |
| a08570c3-f835-4568-9332-b97bf22ee61b | nightbot | 02:01:22 | Libertarians, call into the show! 646 257-3920. Phones open after 1pm EST. Download the Majority Report app to IM into the show. Go to JoinTheMajorityReport.com to become a member and help support the show. | 0.9629138 | 4 |
| 46b82320-e59d-486e-a58f-acf35b03fe4a | nightbot | 02:09:43 | Libertarians, call into the show! 646 257-3920. Phones open after 1pm EST. Download the Majority Report app to IM into the show. Go to JoinTheMajorityReport.com to become a member and help support the show. | 0.9629138 | 4 |
| 191d1514-cc7e-4a65-8c9e-0ce5d28f1a5d | nightbot | 02:22:30 | Libertarians, call into the show! 646 257-3920. Phones open after 1pm EST. Download the Majority Report app to IM into the show. Go to JoinTheMajorityReport.com to become a member and help support the show. | 0.9629138 | 4 |
| ed759097-6071-4394-b810-5adafd52f652 | nightbot | 02:35:23 | Libertarians, call into the show! 646 257-3920. Phones open after 1pm EST. Download the Majority Report app to IM into the show. Go to JoinTheMajorityReport.com to become a member and help support the show. | 0.9629138 | 4 |
User mit den meisten Beiträgen zu Thema 4
chats_topics %>%
filter(top_topic == 8) %>%
count(user_name, sort = TRUE) %>%
mutate(
prop = round(n/sum(n)*100, 2)) %>%
slice_head(n = 10) %>%
gt() %>%
gtExtras::gt_theme_538() | user_name | n | prop |
|---|---|---|
| buuuuuuuuuuuuuuuuuuuuuut | 59 | 1.83 |
| sauvignoncitizen | 50 | 1.55 |
| Say What | 49 | 1.52 |
| Jules Winnfeild đłď¸ââ§ď¸ | 47 | 1.45 |
| asiak | 46 | 1.42 |
| hardradajm | 40 | 1.24 |
| Bob Carmody | 34 | 1.05 |
| T.R. | 33 | 1.02 |
| maj_k1bbles | 31 | 0.96 |
| ogdimwit | 31 | 0.96 |
Prävalenz vs. Häufigkeit
top_gamma_k14 %>%
ggplot(aes(as.factor(topic), gamma)) +
geom_col(fill = "#F57350") +
labs(
x = "Topic",
y = "Mean gamma"
) +
coord_flip() +
scale_y_reverse() +
scale_x_discrete(position = "top") +
theme_pubr()
chats_topics %>%
mutate(across(top_topic, as.factor)) %>%
ggplot(aes(top_topic, y = after_stat(prop), group = 1)) +
geom_bar(fill = "#1DA1F2") +
scale_y_continuous(labels = scales::percent) +
labs(
x = "",
y = "Relative frequency"
) +
coord_flip() +
theme_pubr()
Einfluss von Meta-Variablen
effects <- estimateEffect(
formula =~ platform + debate + message_during_debate,
stmobj = tpm_k14,
metadata = chats_topics)summary(effects, topics = 12)
Call:
estimateEffect(formula = ~platform + debate + message_during_debate,
stmobj = tpm_k14, metadata = chats_topics)
Topic 12:
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -0.128058 28.831457 -0.004 0.99646
platformyoutube 0.214453 28.831531 0.007 0.99407
debatevice presidential 0.203569 28.831506 0.007 0.99437
message_during_debate 0.011889 0.004316 2.755 0.00588 **
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(effects, topics = 8)
Call:
estimateEffect(formula = ~platform + debate + message_during_debate,
stmobj = tpm_k14, metadata = chats_topics)
Topic 8:
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 0.1234719 29.5028653 0.004 0.997
platformyoutube -0.0333377 29.5028958 -0.001 0.999
debatevice presidential -0.0134738 29.5028788 0.000 1.000
message_during_debate 0.0006216 0.0044960 0.138 0.890