# Load packages
source(here::here("data_collection/00_02-setup-session.R"))
Quality Control: Chat Emotes
Information
This document outlines the process of quality control for chat emotes in a dataset. The key steps include:
- Extracting and processing emote information from chat messages.
- Generating frequency tables for emotes and the number of emotes per message.
- Identifying and handling cases where emote names are missing.
- Creating and saving a dictionary of unique emotes for further analysis.
Preparation
# Load data
chat <- qs::qread(here("local_data/chat-debates_full.qs"))
Create list of emotes
Note
For example images of the emojis visit
# Flatten the per-message emote lists into one row per emote occurrence.
# After `unnest()`, every element of `message_emotes` is a single emote record
# from which `id`, `name` and `locations` are read.
emotes <- chat$correct %>%
  select(message_id, message_emotes) %>%
  unnest(message_emotes) %>%
  mutate(
    # vapply() pins each result to exactly one string per emote. A record with
    # a missing field now fails loudly instead of silently turning the column
    # into a list, which is what sapply() would do.
    emote_id = vapply(
      message_emotes,
      function(emote) as.character(emote$id),
      character(1)
    ),
    emote_name = vapply(
      message_emotes,
      function(emote) emote$name,
      character(1)
    ),
    # All locations of one emote are collapsed into a single
    # comma-separated string.
    emote_locations = vapply(
      message_emotes,
      function(emote) paste(emote$locations, collapse = ", "),
      character(1)
    )
  ) %>%
  select(message_id, emote_id, emote_name, emote_locations)

# Frequency table of emote names, most frequent first; `min.frq = 25` is the
# cut-off below which emotes are reported together (the "n < 25" row).
emotes %>%
  frq(emote_name, sort.frq = "desc", min.frq = 25) %>%
  data.frame() %>%
  select(val, frq, raw.prc, cum.prc) %>%
  rownames_to_column("rank") %>%
  rename("Emote" = val, "n" = frq) %>%
  gt() %>%
  gtExtras::gt_theme_538() %>%
  tab_options(table.width = pct(80))
| rank | Emote | n | raw.prc | cum.prc |
|---|---|---|---|---|
| 1 | LUL | 14364 | 20.38 | 20.4 |
| 2 | hasL | 5640 | 8.00 | 28.4 |
| 3 | bleedPurple | 5171 | 7.34 | 35.7 |
| 4 | Kappa | 4154 | 5.89 | 41.6 |
| 5 | <3 | 1789 | 2.54 | 44.1 |
| 6 | NotLikeThis | 1322 | 1.88 | 46.0 |
| 7 | hasChud | 1154 | 1.64 | 47.7 |
| 8 | :) | 1050 | 1.49 | 49.1 |
| 9 | elbyBlom | 1001 | 1.42 | 50.6 |
| 10 | hasSlam | 965 | 1.37 | 51.9 |
| 11 | hasHi | 840 | 1.19 | 53.1 |
| 12 | WutFace | 681 | 0.97 | 54.1 |
| 13 | :( | 675 | 0.96 | 55.1 |
| 14 | hasMods | 593 | 0.84 | 55.9 |
| 15 | hasO | 576 | 0.82 | 56.7 |
| 16 | hasBoot | 540 | 0.77 | 57.5 |
| 17 | :D | 538 | 0.76 | 58.2 |
| 18 | hasRaid | 456 | 0.65 | 58.9 |
| 19 | hasSadge | 443 | 0.63 | 59.5 |
| 20 | hasBaited | 384 | 0.54 | 60.1 |
| 21 | hasKkona | 374 | 0.53 | 60.6 |
| 22 | hasHmm | 368 | 0.52 | 61.1 |
| 23 | PopNemo | 331 | 0.47 | 61.6 |
| 24 | 4Head | 305 | 0.43 | 62.0 |
| 25 | hasCapital | 292 | 0.41 | 62.4 |
| 26 | has0head | 286 | 0.41 | 62.8 |
| 27 | hasFlex | 283 | 0.40 | 63.2 |
| 28 | SeemsGood | 271 | 0.38 | 63.6 |
| 29 | DinoDance | 267 | 0.38 | 64.0 |
| 30 | ResidentSleeper | 261 | 0.37 | 64.4 |
| 31 | TheIlluminati | 255 | 0.36 | 64.7 |
| 32 | R) | 245 | 0.35 | 65.1 |
| 33 | SUBtember | 234 | 0.33 | 65.4 |
| 34 | TwitchConHYPE | 232 | 0.33 | 65.8 |
| 35 | DoritosChip | 230 | 0.33 | 66.1 |
| 36 | hasWeird | 226 | 0.32 | 66.4 |
| 37 | Jebaited | 217 | 0.31 | 66.7 |
| 38 | hasPray | 214 | 0.30 | 67.0 |
| 39 | hasPOGGIES | 213 | 0.30 | 67.3 |
| 40 | hasLeft | 211 | 0.30 | 67.6 |
| 41 | hasRight | 204 | 0.29 | 67.9 |
| 42 | PogChamp | 204 | 0.29 | 68.2 |
| 43 | hasSmol | 201 | 0.29 | 68.5 |
| 44 |  | 200 | 0.28 | 68.8 |
| 45 | BatChest | 196 | 0.28 | 69.0 |
| 46 | :face_with_tears_of_joy: | 192 | 0.27 | 69.3 |
| 47 | :rolling_on_the_floor_laughing: | 178 | 0.25 | 69.6 |
| 48 | MrDestructoid | 167 | 0.24 | 69.8 |
| 49 | hasKapp | 152 | 0.22 | 70.0 |
| 50 | BibleThump | 149 | 0.21 | 70.2 |
| 51 | :coconut: | 142 | 0.20 | 70.4 |
| 52 | hasPause | 137 | 0.19 | 70.6 |
| 53 | DansGame | 136 | 0.19 | 70.8 |
| 54 | hasSilly | 135 | 0.19 | 71.0 |
| 55 | hasWut | 134 | 0.19 | 71.2 |
| 56 | :O | 129 | 0.18 | 71.4 |
| 57 | hasREE | 125 | 0.18 | 71.5 |
| 58 | hasWhat | 124 | 0.18 | 71.7 |
| 59 | dsaFP | 123 | 0.17 | 71.9 |
| 60 | hasChair | 119 | 0.17 | 72.1 |
| 61 | FailFish | 117 | 0.17 | 72.2 |
| 62 | vioSASS | 117 | 0.17 | 72.4 |
| 63 | hasNerd | 112 | 0.16 | 72.6 |
| 64 | hasPains | 110 | 0.16 | 72.7 |
| 65 | VoteYea | 110 | 0.16 | 72.9 |
| 66 | TransgenderPride | 107 | 0.15 | 73.0 |
| 67 | atpRtsd | 106 | 0.15 | 73.2 |
| 68 | BabyRage | 106 | 0.15 | 73.3 |
| 69 | L | 106 | 0.15 | 73.5 |
| 70 | :P | 104 | 0.15 | 73.6 |
| 71 | PoroSad | 104 | 0.15 | 73.8 |
| 72 | SMOrc | 102 | 0.14 | 73.9 |
| 73 | dsaL | 100 | 0.14 | 74.1 |
| 74 | :/ | 96 | 0.14 | 74.2 |
| 75 | dsaNODDERS | 94 | 0.13 | 74.3 |
| 76 | rathboPALESTINEHEART | 94 | 0.13 | 74.5 |
| 77 | hasRage | 91 | 0.13 | 74.6 |
| 78 | hasPOGGERS | 88 | 0.12 | 74.7 |
| 79 | HeyGuys | 84 | 0.12 | 74.8 |
| 80 | CoolStoryBob | 83 | 0.12 | 75.0 |
| 81 | hasRant | 83 | 0.12 | 75.1 |
| 82 | has5 | 81 | 0.11 | 75.2 |
| 83 | dsaKEKW | 79 | 0.11 | 75.3 |
| 84 | CurseLit | 77 | 0.11 | 75.4 |
| 85 | BigSad | 73 | 0.10 | 75.5 |
| 86 | hasGun | 73 | 0.10 | 75.6 |
| 87 | WhySoSerious | 73 | 0.10 | 75.7 |
| 88 | ;) | 70 | 0.10 | 75.8 |
| 89 | PopCorn | 69 | 0.10 | 75.9 |
| 90 | hasSpooked | 66 | 0.09 | 76.0 |
| 91 | Squid2 | 65 | 0.09 | 76.1 |
| 92 | atpCap | 63 | 0.09 | 76.2 |
| 93 | Squid1 | 63 | 0.09 | 76.3 |
| 94 | :grinning_squinting_face: | 62 | 0.09 | 76.4 |
| 95 | Squid3 | 62 | 0.09 | 76.5 |
| 96 | Squid4 | 62 | 0.09 | 76.5 |
| 97 | hasPog | 61 | 0.09 | 76.6 |
| 98 | hasComfy | 60 | 0.09 | 76.7 |
| 99 | hasBuff | 59 | 0.08 | 76.8 |
| 100 | hasUnless | 59 | 0.08 | 76.9 |
| 101 | GoldPLZ | 57 | 0.08 | 77.0 |
| 102 | hasZzz | 54 | 0.08 | 77.0 |
| 103 | KomodoHype | 54 | 0.08 | 77.1 |
| 104 | GoatEmotey | 53 | 0.08 | 77.2 |
| 105 | hasPrime | 53 | 0.08 | 77.3 |
| 106 | hasStop | 53 | 0.08 | 77.3 |
| 107 | Shush | 51 | 0.07 | 77.4 |
| 108 | CoolCat | 50 | 0.07 | 77.5 |
| 109 | 🇵🇸 | 49 | 0.07 | 77.5 |
| 110 | dsaKnee | 49 | 0.07 | 77.6 |
| 111 | hasWicked | 49 | 0.07 | 77.7 |
| 112 | PowerUpR | 49 | 0.07 | 77.8 |
| 113 | CaitlynS | 48 | 0.07 | 77.8 |
| 114 | hasHug | 48 | 0.07 | 77.9 |
| 115 | HypeLUL | 48 | 0.07 | 78.0 |
| 116 | KappaPride | 48 | 0.07 | 78.0 |
| 117 | hasHAAA | 47 | 0.07 | 78.1 |
| 118 | ironmouseLOVE | 47 | 0.07 | 78.2 |
| 119 | sL | 47 | 0.07 | 78.2 |
| 120 | strug4Free | 47 | 0.07 | 78.3 |
| 121 | FBCatch | 46 | 0.07 | 78.4 |
| 122 | OSFrog | 46 | 0.07 | 78.4 |
| 123 | PowerUpL | 46 | 0.07 | 78.5 |
| 124 | GlitchCat | 45 | 0.06 | 78.6 |
| 125 | hasMad | 45 | 0.06 | 78.6 |
| 126 | hasSmash | 45 | 0.06 | 78.7 |
| 127 | hasTruth | 45 | 0.06 | 78.8 |
| 128 | MaxLOL | 45 | 0.06 | 78.8 |
| 129 | ScaredyCat | 44 | 0.06 | 78.9 |
| 130 | :face-fuchsia-tongue-out: | 42 | 0.06 | 78.9 |
| 131 | hasD | 42 | 0.06 | 79.0 |
| 132 | OhMyDog | 42 | 0.06 | 79.1 |
| 133 | DatSheffy | 41 | 0.06 | 79.1 |
| 134 | FBtouchdown | 41 | 0.06 | 79.2 |
| 135 | PJSugar | 41 | 0.06 | 79.2 |
| 136 | TTours | 41 | 0.06 | 79.3 |
| 137 | :face-green-smiling: | 40 | 0.06 | 79.3 |
| 138 | rathboFREE | 39 | 0.06 | 79.4 |
| 139 | dsaCOPIUM | 38 | 0.05 | 79.5 |
| 140 | hasBOOMER | 38 | 0.05 | 79.5 |
| 141 | :fire: | 37 | 0.05 | 79.6 |
| 142 | :p | 37 | 0.05 | 79.6 |
| 143 | BloodTrail | 37 | 0.05 | 79.7 |
| 144 | EleGiggle | 37 | 0.05 | 79.7 |
| 145 | FBBlock | 37 | 0.05 | 79.8 |
| 146 | Keepo | 37 | 0.05 | 79.8 |
| 147 | rathboEST | 37 | 0.05 | 79.9 |
| 148 | rathboPAL | 37 | 0.05 | 79.9 |
| 149 | gremloeFP | 36 | 0.05 | 80.0 |
| 150 | hasEZ | 36 | 0.05 | 80.0 |
| 151 | rathboINE | 36 | 0.05 | 80.1 |
| 152 | :palm_tree: | 35 | 0.05 | 80.1 |
| 153 | rathboFREEPALESTINE | 35 | 0.05 | 80.2 |
| 154 | yugopnXISALUTE | 35 | 0.05 | 80.2 |
| 155 | strug4F12 | 34 | 0.05 | 80.3 |
| 156 | DarkMode | 32 | 0.05 | 80.3 |
| 157 | dsaFacepalm | 32 | 0.05 | 80.4 |
| 158 | mermai40Heart | 32 | 0.05 | 80.4 |
| 159 | vioSLUDGE | 32 | 0.05 | 80.5 |
| 160 | yugopnRAID | 32 | 0.05 | 80.5 |
| 161 | BopBop | 31 | 0.04 | 80.5 |
| 162 | dsaBrooks | 31 | 0.04 | 80.6 |
| 163 | hasSammie | 31 | 0.04 | 80.6 |
| 164 | PokPikachu | 31 | 0.04 | 80.7 |
| 165 | PotFriend | 31 | 0.04 | 80.7 |
| 166 | forsenBased | 30 | 0.04 | 80.8 |
| 167 | :o | 29 | 0.04 | 80.8 |
| 168 | hasKomrade | 29 | 0.04 | 80.8 |
| 169 | GayPride | 28 | 0.04 | 80.9 |
| 170 | ironmouseSABERDANCE | 28 | 0.04 | 80.9 |
| 171 | PixelBob | 28 | 0.04 | 81.0 |
| 172 | VoHiYo | 28 | 0.04 | 81.0 |
| 173 | 🇺🇦 | 27 | 0.04 | 81.0 |
| 174 | CarlSmile | 27 | 0.04 | 81.1 |
| 175 | ckwanFartdance | 27 | 0.04 | 81.1 |
| 176 | MiniK | 27 | 0.04 | 81.2 |
| 177 | vioFP | 27 | 0.04 | 81.2 |
| 178 | VoteNay | 27 | 0.04 | 81.2 |
| 179 | yugopnPal | 27 | 0.04 | 81.3 |
| 180 | :orange_circle: | 26 | 0.04 | 81.3 |
| 181 | forsenLaughingAtYou | 26 | 0.04 | 81.3 |
| 182 | FortOne | 26 | 0.04 | 81.4 |
| 183 | hasFatty | 26 | 0.04 | 81.4 |
| 184 | hasGachi | 26 | 0.04 | 81.5 |
| 185 | KappaHD | 26 | 0.04 | 81.5 |
| 186 | KEKHeim | 26 | 0.04 | 81.5 |
| 187 | rhyzROT | 26 | 0.04 | 81.6 |
| 188 | :loudly_crying_face: | 25 | 0.04 | 81.6 |
| 189 | dsaDance | 25 | 0.04 | 81.7 |
| 190 | dsaHmm | 25 | 0.04 | 81.7 |
| 191 | ironmouseWiggly | 25 | 0.04 | 81.7 |
| 192 | PogBones | 25 | 0.04 | 81.8 |
| 193 | n < 25 | 12861 | 18.25 | 100.0 |
| 194 | NA | 0 | 0.00 | NA |
# Distribution of the number of emotes per message: count the emote rows of
# each message, then tabulate how often each count occurs.
emotes %>%
  count(message_id, name = "n") %>%
  frq(n) %>%
  data.frame() %>%
  select(val, frq, raw.prc, cum.prc) %>%
  rename(`Number of Emotes` = val, n = frq) %>%
  gt() %>%
  gtExtras::gt_theme_538()
| Number of Emotes | n | raw.prc | cum.prc |
|---|---|---|---|
| 1 | 62534 | 94.93 | 94.9 |
| 2 | 2465 | 3.74 | 98.7 |
| 3 | 612 | 0.93 | 99.6 |
| 4 | 178 | 0.27 | 99.9 |
| 5 | 55 | 0.08 | 100.0 |
| 6 | 13 | 0.02 | 100.0 |
| 7 | 9 | 0.01 | 100.0 |
| 8 | 4 | 0.01 | 100.0 |
| 10 | 1 | 0.00 | 100.0 |
| 13 | 1 | 0.00 | 100.0 |
| NA | 0 | 0.00 | NA |
Check: “Empty emotes”
# List the emote ids that occur with an empty name, most frequent first.
# Only the empty-name rows are kept before counting. The result is left
# grouped by `emote_name` on purpose so the rendered table shows just
# `emote_id` and `n`.
emotes %>%
  filter(emote_name == "") %>%
  group_by(emote_name, emote_id) %>%
  summarise(n = n()) %>%
  arrange(desc(n)) %>%
  gt() %>%
  gtExtras::gt_theme_538()
`summarise()` has grouped output by 'emote_name'. You can override using the
`.groups` argument.
| emote_id | n |
|---|---|
| 425618 | 53 |
| 302140936 | 29 |
| 555555558 | 21 |
| 555555584 | 21 |
| 1 | 13 |
| 555555560 | 11 |
| 555555580 | 8 |
| 306629700 | 5 |
| 300238152 | 4 |
| 302587115 | 3 |
| 303446392 | 3 |
| emotesv2_4f058d58458544a4971de55672468204 | 3 |
| 300238154 | 2 |
| 300756431 | 2 |
| 555555577 | 2 |
| 555555589 | 2 |
| 300238151 | 1 |
| 300238155 | 1 |
| 301079765 | 1 |
| 303433990 | 1 |
| 303706436 | 1 |
| 304192517 | 1 |
| 354 | 1 |
| 489 | 1 |
| 508650 | 1 |
| 555555557 | 1 |
| 555555563 | 1 |
| 555555585 | 1 |
| 6 | 1 |
| 626795 | 1 |
| 62835 | 1 |
| emotesv2_1c432fe325994220960ed5720682ca63 | 1 |
| emotesv2_819621bcb8f44566a1bd8ea63d06c58f | 1 |
| emotesv2_89106685bdb643c2943994b027e25556 | 1 |
# Lookup table emote_id -> emote_name holding, for every emote id, its most
# frequent *non-empty* name.
#
# Empty (and missing) names are excluded before ranking. Otherwise an id whose
# occurrences are mostly unnamed would get "" as its "best" name, and the join
# that consumes this table would overwrite the valid names of that id with "".
# Ids that never appear with a name are simply absent from the lookup.
emote_name_recode <- emotes %>%
  filter(!is.na(emote_name), emote_name != "") %>%
  count(emote_id, emote_name, name = "n") %>%
  # Most frequent name first; slice(1) then keeps exactly one name per id.
  arrange(desc(n)) %>%
  group_by(emote_id) %>%
  slice(1) %>%
  ungroup() %>%
  select(emote_id, emote_name)
# Attach the looked-up name for each emote id and use it wherever one exists;
# rows without a lookup match keep their original name.
emotes_recoded <- emotes %>%
  left_join(emote_name_recode, by = "emote_id", suffix = c("", "_recode")) %>%
  mutate(emote_name = coalesce(emote_name_recode, emote_name)) %>%
  select(-emote_name_recode)
Create emote dictionary
# Unique, non-empty emote names in sorted order
emotes_recoded_names <- emotes_recoded %>%
  filter(emote_name != "") %>%
  distinct(emote_name) %>%
  arrange(emote_name) %>%
  pull(emote_name)

# One list entry per emote, keyed by the emote's own name
named_emotes_recoded_names <- as.list(emotes_recoded_names)
names(named_emotes_recoded_names) <- emotes_recoded_names

# Build the dictionary object from the named list
dict_emotes <- dictionary(named_emotes_recoded_names)

# Persist the dictionary for later analysis steps.
# NOTE(review): the input data is read via here("local_data/...") without
# `project_dir`; confirm that this path resolves to the intended folder.
saveRDS(
  dict_emotes,
  file = here(project_dir, "local_data/dictionary_chat_emotes.RDS")
)