Showcase
Focus on Twitter API v2
Background
Practical application of the Twitter Academic Research Product Track v2 API endpoint with the help of the academictwitteR (Barrie & Ho, 2021) package. Visit the repository of the package for further information.
This version of the Twitter API allows researchers to access larger volumes of Twitter data. For more information on the Twitter API, including how to apply for access to the Academic Research Product Track, see the Twitter Developer platform.
This showcase contains two exemplary uses of the API: Analysis of specific hashtags (e.g. #Karneval) and/or specific accounts (e.g. @elonmusk)
Preparation
Load necessary packages
library(here) # Easy file path construction
library(academictwitteR) # Collecting the data
library(lubridate) # Work with date-times and time-spans
library(sjmisc) # Collection of miscellaneous utility functions
library(tidyverse) # Preparation of the data
library(quanteda) # Text mining
library(quanteda.textstats) # Text statistics
library(quanteda.textplots) # Visualisation of text data
library(ggthemes) # Custom ggplot themes
library(ggpubr) # Convenience functions for 'ggplot2'-plots
Set personal bearer token
# Bearer token passed to the data-collection call below to authenticate
# against the Twitter API. Replace the placeholder with your own token and
# never commit a real token to version control.
personal_bearer_token <- "INSERT BEARER TOKEN HERE"
Mining tweets: profile(s)
Data collection
# Collect the tweets of the listed account for the requested time window.
# `n` is the upper bound on the number of tweets to fetch; the results are
# written to `data_path` (and `file`) so they can be re-read from disk later.
get_all_tweets(
  users = "elonmusk",
  start_tweets = "2020-11-11T00:00:00Z",
  end_tweets = "2022-11-13T12:00:00Z",
  n = 100000,
  bearer_token = personal_bearer_token,
  data_path = here(
    "content/04-api_access-twitter/data.local/raw_elonmusk/"
  ),
  file = "elonmusk"
)
Read data from disc
# Bind the collected raw files into one tidy data frame and derive
# date/time helper columns from the `created_at` timestamp string.
tweets_musk <- bind_tweets(
  data_path = here(
    "content/04-api_access-twitter/data.local/raw_elonmusk"
  ),
  # data_path = "data/raw_karneval",
  output_format = "tidy"
) %>%
  mutate(
    datetime = ymd_hms(created_at), # parse the timestamp string
    date = date(datetime),
    hour = hour(datetime),
    min = minute(datetime),
    hms = hms::as_hms(datetime), # time of day (hh:mm:ss)
    hm = hms::parse_hm(hms) # time of day with seconds set to zero
  )
Data analysis
Overview of dataset
# Print a compact column-wise overview (names, types, first values).
tweets_musk %>%
  glimpse()
Rows: 7,255
Columns: 37
$ tweet_id <chr> "1336809767574982658", "1336808486022258688", "…
$ user_username <chr> "elonmusk", "elonmusk", "elonmusk", "elonmusk",…
$ text <chr> "Fuel header tank pressure was low during landi…
$ conversation_id <chr> "1336808486022258688", "1336808486022258688", "…
$ author_id <chr> "44196397", "44196397", "44196397", "44196397",…
$ in_reply_to_user_id <chr> "44196397", NA, "4914384040", "3101588527", "34…
$ source <chr> "Twitter for iPhone", "Twitter for iPhone", "Tw…
$ possibly_sensitive <lgl> FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE…
$ created_at <chr> "2020-12-09T23:07:39.000Z", "2020-12-09T23:02:3…
$ lang <chr> "en", "en", "und", "en", "en", "en", "en", "en"…
$ user_created_at <chr> "2009-06-02T20:12:29.000Z", "2009-06-02T20:12:2…
$ user_description <chr> "", "", "", "", "", "", "", "", "", "", "", "",…
$ user_profile_image_url <chr> "https://pbs.twimg.com/profile_images/159096873…
$ user_name <chr> "Elon Musk", "Elon Musk", "Elon Musk", "Elon Mu…
$ user_protected <lgl> FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE…
$ user_verified <lgl> TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,…
$ user_location <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
$ user_url <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
$ user_pinned_tweet_id <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
$ retweet_count <int> 6987, 9775, 349, 348, 1055, 7732, 10719, 324, 5…
$ like_count <int> 96961, 106142, 15267, 13204, 22581, 0, 170964, …
$ quote_count <int> 2027, 2953, 93, 111, 177, 0, 5106, 92, 143, 12,…
$ user_tweet_count <int> 20290, 20290, 20290, 20290, 20290, 20290, 20290…
$ user_list_count <int> 102027, 102027, 102027, 102027, 102027, 102027,…
$ user_followers_count <int> 115405919, 115405919, 115405919, 115405919, 115…
$ user_following_count <int> 130, 130, 130, 130, 130, 130, 130, 130, 130, 13…
$ sourcetweet_type <chr> NA, "quoted", NA, NA, NA, "retweeted", NA, NA, …
$ sourcetweet_id <chr> NA, "1336777137391456256", NA, NA, NA, "1336349…
$ sourcetweet_text <chr> NA, "Watch Starship high-altitude test live → h…
$ sourcetweet_lang <chr> NA, "en", NA, NA, NA, "en", NA, NA, NA, NA, "en…
$ sourcetweet_author_id <chr> NA, "34743251", NA, NA, NA, "34743251", NA, NA,…
$ datetime <dttm> 2020-12-09 23:07:39, 2020-12-09 23:02:34, 2020…
$ date <date> 2020-12-09, 2020-12-09, 2020-12-09, 2020-12-08…
$ hour <int> 23, 23, 18, 18, 16, 16, 16, 2, 2, 0, 22, 20, 18…
$ min <int> 7, 2, 13, 5, 57, 55, 44, 51, 50, 16, 16, 13, 14…
$ hms <time> 23:07:39, 23:02:34, 18:13:21, 18:05:28, 16:57:…
$ hm <time> 23:07:00, 23:02:00, 18:13:00, 18:05:00, 16:57:…
Tweets over time
# Bar chart of the number of tweets per calendar day.
tweets_musk %>%
  ggplot(aes(date)) +
  geom_bar() +
  theme_pubr()
Tweets with the most likes
# Ten most-liked tweets. Rows with a non-missing `sourcetweet_type`
# (quoted or retweeted) are dropped first.
tweets_musk %>%
  filter(is.na(sourcetweet_type)) %>%
  arrange(desc(like_count)) %>%
  select(text, created_at, like_count) %>%
  head(10)
# A tibble: 10 × 3
text creat…¹ like_…²
<chr> <chr> <int>
1 "Next I’m buying Coca-Cola to put the cocaine back in" 2022-0… 4767770
2 "I hope that even my worst critics remain on Twitter, becaus… 2022-0… 3221746
3 "Let’s make Twitter maximum fun!" 2022-0… 2641791
4 "\U0001f680\U0001f4ab♥️ Yesss!!! ♥️\U0001f4ab\U0001f680 https:… 2022-0… 2599811
5 "Listen, I can’t do miracles ok https://t.co/z7dvLMUXy8" 2022-0… 2572036
6 "the bird is freed" 2022-1… 2497902
7 "Comedy is now legal on Twitter" 2022-1… 2400045
8 "https://t.co/kGncG7Hs3M" 2022-1… 1893281
9 "If I die under mysterious circumstances, it’s been nice kno… 2022-0… 1891028
10 "The extreme antibody reaction from those who fear free spee… 2022-0… 1647281
# … with abbreviated variable names ¹created_at, ²like_count
Tweets with the most retweets
# Ten most-retweeted tweets. Rows with a non-missing `sourcetweet_type`
# (quoted or retweeted) are dropped first.
tweets_musk %>%
  filter(is.na(sourcetweet_type)) %>%
  arrange(desc(retweet_count)) %>%
  select(text, created_at, retweet_count) %>%
  head(10)
# A tibble: 10 × 3
text creat…¹ retwe…²
<chr> <chr> <int>
1 "Next I’m buying Coca-Cola to put the cocaine back in" 2022-0… 679688
2 "I hope that even my worst critics remain on Twitter, becaus… 2022-0… 366807
3 "the bird is freed" 2022-1… 357937
4 "\U0001f680\U0001f4ab♥️ Yesss!!! ♥️\U0001f4ab\U0001f680 https:… 2022-0… 346717
5 "Comedy is now legal on Twitter" 2022-1… 261224
6 "Listen, I can’t do miracles ok https://t.co/z7dvLMUXy8" 2022-0… 212059
7 "https://t.co/Q9OjlJhi7f" 2022-0… 207978
8 "Let’s make Twitter maximum fun!" 2022-0… 193913
9 "The extreme antibody reaction from those who fear free spee… 2022-0… 191983
10 "Entering Twitter HQ – let that sink in! https://t.co/D68z4K… 2022-1… 190507
# … with abbreviated variable names ¹created_at, ²retweet_count
Proportion of tweets
# Frequency table of tweet types (quoted / retweeted / NA).
tweets_musk %>%
  frq(sourcetweet_type)
sourcetweet_type <character>
# total N=7255 valid N=489 mean=1.74 sd=0.44
Value | N | Raw % | Valid % | Cum. %
-------------------------------------------
quoted | 125 | 1.72 | 25.56 | 25.56
retweeted | 364 | 5.02 | 74.44 | 100.00
<NA> | 6766 | 93.26 | <NA> | <NA>
Language of tweets
# Frequency table of the language code assigned to each tweet.
tweets_musk %>%
  frq(lang)
lang <character>
# total N=7255 valid N=7255 mean=14.37 sd=9.86
Value | N | Raw % | Valid % | Cum. %
---------------------------------------
ar | 2 | 0.03 | 0.03 | 0.03
art | 12 | 0.17 | 0.17 | 0.19
bg | 1 | 0.01 | 0.01 | 0.21
ca | 10 | 0.14 | 0.14 | 0.34
cs | 1 | 0.01 | 0.01 | 0.36
cy | 1 | 0.01 | 0.01 | 0.37
da | 6 | 0.08 | 0.08 | 0.45
de | 24 | 0.33 | 0.33 | 0.79
el | 2 | 0.03 | 0.03 | 0.81
en | 5915 | 81.53 | 81.53 | 82.34
es | 15 | 0.21 | 0.21 | 82.55
et | 6 | 0.08 | 0.08 | 82.63
eu | 3 | 0.04 | 0.04 | 82.67
fr | 20 | 0.28 | 0.28 | 82.95
hi | 1 | 0.01 | 0.01 | 82.96
ht | 2 | 0.03 | 0.03 | 82.99
hu | 2 | 0.03 | 0.03 | 83.02
in | 8 | 0.11 | 0.11 | 83.13
is | 1 | 0.01 | 0.01 | 83.14
it | 6 | 0.08 | 0.08 | 83.23
ja | 5 | 0.07 | 0.07 | 83.29
lt | 3 | 0.04 | 0.04 | 83.34
nl | 4 | 0.06 | 0.06 | 83.39
no | 1 | 0.01 | 0.01 | 83.40
pl | 5 | 0.07 | 0.07 | 83.47
pt | 7 | 0.10 | 0.10 | 83.57
qam | 40 | 0.55 | 0.55 | 84.12
qht | 1 | 0.01 | 0.01 | 84.14
qme | 81 | 1.12 | 1.12 | 85.25
qst | 5 | 0.07 | 0.07 | 85.32
ro | 3 | 0.04 | 0.04 | 85.36
ru | 7 | 0.10 | 0.10 | 85.46
sl | 2 | 0.03 | 0.03 | 85.49
tl | 59 | 0.81 | 0.81 | 86.30
tr | 5 | 0.07 | 0.07 | 86.37
uk | 1 | 0.01 | 0.01 | 86.38
und | 817 | 11.26 | 11.26 | 97.64
vi | 1 | 0.01 | 0.01 | 97.66
zh | 1 | 0.01 | 0.01 | 97.67
zxx | 169 | 2.33 | 2.33 | 100.00
<NA> | 0 | 0.00 | <NA> | <NA>
Text mining
Preprocessing
# Regular expression matching the HTML entities for `&`, `<` and `>`.
# NOTE(review): the source showed "&|<|>", which looks like an
# entity-decoded copy of this pattern; that version would strip only the bare
# characters and leave "amp;", "lt;" and "gt;" behind in the text -- confirm
# against the original script.
remove_html <- "&amp;|&lt;|&gt;"
# English tweets without a source tweet type (i.e. neither quoted nor
# retweeted), reduced to the columns needed for text mining, with the
# `remove_html` pattern stripped from the text.
tweets_en <- tweets_musk %>%
  filter(
    lang == "en",
    is.na(sourcetweet_type)
  ) %>%
  select(tweet_id, text, user_username) %>%
  mutate(text = str_remove_all(text, remove_html))
# Build a quanteda corpus: one document per tweet, identified by `tweet_id`.
tweets_en_corpus <- corpus(
  tweets_en,
  docid_field = "tweet_id",
  text_field = "text"
)
# Tokenise the corpus, dropping punctuation, numbers, symbols and URLs,
# then lower-case the tokens and remove English stopwords.
tweets_en_tokens <- tokens(
  tweets_en_corpus,
  remove_punct = TRUE,
  remove_numbers = TRUE,
  remove_symbols = TRUE,
  remove_url = TRUE
) %>%
  tokens_tolower() %>%
  tokens_remove(stopwords("english"))
# Document-feature matrix of the cleaned tokens.
tweets_en_dfm <- dfm(tweets_en_tokens)
Analysis
Top Mentions
# Keep only features starting with "@" (mentions), take the names of the
# 50 most frequent ones and show the first ten.
user_dfm <- dfm_select(tweets_en_dfm, pattern = "@*")
topuser <- names(topfeatures(user_dfm, 50))
head(topuser, 10)
[1] "@wholemarsblog" "@spacex" "@teslaownerssv" "@ppathole"
[5] "@tesla" "@erdayastronaut" "@billym2k" "@teslarati"
[9] "@sawyermerritt" "@evafoxu"
Top 10 features
# Frequency statistics for every feature; show the ten most frequent.
# NOTE(review): the source referenced `tweets_en_clean`, which is not defined
# anywhere in the visible script. `tweets_en_dfm` (the document-feature
# matrix built during preprocessing) is used instead -- confirm that no
# additional cleaning step was intended.
term_freq_en <- textstat_frequency(tweets_en_dfm)
head(term_freq_en, n = 10)
feature frequency rank docfreq group
1 tesla 354 1 328 all
2 just 227 2 225 all
3 good 215 3 209 all
4 great 187 4 183 all
5 much 184 5 180 all
6 like 172 6 168 all
7 can 169 7 165 all
8 people 167 8 156 all
9 twitter 156 9 147 all
10 one 146 10 144 all
Wordcloud with Top 50 features
# Word cloud of the 50 most frequent features.
# NOTE(review): the source referenced `tweets_en_clean`, which is not defined
# anywhere in the visible script; `tweets_en_dfm` is used instead -- confirm.
textplot_wordcloud(tweets_en_dfm, max_words = 50)
References
Barrie, C., & Ho, J. (2021). academictwitteR: An R package to access the Twitter Academic Research Product Track v2 API endpoint. Journal of Open Source Software, 6(62), 3272. https://doi.org/10.21105/joss.03272