Data Preparation OpenAlex-ID
Tip
Rough first working code for collecting the scholars' OpenAlex IDs (scholar_ids) using openalexR.
I optimized the algorithm so that it does not just select the top case but allows for multiple IDs (one per row) per person. In the decision rules I use: (1) semantic similarity and (2) matches of university_id and institution_id.
Getting Started
# Keep the first five elements of `scholars` for a trial run.
# NOTE(review): single-bracket indexing picks list elements (or columns, if
# `scholars` is a data frame), not rows -- confirm this is the intended subset.
splits = scholars[seq_len(5)]
#' Fetch the OpenAlex works of every unique author id in `scholars`.
#'
#' @param scholars Data frame with an `id` column holding OpenAlex author ids.
#'   A leading "https://openalex.org/" is stripped before the request is made.
#' @param mailto E-mail address forwarded to `oa_fetch()`.
#' @return A named list keyed by the (unmodified) author id. Each element is
#'   the `oa_fetch()` result with an extra `author_id` column placed before
#'   `id`. Ids whose request raised an error or a warning are omitted.
oa_fetch_works = function(scholars, mailto = "jos.slabbekoorn@ru.nl"){
  # Missing ids cannot be queried, so drop them up front.
  ids = unique(scholars$id)
  ids = ids[!is.na(ids)]

  works = list()
  cli_alert("Starting now, at {Sys.time()}")
  cli_progress_bar("Scraping Works", total = length(ids), clear = FALSE)

  for (id_ in ids){
    # One request per author id: the query depends only on the id, so
    # duplicate rows for the same id must not trigger repeated fetches.
    res = tryCatch(
      oa_fetch(
        entity = 'works',
        author.id = str_remove(id_, 'https://openalex.org/'),
        mailto = mailto
      ),
      # Best effort: a failing id is reported and skipped, not fatal.
      error = function(e) {
        cli_alert_warning("Skipping {id_}: {conditionMessage(e)}")
        NULL
      },
      warning = function(w) {
        cli_alert_warning("Skipping {id_}: {conditionMessage(w)}")
        NULL
      }
    )

    if (!is.null(res)){
      # Tag every work with the author id it was fetched for.
      works[[id_]] = res |>
        mutate(author_id = id_) |>
        relocate(author_id, .before = id)
    }
    cli_progress_update()
  }

  works
}