Skip to content

Commit

Permalink
add player page scraper
Browse files Browse the repository at this point in the history
  • Loading branch information
mrcaseb committed May 3, 2022
1 parent c4067c7 commit 612473e
Show file tree
Hide file tree
Showing 6 changed files with 125 additions and 3 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Package: rotc
Title: Functions to Access OTC Data
Version: 0.0.0.9000
Version: 0.0.0.9001
Authors@R:
person("Carl", "Sebastian", , "[email protected]", role = c("aut", "cre"))
Description: A set of functions to access over the cap data.
Expand Down
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,5 @@

export(otc_historical_contracts)
export(otc_historical_contracts_all)
export(otc_player_details)
import(dplyr)
83 changes: 83 additions & 0 deletions R/otc_player_details.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
#' Scrape Player Details
#'
#' Downloads an OverTheCap player page and extracts the league entry (draft)
#' details of the player together with the contract season history table.
#'
#' @param player_url A valid OverTheCap player url
#'
#' @return A tibble with one row. It always contains the list column
#'   `season_history` (the cleaned season history table, or `NULL` if the page
#'   has none) and the column `player_url`. If entry details could be parsed
#'   from the page, it additionally contains one column per parsed field, where
#'   the entry field is split into `draft_year`, `draft_round`,
#'   `draft_overall` and `draft_team`.
#' @export
#'
#' @examples
#' \donttest{
#' otc_player_details("https://overthecap.com/player/aaron-rodgers/1085/")
#' }
otc_player_details <- function(player_url) {
  # for tests
  # player_url <- "https://overthecap.com/player/aaron-rodgers/1085/"
  # player_url <- "https://overthecap.com/player/brett-favre/6357/"
  # player_url <- "https://overthecap.com/player/donovan-mcnabb/6750/"
  # player_url <- "https://overthecap.com/player/kyle-spalding/9822/"

  cli::cli_progress_step("Scrape {.url {player_url}}")

  html_scrape <- httr2::request(player_url) |>
    httr2::req_retry(max_tries = 5) |>
    httr2::req_perform() |>
    httr2::resp_body_html()

  # first table with the season history class, NULL if the page has none
  season_history <-
    xml2::xml_find_all(html_scrape, ".//*[@class = 'contract salary-cap-history player-new']") |>
    rvest::html_table() |>
    purrr::pluck(1)

  # catch missing season history
  if (!is.null(season_history)) {
    season_history <- season_history |>
      janitor::remove_empty("cols") |>
      janitor::clean_names()
  }

  # Fallback result if no entry info can be parsed.
  # This has to be a tibble: base data.frame() would splice the list into
  # separate columns (and fail for a NULL season history) instead of creating
  # a list column.
  history_only <- dplyr::tibble(
    season_history = list(season_history),
    player_url = player_url
  )

  # Entry info of active players
  entry_info <- xml2::xml_find_all(html_scrape, ".//*[@class = 'league-entry-info']") |>
    xml2::xml_contents()

  # Entry info of non-active players
  player_bio <- xml2::xml_find_all(html_scrape, ".//*[@class = 'player-bio inactive-fg']") |>
    xml2::xml_contents()

  # decide which entry info to parse
  # if both are missing, just return season history and player url
  if (length(entry_info) != 0) {
    to_parse <- entry_info
  } else if (length(player_bio) == 0 || all(xml2::xml_text(player_bio) == "")) {
    return(history_only)
  } else {
    to_parse <- player_bio
  }

  # Split every node text into label and value. `n = 2` keeps values intact
  # that contain ": " themselves.
  fields <- to_parse |>
    xml2::xml_text() |>
    stringr::str_split(": ", n = 2)

  # Nodes without a "label: value" structure (e.g. whitespace only text
  # nodes) can't be parsed and would break the indexing below
  fields <- purrr::keep(fields, function(i) length(i) == 2)

  if (length(fields) == 0) {
    return(history_only)
  }

  # NOTE: the parsed fields are expected to include an "Entry" field as
  # tidyr::separate() requires the `entry` column
  fields |>
    purrr::map_dfc(function(i) {
      data.frame(out = i[[2]]) |> rlang::set_names(i[[1]])
    }) |>
    janitor::clean_names() |>
    tidyr::separate(
      entry,
      into = c("draft_year", "draft_round", "draft_overall"),
      sep = ", ",
      fill = "right",
      remove = FALSE
    ) |>
    dplyr::mutate(
      draft_year = stringr::str_extract(draft_year, "[:digit:]+") |> as.integer(),
      draft_round = stringr::str_extract(draft_round, "[:digit:]+") |> as.integer(),
      # team abbreviation is the part of the entry string in parentheses
      draft_team = stringr::str_extract(entry, "(?<=\\()[:[:alpha:]:]+(?=\\))"),
      draft_overall = stringr::str_extract(draft_overall, "[:digit:]+") |> as.integer(),
      season_history = list(season_history),
      player_url = player_url
    ) |>
    dplyr::select(-entry) |>
    dplyr::as_tibble()

}
7 changes: 6 additions & 1 deletion R/silence_tidy_eval_notes.R
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,12 @@ utils::globalVariables(
"apy_cap_pct",
"player",
"team",
"is_active"
"is_active",
"entry",
"draft_year",
"draft_round",
"draft_overall",
"position"
),
package = "rotc"
)
13 changes: 12 additions & 1 deletion man/otc_historical_contracts_all.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

22 changes: 22 additions & 0 deletions man/otc_player_details.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit 612473e

Please sign in to comment.