From 612473e92d380d1736c861aa75fac0bd9e6f0d2a Mon Sep 17 00:00:00 2001
From: mrcaseb
Date: Tue, 3 May 2022 11:39:47 +0200
Subject: [PATCH] add player page scraper

---
 DESCRIPTION                         |   2 +-
 NAMESPACE                           |   1 +
 R/otc_player_details.R              | 120 ++++++++++++++++++++++++++++
 R/silence_tidy_eval_notes.R         |   7 +-
 man/otc_historical_contracts_all.Rd |  13 ++-
 man/otc_player_details.Rd           |  27 +++++++
 6 files changed, 167 insertions(+), 3 deletions(-)
 create mode 100644 R/otc_player_details.R
 create mode 100644 man/otc_player_details.Rd

diff --git a/DESCRIPTION b/DESCRIPTION
index 2cb5d13..6eef3f0 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,6 +1,6 @@
 Package: rotc
 Title: Functions to Access OTC Data
-Version: 0.0.0.9000
+Version: 0.0.0.9001
 Authors@R: 
     person("Carl", "Sebastian", , "mrcaseb@gmail.com", role = c("aut", "cre"))
 Description: A set of functions to access over the cap data.
diff --git a/NAMESPACE b/NAMESPACE
index d62affb..f6e81e7 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -2,4 +2,5 @@
 
 export(otc_historical_contracts)
 export(otc_historical_contracts_all)
+export(otc_player_details)
 import(dplyr)
diff --git a/R/otc_player_details.R b/R/otc_player_details.R
new file mode 100644
index 0000000..e269b96
--- /dev/null
+++ b/R/otc_player_details.R
@@ -0,0 +1,120 @@
+#' Scrape Player Details
+#'
+#' Downloads an OverTheCap player page and extracts the player's league entry
+#' (draft) information together with the contract season history table.
+#'
+#' @param player_url A valid OverTheCap player url (a single string)
+#'
+#' @return A one-row tibble containing draft details and contract season
+#'   history. The season history is stored in the list column
+#'   \code{season_history} (\code{NULL} if the page has no such table). If the
+#'   page has no parsable entry information, only \code{season_history} and
+#'   \code{player_url} are returned.
+#' @export
+#'
+#' @examples
+#' \donttest{
+#'   otc_player_details("https://overthecap.com/player/aaron-rodgers/1085/")
+#' }
+otc_player_details <- function(player_url) {
+  # for tests
+  # player_url <- "https://overthecap.com/player/aaron-rodgers/1085/"
+  # player_url <- "https://overthecap.com/player/brett-favre/6357/"
+  # player_url <- "https://overthecap.com/player/donovan-mcnabb/6750/"
+  # player_url <- "https://overthecap.com/player/kyle-spalding/9822/"
+
+  # fail early with a clear message instead of an obscure request error
+  if (
+    !is.character(player_url) || length(player_url) != 1L || is.na(player_url)
+  ) {
+    cli::cli_abort("{.arg player_url} must be a single, non-missing string.")
+  }
+
+  cli::cli_progress_step("Scrape {.url {player_url}}")
+
+  html_scrape <- httr2::request(player_url) |>
+    httr2::req_retry(max_tries = 5) |>
+    httr2::req_perform() |>
+    httr2::resp_body_html()
+
+  # pluck() yields NULL if the page has no season history table
+  season_history <- html_scrape |>
+    xml2::xml_find_all(
+      ".//*[@class = 'contract salary-cap-history player-new']"
+    ) |>
+    rvest::html_table() |>
+    purrr::pluck(1)
+
+  # catch missing season history
+  if (!is.null(season_history)) {
+    season_history <- season_history |>
+      janitor::remove_empty("cols") |>
+      janitor::clean_names()
+  }
+
+  # Entry info of active players
+  entry_info <- html_scrape |>
+    xml2::xml_find_all(".//*[@class = 'league-entry-info']") |>
+    xml2::xml_contents()
+
+  # Entry info of non-active players
+  player_bio <- html_scrape |>
+    xml2::xml_find_all(".//*[@class = 'player-bio inactive-fg']") |>
+    xml2::xml_contents()
+
+  # decide which entry info to parse
+  to_parse <- if (length(entry_info) != 0) entry_info else player_bio
+
+  # Every usable node reads "<label>: <value>". Split on the first ": " only
+  # (a value may contain ": " itself) and drop nodes without a label/value
+  # pair, e.g. empty or whitespace-only text nodes.
+  fields <- to_parse |>
+    xml2::xml_text() |>
+    stringr::str_split(": ", n = 2) |>
+    purrr::keep(function(i) length(i) == 2)
+
+  # if no entry info is available, just return season history and player url
+  # (same list column layout as the regular return value)
+  if (length(fields) == 0) {
+    return(
+      dplyr::tibble(
+        season_history = list(season_history),
+        player_url = player_url
+      )
+    )
+  }
+
+  details <- fields |>
+    purrr::map_dfc(function(i) {
+      data.frame(out = i[[2]]) |> rlang::set_names(i[[1]])
+    }) |>
+    janitor::clean_names()
+
+  # pages without an "Entry" field still get the (missing) draft columns
+  if (!"entry" %in% names(details)) {
+    details$entry <- NA_character_
+  }
+
+  details |>
+    tidyr::separate(
+      entry,
+      into = c("draft_year", "draft_round", "draft_overall"),
+      sep = ", ",
+      fill = "right",
+      remove = FALSE
+    ) |>
+    dplyr::mutate(
+      draft_year = as.integer(stringr::str_extract(draft_year, "[:digit:]+")),
+      draft_round = as.integer(stringr::str_extract(draft_round, "[:digit:]+")),
+      # the team is whatever is in parentheses; not restricted to letters so
+      # that labels such as "49ers" are captured as well
+      draft_team = stringr::str_extract(entry, "(?<=\\()[^()]+(?=\\))"),
+      draft_overall = as.integer(
+        stringr::str_extract(draft_overall, "[:digit:]+")
+      ),
+      season_history = list(season_history),
+      player_url = player_url
+    ) |>
+    dplyr::select(-entry) |>
+    dplyr::as_tibble()
+}
diff --git a/R/silence_tidy_eval_notes.R b/R/silence_tidy_eval_notes.R
index 6a95362..88d6bab 100644
--- a/R/silence_tidy_eval_notes.R
+++ b/R/silence_tidy_eval_notes.R
@@ -5,7 +5,12 @@ utils::globalVariables(
     "apy_cap_pct",
     "player",
     "team",
-    "is_active"
+    "is_active",
+    "entry",
+    "draft_year",
+    "draft_round",
+    "draft_overall",
+    "position"
   ),
   package = "rotc"
 )
diff --git a/man/otc_historical_contracts_all.Rd b/man/otc_historical_contracts_all.Rd
index 28d2a62..bc851a5 100644
--- a/man/otc_historical_contracts_all.Rd
+++ b/man/otc_historical_contracts_all.Rd
@@ -1,9 +1,12 @@
 % Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/otc_historical_contracts.R
+% Please edit documentation in R/otc_historical_contracts.R,
+%   R/otc_team_contracts.R
 \name{otc_historical_contracts_all}
 \alias{otc_historical_contracts_all}
 \title{Scrape Historical Contracts for Multiple Positions}
 \usage{
+otc_historical_contracts_all(positions = NULL)
+
 otc_historical_contracts_all(positions = NULL)
 }
 \arguments{
@@ -11,9 +14,14 @@ otc_historical_contracts_all(positions = NULL)
 \code{\link[=otc_historical_contracts]{otc_historical_contracts()}}.}
 }
 \value{
+A tibble
+
 A tibble
 }
 \description{
+This is a wrapper around \code{\link[=otc_historical_contracts]{otc_historical_contracts()}} that
+scrapes and binds multiple positions.
+
 This is a wrapper around \code{\link[=otc_historical_contracts]{otc_historical_contracts()}} that
 scrapes and binds multiple positions.
 }
@@ -21,4 +29,7 @@ scrapes and binds multiple positions.
 \donttest{
 # otc_historical_contracts_all()
 }
+\donttest{
+# otc_historical_contracts_all()
+}
 }
diff --git a/man/otc_player_details.Rd b/man/otc_player_details.Rd
new file mode 100644
index 0000000..1dba878
--- /dev/null
+++ b/man/otc_player_details.Rd
@@ -0,0 +1,27 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/otc_player_details.R
+\name{otc_player_details}
+\alias{otc_player_details}
+\title{Scrape Player Details}
+\usage{
+otc_player_details(player_url)
+}
+\arguments{
+\item{player_url}{A valid OverTheCap player url (a single string)}
+}
+\value{
+A one-row tibble containing draft details and contract season
+history. The season history is stored in the list column
+\code{season_history} (\code{NULL} if the page has no such table). If the
+page has no parsable entry information, only \code{season_history} and
+\code{player_url} are returned.
+}
+\description{
+Downloads an OverTheCap player page and extracts the player's league entry
+(draft) information together with the contract season history table.
+}
+\examples{
+\donttest{
+  otc_player_details("https://overthecap.com/player/aaron-rodgers/1085/")
+}
+}