From 01f9dcc63c708566326839c6d2c186daa480f592 Mon Sep 17 00:00:00 2001 From: Lextuga007 Date: Wed, 20 Mar 2024 14:05:30 +0000 Subject: [PATCH 1/4] Added test for df without explicit parameter for "imd" --- tests/testthat/test-get_data.R | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/tests/testthat/test-get_data.R b/tests/testthat/test-get_data.R index 54011a5..4995a6b 100644 --- a/tests/testthat/test-get_data.R +++ b/tests/testthat/test-get_data.R @@ -216,6 +216,14 @@ httptest2::with_mock_dir("imd", { ncol(get_data(lsoa_df1, url_type = "imd")), n_col_df ) + testthat::expect_equal( + nrow(get_data(lsoa_df1)), n_rows + ) + + testthat::expect_equal( + ncol(get_data(lsoa_df1)), n_col_df + ) + # vectors testthat::expect_equal( @@ -227,3 +235,4 @@ httptest2::with_mock_dir("imd", { ) }) }) + From de76d355e70587f827ba65667a90ebdd471e4eb7 Mon Sep 17 00:00:00 2001 From: Lextuga007 Date: Wed, 20 Mar 2024 14:43:37 +0000 Subject: [PATCH 2/4] Renamed file for checker functions --- R/{postcode-helper.R => checker-functions.R} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename R/{postcode-helper.R => checker-functions.R} (100%) diff --git a/R/postcode-helper.R b/R/checker-functions.R similarity index 100% rename from R/postcode-helper.R rename to R/checker-functions.R From c1e6b12e56307600f9a27712f34f4762294d5383 Mon Sep 17 00:00:00 2001 From: Lextuga007 Date: Wed, 20 Mar 2024 14:44:23 +0000 Subject: [PATCH 3/4] Moved api functions to helper file and removed export --- NAMESPACE | 2 - R/get_data-helper.R | 88 ++++++++++++++++++++++++++++++ R/get_data.R | 98 +++++++++------------------------- man/api_url.Rd | 2 +- man/is_lsoa.Rd | 2 +- man/is_postcode.Rd | 2 +- man/pull_table_data.Rd | 17 ------ man/retrieve_data.Rd | 19 ------- tests/testthat/test-get_data.R | 1 - 9 files changed, 115 insertions(+), 116 deletions(-) create mode 100644 R/get_data-helper.R delete mode 100644 man/pull_table_data.Rd delete mode 100644 man/retrieve_data.Rd diff --git a/NAMESPACE b/NAMESPACE index 76409d1..5d2b63f 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -5,6 +5,4 @@ export(api_url) export(get_data) export(is_lsoa) export(is_postcode) -export(pull_table_data) -export(retrieve_data) importFrom(magrittr,"%>%") diff --git a/R/get_data-helper.R b/R/get_data-helper.R new file mode 100644 index 0000000..ffdf245 --- /dev/null +++ b/R/get_data-helper.R @@ -0,0 +1,88 @@ +#' Getting data from the IMD api +#' +#' @description +#' Only required for IMD as postcodes is routed through the {NHSRpostcodetools} +#' package +#' +#' @return dataset +#' @export +api_url <- function() { + httr2::request(paste0( + "https://services3.arcgis.com/ivmBBrHfQfDnDf8Q/arcgis/rest/services/", + "Indices_of_Multiple_Deprivation_(IMD)_2019/FeatureServer/0/query" + )) |> + httr2::req_url_query(f = "json") +} + +#' use batched IDs to retrieve table data +#' +#' @param req used in function \code{\link{imd_api}} +#' @param ids_vec used in function \code{\link{imd_api}} +#' +#' @return function +#' @noRd +retrieve_data <- function(req, ids_vec) { + ids <- stringr::str_flatten(ids_vec, collapse = ",") + req |> + httr2::req_url_query(objectIds = ids) |> + httr2::req_url_query(outFields = "*") |> # returns all columns + httr2::req_url_query(returnGeometry = FALSE) |> # superfluous tbf + httr2::req_retry(max_tries = 3) |> # shouldn't be needed + httr2::req_perform() +} + + +#' pull actual data out from API JSON response +#' +#' @param respused in function \code{\link{imd_api}} +#' +#' @return function +#' @noRd +pull_table_data <- function(resp) { + resp |> + httr2::resp_check_status() |> + httr2::resp_body_json() |> + purrr::pluck("features") |> + purrr::map_df("attributes") |> + janitor::clean_names() +} + +#' Get IMD data through API +#' +#' @description +#' Relies on functions \code{\link{retrieve_data}}, +#' \code{\link{pull_table_data}} and \code{\link{api_url}} +#' +#' @param text String. Used in the query function and feeds in either postcodes +#' or lsoas from data in the expected API url format +#' @param req +#' +#' @return data frame +#' @noRd +imd_api <- function(text, req) { + ids <- req |> + httr2::req_url_query(returnIdsonly = TRUE) |> + httr2::req_url_query(where = text) |> + httr2::req_perform() |> + httr2::resp_body_json() |> + purrr::pluck("objectIds") + + ids_batched <- NHSRpostcodetools::batch_it(ids, 100L) + + # Uses function retrieve data + # safely handle any errors + poss_retrieve_data <- purrr::possibly(retrieve_data) + + resps <- ids_batched |> + purrr::map(\(x) poss_retrieve_data(req, x)) |> + purrr::compact() + + # Uses function pull_table_data + poss_pull_table_data <- purrr::possibly(pull_table_data) + + data_out <- resps |> + purrr::map(poss_pull_table_data) |> + purrr::list_rbind() + + data_out +} diff --git a/R/get_data.R b/R/get_data.R index c635827..e2bb7c6 100644 --- a/R/get_data.R +++ b/R/get_data.R @@ -1,19 +1,3 @@ -#' Getting data from the IMD api -#' -#' @description -#' Only required for IMD as postcodes is routed through the {NHSRpostcodetools} -#' package -#' -#' @return dataset -#' @export -api_url <- function() { - httr2::request(paste0( - "https://services3.arcgis.com/ivmBBrHfQfDnDf8Q/arcgis/rest/services/", - "Indices_of_Multiple_Deprivation_(IMD)_2019/FeatureServer/0/query" - )) |> - httr2::req_url_query(f = "json") -} - #' Query information to restrict data returned #' #' @description @@ -101,29 +85,8 @@ get_data <- function(data, } if (is_postcode_check == 0 && is_lsoa_check > 0 | url_type == "imd") { - ids <- req |> - httr2::req_url_query(returnIdsonly = TRUE) |> - httr2::req_url_query(where = text) |> - httr2::req_perform() |> - httr2::resp_body_json() |> - purrr::pluck("objectIds") - - ids_batched <- NHSRpostcodetools::batch_it(ids, 100L) - - # Uses function retrieve data - # safely handle any errors - poss_retrieve_data <- purrr::possibly(retrieve_data) - - resps <- ids_batched |> - purrr::map(\(x) poss_retrieve_data(req, x)) |> - purrr::compact() - - # Uses function pull_table_data - poss_pull_table_data <- purrr::possibly(pull_table_data) - - data_out <- resps |> - purrr::map(poss_pull_table_data) |> - purrr::list_rbind() + data_out <- imd_api(text = text, + req = req) } # Because APIs only return data where a match has been made which results in @@ -131,18 +94,38 @@ get_data <- function(data, # Postcode information is passed through {NHSRpostcodetools} which handles # this but IMD is handled here. - if (exists("data_transformed") && is.data.frame(data)) { + if (exists("data_transformed") && is.data.frame(data) && + url_type == "postcode") { data |> dplyr::left_join( data_transformed ) - } else if (exists("data_transformed") && is.atomic(data)) { + } else if (exists("data_transformed") && is.atomic(data) && + url_type == "postcode") { tibble::as_tibble(data) |> dplyr::left_join( data_transformed, dplyr::join_by(value == postcode) ) |> dplyr::rename(postcode = value) + } else if (exists("data_transformed") && is.data.frame(data) && + url_type == "url") { + data |> + dplyr::left_join( + data_transformed + ) |> + dplyr::left_join( + data_out, + dplyr::join_by(lsoa_code == lsoa) + ) + # } else if (exists("data_transformed") && is.atomic(data) && + # url_type == "url") { + # tibble::as_tibble(data) |> + # dplyr::left_join( + # data_transformed, + # dplyr::join_by(value == postcode) + # ) |> + # dplyr::rename(postcode = value) } else if (is.data.frame(data)) { data |> dplyr::left_join( @@ -160,36 +143,3 @@ get_data <- function(data, data } } - -#' use batched IDs to retrieve table data -#' -#' @param req used in function \code{\link{get_data}} -#' @param ids_vec used in function \code{\link{get_data}} -#' -#' @return function -#' @export -retrieve_data <- function(req, ids_vec) { - ids <- stringr::str_flatten(ids_vec, collapse = ",") - req |> - httr2::req_url_query(objectIds = ids) |> - httr2::req_url_query(outFields = "*") |> # returns all columns - httr2::req_url_query(returnGeometry = FALSE) |> # superfluous tbf - httr2::req_retry(max_tries = 3) |> # shouldn't be needed - httr2::req_perform() -} - - -#' pull actual data out from API JSON response -#' -#' @param respused in function \code{\link{get_data}} -#' -#' @return function -#' @export -pull_table_data <- function(resp) { - resp |> - httr2::resp_check_status() |> - httr2::resp_body_json() |> - purrr::pluck("features") |> - purrr::map_df("attributes") |> - janitor::clean_names() -} diff --git a/man/api_url.Rd b/man/api_url.Rd index 92256a1..c616f14 100644 --- a/man/api_url.Rd +++ b/man/api_url.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/get_data.R +% Please edit documentation in R/get_data-helper.R \name{api_url} \alias{api_url} \title{Getting data from the IMD api} diff --git a/man/is_lsoa.Rd b/man/is_lsoa.Rd index 74f83e1..515a336 100644 --- a/man/is_lsoa.Rd +++ b/man/is_lsoa.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/postcode-helper.R +% Please edit documentation in R/checker-functions.R \name{is_lsoa} \alias{is_lsoa} \title{Function to check if a string is an LSOA code} diff --git a/man/is_postcode.Rd b/man/is_postcode.Rd index b099aba..7003b3d 100644 --- a/man/is_postcode.Rd +++ b/man/is_postcode.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/postcode-helper.R +% Please edit documentation in R/checker-functions.R \name{is_postcode} \alias{is_postcode} \title{Function to check if a string is a valid postcode regardless of its diff --git a/man/pull_table_data.Rd b/man/pull_table_data.Rd deleted file mode 100644 index bde1b20..0000000 --- a/man/pull_table_data.Rd +++ /dev/null @@ -1,17 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/get_data.R -\name{pull_table_data} -\alias{pull_table_data} -\title{pull actual data out from API JSON response} -\usage{ -pull_table_data(resp) -} -\arguments{ -\item{respused}{in function \code{\link{get_data}}} -} -\value{ -function -} -\description{ -pull actual data out from API JSON response -} diff --git a/man/retrieve_data.Rd b/man/retrieve_data.Rd deleted file mode 100644 index 10a24cc..0000000 --- a/man/retrieve_data.Rd +++ /dev/null @@ -1,19 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/get_data.R -\name{retrieve_data} -\alias{retrieve_data} -\title{use batched IDs to retrieve table data} -\usage{ -retrieve_data(req, ids_vec) -} -\arguments{ -\item{req}{used in function \code{\link{get_data}}} - -\item{ids_vec}{used in function \code{\link{get_data}}} -} -\value{ -function -} -\description{ -use batched IDs to retrieve table data -} diff --git a/tests/testthat/test-get_data.R b/tests/testthat/test-get_data.R index 4995a6b..2822ff7 100644 --- a/tests/testthat/test-get_data.R +++ b/tests/testthat/test-get_data.R @@ -235,4 +235,3 @@ httptest2::with_mock_dir("imd", { ) }) }) - From 3593740bc3580301676f30d1b54bf5a62571c200 Mon Sep 17 00:00:00 2001 From: Lextuga007 Date: Wed, 20 Mar 2024 17:38:09 +0000 Subject: [PATCH 4/4] Changed columns to search for the name --- R/get_data.R | 99 ++++++++++++++++++---------------- tests/testthat/test-get_data.R | 10 +++- 2 files changed, 63 insertions(+), 46 deletions(-) diff --git a/R/get_data.R b/R/get_data.R index e2bb7c6..d219eb6 100644 --- a/R/get_data.R +++ b/R/get_data.R @@ -37,13 +37,14 @@ get_data <- function(data, column <- rlang::as_string(column) # Check there is corresponding type data somewhere in data frame + # Use this to allow for other column names to be used in later code is_postcode_check <- sum(is_postcode(as.vector(t(data))), na.rm = TRUE) is_lsoa_check <- sum(is_lsoa(as.vector(t(data))), na.rm = TRUE) - if (column == "default" && is_postcode_check == 0 && is_lsoa_check > 0) { - column <- "lsoa11" - } else if (column == "default" && url_type == "postcode") { + if ("postcode" %in% colnames(data)) { column <- "postcode" + } else if ("lsoa11" %in% colnames(data)) { + column <- "lsoa11" } else { column <- rlang::eval_tidy(rlang::quo(column)) } @@ -62,7 +63,11 @@ get_data <- function(data, fix_invalid = fix_invalid, var = column # Not required but doesn't cause error ) - } else if (is.atomic(data) && is_postcode_check == 0 && + } + + ## Generate specific text for the url + + if (is.atomic(data) && is_postcode_check == 0 && is_lsoa_check > 0) { text <- paste0( "LSOA11CD IN ('", @@ -80,13 +85,13 @@ get_data <- function(data, collapse = "', '" ), "')" ) - } else { - data - } - - if (is_postcode_check == 0 && is_lsoa_check > 0 | url_type == "imd") { - data_out <- imd_api(text = text, - req = req) + } else if (exists("data_transformed") && url_type == "imd") { + text <- paste0( + "LSOA11CD IN ('", + paste(data_transformed$lsoa_code, + collapse = "', '" + ), "')" + ) } # Because APIs only return data where a match has been made which results in @@ -94,52 +99,56 @@ get_data <- function(data, # Postcode information is passed through {NHSRpostcodetools} which handles # this but IMD is handled here. - if (exists("data_transformed") && is.data.frame(data) && - url_type == "postcode") { - data |> - dplyr::left_join( - data_transformed - ) - } else if (exists("data_transformed") && is.atomic(data) && - url_type == "postcode") { - tibble::as_tibble(data) |> - dplyr::left_join( - data_transformed, - dplyr::join_by(value == postcode) - ) |> - dplyr::rename(postcode = value) - } else if (exists("data_transformed") && is.data.frame(data) && - url_type == "url") { - data |> + if (exists("data_transformed") && is.data.frame(data)) { + pc_data <- data |> dplyr::left_join( data_transformed - ) |> - dplyr::left_join( - data_out, - dplyr::join_by(lsoa_code == lsoa) ) - # } else if (exists("data_transformed") && is.atomic(data) && - # url_type == "url") { - # tibble::as_tibble(data) |> - # dplyr::left_join( - # data_transformed, - # dplyr::join_by(value == postcode) - # ) |> - # dplyr::rename(postcode = value) - } else if (is.data.frame(data)) { - data |> + } else if (exists("data_transformed") && is.atomic(data)) { + pc_data <- data_transformed + } + + + ## IMD data + + if (is_postcode_check == 0 && is_lsoa_check > 0 && + is.data.frame(data)) { + data_out <- imd_api( + text = text, + req = req + ) + + imd_data <- data |> dplyr::left_join( data_out, dplyr::join_by({{ column }} == lsoa11cd) ) - } else if (is.atomic(data)) { - tibble::as_tibble(data) |> + } else if (is_postcode_check == 0 && is_lsoa_check > 0 && is.atomic(data)) { + data_out <- imd_api( + text = text, + req = req + ) + + imd_data <- tibble::as_tibble(data) |> dplyr::left_join( data_out, dplyr::join_by(value == lsoa11cd) ) |> dplyr::rename(lsoa11 = value) + } + + ## Final data + + if (exists("pc_data") && url_type == "imd") { + data_out <- imd_api( + text = text, + req = req + ) + + data_out + } else if (exists("pc_data") && url_type == "postcode") { + pc_data } else { - data + imd_data } } diff --git a/tests/testthat/test-get_data.R b/tests/testthat/test-get_data.R index 2822ff7..9ba7b00 100644 --- a/tests/testthat/test-get_data.R +++ b/tests/testthat/test-get_data.R @@ -2,7 +2,7 @@ postcodes <- c("HD1 2UT", "HD1 2UU", "HD1 2UV") -imd <- c("E01011107", "E01011229", "E01000002") +imd <- c("E01011107", "E01011229", "E01002") # # Taken from # # www.gov.uk/government/statistics/english-indices-of-deprivation-2019 @@ -226,6 +226,14 @@ httptest2::with_mock_dir("imd", { # vectors + testthat::expect_equal( + nrow(get_data(imd)), n_rows + ) + + testthat::expect_equal( + ncol(get_data(imd)), n_col_vector + ) + testthat::expect_equal( nrow(get_data(imd, url_type = "imd")), n_rows )