From 5717888a3a14c72ddf8b26903ae729a7cb9e6d19 Mon Sep 17 00:00:00 2001 From: Sam Albers Date: Fri, 23 Jun 2023 09:51:31 -0700 Subject: [PATCH 01/17] move to httr2 --- DESCRIPTION | 1 + R/utils.R | 8 ++++++-- tests/testthat/_snaps/utils.md | 7 +++++++ tests/testthat/test_utils.R | 13 +++---------- 4 files changed, 17 insertions(+), 12 deletions(-) create mode 100644 tests/testthat/_snaps/utils.md diff --git a/DESCRIPTION b/DESCRIPTION index 950a875..6d374ff 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -28,6 +28,7 @@ Imports: dbplyr (>= 1.1.0), dplyr (>= 0.7.4), httr (>= 1.3.1), + httr2, lubridate (>= 1.6.0), rappdirs (>= 0.3.1), readr (>= 1.1.1), diff --git a/R/utils.R b/R/utils.R index 1eb7f27..ee698c2 100644 --- a/R/utils.R +++ b/R/utils.R @@ -184,8 +184,12 @@ ask <- function(...) { # issues and fail with an informative error # message on where to download HYDAT. #' @noRd -network_check <- function(url) { - tryCatch(httr::GET(url), +network_check <- function(url, proxy_url = NULL, proxy_port = NULL) { + req <- httr2::request(base_url = url) + if (!is.null(proxy_url) && !is.null(proxy_port)) { + req <- httr2::req_proxy(req, url = proxy_url, port = proxy_port) + } + tryCatch(httr2::req_perform(req), error = function(e) { if (grepl("Timeout was reached:", e$message)) { stop(paste0("Could not connect to HYDAT source. Check your connection settings. diff --git a/tests/testthat/_snaps/utils.md b/tests/testthat/_snaps/utils.md new file mode 100644 index 0000000..a4eb3e3 --- /dev/null +++ b/tests/testthat/_snaps/utils.md @@ -0,0 +1,7 @@ +# downloading hydat fails behind a proxy server with informative error message + + Could not connect to HYDAT source. Check your connection settings. + Try downloading HYDAT_sqlite3 from this url: + [http://collaboration.cmc.ec.gc.ca/cmc/hydrometrics/www/] + and unzipping the saved file to this directory: /Users/samalbers/Library/Application Support/tidyhydat + diff --git a/tests/testthat/test_utils.R b/tests/testthat/test_utils.R index ff6498a..feeb4a3 100644 --- a/tests/testthat/test_utils.R +++ b/tests/testthat/test_utils.R @@ -25,17 +25,10 @@ test_that("hy_version returns a dataframe and works", { test_that("downloading hydat fails behind a proxy server with informative error message", { skip_on_cran() - httr::set_config(httr::use_proxy("64.251.21.73", 8080), override = TRUE) base_url_cmc <- "http://collaboration.cmc.ec.gc.ca/cmc/hydrometrics/www/" - expect_error(tidyhydat:::network_check(base_url_cmc), message = paste0( - "Error: Could not connect to HYDAT source.", - "Check your connection settings.", - "Try downloading HYDAT_sqlite3 from this url: ", - "[http://collaboration.cmc.ec.gc.ca/cmc/hydrometrics/www/]", - "and unzipping the saved file to this directory: ", - hy_dir() - )) - httr::reset_config() + expect_snapshot_error( + tidyhydat:::network_check(base_url_cmc, "64.251.21.73", 8080) + ) }) From aa0c379543b629930a06591a629e00838a17c447 Mon Sep 17 00:00:00 2001 From: Sam Albers Date: Fri, 23 Jun 2023 09:57:53 -0700 Subject: [PATCH 02/17] don't need that snapshot test in CI --- tests/testthat/test_utils.R | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/testthat/test_utils.R b/tests/testthat/test_utils.R index feeb4a3..5e929c1 100644 --- a/tests/testthat/test_utils.R +++ b/tests/testthat/test_utils.R @@ -25,6 +25,7 @@ test_that("hy_version returns a dataframe and works", { test_that("downloading hydat fails behind a proxy server with informative error message", { skip_on_cran() + skip_on_ci() base_url_cmc <- "http://collaboration.cmc.ec.gc.ca/cmc/hydrometrics/www/" expect_snapshot_error( tidyhydat:::network_check(base_url_cmc, "64.251.21.73", 8080) From 7f89e321a3ca4de3a4184828527a736c19d1db2b Mon Sep 17 00:00:00 2001 From: Sam Albers Date: Fri, 23 Jun 2023 10:35:12 -0700 Subject: [PATCH 03/17] change download_hydat --- R/download.R | 37 ++++++++++++++++++++++--------------- R/utils.R | 6 ++++++ 2 files changed, 28 insertions(+), 15 deletions(-) diff --git a/R/download.R b/R/download.R index 021f765..31b6a64 100644 --- a/R/download.R +++ b/R/download.R @@ -56,9 +56,15 @@ download_hydat <- function(dl_hydat_here = NULL, ask = TRUE) { new_hydat <- hy_remote() # Make the download URL url <- paste0(hy_base_url(), "Hydat_sqlite3_", new_hydat, ".zip") - response <- httr::HEAD(url) - httr::stop_for_status(response) - size <- round(as.numeric(httr::headers(response)[["Content-Length"]]) / 1000000, 0) + req <- httr2::request(url) + req <- httr2::req_method(req, "HEAD") + req <- tidyhydat_agent(req) + req <- req_perform(req) + httr2::resp_check_status(req) + + size <- round(as.numeric( + httr2::resp_header(req, "Content-Length") + ) / 1000000, 0) ## Do we need to download a new version? @@ -77,11 +83,11 @@ download_hydat <- function(dl_hydat_here = NULL, ask = TRUE) { if (!dl_overwrite) { info("HYDAT is updated on a quarterly basis, check again soon for an updated version.") } - + browser() if (new_hydat != existing_hydat & ask) { # New DB available or no local DB at all msg <- paste0( - "Downloading HYDAT will take up to 10 minutes (", - size, " MB). \nThis will remove any older versions of HYDAT, if applicable. \nIs that okay?" + "This version of HYDAT is ", size, "MB in size and will take some time to download. + \nThis will remove any older versions of HYDAT, if applicable. \nIs that okay?" ) ans <- ask(msg) } else { @@ -106,12 +112,10 @@ download_hydat <- function(dl_hydat_here = NULL, ask = TRUE) { tmp <- tempfile("hydat_", fileext = ".zip") ## Download the zip file - res <- httr::GET( - url, httr::write_disk(tmp), httr::progress("down"), - httr::user_agent("https://github.com/ropensci/tidyhydat") - ) - on.exit(file.remove(tmp), add = TRUE) - httr::stop_for_status(res) + hydb_req <- httr2::request(url) + hydb_req <- tidyhydat_agent(hydb_req) + resp <- req_perform(hydb_req, tmp) + httr2::resp_check_status(resp) ## Extract the file to a temporary dir if (file.exists(tmp)) info("Extracting HYDAT") @@ -153,10 +157,13 @@ hy_remote <- function() { # Run network check network_check(hy_base_url()) - x <- httr::GET(hy_base_url()) - httr::stop_for_status(x) + req <- httr2::request(hy_base_url()) + req <- httr2::req_perform(req) + resp <- httr2::resp_check_status(req) + + raw_date <- substr( - gsub("^.*\\Hydat_sqlite3_", "", httr::content(x, "text")), + gsub("^.*\\Hydat_sqlite3_", "", httr2::resp_body_string(req)), 1, 8 ) diff --git a/R/utils.R b/R/utils.R index ee698c2..d2e2a10 100644 --- a/R/utils.R +++ b/R/utils.R @@ -203,6 +203,12 @@ network_check <- function(url, proxy_url = NULL, proxy_port = NULL) { ) } +tidyhydat_agent <- function(req) { + httr2::req_user_agent( + req, + string = "https://github.com/ropensci/tidyhydat") +} + #' Convenience function to pull station number from tidyhydat functions #' From 541e05a0be4f80728f211922bc7d99bc9294575a Mon Sep 17 00:00:00 2001 From: Sam Albers Date: Fri, 23 Jun 2023 21:52:12 -0700 Subject: [PATCH 04/17] webservice --- R/realtime-webservice.R | 72 ++++++++++++++++++++++++----------------- 1 file changed, 43 insertions(+), 29 deletions(-) diff --git a/R/realtime-webservice.R b/R/realtime-webservice.R index 7a3e277..5c277b1 100755 --- a/R/realtime-webservice.R +++ b/R/realtime-webservice.R @@ -66,12 +66,10 @@ #' @export -realtime_ws <- function(station_number, parameters = NULL, - start_date = Sys.Date() - 30, end_date = Sys.Date()) { - if (is_mac()) { - # temporary patch to work around vroom 1.6.4 bug - readr::local_edition(1) - } +realtime_ws <- function(station_number, + parameters = NULL, + start_date = Sys.Date() - 30, + end_date = Sys.Date()) { if (is.null(parameters)) parameters <- c(46, 16, 52, 47, 8, 5, 41, 18) @@ -92,17 +90,26 @@ realtime_ws <- function(station_number, parameters = NULL, if (!grepl("[0-9]{4}-[0-1][0-9]-[0-3][0-9] [0-2][0-9]:[0-5][0-9]:[0-5][0-9]", start_date)) { - stop("Invalid date format. start_date need to be in either YYYY-MM-DD or YYYY-MM-DD HH:MM:SS formats", call. = FALSE) + stop( + "Invalid date format. start_date need to be in either YYYY-MM-DD or YYYY-MM-DD HH:MM:SS formats", + call. = FALSE + ) } if (!grepl("[0-9]{4}-[0-1][0-9]-[0-3][0-9] [0-2][0-9]:[0-5][0-9]:[0-5][0-9]", end_date)) { - stop("Invalid date format. start_date need to be in either YYYY-MM-DD or YYYY-MM-DD HH:MM:SS formats", call. = FALSE) + stop( + "Invalid date format. start_date need to be in either YYYY-MM-DD or YYYY-MM-DD HH:MM:SS formats", + call. = FALSE + ) } if (!is.null(start_date) & !is.null(end_date)) { if (lubridate::ymd_hms(end_date) < lubridate::ymd_hms(start_date)) { - stop("start_date is after end_date. Try swapping values.", call. = FALSE) + stop( + "start_date is after end_date. Try swapping values.", + call. = FALSE + ) } } @@ -113,13 +120,17 @@ realtime_ws <- function(station_number, parameters = NULL, ## Build link for GET baseurl <- "https://wateroffice.ec.gc.ca/services/real_time_data/csv/inline?" + + station_string <- paste0("stations[]=", station_number, collapse = "&") parameters_string <- paste0("parameters[]=", parameters, collapse = "&") - date_string <- paste0("start_date=", substr(start_date, 1, 10), "%20", substr(start_date, 12, 19), - "&end_date=", substr(end_date, 1, 10), "%20", substr(end_date, 12, 19)) + date_string <- paste0( + "start_date=", substr(start_date, 1, 10), "%20", substr(start_date, 12, 19), + "&end_date=", substr(end_date, 1, 10), "%20", substr(end_date, 12, 19) + ) ## paste them all together - url_for_GET <- paste0( + query_url <- paste0( baseurl, station_string, "&", parameters_string, "&", @@ -127,26 +138,28 @@ realtime_ws <- function(station_number, parameters = NULL, ) ## Get data - get_ws <- httr::GET(url_for_GET, httr::user_agent("https://github.com/ropensci/tidyhydat")) + req <- httr2::request(query_url) + req <- tidyhydat_agent(req) + resp <- httr2::req_perform(req) ## Give webservice some time Sys.sleep(1) - - ## Check the GET status - httr::stop_for_status(get_ws) - if (httr::headers(get_ws)$`content-type` != "text/csv; charset=utf-8") { - stop("GET response is not a csv file") + ## Check the respstatus + httr2::resp_check_status(resp) + + + if (httr2::resp_headers(resp)$`Content-Type` != "text/csv; charset=utf-8") { + stop("Response is not a csv file") } ## Turn it into a tibble and specify correct column classes - csv_df <- httr::content( - get_ws, - type = "text/csv", - encoding = "UTF-8", + csv_df <- readr::read_csv( + httr2::resp_body_string(resp), col_types = "cTidccc" - ) + ) + ## Check here to see if csv_df has any data in it if (nrow(csv_df) == 0) { @@ -154,15 +167,17 @@ realtime_ws <- function(station_number, parameters = NULL, } ## Rename columns to reflect tidyhydat naming - colnames(csv_df) <- c("STATION_NUMBER","Date","Parameter","Value","Grade","Symbol","Approval") + colnames(csv_df) <- c("STATION_NUMBER", "Date", "Parameter", "Value", "Grade", "Symbol", "Approval") csv_df <- dplyr::left_join( csv_df, dplyr::select(tidyhydat::param_id, -Name_Fr), by = c("Parameter") ) - csv_df <- dplyr::select(csv_df, STATION_NUMBER, Date, Name_En, Value, Unit, - Grade, Symbol, Approval, Parameter, Code) + csv_df <- dplyr::select( + csv_df, STATION_NUMBER, Date, Name_En, Value, Unit, + Grade, Symbol, Approval, Parameter, Code + ) ## What stations were missed? differ <- setdiff(unique(station_number), unique(csv_df$STATION_NUMBER)) @@ -170,8 +185,7 @@ realtime_ws <- function(station_number, parameters = NULL, if (length(differ) <= 10) { message("The following station(s) were not retrieved: ", paste0(differ, sep = " ")) message("Check station number for typos or if it is a valid station in the network") - } - else { + } else { message("More than 10 stations from the initial query were not returned. Ensure realtime and active status are correctly specified.") } } else { @@ -180,7 +194,7 @@ realtime_ws <- function(station_number, parameters = NULL, p_differ <- setdiff(unique(parameters), unique(csv_df$Parameter)) if (length(p_differ) != 0) { - message("The following valid parameter(s) were not retrieved for at least one station you requested: ", paste0(p_differ, sep = " ")) + message("The following valid parameter(s) were not retrieved for at least one station you requested: ", paste0(p_differ, sep = " ")) } else { message("All parameters successfully retrieved") } From c214ebde630120dcd741dc885bbc89c00af5269e Mon Sep 17 00:00:00 2001 From: Sam Albers Date: Fri, 23 Jun 2023 21:52:22 -0700 Subject: [PATCH 05/17] adjust some messages --- R/realtime_plot.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/realtime_plot.R b/R/realtime_plot.R index bfc1497..2a07df6 100644 --- a/R/realtime_plot.R +++ b/R/realtime_plot.R @@ -30,10 +30,10 @@ plot.realtime <- function(x = NULL, Parameter = c("Flow", "Level"), ...) { Parameter <- match.arg(Parameter) if (length(unique(x$STATION_NUMBER)) > 1L) { - stop("realtime plot methods only work with objects that contain one station", call. = FALSE) + stop("realtime plots only work with objects that contain one station", call. = FALSE) } - if (is.null(x)) stop("Station(s) not present in the datamart") + if (is.null(x)) stop("Station not present in the datamart") ## Catch mis labelled parameter if (Parameter == "Level" && ((nrow(x[x$Parameter == "Level", ]) == 0) | all(is.na(x[x$Parameter == "Level", ]$Value)))) { From 4db6a807642e4105b0920e19b8e5dc1be2eed956 Mon Sep 17 00:00:00 2001 From: Sam Albers Date: Wed, 3 Jan 2024 21:27:14 -0800 Subject: [PATCH 06/17] add tidyhydat_perform --- R/download.R | 7 +++---- R/utils.R | 6 ++++++ 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/R/download.R b/R/download.R index 31b6a64..efe7b86 100644 --- a/R/download.R +++ b/R/download.R @@ -59,7 +59,7 @@ download_hydat <- function(dl_hydat_here = NULL, ask = TRUE) { req <- httr2::request(url) req <- httr2::req_method(req, "HEAD") req <- tidyhydat_agent(req) - req <- req_perform(req) + req <- tidyhydat_perform(req) httr2::resp_check_status(req) size <- round(as.numeric( @@ -83,7 +83,6 @@ download_hydat <- function(dl_hydat_here = NULL, ask = TRUE) { if (!dl_overwrite) { info("HYDAT is updated on a quarterly basis, check again soon for an updated version.") } - browser() if (new_hydat != existing_hydat & ask) { # New DB available or no local DB at all msg <- paste0( "This version of HYDAT is ", size, "MB in size and will take some time to download. @@ -114,7 +113,7 @@ download_hydat <- function(dl_hydat_here = NULL, ask = TRUE) { ## Download the zip file hydb_req <- httr2::request(url) hydb_req <- tidyhydat_agent(hydb_req) - resp <- req_perform(hydb_req, tmp) + resp <- tidyhydat_perform(hydb_req, path = tmp) httr2::resp_check_status(resp) ## Extract the file to a temporary dir @@ -158,7 +157,7 @@ hy_remote <- function() { network_check(hy_base_url()) req <- httr2::request(hy_base_url()) - req <- httr2::req_perform(req) + req <- tidyhydat_perform(req) resp <- httr2::resp_check_status(req) diff --git a/R/utils.R b/R/utils.R index d2e2a10..cf94dbd 100644 --- a/R/utils.R +++ b/R/utils.R @@ -256,4 +256,10 @@ hy_expected_tbls <- function() { is_mac <- function() { system_info <- Sys.info() grepl("darwin", tolower(system_info["sysname"])) +} + +tidyhydat_perform <- function(req, ...) { + req <- httr2::req_retry(req, max_tries = 5) + req <- httr2::req_progress(req) + httr2::req_perform(req, ...) } \ No newline at end of file From 468850efd22c4be3c6f2f0d77bb72bf34ee31efa Mon Sep 17 00:00:00 2001 From: Sam Albers Date: Thu, 3 Oct 2024 15:41:16 -0700 Subject: [PATCH 07/17] drop from imports --- DESCRIPTION | 1 - 1 file changed, 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index 6d374ff..c3d5ba4 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -27,7 +27,6 @@ Imports: DBI (>= 0.7), dbplyr (>= 1.1.0), dplyr (>= 0.7.4), - httr (>= 1.3.1), httr2, lubridate (>= 1.6.0), rappdirs (>= 0.3.1), From 638ffc8cf56accf141e60710588eff7aa7d2ec7a Mon Sep 17 00:00:00 2001 From: Sam Albers Date: Thu, 3 Oct 2024 20:27:04 -0700 Subject: [PATCH 08/17] use for stations --- R/realtime.R | 19 ++++++------------- 1 file changed, 6 insertions(+), 13 deletions(-) diff --git a/R/realtime.R b/R/realtime.R index 336a21c..7272678 100644 --- a/R/realtime.R +++ b/R/realtime.R @@ -104,19 +104,12 @@ realtime_stations <- function(prov_terr_state_loc = NULL) { url_check <- httr::GET(realtime_link, httr::user_agent("https://github.com/ropensci/tidyhydat")) - ## Checking to make sure the link is valid - if (httr::http_error(url_check) == "TRUE") { - stop(paste0(realtime_link, " is not a valid url. Datamart may be down or the url has changed.")) - } - - if (is_mac()) { - # temporary patch to work around vroom 1.6.4 bug - readr::local_edition(1) - } + req <- httr2::request(realtime_link) + resp <- tidyhydat_perform(req) + resp_str <- httr2::resp_body_string(resp) - net_tibble <- httr::content(url_check, - type = "text/csv", - encoding = "UTF-8", + net_tibble <- readr::read_csv( + resp_str, skip = 1, col_names = c( "STATION_NUMBER", @@ -141,7 +134,7 @@ realtime_stations <- function(prov_terr_state_loc = NULL) { } - as.realtime(dplyr::filter(net_tibble, PROV_TERR_STATE_LOC %in% prov)) + as.realtime(net_tibble[net_tibble$PROV_TERR_STATE_LOC %in% prov, ]) } #' Add local datetime column to realtime tibble From fb0072a52fe1e14dc405c75c8fe89111be8e0475 Mon Sep 17 00:00:00 2001 From: Sam Albers Date: Thu, 3 Oct 2024 20:37:32 -0700 Subject: [PATCH 09/17] drop from csv --- R/realtime.R | 7 +-- R/utils-realtime.R | 105 +++++++++++++++++---------------------------- 2 files changed, 41 insertions(+), 71 deletions(-) diff --git a/R/realtime.R b/R/realtime.R index 7272678..77e9339 100644 --- a/R/realtime.R +++ b/R/realtime.R @@ -101,12 +101,7 @@ realtime_stations <- function(prov_terr_state_loc = NULL) { prov <- prov_terr_state_loc realtime_link <- "https://dd.weather.gc.ca/hydrometric/doc/hydrometric_StationList.csv" - - url_check <- httr::GET(realtime_link, httr::user_agent("https://github.com/ropensci/tidyhydat")) - - req <- httr2::request(realtime_link) - resp <- tidyhydat_perform(req) - resp_str <- httr2::resp_body_string(resp) + resp_str <- tidyhydat_realtime_csv_parser(realtime_link) net_tibble <- readr::read_csv( resp_str, diff --git a/R/utils-realtime.R b/R/utils-realtime.R index f87e156..4802602 100644 --- a/R/utils-realtime.R +++ b/R/utils-realtime.R @@ -11,12 +11,15 @@ # See the License for the specific language governing permissions and limitations under the License. ############################################### +tidyhydat_realtime_csv_parser <- function(file) { + req <- httr2::request(file) + req <- httr2::req_user_agent(req, "https://github.com/ropensci/tidyhydat") + resp <- tidyhydat_perform(req) + httr2::resp_body_string(resp) +} + ## Get realtime station data - single station single_realtime_station <- function(station_number) { - if (is_mac()) { - # temporary patch to work around vroom 1.6.4 bug - readr::local_edition(1) - } ## If station is provided if (!is.null(station_number)) { sym_STATION_NUMBER <- sym("STATION_NUMBER") @@ -62,71 +65,43 @@ single_realtime_station <- function(station_number) { "Flow_CODE" ) - url_check <- httr::GET(infile[1], httr::user_agent("https://github.com/ropensci/tidyhydat")) - ## check if a valid url - if (httr::http_error(url_check) == TRUE) { - info(paste0("No hourly data found for ", STATION_NUMBER_SEL)) - - h <- dplyr::tibble( - A = STATION_NUMBER_SEL, B = NA, C = NA, D = NA, E = NA, - F = NA, G = NA, H = NA, I = NA, J = NA - ) - - colnames(h) <- colHeaders - } else { - h <- httr::content( - url_check, - type = "text/csv", - encoding = "UTF-8", - skip = 1, - col_names = colHeaders, - col_types = readr::cols( - STATION_NUMBER = readr::col_character(), - Date = readr::col_datetime(), - Level = readr::col_double(), - Level_GRADE = readr::col_character(), - Level_SYMBOL = readr::col_character(), - Level_CODE = readr::col_integer(), - Flow = readr::col_double(), - Flow_GRADE = readr::col_character(), - Flow_SYMBOL = readr::col_character(), - Flow_CODE = readr::col_integer() - ) + h_resp_str <- tidyhydat_realtime_csv_parser(infile[1]) + h <- readr::read_csv( + h_resp_str, + col_names = colHeaders, + col_types = readr::cols( + STATION_NUMBER = readr::col_character(), + Date = readr::col_datetime(), + Level = readr::col_double(), + Level_GRADE = readr::col_character(), + Level_SYMBOL = readr::col_character(), + Level_CODE = readr::col_integer(), + Flow = readr::col_double(), + Flow_GRADE = readr::col_character(), + Flow_SYMBOL = readr::col_character(), + Flow_CODE = readr::col_integer() ) - } + ) # download daily file - url_check_d <- httr::GET(infile[2], httr::user_agent("https://github.com/ropensci/tidyhydat")) - ## check if a valid url - if (httr::http_error(url_check_d) == TRUE) { - info(paste0("No daily data found for ", STATION_NUMBER_SEL)) - - d <- dplyr::tibble( - A = STATION_NUMBER_SEL, B = NA, C = NA, D = NA, E = NA, - F = NA, G = NA, H = NA, I = NA, J = NA - ) - colnames(d) <- colHeaders - } else { - d <- httr::content( - url_check_d, - type = "text/csv", - encoding = "UTF-8", - skip = 1, - col_names = colHeaders, - col_types = readr::cols( - STATION_NUMBER = readr::col_character(), - Date = readr::col_datetime(), - Level = readr::col_double(), - Level_GRADE = readr::col_character(), - Level_SYMBOL = readr::col_character(), - Level_CODE = readr::col_integer(), - Flow = readr::col_double(), - Flow_GRADE = readr::col_character(), - Flow_SYMBOL = readr::col_character(), - Flow_CODE = readr::col_integer() - ) + p_resp_str <- tidyhydat_realtime_csv_parser(infile[2]) + + d <- readr::read_csv( + p_resp_str, + col_names = colHeaders, + col_types = readr::cols( + STATION_NUMBER = readr::col_character(), + Date = readr::col_datetime(), + Level = readr::col_double(), + Level_GRADE = readr::col_character(), + Level_SYMBOL = readr::col_character(), + Level_CODE = readr::col_integer(), + Flow = readr::col_double(), + Flow_GRADE = readr::col_character(), + Flow_SYMBOL = readr::col_character(), + Flow_CODE = readr::col_integer() ) - } + ) # now merge the hourly + daily (hourly data overwrites daily where dates are the same) p <- dplyr::filter(d, Date < min(h$Date)) From 4e3a4ab5c6a3806cb415675744884272a7789317 Mon Sep 17 00:00:00 2001 From: Sam Albers Date: Thu, 3 Oct 2024 20:39:53 -0700 Subject: [PATCH 10/17] all time --- R/utils-realtime.R | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/R/utils-realtime.R b/R/utils-realtime.R index 4802602..ab3b8c7 100644 --- a/R/utils-realtime.R +++ b/R/utils-realtime.R @@ -115,9 +115,7 @@ all_realtime_station <- function(PROV) { base_url <- "https://dd.weather.gc.ca/hydrometric/csv/" prov_url <- paste0(base_url, PROV, "/daily/", PROV, "_daily_hydrometric.csv") - res <- httr::GET(prov_url, httr::progress("down"), httr::user_agent("https://github.com/ropensci/tidyhydat")) - - httr::stop_for_status(res) + res <- tidyhydat_realtime_csv_parser(prov_url) # Define column names as the same as HYDAT colHeaders <- @@ -134,11 +132,8 @@ all_realtime_station <- function(PROV) { "Flow_CODE" ) - output <- httr::content( + output <- readr::read_csv( res, - type = "text/csv", - encoding = "UTF-8", - skip = 1, col_names = colHeaders, col_types = readr::cols( STATION_NUMBER = readr::col_character(), From 52be07f5370f5c8558331d4a04fde3e8c2efa48a Mon Sep 17 00:00:00 2001 From: Sam Albers Date: Thu, 3 Oct 2024 20:43:10 -0700 Subject: [PATCH 11/17] last one? --- R/zzz.R | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/R/zzz.R b/R/zzz.R index 5aae172..a64bf81 100644 --- a/R/zzz.R +++ b/R/zzz.R @@ -18,13 +18,12 @@ packageStartupMessage(info("Checking for a new version of HYDAT...")) base_url <- "http://collaboration.cmc.ec.gc.ca/cmc/hydrometrics/www/" - x <- httr::GET(base_url) - httr::stop_for_status(x) - + x <- tidyhydat_realtime_csv_parser(base_url) + ## Extract newest HYDAT new_hydat <- as.Date(substr(gsub( "^.*\\Hydat_sqlite3_", "", - httr::content(x, "text") + x ), 1, 8), "%Y%m%d") ## Compare that to existing HYDAT From c1713ef269b06399a95833ed55bdfc9b0c37ba76 Mon Sep 17 00:00:00 2001 From: Sam Albers Date: Fri, 4 Oct 2024 09:16:28 -0700 Subject: [PATCH 12/17] wip error handling --- NEWS.md | 1 + R/utils-realtime.R | 88 ++++++++++++++++--------- tests/testthat/test_download_realtime.R | 8 +-- 3 files changed, 62 insertions(+), 35 deletions(-) diff --git a/NEWS.md b/NEWS.md index 2d17746..479ff79 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,4 +1,5 @@ # tidyhydat 0.6.2 +- dropped httr in favour of httr2 - fix bug where `download_hydat()` fails if `tempdir()` is on a different device than `hydat_path` (@mpdavison, #192) - fix bug where `download_hydat()` wasn't checking alternative paths for success (@Travis-Simmons) diff --git a/R/utils-realtime.R b/R/utils-realtime.R index ab3b8c7..c2cf037 100644 --- a/R/utils-realtime.R +++ b/R/utils-realtime.R @@ -14,8 +14,15 @@ tidyhydat_realtime_csv_parser <- function(file) { req <- httr2::request(file) req <- httr2::req_user_agent(req, "https://github.com/ropensci/tidyhydat") + req <- httr2::req_error(req, is_error = function(resp) FALSE) + req <- httr2::req_retry(req, max_tries = 3) resp <- tidyhydat_perform(req) - httr2::resp_body_string(resp) + if (httr2::resp_status(resp) == 404) { + resp <- NA_character_ + } else { + resp <- httr2::resp_body_string(resp) + } + resp } ## Get realtime station data - single station @@ -66,42 +73,61 @@ single_realtime_station <- function(station_number) { ) h_resp_str <- tidyhydat_realtime_csv_parser(infile[1]) - h <- readr::read_csv( - h_resp_str, - col_names = colHeaders, - col_types = readr::cols( - STATION_NUMBER = readr::col_character(), - Date = readr::col_datetime(), - Level = readr::col_double(), - Level_GRADE = readr::col_character(), - Level_SYMBOL = readr::col_character(), - Level_CODE = readr::col_integer(), - Flow = readr::col_double(), - Flow_GRADE = readr::col_character(), - Flow_SYMBOL = readr::col_character(), - Flow_CODE = readr::col_integer() + if (is.na(h_resp_str)) { + h <- dplyr::tibble( + A = station_number, B = NA, C = NA, D = NA, E = NA, + F = NA, G = NA, H = NA, I = NA, J = NA ) - ) + colnames(h) <- colHeaders + } else { + h <- readr::read_csv( + h_resp_str, + col_names = colHeaders, + col_types = readr::cols( + STATION_NUMBER = readr::col_character(), + Date = readr::col_datetime(), + Level = readr::col_double(), + Level_GRADE = readr::col_character(), + Level_SYMBOL = readr::col_character(), + Level_CODE = readr::col_integer(), + Flow = readr::col_double(), + Flow_GRADE = readr::col_character(), + Flow_SYMBOL = readr::col_character(), + Flow_CODE = readr::col_integer() + ) + ) + } + # download daily file p_resp_str <- tidyhydat_realtime_csv_parser(infile[2]) - d <- readr::read_csv( - p_resp_str, - col_names = colHeaders, - col_types = readr::cols( - STATION_NUMBER = readr::col_character(), - Date = readr::col_datetime(), - Level = readr::col_double(), - Level_GRADE = readr::col_character(), - Level_SYMBOL = readr::col_character(), - Level_CODE = readr::col_integer(), - Flow = readr::col_double(), - Flow_GRADE = readr::col_character(), - Flow_SYMBOL = readr::col_character(), - Flow_CODE = readr::col_integer() + if (is.na(p_resp_str)) { + d <- dplyr::tibble( + A = station_number, B = NA, C = NA, D = NA, E = NA, + F = NA, G = NA, H = NA, I = NA, J = NA ) - ) + colnames(h) <- colHeaders + } else { + d <- readr::read_csv( + p_resp_str, + col_names = colHeaders, + col_types = readr::cols( + STATION_NUMBER = readr::col_character(), + Date = readr::col_datetime(), + Level = readr::col_double(), + Level_GRADE = readr::col_character(), + Level_SYMBOL = readr::col_character(), + Level_CODE = readr::col_integer(), + Flow = readr::col_double(), + Flow_GRADE = readr::col_character(), + Flow_SYMBOL = readr::col_character(), + Flow_CODE = readr::col_integer() + ) + ) + } + + # now merge the hourly + daily (hourly data overwrites daily where dates are the same) p <- dplyr::filter(d, Date < min(h$Date)) diff --git a/tests/testthat/test_download_realtime.R b/tests/testthat/test_download_realtime.R index e76e5ee..0495f39 100644 --- a/tests/testthat/test_download_realtime.R +++ b/tests/testthat/test_download_realtime.R @@ -1,5 +1,5 @@ test_that("realtime_dd returns the correct data header", { - # skip_on_travis() + skip_on_cran() expect_identical( colnames(realtime_dd(station_number = "08MF005", prov_terr_state_loc = "BC")), @@ -8,7 +8,7 @@ test_that("realtime_dd returns the correct data header", { }) test_that("realtime_dd can download stations from a whole province using prov_terr_state_loc and stores query time", { - # skip_on_travis() + skip_on_cran() expected_columns <- c( "STATION_NUMBER", "PROV_TERR_STATE_LOC", "Date", "Parameter", @@ -22,13 +22,13 @@ test_that("realtime_dd can download stations from a whole province using prov_te test_that("realtime_dd can download stations from multiple provinces using station_number", { - # skip_on_travis() + skip_on_cran() expect_error(realtime_dd(station_number = c("01CD005", "08MF005")), regexp = NA) }) test_that("When station_number is ALL there is an error", { - # skip_on_travis() + skip_on_cran() expect_error(realtime_dd(station_number = "ALL")) }) From e71184eee868153b5d99b919469dfd02ec413af3 Mon Sep 17 00:00:00 2001 From: Sam Albers Date: Fri, 4 Oct 2024 09:37:30 -0700 Subject: [PATCH 13/17] bit more clean up --- R/realtime.R | 2 +- R/utils-realtime.R | 115 ++++++++++++++++++--------------------------- R/zzz.R | 2 +- 3 files changed, 47 insertions(+), 72 deletions(-) diff --git a/R/realtime.R b/R/realtime.R index 77e9339..58d3c17 100644 --- a/R/realtime.R +++ b/R/realtime.R @@ -101,7 +101,7 @@ realtime_stations <- function(prov_terr_state_loc = NULL) { prov <- prov_terr_state_loc realtime_link <- "https://dd.weather.gc.ca/hydrometric/doc/hydrometric_StationList.csv" - resp_str <- tidyhydat_realtime_csv_parser(realtime_link) + resp_str <- realtime_parser(realtime_link) net_tibble <- readr::read_csv( resp_str, diff --git a/R/utils-realtime.R b/R/utils-realtime.R index c2cf037..fe3643c 100644 --- a/R/utils-realtime.R +++ b/R/utils-realtime.R @@ -11,7 +11,7 @@ # See the License for the specific language governing permissions and limitations under the License. ############################################### -tidyhydat_realtime_csv_parser <- function(file) { +realtime_parser <- function(file) { req <- httr2::request(file) req <- httr2::req_user_agent(req, "https://github.com/ropensci/tidyhydat") req <- httr2::req_error(req, is_error = function(resp) FALSE) @@ -58,78 +58,46 @@ single_realtime_station <- function(station_number) { ) # Define column names as the same as HYDAT - colHeaders <- - c( - "STATION_NUMBER", - "Date", - "Level", - "Level_GRADE", - "Level_SYMBOL", - "Level_CODE", - "Flow", - "Flow_GRADE", - "Flow_SYMBOL", - "Flow_CODE" - ) + colHeaders <- realtime_cols_headers() - h_resp_str <- tidyhydat_realtime_csv_parser(infile[1]) + h_resp_str <- realtime_parser(infile[1]) if (is.na(h_resp_str)) { h <- dplyr::tibble( A = station_number, B = NA, C = NA, D = NA, E = NA, F = NA, G = NA, H = NA, I = NA, J = NA ) colnames(h) <- colHeaders + h <- readr::type_convert(h, realtime_cols_types()) } else { h <- readr::read_csv( h_resp_str, + skip = 1, col_names = colHeaders, - col_types = readr::cols( - STATION_NUMBER = readr::col_character(), - Date = readr::col_datetime(), - Level = readr::col_double(), - Level_GRADE = readr::col_character(), - Level_SYMBOL = readr::col_character(), - Level_CODE = readr::col_integer(), - Flow = readr::col_double(), - Flow_GRADE = readr::col_character(), - Flow_SYMBOL = readr::col_character(), - Flow_CODE = readr::col_integer() - ) + col_types = realtime_cols_types() ) } # download daily file - p_resp_str <- tidyhydat_realtime_csv_parser(infile[2]) + p_resp_str <- realtime_parser(infile[2]) if (is.na(p_resp_str)) { d <- dplyr::tibble( A = station_number, B = NA, C = NA, D = NA, E = NA, F = NA, G = NA, H = NA, I = NA, J = NA ) - colnames(h) <- colHeaders + colnames(d) <- colHeaders + d <- readr::type_convert(d, realtime_cols_types()) } else { d <- readr::read_csv( p_resp_str, + skip = 1, col_names = colHeaders, - col_types = readr::cols( - STATION_NUMBER = readr::col_character(), - Date = readr::col_datetime(), - Level = readr::col_double(), - Level_GRADE = readr::col_character(), - Level_SYMBOL = readr::col_character(), - Level_CODE = readr::col_integer(), - Flow = readr::col_double(), - Flow_GRADE = readr::col_character(), - Flow_SYMBOL = readr::col_character(), - Flow_CODE = readr::col_integer() - ) + col_types = realtime_cols_types() ) } - - - # now merge the hourly + daily (hourly data overwrites daily where dates are the same) + # now append the hourly + daily (hourly data overwrites daily where dates are the same) p <- dplyr::filter(d, Date < min(h$Date)) output <- dplyr::bind_rows(p, h) @@ -141,38 +109,15 @@ all_realtime_station <- function(PROV) { base_url <- "https://dd.weather.gc.ca/hydrometric/csv/" prov_url <- paste0(base_url, PROV, "/daily/", PROV, "_daily_hydrometric.csv") - res <- tidyhydat_realtime_csv_parser(prov_url) + res <- realtime_parser(prov_url) # Define column names as the same as HYDAT - colHeaders <- - c( - "STATION_NUMBER", - "Date", - "Level", - "Level_GRADE", - "Level_SYMBOL", - "Level_CODE", - "Flow", - "Flow_GRADE", - "Flow_SYMBOL", - "Flow_CODE" - ) + colHeaders <- realtime_cols_headers() output <- readr::read_csv( res, col_names = colHeaders, - col_types = readr::cols( - STATION_NUMBER = readr::col_character(), - Date = readr::col_datetime(), - Level = readr::col_double(), - Level_GRADE = readr::col_character(), - Level_SYMBOL = readr::col_character(), - Level_CODE = readr::col_integer(), - Flow = readr::col_double(), - Flow_GRADE = readr::col_character(), - Flow_SYMBOL = readr::col_character(), - Flow_CODE = readr::col_integer() - ) + col_types = realtime_cols_types() ) @@ -180,6 +125,36 @@ all_realtime_station <- function(PROV) { realtime_tidy_data(output, PROV) } +realtime_cols_types <- function() { + readr::cols( + STATION_NUMBER = readr::col_character(), + Date = readr::col_datetime(), + Level = readr::col_double(), + Level_GRADE = readr::col_character(), + Level_SYMBOL = readr::col_character(), + Level_CODE = readr::col_integer(), + Flow = readr::col_double(), + Flow_GRADE = readr::col_character(), + Flow_SYMBOL = readr::col_character(), + Flow_CODE = readr::col_integer() + ) +} + +realtime_cols_headers <- function() { + c( + "STATION_NUMBER", + "Date", + "Level", + "Level_GRADE", + "Level_SYMBOL", + "Level_CODE", + "Flow", + "Flow_GRADE", + "Flow_SYMBOL", + "Flow_CODE" + ) +} + realtime_tidy_data <- function(data, prov) { ## Create symbols diff --git a/R/zzz.R b/R/zzz.R index a64bf81..6229cae 100644 --- a/R/zzz.R +++ b/R/zzz.R @@ -18,7 +18,7 @@ packageStartupMessage(info("Checking for a new version of HYDAT...")) base_url <- "http://collaboration.cmc.ec.gc.ca/cmc/hydrometrics/www/" - x <- tidyhydat_realtime_csv_parser(base_url) + x <- realtime_parser(base_url) ## Extract newest HYDAT new_hydat <- as.Date(substr(gsub( From 4aa5242509df9fd754c3ae265febd4759d234d2f Mon Sep 17 00:00:00 2001 From: Sam Albers Date: Fri, 4 Oct 2024 09:38:27 -0700 Subject: [PATCH 14/17] style --- R/realtime.R | 2 +- R/utils-realtime.R | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/R/realtime.R b/R/realtime.R index 58d3c17..f28a8fd 100644 --- a/R/realtime.R +++ b/R/realtime.R @@ -202,4 +202,4 @@ realtime_daily_mean <- function(.data, na.rm = FALSE) { df_mean <- dplyr::arrange(df_mean, Parameter) dplyr::ungroup(df_mean) -} +} \ No newline at end of file diff --git a/R/utils-realtime.R b/R/utils-realtime.R index fe3643c..2b648eb 100644 --- a/R/utils-realtime.R +++ b/R/utils-realtime.R @@ -76,7 +76,7 @@ single_realtime_station <- function(station_number) { col_types = realtime_cols_types() ) } - + # download daily file p_resp_str <- realtime_parser(infile[2]) @@ -186,4 +186,4 @@ has_internet <- function() { silent = TRUE ) !inherits(z, "try-error") -} +} \ No newline at end of file From 3e6171fccf71a25dc4ec4ae99738c53d57702a13 Mon Sep 17 00:00:00 2001 From: Sam Albers Date: Fri, 4 Oct 2024 09:47:58 -0700 Subject: [PATCH 15/17] one more skip --- R/utils-realtime.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/utils-realtime.R b/R/utils-realtime.R index 2b648eb..ef1bd56 100644 --- a/R/utils-realtime.R +++ b/R/utils-realtime.R @@ -113,9 +113,9 @@ all_realtime_station <- function(PROV) { # Define column names as the same as HYDAT colHeaders <- realtime_cols_headers() - output <- readr::read_csv( res, + skip = 1, col_names = colHeaders, col_types = realtime_cols_types() ) From c325e325d62875b2e490217c91f12eb22a0d5657 Mon Sep 17 00:00:00 2001 From: Sam Albers Date: Fri, 4 Oct 2024 10:04:44 -0700 Subject: [PATCH 16/17] set min version of httr2 --- DESCRIPTION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index c3d5ba4..9f0201e 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -27,7 +27,7 @@ Imports: DBI (>= 0.7), dbplyr (>= 1.1.0), dplyr (>= 0.7.4), - httr2, + httr2 (>= 1.0.0), lubridate (>= 1.6.0), rappdirs (>= 0.3.1), readr (>= 1.1.1), From 3b34e9bc60cc57876067a0bc921d460e2513e2b1 Mon Sep 17 00:00:00 2001 From: Sam Albers Date: Fri, 4 Oct 2024 13:33:08 -0700 Subject: [PATCH 17/17] bump R version --- DESCRIPTION | 2 +- NEWS.md | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index 9f0201e..a1c0558 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -20,7 +20,7 @@ License: Apache License (== 2.0) | file LICENSE URL: https://docs.ropensci.org/tidyhydat/, https://github.com/ropensci/tidyhydat/ BugReports: https://github.com/ropensci/tidyhydat/issues/ Depends: - R (>= 3.4.0) + R (>= 4.0.0) Imports: cli (>= 1.0.0), crayon (>= 1.3.4), diff --git a/NEWS.md b/NEWS.md index 479ff79..c9657a1 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,4 +1,5 @@ # tidyhydat 0.6.2 +- bump minimum R version to 4.0.0 - dropped httr in favour of httr2 - fix bug where `download_hydat()` fails if `tempdir()` is on a different device than `hydat_path` (@mpdavison, #192) - fix bug where `download_hydat()` wasn't checking alternative paths for success (@Travis-Simmons)