Skip to content

Commit

Permalink
Merge pull request #557 from USEPA/TADA_CheckRequiredFields
Browse files Browse the repository at this point in the history
update TADA_CheckRequiredFields and TADA_AutoClean
  • Loading branch information
cristinamullin authored Dec 23, 2024
2 parents f28f76a + f776a29 commit 06aded1
Show file tree
Hide file tree
Showing 4 changed files with 102 additions and 33 deletions.
49 changes: 36 additions & 13 deletions R/RequiredCols.R
Original file line number Diff line number Diff line change
Expand Up @@ -341,34 +341,55 @@ TADA_GetTemplate <- function() {



#' TADA Module 1 Required Fields Check
#' TADA Required Fields Check
#'
#' This function checks if all required fields for TADA Module 1 are
#' included in the input dataframe.
#' This function checks if all fields required to run TADA functions are included in the input
#' dataframe. It is used in the TADA Shiny application to test user supplied files for compatibility
#' with the application.
#'
#' @param .data A dataframe
#'
#' @return Boolean result indicating whether or not the input dataframe contains all of the TADA profile fields.
#' @return TRUE if the input dataframe contains all of the required fields. If any required
#' fields are missing, the function stops with an error that lists the names of all missing
#' columns.
#'
#' @export
#'
#' @examples
#' \dontrun{
#' # Find web service URLs for each Profile using WQP User Interface (https://www.waterqualitydata.us/)
#' # Example WQP URL: https://www.waterqualitydata.us/#statecode=US%3A09&characteristicType=Nutrient&startDateLo=04-01-2023&startDateHi=11-01-2023&mimeType=csv&providers=NWIS&providers=STEWARDS&providers=STORET
#'
#'
#' # Use TADA_ReadWQPWebServices to load the Station, Project, and Phys-Chem Result profiles
#' stationProfile <- TADA_ReadWQPWebServices("https://www.waterqualitydata.us/data/Station/search?statecode=US%3A09&characteristicType=Nutrient&startDateLo=04-01-2023&startDateHi=11-01-2023&mimeType=csv&zip=yes&providers=NWIS&providers=STEWARDS&providers=STORET")
#' physchemProfile <- TADA_ReadWQPWebServices("https://www.waterqualitydata.us/data/Result/search?statecode=US%3A09&characteristicType=Nutrient&startDateLo=04-01-2023&startDateHi=11-01-2023&mimeType=csv&zip=yes&dataProfile=resultPhysChem&providers=NWIS&providers=STEWARDS&providers=STORET")
#' projectProfile <- TADA_ReadWQPWebServices("https://www.waterqualitydata.us/data/Project/search?statecode=US%3A09&characteristicType=Nutrient&startDateLo=04-01-2023&startDateHi=11-01-2023&mimeType=csv&zip=yes&providers=NWIS&providers=STEWARDS&providers=STORET")
#'
#'
#' # Join all three profiles using TADA_JoinWQPProfiles
#' TADAProfile <- TADA_JoinWQPProfiles(FullPhysChem = physchemProfile, Sites = stationProfile, Projects = projectProfile)
#'
#' # Run TADA_CheckRequiredFields
#' CheckRequirements_TADAProfile <- TADA_CheckRequiredFields(TADAProfile)
#' TADAProfile <- TADA_JoinWQPProfiles(FullPhysChem = physchemProfile, Sites = stationProfile,
#' Projects = projectProfile)
#'
#' # Run TADA_CheckRequiredFields, returns error message,
#' # 'TADA_CheckRequiredFields: the dataframe does not contain the required fields:
#' # ActivityStartDateTime'
#' TADA_CheckRequiredFields(TADAProfile)
#'
#' # Add missing col
#' TADAProfile1 <- dataRetrieval:::create_dateTime(df = TADAProfile,
#' date_col = "ActivityStartDate",
#' time_col = "ActivityStartTime.Time",
#' tz_col = "ActivityStartTime.TimeZoneCode",
#' tz = "UTC")
#'
#' review_TADAProfile1 = TADAProfile1 %>% dplyr::select(c("ActivityStartDate",
#' "ActivityStartTime.Time",
#' "ActivityStartTime.TimeZoneCode",
#' "ActivityStartDateTime",
#' "ActivityStartTime.TimeZoneCode_offset"))
#'
#' # re-run TADA_CheckRequiredFields, returns TRUE
#' TADA_CheckRequiredFields(TADAProfile1)
#' }
#'
#'
TADA_CheckRequiredFields <- function(.data) {
# remove names with TADA. string from require.cols
require.originals <- Filter(function(x) !any(grepl("TADA.", x)), require.cols)
Expand All @@ -380,8 +401,10 @@ TADA_CheckRequiredFields <- function(.data) {
if (all(require.originals %in% colnames(.data)) == TRUE) {
TRUE
} else {
stop("The dataframe does not contain the required fields.")
}
missingcols <- base::setdiff(require.originals, colnames(.data))
stop("TADA_CheckRequiredFields: the dataframe does not contain the required fields: ",
paste(as.character(missingcols),
collapse = ", ")) }
}


Expand Down
47 changes: 35 additions & 12 deletions R/Utilities.R
Original file line number Diff line number Diff line change
Expand Up @@ -224,7 +224,6 @@ TADA_AutoClean <- function(.data) {
# execute function after checks are passed



# check to make sure columns do not already exist and capitalize fields with known synonyms that
# only differ in caps
print("TADA_Autoclean: creating TADA-specific columns.")
Expand Down Expand Up @@ -272,6 +271,18 @@ TADA_AutoClean <- function(.data) {
.data$TADA.ResultMeasure.MeasureUnitCode <- toupper(.data$ResultMeasure.MeasureUnitCode)
}

if ("ActivityStartDateTime" %in% colnames(.data)) {
.data <- .data
} else {
# creates ActivityStartDateTime and ActivityStartTime.TimeZoneCode_offset
# this is only needed when dataRetrieval is not used to get WQP data
.data <- dataRetrieval:::create_dateTime(df = .data,
date_col = "ActivityStartDate",
time_col = "ActivityStartTime.Time",
tz_col = "ActivityStartTime.TimeZoneCode",
tz = "UTC")
}

# Transform "Dissolved oxygen (DO)" characteristic name to "DISSOLVED OXYGEN SATURATION" IF
# result unit is "%" or "% SATURATN".

Expand Down Expand Up @@ -915,7 +926,9 @@ TADA_GetUniqueNearbySites <- function(.data) {
#'
#' Retrieves data for a period of time in the past 20 years using
#' TADA_DataRetrieval. This function can be used for testing functions on
#' random datasets.
#' random datasets. Only random datasets with 10 or more results will be returned.
#' If a random dataset has fewer than 10 results, the function will automatically
#' create another random WQP query until a df with at least 10 results is returned.
#'
#' @param number_of_days Numeric. The default is 1, which will query and retrieve
#' data for a random two-day period (e.g., startDate = "2015-04-21",
Expand Down Expand Up @@ -943,20 +956,23 @@ TADA_GetUniqueNearbySites <- function(.data) {
#' df <- TADA_RandomTestingData(number_of_days = 5, choose_random_state = TRUE, autoclean = FALSE)
#' }
#'
TADA_RandomTestingData <- function(number_of_days = 1, choose_random_state = FALSE, autoclean = TRUE) {
while (TRUE) {
TADA_RandomTestingData <- function(number_of_days = 1, choose_random_state = FALSE,
autoclean = TRUE) {

get_random_data <- function(ndays = number_of_days, state_choice = choose_random_state,
ac = autoclean) {
# choose a random day within the last 20 years
twenty_yrs_ago <- Sys.Date() - 20 * 365
random_start_date <- twenty_yrs_ago + sample(20 * 365, 1)
# choose a random start date and add any number_of_days (set that as the end date)
end_date <- random_start_date + number_of_days
end_date <- random_start_date + ndays

if (choose_random_state == TRUE) {
if (state_choice == TRUE) {
load(system.file("extdata", "statecodes_df.Rdata", package = "EPATADA"))
state <- sample(statecodes_df$STUSAB, 1)
}

if (choose_random_state == FALSE) {
if (state_choice == FALSE) {
state <- "null"
}

Expand All @@ -966,7 +982,7 @@ TADA_RandomTestingData <- function(number_of_days = 1, choose_random_state = FAL
statecode = state
))

if (autoclean == TRUE) {
if (ac == TRUE) {
dat <- TADA_DataRetrieval(
startDate = as.character(random_start_date),
endDate = as.character(end_date),
Expand All @@ -975,19 +991,26 @@ TADA_RandomTestingData <- function(number_of_days = 1, choose_random_state = FAL
)
}

if (autoclean == FALSE) {
if (ac == FALSE) {
dat <- TADA_DataRetrieval(
startDate = as.character(random_start_date),
endDate = as.character(end_date),
statecode = state,
applyautoclean = FALSE
)
}

if (nrow(dat) > 0) {
return(dat)
return(dat)
}

verify_random_data <- function() {
df <- get_random_data()
while(nrow(df) < 10) {
df <- get_random_data()
}
return(df)
}

verify_random_data()
}

#' Aggregate multiple result values to a min, max, or mean
Expand Down
35 changes: 28 additions & 7 deletions man/TADA_CheckRequiredFields.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 3 additions & 1 deletion man/TADA_RandomTestingData.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit 06aded1

Please sign in to comment.