diff --git a/.env.sample b/.env.sample index cb708a9..c3c496b 100644 --- a/.env.sample +++ b/.env.sample @@ -13,8 +13,8 @@ PREPROCESS_DB_CDM_SCHEMA= # Schema name in the database to connect the PREPROCESS_SUMMARISE_LEVEL=monthly # Level to summarise record counts at (monthly or quarterly) # Low-frequency replacement -LOW_FREQUENCY_THRESHOLD=5 -LOW_FREQUENCY_REPLACEMENT=2.5 +LOW_FREQUENCY_THRESHOLD=10 +LOW_FREQUENCY_REPLACEMENT=9.999999 # For testing TEST_DB_PATH=./data-raw/test_db/eunomia diff --git a/app/R/mod_datatable.R b/app/R/mod_datatable.R index bd5fac8..bfa73de 100644 --- a/app/R/mod_datatable.R +++ b/app/R/mod_datatable.R @@ -64,12 +64,21 @@ mod_datatable_server <- function(id, selected_dates, bundle_concepts) { moduleServer(id, function(input, output, session) { concepts_with_counts <- reactive({ + low_freq_threshold <- as.numeric(Sys.getenv("LOW_FREQUENCY_THRESHOLD")) + join_counts_to_concepts(all_concepts, monthly_counts, selected_dates()) |> # Reorder and select the columns we want to display dplyr::select( "concept_id", "concept_name", "total_records", "mean_persons", "domain_id", "vocabulary_id", "concept_class_id" + ) |> + # Conditionally round numbers for better display + dplyr::mutate( + dplyr::across( + dplyr::where(is.double), + function(x) ifelse(x > low_freq_threshold, round(x), round(x, 2)) + ) ) }) output$datatable <- DT::renderDT(concepts_with_counts(), @@ -78,8 +87,8 @@ mod_datatable_server <- function(id, selected_dates, bundle_concepts) { colnames = c( "ID" = "concept_id", "Name" = "concept_name", - "Records" = "total_records", - "Patients" = "mean_persons", + "Total Records" = "total_records", + "Average Patients" = "mean_persons", "Domain ID" = "domain_id", "Vocabulary ID" = "vocabulary_id", "Concept Class ID" = "concept_class_id" @@ -111,10 +120,10 @@ join_counts_to_concepts <- function(concepts, monthly_counts, selected_dates) { filter_dates(selected_dates) |> dplyr::group_by(.data$concept_id) |> dplyr::summarise( - # round to avoid decimal values in in total_records because of low-req replacement - total_records = sum(round(.data$record_count)), - mean_persons = round(mean(.data$person_count, na.rm = TRUE), 2), - mean_records_per_person = round(mean(.data$records_per_person, na.rm = TRUE), 2) + total_records = sum(.data$record_count), + # Note that we can only calculate the average number of persons per month here + # as we cannot identify unique patients across months + mean_persons = mean(.data$person_count, na.rm = TRUE), ) # Use inner_join so we only keep concepts for which we have counts in the selected dates dplyr::inner_join(concepts, summarised_counts, by = "concept_id") diff --git a/app/tests/testthat/test-mod_datatable.R b/app/tests/testthat/test-mod_datatable.R index 0832f75..b394816 100644 --- a/app/tests/testthat/test-mod_datatable.R +++ b/app/tests/testthat/test-mod_datatable.R @@ -66,13 +66,12 @@ test_that("Adding records and patients counts to concepts table works", { concepts_with_counts <- join_counts_to_concepts(mock_selection_data, mock_monthly_counts, selected_dates) expect_in( - c("concept_id", "concept_name", "total_records", "mean_persons", "mean_records_per_person"), + c("concept_id", "concept_name", "total_records", "mean_persons"), names(concepts_with_counts) ) expect_equal(nrow(concepts_with_counts), 3) expect_equal(concepts_with_counts$total_records, c(100, 200, 300)) expect_equal(concepts_with_counts$mean_persons, c(10, 10, 10)) - expect_equal(concepts_with_counts$mean_records_per_person, c(10, 10, 10)) }) test_that("Added counts depends on selected dates", { @@ -81,7 +80,6 @@ test_that("Added counts depends on selected dates", { expect_equal(concepts_with_counts$total_records, c(100, 100, 100)) expect_equal(concepts_with_counts$mean_persons, c(10, 10, 10)) - expect_equal(concepts_with_counts$mean_records_per_person, c(10, 10, 10)) }) test_that("Only concepts with data for the selected date range are kept", { diff --git a/data/test_data/internal/omopcat_concepts.parquet b/data/test_data/internal/omopcat_concepts.parquet index 900b2a4..6c8e852 100644 Binary files a/data/test_data/internal/omopcat_concepts.parquet and b/data/test_data/internal/omopcat_concepts.parquet differ diff --git a/data/test_data/internal/omopcat_monthly_counts.parquet b/data/test_data/internal/omopcat_monthly_counts.parquet index 3dcec1e..b282be5 100644 Binary files a/data/test_data/internal/omopcat_monthly_counts.parquet and b/data/test_data/internal/omopcat_monthly_counts.parquet differ diff --git a/data/test_data/internal/omopcat_summary_stats.parquet b/data/test_data/internal/omopcat_summary_stats.parquet index 9e431d4..257aa4f 100644 Binary files a/data/test_data/internal/omopcat_summary_stats.parquet and b/data/test_data/internal/omopcat_summary_stats.parquet differ diff --git a/data/test_data/public/omopcat_concepts.parquet b/data/test_data/public/omopcat_concepts.parquet index 250113e..7e4d3bd 100644 Binary files a/data/test_data/public/omopcat_concepts.parquet and b/data/test_data/public/omopcat_concepts.parquet differ diff --git a/data/test_data/public/omopcat_monthly_counts.parquet b/data/test_data/public/omopcat_monthly_counts.parquet index 3009d2a..1560602 100644 Binary files a/data/test_data/public/omopcat_monthly_counts.parquet and b/data/test_data/public/omopcat_monthly_counts.parquet differ diff --git a/data/test_data/public/omopcat_summary_stats.parquet b/data/test_data/public/omopcat_summary_stats.parquet index d7405d1..b82e536 100644 Binary files a/data/test_data/public/omopcat_summary_stats.parquet and b/data/test_data/public/omopcat_summary_stats.parquet differ diff --git a/public.env.sample b/public.env.sample index 1378e1d..e9b0c46 100644 --- a/public.env.sample +++ b/public.env.sample @@ -14,7 +14,7 @@ PREPROCESS_SUMMARISE_LEVEL=quarterly # Level to summarise record counts at (mon # Low-frequency replacement LOW_FREQUENCY_THRESHOLD=10 -LOW_FREQUENCY_REPLACEMENT=5 +LOW_FREQUENCY_REPLACEMENT=9.99 # For testing TEST_DB_PATH=./data-raw/test_db/eunomia