diff --git a/.github/CODE_OF_CONDUCT.md b/.github/CODE_OF_CONDUCT.md new file mode 100644 index 0000000..55d912a --- /dev/null +++ b/.github/CODE_OF_CONDUCT.md @@ -0,0 +1,140 @@ +# Contributor Covenant Code of Conduct + +## Our Pledge + +We as members, contributors, and leaders pledge to make participation +in our Scenario Modeling Hub community a harassment-free experience +for everyone, regardless of age, body size, visible or invisible +disability, ethnicity, sex characteristics, gender identity and +expression, level of experience, education, socio-economic status, +nationality, personal appearance, race, caste, color, political +affiliation, religion, or sexual identity and orientation. + +We pledge to act and interact in ways that contribute to an open, +welcoming, diverse, inclusive, and healthy community. + + +## Our Standards + +Examples of behavior that contributes to a positive environment for our +community include: + +* Demonstrating empathy and kindness toward other people +* Being respectful of differing opinions, viewpoints, and experiences +* Giving and gracefully accepting constructive feedback +* Accepting responsibility and apologizing to those affected by our mistakes, + and learning from the experience +* Focusing on what is best not just for us as individuals, but for the overall + community + +Examples of unacceptable behavior include: + +* The use of sexualized language or imagery, and sexual attention or advances + of any kind +* Trolling, insulting or derogatory comments, and personal or political attacks +* Public or private harassment +* Publishing others’ private information, such as a physical or email address, + without their explicit permission +* Other conduct which could reasonably be considered inappropriate in a + professional setting + + +## Enforcement Responsibilities + +The code of conduct committee is responsible for clarifying and +enforcing our standards of acceptable behavior and will take +appropriate and fair corrective action in response to any behavior +that they deem inappropriate, threatening, offensive, or harmful. + +The code of conduct committee has the right and responsibility to +remove, edit, or reject comments, commits, code, wiki edits, issues, +and other contributions that are not aligned to this Code of Conduct, +and will communicate reasons for moderation decisions when +appropriate. + +The SMH code of conduct committee consists of three members: + +- [Lucie Contamin](mailto:contamin@pitt.edu) +- [Emily Howerton](mailto:eh6323@princeton.edu) +- [Bryan Lewis](mailto:brylew@virginia.edu) + + +## Scope + +This Code of Conduct applies within all Scenario Modeling Hub public online +shared resources (e.g. GitHub, shared Slack channels). + + +## Enforcement + +Instances of abusive, harassing, or otherwise unacceptable behavior +may be reported to the code of conduct committee responsible for +enforcement by email. All complaints will be reviewed and +investigated promptly and fairly. + +All members of the code of conduct committee are obligated to respect +the privacy and security of the reporter of any incident. + + +## Enforcement Guidelines + +The code of conduct committee will follow these Community Impact +Guidelines in determining the consequences for any action they +deem in violation of this Code of Conduct: + + +### 1. Correction + +**Community Impact**: Use of inappropriate language or other behavior +deemed unprofessional or unwelcome in the community. 
+
+**Consequence**: A private, written warning from the code of conduct
+committee, providing clarity around the nature of the violation and an
+explanation of why the behavior was inappropriate. A public apology may
+be requested.
+
+### 2. Warning
+
+**Community Impact**: A violation through a single incident or series of
+actions.
+
+**Consequence**: A warning with consequences for continued behavior. No
+interaction with the people involved, including unsolicited interaction
+with those enforcing the Code of Conduct, for a specified period of time.
+This includes avoiding interactions in community spaces as well as external
+channels like social media. Violating these terms may lead to a temporary
+or permanent ban.
+
+### 3. Temporary Ban
+
+**Community Impact**: A serious violation of community standards, including
+sustained inappropriate behavior.
+
+**Consequence**: A temporary ban from any sort of interaction or public
+communication with the community for a specified period of time. No public
+or private interaction with the people involved, including unsolicited
+interaction with those enforcing the Code of Conduct, is allowed during
+this period. Violating these terms may lead to a permanent ban.
+
+### 4. Permanent Ban
+
+**Community Impact**: Demonstrating a pattern of violation of community
+standards, including sustained inappropriate behavior, harassment of an
+individual, or aggression toward or disparagement of classes of individuals.
+
+**Consequence**: A permanent ban from any sort of public interaction within
+the community.
+
+## Attribution
+
+This Code of Conduct is adapted from the
+[Contributor Covenant](https://www.contributor-covenant.org),
+version 2.1, available at
+<https://www.contributor-covenant.org/version/2/1/code_of_conduct.html>.
+
+Community Impact Guidelines were inspired by
+[Mozilla's code of conduct enforcement ladder](https://github.com/mozilla/inclusion).
+
+For answers to common questions about this code of conduct, see the FAQ at
+<https://www.contributor-covenant.org/faq>.
+Translations are available at <https://www.contributor-covenant.org/translations>.
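The two workflow diffs below share one dependency-install step. As a standalone reference, here is a minimal R sketch of that step, assuming the `version` branch pinned below exists on the SMHvalidation repository:

```r
# Minimal sketch of the install step both workflows run via `R -e`.
# NOT_CRAN = TRUE makes the arrow package build with its full feature set.
Sys.setenv("NOT_CRAN" = TRUE)
install.packages(c("arrow", "gh", "remotes"))
Sys.unsetenv("NOT_CRAN")

# SMHvalidation is pinned to the `version` branch; hubUtils stays on `main`.
remotes::install_url(
  "https://github.com/midas-network/SMHvalidation/archive/refs/heads/version.zip")
remotes::install_url(
  "https://github.com/Infectious-Disease-Modeling-Hubs/hubUtils/archive/refs/heads/main.zip")
```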
diff --git a/.github/workflows/install_cache_dependencies.yml b/.github/workflows/install_cache_dependencies.yml index c434f3b..b8d034d 100644 --- a/.github/workflows/install_cache_dependencies.yml +++ b/.github/workflows/install_cache_dependencies.yml @@ -8,7 +8,7 @@ on: jobs: cache-R-library: - runs-on: ubuntu-latest + runs-on: ubuntu-20.04 steps: - name: Checkout repository @@ -32,7 +32,7 @@ jobs: R -e 'Sys.setenv("NOT_CRAN" = TRUE) install.packages(c("arrow", "gh", "remotes")) Sys.unsetenv("NOT_CRAN") - remotes::install_url("https://github.com/midas-network/SMHvalidation/archive/refs/heads/main.zip") + remotes::install_url("https://github.com/midas-network/SMHvalidation/archive/refs/heads/version.zip") remotes::install_url("https://github.com/Infectious-Disease-Modeling-Hubs/hubUtils/archive/refs/heads/main.zip")' - env: + env: GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} diff --git a/.github/workflows/validation.yml b/.github/workflows/validation.yml index 20450d5..955a41e 100644 --- a/.github/workflows/validation.yml +++ b/.github/workflows/validation.yml @@ -3,10 +3,12 @@ on: pull_request_target: branches: - main - + - equity + paths: + - data-processed/** jobs: validates-files: - runs-on: ubuntu-latest + runs-on: ubuntu-20.04 steps: - name: Checkout repository @@ -21,12 +23,12 @@ jobs: run: sudo apt-get install libcurl4-openssl-dev libudunits2-dev libgdal-dev - - name: Cache R packages + - name: Cache R packages id: cache - if: runner.os != 'Windows' - uses: actions/cache@v2 - with: - path: ${{ env.R_LIBS_USER }} + if: runner.os != 'Windows' + uses: actions/cache@v2 + with: + path: ${{ env.R_LIBS_USER }} key: ${{ runner.os }}-${{ hashFiles('.github/requirements.txt') }}-1 - name: Install dependencies @@ -35,11 +37,11 @@ jobs: R -e 'Sys.setenv("NOT_CRAN" = TRUE) install.packages(c("arrow", "gh", "remotes")) Sys.unsetenv("NOT_CRAN") - remotes::install_url("https://github.com/midas-network/SMHvalidation/archive/refs/heads/main.zip") + remotes::install_url("https://github.com/midas-network/SMHvalidation/archive/refs/heads/version.zip") remotes::install_url("https://github.com/Infectious-Disease-Modeling-Hubs/hubUtils/archive/refs/heads/main.zip")' - env: + env: GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} - + - name: Run Validation run: | Rscript code/validation.R diff --git a/code/example_ensemble.R b/code/example_ensemble.R deleted file mode 100644 index bd78d8c..0000000 --- a/code/example_ensemble.R +++ /dev/null @@ -1,243 +0,0 @@ -rm(list = ls()) -print(paste0("TOTAL Process start:", Sys.time())) -library(arrow) -library(magrittr) -library(dplyr) - -# Function ------- - -#' Calculate LOP Ensemble(s) for peak targets -#' -#' Calculate LOP Ensemble (trimmed or untrimmed, with weight or not) for both -#' peak size hosp and peak time hosp by using -#' `CombineDistributions::aggregate_cdfs()`. The input file should match the -#' expected input format of this function. -#' -#' @param df_peak data frame containing the peak target information to calculate -#' ensemble on, the data frame should be in the expected format -#' @param weighting_scheme string to indicate how to weight in the aggregate ( -#' see function documentation) -#' @param n_trim integer denoting the number of models to trim ( -#' see function documentation). If NA, no trim applied -#' @param weight.df data frame containing the weight information by team-model, -#' should have 2 columns: `model_name` and `weight`. 
-#' @param ens_group character vector of group of column on which to calculate -#' the ensemble (for example, aggregate by scenario, target, horizon, location, -#' and age group for the FLU projections) -#' -#' @details -#' The function returns a data frame in the SMH standard format -#' -#' -#' @importFrom dplyr filter arrange mutate select -#' @importFrom CombineDistributions aggregate_cdfs -peak_ensemble <- function(df_peak, weighting_scheme, n_trim, weight.df, - ens_group, id_var = "model_name") { - - df_peak_time <- dplyr::filter(df_peak, grepl("time", target)) %>% - dplyr::arrange(target_end_date) - df_peak_size <- dplyr::filter(df_peak, grepl("size", target)) - if (is.na(n_trim)) { - df_lop_peak_time <- CombineDistributions::aggregate_cdfs( - df_peak_time, id_var = id_var, - group_by = ens_group, method = "LOP", weighting_scheme = weighting_scheme, - weights = weight.df, ret_quantiles = NA, reorder_quantiles = FALSE, - ret_values = unique(df_peak_time$target_end_date)) %>% - dplyr::mutate(target_end_date = value, - value = quantile, - output_type_id = make_epiweek(target_end_date), - output_type = "cdf", - horizon = NA) %>% - dplyr::select(-quantile, -target_end_date) - df_lop_peak_size <- CombineDistributions::aggregate_cdfs( - df_peak_size, id_var = id_var, - group_by = ens_group, method = "LOP", weighting_scheme = weighting_scheme, - weights = weight.df, ret_quantiles = unique(df_peak_size$quantile), - ret_values = NA) %>% - dplyr::mutate(horizon = NA, - output_type = "quantile", - output_type_id = quantile) %>% - dplyr::select(-quantile) - } else { - df_lop_peak_time <- CombineDistributions::aggregate_cdfs( - df_peak_time, id_var = id_var, - group_by = ens_group, method = "LOP", weighting_scheme = weighting_scheme, - n_trim = n_trim, ret_quantiles = NA, reorder_quantiles = FALSE, - ret_values = unique(df_peak_time$target_end_date)) %>% - dplyr::mutate(target_end_date = value, - value = quantile, - output_type_id = make_epiweek(target_end_date), - output_type = "cdf", - horizon = NA) %>% - dplyr::select(-quantile, -target_end_date) - df_lop_peak_size <- CombineDistributions::aggregate_cdfs( - df_peak_size, id_var = id_var, - group_by = ens_group, method = "LOP", weighting_scheme = weighting_scheme, - n_trim = n_trim, ret_quantiles = unique(df_peak_size$quantile), - ret_values = NA) %>% - dplyr::mutate(horizon = NA, - output_type = "quantile", - output_type_id = quantile) %>% - dplyr::select(-quantile) - } - df_lop_peak <- rbind(df_lop_peak_size, df_lop_peak_time) - return(df_lop_peak) -} - -# Prerequisite ----- -list_quantiles <- c(0.01, 0.025, 0.05, 0.1, 0.15, 0.2, 0.25, 0.3, 0.35, 0.4, - 0.45, 0.5, 0.55, 0.6, 0.65, 0.7, 0.75, 0.8, 0.85, 0.9, - 0.95, 0.975, 0.99) -# vector of quantile value to integrate in the calculation of the ensemble - -ens_group <- c("scenario_id", "target", "horizon", "origin_date", "location", - "age_group") -# group of column on which to calculate the ensemble (for example, aggregate -# by scenario, target, horizon (target_end_date), location, -# and age group for the FLU projections) -quantile_vect = list_quantiles -peak_group = c("origin_date", "scenario_id", "location", "target", - "age_group", "output_type_id") -all_target <- c("peak time hosp", "peak size hosp", "inc hosp", "cum hosp", - "inc death", "cum death") - -schema <- hubData::create_hub_schema(hubUtils::read_config(".", "tasks")) -schema <- c(schema$fields, - arrow::Field$create("run_grouping", arrow::int64()), - arrow::Field$create("stochastic_run", arrow::int64())) -schema <- 
arrow::schema(schema) - -source("code/utils.R") -source("code/temp_calc_ens.R") - -# Calculate Ensemble ------ -message("# Quantile ensemble") -print(paste0("Process quantile ensemble start:", Sys.time())) -df <- hubData::connect_model_output("output-processed/", - partition_names = c("model_id", - "origin_date", "target", - "location"), - file_format = "parquet", - schema = schema) %>% - dplyr::filter(origin_date == "2024-04-28", output_type == "quantile") %>% - dplyr::collect() %>% - dplyr::select(-run_grouping, -stochastic_run) %>% - janitor::remove_empty(which = c("rows", "cols")) - -ens_func = function(x, y) { - matrixStats::weightedMedian(x, w = y, na.rm = TRUE) } - -weight.df = data.frame( - model_id = unique(df$model_id), - weight = 1) - -df$output_type_id <- as.numeric(df$output_type_id) - -time_ens <- system.time({ - df_hv <- hubData::as_model_out_tbl(df) - ens <- hubEnsembles::simple_ensemble(df_hv, weight.df, agg_fun = "median") - ens <- dplyr::select(ens, -model_id) -}) -print("Time Ensemble:") -print(time_ens) -time_ens_lop <- system.time({ - df_lop <- ensemble_lop(df, list_quantiles, - weights = weight.df, - weighting_scheme = "cdf_exterior", - n_trim = 2, ens_group = ens_group, - id_var = "model_id") - -}) -print("Time Ensemble LOP:") -print(time_ens_lop) -time_ens_lop_un <- system.time({ - df_unlop <- ensemble_lop(df, list_quantiles, weights = weight.df, - weighting_scheme = "user_defined", - n_trim = NA, ens_group = ens_group, - id_var = "model_id") -}) -print("Time Ensemble LOP untrimmed:") -print(time_ens_lop_un) -print(paste0("Process quantile ensemble stop:", Sys.time())) - -# Peak ensemble -message("# Peak ensemble") -print(paste0("Process peak ensemble start:", Sys.time())) -df <- hubData::connect_model_output("output-processed/", - partition_names = c("model_id", - "origin_date", "target", - "location"), - file_format = "parquet", - schema = schema) %>% - dplyr::filter(origin_date == "2024-04-28", grepl("peak", target)) %>% - dplyr::collect() %>% - dplyr::select(-run_grouping, -stochastic_run, -race_ethnicity) - - -# Peak time -df_time <- dplyr::filter(df, grepl("peak time", target)) %>% - dplyr::mutate( - quantile = value, - target_end_date = MMWRweek::MMWRweek2Date( - as.numeric(substr(gsub("EW", "", output_type_id), 1, 4)), - as.numeric(substr(gsub("EW", "", output_type_id), 5, 6)), 7 - ), - value = as.numeric(MMWRweek::MMWRweek2Date( - as.numeric(substr(gsub("EW", "", output_type_id), 1, 4)), - as.numeric(substr(gsub("EW", "", output_type_id), 5, 6)), 7 - ))) %>% - dplyr::arrange(target_end_date) -# Peak size -df_size <- dplyr::filter(df, grepl("peak size", target), - !is.na(output_type_id)) %>% - dplyr::arrange(output_type_id) %>% - dplyr::filter(output_type_id %in% list_quantiles) %>% - dplyr::mutate(quantile = output_type_id, - target_end_date = as.Date(NA)) -# All -df_peak <- rbind(df_time, df_size) -df_lop_peak <- peak_ensemble(df_peak, weighting_scheme = "cdf_exterior", - n_trim = 2, weight.df = weight.df, - ens_group = ens_group, id_var = "model_id") -df_unlop_peak <- peak_ensemble(df_peak, weighting_scheme = "user_defined", - n_trim = NA, weight.df = weight.df, - ens_group = ens_group, id_var = "model_id") - -print(paste0("Process peak ensemble stop:", Sys.time())) - -# Write output -if (!dir.exists("output-processed/Ensemble/")) - dir.create("output-processed/Ensemble/") -print("Write Ensemble ...") -print(head(ens)) -arrow::write_dataset(ens, paste0("output-processed/Ensemble/"), - partitioning = c("origin_date", "target", "location"), - hive_style = 
FALSE, compression = "gzip", - compression_level = 9, - basename_template = "Ensemble{i}.gz.parquet") - -df_lop_tot <- rbind(df_lop, df_lop_peak) -if (!dir.exists("output-processed/Ensemble_LOP/")) - dir.create("output-processed/Ensemble_LOP/") -print("Write Ensemble LOP ...") -print(head(df_lop_tot)) -arrow::write_dataset(df_lop_tot, paste0("output-processed/Ensemble_LOP/"), - partitioning = c("origin_date", "target", "location"), - hive_style = FALSE, compression = "gzip", - compression_level = 9, - basename_template = "Ensemble_LOP{i}.gz.parquet") - -df_unlop_tot <- rbind(df_unlop, df_unlop_peak) -if (!dir.exists("output-processed/Ensemble_LOP_untrimmed/")) - dir.create("output-processed/Ensemble_LOP_untrimmed/") -print("Write Ensemble LOP untrimmed ...") -print(head(df_unlop_tot)) -arrow::write_dataset(df_unlop_tot, - paste0("output-processed/Ensemble_LOP_untrimmed/"), - partitioning = c("origin_date", "target", "location"), - hive_style = FALSE, compression = "gzip", - compression_level = 9, - basename_template = "Ensemble_LOP_untrimmed{i}.gz.parquet") - -print(paste0("TOTAL Process stop:", Sys.time())) -rm(list = ls()) diff --git a/code/example_process.R b/code/example_process.R deleted file mode 100644 index 38bea48..0000000 --- a/code/example_process.R +++ /dev/null @@ -1,128 +0,0 @@ -rm(list = ls()) -library(arrow) -library(magrittr) -library(dplyr) - -# Prerequisite ----- -list_quantiles <- c(0.01, 0.025, 0.05, 0.1, 0.15, 0.2, 0.25, 0.3, 0.35, 0.4, - 0.45, 0.5, 0.55, 0.6, 0.65, 0.7, 0.75, 0.8, 0.85, 0.9, - 0.95, 0.975, 0.99) -# vector of quantile value to integrate in the calculation of the ensemble - -group_column <- c("scenario_id", "target", "origin_date", "location", - "model_name", "horizon") -# group of column defining individual groups (for example, expect to have -# one projection per scenario, target, horizon (target_end_date), location, -# model and age group for the FLU projections) - -ens_group <- c("scenario_id", "target", "horizon", "origin_date", "location", - "age_group") -# group of column on which to calculate the ensemble (for example, aggregate -# by scenario, target, horizon (target_end_date), location, -# and age group for the FLU projections) - -cumul_group = c("origin_date", "scenario_id", "location", "target", - "output_type", "output_type_id", "age_group") -quant_group = c("origin_date", "scenario_id", "location", "target", - "horizon", "age_group") -quantile_vect = list_quantiles -peak_group = c("origin_date", "scenario_id", "location", "target", - "age_group", "output_type_id") -all_target <- c("peak time hosp", "peak size hosp", "inc hosp", "cum hosp", - "inc death", "cum death") - -schema <- arrow::schema( - "origin_date" = arrow::string(), - "target" = arrow::string(), - "model_name" = arrow::string(), - "scenario_id" = arrow::string(), - "location" = arrow::string(), - "horizon" = arrow::int64(), - "age_group" = arrow::string(), - "output_type" = arrow::string(), - "output_type_id" = arrow::string(), - "value" = arrow::float(), - "run_grouping" = arrow::int64(), - "stochastic_run" = arrow::int64() -) -source("code/utils.R") -source("code/temp_calc_ens.R") - -# Add missing value ------ -list_team <- grep("README", dir("data-processed/"), value = TRUE, invert = TRUE) -lapply(list_team, function(x) { - a_time <- system.time({ - print("###########") - test <- - arrow::open_dataset("data-processed/", - partitioning = c("model_name", "origin_date", - "target"), - factory_options = list(exclude_invalid_files = TRUE), - schema = schema) %>% - 
dplyr::filter(origin_date == "2024-04-28", output_type == "sample", - model_name == x) %>% - dplyr::collect() - - df_team <- test %>% - dplyr::mutate_if(is.factor, as.character) %>% - dplyr::mutate(origin_date = as.Date(origin_date)) - - df_team <- dplyr::mutate( - df_team, - output_type_id = ifelse( - output_type == "sample", - as.numeric(as.factor(paste0(run_grouping, "-", stochastic_run))), - output_type_id)) - df_team <- dplyr::select(df_team, -run_grouping, -stochastic_run, - -model_name) - - ## Weekly incident and cumulative target --- - df_all <- - make_quantiles(df_team, - quant_group = c("origin_date", "scenario_id", "location", - "target", "horizon", - "age_group"), - cumul_group = c("origin_date", "scenario_id", - "location", "target", "output_type", - "output_type_id", - "age_group"), - keep_cumul = TRUE) - - ## Peak target ---- - # Time - peak_time_target <- grep("peak time", all_target, value = TRUE) - df_all <- calculate_peak_time( - df_team, df_all, x, peak_group = peak_group, - peak_time_target = peak_time_target) - - # Peak size - peak_size_target <- grep("peak size", all_target, value = TRUE) - df_all <- calculate_peak_size( - df_team, df_all, x, peak_size_target = peak_size_target, - peak_group = peak_group, quantile_vect = quantile_vect) - - # Standardization - df_all <- dplyr::mutate(df_all, origin_date = as.Date(origin_date), - value = ifelse(output_type == "sample", - round(value, 1), value)) - - # Write and check output with 'SMHvalidation' package - write_path <- paste0("output-processed/", x, "/") - if (!dir.exists(write_path)) dir.create(write_path, recursive = TRUE) - arrow::write_dataset(df_all, write_path, - existing_data_behavior = "overwrite", - partitioning = c("origin_date", "target", "location"), - hive_style = FALSE, compression = "gzip", - compression_level = 9, - basename_template = paste0(x, - "{i}.gz.parquet")) - SMHvalidation::validate_submission( - write_path, js_def = "hub-config/tasks.json", lst_gs = NULL, - pop_path = "data-locations/locations.csv", merge_sample_col = FALSE, - partition = c("origin_date", "target", "location"), n_decimal = 1) - }) - print(x) - print(a_time) - NULL -}) -rm(list = ls()) diff --git a/code/temp_calc_ens.R b/code/temp_calc_ens.R deleted file mode 100644 index 1527030..0000000 --- a/code/temp_calc_ens.R +++ /dev/null @@ -1,809 +0,0 @@ -# ensemble_preprocess ########################################################## -#' Aggregate submission files with model name -#' -#' Read all the files in a path containing the model ordered by folder. -#' Aggregate them all in one data frame and add one columns with model name -#' (extracted from the file name inputted) -#' -#' @param list_files, list of path to files to read (files should be in the SMH -#' standard format) -#' @param loc_dictionary_name named vector, with the location name as value and -#' corresponding FIPS (or US for US level) as associated names. -#' @param location2number named vector, with the FIPS (or US for US level) as -#' value and corresponding names as associated names. -#' -#' @details Both named vectors are used to verify that the `location` column -#' is correctly filled. -#' The `model_name` is created by extraction the value from the file(s) name(s) -#' inputted in the parameter `list_files`. 
The file should be called: -#' "DATE-MODELNAME.csv" (zip, gz and pqt files are also accepted) -#' -#' @noRd -#' @importFrom dplyr filter mutate select -#' @importFrom purrr reduce -#' @importFrom stats setNames -list_model <- function(list_files, loc_dictionary_name, location2number) { - lst_ds <- lapply(list_files, function(y) { - df <- read_files(y) - df <- dplyr::filter(df, !is.na(output_type_id), - output_type == "quantile") %>% - dplyr::mutate( - output_type_id = round(as.numeric(output_type_id), 3), - origin_date = as.Date(origin_date), - model_name = gsub( - ".{4}-.{2}-.{2}-|.csv|.zip|.gz|.pqt|.parquet|(-)?quantile", "", - basename(y)), - location_name = loc_dictionary_name[as.character(location)], - location = ifelse(nchar(location) == 1, paste0("0", location), - location)) %>% - dplyr::mutate(location2 = location2number[location_name]) - if (!(all(df$location == df$location2))) - stop("Location translation error, please check `location` column") - df %>% dplyr::select(-location_name, -location2) - }) - lst_ds <- setNames(lst_ds, basename(list_files)) - lst_model <- purrr::reduce(lst_ds, rbind) %>% - dplyr::mutate(value = round(value, 10)) - return(lst_model) -} - -#' Filter Models by quantiles -#' -#' Remove models that does not provide all the expected number of quantiles -#' required in the Scenario Modeling Hub (SMH) standard and add a `round` column -#' -#' @param df data frame containing all the submissions -#' @param group_column character vector, names of the column to "group_by" the -#' df to calculate the number of quantiles in the submission. See Details' for -#' more information -#' @param n_quantile numeric, expected number of quantiles -#' @param scenario_sel named vector with scenario_id as value and round -#' information corresponding as name. For example, "A-2021-11-09" value, with -#' "round10" as name. -#' -#' @details The 'group_column' parameter is used to estimate the number -#' of quantiles in the submission. For example, the SMH standard contains 23 -#' quantiles per each group of scenario, target, location and model. In this -#' case, the `group_column` parameter can be set to `c("scenario_id", -#' "scenario_name", "target", "target_end_date", "location", "model_name")`, and -#' the `n_quantile` parameter can be set to `23`. \cr\cr -#' A column `round` is also added to the output, it allows to detect submission -#' with projection for past round scenario. -#' @noRd -#' -#' @importFrom dplyr group_by across mutate filter select ungroup -filter_quantiles <- function(df, group_column, n_quantile, scenario_sel) { - df_model_ensemble <- df %>% - # dplyr::group_by(all_of(group_column)) %>% - dplyr::group_by(across({{ group_column }})) %>% - dplyr::mutate(sel = ifelse(length(output_type_id) == n_quantile, 1, 0)) %>% - dplyr::filter(sel == 1) %>% - dplyr::mutate(round = names(scenario_sel[which( - scenario_sel %in% scenario_id)])) %>% - dplyr::select(-sel) %>% - dplyr::ungroup() - - if (dim(df_model_ensemble)[[1]] != dim(df)[[1]]) - print("Input submission filtered - missing quantiles") - - return(df_model_ensemble) -} - -#' Flag non-shared projection dates -#' -#' Flag the model projection date that are not common in all the models -#' projections. 
-#'
-#' @param df data frame containing all the submissions
-#' @noRd
-#' @importFrom dplyr select distinct group_by summarize mutate ungroup
-flag_ncdate <- function(df) {
-  sel_df <- df %>%
-    dplyr::select(horizon, model_name, round) %>%
-    dplyr::distinct() %>%
-    dplyr::group_by(horizon, round) %>%
-    dplyr::summarize(n_model = dplyr::n(), .groups = "drop") %>%
-    dplyr::group_by(round) %>%
-    dplyr::mutate(sel = ifelse(n_model == max(n_model), 0, 1)) %>%
-    dplyr::ungroup()
-  return(sel_df)
-}
-
-
-#' Filter non-shared projection dates
-#'
-#' Remove the model projection dates that are not shared by all the model
-#' projections.
-#'
-#' @param df data frame containing all the submissions
-#' @noRd
-#' @importFrom dplyr filter bind_rows
-filter_dates <- function(df) {
-  sel_df <- flag_ncdate(df)
-  round_n <- unique(sel_df$round)
-  sel_df <- dplyr::filter(sel_df, sel == 1)
-  df_model_ensemble <- lapply(round_n, function(x) {
-    df_ens <- dplyr::filter(df, round == x)
-    sel_ndate <- sel_df[which(sel_df$round == x), "horizon", TRUE]
-    df_ens <- df_ens[which(!(df_ens$horizon %in% sel_ndate)), ]
-  }) %>% dplyr::bind_rows()
-
-  if (dim(df_model_ensemble)[[1]] != dim(df)[[1]])
-    print("Input submission filtered - removed uncommon dates")
-
-  return(df_model_ensemble)
-}
-
-
-#' Prepare Submission for Ensemble Calculation
-#'
-#' Regroup all the submissions for a specific round of the Scenario Modeling
-#' Hub (SMH) in one data frame that is filtered to conserve only the required
-#' targets and quantiles, possibly exclude one or multiple models (not
-#' providing all the required quantiles, or by name), and select the projection
-#' dates shared between all the submissions.
-#'
-#' @param path path to the folder containing all the submission sub-folders
-#' @param round_path path to a csv with round information. NULL by default
-#' @param round numeric to select data for a specific round number, round number
-#'  should be written like "1", "5", ... and the parameter `round_path` should
-#'  be filled too. If NULL (default), all rounds selected
-#' @param excl_model string character, name of the model to exclude. If
-#'  multiple, please write as "model1|model2". By default, NULL (no exclusion)
-#' @param req_target character vector, names of the required targets
-#' @param list_quantiles vector of the required quantiles
-#' @param group_column character vector, names of the columns to "group_by" the
-#'  data frame of all submissions to calculate the number of quantiles in each
-#'  group. See 'Details' for more information
-#' @param scenario_sel named vector with scenario_id as value and the
-#'  corresponding round information as name. For example, "A-2021-11-09" value,
-#'  with "round10" as name.
-#' @param loc_dictionary_name named vector, with the location name as value and
-#'  corresponding FIPS (or US for US level) as associated names.
-#' @param location2number named vector, with the FIPS (or US for US level) as
-#'  value and corresponding names as associated names.
-#' @param name_quantile Boolean, if TRUE includes only files with "quantile."
-#'  in the file name.
-#'
-#'
-#' @details The 'group_column' parameter is used to estimate the number
-#' of quantiles in each group in all the submissions. For example, the SMH
-#' standard contains 23 quantiles for each group of scenario, target, location
-#' and model.
In this case, the `group_column` parameter can be set to -#' `c("scenario_id", "scenario_name", "target", "target_end_date", "location", -#' "model_name")` -#' and the `list_quantiles` should be a vector of 23 values. \cr\cr -#' A column `round` is added to the output, it allows to detect submission -#' with projection for past round scenario. To identify the round/scenario -#' information, the paramet `scenario_sel` is used. \cr\cr -#' Both named vectors (`loc_dictionary_name` and `location2number`) are used to -#' verify that the `location` column is correctly filled. -#' -#' @export -#' @importFrom dplyr filter distinct arrange -ensemble_preprocess <- function(path, round, round_path, excl_model, req_target, - list_quantiles, group_column, scenario_sel, - loc_dictionary_name, location2number, - name_quantile = FALSE) { - # List of files to read per round - name_model <- path_files(path, ensemble = FALSE, round_path = round_path, - round = round, name_quantile = name_quantile) - # Files exclusion if necessary - if (!is.null(excl_model)) { - name_model <- grep(excl_model, name_model, value = TRUE, invert = TRUE) - } - # Read, process all submissions - df_model <- list_model(name_model, loc_dictionary_name, location2number) - # Keep only required targets - df_model <- dplyr::filter(df_model, - grepl(paste(req_target, collapse = "|"), target)) - print(unique(df_model$target)) # to be removed - # Filter only required quantiles and common projection dates - df_model <- dplyr::filter(df_model, output_type_id %in% list_quantiles) - - df_model_ensemble <- filter_quantiles( - df_model, group_column, n_quantile = length(list_quantiles), - scenario_sel = scenario_sel) %>% - filter_dates() %>% - dplyr::distinct() %>% - dplyr::arrange(scenario_id, model_name, target, output_type_id, location) - # Print name of the model(s) included in the output - print(paste0("Model(s) included in the output: ", - paste(unique(df_model_ensemble$model_name), collapse = ", "))) - return(df_model_ensemble) -} - -# calculate_ensemble ########################################################### -#' Apply function on a specific quantile -#' -#' For a specific quantile, summarize all the models value by applying the -#' function inputted in the parameter `ens_func`, the function should take 2 -#' parameters: "value" (1st) and "weight" (2nd). For more information, please -#' see details. -#' -#' @param df data frame to summarize -#' @param quantile numeric, specific quantile to filter and summarize on -#' @param type string, type of the quantile (either "point" or "quantile") -#' @param quantile_name numeric, value of the quantile -#' @param ens_func function to apply (should have 2 parameters) -#' -#' @details The ensemble functions is applied on 2 parameters: value (1st) and -#' weight (2nd) so the input `df` should have at least these columns, the -#' quantile column and both target_end_date and target columns. \cr -#' The inputted data frame can also be grouped by other columns -#' but the groups will be dropped after summarizing. \cr -#' An example of function that can be used to calculate the ensemble is: -#' ``` -#' ens_func <- function(x, y) matrixStats::weightedMedian(x, y, na.rm = TRUE) -#' ``` -#' \cr\cr -#' After applying the function, some columns are added to have a data frame -#' output following the Scenario Modeling Hub standard. We add: `type`, -#' `quantile` and `model_projection_date` columns. They are created by using -#' the parameter `type`, `quantile_name`. 
-#' -#' @noRd -#' @importFrom dplyr filter summarize ungroup mutate -#' @importFrom rlang !! -ensemble_quantile <- function(df, quantile, type, quantile_name, ens_func) { - - df <- df %>% - dplyr::filter(output_type_id == !!quantile) %>% - dplyr::summarize(value = ens_func(value, weight), .groups = "drop") %>% - dplyr::ungroup() #%>% - - df[, "output_type"] <- type - df[, "output_type_id"] <- quantile_name - return(df) -} - -#' Calculate Ensembles by quantile -#' -#' For each quantile, summarize all the models value on a grouped by data frame -#' by applying the function inputted in the parameter `ens_func`. The function -#' should take 2 parameters: "value" (1st) and "weight" (2nd). For more -#' information, please see details. -#' -#' @param df data frame to summarize -#' @param ens_func function to apply (should have 2 parameters) -#' @param ens_group vector of column names to `group_by` the data frame `df` -#' @param weight.df data frame containing the weight column information, will be -#' join with `df` and the `weight ` column will be used as second parameter in -#' the `ens_func`. -#' -#' @details The ensemble functions is applied on 2 parameters: value (1st) and -#' weight (2nd) so the inputs `df` and `weight.df` (will be left_join in the -#' function) should have at least the columns `value` and `weight` and a common -#' column to join them. \cr -#' The inputted data frame can also be grouped by multiple columns inputted in -#' the `ens_group` parameter. The groups will be dropped after summarizing. \cr -#' An example of function that can be used to calculate the ensemble is: -#' ``` -#' ens_func <- function(x, y) matrixStats::weightedMedian(x, y, na.rm = TRUE) -#' ``` -#' \cr\cr -#' Some columns are required to have a data frame output following the Scenario -#' Modeling Hub standard. Please refer to the documentation on the Scenario -#' Modeling Hub repositories or websites. -#' -#' @importFrom dplyr left_join group_by bind_rows mutate all_of -#' @export -calculate_ensemble <- function(df, ens_func, ens_group, weight.df) { - df <- dplyr::left_join(df, weight.df) %>% - dplyr::group_by(across(all_of(ens_group))) - # dplyr::group_by(across({{ ens_group }})) - df_ensemble <- lapply(unique(df$output_type_id), function(x) { - ensemble_quantile(df, x, "quantile", x, ens_func) - }) %>% dplyr::bind_rows() %>% - dplyr::mutate(output_type_id = round(output_type_id, 3)) -} - -# ensemble_lop ################################################################# -#' Calculate LOP Ensembles by quantile -#' -#' Calculate LOP Ensemble by using the `CombineDistributions` package, and the -#' function `aggregate_cdfs`. For more information, please look at the package -#' documentation. -#' -#' @param df data frame that contains multiple cdfs, grouped by `ens_group` -#' columns. Specify cdf with `quantile` and `value` columns -#' @param list_quantiles vector of quantiles to use for calculation -#' @param weights data frame containing the weight column information -#' @param ens_group vector containing the names of the columns to create unique -#' aggregates for -#' @param weighting_scheme character name of the method for aggregation. 
-#' @param n_trim integer denoting the number of models to trim -#' -#' @importFrom dplyr ungroup left_join distinct select mutate -#' @importFrom CombineDistributions aggregate_cdfs -#' -#' @export -ensemble_lop <- function(df, list_quantiles, weights, ens_group, - weighting_scheme, n_trim, id_var = "model_name") { - df <- dplyr::ungroup(df) %>% - rename(quantile = output_type_id) - # Calculate LOP - if (is.na(n_trim)) { - df_lop <- CombineDistributions::aggregate_cdfs( - df, id_var = id_var, group_by = ens_group, method = "LOP", - ret_quantiles = list_quantiles, weights = weights, ret_values = NA, - weighting_scheme = weighting_scheme) - } else { - df_lop <- CombineDistributions::aggregate_cdfs( - df, id_var = id_var, group_by = ens_group, method = "LOP", - ret_quantiles = list_quantiles, weighting_scheme = weighting_scheme, - n_trim = n_trim, ret_values = NA) - } - # add missing columns - df_lop <- df_lop %>% - dplyr::mutate( - output_type = "quantile", - output_type_id = round(quantile, 3)) %>% - dplyr::select(-quantile) - - return(df_lop) - -} - -# df_lop_weight ################################################################ -#' Calculate weighted LOP Ensembles by quantile -#' -#' Calculate weighted LOP Ensemble by using the `psSLP` package, and the -#' functions `generate_slp_predictions`, `generate_slp_ensemble`. For more -#' information, please look at the package documentation. -#' -#' @param df data frame that contains multiple cdfs -#' @param weight.df data frame containing the weight column information -#' @param draw_size (default = 2000), an integer value of the number of -#' simulated forecasts to make for each quantile function set [for -#' `generate_slp_predictions` function] -#' @param threads default 4, set integer number of threads to use for parallel -#' processing [for `generate_slp_predictions` function] -#' @param verbose (default = TRUE) a boolean indicator to show verbose status -#' updates during calculation [for `generate_slp_predictions` function] -#' @param parallel (default = "on" )string value either "auto","on","off". By -#' default, the function will run in parallel using half the available threads -#' if the number of gam models to estimates exceeds 1000 (this is "auto") mode. -#' Setting this to "on" ("off") will force parallel processing on ("off") -#' regardless of gam models [for `generate_slp_predictions` function] -#' @param fixed_unif (default = TRUE), if set to TRUE this will ensure that the -#' same set of uniform variates will be drawn for each run of the estimation -#' function; rather than pulling runif(draw_size, unif_range[0], -#' unif_range[1]), this option will pull seq(unif_range[0], unif_range[1], -#' length.out = draw_size) [for `generate_slp_predictions` function] -#' @param byvars default = `c("scenario_id", "target_end_date", "model_name", -#' "outcome")`), a character vector indicating the grouping variables. Each -#' unique group should represent a single quantile function (Q(p)). The -#' calculation is run by location too by default. [for -#' `generate_slp_predictions` function] -#' @param ens_vars (default = `c("scenario_id", "target_end_date", "outcome")`), -#' a character vector indicating the variables that uniquely define a set of -#' predictions. The calculation is run by location too by default. 
for -#' `generate_slp_ensemble` function] -#' @param output_col (default = `c("scenario_id", "scenario_name", -#' "target", "target_end_date", "location", "value", "type", "quantile", -#' "model_projection_date")`), selection of columns to include in the output. -#' -#' @export -#' @importFrom dplyr select distinct mutate filter left_join bind_rows -#' @importFrom psSLP generate_slp_predictions generate_slp_ensemble -#' @importFrom magrittr %>% -#' @importFrom data.table .SD .N -df_lop_weight <- function(df, weight.df, - draw_size=2000, threads = 4, verbose=T, - parallel ="on", fixed_unif = TRUE, - byvars = c("scenario_id", "horizon", "origin_date", - "model_name", "target"), - ens_vars = c("scenario_id", "horizon", "origin_date", - "target"), - output_col = c("scenario_id", - "target", "horizon", - "location", "value", "type", - "type_id", "origin_date")) { - # prepare information for output - #wk_number <- dplyr::select(df, horizon, target, origin_date) %>% - # dplyr::distinct() %>% - # dplyr::mutate(target_wk = gsub("[^[:digit:]]", "", target)) %>% - # dplyr::select(target_end_date, target_wk) %>% - # dplyr::distinct() - # run by location, outcome - df_lop_pred <- df - df_lop <- lapply(unique(df_lop_pred$location), function(x) { - df_lop_out <- lapply(unique(df_lop_pred$target), function(y) { - print(paste(x, y, collapse = ", ")) #to be removed - df <- dplyr::filter(df_lop_pred, grepl(y, target), location == x) - if (dim(df)[1] > 0) { - preds <- lapply(unique(df$model_name), function(z) { - dfmod <- dplyr::filter(df, grepl(paste0("^", z, "$"), model_name)) - draw_size_mod <- draw_size * - weight.df[which(weight.df$model_name == z), "weight"] - print(paste0("Model:", unique(dfmod$model_name), - ", draws:", draw_size_mod)) - if (grepl("cum case", y)) { - preds <- psSLP::generate_slp_predictions( - x = dfmod, qp_var = "value", p_var = "output_type_id", - byvars = byvars, draw_size = draw_size_mod, threads = threads, - verbose = verbose, parallel = parallel, fixed_unif = fixed_unif) - } else { - preds <- psSLP::generate_slp_predictions( - x = dfmod, qp_var = "value", p_var = "output_type_id", - byvars = byvars, draw_size = draw_size_mod, threads = threads, - verbose = verbose, parallel = parallel, fixed_unif = fixed_unif, - model_configuration = list(type = "gam", - gam_con = list("family" = "poisson"))) - } - }) %>% dplyr::bind_rows() - preds <- dplyr::left_join(preds, weight.df, by = "model_name") - if (length(unique(preds$model_name)) > 1) { - trim_level <- 2 / (unique(preds[, .(model_name, - weight)])[, sum(weight)]) - } else { - trim_level <- 0 - } - trimmed_ensemble <- psSLP::generate_slp_ensemble( - preds, qp_var = "predictions", trim = trim_level, - byvars = ens_vars) - } else { - trimmed_ensemble <- NULL - } - gc() - return(trimmed_ensemble) - }) %>% dplyr::bind_rows() - df_lop_out <- dplyr::mutate( - df_lop_out, - location = as.character(x), - output_type = "quantile", - value = predictions) %>% - dplyr::select(!!output_col) - }) %>% dplyr::bind_rows() - return(df_lop) -} - -# write_ensemble ############################################################### -#' Write Ensemble files -#' -#' Write Ensembles output by round information (in CSV or ZIP depending on the -#' size of the output, or PARQUET). 
If necessary, create the
-#' corresponding folder.
-#'
-#' @param df_ensemble Ensemble data frame
-#' @param path_model path to the folder containing the Ensemble sub-folder
-#' @param ens_name name of the Ensemble (by default "Ensemble")
-#' @param ext file extension of the file (".csv" or ".parquet")
-#'
-#' @details The "Ensemble" object will be stored in a CSV (ZIP if the CSV is
-#' greater than 100 MB) or PARQUET file, in the sub-folder "Ensemble" in the
-#' folder corresponding to the path_model. If the ens_name is changed to
-#' another name, the name of the file and the sub-folder will correspond to
-#' that new name.
-#' \cr\cr
-#' The Ensemble file name is created by extracting the "model_projection_date"
-#' information in the `df_ensemble`. The file name will be:
-#' "`MODEL_PROJECTION_DATE`-`ENS_NAME`.csv". If the Ensemble data frame contains
-#' multiple model_projection_date, multiple files will be created: one for each.
-#'
-#' @importFrom dplyr filter
-#' @export
-write_ensemble <- function(df_ensemble, path_model, ens_name = "Ensemble",
-                           ext = ".csv") {
-  lapply(unique(df_ensemble$origin_date), function(x) {
-    df <- dplyr::filter(df_ensemble, grepl(x, origin_date))
-    name_file <- paste0(path_model, ens_name, "/", x, "-", ens_name, ext)
-    if (!dir.exists(paste0(path_model, ens_name, "/"))) {
-      print(paste0("Creating Ensemble folder: ",
-                   paste0(path_model, ens_name, "/")))
-      dir.create(paste0(path_model, ens_name, "/"))
-    }
-    write_files(df, name_file, ext = ext)
-  })
-}
-# Functions --------------------------------------------------------------------
-
-#' Calculate the epiweek in "EWYYYYWW" format
-#'
-#' For a specific date or a vector of dates, calculate the epiweek in a
-#' "EWYYYYWW" format
-#'
-#' @param date date or vector of dates to transform
-#'
-#' @export
-#' @importFrom MMWRweek MMWRweek
-make_epiweek <- function(date) {
-  epi_date <- MMWRweek::MMWRweek(date)
-  epi_year <- epi_date[["MMWRyear"]]
-  epi_week <- as.character(epi_date[["MMWRweek"]])
-  epi_week <- ifelse(nchar(epi_week) < 2, paste0(0, epi_week), epi_week)
-  epi_info <- paste0("EW", epi_year, epi_week)
-  return(epi_info)
-}
-
-#' Calculate missing optional target value
-#'
-#' For rounds with samples output_type, the cumulative trajectories are not
-#' expected and the quantiles are also not expected. This function tests, for
-#' the weekly incident and cumulative targets, whether all the possible values
-#' are present and, if not, generates them for a specific team_model projection
-#'
-#' @param df_team model projection of a team-model
-#' @param weekly_targ_name vector of weekly incident, cumulative target names
-#' @param team_model name of the team-model
-#' @param cumul_group character vector of column names to 'group_by' the
-#'  input data frame to calculate the cumulative trajectories for each group
-#' @param quant_group character vector of column names to 'group_by' the input
-#'  data frame to calculate the quantiles for each group
-#' @param quantile_vect numeric vector of probabilities with values in [0,1],
-#'  to produce quantiles corresponding to the given probabilities
-#'
-#' @details
-#' ## Weekly incident and cumulative target ---
-#' For each target (hosp or death), check that both sample and
-#' quantiles information are available:
-#' 1. if sample target(s) is missing:
-#'    - if cumulative is missing: calculates from incident trajectories
-#'    - if incident: returns a warning message and doesn't calculate cumulative
-#'    sample and quantiles
-#' 2.
if quantiles target(s) is missing: -#' - calculates missing quantiles from associated trajectories -#' 3. aggregate all -#' -#' @importFrom ScenarioModelingHub.data path_file make_quantiles -#' @importFrom dplyr filter bind_rows -calculate_inccum_weekly <- function( - df_team, weekly_targ_name, team_model, - cumul_group = c("origin_date", "scenario_id", "location", "target", - "output_type", "output_type_id", "age_group"), - quant_group = c("origin_date", "scenario_id", "location", "target", - "horizon", "age_group"), - quantile_vect = c(0.01, 0.025, 0.05, 0.1, 0.15, 0.2, 0.25, 0.3, 0.35, 0.4, - 0.45, 0.5, 0.55, 0.6, 0.65, 0.7, 0.75, 0.8, 0.85, 0.9, - 0.95, 0.975, 0.99)) { - - lst_all <- lapply(weekly_targ_name, function(targ) { - df_targ <- dplyr::filter(df_team, grepl(paste0("(inc|cum) ", targ), target)) - # Test sample first - df_targ_sample <- dplyr::filter(df_targ, output_type == "sample") - if (nrow(df_targ_sample) < 1) { - warning("The submission file from: ", team_model, - " is missing sample for the target: ", targ, - ". The missing values cannot be calculated.") - return(df_targ) - } else { - if (!any(grepl("cum ", df_targ_sample$target))) { - message("The submission file from: ", team_model, - " is missing cumulative sample the target: ", targ, - ". The missing value will be calculated.") - # Calculate cumulative - # - Sample - inc_sample_targ <- dplyr::filter(df_targ_sample, - target == paste0("inc ", targ)) - df_targ_sample <- ScenarioModelingHub.Data::make_quantiles( - inc_sample_targ, quant_group = NULL, quantile_vect = NULL, - cumul_group = cumul_group) - } - } - # Test quantile - df_targ_quantile <- dplyr::filter(df_targ, output_type == "quantile") - if (nrow(df_targ_quantile) < 1) { - message("The submission file from: ", team_model, - " is missing all quantiles for the target: ", targ, - ". The missing value will be calculated.") - df_all_targ <- ScenarioModelingHub.Data::make_quantiles( - df_targ_sample, quant_group = quant_group, - quantile_vect = quantile_vect, cumul_group = NULL) - } else { - if (any(grepl("cum", df_targ_quantile$target)) & - any(grepl("inc", df_targ_quantile$target))) { - message("The submission file from: ", team_model, " contains all the ", - "quantiles for the target: ", targ) - df_all_targ <- rbind(df_targ_quantile, df_targ_sample) - } else { - inc_sample_targ <- dplyr::filter(df_targ_sample, - target == paste0("inc ", targ)) - cum_sample_targ <- dplyr::filter(df_targ_sample, - target == paste0("cum ", targ)) - # Incidence - if (!any(grepl("inc", df_targ_quantile$target))) { - message("The submission file from: ", team_model, - " is missing incident quantiles for the target: ", targ, - ". The missing value will be calculated.") - df_targ_inc <- ScenarioModelingHub.Data::make_quantiles( - inc_sample_targ, quant_group = quant_group, - quantile_vect = quantile_vect, cumul_group = NULL) - } else { - df_targ_inc <- rbind(inc_sample_targ, - dplyr::filter(df_targ_quantile, - target == paste0("inc ", targ))) - } - # Cumulative - if (!any(grepl("cum", df_targ_quantile$target))) { - message("The submission file from: ", team_model, - " is missing cumulative quantiles for the target: ", targ, - ". 
The missing value will be calculated.")
-          df_targ_cum <- ScenarioModelingHub.Data::make_quantiles(
-            cum_sample_targ,
-            quant_group = c("origin_date", "scenario_id", "location", "target",
-                            "horizon", "age_group"),
-            quantile_vect = quantile_vect, cumul_group = NULL)
-        } else {
-          df_targ_cum <- rbind(cum_sample_targ,
-                               dplyr::filter(df_targ_quantile,
-                                             target == paste0("cum ", targ)))
-        }
-        df_all_targ <- rbind(df_targ_cum, df_targ_inc)
-      }
-    }
-    return(df_all_targ)
-  })
-  df_all <- dplyr::bind_rows(lst_all)
-  return(df_all)
-}
-
-#' Calculate missing peak time
-#'
-#' For rounds with samples output_type, the peak time hosp target is optional.
-#' This function tests whether peak time hosp is present in a specific
-#' team_model submission and, if not, generates it
-#'
-#' @param df_team model projection of a team-model (as submitted)
-#' @param df_all model projection of a team-model (with samples and quantiles)
-#'  to append the results, NULL for no data to append
-#' @param team_model name of the team-model
-#' @param peak_time_target target name as in the submission file
-#' @param peak_group character vector of column names to 'group_by' the input
-#'  data frame to calculate the peak time hospitalization target for each group
-#'
-#' @details
-#' Generates the missing target from incident hospitalization trajectories by
-#' calculating the cumulative probability of peak across the complete time
-#' series.
-#' For each trajectory, the horizon of the maximum incident hospitalization
-#' is extracted.
-#' For each horizon: calculate the proportion of trajectories that have peaked
-#' on this week (for example: 45 of 100 trajectories have peaked on
-#' horizon 4) (probability of peak).
-#' Calculate the cumulative probability of peak across trajectories:
-#' following the previous example, cumulative on horizon 4 = (45/100 +
-#' cumulative probability on horizon 3)
-#'
-#' @importFrom dplyr filter group_by across all_of mutate ungroup bind_rows
-calculate_peak_time <- function(
-    df_team, df_all, team_model, peak_time_target = "peak time hosp",
-    peak_group = c("origin_date", "scenario_id", "location", "target",
-                   "age_group", "output_type_id")) {
-
-  df_time_target <- dplyr::filter(df_team, target == peak_time_target)
-  if (nrow(df_time_target) < 1) {
-    df_inc_sample <- dplyr::filter(df_team, output_type == "sample",
-                                   target == "inc hosp", age_group == "0-130") %>%
-      dplyr::mutate(output_type_id = as.numeric(output_type_id),
-                    origin_date = as.Date(origin_date))
-    if (nrow(df_inc_sample) < 1) {
-      warning("Missing inc hosp sample to calculate peak time hosp.")
-      df_all <- df_all
-    } else {
-      message("The submission file from: ", team_model,
-              " is missing value for the target: ", peak_time_target,
-              ". The missing value will be calculated.")
-
-      df_time <- dplyr::group_by(df_inc_sample, across(all_of(peak_group))) %>%
-        mutate(sel = ifelse(max(value) == value, horizon, NA)) %>%
-        mutate(sel2 = ifelse(all(value == max(value)), 0, 1)) %>%
-        filter(sel2 != 0) %>%
-        filter(!is.na(sel)) %>% arrange(horizon) %>%
-        mutate(sel3 = ifelse(duplicated(output_type_id), 0, 1)) %>%
-        filter(sel3 == 1) %>% select(-contains("sel")) %>%
-        ungroup()
-
-      lst_time <- split(df_time, list(df_time$scenario_id, df_time$location,
-                                      df_time$age_group, df_time$target),
-                        drop = TRUE)
-      peak_time <- lapply(lst_time, function(dft) {
-        df_epitime <- NULL
-        if (nrow(dft) != 300) {
-          print("No. Peaks != 300")
-          print(head(dft))
-          print(nrow(dft))
-        }
-        for (i in 1:max(df_inc_sample$horizon, na.rm = TRUE)) {
-          peak_prob = nrow(dplyr::filter(dft, horizon == i)) / nrow(dft)
-          if (!is.null(df_epitime)) {
-            peak_cum <- filter(df_epitime, horizon == i - 1) %>% .$value
-            peak_cum <- peak_cum + peak_prob
-          } else {
-            peak_cum <- peak_prob
-          }
-          if (peak_cum >= 1) peak_cum <- 1
-          date <- MMWRweek::MMWRweek(
-            unique(dft$origin_date) + (i * 7) - 1)
-          if (nchar(date$MMWRweek) < 2) {
-            date <- paste0("EW", date$MMWRyear, "0", date$MMWRweek)
-          } else {
-            date <- paste0("EW", date$MMWRyear, date$MMWRweek)
-          }
-          df_epi <- distinct(
-            dft[, grep("output_type", peak_group, value = TRUE,
-                       invert = TRUE)]) %>%
-            mutate(horizon = i,
-                   output_type = "cdf",
-                   output_type_id = date,
-                   value = peak_cum,
-                   target = peak_time_target)
-          df_epitime <- rbind(df_epi, df_epitime)
-        }
-        df_epitime$horizon <- NA
-        return(df_epitime)
-      }) %>% bind_rows()
-      df_all <- dplyr::mutate(df_all, origin_date = as.Date(origin_date))
-      df_all <- rbind(df_all, peak_time)
-    }
-  } else {
-    message("The submission file from: ", team_model,
-            " contains value for the target: ", peak_time_target)
-    df_all <- rbind(df_all, df_time_target)
-  }
-  return(df_all)
-}
-
-
-#' Calculate missing peak size
-#'
-#' For rounds with samples output_type, the peak size hosp target is optional.
-#' This function tests whether peak size hosp is present in a specific
-#' team_model submission and, if not, generates it
-#'
-#' @param df_team model projection of a team-model (as submitted)
-#' @param df_all model projection of a team-model (with samples and quantiles)
-#'  to append the results, NULL for no data to append
-#' @param team_model name of the team-model
-#' @param peak_size_target target name as in the submission file
-#' @param peak_group character vector of column names to 'group_by' the input
-#'  data frame to calculate the peak size hospitalization target for each group
-#' @param quantile_vect numeric vector of probabilities with values in [0,1],
-#'  to produce quantiles corresponding to the given probabilities
-#'
-#' @details
-#' Generates the missing target from incident hospitalization trajectories by
-#' extracting the max incident hospitalization value for each trajectory and
-#' then generating the quantile distribution of the max value across all
-#' trajectories.
-#'
-#' @importFrom dplyr filter group_by across all_of mutate ungroup bind_rows
-calculate_peak_size <- function(
-    df_team, df_all, team_model, peak_size_target = "peak size hosp",
-    peak_group = c("origin_date", "scenario_id", "location", "target",
-                   "age_group", "output_type_id"),
-    quantile_vect = c(0.01, 0.025, 0.05, 0.1, 0.15, 0.2, 0.25, 0.3, 0.35, 0.4,
-                      0.45, 0.5, 0.55, 0.6, 0.65, 0.7, 0.75, 0.8, 0.85, 0.9,
-                      0.95, 0.975, 0.99)) {
-
-  df_size_target <- dplyr::filter(df_team, target == peak_size_target)
-  if (nrow(df_size_target) < 1) {
-    df_inc_sample <- dplyr::filter(df_team, output_type == "sample",
-                                   target == "inc hosp", age_group == "0-130")
-    if (nrow(df_inc_sample) < 1) {
-      warning("Missing inc hosp sample to calculate peak size hosp.")
-      df_all <- df_all
-    } else {
-      message("The submission file from: ", team_model,
-              " is missing value for the target: ", peak_size_target,
-              ".
The missing value will be calculated.") - df_size <- dplyr::group_by(df_inc_sample, across(all_of(peak_group))) %>% - dplyr::summarise(max = max(value)) %>% - dplyr::ungroup() %>% - reframe( - value = quantile(max, quantile_vect), - output_type = "quantile", - output_type_id = quantile_vect, - .by = all_of(grep("output_type", peak_group, value = TRUE, - invert = TRUE))) %>% - mutate(horizon = NA, - target = peak_size_target) - } - df_all <- rbind(df_all, df_size) - } else { - message("The submission file from: ", team_model, - " contains value for the target: ", peak_size_target) - df_all <- rbind(df_all, df_size_target) - } - - return(df_all) -} diff --git a/code/validation.R b/code/validation.R index 48c4d82..baf18e0 100644 --- a/code/validation.R +++ b/code/validation.R @@ -12,8 +12,9 @@ pr_files_name <- pr_files_name[!"removed" == purrr::map(pr_files, "status")] pr_sub_files <- stringr::str_extract(pr_files_name, "data-processed/.+/\\d{4}-\\d{2}-\\d{2}(-.*)?") -pr_sub_files <- unique(pr_sub_files) -if (grepl(".pqt$|.parquet$", pr_sub_files)) { +pr_sub_files <- unique(na.omit(pr_sub_files)) +pr_sub_files <- grep("(A|a)bstract", pr_sub_files, value = TRUE, invert = TRUE) +if (any(grepl(".pqt$|.parquet$", pr_sub_files))) { partition = NULL } else { partition = c("origin_date", "target") @@ -40,6 +41,9 @@ if (length(pr_sub_files) > 0) { pr_sub_files_lst <- pr_files[grepl(pr_sub_files_group, purrr::map(pr_files, "filename"))] + pr_sub_files_lst <- + pr_sub_files_lst[!grepl("(A|a)bstract", + purrr::map(pr_sub_files_lst, "filename"))] # run validation on all files test_tot <- lapply(seq_len(length(pr_sub_files_lst)), function(x) { # submission file download @@ -47,10 +51,15 @@ if (length(pr_sub_files) > 0) { url_link <- URLdecode(pr_sub_files_lst[[x]]$raw_url) download.file(url_link, basename(url_link)) } else { + file_part <- paste0(getwd(), "/part_sub/", + pr_sub_files_lst[[x]]$filename) + if (!(dir.exists(dirname(file_part)))) + dir.create(dirname(file_part), recursive = TRUE) url_link <- pr_sub_files_lst[[x]]$raw_url - download.file(url_link, pr_sub_files_lst[[x]]$filename) + download.file(url_link, file_part) } }) + gc() # run validation if (sub_file_date > "2024-01-01") { merge_col <- TRUE @@ -59,17 +68,21 @@ if (length(pr_sub_files) > 0) { } if (is.null(partition)) { val_path <- basename(pr_sub_files_group) + round_id <- NULL } else { - val_path <- dirname(pr_sub_files_group) + val_path <- paste0(getwd(), "/part_sub/", dirname(pr_sub_files_group)) + round_id <- sub_file_date } arg_list <- list(path = val_path, js_def = js_def_file, lst_gs = lst_gs, pop_path = pop_path, merge_sample_col = merge_col, - partition = partition) + partition = partition, round_id = round_id) test <- capture.output(try(do.call(SMHvalidation::validate_submission, arg_list))) + gc() if (length(grep("Run validation on fil", test, invert = TRUE)) == 0) { test <- try(do.call(SMHvalidation::validate_submission, arg_list)) test <- test[1] + gc() } # Visualization df <- try({ @@ -77,6 +90,7 @@ if (length(pr_sub_files) > 0) { dplyr::filter(output_type == "quantile") %>% dplyr::collect() }) + gc() # print(head(df)) if (all(class(df) != "try-error") && nrow(df) > 0) { test_viz <- try(generate_validation_plots( @@ -86,6 +100,7 @@ if (length(pr_sub_files) > 0) { } else { test_viz <- NA } + gc() if (class(test_viz) == "try-error") file.remove(dir(paste0(getwd(), "/proj_plot"), full.names = TRUE)) # list of the viz and validation results @@ -100,7 +115,7 @@ if (length(pr_sub_files) > 0) { "validation was run."))) } - # Post 
validation results as comment on the open PR +# Post validation results as comment on the open PR test_valid <- purrr::map(test_tot, "valid") message <- purrr::map(test_valid, paste, collapse = "\n")
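Taken together, the validation.R changes above split submissions into two paths: single .pqt/.parquet files are validated directly, while partitioned submissions are downloaded under part_sub/ and validated with `partition` and `round_id` set. A condensed sketch of that dispatch, reusing the variable names from the script above:

```r
# Condensed sketch of the dispatch logic added to code/validation.R.
# Single-file submissions: validate in place, no partition or round_id.
# Partitioned submissions: validate the local copy under "part_sub/".
if (any(grepl(".pqt$|.parquet$", pr_sub_files))) {
  partition <- NULL
  val_path  <- basename(pr_sub_files_group)
  round_id  <- NULL
} else {
  partition <- c("origin_date", "target")
  val_path  <- paste0(getwd(), "/part_sub/", dirname(pr_sub_files_group))
  round_id  <- sub_file_date
}
arg_list <- list(path = val_path, js_def = js_def_file, lst_gs = lst_gs,
                 pop_path = pop_path, merge_sample_col = merge_col,
                 partition = partition, round_id = round_id)
test <- capture.output(try(do.call(SMHvalidation::validate_submission,
                                   arg_list)))
```

The repeated `gc()` calls added between the download, validation, and plotting steps presumably keep memory use on the CI runner in check when handling large partitioned submissions.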