From 88b01888133771338649e8195e3b3b36de32861d Mon Sep 17 00:00:00 2001 From: Grant McDermott Date: Mon, 11 Dec 2023 15:44:40 -0800 Subject: [PATCH 1/5] Support numeric sequence for period argument of aggr_es. --- R/aggr_es.R | 43 +++++++++++++++++++++++++++++++++---------- R/ggcoefplot.R | 10 ++++++---- R/ggiplot.R | 7 ++++--- R/iplot_data.R | 23 +++++++++++++++-------- man/aggr_es.Rd | 16 ++++++++++------ man/ggcoefplot.Rd | 14 ++++++++------ man/iplot_data.Rd | 10 +++++----- 7 files changed, 81 insertions(+), 42 deletions(-) diff --git a/R/aggr_es.R b/R/aggr_es.R index bf2ae72..307c957 100644 --- a/R/aggr_es.R +++ b/R/aggr_es.R @@ -13,8 +13,10 @@ #' @param object A model object of class `fixest`, where the `i()` operator has #' been used to facilitate an "event-study" DiD design. See Examples. #' @param rhs Numeric. The null hypothesis value. Defaults to 0. -#' @param period Character string. Which group of periods are we aggregating? -#' One of "post" (the default), "prep", or "both". +#' @param period Keyword string or numeric sequence. Which group of periods +#' are we aggregating? Can either be one of three convenience strings---i.e., +#' "post" (the default), "pre", or "both"---or a numeric sequence that matches +#' a subset of periods in the data (e.g. 6:8). #' @param aggregation Character string. The aggregation type. Either "mean" (the #' default) or "cumulative". #' @param abbr_term Logical. 
Should the leading "term" column of the return @@ -40,19 +42,26 @@ #' attributes(post_mean)["hypothesis"] #' #' # Other hypothesis and aggregation options -#' aggr_es(est, aggregation = "cumulative") -#' aggr_es(est, period = "both") -#' aggr_es(est, rhs = -1, period = "pre") +#' aggr_es(est, aggregation = "cumulative") # cumulative instead of mean effects +#' aggr_es(est, period = "pre") # pre period instead of post +#' aggr_es(est, period = "both") # pre & post periods separately +#' aggr_es(est, period = 5:6) # specific subset of periods +#' aggr_es(est, rhs = -1, period = "pre") # pre period with H0 value of 1 #' # Etc. #' aggr_es = function(object, rhs = 0, - period = c("post", "pre", "both"), + # period = c("post", "pre", "both"), + period = "post", aggregation = c("mean", "cumulative"), abbr_term = TRUE, ...) { aggregation = match.arg(aggregation) - period = match.arg(period) + if (!(any(period %in% c("post", "pre", "both")) || is.numeric(period))) { + stop('The `period` argument must be one of c("post", "pre", "both"), or a numeric sequence.') + } + # period = match.arg(period) + fixest_obj = inherits(object, "fixest") if (!fixest_obj) stop("Please provide a valid fixest object.") mm = object$model_matrix_info[[1]] @@ -65,11 +74,25 @@ aggr_es = function(object, coefs = mm$coef_names_full ref_id = mm$ref_id[1] ## Store our periods in a list to make the below lapply call easier - if (period == "post") { + if (is.numeric(period)) { + if (!all(period %in% mm$items)) { + stop( + '\nSupplied period sequence does not match periods in model object.', + '\nUser-supplied periods: ', period, + 'Model periods: ', mm$items + ) + } + if (any(period %in% mm$ref)) { + warning("\nThe reference period, ", mm$ref, ", cannot be included in the aggregation and will be dropped.") + period = setdiff(period, mm$ref) + } + idx = list(match(period, mm$items)) + names(idx) = paste0("periods", paste(range(period), collapse=":")) + } else if (period == "post") { idx = list("post" = 
(ref_id + 1):length(coefs)) } else if (period == "pre") { ## still need to handle the "both" option idx = list("pre" = 1:(ref_id - 1)) # ref period is dropped from the model - } else { + } else if (period == "both") { idx = list("pre" = 1:(ref_id - 1), "post" = (ref_id + 1):length(coefs)) } ## We're doing a bit more work than we need to here with the lapply call and @@ -94,7 +117,7 @@ aggr_es = function(object, hyp_attr = sapply(res, function(x) {attributes(x)["hypothesis"]}) res = do.call("rbind", res) row.names(res) = NULL - if (period == "both") { + if (!is.numeric(period) && period == "both") { attributes(res) = utils::modifyList(attributes(res), hyp_attr) attributes(res)["hypothesis"] = NULL } diff --git a/R/ggcoefplot.R b/R/ggcoefplot.R index a1d3710..aab9a7a 100644 --- a/R/ggcoefplot.R +++ b/R/ggcoefplot.R @@ -18,10 +18,12 @@ #' a continuous relationship among the coefficients. #' @param multi_style Character string. One of `c('dodge', 'facet')`, defining #' how multi-model objects should be presented. -#' @param aggr_eff A character string indicating whether the aggregated mean -#' post- (and/or pre-) treatment effect should be plotted alongside the -#' individual period effects. Should be one of "none" (the default), "post", -#' "pre", or "both". +#' @param aggr_eff A keyword string or numeric sequence, indicating whether +#' mean treatment effects for some subset of the model should be displayed as +#' part of the plot. For example, the "post" keyword means that the mean +#' post-treatment effect will be plotted alongside the individual period +#' effects. Passed to [`aggr_es`]; see that function's documentation for other +#' valid options. #' @param aggr_eff.par List. Parameters of the aggregated treatment effect line, #' if plotted. The default values are `col = 'gray50'`, `lwd = 1`, `lty = 1`. #' @param facet_args A list of arguments passed down to `ggplot::fact_wrap()`. 
diff --git a/R/ggiplot.R b/R/ggiplot.R index 484f96b..1e9dc38 100644 --- a/R/ggiplot.R +++ b/R/ggiplot.R @@ -8,7 +8,7 @@ ggiplot = function( object, geom_style = c('pointrange', 'errorbar', 'ribbon'), multi_style = c('dodge', 'facet'), - aggr_eff = c('none', 'post', 'pre', 'both'), + aggr_eff = NULL, aggr_eff.par = list(col = 'grey50', lwd = 1, lty = 1), facet_args = NULL, theme = NULL, @@ -17,7 +17,8 @@ ggiplot = function( geom_style = match.arg(geom_style) multi_style = match.arg(multi_style) - aggr_eff = match.arg(aggr_eff) + # aggr_eff = match.arg(aggr_eff) + if (is.null(aggr_eff)) aggr_eff = "none" aggr_eff.par = utils::modifyList(list(col = "grey50", lwd = 1, lty = 1), aggr_eff.par) dots = list(...) @@ -217,7 +218,7 @@ ggiplot = function( } } + { - if (aggr_eff != "none") { + if (is.numeric(aggr_eff) || aggr_eff != "none") { geom_line(aes(y = aggr_eff), col = aggr_eff.par$col, lwd = aggr_eff.par$lwd, lty = aggr_eff.par$lty) } } + diff --git a/R/iplot_data.R b/R/iplot_data.R index 7ab4497..d7117a0 100644 --- a/R/iplot_data.R +++ b/R/iplot_data.R @@ -38,10 +38,10 @@ #' variables created with i. This is an index, just try increasing numbers to #' hopefully obtain what you want. Passed down to #' `fixest::iplot(..., i.select = .i.select)` -#' @param .aggr_es A character string indicating whether the aggregated mean -#' post- (and/or pre-) treatment effect should be added as a column to the -#' returned data frame. Passed to `aggr_es(..., aggregation = "mean")` and -#' should be one of "none" (the default), "post", "pre", or "both". +#' @param .aggr_es A keyword string or numeric sequence indicating whether the +#' aggregated mean treatment effects for some subset of the model should be +#' added as a column to the returned data frame. Passed to +#' `aggr_es(..., aggregation = "mean")`. 
#' @details This function is a wrapper around #' `fixest::iplot(..., only.params = TRUE)`, but with various checks and tweaks #' to better facilitate plotting with `ggplot2` and handling of complex object @@ -75,14 +75,16 @@ iplot_data = function( .dict = fixest::getFixest_dict(), .internal.only.i = TRUE, .i.select = 1, - .aggr_es = c("none", "post", "pre", "both"), + # .aggr_es = c("none", "post", "pre", "both"), + .aggr_es = NULL, .group = "auto" ) { - .aggr_es = match.arg(.aggr_es) + # .aggr_es = match.arg(.aggr_es) + if (is.null(.aggr_es)) .aggr_es = "none" if (isFALSE(.internal.only.i)) { ## No pre/post aggregation allowed for coefplot - if (.aggr_es!="none") warning("The .aggr_es argument will be ignored with (gg)coefplot calls.\n") + if (is.numeric(.aggr_es) || .aggr_es!="none") warning("The .aggr_es argument will be ignored with (gg)coefplot calls.\n") .aggr_es = "none" } @@ -382,7 +384,12 @@ iplot_data = function( } - if (.aggr_es != "none") { + if (is.numeric(.aggr_es)) { + ea = aggr_es(object, period = .aggr_es) + d$aggr_eff = NA + # d$aggr_eff[match(.aggr_es, )] + d$aggr_eff[match(.aggr_es, d$estimate_names)] = ea$estimate[1] + } else if (.aggr_es != "none") { ea = aggr_es(object, period = .aggr_es) ref_idx = which(d$is_ref) d$aggr_eff = 0 diff --git a/man/aggr_es.Rd b/man/aggr_es.Rd index b179a56..b946393 100644 --- a/man/aggr_es.Rd +++ b/man/aggr_es.Rd @@ -7,7 +7,7 @@ aggr_es( object, rhs = 0, - period = c("post", "pre", "both"), + period = "post", aggregation = c("mean", "cumulative"), abbr_term = TRUE, ... @@ -19,8 +19,10 @@ been used to facilitate an "event-study" DiD design. See Examples.} \item{rhs}{Numeric. The null hypothesis value. Defaults to 0.} -\item{period}{Character string. Which group of periods are we aggregating? -One of "post" (the default), "prep", or "both".} +\item{period}{Keyword string or numeric sequence. Which group of periods +are we aggregating? 
Can either be one of three convenience strings---i.e., +"post" (the default), "pre", or "both"---or a numeric sequence that matches +a subset of periods in the data (e.g. 6:8).} \item{aggregation}{Character string. The aggregation type. Either "mean" (the default) or "cumulative".} @@ -62,9 +64,11 @@ est = feols(y ~ x1 + i(period, treat, 5) | id + period, base_did) attributes(post_mean)["hypothesis"] # Other hypothesis and aggregation options -aggr_es(est, aggregation = "cumulative") -aggr_es(est, period = "both") -aggr_es(est, rhs = -1, period = "pre") +aggr_es(est, aggregation = "cumulative") # cumulative instead of mean effects +aggr_es(est, period = "pre") # pre period instead of post +aggr_es(est, period = "both") # pre & post periods separately +aggr_es(est, period = 5:6) # specific subset of periods +aggr_es(est, rhs = -1, period = "pre") # pre period with H0 value of 1 # Etc. } diff --git a/man/ggcoefplot.Rd b/man/ggcoefplot.Rd index b3f80b9..23a513b 100644 --- a/man/ggcoefplot.Rd +++ b/man/ggcoefplot.Rd @@ -19,7 +19,7 @@ ggiplot( object, geom_style = c("pointrange", "errorbar", "ribbon"), multi_style = c("dodge", "facet"), - aggr_eff = c("none", "post", "pre", "both"), + aggr_eff = NULL, aggr_eff.par = list(col = "grey50", lwd = 1, lty = 1), facet_args = NULL, theme = NULL, @@ -58,7 +58,7 @@ require list objects. Currently used are: example, \code{zero.par = list(col = 'orange')}. \item \code{ref.line} and \code{ref.line.par} for defining or adjusting the vertical reference line. For example, \code{ref.line.par = list(col = 'red', lty = 4)}. -\item \code{pt.pch}, \code{pt.size}, and \code{pt.join} for overriding the default point estimate shapes, size, and joining them, respectively. +\item \code{pt.pch} and \code{pt.join} for overriding the default point estimate shapes and joining them, respectively. \item \code{col} for manually defining line, point, and ribbon colours. \item \code{ci_level} for changing the desired confidence level (default = 0.95). 
Note that multiple levels are allowed, e.g. \code{ci_level = c(0.8, 0.95)}. @@ -72,10 +72,12 @@ channel. For example, we can make the CI band lighter with \item \code{dict} a dictionary for overriding coefficient names. }} -\item{aggr_eff}{A character string indicating whether the aggregated mean -post- (and/or pre-) treatment effect should be plotted alongside the -individual period effects. Should be one of "none" (the default), "post", -"pre", or "both".} +\item{aggr_eff}{A keyword string or numeric sequence, indicating whether +mean treatment effects for some subset of the model should be displayed as +part of the plot. For example, the "post" keyword means that the mean +post-treatment effect will be plotted alongside the individual period +effects. Passed to \code{\link{aggr_es}}; see that function's documentation for other +valid options.} \item{aggr_eff.par}{List. Parameters of the aggregated treatment effect line, if plotted. The default values are \code{col = 'gray50'}, \code{lwd = 1}, \code{lty = 1}.} diff --git a/man/iplot_data.Rd b/man/iplot_data.Rd index 96eef63..e967196 100644 --- a/man/iplot_data.Rd +++ b/man/iplot_data.Rd @@ -13,7 +13,7 @@ iplot_data( .dict = fixest::getFixest_dict(), .internal.only.i = TRUE, .i.select = 1, - .aggr_es = c("none", "post", "pre", "both"), + .aggr_es = NULL, .group = "auto" ) @@ -65,10 +65,10 @@ variables created with i. This is an index, just try increasing numbers to hopefully obtain what you want. Passed down to \code{fixest::iplot(..., i.select = .i.select)}} -\item{.aggr_es}{A character string indicating whether the aggregated mean -post- (and/or pre-) treatment effect should be added as a column to the -returned data frame. 
Passed to \code{aggr_es(..., aggregation = "mean")} and -should be one of "none" (the default), "post", "pre", or "both".} +\item{.aggr_es}{A keyword string or numeric sequence indicating whether the +aggregated mean treatment effects for some subset of the model should be +added as a column to the returned data frame. Passed to +\code{aggr_es(..., aggregation = "mean")}.} \item{.group}{A list, default is missing. Each element of the list reports the coefficients to be grouped while the name of the element is the group From ff8bce11dbc27346e00a72f3248195553d668e1f Mon Sep 17 00:00:00 2001 From: Grant McDermott Date: Mon, 11 Dec 2023 15:49:22 -0800 Subject: [PATCH 2/5] Better example --- R/aggr_es.R | 2 +- man/aggr_es.Rd | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/R/aggr_es.R b/R/aggr_es.R index 307c957..69317b6 100644 --- a/R/aggr_es.R +++ b/R/aggr_es.R @@ -45,7 +45,7 @@ #' aggr_es(est, aggregation = "cumulative") # cumulative instead of mean effects #' aggr_es(est, period = "pre") # pre period instead of post #' aggr_es(est, period = "both") # pre & post periods separately -#' aggr_es(est, period = 5:6) # specific subset of periods +#' aggr_es(est, period = 6:8) # specific subset of periods #' aggr_es(est, rhs = -1, period = "pre") # pre period with H0 value of 1 #' # Etc. #' diff --git a/man/aggr_es.Rd b/man/aggr_es.Rd index b946393..2c5df9c 100644 --- a/man/aggr_es.Rd +++ b/man/aggr_es.Rd @@ -67,7 +67,7 @@ attributes(post_mean)["hypothesis"] aggr_es(est, aggregation = "cumulative") # cumulative instead of mean effects aggr_es(est, period = "pre") # pre period instead of post aggr_es(est, period = "both") # pre & post periods separately -aggr_es(est, period = 5:6) # specific subset of periods +aggr_es(est, period = 6:8) # specific subset of periods aggr_es(est, rhs = -1, period = "pre") # pre period with H0 value of 1 # Etc. 
From cfaae71bccdbd977922aeb8fe7d36f82e06ad368 Mon Sep 17 00:00:00 2001 From: Grant McDermott Date: Mon, 11 Dec 2023 16:04:25 -0800 Subject: [PATCH 3/5] Add tests --- inst/tinytest/test_aggr_es.R | 92 ++++++++++++++++++++++++++++++++++++ 1 file changed, 92 insertions(+) create mode 100644 inst/tinytest/test_aggr_es.R diff --git a/inst/tinytest/test_aggr_es.R b/inst/tinytest/test_aggr_es.R new file mode 100644 index 0000000..0c7ab98 --- /dev/null +++ b/inst/tinytest/test_aggr_es.R @@ -0,0 +1,92 @@ +library(ggfixest) +library(tinytest) + +# +# Datasets and models ---- + +data("base_did", package = "fixest") + +est = fixest::feols( + fml = y ~ x1 + i(period, treat, 5) | id + period, + data = base_did +) + +aggr_post = aggr_es(est) # default post +aggr_cum = aggr_es(est, aggregation = "cumulative") # cumulative instead of mean effects +aggr_pre = aggr_es(est, period = "pre") # pre period instead of post +aggr_both = aggr_es(est, period = "both") # pre & post periods separately +aggr_rhs1 = aggr_es(est, period = "pre", rhs = -1) # pre period with H0 value of -1 + +# +# Known output ---- + +aggr_post_known = data.frame( + term = "post-treatment (mean)", + estimate = 3.906554122950695, + std.error = 0.8598575665281263, + statistic = 4.543257249830702, + p.value = 5.5391585244779775e-06, + s.value = 17.461901741925015, + conf.low = 2.2212642607213144, + conf.high = 5.591843985180075 +) + + +aggr_cum_known = data.frame( + term = "post-treatment (cumulative)", + estimate = 19.532770614753474, + std.error = 4.299287821377354, + statistic = 4.543257261733131, + p.value = 5.539158211582727e-06, + s.value = 17.461901823419783, + conf.low = 11.106321325682188, + conf.high = 27.95921990382476 +) + +aggr_pre_known = data.frame( + term = "pre-treatment (mean)", + estimate = -1.1798706992411545, + std.error = 0.8561963882056884, + statistic = -1.3780374637106132, + p.value = 0.16819172163573184, + s.value = 2.5718213967199377, + conf.low = -2.857984783817578, + conf.high = 
0.49824338533526924 +) + +aggr_both_known = + data.frame( + term = c("pre-treatment (mean)", "post-treatment (mean)"), + estimate = c(-1.1798706992411545, 3.906554122950695), + std.error = c(0.8561963882056884, 0.8598575665281263), + statistic = c(-1.3780374637106132, 4.543257249830702), + p.value = c(0.16819172163573184, 5.5391585244779775e-06), + s.value = c(2.5718213967199377, 17.461901741925015), + conf.low = c(-2.857984783817578, 2.2212642607213144), + conf.high = c(0.49824338533526924, 5.591843985180075) + ) + +aggr_rhs1_known = data.frame( + term = "pre-treatment (mean)", + estimate = -0.1798706992411545, + std.error = 0.8561963882056884, + statistic = -0.21008112358206216, + p.value = 0.8336043579365446, + s.value = 0.26256527509602834, + conf.low = -1.8579847838175783, + conf.high = 1.4982433853352692 +) + + +# +# tests ---- +tol = 1e-6 + +for (col in c("term", "estimate", "std.error", "statistic", "p.value", "s.value", + "conf.low", "conf.high")) { + expect_equivalent(aggr_post[[col]], aggr_post_known[[col]], tolerance = tol) + expect_equivalent(aggr_cum[[col]], aggr_cum_known[[col]], tolerance = tol) + expect_equivalent(aggr_pre[[col]], aggr_pre_known[[col]], tolerance = tol) + expect_equivalent(aggr_both[[col]], aggr_both_known[[col]], tolerance = tol) + expect_equivalent(aggr_rhs1[[col]], aggr_rhs1_known[[col]], tolerance = tol) +} From 3853d0d3f47570a9c3c8bfc057d621e88a23a241 Mon Sep 17 00:00:00 2001 From: Grant McDermott Date: Mon, 11 Dec 2023 16:10:08 -0800 Subject: [PATCH 4/5] NEWS and version bump --- DESCRIPTION | 2 +- NEWS.md | 10 ++++++++++ 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index a69c749..8188bcd 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: ggfixest Title: Dedicated ggplot2 methods for fixest objects -Version: 0.0.3 +Version: 0.0.3.9000 Authors@R: c(person(given = "Grant", family = "McDermott", diff --git a/NEWS.md b/NEWS.md index 1e6cdc9..1adbcbd 100644 --- a/NEWS.md 
+++ b/NEWS.md @@ -1,3 +1,13 @@ +# ggfixest 0.0.3.9000 (development version) + +## New features + +- The `aggr_es` function now supports numeric sequences for aggregating +specific period subsets, in addition to the existing keyword strings like "pre" +or "post". This functionality passes through to the higher order functions that +call `aggr_es` under the hood. (#33) + + # ggfixest 0.0.3 ## Breaking change From a27c968aa5ef48c46d70516ba6715c99f4ac9b7f Mon Sep 17 00:00:00 2001 From: Grant McDermott Date: Mon, 11 Dec 2023 16:14:55 -0800 Subject: [PATCH 5/5] docs --- R/aggr_es.R | 2 +- man/aggr_es.Rd | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/R/aggr_es.R b/R/aggr_es.R index 69317b6..6060248 100644 --- a/R/aggr_es.R +++ b/R/aggr_es.R @@ -32,7 +32,7 @@ #' hypothesis test is also provided as an attribute. See Examples. #' @export #' @examples -#' library(fixest) +#' library(ggfixest) ## Will load fixest too #' #' est = feols(y ~ x1 + i(period, treat, 5) | id + period, base_did) #' diff --git a/man/aggr_es.Rd b/man/aggr_es.Rd index 2c5df9c..d313252 100644 --- a/man/aggr_es.Rd +++ b/man/aggr_es.Rd @@ -54,7 +54,7 @@ effects too. At its heart, \code{aggr_es()} is a convenience wrapper around joint hypothesis test. } \examples{ -library(fixest) +library(ggfixest) ## Will load fixest too est = feols(y ~ x1 + i(period, treat, 5) | id + period, base_did)