Support numeric aggregation (#33)

* Support numeric sequence for period argument of aggr_es. * Better example * Add tests * NEWS and version bump * docs
grantmcdermott · Dec 12, 2023 · 4b97b9d · 4b97b9d
1 parent f55f0dc
commit 4b97b9d
Show file tree

Hide file tree

Showing 10 changed files with 186 additions and 45 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,6 +1,6 @@
 Package: ggfixest
 Title: Dedicated ggplot2 methods for fixest objects
-Version: 0.0.3
+Version: 0.0.3.9000
 Authors@R: 
     c(person(given   = "Grant",
              family  = "McDermott",

diff --git a/NEWS.md b/NEWS.md
@@ -1,3 +1,13 @@
+# ggfixest 0.0.3.9000 (development version)
+
+## New features
+
+- The `aggr_es` function now supports numeric sequences for aggregating
+specific period subsets, in addition to the existing keyword strings like "pre"
+or "post". This functionality passes through to the higher order functions that
+call `aggr_es` under the hood. (#33)
+
+
 # ggfixest 0.0.3
 
 ## Breaking change

diff --git a/R/aggr_es.R b/R/aggr_es.R
@@ -13,8 +13,10 @@
 #' @param object A model object of class `fixest`, where the `i()` operator has
 #' been used to facilitate an "event-study" DiD design. See Examples.
 #' @param rhs Numeric. The null hypothesis value. Defaults to 0.
-#' @param period Character string. Which group of periods are we aggregating?
-#' One of "post" (the default), "prep", or "both".
+#' @param period Keyword string or numeric sequence. Which group of periods
+#' are we aggregating? Can either be one of three convenience strings---i.e.,
+#' "post" (the default), "pre", or "both"---or a numeric sequence that matches
+#' a subset of periods in the data (e.g. 6:8).
 #' @param aggregation Character string. The aggregation type. Either "mean" (the
 #' default) or "cumulative".
 #' @param abbr_term Logical. Should the leading "term" column of the return
@@ -30,7 +32,7 @@
 #' hypothesis test is also provided as an attribute. See Examples.
 #' @export
 #' @examples
-#' library(fixest)
+#' library(ggfixest) ## Will load fixest too
 #'
 #' est = feols(y ~ x1 + i(period, treat, 5) | id + period, base_did)
 #'
@@ -40,19 +42,26 @@
 #' attributes(post_mean)["hypothesis"]
 #'
 #' # Other hypothesis and aggregation options
-#' aggr_es(est, aggregation = "cumulative")
-#' aggr_es(est, period = "both")
-#' aggr_es(est, rhs = -1, period = "pre")
+#' aggr_es(est, aggregation = "cumulative") # cumulative instead of mean effects
+#' aggr_es(est, period = "pre")             # pre period instead of post
+#' aggr_es(est, period = "both")            # pre & post periods separately
+#' aggr_es(est, period = 6:8)               # specific subset of periods
+#' aggr_es(est, rhs = -1, period = "pre")   # pre period with H0 value of -1
 #' # Etc.
 #'
 aggr_es = function(object,
                    rhs = 0,
-                   period = c("post", "pre", "both"),
+                   # period = c("post", "pre", "both"),
+                   period = "post",
                    aggregation = c("mean", "cumulative"),
                    abbr_term = TRUE,
                    ...) {
     aggregation = match.arg(aggregation)
-    period = match.arg(period)
+    if (!(any(period %in% c("post", "pre", "both")) || is.numeric(period))) {
+        stop('The `period` argument must be one of c("post", "pre", "both"), or a numeric sequence.')
+    }
+    # period = match.arg(period)
+
     fixest_obj = inherits(object, "fixest")
     if (!fixest_obj) stop("Please provide a valid fixest object.")
     mm = object$model_matrix_info[[1]]
@@ -65,11 +74,25 @@ aggr_es = function(object,
     coefs = mm$coef_names_full
     ref_id = mm$ref_id[1]
     ## Store our periods in a list to make the below lapply call easier
-    if (period == "post") {
+    if (is.numeric(period)) {
+        if (!all(period %in% mm$items)) {
+            stop(
+                '\nSupplied period sequence does not match periods in model object.',
+                '\nUser-supplied periods: ', period,
+                'Model periods: ', mm$items
+            )
+        }
+        if (any(period %in% mm$ref)) {
+            warning("\nThe reference period, ", mm$ref, ", cannot be included in the aggregation and will be dropped.")
+            period = setdiff(period, mm$ref)
+        }
+        idx = list(match(period, mm$items))
+        names(idx) = paste0("periods", paste(range(period), collapse=":"))
+    } else if (period == "post") {
         idx = list("post" = (ref_id + 1):length(coefs))
     } else if (period == "pre") { ## still need to handle the "both" option
         idx = list("pre" = 1:(ref_id - 1)) # ref period is dropped from the model
-    } else {
+    } else if (period == "both") {
         idx = list("pre" = 1:(ref_id - 1), "post" = (ref_id + 1):length(coefs))
     }
     ## We're doing a bit more work than we need to here with the lapply call and
@@ -94,7 +117,7 @@ aggr_es = function(object,
     hyp_attr = sapply(res, function(x) {attributes(x)["hypothesis"]})
     res = do.call("rbind", res)
     row.names(res) = NULL
-    if (period == "both") {
+    if (!is.numeric(period) && period == "both") {
         attributes(res) = utils::modifyList(attributes(res), hyp_attr)
         attributes(res)["hypothesis"] = NULL
     }

diff --git a/R/ggcoefplot.R b/R/ggcoefplot.R
@@ -18,10 +18,12 @@
 #'   a continuous relationship among the coefficients.
 #' @param multi_style Character string. One of `c('dodge', 'facet')`, defining
 #'   how multi-model objects should be presented.
-#' @param aggr_eff A character string indicating whether the aggregated mean
-#'   post- (and/or pre-) treatment effect should be plotted alongside the
-#'   individual period effects. Should be one of "none" (the default), "post",
-#'   "pre", or "both".
+#' @param aggr_eff A keyword string or numeric sequence, indicating whether
+#'   mean treatment effects for some subset of the model should be displayed as
+#'   part of the plot. For example, the "post" keyword means that the mean
+#'   post-treatment effect will be plotted alongside the individual period
+#'   effects. Passed to [`aggr_es`]; see that function's documentation for other
+#'   valid options.
 #' @param aggr_eff.par List. Parameters of the aggregated treatment effect line,
 #'   if plotted. The default values are `col = 'gray50'`, `lwd = 1`, `lty = 1`.
 #' @param facet_args A list of arguments passed down to `ggplot2::facet_wrap()`.

diff --git a/R/ggiplot.R b/R/ggiplot.R
@@ -8,7 +8,7 @@ ggiplot = function(
 	object,
 	geom_style = c('pointrange', 'errorbar', 'ribbon'),
 	multi_style = c('dodge', 'facet'),
-	aggr_eff = c('none', 'post', 'pre', 'both'),
+	aggr_eff = NULL,
 	aggr_eff.par = list(col = 'grey50', lwd = 1, lty = 1),
 	facet_args = NULL,
 	theme = NULL,
@@ -17,7 +17,8 @@ ggiplot = function(
 
   geom_style = match.arg(geom_style)
   multi_style = match.arg(multi_style)
-  aggr_eff = match.arg(aggr_eff)
+  # aggr_eff = match.arg(aggr_eff)
+  if (is.null(aggr_eff)) aggr_eff = "none"
   aggr_eff.par = utils::modifyList(list(col = "grey50", lwd = 1, lty = 1), aggr_eff.par)
 
   dots = list(...)
@@ -217,7 +218,7 @@ ggiplot = function(
           }
       } +
       {
-          if (aggr_eff != "none") {
+          if (is.numeric(aggr_eff) || aggr_eff != "none") {
               geom_line(aes(y = aggr_eff), col = aggr_eff.par$col, lwd = aggr_eff.par$lwd, lty = aggr_eff.par$lty)
           }
       } +

diff --git a/R/iplot_data.R b/R/iplot_data.R
@@ -38,10 +38,10 @@
 #'   variables created with i. This is an index, just try increasing numbers to
 #'   hopefully obtain what you want. Passed down to
 #'   `fixest::iplot(..., i.select = .i.select)`
-#' @param .aggr_es A character string indicating whether the aggregated mean
-#' post- (and/or pre-) treatment effect should be added as a column to the
-#' returned data frame. Passed to `aggr_es(..., aggregation = "mean")` and
-#' should be one of "none" (the default), "post", "pre", or "both".
+#' @param .aggr_es A keyword string or numeric sequence indicating whether the
+#' aggregated mean treatment effects for some subset of the model should be
+#' added as a column to the returned data frame. Passed to
+#' `aggr_es(..., aggregation = "mean")`.
 #' @details This function is a wrapper around
 #' `fixest::iplot(..., only.params = TRUE)`, but with various checks and tweaks
 #' to better facilitate plotting with `ggplot2` and handling of complex object
@@ -75,14 +75,16 @@ iplot_data = function(
 		.dict = fixest::getFixest_dict(),
 		.internal.only.i = TRUE,
 		.i.select = 1,
-		.aggr_es = c("none", "post", "pre", "both"),
+		# .aggr_es = c("none", "post", "pre", "both"),
+		.aggr_es = NULL,
 		.group = "auto"
 	) {
 
-	.aggr_es = match.arg(.aggr_es)
+	# .aggr_es = match.arg(.aggr_es)
+	if (is.null(.aggr_es)) .aggr_es = "none"
 	if (isFALSE(.internal.only.i)) {
 		## No pre/post aggregation allowed for coefplot
-		if (.aggr_es!="none") warning("The .aggr_es argument will be ignored with (gg)coefplot calls.\n")
+		if (is.numeric(.aggr_es) || .aggr_es!="none") warning("The .aggr_es argument will be ignored with (gg)coefplot calls.\n")
 		.aggr_es = "none"
 	}
 
@@ -382,7 +384,12 @@ iplot_data = function(
   }
 
 
-  if (.aggr_es != "none") {
+  if (is.numeric(.aggr_es)) {
+  	ea = aggr_es(object, period = .aggr_es)
+  	d$aggr_eff = NA
+  	# d$aggr_eff[match(.aggr_es, )]
+  	d$aggr_eff[match(.aggr_es, d$estimate_names)] = ea$estimate[1]
+  } else if (.aggr_es != "none") {
   	ea = aggr_es(object, period = .aggr_es)
   	ref_idx = which(d$is_ref)
   	d$aggr_eff = 0

diff --git a/inst/tinytest/test_aggr_es.R b/inst/tinytest/test_aggr_es.R
@@ -0,0 +1,92 @@
+library(ggfixest)
+library(tinytest)
+
+#
+# Datasets and models ----
+
+data("base_did", package = "fixest")
+
+est = fixest::feols(
+	fml = y ~ x1 + i(period, treat, 5) | id + period,
+	data = base_did
+)
+
+aggr_post = aggr_es(est)                             # default post
+aggr_cum  = aggr_es(est, aggregation = "cumulative") # cumulative instead of mean effects
+aggr_pre  = aggr_es(est, period = "pre")             # pre period instead of post
+aggr_both = aggr_es(est, period = "both")            # pre & post periods separately
+aggr_rhs1 = aggr_es(est, period = "pre", rhs = -1)   # pre period with H0 value of -1
+
+#
+# Known output ----
+
+aggr_post_known = data.frame(
+	term = "post-treatment (mean)",
+	estimate = 3.906554122950695,
+	std.error = 0.8598575665281263,
+	statistic = 4.543257249830702,
+	p.value = 5.5391585244779775e-06,
+	s.value = 17.461901741925015,
+	conf.low = 2.2212642607213144,
+	conf.high = 5.591843985180075
+)
+
+
+aggr_cum_known = data.frame(
+	term = "post-treatment (cumulative)",
+	estimate = 19.532770614753474,
+	std.error = 4.299287821377354,
+	statistic = 4.543257261733131,
+	p.value = 5.539158211582727e-06,
+	s.value = 17.461901823419783,
+	conf.low = 11.106321325682188,
+	conf.high = 27.95921990382476
+)
+
+aggr_pre_known = data.frame(
+	term = "pre-treatment (mean)",
+	estimate = -1.1798706992411545,
+	std.error = 0.8561963882056884,
+	statistic = -1.3780374637106132,
+	p.value = 0.16819172163573184,
+	s.value = 2.5718213967199377,
+	conf.low = -2.857984783817578,
+	conf.high = 0.49824338533526924
+)
+
+aggr_both_known =
+	data.frame(
+		term = c("pre-treatment (mean)", "post-treatment (mean)"),
+		estimate = c(-1.1798706992411545, 3.906554122950695),
+		std.error = c(0.8561963882056884, 0.8598575665281263),
+		statistic = c(-1.3780374637106132, 4.543257249830702),
+		p.value = c(0.16819172163573184, 5.5391585244779775e-06),
+		s.value = c(2.5718213967199377, 17.461901741925015),
+		conf.low = c(-2.857984783817578, 2.2212642607213144),
+		conf.high = c(0.49824338533526924, 5.591843985180075)
+	)
+
+aggr_rhs1_known = data.frame(
+	term = "pre-treatment (mean)",
+	estimate = -0.1798706992411545,
+	std.error = 0.8561963882056884,
+	statistic = -0.21008112358206216,
+	p.value = 0.8336043579365446,
+	s.value = 0.26256527509602834,
+	conf.low = -1.8579847838175783,
+	conf.high = 1.4982433853352692
+)
+
+
+#
+# tests ----
+tol = 1e-6
+
+for (col in c("term", "estimate", "std.error", "statistic", "p.value", "s.value",
+							"conf.low", "conf.high")) {
+	expect_equivalent(aggr_post[[col]], aggr_post_known[[col]], tolerance = tol)
+	expect_equivalent(aggr_cum[[col]], aggr_cum_known[[col]], tolerance = tol)
+	expect_equivalent(aggr_pre[[col]], aggr_pre_known[[col]], tolerance = tol)
+	expect_equivalent(aggr_both[[col]], aggr_both_known[[col]], tolerance = tol)
+	expect_equivalent(aggr_rhs1[[col]], aggr_rhs1_known[[col]], tolerance = tol)
+}
diff --git a/man/aggr_es.Rd b/man/aggr_es.Rd
diff --git a/man/ggcoefplot.Rd b/man/ggcoefplot.Rd