Skip to content

Commit

Permalink
Support numeric aggregation (#33)
Browse files Browse the repository at this point in the history
* Support numeric sequence for period argument of aggr_es.

* Better example

* Add tests

* NEWS and version bump

* docs
  • Loading branch information
grantmcdermott authored Dec 12, 2023
1 parent f55f0dc commit 4b97b9d
Show file tree
Hide file tree
Showing 10 changed files with 186 additions and 45 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Package: ggfixest
Title: Dedicated ggplot2 methods for fixest objects
Version: 0.0.3
Version: 0.0.3.9000
Authors@R:
c(person(given = "Grant",
family = "McDermott",
Expand Down
10 changes: 10 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,13 @@
# ggfixest 0.0.3.9000 (development version)

## New features

- The `aggr_es` function now supports numeric sequences for aggregating
specific period subsets, in addition to the existing keyword strings like "pre"
or "post". This functionality passes through to the higher order functions that
call `aggr_es` under the hood. (#33)


# ggfixest 0.0.3

## Breaking change
Expand Down
45 changes: 34 additions & 11 deletions R/aggr_es.R
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,10 @@
#' @param object A model object of class `fixest`, where the `i()` operator has
#' been used to facilitate an "event-study" DiD design. See Examples.
#' @param rhs Numeric. The null hypothesis value. Defaults to 0.
#' @param period Character string. Which group of periods are we aggregating?
#' One of "post" (the default), "prep", or "both".
#' @param period Keyword string or numeric sequence. Which group of periods
#' are we aggregating? Can either be one of three convenience strings---i.e.,
#' "post" (the default), "pre", or "both"---or a numeric sequence that matches
#' a subset of periods in the data (e.g. 6:8).
#' @param aggregation Character string. The aggregation type. Either "mean" (the
#' default) or "cumulative".
#' @param abbr_term Logical. Should the leading "term" column of the return
Expand All @@ -30,7 +32,7 @@
#' hypothesis test is also provided as an attribute. See Examples.
#' @export
#' @examples
#' library(fixest)
#' library(ggfixest) ## Will load fixest too
#'
#' est = feols(y ~ x1 + i(period, treat, 5) | id + period, base_did)
#'
Expand All @@ -40,19 +42,26 @@
#' attributes(post_mean)["hypothesis"]
#'
#' # Other hypothesis and aggregation options
#' aggr_es(est, aggregation = "cumulative")
#' aggr_es(est, period = "both")
#' aggr_es(est, rhs = -1, period = "pre")
#' aggr_es(est, aggregation = "cumulative") # cumulative instead of mean effects
#' aggr_es(est, period = "pre") # pre period instead of post
#' aggr_es(est, period = "both") # pre & post periods separately
#' aggr_es(est, period = 6:8) # specific subset of periods
#' aggr_es(est, rhs = -1, period = "pre") # pre period with H0 value of -1
#' # Etc.
#'
aggr_es = function(object,
rhs = 0,
period = c("post", "pre", "both"),
# period = c("post", "pre", "both"),
period = "post",
aggregation = c("mean", "cumulative"),
abbr_term = TRUE,
...) {
aggregation = match.arg(aggregation)
period = match.arg(period)
if (!(any(period %in% c("post", "pre", "both")) || is.numeric(period))) {
stop('The `period` argument must be one of c("post", "pre", "both"), or a numeric sequence.')
}
# period = match.arg(period)

fixest_obj = inherits(object, "fixest")
if (!fixest_obj) stop("Please provide a valid fixest object.")
mm = object$model_matrix_info[[1]]
Expand All @@ -65,11 +74,25 @@ aggr_es = function(object,
coefs = mm$coef_names_full
ref_id = mm$ref_id[1]
## Store our periods in a list to make the below lapply call easier
if (period == "post") {
if (is.numeric(period)) {
if (!all(period %in% mm$items)) {
stop(
'\nSupplied period sequence does not match periods in model object.',
'\nUser-supplied periods: ', period,
'Model periods: ', mm$items
)
}
if (any(period %in% mm$ref)) {
warning("\nThe reference period, ", mm$ref, ", cannot be included in the aggregation and will be dropped.")
period = setdiff(period, mm$ref)
}
idx = list(match(period, mm$items))
names(idx) = paste0("periods", paste(range(period), collapse=":"))
} else if (period == "post") {
idx = list("post" = (ref_id + 1):length(coefs))
} else if (period == "pre") { ## still need to handle the "both" option
idx = list("pre" = 1:(ref_id - 1)) # ref period is dropped from the model
} else {
} else if (period == "both") {
idx = list("pre" = 1:(ref_id - 1), "post" = (ref_id + 1):length(coefs))
}
## We're doing a bit more work than we need to here with the lapply call and
Expand All @@ -94,7 +117,7 @@ aggr_es = function(object,
hyp_attr = sapply(res, function(x) {attributes(x)["hypothesis"]})
res = do.call("rbind", res)
row.names(res) = NULL
if (period == "both") {
if (!is.numeric(period) && period == "both") {
attributes(res) = utils::modifyList(attributes(res), hyp_attr)
attributes(res)["hypothesis"] = NULL
}
Expand Down
10 changes: 6 additions & 4 deletions R/ggcoefplot.R
Original file line number Diff line number Diff line change
Expand Up @@ -18,10 +18,12 @@
#' a continuous relationship among the coefficients.
#' @param multi_style Character string. One of `c('dodge', 'facet')`, defining
#' how multi-model objects should be presented.
#' @param aggr_eff A character string indicating whether the aggregated mean
#' post- (and/or pre-) treatment effect should be plotted alongside the
#' individual period effects. Should be one of "none" (the default), "post",
#' "pre", or "both".
#' @param aggr_eff A keyword string or numeric sequence, indicating whether
#' mean treatment effects for some subset of the model should be displayed as
#' part of the plot. For example, the "post" keyword means that the mean
#' post-treatment effect will be plotted alongside the individual period
#' effects. Passed to [`aggr_es`]; see that function's documentation for other
#' valid options.
#' @param aggr_eff.par List. Parameters of the aggregated treatment effect line,
#' if plotted. The default values are `col = 'gray50'`, `lwd = 1`, `lty = 1`.
#' @param facet_args A list of arguments passed down to `ggplot2::facet_wrap()`.
Expand Down
7 changes: 4 additions & 3 deletions R/ggiplot.R
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ ggiplot = function(
object,
geom_style = c('pointrange', 'errorbar', 'ribbon'),
multi_style = c('dodge', 'facet'),
aggr_eff = c('none', 'post', 'pre', 'both'),
aggr_eff = NULL,
aggr_eff.par = list(col = 'grey50', lwd = 1, lty = 1),
facet_args = NULL,
theme = NULL,
Expand All @@ -17,7 +17,8 @@ ggiplot = function(

geom_style = match.arg(geom_style)
multi_style = match.arg(multi_style)
aggr_eff = match.arg(aggr_eff)
# aggr_eff = match.arg(aggr_eff)
if (is.null(aggr_eff)) aggr_eff = "none"
aggr_eff.par = utils::modifyList(list(col = "grey50", lwd = 1, lty = 1), aggr_eff.par)

dots = list(...)
Expand Down Expand Up @@ -217,7 +218,7 @@ ggiplot = function(
}
} +
{
if (aggr_eff != "none") {
if (is.numeric(aggr_eff) || aggr_eff != "none") {
geom_line(aes(y = aggr_eff), col = aggr_eff.par$col, lwd = aggr_eff.par$lwd, lty = aggr_eff.par$lty)
}
} +
Expand Down
23 changes: 15 additions & 8 deletions R/iplot_data.R
Original file line number Diff line number Diff line change
Expand Up @@ -38,10 +38,10 @@
#' variables created with i. This is an index, just try increasing numbers to
#' hopefully obtain what you want. Passed down to
#' `fixest::iplot(..., i.select = .i.select)`
#' @param .aggr_es A character string indicating whether the aggregated mean
#' post- (and/or pre-) treatment effect should be added as a column to the
#' returned data frame. Passed to `aggr_es(..., aggregation = "mean")` and
#' should be one of "none" (the default), "post", "pre", or "both".
#' @param .aggr_es A keyword string or numeric sequence indicating whether the
#' aggregated mean treatment effects for some subset of the model should be
#' added as a column to the returned data frame. Passed to
#' `aggr_es(..., aggregation = "mean")`.
#' @details This function is a wrapper around
#' `fixest::iplot(..., only.params = TRUE)`, but with various checks and tweaks
#' to better facilitate plotting with `ggplot2` and handling of complex object
Expand Down Expand Up @@ -75,14 +75,16 @@ iplot_data = function(
.dict = fixest::getFixest_dict(),
.internal.only.i = TRUE,
.i.select = 1,
.aggr_es = c("none", "post", "pre", "both"),
# .aggr_es = c("none", "post", "pre", "both"),
.aggr_es = NULL,
.group = "auto"
) {

.aggr_es = match.arg(.aggr_es)
# .aggr_es = match.arg(.aggr_es)
if (is.null(.aggr_es)) .aggr_es = "none"
if (isFALSE(.internal.only.i)) {
## No pre/post aggregation allowed for coefplot
if (.aggr_es!="none") warning("The .aggr_es argument will be ignored with (gg)coefplot calls.\n")
if (is.numeric(.aggr_es) || .aggr_es!="none") warning("The .aggr_es argument will be ignored with (gg)coefplot calls.\n")
.aggr_es = "none"
}

Expand Down Expand Up @@ -382,7 +384,12 @@ iplot_data = function(
}


if (.aggr_es != "none") {
if (is.numeric(.aggr_es)) {
ea = aggr_es(object, period = .aggr_es)
d$aggr_eff = NA
# d$aggr_eff[match(.aggr_es, )]
d$aggr_eff[match(.aggr_es, d$estimate_names)] = ea$estimate[1]
} else if (.aggr_es != "none") {
ea = aggr_es(object, period = .aggr_es)
ref_idx = which(d$is_ref)
d$aggr_eff = 0
Expand Down
92 changes: 92 additions & 0 deletions inst/tinytest/test_aggr_es.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
library(ggfixest)
library(tinytest)

#
# Datasets and models ----

data("base_did", package = "fixest")

# Event-study model with period 5 as the reference period.
est = fixest::feols(
  fml = y ~ x1 + i(period, treat, 5) | id + period,
  data = base_did
)

# Keyword-based aggregations
aggr_post = aggr_es(est)                              # default post
aggr_cum  = aggr_es(est, aggregation = "cumulative")  # cumulative instead of mean effects
aggr_pre  = aggr_es(est, period = "pre")              # pre period instead of post
aggr_both = aggr_es(est, period = "both")             # pre & post periods separately
aggr_rhs1 = aggr_es(est, period = "pre", rhs = -1)    # pre period with H0 value of -1

# Numeric-sequence aggregations.
# NOTE(review): assumes base_did spans periods 1-10, so that 1:4 and 6:10 are
# exactly the pre and post windows around the reference period -- confirm.
aggr_num_pre  = aggr_es(est, period = 1:4)
aggr_num_post = aggr_es(est, period = 6:10)

#
# Known output ----

aggr_post_known = data.frame(
  term      = "post-treatment (mean)",
  estimate  = 3.906554122950695,
  std.error = 0.8598575665281263,
  statistic = 4.543257249830702,
  p.value   = 5.5391585244779775e-06,
  s.value   = 17.461901741925015,
  conf.low  = 2.2212642607213144,
  conf.high = 5.591843985180075
)

aggr_cum_known = data.frame(
  term      = "post-treatment (cumulative)",
  estimate  = 19.532770614753474,
  std.error = 4.299287821377354,
  statistic = 4.543257261733131,
  p.value   = 5.539158211582727e-06,
  s.value   = 17.461901823419783,
  conf.low  = 11.106321325682188,
  conf.high = 27.95921990382476
)

aggr_pre_known = data.frame(
  term      = "pre-treatment (mean)",
  estimate  = -1.1798706992411545,
  std.error = 0.8561963882056884,
  statistic = -1.3780374637106132,
  p.value   = 0.16819172163573184,
  s.value   = 2.5718213967199377,
  conf.low  = -2.857984783817578,
  conf.high = 0.49824338533526924
)

aggr_both_known = data.frame(
  term      = c("pre-treatment (mean)", "post-treatment (mean)"),
  estimate  = c(-1.1798706992411545, 3.906554122950695),
  std.error = c(0.8561963882056884, 0.8598575665281263),
  statistic = c(-1.3780374637106132, 4.543257249830702),
  p.value   = c(0.16819172163573184, 5.5391585244779775e-06),
  s.value   = c(2.5718213967199377, 17.461901741925015),
  conf.low  = c(-2.857984783817578, 2.2212642607213144),
  conf.high = c(0.49824338533526924, 5.591843985180075)
)

aggr_rhs1_known = data.frame(
  term      = "pre-treatment (mean)",
  estimate  = -0.1798706992411545,
  std.error = 0.8561963882056884,
  statistic = -0.21008112358206216,
  p.value   = 0.8336043579365446,
  s.value   = 0.26256527509602834,
  conf.low  = -1.8579847838175783,
  conf.high = 1.4982433853352692
)


#
# tests ----
tol = 1e-6

num_cols = c("estimate", "std.error", "statistic", "p.value", "s.value",
             "conf.low", "conf.high")

# Keyword periods: every column, including the term label, must match.
for (col in c("term", num_cols)) {
  expect_equivalent(aggr_post[[col]], aggr_post_known[[col]], tolerance = tol)
  expect_equivalent(aggr_cum[[col]], aggr_cum_known[[col]], tolerance = tol)
  expect_equivalent(aggr_pre[[col]], aggr_pre_known[[col]], tolerance = tol)
  expect_equivalent(aggr_both[[col]], aggr_both_known[[col]], tolerance = tol)
  expect_equivalent(aggr_rhs1[[col]], aggr_rhs1_known[[col]], tolerance = tol)
}

# Numeric periods: a sequence covering the full pre (post) window should
# reproduce the "pre" ("post") keyword results. The term label is named after
# the supplied period range, so only the numeric columns are compared.
for (col in num_cols) {
  expect_equivalent(aggr_num_pre[[col]], aggr_pre_known[[col]], tolerance = tol)
  expect_equivalent(aggr_num_post[[col]], aggr_post_known[[col]], tolerance = tol)
}

# Input validation: unknown keywords and periods absent from the model error.
expect_error(aggr_es(est, period = "during"))
expect_error(aggr_es(est, period = 11:12))
18 changes: 11 additions & 7 deletions man/aggr_es.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

14 changes: 8 additions & 6 deletions man/ggcoefplot.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading

0 comments on commit 4b97b9d

Please sign in to comment.