From 2f3bacffb7d1b953afb7bfd724cfc4cd0c3ab6b7 Mon Sep 17 00:00:00 2001 From: pedrohcgs Date: Mon, 2 Mar 2026 09:52:21 -0500 Subject: [PATCH 1/9] Suppress Wald pre-test with message when extra clustering is used When clustervars includes a variable beyond idname, the analytical variance matrix V = t(inffunc) %*% inffunc / n ignores between-cluster correlation, making the Wald pre-test anti-conservative. Instead of silently producing a misleading p-value, emit a message() explaining why the test is skipped and direct users to bootstrap CIs. The test remains valid (and is reported as before) for balanced panels, unbalanced panels (influence functions aggregated to unit level), and repeated cross-sections (CLT applies per independent observation). Closes #186. --- R/att_gt.R | 105 ++++++++++++++++++++++++++++++++++------------------- 1 file changed, 68 insertions(+), 37 deletions(-) diff --git a/R/att_gt.R b/R/att_gt.R index 5b3674b5..8c539559 100644 --- a/R/att_gt.R +++ b/R/att_gt.R @@ -378,46 +378,77 @@ att_gt <- function(yname, # compute Wald pre-test #----------------------------------------------------------------------------- - # select which periods are pre-treatment - pre <- which(group > tt) - - # Drop group-periods that have variance equal to zero (singularity problems) - if (length(zero_na_sd_entry) > 0) { - pre <- pre[!(pre %in% zero_na_sd_entry)] - } - # pseudo-atts in pre-treatment periods - preatt <- as.matrix(att[pre]) - - # covariance matrix of pre-treatment atts - preV <- as.matrix(V[pre, pre]) - - # check if there are actually any pre-treatment periods - if (length(preV) == 0) { - msg <- paste0( - "No pre-treatment periods available for the Wald pre-test of parallel trends. ", - "This can happen when all groups are first treated early in the panel ", - "(e.g., in the second time period) so that no pre-treatment ATT(g,t) estimates exist." + # Determine whether the analytical V is a valid basis for the Wald pre-test. + # V = t(inffunc) %*% inffunc / n is valid for balanced panels, unbalanced + # panels (influence functions are aggregated to the unit level), and repeated + # cross-sections (CLT applies per independent observation). + # + # It is invalid when an extra cluster variable (beyond idname) is specified: + # the analytical V ignores between-cluster correlation, making the Wald stat + # anti-conservative. In that case we skip the test and message the user to + # rely on bootstrap confidence intervals instead. + + extra_clustervars <- clustervars[!(clustervars %in% c(idname, ""))] + extra_clustervars <- extra_clustervars[!is.null(extra_clustervars)] + + wald_invalid <- NULL + if (length(extra_clustervars) > 0) { + wald_invalid <- paste0( + "The Wald pre-test is not reported when clustering beyond the unit level ", + "(clustervars = '", paste(extra_clustervars, collapse = "', '"), "') ", + "because the analytical variance matrix does not account for between-cluster ", + "correlation. Use the bootstrap confidence intervals to assess pre-trends." ) - if (anticipation > 0) { - msg <- paste0(msg, " Note: anticipation=", anticipation, " further reduces the number of available pre-treatment periods.") - } - warning(msg) - W <- NULL - Wpval <- NULL - } else if (sum(is.na(preV))) { - warning("Not returning pre-test Wald statistic due to NA pre-treatment values") - W <- NULL - Wpval <- NULL - } else if (rcond(preV) <= .Machine$double.eps) { - # singular covariance matrix for pre-treatment periods - warning("Not returning pre-test Wald statistic due to singular covariance matrix") + } + + if (!is.null(wald_invalid)) { + message(wald_invalid) W <- NULL Wpval <- NULL - } else { - # everything is working... - W <- n * t(preatt) %*% solve(preV) %*% preatt - q <- length(pre) # number of restrictions - Wpval <- round(1 - pchisq(W, q), 5) + } + + if (is.null(wald_invalid)) { + # select which periods are pre-treatment + pre <- which(group > tt) + + # Drop group-periods that have variance equal to zero (singularity problems) + if (length(zero_na_sd_entry) > 0) { + pre <- pre[!(pre %in% zero_na_sd_entry)] + } + # pseudo-atts in pre-treatment periods + preatt <- as.matrix(att[pre]) + + # covariance matrix of pre-treatment atts + preV <- as.matrix(V[pre, pre]) + + # check if there are actually any pre-treatment periods + if (length(preV) == 0) { + msg <- paste0( + "No pre-treatment periods available for the Wald pre-test of parallel trends. ", + "This can happen when all groups are first treated early in the panel ", + "(e.g., in the second time period) so that no pre-treatment ATT(g,t) estimates exist." + ) + if (anticipation > 0) { + msg <- paste0(msg, " Note: anticipation=", anticipation, " further reduces the number of available pre-treatment periods.") + } + warning(msg) + W <- NULL + Wpval <- NULL + } else if (sum(is.na(preV))) { + warning("Not returning pre-test Wald statistic due to NA pre-treatment values") + W <- NULL + Wpval <- NULL + } else if (rcond(preV) <= .Machine$double.eps) { + # singular covariance matrix for pre-treatment periods + warning("Not returning pre-test Wald statistic due to singular covariance matrix") + W <- NULL + Wpval <- NULL + } else { + # everything is working... + W <- n * t(preatt) %*% solve(preV) %*% preatt + q <- length(pre) # number of restrictions + Wpval <- round(1 - pchisq(W, q), 5) + } } From 93d692e8ef0d6c378e36c08881090af4d59759db Mon Sep 17 00:00:00 2001 From: pedrohcgs Date: Mon, 2 Mar 2026 11:34:04 -0500 Subject: [PATCH 2/9] Fix clustervars = c(idname, extra_var) crash in faster_mode path Two bugs fixed: 1. pre_process_did2.R: dreamerr's 'NULL | match' enforces scalar length, so passing clustervars = c(idname, extra_var) failed before idname could be stripped. Fix: strip idname from clustervars before the dreamerr check in validate_args, and again in the main body (validate_args works on a local copy). The non-faster_mode path (mboot.R) already handled this. 2. att_gt.R: extra_clustervars[!is.null(extra_clustervars)] is a no-op for NULL but produces NA_character_ when applied to character(0), causing a spurious Wald suppression message. Fix: remove the redundant line since the filter already produces character(0) when no extra vars remain. Closes #175. --- R/att_gt.R | 1 - R/pre_process_did2.R | 24 ++++++++++++++++++------ 2 files changed, 18 insertions(+), 7 deletions(-) diff --git a/R/att_gt.R b/R/att_gt.R index 8c539559..b5663b41 100644 --- a/R/att_gt.R +++ b/R/att_gt.R @@ -389,7 +389,6 @@ att_gt <- function(yname, # rely on bootstrap confidence intervals instead. extra_clustervars <- clustervars[!(clustervars %in% c(idname, ""))] - extra_clustervars <- extra_clustervars[!is.null(extra_clustervars)] wald_invalid <- NULL if (length(extra_clustervars) > 0) { diff --git a/R/pre_process_did2.R b/R/pre_process_did2.R index f0e55c0e..733fbe56 100644 --- a/R/pre_process_did2.R +++ b/R/pre_process_did2.R @@ -19,6 +19,14 @@ validate_args <- function(args, data){ name_message <- "__ARG__ must be a character scalar and a name of a column from the dataset." dreamerr::check_set_arg(args$tname, args$gname, args$yname, "match", .choices = data_names, .message = name_message, .up = 1) + # Strip idname from clustervars before validation: users may naturally pass + # clustervars = c(idname, extra_var), and idname clustering is implicit/redundant. + # This must happen before the dreamerr check which enforces scalar length. + if (!is.null(args$clustervars) && !is.null(args$idname) && (args$idname %in% args$clustervars)) { + args$clustervars <- setdiff(args$clustervars, args$idname) + if (length(args$clustervars) == 0L) args$clustervars <- NULL + } + # Flag for clustervars and weightsname checkvar_message <- "__ARG__ must be NULL or a character scalar that is a name of a column from the dataset." dreamerr::check_set_arg(args$weightsname, args$clustervars, "NULL | match", .choices = data_names, .message = checkvar_message, .up = 1) @@ -60,13 +68,9 @@ validate_args <- function(args, data){ dreamerr::check_set_arg(args$base_period, "match", .choices = c("universal", "varying"), .message = base_period_message, .up = 1) # Flags for cluster variable + # Note: idname was already stripped from clustervars above (before dreamerr check) if (!is.null(args$clustervars)) { - # dropping idname from cluster - if ((!is.null(args$idname)) && (args$idname %in% args$clustervars)){ - args$clustervars <- setdiff(args$clustervars, args$idname) - } - - # check if user is providing more than 2 cluster variables (different than idname) + # check if user is providing more than 1 cluster variable (beyond idname) if (length(args$clustervars) > 1) { stop("You can only provide 1 cluster variable additionally to the one provided in idname. Please check your arguments.") } @@ -584,6 +588,14 @@ pre_process_did2 <- function(yname, # run error checking on arguments validate_args(args, data) + # Strip idname from clustervars in the caller's copy of args. + # validate_args works on a local copy, so we must repeat this here. + # Clustering at the unit level (idname) is already done implicitly. + if (!is.null(args$clustervars) && !is.null(args$idname) && (args$idname %in% args$clustervars)) { + args$clustervars <- setdiff(args$clustervars, args$idname) + if (length(args$clustervars) == 0L) args$clustervars <- NULL + } + # put in blank xformla if no covariates if (is.null(args$xformla)) { args$xformla <- ~1 From 9c008ee1ef04ff4cee0ae657d3232fb141ff406c Mon Sep 17 00:00:00 2001 From: pedrohcgs Date: Mon, 2 Mar 2026 11:43:41 -0500 Subject: [PATCH 3/9] Add treatment irreversibility check to non-faster_mode path pre_process_did.R had this check commented out as 'too slow'. Replaced the slow sapply/split approach with unique() + anyDuplicated(), which is O(n) and matches the check already in pre_process_did2.R (faster_mode). Both paths now throw the same clear error when gname is not constant within a unit across periods. Closes #138. --- R/pre_process_did.R | 22 ++++++---------------- 1 file changed, 6 insertions(+), 16 deletions(-) diff --git a/R/pre_process_did.R b/R/pre_process_did.R index 7aee0802..3ee13c5e 100644 --- a/R/pre_process_did.R +++ b/R/pre_process_did.R @@ -202,22 +202,12 @@ pre_process_did <- function(yname, # make sure id is numeric if (! (is.numeric(data[, idname])) ) stop("The id variable '", idname, "' must be numeric. Please convert it.") - ## # checks below are useful, but removing due to being slow - ## # might ought to figure out a way to do these faster later - ## # these checks are also closely related to making sure - ## # that we have a well-balanced panel, so it might make - ## # sense to move them over to the BMisc package - - ## # Check if idname is unique by tname - ## n_id_year = all( table(data[, idname], data[, tname]) <= 1) - ## if (! n_id_year) stop("The value of idname must be the unique (by tname)") - - ## # make sure gname doesn't change across periods for particular individuals - ## if (!all(sapply( split(data, data[,idname]), function(df) { - ## length(unique(df[,gname]))==1 - ## }))) { - ## stop("The value of gname must be the same across all periods for each particular individual.") - ## } + # Check that gname is time-invariant within each unit (treatment irreversibility). + # Uses unique() + anyDuplicated() for O(n) performance. + id_g_unique <- unique(data[, c(idname, gname)]) + if (anyDuplicated(id_g_unique[[idname]]) != 0L) { + stop("The value of gname (treatment variable) must be the same across all periods for each particular unit. The treatment must be irreversible.") + } } From 2848590280c3561761d683d7b4c3f2258e1b6b01 Mon Sep 17 00:00:00 2001 From: pedrohcgs Date: Mon, 2 Mar 2026 12:02:49 -0500 Subject: [PATCH 4/9] Improve roxygen documentation for tidy.MP and tidy.AGGTEobj Document all return columns explicitly, explain the distinction between conf.low/conf.high (simultaneous bootstrap bands) and point.conf.low/point.conf.high (pointwise qnorm intervals), and add a @details section clarifying when to use each. --- R/tidy.R | 46 ++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 44 insertions(+), 2 deletions(-) diff --git a/R/tidy.R b/R/tidy.R index 44a95659..b7847801 100644 --- a/R/tidy.R +++ b/R/tidy.R @@ -6,11 +6,27 @@ generics::tidy #' @export generics::glance -#' tidy results from MP objects +#' Tidy an MP object into a data frame +#' +#' Returns a tidy data frame of group-time average treatment effect estimates +#' from an [att_gt()] result. #' #' @importFrom generics tidy #' @param x a model of class MP produced by the [att_gt()] function #' @param ... Additional arguments to tidying method. +#' +#' @return A data frame with one row per ATT(g,t) estimate and columns: +#' \item{term}{ATT(g,t) label} +#' \item{group}{the treatment cohort g} +#' \item{time}{the time period t} +#' \item{estimate}{the ATT(g,t) point estimate} +#' \item{std.error}{standard error} +#' \item{conf.low, conf.high}{simultaneous confidence band limits, using the +#' bootstrap uniform critical value when \code{bstrap=TRUE} and +#' \code{cband=TRUE}, otherwise pointwise} +#' \item{point.conf.low, point.conf.high}{pointwise confidence interval limits +#' using \code{qnorm(1 - alp/2)}, always} +#' #' @export tidy.MP <- function(x, ...) { out <- data.frame( @@ -42,11 +58,37 @@ glance.MP <- function(x, ...) { out } -#' tidy results from AGGTEobj objects +#' Tidy an AGGTEobj into a data frame +#' +#' Returns a tidy data frame of aggregated treatment effect estimates from an +#' [aggte()] result. #' #' @importFrom generics tidy #' @param x a model of class AGGTEobj produced by the [aggte()] function #' @param ... Additional arguments to tidying method. +#' +#' @return A data frame whose columns depend on \code{type}: +#' \item{type}{the aggregation type: \code{"simple"}, \code{"dynamic"}, +#' \code{"group"}, or \code{"calendar"}} +#' \item{term}{label for each estimate} +#' \item{estimate}{point estimate} +#' \item{std.error}{standard error} +#' \item{conf.low, conf.high}{simultaneous confidence band limits. When +#' \code{bstrap=TRUE} and \code{cband=TRUE} these use the bootstrap uniform +#' critical value (\code{crit.val.egt}); otherwise they equal the pointwise +#' intervals. For \code{type="simple"} and the overall average row of +#' \code{type="group"}, a single scalar is returned so simultaneous and +#' pointwise coincide.} +#' \item{point.conf.low, point.conf.high}{pointwise confidence interval limits +#' always using \code{qnorm(1 - alp/2)}.} +#' +#' @details +#' The key distinction between \code{conf.low}/\code{conf.high} and +#' \code{point.conf.low}/\code{point.conf.high} is that the former accounts for +#' multiple testing across all estimates (simultaneous coverage), while the +#' latter provides marginal (per-estimate) coverage only. Use the simultaneous +#' bands when you want to make joint inferences across all event times or groups. +#' #' @export tidy.AGGTEobj<- function(x, ...) { if(x$type == "dynamic"){ From befc19ff256f3cab6a5ae79421efb0f0c0cca3fb Mon Sep 17 00:00:00 2001 From: pedrohcgs Date: Mon, 2 Mar 2026 13:18:33 -0500 Subject: [PATCH 5/9] Add nobs() methods and statistic/p.value columns to tidy() output - Add nobs.MP and nobs.AGGTEobj S3 methods (closes #246) - Add statistic (t-stat) and p.value columns to tidy.MP and tidy.AGGTEobj following broom conventions: estimate | std.error | statistic | p.value | conf.low | conf.high (closes #243) - p.value is pointwise/marginal and documented as not accounting for multiple testing across ATT(g,t) cells - Covers all four aggregation types: dynamic, group, calendar, simple --- NAMESPACE | 3 +++ R/tidy.R | 46 +++++++++++++++++++++++++++++++++++++++++++- man/nobs.AGGTEobj.Rd | 19 ++++++++++++++++++ man/nobs.MP.Rd | 19 ++++++++++++++++++ man/tidy.AGGTEobj.Rd | 32 ++++++++++++++++++++++++++++-- man/tidy.MP.Rd | 22 +++++++++++++++++++-- 6 files changed, 136 insertions(+), 5 deletions(-) create mode 100644 man/nobs.AGGTEobj.Rd create mode 100644 man/nobs.MP.Rd diff --git a/NAMESPACE b/NAMESPACE index 89dad50f..c5dab2b9 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -4,6 +4,8 @@ S3method(ggdid,AGGTEobj) S3method(ggdid,MP) S3method(glance,AGGTEobj) S3method(glance,MP) +S3method(nobs,AGGTEobj) +S3method(nobs,MP) S3method(print,AGGTEobj) S3method(print,MP) S3method(summary,AGGTEobj) @@ -53,4 +55,5 @@ importFrom(generics,glance) importFrom(generics,tidy) importFrom(methods,as) importFrom(methods,is) +importFrom(stats,nobs) importFrom(tidyr,gather) diff --git a/R/tidy.R b/R/tidy.R index b7847801..44572993 100644 --- a/R/tidy.R +++ b/R/tidy.R @@ -6,6 +6,32 @@ generics::tidy #' @export generics::glance +#' Number of observations used to fit an MP object +#' +#' @importFrom stats nobs +#' @param object a model of class MP produced by the [att_gt()] function +#' @param ... additional arguments (ignored) +#' @return Integer. The number of unique units in the data. +#' @export +nobs.MP <- function(object, ...) { + object$n +} + +#' Number of observations used to fit an AGGTEobj object +#' +#' @importFrom stats nobs +#' @param object a model of class AGGTEobj produced by the [aggte()] function +#' @param ... additional arguments (ignored) +#' @return Integer. The number of unique units in the data. +#' @export +nobs.AGGTEobj <- function(object, ...) { + if (object$DIDparams$faster_mode) { + object$DIDparams$id_count + } else { + object$DIDparams$n + } +} + #' Tidy an MP object into a data frame #' #' Returns a tidy data frame of group-time average treatment effect estimates @@ -21,6 +47,10 @@ generics::glance #' \item{time}{the time period t} #' \item{estimate}{the ATT(g,t) point estimate} #' \item{std.error}{standard error} +#' \item{statistic}{t-statistic (\code{estimate / std.error})} +#' \item{p.value}{two-sided pointwise p-value (\code{2*(1-pnorm(|t|))}). +#' Marginal per-estimate; does \strong{not} account for multiple testing +#' across ATT(g,t) cells.} #' \item{conf.low, conf.high}{simultaneous confidence band limits, using the #' bootstrap uniform critical value when \code{bstrap=TRUE} and #' \code{cband=TRUE}, otherwise pointwise} @@ -35,6 +65,8 @@ tidy.MP <- function(x, ...) { time = x$t, estimate = x$att, std.error = x$se, + statistic = x$att / x$se, + p.value = 2 * (1 - stats::pnorm(abs(x$att / x$se))), conf.low = x$att - x$c * x$se, conf.high = x$att + x$c * x$se, point.conf.low = x$att - stats::qnorm(1 - x$alp/2) * x$se, @@ -73,6 +105,10 @@ glance.MP <- function(x, ...) { #' \item{term}{label for each estimate} #' \item{estimate}{point estimate} #' \item{std.error}{standard error} +#' \item{statistic}{t-statistic (\code{estimate / std.error})} +#' \item{p.value}{two-sided pointwise p-value (\code{2*(1-pnorm(|t|))}). +#' Marginal per-estimate; does \strong{not} account for multiple testing +#' across event times or groups.} #' \item{conf.low, conf.high}{simultaneous confidence band limits. When #' \code{bstrap=TRUE} and \code{cband=TRUE} these use the bootstrap uniform #' critical value (\code{crit.val.egt}); otherwise they equal the pointwise @@ -98,6 +134,8 @@ tidy.AGGTEobj<- function(x, ...) { event.time= x$egt, estimate = x$att.egt, std.error = x$se.egt, + statistic = x$att.egt / x$se.egt, + p.value = 2 * (1 - stats::pnorm(abs(x$att.egt / x$se.egt))), conf.low = x$att.egt - x$crit.val.egt * x$se.egt, conf.high = x$att.egt + x$crit.val.egt * x$se.egt, point.conf.low = x$att.egt - stats::qnorm(1 - x$DIDparams$alp/2) * x$se.egt, @@ -110,6 +148,8 @@ tidy.AGGTEobj<- function(x, ...) { group = c('Average', x$egt), estimate = c(x$overall.att, x$att.egt), std.error = c(x$overall.se, x$se.egt), + statistic = c(x$overall.att, x$att.egt) / c(x$overall.se, x$se.egt), + p.value = 2 * (1 - stats::pnorm(abs(c(x$overall.att, x$att.egt) / c(x$overall.se, x$se.egt)))), conf.low = c(x$overall.att - stats::qnorm(1 - x$DIDparams$alp/2) * x$overall.se, x$att.egt - x$crit.val.egt * x$se.egt), conf.high = c(x$overall.att + stats::qnorm(1 - x$DIDparams$alp/2) * x$overall.se, x$att.egt + x$crit.val.egt * x$se.egt), point.conf.low = c(x$overall.att - stats::qnorm(1 - x$DIDparams$alp/2) * x$overall.se, x$att.egt - stats::qnorm(1 - x$DIDparams$alp/2) * x$se.egt), @@ -123,18 +163,22 @@ tidy.AGGTEobj<- function(x, ...) { term = paste0('ATT(', x$egt, ")"), estimate = x$att.egt, std.error = x$se.egt, + statistic = x$att.egt / x$se.egt, + p.value = 2 * (1 - stats::pnorm(abs(x$att.egt / x$se.egt))), conf.low = x$att.egt - x$crit.val.egt * x$se.egt, conf.high = x$att.egt + x$crit.val.egt * x$se.egt, point.conf.low = x$att.egt - stats::qnorm(1 - x$DIDparams$alp/2) * x$se.egt, point.conf.high = x$att.egt + stats::qnorm(1 - x$DIDparams$alp/2) * x$se.egt) } - + if(x$type == "simple"){ out <- data.frame( type = x$type, term = 'ATT(simple average)', estimate = x$overall.att, std.error = x$overall.se, + statistic = x$overall.att / x$overall.se, + p.value = 2 * (1 - stats::pnorm(abs(x$overall.att / x$overall.se))), conf.low = x$overall.att - stats::qnorm(1 - x$DIDparams$alp/2) * x$overall.se, conf.high = x$overall.att + stats::qnorm(1 - x$DIDparams$alp/2) * x$overall.se, point.conf.low = x$overall.att - stats::qnorm(1 - x$DIDparams$alp/2) * x$overall.se, diff --git a/man/nobs.AGGTEobj.Rd b/man/nobs.AGGTEobj.Rd new file mode 100644 index 00000000..9e261250 --- /dev/null +++ b/man/nobs.AGGTEobj.Rd @@ -0,0 +1,19 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/tidy.R +\name{nobs.AGGTEobj} +\alias{nobs.AGGTEobj} +\title{Number of observations used to fit an AGGTEobj object} +\usage{ +\method{nobs}{AGGTEobj}(object, ...) +} +\arguments{ +\item{object}{a model of class AGGTEobj produced by the \code{\link[=aggte]{aggte()}} function} + +\item{...}{additional arguments (ignored)} +} +\value{ +Integer. The number of unique units in the data. +} +\description{ +Number of observations used to fit an AGGTEobj object +} diff --git a/man/nobs.MP.Rd b/man/nobs.MP.Rd new file mode 100644 index 00000000..6d3bec68 --- /dev/null +++ b/man/nobs.MP.Rd @@ -0,0 +1,19 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/tidy.R +\name{nobs.MP} +\alias{nobs.MP} +\title{Number of observations used to fit an MP object} +\usage{ +\method{nobs}{MP}(object, ...) +} +\arguments{ +\item{object}{a model of class MP produced by the \code{\link[=att_gt]{att_gt()}} function} + +\item{...}{additional arguments (ignored)} +} +\value{ +Integer. The number of unique units in the data. +} +\description{ +Number of observations used to fit an MP object +} diff --git a/man/tidy.AGGTEobj.Rd b/man/tidy.AGGTEobj.Rd index d714da7c..f9a51129 100644 --- a/man/tidy.AGGTEobj.Rd +++ b/man/tidy.AGGTEobj.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/tidy.R \name{tidy.AGGTEobj} \alias{tidy.AGGTEobj} -\title{tidy results from AGGTEobj objects} +\title{Tidy an AGGTEobj into a data frame} \usage{ \method{tidy}{AGGTEobj}(x, ...) } @@ -11,6 +11,34 @@ \item{...}{Additional arguments to tidying method.} } +\value{ +A data frame whose columns depend on \code{type}: +\item{type}{the aggregation type: \code{"simple"}, \code{"dynamic"}, +\code{"group"}, or \code{"calendar"}} +\item{term}{label for each estimate} +\item{estimate}{point estimate} +\item{std.error}{standard error} +\item{statistic}{t-statistic (\code{estimate / std.error})} +\item{p.value}{two-sided pointwise p-value (\code{2*(1-pnorm(|t|))}). +Marginal per-estimate; does \strong{not} account for multiple testing +across event times or groups.} +\item{conf.low, conf.high}{simultaneous confidence band limits. When +\code{bstrap=TRUE} and \code{cband=TRUE} these use the bootstrap uniform +critical value (\code{crit.val.egt}); otherwise they equal the pointwise +intervals. For \code{type="simple"} and the overall average row of +\code{type="group"}, a single scalar is returned so simultaneous and +pointwise coincide.} +\item{point.conf.low, point.conf.high}{pointwise confidence interval limits +always using \code{qnorm(1 - alp/2)}.} +} \description{ -tidy results from AGGTEobj objects +Returns a tidy data frame of aggregated treatment effect estimates from an +\code{\link[=aggte]{aggte()}} result. +} +\details{ +The key distinction between \code{conf.low}/\code{conf.high} and +\code{point.conf.low}/\code{point.conf.high} is that the former accounts for +multiple testing across all estimates (simultaneous coverage), while the +latter provides marginal (per-estimate) coverage only. Use the simultaneous +bands when you want to make joint inferences across all event times or groups. } diff --git a/man/tidy.MP.Rd b/man/tidy.MP.Rd index 4391782a..9164265f 100644 --- a/man/tidy.MP.Rd +++ b/man/tidy.MP.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/tidy.R \name{tidy.MP} \alias{tidy.MP} -\title{tidy results from MP objects} +\title{Tidy an MP object into a data frame} \usage{ \method{tidy}{MP}(x, ...) } @@ -11,6 +11,24 @@ \item{...}{Additional arguments to tidying method.} } +\value{ +A data frame with one row per ATT(g,t) estimate and columns: +\item{term}{ATT(g,t) label} +\item{group}{the treatment cohort g} +\item{time}{the time period t} +\item{estimate}{the ATT(g,t) point estimate} +\item{std.error}{standard error} +\item{statistic}{t-statistic (\code{estimate / std.error})} +\item{p.value}{two-sided pointwise p-value (\code{2*(1-pnorm(|t|))}). +Marginal per-estimate; does \strong{not} account for multiple testing +across ATT(g,t) cells.} +\item{conf.low, conf.high}{simultaneous confidence band limits, using the +bootstrap uniform critical value when \code{bstrap=TRUE} and +\code{cband=TRUE}, otherwise pointwise} +\item{point.conf.low, point.conf.high}{pointwise confidence interval limits +using \code{qnorm(1 - alp/2)}, always} +} \description{ -tidy results from MP objects +Returns a tidy data frame of group-time average treatment effect estimates +from an \code{\link[=att_gt]{att_gt()}} result. } From 40b7d53a2e1be62b053efb50c71d6a0cca57ff45 Mon Sep 17 00:00:00 2001 From: pedrohcgs Date: Mon, 2 Mar 2026 13:26:20 -0500 Subject: [PATCH 6/9] Bump version to 2.3.1.903 --- DESCRIPTION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index 1e54baf8..2c28a058 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: did Title: Treatment Effects with Multiple Periods and Groups -Version: 2.3.1.902 +Version: 2.3.1.903 Authors@R: c(person("Brantly", "Callaway", email = "brantly.callaway@uga.edu", role = c("aut", "cre")), person("Pedro H. C.", "Sant'Anna", email="pedro.santanna@emory.edu", role = c("aut"))) URL: https://bcallaway11.github.io/did/, https://github.com/bcallaway11/did/ Description: The standard Difference-in-Differences (DID) setup involves two periods and two groups -- a treated group and untreated group. Many applications of DID methods involve more than two periods and have individuals that are treated at different points in time. This package contains tools for computing average treatment effect parameters in Difference in Differences setups with more than two periods and with variation in treatment timing using the methods developed in Callaway and Sant'Anna (2021) . The main parameters are group-time average treatment effects which are the average treatment effect for a particular group at a a particular time. These can be aggregated into a fewer number of treatment effect parameters, and the package deals with the cases where there is selective treatment timing, dynamic treatment effects, calendar time effects, or combinations of these. There are also functions for testing the Difference in Differences assumption, and plotting group-time average treatment effects. From 60fbe2197c53e5fa11dd52639b90234d0029d1d7 Mon Sep 17 00:00:00 2001 From: pedrohcgs Date: Mon, 2 Mar 2026 13:58:49 -0500 Subject: [PATCH 7/9] Address Copilot review: fix Rd markup, t-statistic wording, add tidy/nobs tests --- R/tidy.R | 10 +++- man/tidy.AGGTEobj.Rd | 5 +- man/tidy.MP.Rd | 5 +- tests/testthat/test-tidy.R | 119 +++++++++++++++++++++++++++++++++++++ 4 files changed, 135 insertions(+), 4 deletions(-) create mode 100644 tests/testthat/test-tidy.R diff --git a/R/tidy.R b/R/tidy.R index 44572993..4439a75d 100644 --- a/R/tidy.R +++ b/R/tidy.R @@ -42,13 +42,15 @@ nobs.AGGTEobj <- function(object, ...) { #' @param ... Additional arguments to tidying method. #' #' @return A data frame with one row per ATT(g,t) estimate and columns: +#' \describe{ #' \item{term}{ATT(g,t) label} #' \item{group}{the treatment cohort g} #' \item{time}{the time period t} #' \item{estimate}{the ATT(g,t) point estimate} #' \item{std.error}{standard error} #' \item{statistic}{t-statistic (\code{estimate / std.error})} -#' \item{p.value}{two-sided pointwise p-value (\code{2*(1-pnorm(|t|))}). +#' \item{p.value}{two-sided pointwise p-value +#' (\code{2 * (1 - pnorm(abs(statistic)))}). #' Marginal per-estimate; does \strong{not} account for multiple testing #' across ATT(g,t) cells.} #' \item{conf.low, conf.high}{simultaneous confidence band limits, using the @@ -56,6 +58,7 @@ nobs.AGGTEobj <- function(object, ...) { #' \code{cband=TRUE}, otherwise pointwise} #' \item{point.conf.low, point.conf.high}{pointwise confidence interval limits #' using \code{qnorm(1 - alp/2)}, always} +#' } #' #' @export tidy.MP <- function(x, ...) { @@ -100,13 +103,15 @@ glance.MP <- function(x, ...) { #' @param ... Additional arguments to tidying method. #' #' @return A data frame whose columns depend on \code{type}: +#' \describe{ #' \item{type}{the aggregation type: \code{"simple"}, \code{"dynamic"}, #' \code{"group"}, or \code{"calendar"}} #' \item{term}{label for each estimate} #' \item{estimate}{point estimate} #' \item{std.error}{standard error} #' \item{statistic}{t-statistic (\code{estimate / std.error})} -#' \item{p.value}{two-sided pointwise p-value (\code{2*(1-pnorm(|t|))}). +#' \item{p.value}{two-sided pointwise p-value +#' (\code{2 * (1 - pnorm(abs(statistic)))}). #' Marginal per-estimate; does \strong{not} account for multiple testing #' across event times or groups.} #' \item{conf.low, conf.high}{simultaneous confidence band limits. When @@ -117,6 +122,7 @@ glance.MP <- function(x, ...) { #' pointwise coincide.} #' \item{point.conf.low, point.conf.high}{pointwise confidence interval limits #' always using \code{qnorm(1 - alp/2)}.} +#' } #' #' @details #' The key distinction between \code{conf.low}/\code{conf.high} and diff --git a/man/tidy.AGGTEobj.Rd b/man/tidy.AGGTEobj.Rd index f9a51129..7c120372 100644 --- a/man/tidy.AGGTEobj.Rd +++ b/man/tidy.AGGTEobj.Rd @@ -13,13 +13,15 @@ } \value{ A data frame whose columns depend on \code{type}: +\describe{ \item{type}{the aggregation type: \code{"simple"}, \code{"dynamic"}, \code{"group"}, or \code{"calendar"}} \item{term}{label for each estimate} \item{estimate}{point estimate} \item{std.error}{standard error} \item{statistic}{t-statistic (\code{estimate / std.error})} -\item{p.value}{two-sided pointwise p-value (\code{2*(1-pnorm(|t|))}). +\item{p.value}{two-sided pointwise p-value +(\code{2 * (1 - pnorm(abs(statistic)))}). Marginal per-estimate; does \strong{not} account for multiple testing across event times or groups.} \item{conf.low, conf.high}{simultaneous confidence band limits. When @@ -31,6 +33,7 @@ pointwise coincide.} \item{point.conf.low, point.conf.high}{pointwise confidence interval limits always using \code{qnorm(1 - alp/2)}.} } +} \description{ Returns a tidy data frame of aggregated treatment effect estimates from an \code{\link[=aggte]{aggte()}} result. diff --git a/man/tidy.MP.Rd b/man/tidy.MP.Rd index 9164265f..b0056312 100644 --- a/man/tidy.MP.Rd +++ b/man/tidy.MP.Rd @@ -13,13 +13,15 @@ } \value{ A data frame with one row per ATT(g,t) estimate and columns: +\describe{ \item{term}{ATT(g,t) label} \item{group}{the treatment cohort g} \item{time}{the time period t} \item{estimate}{the ATT(g,t) point estimate} \item{std.error}{standard error} \item{statistic}{t-statistic (\code{estimate / std.error})} -\item{p.value}{two-sided pointwise p-value (\code{2*(1-pnorm(|t|))}). +\item{p.value}{two-sided pointwise p-value +(\code{2 * (1 - pnorm(abs(statistic)))}). Marginal per-estimate; does \strong{not} account for multiple testing across ATT(g,t) cells.} \item{conf.low, conf.high}{simultaneous confidence band limits, using the @@ -28,6 +30,7 @@ bootstrap uniform critical value when \code{bstrap=TRUE} and \item{point.conf.low, point.conf.high}{pointwise confidence interval limits using \code{qnorm(1 - alp/2)}, always} } +} \description{ Returns a tidy data frame of group-time average treatment effect estimates from an \code{\link[=att_gt]{att_gt()}} result. diff --git a/tests/testthat/test-tidy.R b/tests/testthat/test-tidy.R new file mode 100644 index 00000000..ad1cd703 --- /dev/null +++ b/tests/testthat/test-tidy.R @@ -0,0 +1,119 @@ + +#----------------------------------------------------------------------------- +# +# Tests for tidy(), glance(), and nobs() S3 methods +# +#----------------------------------------------------------------------------- + +data(mpdta, package = "did") + +set.seed(1234) +mp <- att_gt( + yname = "lemp", + gname = "first.treat", + idname = "countyreal", + tname = "year", + xformla = ~1, + data = mpdta, + bstrap = TRUE, + cband = TRUE +) + +agg_dyn <- aggte(mp, type = "dynamic") +agg_group <- aggte(mp, type = "group") +agg_calendar <- aggte(mp, type = "calendar") +agg_simple <- aggte(mp, type = "simple") + +# --------------------------------------------------------------------------- +# tidy.MP +# --------------------------------------------------------------------------- + +test_that("tidy.MP returns expected columns", { + td <- broom::tidy(mp) + expect_s3_class(td, "data.frame") + expect_true(all(c("term", "group", "time", "estimate", "std.error", + "statistic", "p.value", + "conf.low", "conf.high", + "point.conf.low", "point.conf.high") %in% names(td))) +}) + +test_that("tidy.MP statistic equals estimate / std.error", { + td <- broom::tidy(mp) + expect_equal(td$statistic, td$estimate / td$std.error) +}) + +test_that("tidy.MP p.value matches normal approximation", { + td <- broom::tidy(mp) + expected <- 2 * (1 - pnorm(abs(td$statistic))) + expect_equal(td$p.value, expected) +}) + +test_that("tidy.MP p.value is between 0 and 1", { + td <- broom::tidy(mp) + expect_true(all(td$p.value >= 0 & td$p.value <= 1, na.rm = TRUE)) +}) + +# --------------------------------------------------------------------------- +# tidy.AGGTEobj +# --------------------------------------------------------------------------- + +test_that("tidy.AGGTEobj (dynamic) returns expected columns", { + td <- broom::tidy(agg_dyn) + expect_true(all(c("type", "term", "event.time", "estimate", "std.error", + "statistic", "p.value", + "conf.low", "conf.high", + "point.conf.low", "point.conf.high") %in% names(td))) +}) + +test_that("tidy.AGGTEobj (group) returns expected columns", { + td <- broom::tidy(agg_group) + expect_true(all(c("type", "term", "group", "estimate", "std.error", + "statistic", "p.value", + "conf.low", "conf.high", + "point.conf.low", "point.conf.high") %in% names(td))) +}) + +test_that("tidy.AGGTEobj (calendar) returns expected columns", { + td <- broom::tidy(agg_calendar) + expect_true(all(c("type", "term", "estimate", "std.error", + "statistic", "p.value", + "conf.low", "conf.high", + "point.conf.low", "point.conf.high") %in% names(td))) +}) + +test_that("tidy.AGGTEobj (simple) returns expected columns", { + td <- broom::tidy(agg_simple) + expect_true(all(c("type", "term", "estimate", "std.error", + "statistic", "p.value", + "conf.low", "conf.high", + "point.conf.low", "point.conf.high") %in% names(td))) +}) + +test_that("tidy.AGGTEobj statistic and p.value are consistent", { + for (agg in list(agg_dyn, agg_group, agg_calendar, agg_simple)) { + td <- broom::tidy(agg) + expect_equal(td$statistic, td$estimate / td$std.error) + expect_equal(td$p.value, 2 * (1 - pnorm(abs(td$statistic)))) + } +}) + +# --------------------------------------------------------------------------- +# nobs +# --------------------------------------------------------------------------- + +test_that("nobs.MP returns number of unique units", { + n <- stats::nobs(mp) + expect_type(n, "integer") + expect_equal(n, length(unique(mpdta$countyreal))) +}) + +test_that("nobs.AGGTEobj returns number of unique units", { + for (agg in list(agg_dyn, agg_group, agg_calendar, agg_simple)) { + n <- stats::nobs(agg) + expect_equal(n, length(unique(mpdta$countyreal))) + } +}) + +test_that("nobs.MP and nobs.AGGTEobj agree", { + expect_equal(stats::nobs(mp), stats::nobs(agg_dyn)) +}) From 960d9defb222b8669800e459b0bdc8ba90366635 Mon Sep 17 00:00:00 2001 From: pedrohcgs Date: Mon, 2 Mar 2026 14:25:03 -0500 Subject: [PATCH 8/9] Address Copilot review: coerce nobs to integer, fix test warnings (bstrap/cband=FALSE) --- R/tidy.R | 6 +++--- tests/testthat/test-tidy.R | 11 +++++------ 2 files changed, 8 insertions(+), 9 deletions(-) diff --git a/R/tidy.R b/R/tidy.R index 4439a75d..ca47388a 100644 --- a/R/tidy.R +++ b/R/tidy.R @@ -14,7 +14,7 @@ generics::glance #' @return Integer. The number of unique units in the data. #' @export nobs.MP <- function(object, ...) { - object$n + as.integer(object$n) } #' Number of observations used to fit an AGGTEobj object @@ -26,9 +26,9 @@ nobs.MP <- function(object, ...) { #' @export nobs.AGGTEobj <- function(object, ...) { if (object$DIDparams$faster_mode) { - object$DIDparams$id_count + as.integer(object$DIDparams$id_count) } else { - object$DIDparams$n + as.integer(object$DIDparams$n) } } diff --git a/tests/testthat/test-tidy.R b/tests/testthat/test-tidy.R index ad1cd703..83afc7ac 100644 --- a/tests/testthat/test-tidy.R +++ b/tests/testthat/test-tidy.R @@ -15,14 +15,13 @@ mp <- att_gt( tname = "year", xformla = ~1, data = mpdta, - bstrap = TRUE, - cband = TRUE + bstrap = FALSE ) -agg_dyn <- aggte(mp, type = "dynamic") -agg_group <- aggte(mp, type = "group") -agg_calendar <- aggte(mp, type = "calendar") -agg_simple <- aggte(mp, type = "simple") +agg_dyn <- aggte(mp, type = "dynamic", bstrap = FALSE, cband = FALSE) +agg_group <- aggte(mp, type = "group", bstrap = FALSE, cband = FALSE) +agg_calendar <- aggte(mp, type = "calendar", bstrap = FALSE, cband = FALSE) +agg_simple <- aggte(mp, type = "simple", bstrap = FALSE, cband = FALSE) # --------------------------------------------------------------------------- # tidy.MP From 7288124d840406fbfd8dab3c02418e3d79bb1a34 Mon Sep 17 00:00:00 2001 From: pedrohcgs Date: Mon, 2 Mar 2026 14:40:35 -0500 Subject: [PATCH 9/9] Address Copilot review: nobs titles, broom in Suggests, fix test header --- DESCRIPTION | 3 ++- R/tidy.R | 8 ++++---- man/nobs.AGGTEobj.Rd | 6 +++--- man/nobs.MP.Rd | 6 +++--- tests/testthat/test-tidy.R | 4 +++- 5 files changed, 15 insertions(+), 12 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 2c28a058..e239a2ea 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -30,4 +30,5 @@ Suggests: here, knitr, covr, - backports + backports, + broom diff --git a/R/tidy.R b/R/tidy.R index ca47388a..bb176f5d 100644 --- a/R/tidy.R +++ b/R/tidy.R @@ -6,23 +6,23 @@ generics::tidy #' @export generics::glance -#' Number of observations used to fit an MP object +#' Number of unique cross-sectional units in an MP object #' #' @importFrom stats nobs #' @param object a model of class MP produced by the [att_gt()] function #' @param ... additional arguments (ignored) -#' @return Integer. The number of unique units in the data. +#' @return Integer. The number of unique cross-sectional units in the data. #' @export nobs.MP <- function(object, ...) { as.integer(object$n) } -#' Number of observations used to fit an AGGTEobj object +#' Number of unique cross-sectional units in an AGGTEobj object #' #' @importFrom stats nobs #' @param object a model of class AGGTEobj produced by the [aggte()] function #' @param ... additional arguments (ignored) -#' @return Integer. The number of unique units in the data. +#' @return Integer. The number of unique cross-sectional units in the data. #' @export nobs.AGGTEobj <- function(object, ...) { if (object$DIDparams$faster_mode) { diff --git a/man/nobs.AGGTEobj.Rd b/man/nobs.AGGTEobj.Rd index 9e261250..a52425f0 100644 --- a/man/nobs.AGGTEobj.Rd +++ b/man/nobs.AGGTEobj.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/tidy.R \name{nobs.AGGTEobj} \alias{nobs.AGGTEobj} -\title{Number of observations used to fit an AGGTEobj object} +\title{Number of unique cross-sectional units in an AGGTEobj object} \usage{ \method{nobs}{AGGTEobj}(object, ...) } @@ -12,8 +12,8 @@ \item{...}{additional arguments (ignored)} } \value{ -Integer. The number of unique units in the data. +Integer. The number of unique cross-sectional units in the data. } \description{ -Number of observations used to fit an AGGTEobj object +Number of unique cross-sectional units in an AGGTEobj object } diff --git a/man/nobs.MP.Rd b/man/nobs.MP.Rd index 6d3bec68..bac9e665 100644 --- a/man/nobs.MP.Rd +++ b/man/nobs.MP.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/tidy.R \name{nobs.MP} \alias{nobs.MP} -\title{Number of observations used to fit an MP object} +\title{Number of unique cross-sectional units in an MP object} \usage{ \method{nobs}{MP}(object, ...) } @@ -12,8 +12,8 @@ \item{...}{additional arguments (ignored)} } \value{ -Integer. The number of unique units in the data. +Integer. The number of unique cross-sectional units in the data. } \description{ -Number of observations used to fit an MP object +Number of unique cross-sectional units in an MP object } diff --git a/tests/testthat/test-tidy.R b/tests/testthat/test-tidy.R index 83afc7ac..727642bd 100644 --- a/tests/testthat/test-tidy.R +++ b/tests/testthat/test-tidy.R @@ -1,10 +1,12 @@ #----------------------------------------------------------------------------- # -# Tests for tidy(), glance(), and nobs() S3 methods +# Tests for tidy() and nobs() S3 methods # #----------------------------------------------------------------------------- +skip_if_not_installed("broom") + data(mpdta, package = "did") set.seed(1234)